using System.Text.Json;
using Api.Clients.Ai.Contracts;
using Api.Clients.Api.Contracts;
using CvMatcher.Data.Repositories.Contracts;
using CvMatcher.Models.Requests;
using CvMatcher.Models.Responses;
using CvMatcher.Models.Settings;
using Api.Services.Contracts;
using Microsoft.Extensions.Options;

namespace Api.Services;

/// <summary>
/// Orchestrates CV upload, RAG indexing, job text extraction, LLM scoring, and result caching.
/// </summary>
public sealed class CvMatcherService : ICvMatcherService
{
    private const string CvDocumentType = "cv";
    private const string JobDocumentType = "job";
    private const string SystemPromptKey = "ai.cv-match.system-prompt";
    private const string DefaultLanguage = "en";

    // Character budgets applied to the texts that are embedded in prompts and search queries.
    private const int MaxCvPromptChars = 18000;
    private const int MaxJobPromptChars = 14000;
    private const int MaxFallbackEvidenceChars = 4000;
    private const int MaxSearchProfileChars = 10000;

    private const int MaxEvidenceChunks = 4;
    private const int MinJobTextLength = 80;
    private const int MinDeepScoreLimit = 1;
    private const int MaxDeepScoreLimit = 10;
    private const int MinSingleMatchTopK = 5;
    private const int MinTitleLength = 8;
    private const int MaxTitleLength = 140;
    private const decimal ScoringTemperature = 0.2m;

    // Serializer options are cached: building a new instance per call discards the
    // serializer's metadata cache every time.
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web);

    private readonly IRagApiClient _rag;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly IMatcherAiClient _ai;
    private readonly IMatcherRepository _repository;
    private readonly IAiPromptsRepository _aiPrompts;
    private readonly MatcherSettings _settings;

    /// <summary>
    /// Creates the service with its collaborators and the matcher settings snapshot.
    /// </summary>
    public CvMatcherService(
        IRagApiClient rag,
        IJobTextExtractor jobTextExtractor,
        IMatcherAiClient ai,
        IMatcherRepository repository,
        IAiPromptsRepository aiPrompts,
        IOptions<MatcherSettings> options)
    {
        ArgumentNullException.ThrowIfNull(options);

        _rag = rag;
        _jobTextExtractor = jobTextExtractor;
        _ai = ai;
        _repository = repository;
        _aiPrompts = aiPrompts;
        _settings = options.Value;
    }

    /// <inheritdoc />
    public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct)
    {
        var response = await _rag.IndexCvPdfAsync(file, ct);

        return new CvUploadResponse
        {
            DocumentId = response.DocumentId,
            TextHash = response.TextHash,
            DocumentType = response.DocumentType,
            Title = response.Title,
            Chunks = response.Chunks,
            Characters = response.Characters,
            Cached = response.Cached,
            Summary = response.Cached
                ? "CV already indexed. Cached data reused."
                : "CV indexed successfully."
        };
    }

    /// <inheritdoc />
    /// <exception cref="InvalidOperationException">
    /// The CV document does not exist or is not of type "cv".
    /// </exception>
    public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(request);

        var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct)
            ?? throw new InvalidOperationException("CV document not found.");
        EnsureIsCv(cv);

        var search = await _rag.SearchAsync(new RagSearchRequest
        {
            QueryText = BuildCvSearchProfile(cv.Text),
            TargetDocumentTypes = [JobDocumentType],
            TopK = request.TopK ?? _settings.TopK
        }, ct);

        // Only the best few search hits are scored by the LLM; the configured value is
        // clamped to the 1..10 range.
        var deepScoreLimit = Math.Clamp(_settings.DeepScoreTopN, MinDeepScoreLimit, MaxDeepScoreLimit);
        var jobs = new List<JobMatchResponse>();

        foreach (var result in search.Results.Take(deepScoreLimit))
        {
            var job = await _rag.GetDocumentAsync(result.DocumentId, ct);
            if (job is null)
            {
                // The search hit points at a document that can no longer be loaded; skip it.
                continue;
            }

            var evidence = result.MatchedChunks.Select(x => x.Text).ToArray();

            // No language is taken from the request here, so the default language is used.
            jobs.Add(await ScorePairAsync(cv, job, evidence, request.Email, NormalizeLanguage(null), ct));
        }

        return new FindJobsResponse
        {
            CvDocumentId = request.CvDocumentId,
            Jobs = jobs
        };
    }

    /// <inheritdoc />
    /// <exception cref="InvalidOperationException">
    /// GDPR consent is missing, the CV id is blank, the CV document does not exist or is not a CV,
    /// too little job text could be extracted, or the freshly indexed job cannot be loaded.
    /// </exception>
    public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (!request.GdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
        {
            throw new InvalidOperationException("Missing CV document id.");
        }

        var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct)
            ?? throw new InvalidOperationException("CV document not found.");

        // Same guard as FindJobsAsync: scoring an arbitrary document as a CV yields meaningless results.
        EnsureIsCv(cv);

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText is null || jobText.Length < MinJobTextLength)
        {
            throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");
        }

        var job = await _rag.IndexJobTextAsync(jobText, request.JobUrl, ExtractJobTitle(jobText), ct);
        var jobDocument = await _rag.GetDocumentAsync(job.DocumentId, ct)
            ?? throw new InvalidOperationException("Indexed job document not found.");

        var search = await _rag.SearchAsync(new RagSearchRequest
        {
            QueryText = BuildCvSearchProfile(cv.Text),
            TargetDocumentTypes = [JobDocumentType],
            TopK = Math.Max(MinSingleMatchTopK, _settings.TopK)
        }, ct);

        // Use the chunks matched for this specific job when it shows up in the search results;
        // otherwise pass no evidence and let the scorer fall back to the raw job text.
        var matchedChunks = search.Results
            .FirstOrDefault(x => x.DocumentId == job.DocumentId)?
            .MatchedChunks.Select(x => x.Text).ToArray() ?? [];

        return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct);
    }

    /// <summary>
    /// Scores a (CV, job) pair with the LLM.
    /// Returns a cached result immediately when the same (CV, job, language) triple has been scored before.
    /// When no evidence chunks are available from the vector search, falls back to the raw job text.
    /// </summary>
    /// <param name="cv">The CV document.</param>
    /// <param name="job">The job document.</param>
    /// <param name="evidenceChunks">Job text chunks matched by the vector search; may be empty.</param>
    /// <param name="email">Requester e-mail; not used by this method at present.</param>
    /// <param name="language">Normalized language code used to select the system prompt and the cache entry.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The cached or freshly computed match result.</returns>
    /// <exception cref="InvalidOperationException">No system prompt exists for <paramref name="language"/>.</exception>
    private async Task<JobMatchResponse> ScorePairAsync(
        RagDocumentDetails cv,
        RagDocumentDetails job,
        IReadOnlyList<string> evidenceChunks,
        string? email,
        string language,
        CancellationToken ct)
    {
        var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
        if (cached is not null)
        {
            return cached;
        }

        var cvText = Limit(cv.Text, MaxCvPromptChars);
        var jobText = Limit(job.Text, MaxJobPromptChars);
        var evidence = evidenceChunks.Count > 0
            ? string.Join("\n\n", evidenceChunks.Take(MaxEvidenceChunks))
            : Limit(job.Text, MaxFallbackEvidenceChars);

        var systemPrompt = await _aiPrompts.GetAsync(SystemPromptKey, language, ct)
            ?? throw new InvalidOperationException(
                $"AI prompt not found: key='{SystemPromptKey}', language='{language}'. " +
                "This is a configuration error. Ensure the cvMatcher.AiPrompts table is properly seeded with language-specific prompts.");

        // Each section gets its own label line so the model can tell the three texts apart.
        var userPrompt = $"""
            CV:
            {cvText}

            JOB:
            {jobText}

            SEMANTICALLY MATCHED JOB EVIDENCE:
            {evidence}
            """;

        var json = await _ai.CreateChatCompletionAsync(systemPrompt, userPrompt, ScoringTemperature, ct);

        var parsed = TryParseResult(json);
        var result = parsed ?? CreateUnparsableResult();
        result.JobDocumentId = job.Id;
        result.JobUrl = job.SourceUrl;
        result.Cached = false;

        // Persist only genuine scores. Saving the fallback would make a single malformed model
        // reply the permanent answer for this (CV, job, language) triple.
        if (parsed is not null)
        {
            await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct);
        }

        return result;
    }

    /// <summary>
    /// Throws when <paramref name="document"/> is not typed as a CV (case-insensitive comparison).
    /// </summary>
    /// <exception cref="InvalidOperationException">The document type is not "cv".</exception>
    private static void EnsureIsCv(RagDocumentDetails document)
    {
        if (!string.Equals(document.DocumentType, CvDocumentType, StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("The provided document is not a CV.");
        }
    }

    /// <summary>
    /// Deserialises the LLM's JSON output into a <see cref="JobMatchResponse"/>.
    /// Returns <c>null</c> instead of throwing when the output cannot be parsed.
    /// </summary>
    /// <remarks>
    /// The raw text is tried first. If that fails, the span from the first '{' to the last '}'
    /// is tried, which covers replies that wrap the JSON object in a code fence or extra prose.
    /// </remarks>
    private static JobMatchResponse? TryParseResult(string? json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return null;
        }

        var parsed = TryDeserialize(json);
        if (parsed is not null)
        {
            return parsed;
        }

        var start = json.IndexOf('{');
        var end = json.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            return null;
        }

        var candidate = json[start..(end + 1)];

        // An identical candidate has already been tried above.
        return candidate.Length == json.Length ? null : TryDeserialize(candidate);
    }

    /// <summary>
    /// Deserialises <paramref name="json"/>, mapping the serializer's documented failures to <c>null</c>.
    /// </summary>
    private static JobMatchResponse? TryDeserialize(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<JobMatchResponse>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is JsonException or NotSupportedException)
        {
            return null;
        }
    }

    /// <summary>
    /// Builds the safe response returned when the model output is not usable JSON.
    /// </summary>
    private static JobMatchResponse CreateUnparsableResult() => new()
    {
        Score = 0,
        Summary = "The AI response could not be parsed as structured JSON.",
        Recommendations = ["Inspect the raw model output and tune the scoring prompt."]
    };

    /// <summary>
    /// Builds a descriptive search query from the CV text for use in vector similarity search.
    /// </summary>
    private static string BuildCvSearchProfile(string cvText)
    {
        var text = Limit(cvText, MaxSearchProfileChars);
        return $"Candidate profile, skills, technologies, seniority, industry experience, project experience: {text}";
    }

    /// <summary>
    /// Extracts a short job title from the first sentence-like fragment of the job text.
    /// </summary>
    /// <returns>
    /// The first fragment (split on '.', line feed or carriage return, then trimmed) whose length is
    /// between 9 and 139 characters, or "Job description" when there is none.
    /// </returns>
    private static string ExtractJobTitle(string jobText)
    {
        var first = jobText
            .Split('.', '\n', '\r')
            .Select(x => x.Trim())
            .FirstOrDefault(x => x.Length is > MinTitleLength and < MaxTitleLength);

        return first ?? "Job description";
    }

    /// <summary>
    /// Returns the base language code, lower-cased, defaulting to "en".
    /// </summary>
    /// <remarks>
    /// Both '-' and '_' are treated as region separators, so "en-US" and "en_US" give "en".
    /// Input with an empty base part (for example "-US") also yields the default.
    /// </remarks>
    private static string NormalizeLanguage(string? language)
    {
        if (string.IsNullOrWhiteSpace(language))
        {
            return DefaultLanguage;
        }

        var baseCode = language.Trim().ToLowerInvariant().Split('-', '_')[0].Trim();
        return baseCode.Length == 0 ? DefaultLanguage : baseCode;
    }

    /// <summary>
    /// Truncates <paramref name="value"/> to at most <paramref name="max"/> characters.
    /// </summary>
    /// <remarks>
    /// When the cut would fall between the two halves of a surrogate pair, the result is one
    /// character shorter so that no lone high surrogate is left at the end.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="max"/> is negative.</exception>
    private static string Limit(string value, int max)
    {
        ArgumentNullException.ThrowIfNull(value);

        if (max < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(max), max, "The maximum length cannot be negative.");
        }

        if (value.Length <= max)
        {
            return value;
        }

        var end = max > 0 && char.IsHighSurrogate(value[max - 1]) ? max - 1 : max;
        return value[..end];
    }
}