using System.Text.Json;
using Api.Models.Rag;
// NOTE(review): lower-case `api` root namespace differs from the `Api` root used elsewhere — confirm it is intentional.
using api.Services.Contracts.Rag;
using Api.Services.Contracts.Rag;
using Api.Settings;
using Microsoft.Extensions.Options;

namespace Api.Services.Rag;

/// <summary>
/// Ingests CV PDFs (extract text, chunk, embed, store with an expiry) and scores a stored CV
/// against a job description by retrieving the most similar chunks and asking the chat model
/// for a JSON assessment.
/// </summary>
public sealed class CvRagService : ICvRagService
{
    /// <summary>Minimum number of extracted characters required from a CV or a job posting.</summary>
    private const int MinExtractedTextLength = 80;

    /// <summary>Lower bound, in minutes, applied to the configured CV retention time.</summary>
    private const int MinCvTtlMinutes = 10;

    /// <summary>Maximum number of CV characters sent to the model when summarizing.</summary>
    private const int SummaryInputMaxChars = 8000;

    /// <summary>Maximum length of a fallback evidence snippet before "..." is appended.</summary>
    private const int EvidenceSnippetMaxChars = 280;

    /// <summary>Summary returned when the model summary is unavailable.</summary>
    private const string FallbackSummary = "CV indexed.";

    // Cached once: creating JsonSerializerOptions per call discards the serializer's metadata cache (CA1869).
    private static readonly JsonSerializerOptions MatchJsonOptions = new()
    {
        PropertyNameCaseInsensitive = true
    };

    private readonly IPdfTextExtractor _pdfTextExtractor;
    private readonly ITextChunker _textChunker;
    private readonly IAiRagClient _openAi;
    private readonly ICvVectorStore _store;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly RagSettings _settings;
    private readonly ILogger<CvRagService> _logger;

    /// <summary>Creates the service from its collaborators; settings are read once from <paramref name="options"/>.</summary>
    public CvRagService(
        IPdfTextExtractor pdfTextExtractor,
        ITextChunker textChunker,
        IAiRagClient openAi,
        ICvVectorStore store,
        IJobTextExtractor jobTextExtractor,
        IOptions<RagSettings> options,
        ILogger<CvRagService> logger)
    {
        _pdfTextExtractor = pdfTextExtractor;
        _textChunker = textChunker;
        _openAi = openAi;
        _store = store;
        _jobTextExtractor = jobTextExtractor;
        _settings = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Validates and indexes an uploaded CV PDF: extracts its text, splits it into chunks,
    /// embeds every chunk, saves the chunks under a new document id and returns a short summary.
    /// </summary>
    /// <param name="file">Uploaded CV; must be a non-empty <c>.pdf</c> within the configured size limit.</param>
    /// <param name="gdprConsent">Must be <c>true</c>; otherwise nothing is processed.</param>
    /// <param name="ct">Cancellation token, checked before each chunk and passed to the AI client.</param>
    /// <returns>The new document id, stored chunk count, extracted text length and a summary.</returns>
    /// <exception cref="InvalidOperationException">Consent is missing or the file fails validation.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
    public async Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        if (!gdprConsent)
            throw new InvalidOperationException("GDPR consent is required.");

        ArgumentNullException.ThrowIfNull(file);

        if (file.Length == 0)
            throw new InvalidOperationException("CV PDF is empty.");
        if (file.Length > _settings.MaxPdfSizeMb * 1024L * 1024L)
            throw new InvalidOperationException($"PDF is too large. Max size is {_settings.MaxPdfSizeMb} MB.");
        if (!string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase))
            throw new InvalidOperationException("Only PDF files are accepted.");

        await using var stream = file.OpenReadStream();
        var text = _pdfTextExtractor.ExtractText(stream);
        if (text.Length < MinExtractedTextLength)
            throw new InvalidOperationException("Could not extract enough text from this PDF.");

        var documentId = $"cv_{Guid.NewGuid():N}";
        // The configured TTL is honoured only when it is at least the minimum retention.
        var expiresAt = DateTimeOffset.UtcNow.AddMinutes(Math.Max(MinCvTtlMinutes, _settings.CvTtlMinutes));
        var chunks = _textChunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        var stored = new List<StoredCvChunk>(chunks.Count);
        for (var i = 0; i < chunks.Count; i++)
        {
            ct.ThrowIfCancellationRequested();
            stored.Add(new StoredCvChunk
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = documentId,
                Text = chunks[i],
                Embedding = await _openAi.CreateEmbeddingAsync(chunks[i], ct),
                ChunkIndex = i,
                ExpiresAt = expiresAt
            });
        }

        _store.Save(documentId, stored);

        // The summary is best-effort; the chunks are already saved at this point.
        var summary = await SummarizeCvAsync(text, ct);
        return new CvIngestResponse(documentId, stored.Count, text.Length, summary);
    }

    /// <summary>
    /// Scores a previously ingested CV against a job posting. The job text is embedded, the
    /// top-K most similar CV chunks are retrieved, and the chat model is asked for a JSON verdict.
    /// </summary>
    /// <param name="request">Consent flag, CV document id and either a job URL or a pasted description.</param>
    /// <param name="ct">Cancellation token passed to the job extractor and the AI client.</param>
    /// <returns>
    /// The parsed model assessment. When it carries no evidence, the retrieved CV chunks
    /// (shortened) are used as evidence instead.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// Consent is missing, the document id is blank, the CV is not in the store, or too little job text was found.
    /// </exception>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    public async Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (!request.GdprConsent)
            throw new InvalidOperationException("GDPR consent is required.");
        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
            throw new InvalidOperationException("Missing CV document id.");

        var cvChunks = _store.Get(request.CvDocumentId);
        if (cvChunks.Count == 0)
            throw new InvalidOperationException("CV context was not found or has expired. Upload the CV again.");

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText.Length < MinExtractedTextLength)
            throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");

        var jobEmbedding = await _openAi.CreateEmbeddingAsync(jobText, ct);
        var retrieved = _store.Search(request.CvDocumentId, jobEmbedding, _settings.TopK);

        var cvContext = string.Join(
            "\n\n",
            retrieved.Select(x => $"CV chunk {x.ChunkIndex} | similarity {x.Score:0.000}:\n{x.Text}"));

        var systemPrompt = "You are a strict senior technical recruiter and AI CV matcher. Return only valid JSON. Do not invent candidate experience. Use only the supplied CV context and job text.";

        // Kept as a single-line raw literal so the prompt text sent to the model is unchanged.
        var userPrompt = $$""" Compare the candidate CV context with the job description. Return this JSON shape exactly: { "score": 0, "summary": "short direct assessment", "strengths": ["strength 1"], "gaps": ["gap 1"], "recommendations": ["action 1"], "evidence": ["short CV evidence quote or paraphrase"] } Score must be 0-100. CV CONTEXT: {{cvContext}} JOB DESCRIPTION: {{jobText}} """;

        var content = await _openAi.CreateChatCompletionAsync(systemPrompt, userPrompt, ct);
        var response = ParseMatchResponse(content);

        if (response.Evidence.Count == 0)
        {
            // No evidence from the model (or the response was unparseable): fall back to the retrieved chunks.
            response.Evidence = retrieved
                .Select(x => x.Text.Length > EvidenceSnippetMaxChars
                    ? Truncate(x.Text, EvidenceSnippetMaxChars) + "..."
                    : x.Text)
                .ToList();
        }

        return response;
    }

    /// <summary>
    /// Asks the chat model for a one-sentence summary of the CV. Any failure other than a
    /// caller-requested cancellation is logged and replaced by a generic summary.
    /// </summary>
    /// <param name="cvText">Full extracted CV text; only the first 8000 characters are sent.</param>
    /// <param name="ct">Cancellation token passed to the AI client.</param>
    /// <returns>The model's summary, or "CV indexed." when none could be obtained.</returns>
    private async Task<string> SummarizeCvAsync(string cvText, CancellationToken ct)
    {
        try
        {
            var shortened = Truncate(cvText, SummaryInputMaxChars);

            // Kept as a single-line raw literal so the prompt text sent to the model is unchanged.
            var content = await _openAi.CreateChatCompletionAsync(
                "Return only valid JSON.",
                $$""" Summarize this CV in one concise sentence. Return JSON: { "summary": "..." } CV: {{shortened}} """,
                ct);

            using var doc = JsonDocument.Parse(content);
            return doc.RootElement.TryGetProperty("summary", out var summary)
                ? summary.GetString() ?? FallbackSummary
                : FallbackSummary;
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller aborted the request: propagate instead of reporting a successful ingest.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "CV summary failed");
            return FallbackSummary;
        }
    }

    /// <summary>
    /// Deserializes the model's JSON answer into a <see cref="JobMatchResponse"/>. If that fails,
    /// the failure is logged and a zero-score response describing the problem is returned.
    /// </summary>
    /// <param name="content">Raw chat-completion text, expected to be a JSON object.</param>
    /// <returns>A response whose score is clamped to 0-100 and whose list properties are never null.</returns>
    private JobMatchResponse ParseMatchResponse(string content)
    {
        try
        {
            var response = JsonSerializer.Deserialize<JobMatchResponse>(content, MatchJsonOptions)
                ?? new JobMatchResponse();
            return Normalize(response);
        }
        catch (Exception ex)
        {
            // Log only the length: the model output may echo CV content, which should stay out of logs.
            _logger.LogWarning(
                ex,
                "Job match response could not be parsed ({ContentLength} chars)",
                content?.Length ?? 0);

            return Normalize(new JobMatchResponse
            {
                Score = 0,
                Summary = "The AI response could not be parsed. Check logs and prompt output.",
                Gaps = ["Invalid JSON returned by the model."],
                Evidence = []
            });
        }
    }

    /// <summary>Clamps the score to 0-100 and replaces null list properties with empty lists.</summary>
    private static JobMatchResponse Normalize(JobMatchResponse response)
    {
        response.Score = Math.Clamp(response.Score, 0, 100);
        response.Strengths ??= [];
        response.Gaps ??= [];
        response.Recommendations ??= [];
        response.Evidence ??= [];
        return response;
    }

    /// <summary>
    /// Returns at most <paramref name="maxLength"/> leading characters of <paramref name="value"/>,
    /// cutting one character earlier when the cut would otherwise separate a surrogate pair.
    /// </summary>
    private static string Truncate(string value, int maxLength)
    {
        if (value.Length <= maxLength)
            return value;

        var cut = char.IsHighSurrogate(value[maxLength - 1]) ? maxLength - 1 : maxLength;
        return value[..cut];
    }
}