@@ -0,0 +1,169 @@
|
||||
using Api.Models.Rag;
|
||||
using Api.Settings;
|
||||
using Microsoft.Extensions.Options;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace Api.Services.Rag;
|
||||
|
||||
/// <summary>
/// Operations for indexing a candidate CV and matching an indexed CV against a job.
/// </summary>
public interface ICvRagService
{
    /// <summary>
    /// Ingests an uploaded CV file and returns a description of the indexed document.
    /// </summary>
    /// <param name="file">The uploaded CV file.</param>
    /// <param name="gdprConsent">Whether GDPR consent was given for processing the CV.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The ingest result for the uploaded CV.</returns>
    Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct);

    /// <summary>
    /// Compares a previously ingested CV with the job described by <paramref name="request"/>.
    /// </summary>
    /// <param name="request">The match request (CV document id, job source and consent flag).</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The match assessment for the CV and job.</returns>
    Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct);
}
|
||||
|
||||
/// <summary>
/// Default <see cref="ICvRagService"/> implementation. Ingestion extracts text from a PDF,
/// chunks it, embeds each chunk and saves the chunks in the vector store. Matching embeds the
/// job text, retrieves the most similar CV chunks and asks the chat model for a JSON assessment.
/// </summary>
public sealed class CvRagService : ICvRagService
{
    /// <summary>Minimum number of extracted characters (CV or job text) required to proceed.</summary>
    private const int MinExtractedTextLength = 80;

    /// <summary>Lower bound applied to the configured CV time-to-live, in minutes.</summary>
    private const int MinCvTtlMinutes = 10;

    /// <summary>Maximum number of CV characters sent to the model when summarizing.</summary>
    private const int MaxSummaryInputLength = 8000;

    /// <summary>Maximum length of a fallback evidence snippet before "..." is appended.</summary>
    private const int MaxEvidenceLength = 280;

    /// <summary>Summary returned when the model summary is missing or cannot be produced.</summary>
    private const string DefaultSummary = "CV indexed.";

    // Cached: constructing JsonSerializerOptions per call discards the serializer's metadata cache (CA1869).
    private static readonly JsonSerializerOptions MatchJsonOptions = new() { PropertyNameCaseInsensitive = true };

    private readonly IPdfTextExtractor _pdfTextExtractor;
    private readonly ITextChunker _textChunker;
    private readonly IOpenAiRagClient _openAi;
    private readonly ICvVectorStore _store;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly RagSettings _settings;
    private readonly ILogger<CvRagService> _logger;

    /// <summary>
    /// Creates the service from its collaborators and the bound <see cref="RagSettings"/>.
    /// </summary>
    public CvRagService(
        IPdfTextExtractor pdfTextExtractor,
        ITextChunker textChunker,
        IOpenAiRagClient openAi,
        ICvVectorStore store,
        IJobTextExtractor jobTextExtractor,
        IOptions<RagSettings> options,
        ILogger<CvRagService> logger)
    {
        _pdfTextExtractor = pdfTextExtractor;
        _textChunker = textChunker;
        _openAi = openAi;
        _store = store;
        _jobTextExtractor = jobTextExtractor;
        _settings = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Validates the uploaded PDF, extracts and chunks its text, embeds every chunk, saves the
    /// chunks under a newly generated document id and returns that id with a one-sentence summary.
    /// </summary>
    /// <param name="file">The uploaded CV; must be a non-empty <c>.pdf</c> within the configured size limit.</param>
    /// <param name="gdprConsent">Must be <see langword="true"/>; otherwise the call is rejected.</param>
    /// <param name="ct">Token used to cancel embedding and summarization.</param>
    /// <returns>The document id, number of stored chunks, extracted text length and summary.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// Consent is missing, the file is empty, too large or not a PDF, or too little text could be extracted.
    /// </exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(file);

        if (!gdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (file.Length == 0)
        {
            throw new InvalidOperationException("CV PDF is empty.");
        }

        if (file.Length > _settings.MaxPdfSizeMb * 1024L * 1024L)
        {
            throw new InvalidOperationException($"PDF is too large. Max size is {_settings.MaxPdfSizeMb} MB.");
        }

        if (!string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("Only PDF files are accepted.");
        }

        await using var stream = file.OpenReadStream();
        var text = _pdfTextExtractor.ExtractText(stream);
        if (text is null || text.Length < MinExtractedTextLength)
        {
            throw new InvalidOperationException("Could not extract enough text from this PDF.");
        }

        var documentId = $"cv_{Guid.NewGuid():N}";

        // The configured TTL is floored at MinCvTtlMinutes; every chunk carries the same expiry stamp.
        var expiresAt = DateTimeOffset.UtcNow.AddMinutes(Math.Max(MinCvTtlMinutes, _settings.CvTtlMinutes));
        var chunks = _textChunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        var stored = new List<StoredCvChunk>(chunks.Count);
        for (var i = 0; i < chunks.Count; i++)
        {
            // Embeddings are requested one chunk at a time; check for cancellation between calls.
            ct.ThrowIfCancellationRequested();

            stored.Add(new StoredCvChunk
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = documentId,
                Text = chunks[i],
                Embedding = await _openAi.CreateEmbeddingAsync(chunks[i], ct),
                ChunkIndex = i,
                ExpiresAt = expiresAt
            });
        }

        // Chunks are saved before summarizing, so a failed summary does not lose the indexed CV.
        _store.Save(documentId, stored);

        var summary = await SummarizeCvAsync(text, ct);
        return new CvIngestResponse(documentId, stored.Count, text.Length, summary);
    }

    /// <summary>
    /// Loads the stored CV, extracts the job text, retrieves the most similar CV chunks and asks
    /// the chat model for a JSON assessment. If the model returns no evidence, truncated retrieved
    /// chunks are used as evidence instead.
    /// </summary>
    /// <param name="request">The match request; requires consent and a CV document id.</param>
    /// <param name="ct">Token used to cancel extraction, embedding and completion calls.</param>
    /// <returns>The parsed assessment, or a fallback response when the model output is not valid JSON.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// Consent or the document id is missing, the CV is not in the store, or too little job text was extracted.
    /// </exception>
    public async Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (!request.GdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
        {
            throw new InvalidOperationException("Missing CV document id.");
        }

        var cvChunks = _store.Get(request.CvDocumentId);
        if (cvChunks.Count == 0)
        {
            throw new InvalidOperationException("CV context was not found or has expired. Upload the CV again.");
        }

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText.Length < MinExtractedTextLength)
        {
            throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");
        }

        var jobEmbedding = await _openAi.CreateEmbeddingAsync(jobText, ct);
        var retrieved = _store.Search(request.CvDocumentId, jobEmbedding, _settings.TopK);
        var cvContext = string.Join(
            "\n\n",
            retrieved.Select(x => $"CV chunk {x.ChunkIndex} | similarity {x.Score:0.000}:\n{x.Text}"));

        var systemPrompt = "You are a strict senior technical recruiter and AI CV matcher. Return only valid JSON. Do not invent candidate experience. Use only the supplied CV context and job text.";

        // $$-raw string: single braces are literal JSON, {{...}} are interpolation holes.
        var userPrompt = $$"""
            Compare the candidate CV context with the job description.
            Return this JSON shape exactly:
            {
            "score": 0,
            "summary": "short direct assessment",
            "strengths": ["strength 1"],
            "gaps": ["gap 1"],
            "recommendations": ["action 1"],
            "evidence": ["short CV evidence quote or paraphrase"]
            }
            Score must be 0-100.

            CV CONTEXT:
            {{cvContext}}

            JOB DESCRIPTION:
            {{jobText}}
            """;

        var content = await _openAi.CreateChatCompletionAsync(systemPrompt, userPrompt, ct);
        var response = ParseMatchResponse(content);

        if (response.Evidence.Count == 0)
        {
            // No evidence from the model: fall back to shortened versions of the retrieved chunks.
            response.Evidence = retrieved
                .Select(x => x.Text.Length > MaxEvidenceLength
                    ? TruncateAtCharBoundary(x.Text, MaxEvidenceLength) + "..."
                    : x.Text)
                .ToList();
        }

        return response;
    }

    /// <summary>
    /// Asks the chat model for a one-sentence CV summary. This is best effort: any failure other
    /// than cancellation of <paramref name="ct"/> is logged and replaced by a default summary.
    /// </summary>
    /// <param name="cvText">The full extracted CV text; only the leading part is sent to the model.</param>
    /// <param name="ct">Token used to cancel the completion call.</param>
    /// <returns>The model's summary, or "CV indexed." when none is available.</returns>
    private async Task<string> SummarizeCvAsync(string cvText, CancellationToken ct)
    {
        try
        {
            var shortened = TruncateAtCharBoundary(cvText, MaxSummaryInputLength);
            var content = await _openAi.CreateChatCompletionAsync(
                "Return only valid JSON.",
                $$"""
                Summarize this CV in one concise sentence. Return JSON: { "summary": "..." }

                CV:
                {{shortened}}
                """,
                ct);

            using var doc = JsonDocument.Parse(content);
            return doc.RootElement.TryGetProperty("summary", out var summary)
                ? summary.GetString() ?? DefaultSummary
                : DefaultSummary;
        }
        catch (Exception ex) when (!ct.IsCancellationRequested)
        {
            // The filter lets exceptions through once the caller has cancelled, so cancellation
            // is not converted into a successful "CV indexed." result.
            _logger.LogWarning(ex, "CV summary failed");
            return DefaultSummary;
        }
    }

    /// <summary>
    /// Deserializes the model's JSON answer, clamps the score to 0-100 and replaces missing lists
    /// with empty ones. Unparseable content is logged and yields a fallback response with score 0.
    /// </summary>
    /// <param name="content">The raw chat completion content.</param>
    /// <returns>The parsed response, or the fallback response when parsing fails.</returns>
    private JobMatchResponse ParseMatchResponse(string content)
    {
        try
        {
            var response = JsonSerializer.Deserialize<JobMatchResponse>(content, MatchJsonOptions) ?? new JobMatchResponse();
            response.Score = Math.Clamp(response.Score, 0, 100);
            response.Strengths ??= [];
            response.Gaps ??= [];
            response.Recommendations ??= [];
            response.Evidence ??= [];
            return response;
        }
        catch (Exception ex)
        {
            // The fallback summary points the user at the logs, so record the failure here.
            // Only the exception is logged: the raw model output may contain text from the CV.
            _logger.LogWarning(ex, "Job match response could not be parsed");

            return new JobMatchResponse
            {
                Score = 0,
                Summary = "The AI response could not be parsed. Check logs and prompt output.",
                Strengths = [],
                Gaps = ["Invalid JSON returned by the model."],
                Recommendations = [],
                Evidence = []
            };
        }
    }

    /// <summary>
    /// Returns <paramref name="value"/> cut to at most <paramref name="maxLength"/> UTF-16 code
    /// units, cutting one unit earlier when the cut would otherwise split a surrogate pair.
    /// </summary>
    /// <param name="value">The text to shorten.</param>
    /// <param name="maxLength">The maximum length; expected to be at least 1.</param>
    private static string TruncateAtCharBoundary(string value, int maxLength)
    {
        if (value.Length <= maxLength)
        {
            return value;
        }

        var cut = char.IsHighSurrogate(value[maxLength - 1]) ? maxLength - 1 : maxLength;
        return value[..cut];
    }
}
|
||||
Reference in New Issue
Block a user