166 lines
6.5 KiB
C#
166 lines
6.5 KiB
C#
using api.Services.Contracts.Rag;
|
|
using Api.Models.Rag;
|
|
using Api.Services.Contracts.Rag;
|
|
using Api.Settings;
|
|
using Microsoft.Extensions.Options;
|
|
using System.Text.Json;
|
|
|
|
namespace Api.Services.Rag;
|
|
|
|
/// <summary>
/// Retrieval-augmented CV matching: indexes an uploaded CV PDF as embedded text chunks and
/// scores a previously indexed CV against a job description through the AI client.
/// </summary>
public sealed class CvRagService : ICvRagService
{
    // Shortest extracted text (in characters) accepted for a CV or a job posting.
    private const int MinExtractedTextLength = 80;

    // Lower bound applied to the configured CV time-to-live, in minutes.
    private const int MinCvTtlMinutes = 10;

    // Number of leading CV characters sent to the summary prompt.
    private const int MaxSummaryInputLength = 8000;

    // Longest evidence snippet copied from a retrieved chunk before "..." is appended.
    private const int MaxEvidenceLength = 280;

    // Summary returned whenever the AI-generated summary is unavailable.
    private const string DefaultCvSummary = "CV indexed.";

    // Shared instance: JsonSerializerOptions caches serialization metadata, so creating
    // a fresh one for every parse throws that cache away.
    private static readonly JsonSerializerOptions MatchJsonOptions = new() { PropertyNameCaseInsensitive = true };

    private readonly IPdfTextExtractor _pdfTextExtractor;
    private readonly ITextChunker _textChunker;
    private readonly IAiRagClient _openAi;
    private readonly ICvVectorStore _store;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly RagSettings _settings;
    private readonly ILogger<CvRagService> _logger;

    /// <summary>Creates the service from its collaborators and the bound <see cref="RagSettings"/>.</summary>
    public CvRagService(
        IPdfTextExtractor pdfTextExtractor,
        ITextChunker textChunker,
        IAiRagClient openAi,
        ICvVectorStore store,
        IJobTextExtractor jobTextExtractor,
        IOptions<RagSettings> options,
        ILogger<CvRagService> logger)
    {
        _pdfTextExtractor = pdfTextExtractor;
        _textChunker = textChunker;
        _openAi = openAi;
        _store = store;
        _jobTextExtractor = jobTextExtractor;
        _settings = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Extracts the text of an uploaded CV PDF, splits it into chunks, embeds every chunk,
    /// saves the chunks under a new document id and returns that id with a one-sentence summary.
    /// </summary>
    /// <param name="file">Uploaded CV; must be a non-empty <c>.pdf</c> within the configured size limit.</param>
    /// <param name="gdprConsent">Must be <see langword="true"/>; otherwise the upload is rejected.</param>
    /// <param name="ct">Cancels chunk embedding and summary generation.</param>
    /// <returns>The new document id, the number of stored chunks, the extracted text length and the summary.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// Consent is missing, the file is empty, too large or not a PDF, or too little text could be extracted.
    /// </exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        ValidateUpload(file, gdprConsent);

        await using var stream = file.OpenReadStream();
        var text = _pdfTextExtractor.ExtractText(stream);
        if (string.IsNullOrWhiteSpace(text) || text.Length < MinExtractedTextLength)
        {
            throw new InvalidOperationException("Could not extract enough text from this PDF.");
        }

        var documentId = $"cv_{Guid.NewGuid():N}";
        var expiresAt = DateTimeOffset.UtcNow.AddMinutes(Math.Max(MinCvTtlMinutes, _settings.CvTtlMinutes));
        var chunks = _textChunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        var stored = new List<StoredCvChunk>(chunks.Count);
        for (var i = 0; i < chunks.Count; i++)
        {
            ct.ThrowIfCancellationRequested();

            // Embeddings are requested one chunk at a time, in chunk order.
            stored.Add(new StoredCvChunk
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = documentId,
                Text = chunks[i],
                Embedding = await _openAi.CreateEmbeddingAsync(chunks[i], ct),
                ChunkIndex = i,
                ExpiresAt = expiresAt
            });
        }

        _store.Save(documentId, stored);

        // The summary is best-effort: a failure yields a default text, not a failed ingest.
        var summary = await SummarizeCvAsync(text, ct);
        return new CvIngestResponse(documentId, stored.Count, text.Length, summary);
    }

    /// <summary>
    /// Scores a previously ingested CV against a job posting: retrieves the CV chunks most similar
    /// to the job text and asks the AI client for a structured assessment.
    /// </summary>
    /// <param name="request">Consent flag, CV document id and the job URL and/or pasted description.</param>
    /// <param name="ct">Cancels job-text extraction, embedding and the chat completion.</param>
    /// <returns>
    /// The parsed assessment. When the model supplies no evidence, the retrieved chunk texts
    /// (truncated) are used as evidence instead.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// Consent or the document id is missing, the CV is not in the store, or too little job text was extracted.
    /// </exception>
    public async Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (!request.GdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
        {
            throw new InvalidOperationException("Missing CV document id.");
        }

        var cvChunks = _store.Get(request.CvDocumentId);
        if (cvChunks.Count == 0)
        {
            throw new InvalidOperationException("CV context was not found or has expired. Upload the CV again.");
        }

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (string.IsNullOrWhiteSpace(jobText) || jobText.Length < MinExtractedTextLength)
        {
            throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");
        }

        var jobEmbedding = await _openAi.CreateEmbeddingAsync(jobText, ct);
        var retrieved = _store.Search(request.CvDocumentId, jobEmbedding, _settings.TopK);

        // Invariant culture keeps the similarity as "0.873" even on servers whose culture
        // uses a decimal comma; the prompt is machine-facing text, not UI.
        var cvContext = string.Join(
            "\n\n",
            retrieved.Select(x => FormattableString.Invariant($"CV chunk {x.ChunkIndex} | similarity {x.Score:0.000}:\n{x.Text}")));

        var systemPrompt = "You are a strict senior technical recruiter and AI CV matcher. Return only valid JSON. Do not invent candidate experience. Use only the supplied CV context and job text.";
        var userPrompt = BuildMatchUserPrompt(cvContext, jobText);

        var content = await _openAi.CreateChatCompletionAsync(systemPrompt, userPrompt, ct);
        var response = ParseMatchResponse(content);
        if (response.Evidence.Count == 0)
        {
            response.Evidence = retrieved
                .Select(x => x.Text.Length > MaxEvidenceLength ? x.Text[..MaxEvidenceLength] + "..." : x.Text)
                .ToList();
        }

        return response;
    }

    /// <summary>Rejects uploads without consent and files that are missing, empty, oversized or not named <c>*.pdf</c>.</summary>
    private void ValidateUpload(IFormFile file, bool gdprConsent)
    {
        if (!gdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        // Checked after consent so a missing-consent request still fails with the consent error.
        ArgumentNullException.ThrowIfNull(file);

        if (file.Length == 0)
        {
            throw new InvalidOperationException("CV PDF is empty.");
        }

        if (file.Length > _settings.MaxPdfSizeMb * 1024L * 1024L)
        {
            throw new InvalidOperationException($"PDF is too large. Max size is {_settings.MaxPdfSizeMb} MB.");
        }

        // Only the file name extension is inspected here; the content itself is not sniffed.
        if (!string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("Only PDF files are accepted.");
        }
    }

    /// <summary>Builds the user prompt that carries the retrieved CV context and the job text.</summary>
    private static string BuildMatchUserPrompt(string cvContext, string jobText) =>
        $$"""
        Compare the candidate CV context with the job description.
        Return this JSON shape exactly:
        {
        "score": 0,
        "summary": "short direct assessment",
        "strengths": ["strength 1"],
        "gaps": ["gap 1"],
        "recommendations": ["action 1"],
        "evidence": ["short CV evidence quote or paraphrase"]
        }
        Score must be 0-100.

        CV CONTEXT:
        {{cvContext}}

        JOB DESCRIPTION:
        {{jobText}}
        """;

    /// <summary>
    /// Asks the AI client for a one-sentence summary of the CV. Best-effort: any failure other than
    /// caller-requested cancellation is logged and replaced by a default summary.
    /// </summary>
    /// <param name="cvText">Full extracted CV text; only its first 8000 characters are sent.</param>
    /// <param name="ct">Cancels the chat completion.</param>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    private async Task<string> SummarizeCvAsync(string cvText, CancellationToken ct)
    {
        try
        {
            var shortened = cvText.Length > MaxSummaryInputLength ? cvText[..MaxSummaryInputLength] : cvText;
            var content = await _openAi.CreateChatCompletionAsync(
                "Return only valid JSON.",
                $$"""
                Summarize this CV in one concise sentence. Return JSON: { "summary": "..." }

                CV:
                {{shortened}}
                """,
                ct);

            using var doc = JsonDocument.Parse(content);
            var summary = doc.RootElement.TryGetProperty("summary", out var element) ? element.GetString() : null;
            return string.IsNullOrWhiteSpace(summary) ? DefaultCvSummary : summary;
        }
        catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
        {
            // Caller-requested cancellation is excluded by the filter and propagates;
            // everything else (bad JSON, client errors, client-side timeouts) degrades gracefully.
            _logger.LogWarning(ex, "CV summary failed");
            return DefaultCvSummary;
        }
    }

    /// <summary>
    /// Deserializes the model output into a <see cref="JobMatchResponse"/>, clamps the score to 0-100
    /// and replaces missing lists with empty ones. Unparseable output is logged and turned into a
    /// zero-score response that reports the problem.
    /// </summary>
    /// <param name="content">Raw chat completion text, expected to be a JSON object.</param>
    private JobMatchResponse ParseMatchResponse(string content)
    {
        try
        {
            var response = JsonSerializer.Deserialize<JobMatchResponse>(content, MatchJsonOptions) ?? new JobMatchResponse();
            response.Score = Math.Clamp(response.Score, 0, 100);
            response.Strengths ??= [];
            response.Gaps ??= [];
            response.Recommendations ??= [];
            response.Evidence ??= [];
            return response;
        }
        catch (Exception ex)
        {
            // The fallback summary points the user at the logs, so the failure has to be logged.
            // Only the length is recorded: the content may quote personal data from the CV.
            _logger.LogWarning(ex, "Job match response could not be parsed ({ContentLength} characters)", content?.Length ?? 0);

            return new JobMatchResponse
            {
                Score = 0,
                Summary = "The AI response could not be parsed. Check logs and prompt output.",
                Strengths = [],
                Gaps = ["Invalid JSON returned by the model."],
                Recommendations = [],
                Evidence = []
            };
        }
    }
}
|