Changes
Build and Push Docker Images / build (push) Successful in 42s

This commit is contained in:
2026-05-04 15:56:15 +03:00
parent 540720e771
commit 2dce2ab0ff
14 changed files with 656 additions and 5 deletions
+169
View File
@@ -0,0 +1,169 @@
using Api.Models.Rag;
using Api.Settings;
using Microsoft.Extensions.Options;
using System.Text.Json;
namespace Api.Services.Rag;
/// <summary>
/// Contract for the CV matching workflow: ingest an uploaded CV, then match the
/// ingested CV against a job posting.
/// </summary>
public interface ICvRagService
{
    /// <summary>
    /// Ingests an uploaded CV file and returns the result of the ingest.
    /// </summary>
    /// <param name="file">The uploaded CV file.</param>
    /// <param name="gdprConsent">Whether the caller has given GDPR consent for processing the CV.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A <see cref="CvIngestResponse"/> describing the ingested document.</returns>
    Task<CvIngestResponse> IngestCvAsync(
        IFormFile file,
        bool gdprConsent,
        CancellationToken ct);

    /// <summary>
    /// Matches a previously ingested CV against the job described by <paramref name="request"/>.
    /// </summary>
    /// <param name="request">The match request (CV document id, job source and consent flag).</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A <see cref="JobMatchResponse"/> with the match assessment.</returns>
    Task<JobMatchResponse> MatchJobAsync(
        JobMatchRequest request,
        CancellationToken ct);
}
/// <summary>
/// <see cref="ICvRagService"/> implementation that extracts text from an uploaded CV PDF,
/// chunks and embeds it into the vector store, and later asks the chat model to score the
/// stored CV context against a job description.
/// </summary>
public sealed class CvRagService : ICvRagService
{
    /// <summary>Minimum number of extracted characters required for CV or job text to be usable.</summary>
    private const int MinExtractedTextLength = 80;

    /// <summary>Maximum number of CV characters sent to the model when summarizing.</summary>
    private const int MaxSummaryInputLength = 8000;

    /// <summary>Maximum length of a CV chunk snippet used as fallback evidence.</summary>
    private const int MaxEvidenceSnippetLength = 280;

    /// <summary>Lower bound applied to the configured CV time-to-live, in minutes.</summary>
    private const int MinCvTtlMinutes = 10;

    /// <summary>Summary returned when the model summary is unavailable.</summary>
    private const string DefaultCvSummary = "CV indexed.";

    // Cached: creating JsonSerializerOptions per call discards the serializer's metadata cache.
    private static readonly JsonSerializerOptions MatchJsonOptions = new() { PropertyNameCaseInsensitive = true };

    private readonly IPdfTextExtractor _pdfTextExtractor;
    private readonly ITextChunker _textChunker;
    private readonly IOpenAiRagClient _openAi;
    private readonly ICvVectorStore _store;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly RagSettings _settings;
    private readonly ILogger<CvRagService> _logger;

    /// <summary>Creates the service from its collaborators and the bound <see cref="RagSettings"/>.</summary>
    public CvRagService(
        IPdfTextExtractor pdfTextExtractor,
        ITextChunker textChunker,
        IOpenAiRagClient openAi,
        ICvVectorStore store,
        IJobTextExtractor jobTextExtractor,
        IOptions<RagSettings> options,
        ILogger<CvRagService> logger)
    {
        _pdfTextExtractor = pdfTextExtractor;
        _textChunker = textChunker;
        _openAi = openAi;
        _store = store;
        _jobTextExtractor = jobTextExtractor;
        _settings = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Validates the uploaded PDF, extracts and chunks its text, embeds every chunk, saves the
    /// chunks under a new document id and returns that id together with a one-sentence summary.
    /// </summary>
    /// <param name="file">The uploaded CV; must be a non-empty <c>.pdf</c> within the configured size limit.</param>
    /// <param name="gdprConsent">Must be <c>true</c>; otherwise the request is rejected.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The new document id, stored chunk count, extracted text length and summary.</returns>
    /// <exception cref="InvalidOperationException">Consent is missing, or the file is missing, empty, too large, not a PDF, or yields too little text.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        if (!gdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        // A missing upload arrives as null; report it like an empty file instead of failing with a NullReferenceException.
        if (file is null || file.Length == 0)
        {
            throw new InvalidOperationException("CV PDF is empty.");
        }

        if (file.Length > _settings.MaxPdfSizeMb * 1024L * 1024L)
        {
            throw new InvalidOperationException($"PDF is too large. Max size is {_settings.MaxPdfSizeMb} MB.");
        }

        if (!string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("Only PDF files are accepted.");
        }

        await using var stream = file.OpenReadStream();
        var text = _pdfTextExtractor.ExtractText(stream);

        // Whitespace-only output is as unusable as too-short output, even when it is long.
        if (string.IsNullOrWhiteSpace(text) || text.Length < MinExtractedTextLength)
        {
            throw new InvalidOperationException("Could not extract enough text from this PDF.");
        }

        var chunks = _textChunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        // Saving zero chunks would hand back a document id that can never be matched.
        if (chunks.Count == 0)
        {
            throw new InvalidOperationException("Could not extract enough text from this PDF.");
        }

        var documentId = $"cv_{Guid.NewGuid():N}";
        var expiresAt = DateTimeOffset.UtcNow.AddMinutes(Math.Max(MinCvTtlMinutes, _settings.CvTtlMinutes));
        var stored = new List<StoredCvChunk>(chunks.Count);

        for (var i = 0; i < chunks.Count; i++)
        {
            ct.ThrowIfCancellationRequested();

            stored.Add(new StoredCvChunk
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = documentId,
                Text = chunks[i],
                Embedding = await _openAi.CreateEmbeddingAsync(chunks[i], ct),
                ChunkIndex = i,
                ExpiresAt = expiresAt
            });
        }

        _store.Save(documentId, stored);

        var summary = await SummarizeCvAsync(text, ct);
        return new CvIngestResponse(documentId, stored.Count, text.Length, summary);
    }

    /// <summary>
    /// Retrieves the stored CV chunks most similar to the job text and asks the chat model
    /// for a structured match assessment.
    /// </summary>
    /// <param name="request">Match request carrying the CV document id, the job URL and/or description, and the consent flag.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The parsed model assessment, or a zero-score fallback when the model output cannot be parsed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">Consent or document id is missing, the CV is not in the store, or too little job text was obtained.</exception>
    public async Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (!request.GdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
        {
            throw new InvalidOperationException("Missing CV document id.");
        }

        var cvChunks = _store.Get(request.CvDocumentId);
        if (cvChunks.Count == 0)
        {
            throw new InvalidOperationException("CV context was not found or has expired. Upload the CV again.");
        }

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (string.IsNullOrWhiteSpace(jobText) || jobText.Length < MinExtractedTextLength)
        {
            throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");
        }

        var jobEmbedding = await _openAi.CreateEmbeddingAsync(jobText, ct);
        var retrieved = _store.Search(request.CvDocumentId, jobEmbedding, _settings.TopK);
        var cvContext = string.Join("\n\n", retrieved.Select(x => $"CV chunk {x.ChunkIndex} | similarity {x.Score:0.000}:\n{x.Text}"));

        var systemPrompt = "You are a strict senior technical recruiter and AI CV matcher. Return only valid JSON. Do not invent candidate experience. Use only the supplied CV context and job text.";
        var userPrompt = $$"""
            Compare the candidate CV context with the job description.
            Return this JSON shape exactly:
            {
            "score": 0,
            "summary": "short direct assessment",
            "strengths": ["strength 1"],
            "gaps": ["gap 1"],
            "recommendations": ["action 1"],
            "evidence": ["short CV evidence quote or paraphrase"]
            }
            Score must be 0-100.
            CV CONTEXT:
            {{cvContext}}
            JOB DESCRIPTION:
            {{jobText}}
            """;

        var content = await _openAi.CreateChatCompletionAsync(systemPrompt, userPrompt, ct);
        var response = ParseMatchResponse(content);

        // When the model supplied no evidence, fall back to snippets of the retrieved chunks.
        if (response.Evidence.Count == 0)
        {
            response.Evidence = retrieved
                .Select(x => x.Text.Length > MaxEvidenceSnippetLength
                    ? TruncateAtCharBoundary(x.Text, MaxEvidenceSnippetLength) + "..."
                    : x.Text)
                .ToList();
        }

        return response;
    }

    /// <summary>
    /// Asks the chat model for a one-sentence CV summary. This is best-effort: any failure other
    /// than caller cancellation is logged and replaced by a generic summary.
    /// </summary>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    private async Task<string> SummarizeCvAsync(string cvText, CancellationToken ct)
    {
        try
        {
            var shortened = TruncateAtCharBoundary(cvText, MaxSummaryInputLength);
            var content = await _openAi.CreateChatCompletionAsync(
                "Return only valid JSON.",
                $$"""
                Summarize this CV in one concise sentence. Return JSON: { "summary": "..." }
                CV:
                {{shortened}}
                """,
                ct);

            using var doc = JsonDocument.Parse(ExtractJsonObject(content));
            return doc.RootElement.TryGetProperty("summary", out var summary)
                ? summary.GetString() ?? DefaultCvSummary
                : DefaultCvSummary;
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller cancelled: propagate instead of reporting a successful ingest.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "CV summary failed");
            return DefaultCvSummary;
        }
    }

    /// <summary>
    /// Deserializes the model output into a <see cref="JobMatchResponse"/>, clamping the score to
    /// 0-100 and replacing null lists with empty ones. Never throws: unparseable output is logged
    /// and converted into a zero-score fallback response.
    /// </summary>
    private JobMatchResponse ParseMatchResponse(string content)
    {
        try
        {
            var response = JsonSerializer.Deserialize<JobMatchResponse>(ExtractJsonObject(content), MatchJsonOptions)
                ?? new JobMatchResponse();

            response.Score = Math.Clamp(response.Score, 0, 100);
            response.Strengths ??= [];
            response.Gaps ??= [];
            response.Recommendations ??= [];
            response.Evidence ??= [];
            return response;
        }
        catch (Exception ex)
        {
            // Log only the length: the raw output may contain personal data from the CV.
            _logger.LogWarning(ex, "Job match response could not be parsed ({ContentLength} characters)", content?.Length ?? 0);

            return new JobMatchResponse
            {
                Score = 0,
                Summary = "The AI response could not be parsed. Check logs and prompt output.",
                Gaps = ["Invalid JSON returned by the model."],
                Evidence = []
            };
        }
    }

    /// <summary>
    /// Returns the span from the first '{' to the last '}' of <paramref name="content"/> so that
    /// output wrapped in Markdown fences or surrounding prose can still be parsed. Returns the
    /// input unchanged when no such span exists, and an empty string for null or blank input.
    /// </summary>
    private static string ExtractJsonObject(string content)
    {
        if (string.IsNullOrWhiteSpace(content))
        {
            return string.Empty;
        }

        var start = content.IndexOf('{');
        var end = content.LastIndexOf('}');
        return start >= 0 && end > start ? content[start..(end + 1)] : content;
    }

    /// <summary>
    /// Returns at most <paramref name="maxLength"/> leading characters of <paramref name="value"/>,
    /// dropping one more character when the cut would otherwise split a surrogate pair.
    /// </summary>
    private static string TruncateAtCharBoundary(string value, int maxLength)
    {
        if (value.Length <= maxLength)
        {
            return value;
        }

        var length = char.IsHighSurrogate(value[maxLength - 1]) ? maxLength - 1 : maxLength;
        return value[..length];
    }
}