1e8758796e
- Frontend: update extractApiError to check body.code first via i18n 'error.<code>' keys; add en/ro translations for cv_file_missing, captcha_verification_failed, request_cancelled - email-data migration: seed 6 fallback template keys (match N/A, subject label, unknown IP, job search results empty states for keywords/providers/location) - EmailApiEmailSender: replace "N/A", "Job", "Unknown" literals with template lookups - CvSearchEmailSender: replace "none detected", "none", "-" literals with template lookups - cv-matcher-data migration: seed parse-error.summary and parse-error.recommendation in AiPrompts - CvMatcherService: look up localized parse-error messages from AiPrompts before calling ParseResult Closes #53 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
206 lines
8.7 KiB
C#
206 lines
8.7 KiB
C#
using System.Text.Json;
|
|
using Api.Clients.Ai.Contracts;
|
|
using Api.Clients.Api.Contracts;
|
|
using CvMatcher.Data.Repositories.Contracts;
|
|
using CvMatcher.Models.Requests;
|
|
using CvMatcher.Models.Responses;
|
|
using CvMatcher.Models.Settings;
|
|
using Api.Services.Contracts;
|
|
using Microsoft.Extensions.Options;
|
|
|
|
namespace Api.Services;
|
|
|
|
/// <summary>
|
|
/// Orchestrates CV upload, RAG indexing, job text extraction, LLM scoring, and result caching.
|
|
/// </summary>
|
|
public sealed class CvMatcherService : ICvMatcherService
|
|
{
|
|
// Collaborators are supplied once through the constructor and never reassigned.
private readonly IRagApiClient _rag;                  // document indexing, lookup and search
private readonly IJobTextExtractor _jobTextExtractor; // job text from a URL and/or pasted description
private readonly IMatcherAiClient _ai;                // chat-completion calls used for scoring
private readonly IMatcherRepository _repository;      // stored match results
private readonly IAiPromptsRepository _aiPrompts;     // per-language prompt and message texts
private readonly MatcherSettings _settings;           // TopK / DeepScoreTopN configuration

/// <summary>
/// Creates the service from its injected collaborators.
/// </summary>
/// <param name="rag">Client used to index, fetch and search documents.</param>
/// <param name="jobTextExtractor">Extractor that produces the job text for a match request.</param>
/// <param name="ai">Client used to request chat completions.</param>
/// <param name="repository">Store for previously computed match results.</param>
/// <param name="aiPrompts">Source of language-specific prompt texts.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is read once here and kept for the service lifetime.</param>
public CvMatcherService(
    IRagApiClient rag,
    IJobTextExtractor jobTextExtractor,
    IMatcherAiClient ai,
    IMatcherRepository repository,
    IAiPromptsRepository aiPrompts,
    IOptions<MatcherSettings> options)
{
    (_rag, _jobTextExtractor, _ai, _repository, _aiPrompts, _settings) =
        (rag, jobTextExtractor, ai, repository, aiPrompts, options.Value);
}
|
|
|
|
/// <inheritdoc />
public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct)
{
    // Indexing is delegated to the RAG client; this method only reshapes its answer.
    var indexed = await _rag.IndexCvPdfAsync(file, ct);

    // The summary tells the caller whether cached index data was reused.
    string summary;
    if (indexed.Cached)
    {
        summary = "CV already indexed. Cached data reused.";
    }
    else
    {
        summary = "CV indexed successfully.";
    }

    var upload = new CvUploadResponse
    {
        DocumentId = indexed.DocumentId,
        TextHash = indexed.TextHash,
        DocumentType = indexed.DocumentType,
        Title = indexed.Title,
        Chunks = indexed.Chunks,
        Characters = indexed.Characters,
        Cached = indexed.Cached,
        Summary = summary
    };
    return upload;
}
|
|
|
|
/// <inheritdoc />
public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
{
    var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct);
    if (cv is null)
    {
        throw new InvalidOperationException("CV document not found.");
    }

    // Only documents typed "cv" (case-insensitive) may drive a job search.
    var isCv = string.Equals(cv.DocumentType, "cv", StringComparison.OrdinalIgnoreCase);
    if (!isCv)
    {
        throw new InvalidOperationException("The provided document is not a CV.");
    }

    // A per-request TopK wins over the configured default.
    var query = new RagSearchRequest
    {
        QueryText = BuildCvSearchProfile(cv.Text),
        TargetDocumentTypes = ["job"],
        TopK = request.TopK ?? _settings.TopK
    };
    var search = await _rag.SearchAsync(query, ct);

    // LLM scoring is limited to the first N search results; the configured N is clamped to 1..10.
    var deepScoreLimit = Math.Clamp(_settings.DeepScoreTopN, 1, 10);

    // No language is taken from the request here, so scoring uses the default language.
    var language = NormalizeLanguage(null);

    var jobs = new List<JobMatchResponse>();
    foreach (var hit in search.Results.Take(deepScoreLimit))
    {
        var job = await _rag.GetDocumentAsync(hit.DocumentId, ct);
        if (job is not null)
        {
            // Hits whose document cannot be fetched are skipped silently.
            var evidence = hit.MatchedChunks.Select(chunk => chunk.Text).ToArray();
            jobs.Add(await ScorePairAsync(cv, job, evidence, request.Email, null, language, ct));
        }
    }

    return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Validates the request, obtains and indexes the job text, then scores the CV against that job.
/// Every validation failure is reported as an <see cref="InvalidOperationException"/>:
/// missing GDPR consent, missing CV document id, unknown CV document, a document that is not a CV,
/// extracted job text shorter than 80 characters, or an indexed job that cannot be read back.
/// </remarks>
public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
{
    if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
    if (string.IsNullOrWhiteSpace(request.CvDocumentId)) throw new InvalidOperationException("Missing CV document id.");

    var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");

    // Same guard as FindJobsAsync: reject non-CV documents before any job extraction or indexing work is done.
    if (!string.Equals(cv.DocumentType, "cv", StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidOperationException("The provided document is not a CV.");
    }

    var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
    if (jobText.Length < 80) throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");

    // Index the job text, then read the stored document back so scoring works on what was indexed.
    var job = await _rag.IndexJobTextAsync(jobText, request.JobUrl, ExtractJobTitle(jobText), ct);
    var jobDocument = await _rag.GetDocumentAsync(job.DocumentId, ct) ?? throw new InvalidOperationException("Indexed job document not found.");

    // Search job documents with the CV profile; at least 5 results are requested.
    var search = await _rag.SearchAsync(new RagSearchRequest
    {
        QueryText = BuildCvSearchProfile(cv.Text),
        TargetDocumentTypes = ["job"],
        TopK = Math.Max(5, _settings.TopK)
    }, ct);

    // Use the chunks matched for this job as evidence; empty when the job is not among the results.
    var matchedChunks = search.Results
        .FirstOrDefault(x => x.DocumentId == job.DocumentId)?
        .MatchedChunks.Select(x => x.Text).ToArray() ?? [];

    return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, request.ClientIpAddress, NormalizeLanguage(request.Language), ct);
}
|
|
|
|
/// <summary>
/// Produces the LLM match assessment for one CV / job pair.
/// A result already stored for the same (CV, job, language) combination is returned as-is, without calling the model.
/// The evidence section of the prompt holds up to four matched chunks, or the first 4,000 characters
/// of the job text when no chunks were supplied.
/// </summary>
/// <exception cref="InvalidOperationException">No system prompt exists for the requested language.</exception>
private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string? clientIpAddress, string language, CancellationToken ct)
{
    var stored = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
    if (stored is not null)
    {
        return stored;
    }

    // Cap both documents so the prompt stays bounded.
    var cvText = Limit(cv.Text, 18000);
    var jobText = Limit(job.Text, 14000);

    string evidence;
    if (evidenceChunks.Count > 0)
    {
        evidence = string.Join("\n\n", evidenceChunks.Take(4));
    }
    else
    {
        evidence = Limit(job.Text, 4000);
    }

    var systemPrompt = await _aiPrompts.GetAsync("ai.cv-match.system-prompt", language, ct);
    if (systemPrompt is null)
    {
        throw new InvalidOperationException(
            $"AI prompt not found: key='ai.cv-match.system-prompt', language='{language}'. " +
            "This is a configuration error. Ensure the cvMatcher.AiPrompts table is properly seeded with language-specific prompts.");
    }

    var userPrompt = $"""
        CV:
        {cvText}

        JOB:
        {jobText}

        SEMANTICALLY MATCHED JOB EVIDENCE:
        {evidence}
        """;

    // The third argument (0.2) is presumably the sampling temperature; confirm against IMatcherAiClient.
    var completion = await _ai.CreateChatCompletionAsync(systemPrompt, userPrompt, 0.2m, ct);

    // Localized fallback texts are fetched before parsing; ParseResult uses them only when parsing fails.
    var fallbackSummary = await _aiPrompts.GetAsync("parse-error.summary", language, ct);
    var fallbackRecommendation = await _aiPrompts.GetAsync("parse-error.recommendation", language, ct);

    var match = ParseResult(completion, fallbackSummary, fallbackRecommendation);
    match.JobDocumentId = job.Id;
    match.JobUrl = job.SourceUrl;
    match.Cached = false;

    await _repository.SaveMatchAsync(cv.Id, job.Id, language, match, email, clientIpAddress, ct);
    return match;
}
|
|
|
|
/// <summary>
/// Serializer options shared by every <see cref="ParseResult"/> call.
/// Built once because each new <see cref="JsonSerializerOptions"/> instance has to rebuild its own metadata cache.
/// </summary>
private static readonly JsonSerializerOptions ResultJsonOptions = new(JsonSerializerDefaults.Web);

/// <summary>
/// Deserialises the LLM's JSON output into a <see cref="JobMatchResponse"/>.
/// Returns a safe fallback response instead of throwing when the output is missing, blank,
/// deserialises to <c>null</c>, or cannot be parsed.
/// </summary>
/// <param name="json">Raw model output, expected to be a JSON object.</param>
/// <param name="errorSummary">Summary text for the fallback response; a built-in English text is used when <c>null</c>.</param>
/// <param name="errorRec">Single recommendation for the fallback response; a built-in English text is used when <c>null</c>.</param>
/// <returns>The parsed response, or a zero-score fallback describing the parse failure.</returns>
private static JobMatchResponse ParseResult(
    string json,
    string? errorSummary = null,
    string? errorRec = null)
{
    // Null or blank output can never be valid JSON, so skip the parser entirely.
    if (!string.IsNullOrWhiteSpace(json))
    {
        try
        {
            var parsed = JsonSerializer.Deserialize<JobMatchResponse>(json, ResultJsonOptions);
            if (parsed is not null) return parsed;
        }
        catch (Exception ex) when (ex is JsonException or NotSupportedException)
        {
            // Malformed or unmappable model output is an expected failure mode: use the fallback below.
            // Other exception types are left to propagate instead of being silently swallowed.
        }
    }

    return new JobMatchResponse
    {
        Score = 0,
        Summary = errorSummary ?? "The AI response could not be parsed as structured JSON.",
        Recommendations = [errorRec ?? "Inspect the raw model output and tune the scoring prompt."]
    };
}
|
|
|
|
/// <summary>
/// Turns CV text into the query string used for the job search:
/// a fixed descriptive prefix followed by at most the first 10,000 characters of the CV.
/// </summary>
private static string BuildCvSearchProfile(string cvText) =>
    "Candidate profile, skills, technologies, seniority, industry experience, project experience: "
    + Limit(cvText, 10000);
|
|
|
|
/// <summary>
/// Picks a short title for a job posting: the first fragment (split on '.', line feed or carriage return)
/// whose trimmed length is between 9 and 139 characters. Falls back to "Job description" when none qualifies.
/// </summary>
private static string ExtractJobTitle(string jobText)
{
    foreach (var fragment in jobText.Split('.', '\n', '\r'))
    {
        var candidate = fragment.Trim();
        if (candidate.Length > 8 && candidate.Length < 140)
        {
            return candidate;
        }
    }

    return "Job description";
}
|
|
|
|
/// <summary>
/// Returns the base language code, lower-cased, defaulting to <c>"en"</c>.
/// The region/script part is dropped whether it is separated by '-' ("en-US") or '_' ("ro_RO").
/// </summary>
/// <param name="language">Language or locale tag; may be <c>null</c>, blank, or carry surrounding whitespace.</param>
/// <returns>The primary language subtag, or <c>"en"</c> when the input has none (for example <c>"-US"</c>).</returns>
private static string NormalizeLanguage(string? language)
{
    if (string.IsNullOrWhiteSpace(language)) return "en";

    var primary = language.Trim().Split('-', '_')[0].Trim().ToLowerInvariant();

    // A tag that starts with a separator has no primary subtag; never hand an empty code to the prompt lookup.
    return primary.Length == 0 ? "en" : primary;
}
|
|
|
|
/// <summary>
/// Truncates <paramref name="value"/> to at most <paramref name="max"/> characters.
/// When the cut would fall between the two halves of a UTF-16 surrogate pair, the result is one
/// character shorter so it never ends in a lone high surrogate.
/// </summary>
/// <param name="value">Text to truncate.</param>
/// <param name="max">Maximum number of UTF-16 code units to keep.</param>
/// <returns><paramref name="value"/> itself when it already fits, otherwise its leading part.</returns>
private static string Limit(string value, int max)
{
    if (value.Length <= max) return value;

    // value.Length > max here, so index max is valid and IsSurrogatePair can inspect (max - 1, max).
    var end = max > 0 && char.IsSurrogatePair(value, max - 1) ? max - 1 : max;
    return value[..end];
}
|
|
}
|