64e003a639
Refactored the AI prompt system to use proper language-specific prompts (en and ro) instead of a single wildcard prompt with runtime {{languageName}} placeholder substitution.
Benefits:
- Language-specific instructions optimized for each language
- Better control over LLM behavior per language
- Cleaner code without placeholder substitution
- Easier to maintain and update prompts per language
Changes:
- Updated cvMatcher InitialSchema migration to seed en and ro prompts separately
- Modified CvMatcherService to retrieve language-specific prompts directly
- Removed LanguageName() helper method (no longer needed)
- Added fallback prompts in service for safety
The English and Romanian prompts now include specific JSON examples in their respective languages, ensuring the LLM understands the expected output format for each language variant.
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
201 lines
8.3 KiB
C#
201 lines
8.3 KiB
C#
using System.Text.Json;
|
|
using Api.Clients.Ai.Contracts;
|
|
using Api.Clients.Api.Contracts;
|
|
using CvMatcher.Data.Repositories.Contracts;
|
|
using CvMatcher.Models.Requests;
|
|
using CvMatcher.Models.Responses;
|
|
using CvMatcher.Models.Settings;
|
|
using Api.Services.Contracts;
|
|
using Microsoft.Extensions.Options;
|
|
|
|
namespace Api.Services;
|
|
|
|
/// <summary>
|
|
/// Orchestrates CV upload, RAG indexing, job text extraction, LLM scoring, and result caching.
|
|
/// </summary>
|
|
public sealed class CvMatcherService : ICvMatcherService
|
|
{
|
|
// Collaborators and settings, all supplied through constructor injection.
private readonly IRagApiClient _rag;
private readonly IJobTextExtractor _jobTextExtractor;
private readonly IMatcherAiClient _ai;
private readonly IMatcherRepository _repository;
private readonly IAiPromptsRepository _aiPrompts;
private readonly MatcherSettings _settings;

/// <summary>
/// Creates the service from its injected collaborators.
/// </summary>
/// <param name="rag">Client used to index, fetch and search documents.</param>
/// <param name="jobTextExtractor">Extracts job text from a URL or a pasted description.</param>
/// <param name="ai">Chat-completion client used to score a CV against a job.</param>
/// <param name="repository">Store used to read and save match results.</param>
/// <param name="aiPrompts">Lookup of prompts by key and language.</param>
/// <param name="options">Matcher settings; <c>TopK</c> and <c>DeepScoreTopN</c> are read from them.</param>
/// <exception cref="ArgumentNullException">Any argument is <see langword="null"/>.</exception>
public CvMatcherService(
    IRagApiClient rag,
    IJobTextExtractor jobTextExtractor,
    IMatcherAiClient ai,
    IMatcherRepository repository,
    IAiPromptsRepository aiPrompts,
    IOptions<MatcherSettings> options)
{
    // Fail at construction time with the offending parameter name instead of a
    // NullReferenceException on first use.
    ArgumentNullException.ThrowIfNull(rag);
    ArgumentNullException.ThrowIfNull(jobTextExtractor);
    ArgumentNullException.ThrowIfNull(ai);
    ArgumentNullException.ThrowIfNull(repository);
    ArgumentNullException.ThrowIfNull(aiPrompts);
    ArgumentNullException.ThrowIfNull(options);

    _rag = rag;
    _jobTextExtractor = jobTextExtractor;
    _ai = ai;
    _repository = repository;
    _aiPrompts = aiPrompts;
    _settings = options.Value;
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Forwards the uploaded file to the RAG client for indexing and copies the indexing outcome
/// into the response, adding a summary that says whether cached data was reused.
/// </remarks>
public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct)
{
    var indexed = await _rag.IndexCvPdfAsync(file, ct);

    // The summary text depends only on the Cached flag reported by the indexing call.
    var summary = indexed.Cached
        ? "CV already indexed. Cached data reused."
        : "CV indexed successfully.";

    var upload = new CvUploadResponse
    {
        DocumentId = indexed.DocumentId,
        TextHash = indexed.TextHash,
        DocumentType = indexed.DocumentType,
        Title = indexed.Title,
        Chunks = indexed.Chunks,
        Characters = indexed.Characters,
        Cached = indexed.Cached,
        Summary = summary
    };

    return upload;
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Loads the CV, runs a vector search restricted to <c>job</c> documents, and scores the leading
/// results one at a time with the LLM. The number of scored results is <c>DeepScoreTopN</c>
/// clamped to the range 1-10. Scoring always uses the default language (<c>"en"</c>) because
/// no language is passed to <c>NormalizeLanguage</c>.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// The CV document id is blank, the CV document is not found, or the document is not a CV.
/// </exception>
public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
{
    // Same guard and message as MatchJobAsync, so a blank id is rejected before any remote call.
    if (string.IsNullOrWhiteSpace(request.CvDocumentId)) throw new InvalidOperationException("Missing CV document id.");

    var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
    if (!string.Equals(cv.DocumentType, "cv", StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidOperationException("The provided document is not a CV.");
    }

    var search = await _rag.SearchAsync(new RagSearchRequest
    {
        QueryText = BuildCvSearchProfile(cv.Text),
        TargetDocumentTypes = ["job"],
        TopK = request.TopK ?? _settings.TopK
    }, ct);

    // Only the first few search hits are sent to the LLM; the rest are dropped.
    var deepScoreLimit = Math.Clamp(_settings.DeepScoreTopN, 1, 10);

    // Loop-invariant: resolve the scoring language once.
    var language = NormalizeLanguage(null);

    var jobs = new List<JobMatchResponse>();
    foreach (var result in search.Results.Take(deepScoreLimit))
    {
        var job = await _rag.GetDocumentAsync(result.DocumentId, ct);
        if (job is null) continue; // Hit refers to a document that can no longer be fetched.

        var evidence = result.MatchedChunks.Select(x => x.Text).ToArray();
        jobs.Add(await ScorePairAsync(cv, job, evidence, request.Email, language, ct));
    }

    return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Validates consent and the CV reference, extracts and indexes the job text, looks up the
/// chunks of that job which the vector search matched against the CV, and scores the pair
/// in the requested language.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// GDPR consent is missing, the CV document id is blank, the CV document is not found or is not
/// a CV, fewer than 80 characters of job text were extracted, or the indexed job document
/// cannot be read back.
/// </exception>
public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
{
    if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
    if (string.IsNullOrWhiteSpace(request.CvDocumentId)) throw new InvalidOperationException("Missing CV document id.");

    var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");

    // Same document-type check and message as FindJobsAsync: without it any indexed document
    // (for example a job posting) could be scored as if it were a CV.
    if (!string.Equals(cv.DocumentType, "cv", StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidOperationException("The provided document is not a CV.");
    }

    var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
    if (jobText.Length < 80) throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");

    var job = await _rag.IndexJobTextAsync(jobText, request.JobUrl, ExtractJobTitle(jobText), ct);
    var jobDocument = await _rag.GetDocumentAsync(job.DocumentId, ct) ?? throw new InvalidOperationException("Indexed job document not found.");

    var search = await _rag.SearchAsync(new RagSearchRequest
    {
        QueryText = BuildCvSearchProfile(cv.Text),
        TargetDocumentTypes = ["job"],
        TopK = Math.Max(5, _settings.TopK)
    }, ct);

    // The job just indexed may not be among the search results; in that case no evidence
    // chunks are passed and ScorePairAsync falls back to the raw job text.
    var matchedChunks = search.Results
        .FirstOrDefault(x => x.DocumentId == job.DocumentId)?
        .MatchedChunks.Select(x => x.Text).ToArray() ?? [];

    return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct);
}
|
|
|
|
/// <summary>
/// Scores a (CV, job) pair with the LLM.
/// Returns a cached result immediately when the same (CV, job, language) triple has been scored before.
/// When no evidence chunks are available from the vector search, falls back to the raw job text.
/// A model response that cannot be parsed is returned as a fallback result but is not saved,
/// so a later request for the same triple asks the model again.
/// </summary>
/// <param name="cv">CV document; its text is truncated to 18,000 characters for the prompt.</param>
/// <param name="job">Job document; its text is truncated to 14,000 characters for the prompt.</param>
/// <param name="evidenceChunks">Matched job chunks; at most the first four are used.</param>
/// <param name="email">Requester e-mail. Not used by this method at present.</param>
/// <param name="language">Normalised language code used for the cache key and the prompt lookup.</param>
/// <param name="ct">Cancellation token passed to every awaited call.</param>
/// <returns>The cached, freshly scored, or fallback match result.</returns>
private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string language, CancellationToken ct)
{
    var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
    if (cached is not null) return cached;

    var cvText = Limit(cv.Text, 18000);
    var jobText = Limit(job.Text, 14000);
    var evidence = evidenceChunks.Count > 0 ? string.Join("\n\n", evidenceChunks.Take(4)) : Limit(job.Text, 4000);

    // A missing or blank stored prompt falls back to the built-in prompt for the language.
    var storedPrompt = await _aiPrompts.GetAsync("ai.cv-match.system-prompt", language, ct);
    var systemPrompt = !string.IsNullOrWhiteSpace(storedPrompt)
        ? storedPrompt
        : language == "ro"
            ? "Ești un motor strict de potrivire CV-job. Returnează doar JSON. Punctează realist între 0 și 100."
            : "You are a strict CV-to-job matching engine. Return JSON only. Score realistically from 0 to 100.";

    var userPrompt = $"""
        CV:
        {cvText}

        JOB:
        {jobText}

        SEMANTICALLY MATCHED JOB EVIDENCE:
        {evidence}
        """;

    var json = await _ai.CreateChatCompletionAsync(systemPrompt, userPrompt, 0.2m, ct);

    var parsed = TryParseResult(json);
    var result = parsed ?? CreateUnparseableResult();
    result.JobDocumentId = job.Id;
    result.JobUrl = job.SourceUrl;
    result.Cached = false;

    // Persist only genuine scores. Saving the fallback would make GetMatchAsync return the
    // "could not be parsed" result for this triple on every later call.
    if (parsed is not null)
    {
        await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct);
    }

    return result;
}

/// <summary>
/// Serializer options for model output. Held in a static field because creating a new
/// <see cref="JsonSerializerOptions"/> per call discards the serializer's cached metadata.
/// </summary>
private static readonly JsonSerializerOptions ResultJsonOptions = new(JsonSerializerDefaults.Web);

/// <summary>
/// Deserialises the LLM's JSON output into a <see cref="JobMatchResponse"/>.
/// Returns a safe fallback response instead of throwing when the JSON cannot be parsed.
/// </summary>
/// <param name="json">Raw text returned by the model.</param>
/// <returns>The parsed response, or a zero-score fallback describing the parse failure.</returns>
private static JobMatchResponse ParseResult(string json) =>
    TryParseResult(json) ?? CreateUnparseableResult();

/// <summary>
/// Attempts to deserialise the model output, tolerating text around the JSON object such as
/// Markdown code fences or a leading sentence.
/// </summary>
/// <param name="json">Raw text returned by the model; may be null or blank.</param>
/// <returns>The parsed response, or <see langword="null"/> when no JSON object could be read.</returns>
private static JobMatchResponse? TryParseResult(string? json)
{
    if (string.IsNullOrWhiteSpace(json)) return null;

    // Keep only the outermost {...} span. For a response that is already a bare JSON object
    // this is the whole object, so previously valid responses parse exactly as before.
    var start = json.IndexOf('{');
    var end = json.LastIndexOf('}');
    if (start < 0 || end <= start) return null;

    try
    {
        return JsonSerializer.Deserialize<JobMatchResponse>(json[start..(end + 1)], ResultJsonOptions);
    }
    catch (Exception ex) when (ex is JsonException or NotSupportedException)
    {
        // Malformed or incompatible JSON: the caller substitutes the fallback response.
        return null;
    }
}

/// <summary>Builds the zero-score response used when the model output cannot be parsed.</summary>
private static JobMatchResponse CreateUnparseableResult() => new()
{
    Score = 0,
    Summary = "The AI response could not be parsed as structured JSON.",
    Recommendations = ["Inspect the raw model output and tune the scoring prompt."]
};
|
|
|
|
/// <summary>
/// Produces the query text for vector similarity search: a fixed descriptive prefix followed
/// by the CV text, truncated to at most 10,000 characters.
/// </summary>
private static string BuildCvSearchProfile(string cvText)
{
    const int maxProfileCharacters = 10000;
    const string profilePrefix = "Candidate profile, skills, technologies, seniority, industry experience, project experience: ";

    return profilePrefix + Limit(cvText, maxProfileCharacters);
}
|
|
|
|
/// <summary>
/// Picks a short job title: the first fragment of the job text, split on full stops and line
/// breaks and trimmed, whose length is between 9 and 139 characters. Falls back to
/// <c>"Job description"</c> when no fragment qualifies.
/// </summary>
private static string ExtractJobTitle(string jobText)
{
    foreach (var fragment in jobText.Split('.', '\n', '\r'))
    {
        var candidate = fragment.Trim();
        if (candidate.Length > 8 && candidate.Length < 140)
        {
            return candidate;
        }
    }

    return "Job description";
}
|
|
|
|
/// <summary>Returns the base language code, lower-cased, defaulting to <c>"en"</c>.</summary>
/// <remarks>
/// Both hyphenated (<c>"ro-RO"</c>) and underscore-separated (<c>"ro_RO"</c>) tags are reduced
/// to the part before the first separator. A tag whose base part is empty, such as
/// <c>"-US"</c>, also yields the default instead of an empty code.
/// </remarks>
private static string NormalizeLanguage(string? language)
{
    const string defaultLanguage = "en";

    if (string.IsNullOrWhiteSpace(language)) return defaultLanguage;

    var baseCode = language.Split('-', '_')[0].Trim().ToLowerInvariant();
    return baseCode.Length == 0 ? defaultLanguage : baseCode;
}
|
|
|
|
/// <summary>Truncates <paramref name="value"/> to at most <paramref name="max"/> characters.</summary>
/// <remarks>
/// When the cut would fall between the two halves of a UTF-16 surrogate pair, the result is
/// one character shorter so that it never ends in a lone high surrogate.
/// </remarks>
private static string Limit(string value, int max)
{
    if (value.Length <= max) return value;

    // value.Length > max here, so value[max] exists whenever max > 0.
    var end = max > 0 && char.IsSurrogatePair(value[max - 1], value[max]) ? max - 1 : max;
    return value[..end];
}
|
|
}
|