Files
myAi/Apis/cv-matcher-api/Services/CvMatcherService.cs
T
claude 4ee4a59b5e Improve comments and Swagger annotations across services (#26)
- EmailController: add class summary, full SwaggerResponse/ProducesResponseType
  for 400 and 500, and Description on SwaggerOperation
- ContactController: fix terse "Failed." error message to
  "Could not process subscription."
- FileDownloadController: remove redundant XML <response code> tags from
  the public action doc block; convert private-method /// <summary> to //
  (project convention: no XML doc on internal code)
- CvMatcherService: remove two dead commented-out blocks (old email send
  and BuildEmailBody helper)
- JobTokenService: comment the phone/contact-line regex filter in
  ExtractKeywords
- DocumentClassifier: comment the keyword-frequency scoring approach and
  the confidence formula
- TextChunker: comment the sliding-window step (chunkSize - overlap)
- CvSearchJobTask: comment the GdprConsent = true rationale and the
  BuildCvFileName sanitisation logic
- HtmlJobSearcher: comment GetLeftPart(UriPartial.Path) query-strip dedup

Closes #26

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-28 09:07:23 +03:00

185 lines
7.3 KiB
C#

using System.Text.Json;
using Api.Clients.Ai.Contracts;
using Api.Clients.Api.Contracts;
using CvMatcher.Data.Repositories.Contracts;
using CvMatcher.Models.Requests;
using CvMatcher.Models.Responses;
using CvMatcher.Models.Settings;
using Api.Services.Contracts;
using Microsoft.Extensions.Options;
namespace Api.Services;
public sealed class CvMatcherService : ICvMatcherService
{
private readonly IRagApiClient _rag;
private readonly IJobTextExtractor _jobTextExtractor;
private readonly IMatcherAiClient _ai;
private readonly IMatcherRepository _repository;
private readonly IAiPromptsRepository _aiPrompts;
private readonly MatcherSettings _settings;
/// <summary>
/// Creates the service from its injected collaborators and matcher settings.
/// </summary>
public CvMatcherService(
    IRagApiClient rag,
    IJobTextExtractor jobTextExtractor,
    IMatcherAiClient ai,
    IMatcherRepository repository,
    IAiPromptsRepository aiPrompts,
    IOptions<MatcherSettings> options)
{
    // Unwrap the options once; the rest of the class reads _settings directly.
    _settings = options.Value;

    // External clients and text extraction.
    (_rag, _ai, _jobTextExtractor) = (rag, ai, jobTextExtractor);

    // Persistence: stored matches and prompt templates.
    (_repository, _aiPrompts) = (repository, aiPrompts);
}
/// <summary>
/// Passes the uploaded file to the RAG client's CV indexing call and maps the
/// result onto a <see cref="CvUploadResponse"/>.
/// </summary>
/// <param name="file">The uploaded CV file.</param>
/// <param name="ct">Token used to cancel the call.</param>
/// <returns>The indexing details plus a summary that says whether the result was cached.</returns>
public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct)
{
    var indexed = await _rag.IndexCvPdfAsync(file, ct);

    // The summary text is the only field not copied straight from the RAG response.
    var summary = indexed.Cached
        ? "CV already indexed. Cached data reused."
        : "CV indexed successfully.";

    return new CvUploadResponse
    {
        DocumentId = indexed.DocumentId,
        TextHash = indexed.TextHash,
        DocumentType = indexed.DocumentType,
        Title = indexed.Title,
        Chunks = indexed.Chunks,
        Characters = indexed.Characters,
        Cached = indexed.Cached,
        Summary = summary
    };
}
/// <summary>
/// Searches job documents using a profile built from the CV text and scores the
/// top results against the CV.
/// </summary>
/// <param name="request">Carries the CV document id, an optional TopK override and the email passed on to scoring.</param>
/// <param name="ct">Token used to cancel the calls.</param>
/// <returns>The scored jobs for the requested CV document id.</returns>
/// <exception cref="InvalidOperationException">
/// The CV document id is blank, the document does not exist, or its type is not "cv".
/// </exception>
public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
{
    // Same guard and message as MatchJobAsync, so a blank id fails fast instead of reaching the RAG client.
    if (string.IsNullOrWhiteSpace(request.CvDocumentId)) throw new InvalidOperationException("Missing CV document id.");

    var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
    if (!string.Equals(cv.DocumentType, "cv", StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidOperationException("The provided document is not a CV.");
    }

    var search = await _rag.SearchAsync(new RagSearchRequest
    {
        QueryText = BuildCvSearchProfile(cv.Text),
        TargetDocumentTypes = ["job"],
        TopK = request.TopK ?? _settings.TopK
    }, ct);

    // Each scored job is a separate AI call, so only the first few search results are scored (1 to 10).
    var deepScoreLimit = Math.Clamp(_settings.DeepScoreTopN, 1, 10);

    // No language is taken from the request here, so every pair is scored with the default language.
    var language = NormalizeLanguage(null);

    var jobs = new List<JobMatchResponse>();
    foreach (var result in search.Results.Take(deepScoreLimit))
    {
        var job = await _rag.GetDocumentAsync(result.DocumentId, ct);
        // A search hit whose document can no longer be fetched is skipped.
        if (job is null) continue;

        var evidence = result.MatchedChunks.Select(x => x.Text).ToArray();
        jobs.Add(await ScorePairAsync(cv, job, evidence, request.Email, language, ct));
    }

    return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
}
/// <summary>
/// Scores one job, given by URL or pasted description, against a CV document.
/// </summary>
/// <param name="request">Carries the GDPR consent flag, CV document id, job URL or description, email and language.</param>
/// <param name="ct">Token used to cancel the calls.</param>
/// <returns>The match result for the CV and the newly indexed job.</returns>
/// <exception cref="InvalidOperationException">
/// Consent is missing, the CV id is blank, the CV document is missing or is not a CV,
/// the extracted job text is shorter than 80 characters, or the indexed job cannot be read back.
/// </exception>
public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
{
    if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
    if (string.IsNullOrWhiteSpace(request.CvDocumentId)) throw new InvalidOperationException("Missing CV document id.");

    var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");

    // Same check and message as FindJobsAsync: without it, any document id (e.g. a job's) would be scored as a CV.
    if (!string.Equals(cv.DocumentType, "cv", StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidOperationException("The provided document is not a CV.");
    }

    var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
    if (jobText.Length < 80) throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");

    var job = await _rag.IndexJobTextAsync(jobText, request.JobUrl, ExtractJobTitle(jobText), ct);
    var jobDocument = await _rag.GetDocumentAsync(job.DocumentId, ct) ?? throw new InvalidOperationException("Indexed job document not found.");

    // The search covers all job documents; at least 5 results are requested regardless of the configured TopK.
    var search = await _rag.SearchAsync(new RagSearchRequest
    {
        QueryText = BuildCvSearchProfile(cv.Text),
        TargetDocumentTypes = ["job"],
        TopK = Math.Max(5, _settings.TopK)
    }, ct);

    // Evidence is kept only when this job appears in the results; otherwise the list is empty.
    var matchedChunks = search.Results
        .FirstOrDefault(x => x.DocumentId == job.DocumentId)?
        .MatchedChunks.Select(x => x.Text).ToArray() ?? [];

    return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct);
}
// Scores one CV/job pair with the AI client, returning a stored match when the
// repository already has one for this CV, job and language.
// The email parameter is currently unused in this method; it is kept so callers stay unchanged.
private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string language, CancellationToken ct)
{
    var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
    if (cached is not null) return cached;

    // Inputs are truncated to fixed character budgets before they go into the prompt.
    var cvText = Limit(cv.Text, 18000);
    var jobText = Limit(job.Text, 14000);

    // Prefer up to four chunks matched by the search; with none, use the start of the job text.
    var evidence = evidenceChunks.Count > 0 ? string.Join("\n\n", evidenceChunks.Take(4)) : Limit(job.Text, 4000);

    var languageName = LanguageName(language);
    var promptTemplate = await _aiPrompts.GetAsync("ai.cv-match.system-prompt", "*", ct)
        ?? "You are a strict CV-to-job matching engine. Return JSON only.";
    var systemPrompt = promptTemplate.Replace("{{languageName}}", languageName, StringComparison.OrdinalIgnoreCase);
    var userPrompt = $"""
        CV:
        {cvText}
        JOB:
        {jobText}
        SEMANTICALLY MATCHED JOB EVIDENCE:
        {evidence}
        """;

    var json = await _ai.CreateChatCompletionAsync(systemPrompt, userPrompt, 0.2m, ct);

    var parsed = TryParseResult(json);
    var result = parsed ?? CreateUnparsedResult();
    result.JobDocumentId = job.Id;
    result.JobUrl = job.SourceUrl;
    result.Cached = false;

    // Persist only real scores. Saving the placeholder would make one malformed
    // model reply come back from the cache on every later request for this pair.
    if (parsed is not null)
    {
        await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct);
    }

    return result;
}

// Shared serializer options: creating a new instance per call discards the serializer's cached metadata.
private static readonly JsonSerializerOptions ResultJsonOptions = new(JsonSerializerDefaults.Web);

// Deserializes the model output; returns null when it is blank, is JSON null,
// or cannot be read as a JobMatchResponse.
private static JobMatchResponse? TryParseResult(string? json)
{
    if (string.IsNullOrWhiteSpace(json)) return null;

    try
    {
        return JsonSerializer.Deserialize<JobMatchResponse>(json, ResultJsonOptions);
    }
    catch (Exception ex) when (ex is JsonException or NotSupportedException)
    {
        // Malformed or unsupported payload: the caller substitutes the placeholder result.
        return null;
    }
}

// Placeholder returned to the caller when the model output could not be parsed.
private static JobMatchResponse CreateUnparsedResult() => new()
{
    Score = 0,
    Summary = "The AI response could not be parsed as structured JSON.",
    Recommendations = ["Inspect the raw model output and tune the scoring prompt."]
};
// Builds the search query text: a fixed lead-in naming the aspects to match,
// followed by at most the first 10000 characters of the CV.
private static string BuildCvSearchProfile(string cvText)
{
    const string profileLead = "Candidate profile, skills, technologies, seniority, industry experience, project experience: ";
    return profileLead + Limit(cvText, 10000);
}
// Picks a title from the job text: the first line that is 9 to 139 characters long
// (ignoring trailing periods), or, for a line outside that range, its first
// sentence fragment of that length. Falls back to "Job description".
// Whole lines are tried before splitting on '.', so titles that contain dots
// (".NET", "Node.js", "Sr.") are not cut apart.
private static string ExtractJobTitle(string jobText)
{
    static bool IsTitleLength(string candidate) => candidate.Length is > 8 and < 140;

    foreach (var rawLine in jobText.Split('\n', '\r'))
    {
        var line = rawLine.Trim().TrimEnd('.').TrimEnd();
        if (IsTitleLength(line)) return line;

        // Too short or too long as a whole: look for a usable sentence inside the line.
        var sentence = line.Split('.').Select(x => x.Trim()).FirstOrDefault(IsTitleLength);
        if (sentence is not null) return sentence;
    }

    return "Job description";
}
// Reduces a language or locale tag to its lower-case primary subtag ("ro-RO" and "ro_RO" -> "ro").
// Returns "en" when the input is null, blank, or has no primary subtag (e.g. "-US"),
// so the result is never an empty string.
private static string NormalizeLanguage(string? language)
{
    if (string.IsNullOrWhiteSpace(language)) return "en";

    // Accept both the hyphen and the underscore locale separators.
    var primary = language.Trim().Split('-', '_')[0].Trim().ToLowerInvariant();
    return primary.Length == 0 ? "en" : primary;
}
// Maps a normalized language code to the language name inserted into the prompt:
// "ro" is Romanian; every other value, including "en", is English.
private static string LanguageName(string language)
{
    if (language == "ro") return "Romanian";
    return "English";
}
// Returns value unchanged when it has at most max characters; otherwise its leading part.
// If the cut would fall between the two halves of a surrogate pair, the cut moves
// back one character so the result never ends in a lone high surrogate.
private static string Limit(string value, int max)
{
    if (value.Length <= max) return value;

    var cut = max > 0 && char.IsHighSurrogate(value[max - 1]) ? max - 1 : max;
    return value[..cut];
}
}