// myAi/Apis/cv-matcher-api/Services/CvMatcherService.cs

using System.Text.Json;
using Api.Clients.Ai.Contracts;
using Api.Clients.Api.Contracts;
using CvMatcher.Data.Repositories.Contracts;
using CvMatcher.Models.Requests;
using CvMatcher.Models.Responses;
using CvMatcher.Models.Settings;
using Api.Services.Contracts;
using Microsoft.Extensions.Options;

namespace Api.Services;

/// <summary>
/// Coordinates CV-to-job matching: forwards CVs and job texts to the RAG API client,
/// runs searches for job documents based on the CV text, and asks the AI client to
/// score each CV/job pair. Successfully parsed scores are stored through
/// <see cref="IMatcherRepository"/> and reused for the same CV, job and language.
/// </summary>
public sealed class CvMatcherService : ICvMatcherService
{
    private const string CvDocumentType = "cv";
    private const string JobDocumentType = "job";
    private const string DefaultLanguage = "en";

    private const string SystemPromptKey = "ai.cv-match.system-prompt";
    private const string FallbackSystemPrompt = "You are a strict CV-to-job matching engine. Return JSON only.";
    private const string LanguagePlaceholder = "{{languageName}}";
    private const decimal ScoringTemperature = 0.2m;

    private const int MinJobTextLength = 80;
    private const int MinMatchSearchTopK = 5;
    private const int MinDeepScoreCount = 1;
    private const int MaxDeepScoreCount = 10;

    private const int MaxCvPromptChars = 18000;
    private const int MaxJobPromptChars = 14000;
    private const int MaxEvidenceChunks = 4;
    private const int MaxEvidenceFallbackChars = 4000;
    private const int MaxSearchProfileChars = 10000;

    // Reused across calls: building JsonSerializerOptions per call discards its metadata cache.
    private static readonly JsonSerializerOptions s_resultJsonOptions = new(JsonSerializerDefaults.Web);

    private readonly IRagApiClient _rag;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly IMatcherAiClient _ai;
    private readonly IMatcherRepository _repository;
    private readonly IAiPromptsRepository _aiPrompts;
    private readonly MatcherSettings _settings;

    /// <summary>Creates the service from its collaborators and the bound matcher settings.</summary>
    public CvMatcherService(
        IRagApiClient rag,
        IJobTextExtractor jobTextExtractor,
        IMatcherAiClient ai,
        IMatcherRepository repository,
        IAiPromptsRepository aiPrompts,
        IOptions<MatcherSettings> options)
    {
        _rag = rag;
        _jobTextExtractor = jobTextExtractor;
        _ai = ai;
        _repository = repository;
        _aiPrompts = aiPrompts;
        _settings = options.Value;
    }

    /// <summary>
    /// Passes the uploaded file to <c>IRagApiClient.IndexCvPdfAsync</c> and maps the
    /// indexing response onto a <see cref="CvUploadResponse"/>.
    /// </summary>
    /// <param name="file">The uploaded CV file.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The indexing details plus a summary that depends on the response's <c>Cached</c> flag.</returns>
    public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct)
    {
        var response = await _rag.IndexCvPdfAsync(file, ct);
        return new CvUploadResponse
        {
            DocumentId = response.DocumentId,
            TextHash = response.TextHash,
            DocumentType = response.DocumentType,
            Title = response.Title,
            Chunks = response.Chunks,
            Characters = response.Characters,
            Cached = response.Cached,
            Summary = response.Cached ? "CV already indexed. Cached data reused." : "CV indexed successfully."
        };
    }

    /// <summary>
    /// Searches job documents using a query built from the CV text and scores the
    /// leading results against the CV.
    /// </summary>
    /// <param name="request">Identifies the CV and optionally overrides the search <c>TopK</c>.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The scored jobs; search results whose document can no longer be loaded are skipped.</returns>
    /// <exception cref="InvalidOperationException">
    /// The CV document id is missing, the document does not exist, or it is not a CV.
    /// </exception>
    public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
    {
        // Same guard as MatchJobAsync, so a blank id is rejected before calling the RAG API.
        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
        {
            throw new InvalidOperationException("Missing CV document id.");
        }

        var cv = await GetCvDocumentAsync(request.CvDocumentId, ct);

        var search = await _rag.SearchAsync(new RagSearchRequest
        {
            QueryText = BuildCvSearchProfile(cv.Text),
            TargetDocumentTypes = [JobDocumentType],
            TopK = request.TopK ?? _settings.TopK
        }, ct);

        // Each scored pair costs an AI call on a cache miss, so the configured count is clamped.
        var deepScoreLimit = Math.Clamp(_settings.DeepScoreTopN, MinDeepScoreCount, MaxDeepScoreCount);
        var jobs = new List<JobMatchResponse>();
        foreach (var result in search.Results.Take(deepScoreLimit))
        {
            var job = await _rag.GetDocumentAsync(result.DocumentId, ct);
            if (job is null)
            {
                continue;
            }

            var evidence = result.MatchedChunks.Select(x => x.Text).ToArray();

            // No language is read from the request here, so the default language is used.
            jobs.Add(await ScorePairAsync(cv, job, evidence, request.Email, DefaultLanguage, ct));
        }

        return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
    }

    /// <summary>
    /// Extracts the job text from the request's URL/description, indexes it, and scores it against the CV.
    /// </summary>
    /// <param name="request">CV id, job URL and/or description, GDPR consent flag, e-mail and language.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The match result for the CV and the indexed job.</returns>
    /// <exception cref="InvalidOperationException">
    /// GDPR consent is missing, the CV id is blank, the CV is not found or is not a CV,
    /// the extracted job text is shorter than 80 characters, or the indexed job cannot be loaded.
    /// </exception>
    public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
    {
        if (!request.GdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
        {
            throw new InvalidOperationException("Missing CV document id.");
        }

        // Applies the same document-type check as FindJobsAsync, so a job id cannot be scored as a CV.
        var cv = await GetCvDocumentAsync(request.CvDocumentId, ct);

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText.Length < MinJobTextLength)
        {
            throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");
        }

        var job = await _rag.IndexJobTextAsync(jobText, request.JobUrl, ExtractJobTitle(jobText), ct);
        var jobDocument = await _rag.GetDocumentAsync(job.DocumentId, ct)
            ?? throw new InvalidOperationException("Indexed job document not found.");

        var search = await _rag.SearchAsync(new RagSearchRequest
        {
            QueryText = BuildCvSearchProfile(cv.Text),
            TargetDocumentTypes = [JobDocumentType],
            TopK = Math.Max(MinMatchSearchTopK, _settings.TopK)
        }, ct);

        // The job may not appear in the search results; scoring then falls back to the job text itself.
        var matchedChunks = search.Results
            .FirstOrDefault(x => x.DocumentId == job.DocumentId)?
            .MatchedChunks.Select(x => x.Text).ToArray() ?? [];

        return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct);
    }

    /// <summary>Loads a document by id and verifies that its document type is "cv" (case-insensitive).</summary>
    /// <exception cref="InvalidOperationException">The document does not exist or is not a CV.</exception>
    private async Task<RagDocumentDetails> GetCvDocumentAsync(string cvDocumentId, CancellationToken ct)
    {
        var cv = await _rag.GetDocumentAsync(cvDocumentId, ct)
            ?? throw new InvalidOperationException("CV document not found.");

        if (!string.Equals(cv.DocumentType, CvDocumentType, StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("The provided document is not a CV.");
        }

        return cv;
    }

    /// <summary>
    /// Returns the stored match for the CV/job/language triple, or asks the AI client to score the pair.
    /// </summary>
    /// <param name="cv">The CV document.</param>
    /// <param name="job">The job document.</param>
    /// <param name="evidenceChunks">Matched job chunk texts; when empty, the start of the job text is used instead.</param>
    /// <param name="email">Not used by the current implementation.</param>
    /// <param name="language">Normalized language code; part of the cache key and used to pick the prompt language name.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>
    /// The stored or freshly computed result. When the AI output cannot be parsed, a placeholder
    /// result with score 0 is returned and nothing is stored.
    /// </returns>
    private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string language, CancellationToken ct)
    {
        var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
        if (cached is not null)
        {
            // Results are saved with Cached = false; mark the hit explicitly in case the
            // repository returns the flag as it was stored.
            cached.Cached = true;
            return cached;
        }

        var cvText = Limit(cv.Text, MaxCvPromptChars);
        var jobText = Limit(job.Text, MaxJobPromptChars);
        var evidence = evidenceChunks.Count > 0
            ? string.Join("\n\n", evidenceChunks.Take(MaxEvidenceChunks))
            : Limit(job.Text, MaxEvidenceFallbackChars);
        var languageName = LanguageName(language);

        var promptTemplate = await _aiPrompts.GetAsync(SystemPromptKey, "*", ct)
            ?? FallbackSystemPrompt;
        var systemPrompt = promptTemplate.Replace(LanguagePlaceholder, languageName, StringComparison.OrdinalIgnoreCase);

        var userPrompt = $"""
            CV:
            {cvText}

            JOB:
            {jobText}

            SEMANTICALLY MATCHED JOB EVIDENCE:
            {evidence}
            """;

        var json = await _ai.CreateChatCompletionAsync(systemPrompt, userPrompt, ScoringTemperature, ct);
        var parsed = TryParseResult(json);

        var result = parsed ?? CreateUnparsableResult();
        result.JobDocumentId = job.Id;
        result.JobUrl = job.SourceUrl;
        result.Cached = false;

        // Only store real scores. Persisting the placeholder would make one malformed model
        // response the stored answer for this CV/job/language.
        if (parsed is not null)
        {
            await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct);
        }

        return result;
    }

    /// <summary>
    /// Tries to read a <see cref="JobMatchResponse"/> from the model output. The raw text is tried
    /// first; if that fails, the span from the first '{' to the last '}' is tried, which covers
    /// output wrapped in markdown fences or surrounded by prose.
    /// </summary>
    /// <returns>The parsed response, or <c>null</c> when no attempt produced one.</returns>
    private static JobMatchResponse? TryParseResult(string? json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return null;
        }

        return TryDeserialize(json) ?? TryDeserialize(ExtractJsonObject(json));
    }

    /// <summary>Deserializes with web defaults; returns <c>null</c> for null input, a JSON null, or invalid JSON.</summary>
    private static JobMatchResponse? TryDeserialize(string? json)
    {
        if (json is null)
        {
            return null;
        }

        try
        {
            return JsonSerializer.Deserialize<JobMatchResponse>(json, s_resultJsonOptions);
        }
        catch (Exception ex) when (ex is JsonException or NotSupportedException)
        {
            // Malformed or unsupported payload: the caller substitutes the placeholder result.
            return null;
        }
    }

    /// <summary>
    /// Returns the substring from the first '{' to the last '}', or <c>null</c> when there is no
    /// such span or it is the whole input (which the caller has already tried).
    /// </summary>
    private static string? ExtractJsonObject(string text)
    {
        var start = text.IndexOf('{');
        var end = text.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            return null;
        }

        if (start == 0 && end == text.Length - 1)
        {
            return null;
        }

        return text[start..(end + 1)];
    }

    /// <summary>Builds the placeholder returned when the AI output cannot be parsed.</summary>
    private static JobMatchResponse CreateUnparsableResult() => new()
    {
        Score = 0,
        Summary = "The AI response could not be parsed as structured JSON.",
        Recommendations = ["Inspect the raw model output and tune the scoring prompt."]
    };

    /// <summary>Prefixes the first 10,000 characters of the CV text with a fixed profile description for search.</summary>
    private static string BuildCvSearchProfile(string cvText)
    {
        var text = Limit(cvText, MaxSearchProfileChars);
        return $"Candidate profile, skills, technologies, seniority, industry experience, project experience: {text}";
    }

    /// <summary>
    /// Picks the first fragment (split on '.', line feed or carriage return) whose trimmed length is
    /// between 9 and 139 characters; returns "Job description" when none qualifies.
    /// </summary>
    private static string ExtractJobTitle(string jobText)
    {
        var first = jobText.Split('.', '\n', '\r').Select(x => x.Trim()).FirstOrDefault(x => x.Length is > 8 and < 140);
        return first ?? "Job description";
    }

    /// <summary>
    /// Reduces a language tag such as "ro-RO" or "en_US" to its lower-case primary subtag.
    /// Returns "en" when the input is null, blank, or has an empty primary subtag.
    /// </summary>
    private static string NormalizeLanguage(string? language)
    {
        if (string.IsNullOrWhiteSpace(language))
        {
            return DefaultLanguage;
        }

        var primary = language.Trim().ToLowerInvariant().Split('-', '_')[0].Trim();

        // A tag like "-RO" has an empty primary subtag; never hand an empty code to the cache key.
        return primary.Length == 0 ? DefaultLanguage : primary;
    }

    /// <summary>Maps a normalized language code to the name inserted into the system prompt; unknown codes map to English.</summary>
    private static string LanguageName(string language) => language switch
    {
        "ro" => "Romanian",
        _ => "English"
    };

    /// <summary>
    /// Truncates <paramref name="value"/> to at most <paramref name="max"/> UTF-16 code units,
    /// dropping one more unit when the cut would otherwise end on a high surrogate.
    /// </summary>
    private static string Limit(string value, int max)
    {
        if (value.Length <= max)
        {
            return value;
        }

        var end = max > 0 && char.IsHighSurrogate(value[max - 1]) ? max - 1 : max;
        return value[..end];
    }
}