using System.Text.Json; using System.Text.RegularExpressions; using Api.Clients.Api.Contracts; using Api.Services.Contracts; using CvMatcher.Models.Responses; using CvSearch.Data; using CvSearch.Data.Entities; using CvMatcher.Models.Settings; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Options; namespace Api.Services; ///

/// <summary>
/// Creates and validates one-time job search tokens, and creates the corresponding search sessions.
/// Provider configuration is read from <c>cvSearch.JobProviders</c> at session-creation time and
/// snapshotted into <c>JobSearchSessionEntity.ProviderConfigJson</c> so subsequent config changes
/// do not affect already-queued sessions.
/// </summary>

public sealed class JobTokenService : IJobTokenService
{
    /// <summary>Maximum number of keywords derived from a single CV.</summary>
    private const int MaxKeywords = 10;

    /// <summary>Number of leading CV characters that are scanned for keywords.</summary>
    private const int KeywordSampleLength = 2000;

    /// <summary>
    /// Shared serializer options (web defaults). Cached because constructing
    /// <see cref="JsonSerializerOptions"/> per call discards the serializer's metadata cache.
    /// </summary>
    private static readonly JsonSerializerOptions WebJsonOptions = new(JsonSerializerDefaults.Web);

    /// <summary>Characters that separate candidate keywords in CV text.</summary>
    private static readonly char[] TokenDelimiters = [' ', '\n', '\r', '\t', '|', '/', ',', ';', '(', ')'];

    /// <summary>
    /// Matches e-mail addresses and URLs (with an http/https scheme or a leading "www.").
    /// Applied to the raw text, i.e. before punctuation is stripped, because afterwards
    /// '@' and '.' no longer exist and such tokens cannot be told apart from ordinary words.
    /// </summary>
    private static readonly Regex EmailOrUrlPattern = new(
        @"[^\s|,;()]+@[^\s|,;()]+|\bhttps?://[^\s|,;()]+|\bwww\.[^\s|,;()]+",
        RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>Matches every character that is neither a word character nor a hyphen.</summary>
    private static readonly Regex NonKeywordCharsPattern = new(@"[^\w\-]", RegexOptions.Compiled);

    /// <summary>Matches tokens made only of digits and hyphens (phone fragments, pure numbers).</summary>
    private static readonly Regex DigitsOnlyPattern = new(@"^[\d\-]+$", RegexOptions.Compiled);

    private readonly CvSearchDbContext _db;
    private readonly IRagApiClient _rag;
    private readonly JobSearchSettings _settings;
    private readonly ILogger<JobTokenService> _logger;

    /// <summary>Initializes the service with its data context, RAG client, settings and logger.</summary>
    /// <param name="db">Database context holding providers, tokens and sessions.</param>
    /// <param name="rag">Client used to fetch the CV document for keyword extraction.</param>
    /// <param name="settings">Job search settings; <c>TokenExpiryDays</c> controls token lifetime.</param>
    /// <param name="logger">Logger for token and session lifecycle events.</param>
    public JobTokenService(
        CvSearchDbContext db,
        IRagApiClient rag,
        IOptions<JobSearchSettings> settings,
        ILogger<JobTokenService> logger)
    {
        _db = db;
        _rag = rag;
        _settings = settings.Value;
        _logger = logger;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Returns <c>null</c> without creating a token when no provider is enabled.
    /// Otherwise persists a new unused token that expires <c>TokenExpiryDays</c> days
    /// from now and returns its id.
    /// </remarks>
    public async Task<string?> CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct)
    {
        var hasEnabledProviders = await _db.JobProviders.AnyAsync(p => p.Enabled, ct);
        if (!hasEnabledProviders)
        {
            _logger.LogDebug("Job search token skipped — no enabled providers in cvSearch.JobProviders");
            return null;
        }

        // Read the clock once so CreatedAt and ExpiresAt are derived from the same instant.
        var now = DateTime.UtcNow;

        var token = new JobSearchTokenEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            CvDocumentId = cvDocumentId,
            Email = email,
            Language = language,
            ExpiresAt = now.AddDays(_settings.TokenExpiryDays),
            Used = false,
            CreatedAt = now
        };

        _db.JobSearchTokens.Add(token);
        await _db.SaveChangesAsync(ct);

        _logger.LogInformation("Job search token created. TokenId={TokenId}, CvDocumentId={CvDocumentId}",
            token.Id, cvDocumentId);

        return token.Id;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Validates the token (exists, unused, not expired), derives keywords from the CV,
    /// snapshots the currently enabled providers and creates a pending session.
    /// The token is flagged as used in the same <c>SaveChangesAsync</c> call that inserts
    /// the session, so a failure or cancellation while fetching the CV or loading the
    /// providers does not consume the one-time token without producing a session.
    /// </remarks>
    public async Task<StartJobSearchStatus> TriggerStartAsync(string tokenId, CancellationToken ct)
    {
        var token = await _db.JobSearchTokens.FirstOrDefaultAsync(x => x.Id == tokenId, ct);

        if (token is null)
        {
            return StartJobSearchStatus.NotFound;
        }

        if (token.Used)
        {
            return StartJobSearchStatus.AlreadyUsed;
        }

        if (token.ExpiresAt <= DateTime.UtcNow)
        {
            return StartJobSearchStatus.Expired;
        }

        // A missing CV document is tolerated: the session is created with empty keywords.
        var cv = await _rag.GetDocumentAsync(token.CvDocumentId, ct);
        var keywords = cv is null ? string.Empty : ExtractKeywords(cv.Text);

        var enabledProviders = await _db.JobProviders
            .Where(p => p.Enabled)
            .OrderBy(p => p.DisplayOrder)
            .ToListAsync(ct);

        // Snapshot of the provider configuration as it is right now.
        var providerConfigJson = JsonSerializer.Serialize(
            enabledProviders.Select(ToConfig).ToList(),
            WebJsonOptions);

        var session = new JobSearchSessionEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            TokenId = token.Id,
            CvDocumentId = token.CvDocumentId,
            Email = token.Email,
            Language = token.Language,
            Status = JobSearchStatus.Pending,
            Keywords = keywords,
            ProviderConfigJson = providerConfigJson,
            CreatedAt = DateTime.UtcNow
        };

        // Consume the token and insert the session together; both changes are written
        // by the single SaveChangesAsync call below.
        // NOTE(review): there is no concurrency token on the Used flag, so two simultaneous
        // requests for the same token can still both pass the checks above — confirm whether
        // a row version / unique index on TokenId is wanted.
        token.Used = true;
        _db.JobSearchSessions.Add(session);
        await _db.SaveChangesAsync(ct);

        _logger.LogInformation(
            "Job search session created. SessionId={SessionId}, Keywords={Keywords}, Providers={Providers}",
            session.Id, keywords, string.Join(", ", enabledProviders.Select(p => p.Name)));

        return StartJobSearchStatus.Started;
    }

    /// <summary>
    /// Maps a <see cref="JobProviderEntity"/> to the <see cref="JobProviderConfig"/> DTO used by
    /// cv-search-job. The InitialKeywords list is stored as a JSON array in the entity.
    /// </summary>
    /// <param name="entity">Provider row to convert.</param>
    /// <returns>The provider configuration with its keyword list parsed from JSON.</returns>
    private static JobProviderConfig ToConfig(JobProviderEntity entity)
    {
        return new JobProviderConfig
        {
            Name = entity.Name,
            Enabled = entity.Enabled,
            SearchUrlTemplate = entity.SearchUrlTemplate,
            JobLinkContains = entity.JobLinkContains,
            InitialKeywords = ParseInitialKeywords(entity.InitialKeywordsJson),
            MaxResults = entity.MaxResults
        };
    }

    /// <summary>
    /// Parses the JSON array of initial keywords. Missing, blank or malformed JSON yields an
    /// empty list so a single bad provider row does not prevent session creation.
    /// </summary>
    /// <param name="json">JSON array text, or <c>null</c>.</param>
    /// <returns>The parsed keywords; never <c>null</c>.</returns>
    private static List<string> ParseInitialKeywords(string? json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return [];
        }

        try
        {
            return JsonSerializer.Deserialize<List<string>>(json, WebJsonOptions) ?? [];
        }
        catch (JsonException)
        {
            // Only malformed JSON is tolerated; unrelated failures are allowed to surface.
            return [];
        }
    }

    /// <summary>
    /// Extracts up to 10 meaningful keywords from the CV text using simple heuristics (no LLM).
    /// Samples the first 2000 characters (where title/role/skills usually appear), removes
    /// e-mail addresses and URLs, splits by whitespace and common delimiters, strips
    /// punctuation, and deduplicates case-insensitively.
    /// Works regardless of whether the PDF extractor preserves newlines.
    /// </summary>
    /// <param name="cvText">Raw CV text; may be <c>null</c> or empty.</param>
    /// <returns>Comma-separated keywords, or an empty string when none are found.</returns>
    private static string ExtractKeywords(string? cvText)
    {
        if (string.IsNullOrWhiteSpace(cvText))
        {
            return string.Empty;
        }

        // Focus on the header area where name/title/skills typically appear.
        var sample = cvText.Length > KeywordSampleLength ? cvText[..KeywordSampleLength] : cvText;

        // Contact details must be removed while '@', '.' and "://" are still present.
        var withoutContacts = EmailOrUrlPattern.Replace(sample, " ");

        var keywords = withoutContacts
            .Split(TokenDelimiters, StringSplitOptions.RemoveEmptyEntries)
            .Select(token => NonKeywordCharsPattern.Replace(token, string.Empty).Trim('-'))
            .Where(word => word.Length > 2)
            .Where(word => !DigitsOnlyPattern.IsMatch(word)) // skip phone fragments and pure numbers
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .Take(MaxKeywords);

        return string.Join(",", keywords);
    }
}