using System.Text.Json;
using System.Text.RegularExpressions;
using Api.Clients.Api.Contracts;
using Api.Services.Contracts;
using CvMatcher.Models.Responses;
using CvSearch.Data;
using CvSearch.Data.Entities;
using CvMatcher.Models.Settings;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Creates and validates one-time job search tokens, and creates the corresponding search sessions.
/// Provider configuration is read from <c>cvSearch.JobProviders</c> at session-creation time and
/// snapshotted into <c>JobSearchSessionEntity.ProviderConfigJson</c> so subsequent config changes
/// do not affect already-queued sessions.
/// </summary>
public sealed class JobTokenService : IJobTokenService
{
    /// <summary>Number of leading CV characters sampled when extracting keywords.</summary>
    private const int KeywordSampleLength = 2000;

    /// <summary>Maximum number of keywords extracted from a CV.</summary>
    private const int MaxKeywords = 10;

    // Shared instance: System.Text.Json caches serialization metadata per options object,
    // so allocating a fresh one on every call repeatedly pays the warm-up cost (CA1869).
    private static readonly JsonSerializerOptions WebJsonOptions = new(JsonSerializerDefaults.Web);

    // Whitespace plus the separators commonly found in CV header lines.
    private static readonly char[] TokenDelimiters = [' ', '\n', '\r', '\t', '|', '/', ',', ';', '(', ')'];

    // Everything that is neither a word character nor a hyphen.
    private static readonly Regex NonWordChars = new(@"[^\w\-]", RegexOptions.Compiled);

    // Tokens made only of digits and hyphens (phone fragments, years, pure numbers).
    private static readonly Regex DigitsAndHyphensOnly = new(@"^[\d\-]+$", RegexOptions.Compiled);

    private readonly CvSearchDbContext _db;
    private readonly IRagApiClient _rag;
    private readonly JobSearchSettings _settings;
    private readonly ILogger<JobTokenService> _logger;

    /// <summary>
    /// Initializes the service with its database context, RAG client, settings and logger.
    /// </summary>
    /// <param name="db">Database context holding providers, tokens and sessions.</param>
    /// <param name="rag">Client used to fetch the CV document for keyword extraction.</param>
    /// <param name="settings">Job search settings; <c>TokenExpiryDays</c> is read from its value.</param>
    /// <param name="logger">Logger for token and session lifecycle events.</param>
    public JobTokenService(
        CvSearchDbContext db,
        IRagApiClient rag,
        IOptions<JobSearchSettings> settings,
        ILogger<JobTokenService> logger)
    {
        _db = db;
        _rag = rag;
        _settings = settings.Value;
        _logger = logger;
    }

    /// <summary>
    /// Creates and persists a one-time job search token for the given CV document.
    /// </summary>
    /// <param name="cvDocumentId">Identifier of the CV document the token refers to.</param>
    /// <param name="email">E-mail address stored on the token.</param>
    /// <param name="language">Language stored on the token.</param>
    /// <param name="ct">Cancellation token flowed to the database calls.</param>
    /// <returns>
    /// The id of the new token, or <c>null</c> when no provider is enabled and no token was created.
    /// </returns>
    public async Task<string?> CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct)
    {
        var hasEnabledProviders = await _db.JobProviders.AnyAsync(p => p.Enabled, ct);
        if (!hasEnabledProviders)
        {
            _logger.LogDebug("Job search token skipped — no enabled providers in cvSearch.JobProviders");
            return null;
        }

        // Capture the clock once so ExpiresAt is exactly CreatedAt + TokenExpiryDays.
        var now = DateTime.UtcNow;
        var token = new JobSearchTokenEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            CvDocumentId = cvDocumentId,
            Email = email,
            Language = language,
            ExpiresAt = now.AddDays(_settings.TokenExpiryDays),
            Used = false,
            CreatedAt = now
        };

        _db.JobSearchTokens.Add(token);
        await _db.SaveChangesAsync(ct);

        _logger.LogInformation("Job search token created. TokenId={TokenId}, CvDocumentId={CvDocumentId}", token.Id, cvDocumentId);
        return token.Id;
    }

    /// <summary>
    /// Validates a job search token and, when it is valid, consumes it and queues a pending
    /// search session carrying the extracted CV keywords and a snapshot of the enabled providers.
    /// </summary>
    /// <param name="tokenId">Id of the token to redeem.</param>
    /// <param name="ct">Cancellation token flowed to the database and RAG calls.</param>
    /// <returns>
    /// <c>NotFound</c> when no token has that id, <c>AlreadyUsed</c> when the token is flagged as used,
    /// <c>Expired</c> when its expiry is not in the future, otherwise <c>Started</c>.
    /// </returns>
    public async Task<StartJobSearchStatus> TriggerStartAsync(string tokenId, CancellationToken ct)
    {
        var token = await _db.JobSearchTokens.FirstOrDefaultAsync(x => x.Id == tokenId, ct);
        if (token is null)
        {
            return StartJobSearchStatus.NotFound;
        }

        if (token.Used)
        {
            return StartJobSearchStatus.AlreadyUsed;
        }

        if (token.ExpiresAt <= DateTime.UtcNow)
        {
            return StartJobSearchStatus.Expired;
        }

        // Do all fallible work BEFORE consuming the token. Previously the token was saved as
        // used first, so a failure while fetching the CV or creating the session left a burned
        // token with no session, and every retry answered AlreadyUsed.
        var cv = await _rag.GetDocumentAsync(token.CvDocumentId, ct);
        var keywords = cv is not null ? ExtractKeywords(cv.Text) : string.Empty;

        var enabledProviders = await _db.JobProviders
            .Where(p => p.Enabled)
            .OrderBy(p => p.DisplayOrder)
            .ToListAsync(ct);

        var providerConfigJson = JsonSerializer.Serialize(
            enabledProviders.Select(ToConfig).ToList(),
            WebJsonOptions);

        var session = new JobSearchSessionEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            TokenId = token.Id,
            CvDocumentId = token.CvDocumentId,
            Email = token.Email,
            Language = token.Language,
            Status = JobSearchStatus.Pending,
            Keywords = keywords,
            ProviderConfigJson = providerConfigJson,
            CreatedAt = DateTime.UtcNow
        };

        // Consume the token and insert the session in a single SaveChanges call so both
        // changes are committed together or not at all.
        // NOTE(review): concurrent requests for the same token are still not serialized here;
        // a concurrency token on the entity would be needed for that — confirm whether one exists.
        token.Used = true;
        _db.JobSearchSessions.Add(session);
        await _db.SaveChangesAsync(ct);

        _logger.LogInformation(
            "Job search session created. SessionId={SessionId}, Keywords={Keywords}, Providers={Providers}",
            session.Id, keywords, string.Join(", ", enabledProviders.Select(p => p.Name)));

        return StartJobSearchStatus.Started;
    }

    /// <summary>
    /// Maps a <see cref="JobProviderEntity"/> to the <see cref="JobProviderConfig"/> DTO used by
    /// cv-search-job. The InitialKeywords list is stored as a JSON array in the entity.
    /// </summary>
    /// <param name="entity">Provider row to convert.</param>
    /// <returns>The provider configuration that is snapshotted into the session.</returns>
    private JobProviderConfig ToConfig(JobProviderEntity entity)
    {
        return new JobProviderConfig
        {
            Name = entity.Name,
            Enabled = entity.Enabled,
            SearchUrlTemplate = entity.SearchUrlTemplate,
            JobLinkContains = entity.JobLinkContains,
            InitialKeywords = ParseInitialKeywords(entity),
            MaxResults = entity.MaxResults
        };
    }

    /// <summary>
    /// Reads the provider's initial keywords from its JSON column, falling back to an empty
    /// list when the column is blank, contains JSON <c>null</c>, or holds malformed JSON.
    /// </summary>
    /// <param name="entity">Provider row whose <c>InitialKeywordsJson</c> is parsed.</param>
    /// <returns>The parsed keywords; never <c>null</c>.</returns>
    private List<string> ParseInitialKeywords(JobProviderEntity entity)
    {
        // Deserialize throws ArgumentNullException on null and JsonException on empty input;
        // treat both as "no keywords" without going through an exception.
        if (string.IsNullOrWhiteSpace(entity.InitialKeywordsJson))
        {
            return [];
        }

        try
        {
            return JsonSerializer.Deserialize<List<string>>(entity.InitialKeywordsJson, WebJsonOptions) ?? [];
        }
        catch (JsonException ex)
        {
            // Best effort: one badly configured provider must not block session creation,
            // but the misconfiguration should be visible in the logs.
            _logger.LogWarning(
                ex,
                "Invalid InitialKeywordsJson for provider {Provider}; using an empty keyword list",
                entity.Name);
            return [];
        }
    }

    /// <summary>
    /// Extracts up to 10 meaningful keywords from the CV text using simple heuristics (no LLM).
    /// Samples the first 2000 characters (where title/role/skills usually appear), splits by
    /// whitespace and common delimiters, skips e-mail/URL-like tokens, strips punctuation,
    /// and deduplicates case-insensitively.
    /// Works regardless of whether the PDF extractor preserves newlines.
    /// </summary>
    /// <param name="cvText">Raw CV text; may be <c>null</c> or empty.</param>
    /// <returns>A comma-separated keyword list, or an empty string when nothing qualifies.</returns>
    private static string ExtractKeywords(string? cvText)
    {
        if (string.IsNullOrWhiteSpace(cvText))
        {
            return string.Empty;
        }

        // Focus on the header area where name/title/skills typically appear.
        var sample = cvText.Length > KeywordSampleLength ? cvText[..KeywordSampleLength] : cvText;

        var keywords = sample
            .Split(TokenDelimiters, StringSplitOptions.RemoveEmptyEntries)
            // Must run on the raw token: once punctuation is stripped, '@' and '.' are gone
            // and an address would survive as one concatenated "keyword".
            .Where(raw => !LooksLikeEmailOrUrl(raw))
            .Select(raw => NonWordChars.Replace(raw, string.Empty).Trim('-'))
            .Where(word => word.Length > 2)
            .Where(word => !DigitsAndHyphensOnly.IsMatch(word)) // skip phone fragments and pure numbers
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .Take(MaxKeywords);

        return string.Join(",", keywords);
    }

    /// <summary>
    /// Tells whether a raw (unstripped) token contains <c>@</c> or an interior dot, which is
    /// how e-mail addresses and host names show up after splitting.
    /// </summary>
    /// <param name="rawToken">Token as produced by the split, before punctuation stripping.</param>
    /// <returns><c>true</c> when the token should be skipped as an e-mail or URL fragment.</returns>
    private static bool LooksLikeEmailOrUrl(string rawToken)
    {
        // Leading/trailing dots are sentence punctuation ("Engineer."), not domain separators.
        var core = rawToken.Trim('.');
        return core.Contains('@') || core.Contains('.');
    }
}