Files
myAi/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs
T
claude c89df975bd
Build and Push Docker Images Staging / build (push) Successful in 14m42s
Add searched location to job search results email
Show the candidate's location in the scan summary block of the results email
alongside keywords and providers, for both en and ro templates.

- CvSearchEmailSender.SendResultsAsync accepts location and passes it to BuildScanSummary
- BuildScanSummary passes {{location}} to the template (falls back to '-' when absent)
- CvSearchJobTask passes session.Location to SendResultsAsync
- New migration AddLocationToScanSummaryTemplate updates both language variants of
  email.search-results.scan-summary to include a 'Location / Locație căutată' row

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 15:54:38 +03:00

259 lines
10 KiB
C#

using System.Text.Json;
using CvMatcher.Models.Requests;
using CvSearch.Data;
using CvSearch.Data.Entities;
using CvMatcher.Models.Settings;
using CvSearchJob.Clients;
using CvSearchJob.Services;
using JobScheduler.Tasks;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace CvSearchJob.Tasks;
/// <summary>
/// Background job task that processes pending job search sessions: scrapes providers,
/// scores each URL against the CV via the matcher API, persists results, and sends the results email.
/// </summary>
public sealed class CvSearchJobTask : IJobTask
{
/// <summary>Creates the per-execution DI scope from which the <c>CvSearchDbContext</c> is resolved.</summary>
private readonly IServiceScopeFactory _scopeFactory;

/// <summary>Job search settings snapshot (<c>Enabled</c>, <c>MaxJobsToMatch</c>, <c>MinMatchScore</c>).</summary>
private readonly JobSearchSettings _settings;

/// <summary>Scraper that returns candidate job URLs for a provider.</summary>
private readonly HtmlJobSearcher _searcher;

/// <summary>Client for the matcher API that scores a job URL against a CV.</summary>
private readonly ICvMatcherInternalApi _matcherApi;

/// <summary>Sends the results email once a session has been processed.</summary>
private readonly CvSearchEmailSender _emailSender;

private readonly ILogger<CvSearchJobTask> _logger;

/// <summary>
/// Identifier of this task type.
/// NOTE(review): presumably the key the scheduler uses to route work to this task — confirm against <c>IJobTask</c>.
/// </summary>
public string TaskType => "CvSearch";

/// <summary>
/// Stores the injected collaborators and takes a snapshot of the current <see cref="JobSearchSettings"/> value.
/// </summary>
public CvSearchJobTask(
    IServiceScopeFactory scopeFactory,
    IOptions<JobSearchSettings> settings,
    HtmlJobSearcher searcher,
    ICvMatcherInternalApi matcherApi,
    CvSearchEmailSender emailSender,
    ILogger<CvSearchJobTask> logger)
{
    (_scopeFactory, _settings, _searcher, _matcherApi, _emailSender, _logger) =
        (scopeFactory, settings.Value, searcher, matcherApi, emailSender, logger);
}
/// <summary>
/// How long after its <c>CreatedAt</c> timestamp a session still marked
/// <see cref="JobSearchStatus.Processing"/> is treated as orphaned and put back in the queue.
/// </summary>
private static readonly TimeSpan StuckSessionThreshold = TimeSpan.FromMinutes(10);

/// <summary>
/// Called by the scheduler on each tick. Resets orphaned sessions, picks the oldest pending session,
/// runs the full search pipeline, and sends the results email.
/// Does nothing when <c>JobSearch:Enabled</c> is <c>false</c>.
/// </summary>
/// <param name="parametersSection">Scheduler-supplied configuration section; not read by this task.</param>
/// <param name="cancellationToken">Token that aborts the run. It is passed to every database, scraper and matcher call.</param>
/// <exception cref="OperationCanceledException">
/// Rethrown when the run is interrupted through <paramref name="cancellationToken"/>.
/// </exception>
public async Task ExecuteAsync(IConfiguration parametersSection, CancellationToken cancellationToken)
{
    if (!_settings.Enabled)
    {
        return;
    }

    using var scope = _scopeFactory.CreateScope();
    var db = scope.ServiceProvider.GetRequiredService<CvSearchDbContext>();

    await ResetStuckSessionsAsync(db, cancellationToken);

    // FirstOrDefaultAsync already limits the query to one row, so no explicit Take(1) is needed.
    var pending = await db.JobSearchSessions
        .Where(s => s.Status == JobSearchStatus.Pending)
        .OrderBy(s => s.CreatedAt)
        .FirstOrDefaultAsync(cancellationToken);
    if (pending is null)
    {
        return;
    }

    _logger.LogInformation("Processing job search session {SessionId}", pending.Id);
    pending.Status = JobSearchStatus.Processing;
    await db.SaveChangesAsync(cancellationToken);

    try
    {
        var cvKeywords = pending.Keywords
            .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .ToList();
        var providers = GetProviders(pending.ProviderConfigJson);
        var providerNames = providers.Select(p => p.Name).ToList();

        _logger.LogInformation(
            "Session {SessionId}: keywords=[{Keywords}] | providers=[{Providers}]",
            pending.Id,
            cvKeywords.Count > 0 ? string.Join(", ", cvKeywords) : "(none)",
            providerNames.Count > 0 ? string.Join(", ", providerNames) : "(none)");

        var results = await RunSearchAsync(pending, cvKeywords, providers, db, cancellationToken);

        pending.Status = JobSearchStatus.Done;
        await db.SaveChangesAsync(cancellationToken);

        var attachmentFileName = BuildCvFileName(pending.CvDocumentId);
        await _emailSender.SendResultsAsync(
            pending.Email,
            attachmentFileName,
            results,
            cvKeywords,
            providerNames,
            pending.Language,
            pending.Location,
            cancellationToken);

        _logger.LogInformation("Session {SessionId} done. {Count} results sent.", pending.Id, results.Count);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // The run was interrupted rather than broken. The stored status is left untouched:
        // a session still in Processing is put back to Pending by ResetStuckSessionsAsync
        // on a later tick, whereas marking it Failed would drop it permanently.
        _logger.LogWarning("Session {SessionId} was interrupted by cancellation.", pending.Id);
        throw;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Session {SessionId} failed.", pending.Id);
        pending.Status = JobSearchStatus.Failed;

        // Use a non-cancellable save so the Failed status is recorded even if
        // cancellation is requested while this handler is running.
        await db.SaveChangesAsync(CancellationToken.None);
    }
}

/// <summary>
/// Puts sessions that are still marked Processing and were created before the stuck cutoff
/// back into the Pending state (recovery for runs that died mid-session, e.g. a container crash).
/// </summary>
/// <remarks>
/// NOTE(review): age is measured from <c>CreatedAt</c>, not from the moment processing started, so a
/// session that waited in the queue longer than the threshold becomes eligible for a reset as soon as
/// it enters Processing. This is only safe if a single worker runs this task at a time — confirm.
/// </remarks>
private async Task ResetStuckSessionsAsync(CvSearchDbContext db, CancellationToken cancellationToken)
{
    var stuckCutoff = DateTime.UtcNow - StuckSessionThreshold;
    var stuckSessions = await db.JobSearchSessions
        .Where(s => s.Status == JobSearchStatus.Processing && s.CreatedAt < stuckCutoff)
        .ToListAsync(cancellationToken);
    if (stuckSessions.Count == 0)
    {
        return;
    }

    foreach (var stuck in stuckSessions)
    {
        stuck.Status = JobSearchStatus.Pending;
        _logger.LogWarning("Reset stuck session {SessionId} back to Pending", stuck.Id);
    }

    await db.SaveChangesAsync(cancellationToken);
}
/// <summary>
/// Runs the full search pipeline for a session: scrapes all providers, deduplicates URLs,
/// scores each candidate via the matcher API, and persists results that meet the minimum score threshold.
/// </summary>
/// <param name="session">Session being processed; supplies the CV document id, location and session id.</param>
/// <param name="cvKeywords">Keywords passed to every provider search. May be empty.</param>
/// <param name="providers">Providers to scrape, also used to attribute each URL to a provider.</param>
/// <param name="db">Context used to persist each accepted result as soon as it is scored.</param>
/// <param name="ct">Token that aborts the run.</param>
/// <returns>The persisted results, ordered by descending score.</returns>
/// <exception cref="OperationCanceledException">Rethrown when <paramref name="ct"/> is cancelled while scoring.</exception>
private async Task<List<JobSearchResultEntity>> RunSearchAsync(
    JobSearchSessionEntity session,
    List<string> cvKeywords,
    List<JobProviderConfig> providers,
    CvSearchDbContext db,
    CancellationToken ct)
{
    if (cvKeywords.Count == 0)
    {
        _logger.LogWarning("Session {SessionId}: keyword list is empty — scraper will rely on provider InitialKeywords only", session.Id);
    }

    // Case-insensitive set so the same URL returned by several providers is scored once.
    var jobUrls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var provider in providers)
    {
        var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, session.Location, ct);
        _logger.LogInformation("Session {SessionId}: provider {Provider} returned {Count} URLs", session.Id, provider.Name, urls.Count);
        jobUrls.UnionWith(urls);
    }

    var candidates = jobUrls.Take(_settings.MaxJobsToMatch).ToList();
    _logger.LogInformation(
        "Session {SessionId}: {Total} unique URLs across all providers, scoring {Scoring} (cap={Cap})",
        session.Id, jobUrls.Count, candidates.Count, _settings.MaxJobsToMatch);

    // Built once: a fresh JsonSerializerOptions per call discards the serializer's cached metadata.
    var jsonOptions = new JsonSerializerOptions(JsonSerializerDefaults.Web);
    var results = new List<JobSearchResultEntity>();

    foreach (var url in candidates)
    {
        JobSearchResultEntity? entity = null;
        try
        {
            var matchRequest = new MatchJobRequest
            {
                CvDocumentId = session.CvDocumentId,
                JobUrl = url,
                // User already gave GDPR consent when they clicked the one-time job search link
                GdprConsent = true
            };
            var matchResult = await _matcherApi.MatchJobAsync(matchRequest, ct);

            var accepted = matchResult.Score >= _settings.MinMatchScore;
            _logger.LogInformation(
                "Session {SessionId}: {Url} → score={Score}% (threshold={Threshold}%) {Verdict}",
                session.Id, url, matchResult.Score, _settings.MinMatchScore,
                accepted ? "ACCEPTED" : "rejected");
            if (!accepted)
            {
                continue;
            }

            entity = new JobSearchResultEntity
            {
                Id = Guid.NewGuid().ToString("N"),
                SessionId = session.Id,
                ProviderName = GuessProvider(url, providers),
                JobUrl = url,
                JobTitle = DeriveJobTitle(matchResult.Summary),
                JobText = string.Empty,
                Score = matchResult.Score,
                ResultJson = JsonSerializer.Serialize(matchResult, jsonOptions),
                CreatedAt = DateTime.UtcNow
            };
            db.JobSearchResults.Add(entity);
            await db.SaveChangesAsync(ct);
            results.Add(entity);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The whole run is being cancelled: stop instead of failing every remaining URL.
            // A timeout that is not caused by ct still falls through to the per-URL handler below.
            StopTracking(db, entity);
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Session {SessionId}: match failed for {Url}", session.Id, url);

            // If the save failed, the entity is still tracked as Added and every later
            // SaveChangesAsync on this context would retry it, failing the rest of the session.
            StopTracking(db, entity);
        }
    }

    results.Sort((a, b) => b.Score.CompareTo(a.Score));
    return results;
}

/// <summary>
/// Derives a display title from the matcher summary: its first non-empty sentence,
/// or <c>"Job"</c> when the summary is null, blank, or consists only of dots.
/// </summary>
private static string DeriveJobTitle(string? summary) =>
    summary?
        .Split('.', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        .FirstOrDefault()
    ?? "Job";

/// <summary>
/// Detaches a result entity from the context so a failed insert is not retried by later saves.
/// Does nothing when no entity was created for the current URL.
/// </summary>
private static void StopTracking(CvSearchDbContext db, JobSearchResultEntity? entity)
{
    if (entity is not null)
    {
        db.Entry(entity).State = EntityState.Detached;
    }
}
/// <summary>
/// Reads the provider configuration snapshot stored on the session (JSON, web defaults).
/// The snapshot is expected to be taken from the DB when the session is created, so it should
/// normally be present; a missing, blank, <c>null</c> or unparsable snapshot yields an empty list,
/// with a warning logged for the missing and unparsable cases.
/// </summary>
/// <param name="providerConfigJson">Serialised list of provider configurations, or <c>null</c>.</param>
/// <returns>The deserialised providers; never <c>null</c>.</returns>
private List<JobProviderConfig> GetProviders(string? providerConfigJson)
{
    if (!string.IsNullOrWhiteSpace(providerConfigJson))
    {
        try
        {
            var webOptions = new JsonSerializerOptions(JsonSerializerDefaults.Web);
            var snapshot = JsonSerializer.Deserialize<List<JobProviderConfig>>(providerConfigJson, webOptions);
            if (snapshot is null)
            {
                // The JSON literal "null" deserialises to a null list.
                return [];
            }

            return snapshot;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to deserialise provider config snapshot — returning empty provider list");
            return [];
        }
    }

    _logger.LogWarning("Session has no provider config snapshot — returning empty provider list");
    return [];
}
/// <summary>
/// Attributes a job URL to a provider: the first provider (in list order) whose non-blank
/// <c>JobLinkContains</c> fragment occurs in the URL, compared case-insensitively.
/// When none matches, the host of the URL is returned, or <c>"unknown"</c> if the URL is not absolute.
/// </summary>
/// <param name="url">Job posting URL to attribute.</param>
/// <param name="providers">Providers to test, in priority order.</param>
/// <returns>The matching provider's name, the URL host, or <c>"unknown"</c>.</returns>
private static string GuessProvider(string url, List<JobProviderConfig> providers)
{
    for (var index = 0; index < providers.Count; index++)
    {
        var candidate = providers[index];
        var fragment = candidate.JobLinkContains;
        if (string.IsNullOrWhiteSpace(fragment))
        {
            // A provider without a link pattern can never claim a URL.
            continue;
        }

        if (url.Contains(fragment, StringComparison.OrdinalIgnoreCase))
        {
            return candidate.Name;
        }
    }

    if (Uri.TryCreate(url, UriKind.Absolute, out var parsed))
    {
        return parsed.Host;
    }

    return "unknown";
}
/// <summary>
/// Builds the attachment filename for a CV: the document ID reduced to its letters and digits,
/// followed by <c>.pdf</c>. Falls back to <c>cv.pdf</c> when nothing is left of the ID.
/// </summary>
/// <param name="cvDocumentId">Identifier of the CV document.</param>
/// <returns>A filename of the form <c>{letters-and-digits}.pdf</c>.</returns>
private static string BuildCvFileName(string cvDocumentId)
{
    // Keep letters and digits only, so the name contains no separators or punctuation.
    var kept = cvDocumentId.Where(char.IsLetterOrDigit).ToArray();
    var stem = new string(kept);
    if (string.IsNullOrWhiteSpace(stem))
    {
        stem = "cv";
    }

    return stem + ".pdf";
}
}