feat(cv-search-job): enrich diagnostics and add scan summary to results email
Build and Push Docker Images Staging / build (push) Successful in 24s

Add funnel-level logging to HtmlJobSearcher (total anchors found,
stage-1 href-filter count, stage-2 keyword-filter count) and warn
when the keyword list is empty. Log the full search URL and response
size to catch silent HTTP failures or bot-block pages.

In CvSearchJobTask, log keywords and active providers at session start,
per-provider URL counts after each scrape, and every scored URL with its
verdict (ACCEPTED / rejected) at Information level.

Add a scan summary block to the results email (both non-empty and
empty-results paths) showing the CV keywords, rendered as chips, and
a comma-separated list of the providers scanned.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-29 11:00:04 +03:00
parent e14a6a0f69
commit af3a14c7ed
4 changed files with 99 additions and 20 deletions
+38 -12
View File
@@ -84,13 +84,35 @@ public sealed class CvSearchJobTask : IJobTask
try
{
var results = await RunSearchAsync(pending, db, cancellationToken);
var cvKeywords = pending.Keywords
.Split(',', StringSplitOptions.RemoveEmptyEntries)
.Select(k => k.Trim())
.Where(k => k.Length > 0)
.ToList();
var providers = GetProviders(pending.ProviderConfigJson);
_logger.LogInformation(
"Session {SessionId}: keywords=[{Keywords}] | providers=[{Providers}]",
pending.Id,
cvKeywords.Count > 0 ? string.Join(", ", cvKeywords) : "(none)",
providers.Count > 0 ? string.Join(", ", providers.Select(p => p.Name)) : "(none)");
var results = await RunSearchAsync(pending, cvKeywords, providers, db, cancellationToken);
pending.Status = JobSearchStatus.Done;
await db.SaveChangesAsync(cancellationToken);
var attachmentFileName = BuildCvFileName(pending.CvDocumentId);
await _emailSender.SendResultsAsync(pending.Email, attachmentFileName, results, pending.Language, cancellationToken);
await _emailSender.SendResultsAsync(
pending.Email,
attachmentFileName,
results,
cvKeywords,
providers.Select(p => p.Name).ToList(),
pending.Language,
cancellationToken);
_logger.LogInformation("Session {SessionId} done. {Count} results sent.", pending.Id, results.Count);
}
catch (Exception ex)
@@ -107,26 +129,27 @@ public sealed class CvSearchJobTask : IJobTask
/// </summary>
private async Task<List<JobSearchResultEntity>> RunSearchAsync(
JobSearchSessionEntity session,
List<string> cvKeywords,
List<JobProviderConfig> providers,
CvSearchDbContext db,
CancellationToken ct)
{
var cvKeywords = session.Keywords
.Split(',', StringSplitOptions.RemoveEmptyEntries)
.Select(k => k.Trim())
.Where(k => k.Length > 0)
.ToList();
if (cvKeywords.Count == 0)
_logger.LogWarning("Session {SessionId}: keyword list is empty — scraper will rely on provider InitialKeywords only", session.Id);
var providers = GetProviders(session.ProviderConfigJson);
var jobUrls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var provider in providers)
{
var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, ct);
_logger.LogInformation("Session {SessionId}: provider {Provider} returned {Count} URLs", session.Id, provider.Name, urls.Count);
foreach (var url in urls) jobUrls.Add(url);
}
var candidates = jobUrls.Take(_settings.MaxJobsToMatch).ToList();
_logger.LogInformation("Session {SessionId}: {Count} candidate job URLs to match", session.Id, candidates.Count);
_logger.LogInformation(
"Session {SessionId}: {Total} unique URLs across all providers, scoring {Scoring} (cap={Cap})",
session.Id, jobUrls.Count, candidates.Count, _settings.MaxJobsToMatch);
var results = new List<JobSearchResultEntity>();
@@ -143,11 +166,14 @@ public sealed class CvSearchJobTask : IJobTask
};
var matchResult = await _matcherApi.MatchJobAsync(matchRequest, ct);
_logger.LogInformation(
"Session {SessionId}: {Url} → score={Score}% (threshold={Threshold}%) {Verdict}",
session.Id, url, matchResult.Score, _settings.MinMatchScore,
matchResult.Score >= _settings.MinMatchScore ? "ACCEPTED" : "rejected");
if (matchResult.Score < _settings.MinMatchScore)
{
_logger.LogDebug("Session {SessionId}: {Url} scored {Score}% (below threshold)", session.Id, url, matchResult.Score);
continue;
}
var entity = new JobSearchResultEntity
{