feat(cv-search-job): enrich diagnostics and add scan summary to results email
Build and Push Docker Images Staging / build (push) Successful in 24s
Build and Push Docker Images Staging / build (push) Successful in 24s
Add funnel-level logging to HtmlJobSearcher (total anchors found, stage-1 href-filter count, stage-2 keyword-filter count) and warn when the keyword list is empty. Log the full search URL and response size to catch silent HTTP failures or bot-block pages. In CvSearchJobTask, log keywords and active providers at session start, per-provider URL counts after each scrape, and every scored URL with its verdict (ACCEPTED / rejected) at Information level. Add a scan summary block to the results email (both non-empty and empty-results paths) showing the CV keywords used as chips and the comma-separated list of providers scanned. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -35,12 +35,16 @@ public sealed class CvSearchEmailSender
|
||||
/// <param name="toEmail">Primary recipient (the user who triggered the search).</param>
|
||||
/// <param name="attachmentFileName">Relative filename of the CV PDF to attach, or <c>null</c>.</param>
|
||||
/// <param name="results">Ranked list of job search results to include in the email body.</param>
|
||||
/// <param name="keywords">CV keywords used to drive the job search.</param>
|
||||
/// <param name="providerNames">Names of the providers that were scanned.</param>
|
||||
/// <param name="language">Two-letter language code for template rendering.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
public async Task SendResultsAsync(
|
||||
string toEmail,
|
||||
string? attachmentFileName,
|
||||
IReadOnlyList<JobSearchResultEntity> results,
|
||||
IReadOnlyList<string> keywords,
|
||||
IReadOnlyList<string> providerNames,
|
||||
string language,
|
||||
CancellationToken ct)
|
||||
{
|
||||
@@ -54,7 +58,7 @@ public sealed class CvSearchEmailSender
|
||||
|
||||
if (recipients.Count == 0) return;
|
||||
|
||||
var htmlBody = BuildBody(results, language);
|
||||
var htmlBody = BuildBody(results, keywords, providerNames, language);
|
||||
var subject = _emailTemplates.Render("email.search-results.subject", language,
|
||||
("count", results.Count.ToString()));
|
||||
|
||||
@@ -81,11 +85,14 @@ public sealed class CvSearchEmailSender
|
||||
/// <summary>
|
||||
/// Renders the HTML email body from the results list.
|
||||
/// Returns the empty-results template when no results are present.
|
||||
/// Prepends a scan summary block showing the keywords and providers used.
|
||||
/// </summary>
|
||||
private string BuildBody(IReadOnlyList<JobSearchResultEntity> results, string language)
|
||||
private string BuildBody(IReadOnlyList<JobSearchResultEntity> results, IReadOnlyList<string> keywords, IReadOnlyList<string> providerNames, string language)
|
||||
{
|
||||
var scanSummary = BuildScanSummary(keywords, providerNames);
|
||||
|
||||
if (results.Count == 0)
|
||||
return _emailTemplates.Get("email.search-results.empty", language);
|
||||
return scanSummary + _emailTemplates.Get("email.search-results.empty", language);
|
||||
|
||||
var items = new System.Text.StringBuilder();
|
||||
for (int i = 0; i < results.Count; i++)
|
||||
@@ -107,7 +114,29 @@ public sealed class CvSearchEmailSender
|
||||
|
||||
return _emailTemplates.Render("email.search-results.body", language,
|
||||
("count", results.Count.ToString()),
|
||||
("items", items.ToString()));
|
||||
("items", scanSummary + items.ToString()));
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the scan summary block showing the CV keywords and providers used for the search.
/// </summary>
/// <param name="keywords">
/// CV keywords used for the search. Each one is rendered as a chip; an empty list
/// renders a "none detected" placeholder instead.
/// </param>
/// <param name="providerNames">
/// Names of the providers that were scanned. Rendered as a comma-separated list;
/// an empty list renders "none".
/// </param>
/// <returns>An HTML fragment containing the summary block.</returns>
private static string BuildScanSummary(IReadOnlyList<string> keywords, IReadOnlyList<string> providerNames)
{
    // Keywords and provider names are data, not markup: HTML-encode each value so
    // characters such as '<', '&' or '"' cannot break the layout or inject markup
    // into the email body.
    var keywordsHtml = keywords.Count > 0
        ? string.Join("", keywords.Select(k =>
            $"<span style=\"display:inline-block;background:#e9ecef;border-radius:4px;padding:2px 8px;margin:2px;font-size:12px\">{System.Net.WebUtility.HtmlEncode(k)}</span>"))
        : "<span style=\"color:#6c757d;font-size:12px;font-style:italic\">none detected</span>";

    var providersText = providerNames.Count > 0
        ? string.Join(", ", providerNames.Select(p => System.Net.WebUtility.HtmlEncode(p)))
        : "none";

    // keywordsHtml is inserted as markup on purpose: its variable parts were encoded above.
    return $"""
    <div style="background:#f8f9fa;border:1px solid #dee2e6;border-radius:6px;padding:14px 16px;margin-bottom:18px;font-size:13px;color:#495057">
    <div style="margin-bottom:8px"><strong>Keywords used:</strong> {keywordsHtml}</div>
    <div><strong>Providers scanned:</strong> {providersText}</div>
    </div>
    """;
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -44,19 +44,27 @@ public sealed class HtmlJobSearcher
|
||||
.ToList();
|
||||
|
||||
if (allKeywords.Count == 0)
|
||||
{
|
||||
_logger.LogWarning("Provider {Provider}: no keywords available (CV keywords empty, InitialKeywords empty), skipping", provider.Name);
|
||||
return [];
|
||||
}
|
||||
|
||||
var keywordsEncoded = HttpUtility.UrlEncode(string.Join(" ", allKeywords));
|
||||
var searchUrl = provider.SearchUrlTemplate.Replace("{keywords}", keywordsEncoded);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Provider {Provider}: fetching {Url} | CV keywords: [{Keywords}]",
|
||||
provider.Name, searchUrl, string.Join(", ", cvKeywords));
|
||||
|
||||
string html;
|
||||
try
|
||||
{
|
||||
html = await _http.GetStringAsync(searchUrl, ct);
|
||||
_logger.LogInformation("Provider {Provider}: received {Length} chars of HTML", provider.Name, html.Length);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to fetch search results from {Provider} at {Url}", provider.Name, searchUrl);
|
||||
_logger.LogWarning(ex, "Provider {Provider}: HTTP fetch failed for {Url}", provider.Name, searchUrl);
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -68,7 +76,11 @@ public sealed class HtmlJobSearcher
|
||||
var anchorPattern = new Regex(@"<a[^>]+href=[""']([^""']+)[""'][^>]*>(.*?)</a>",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Singleline);
|
||||
|
||||
foreach (Match match in anchorPattern.Matches(html))
|
||||
var allAnchors = anchorPattern.Matches(html);
|
||||
var stage1Pass = 0;
|
||||
var stage2Pass = 0;
|
||||
|
||||
foreach (Match match in allAnchors)
|
||||
{
|
||||
if (results.Count >= provider.MaxResults) break;
|
||||
|
||||
@@ -78,9 +90,18 @@ public sealed class HtmlJobSearcher
|
||||
if (!href.Contains(provider.JobLinkContains, StringComparison.OrdinalIgnoreCase))
|
||||
continue;
|
||||
|
||||
stage1Pass++;
|
||||
|
||||
// Stage 2: anchor text must contain at least one CV keyword
|
||||
if (!cvKeywords.Any(k => anchorText.Contains(k, StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"Provider {Provider}: stage-2 reject | href={Href} | text={Text}",
|
||||
provider.Name, href, anchorText.Length > 100 ? anchorText[..100] : anchorText);
|
||||
continue;
|
||||
}
|
||||
|
||||
stage2Pass++;
|
||||
|
||||
// Make absolute URL
|
||||
if (!Uri.TryCreate(href, UriKind.Absolute, out var absoluteUri))
|
||||
@@ -95,7 +116,10 @@ public sealed class HtmlJobSearcher
|
||||
results.Add(url);
|
||||
}
|
||||
|
||||
_logger.LogInformation("Provider {Provider}: found {Count} job URLs", provider.Name, results.Count);
|
||||
_logger.LogInformation(
|
||||
"Provider {Provider}: {TotalAnchors} anchors found | {Stage1} passed href filter ('{LinkPattern}') | {Stage2} passed keyword filter | {Unique} unique URLs returned",
|
||||
provider.Name, allAnchors.Count, stage1Pass, provider.JobLinkContains, stage2Pass, results.Count);
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -84,13 +84,35 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
|
||||
try
|
||||
{
|
||||
var results = await RunSearchAsync(pending, db, cancellationToken);
|
||||
var cvKeywords = pending.Keywords
|
||||
.Split(',', StringSplitOptions.RemoveEmptyEntries)
|
||||
.Select(k => k.Trim())
|
||||
.Where(k => k.Length > 0)
|
||||
.ToList();
|
||||
|
||||
var providers = GetProviders(pending.ProviderConfigJson);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Session {SessionId}: keywords=[{Keywords}] | providers=[{Providers}]",
|
||||
pending.Id,
|
||||
cvKeywords.Count > 0 ? string.Join(", ", cvKeywords) : "(none)",
|
||||
providers.Count > 0 ? string.Join(", ", providers.Select(p => p.Name)) : "(none)");
|
||||
|
||||
var results = await RunSearchAsync(pending, cvKeywords, providers, db, cancellationToken);
|
||||
|
||||
pending.Status = JobSearchStatus.Done;
|
||||
await db.SaveChangesAsync(cancellationToken);
|
||||
|
||||
var attachmentFileName = BuildCvFileName(pending.CvDocumentId);
|
||||
await _emailSender.SendResultsAsync(pending.Email, attachmentFileName, results, pending.Language, cancellationToken);
|
||||
await _emailSender.SendResultsAsync(
|
||||
pending.Email,
|
||||
attachmentFileName,
|
||||
results,
|
||||
cvKeywords,
|
||||
providers.Select(p => p.Name).ToList(),
|
||||
pending.Language,
|
||||
cancellationToken);
|
||||
|
||||
_logger.LogInformation("Session {SessionId} done. {Count} results sent.", pending.Id, results.Count);
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -107,26 +129,27 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
/// </summary>
|
||||
private async Task<List<JobSearchResultEntity>> RunSearchAsync(
|
||||
JobSearchSessionEntity session,
|
||||
List<string> cvKeywords,
|
||||
List<JobProviderConfig> providers,
|
||||
CvSearchDbContext db,
|
||||
CancellationToken ct)
|
||||
{
|
||||
var cvKeywords = session.Keywords
|
||||
.Split(',', StringSplitOptions.RemoveEmptyEntries)
|
||||
.Select(k => k.Trim())
|
||||
.Where(k => k.Length > 0)
|
||||
.ToList();
|
||||
if (cvKeywords.Count == 0)
|
||||
_logger.LogWarning("Session {SessionId}: keyword list is empty — scraper will rely on provider InitialKeywords only", session.Id);
|
||||
|
||||
var providers = GetProviders(session.ProviderConfigJson);
|
||||
var jobUrls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var provider in providers)
|
||||
{
|
||||
var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, ct);
|
||||
_logger.LogInformation("Session {SessionId}: provider {Provider} returned {Count} URLs", session.Id, provider.Name, urls.Count);
|
||||
foreach (var url in urls) jobUrls.Add(url);
|
||||
}
|
||||
|
||||
var candidates = jobUrls.Take(_settings.MaxJobsToMatch).ToList();
|
||||
_logger.LogInformation("Session {SessionId}: {Count} candidate job URLs to match", session.Id, candidates.Count);
|
||||
_logger.LogInformation(
|
||||
"Session {SessionId}: {Total} unique URLs across all providers, scoring {Scoring} (cap={Cap})",
|
||||
session.Id, jobUrls.Count, candidates.Count, _settings.MaxJobsToMatch);
|
||||
|
||||
var results = new List<JobSearchResultEntity>();
|
||||
|
||||
@@ -143,11 +166,14 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
};
|
||||
|
||||
var matchResult = await _matcherApi.MatchJobAsync(matchRequest, ct);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Session {SessionId}: {Url} → score={Score}% (threshold={Threshold}%) {Verdict}",
|
||||
session.Id, url, matchResult.Score, _settings.MinMatchScore,
|
||||
matchResult.Score >= _settings.MinMatchScore ? "ACCEPTED" : "rejected");
|
||||
|
||||
if (matchResult.Score < _settings.MinMatchScore)
|
||||
{
|
||||
_logger.LogDebug("Session {SessionId}: {Url} scored {Score}% (below threshold)", session.Id, url, matchResult.Score);
|
||||
continue;
|
||||
}
|
||||
|
||||
var entity = new JobSearchResultEntity
|
||||
{
|
||||
|
||||
@@ -21,10 +21,10 @@
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\Apis\cv-matcher-api-models\cv-matcher-api-models.csproj" />
|
||||
<ProjectReference Include="..\..\Apis\email-data\email-data.csproj" />
|
||||
<ProjectReference Include="..\..\Apis\email-api-models\email-api-models.csproj" />
|
||||
<ProjectReference Include="..\..\Apis\cv-search-data\cv-search-data.csproj" />
|
||||
<ProjectReference Include="..\..\Apis\common\common.csproj" />
|
||||
<ProjectReference Include="..\..\Apis\email-data\email-data.csproj" />
|
||||
<ProjectReference Include="..\..\Helpers\startup-helpers\startup-helpers.csproj" />
|
||||
<ProjectReference Include="..\job-scheduler\job-scheduler.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user