feat(cv-search-job): enrich diagnostics and add scan summary to results email
Build and Push Docker Images Staging / build (push) Successful in 24s
Build and Push Docker Images Staging / build (push) Successful in 24s
Add funnel-level logging to HtmlJobSearcher (total anchors found, stage-1 href-filter count, stage-2 keyword-filter count) and warn when the keyword list is empty. Log the full search URL and the response size to catch silent HTTP failures or bot-block pages.

In CvSearchJobTask, log the keywords and active providers at session start, the per-provider URL counts after each scrape, and every scored URL with its verdict (ACCEPTED / rejected) at Information level.

Add a scan summary block to the results email (on both the non-empty and empty-results paths) showing the CV keywords that were used, rendered as chips, and a comma-separated list of the providers scanned.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -44,19 +44,27 @@ public sealed class HtmlJobSearcher
|
||||
.ToList();
|
||||
|
||||
if (allKeywords.Count == 0)
|
||||
{
|
||||
_logger.LogWarning("Provider {Provider}: no keywords available (CV keywords empty, InitialKeywords empty), skipping", provider.Name);
|
||||
return [];
|
||||
}
|
||||
|
||||
var keywordsEncoded = HttpUtility.UrlEncode(string.Join(" ", allKeywords));
|
||||
var searchUrl = provider.SearchUrlTemplate.Replace("{keywords}", keywordsEncoded);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Provider {Provider}: fetching {Url} | CV keywords: [{Keywords}]",
|
||||
provider.Name, searchUrl, string.Join(", ", cvKeywords));
|
||||
|
||||
string html;
|
||||
try
|
||||
{
|
||||
html = await _http.GetStringAsync(searchUrl, ct);
|
||||
_logger.LogInformation("Provider {Provider}: received {Length} chars of HTML", provider.Name, html.Length);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to fetch search results from {Provider} at {Url}", provider.Name, searchUrl);
|
||||
_logger.LogWarning(ex, "Provider {Provider}: HTTP fetch failed for {Url}", provider.Name, searchUrl);
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -68,7 +76,11 @@ public sealed class HtmlJobSearcher
|
||||
var anchorPattern = new Regex(@"<a[^>]+href=[""']([^""']+)[""'][^>]*>(.*?)</a>",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Singleline);
|
||||
|
||||
foreach (Match match in anchorPattern.Matches(html))
|
||||
var allAnchors = anchorPattern.Matches(html);
|
||||
var stage1Pass = 0;
|
||||
var stage2Pass = 0;
|
||||
|
||||
foreach (Match match in allAnchors)
|
||||
{
|
||||
if (results.Count >= provider.MaxResults) break;
|
||||
|
||||
@@ -78,9 +90,18 @@ public sealed class HtmlJobSearcher
|
||||
if (!href.Contains(provider.JobLinkContains, StringComparison.OrdinalIgnoreCase))
|
||||
continue;
|
||||
|
||||
stage1Pass++;
|
||||
|
||||
// Stage 2: anchor text must contain at least one CV keyword
|
||||
if (!cvKeywords.Any(k => anchorText.Contains(k, StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"Provider {Provider}: stage-2 reject | href={Href} | text={Text}",
|
||||
provider.Name, href, anchorText.Length > 100 ? anchorText[..100] : anchorText);
|
||||
continue;
|
||||
}
|
||||
|
||||
stage2Pass++;
|
||||
|
||||
// Make absolute URL
|
||||
if (!Uri.TryCreate(href, UriKind.Absolute, out var absoluteUri))
|
||||
@@ -95,7 +116,10 @@ public sealed class HtmlJobSearcher
|
||||
results.Add(url);
|
||||
}
|
||||
|
||||
_logger.LogInformation("Provider {Provider}: found {Count} job URLs", provider.Name, results.Count);
|
||||
_logger.LogInformation(
|
||||
"Provider {Provider}: {TotalAnchors} anchors found | {Stage1} passed href filter ('{LinkPattern}') | {Stage2} passed keyword filter | {Unique} unique URLs returned",
|
||||
provider.Name, allAnchors.Count, stage1Pass, provider.JobLinkContains, stage2Pass, results.Count);
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user