Fix job search: location filtering, keyword quality, anchor filter bypass

Closes #41

- Add RequireKeywordInAnchor per-provider flag (default true); set it to false
  for ejobs.ro and bestjobs.eu so the Stage 2 anchor-text filter is skipped,
  because their search URLs already filter by relevance server-side
- Update AI system prompts (en + ro) to extract concise, job-board-friendly
  keywords (role title + key tech, not abstract concepts) and the candidate's
  location
- Propagate location through JobMatchResponse -> CreateJobSearchTokenRequest ->
  JobSearchTokenEntity -> JobSearchSessionEntity
- Add {location} and {location-slug} substitution in HtmlJobSearcher
- Update provider SearchUrlTemplates to include location:
    ejobs.ro:     /locuri-de-munca/{location-slug}?q={keywords}
    bestjobs.eu:  /ro/locuri-de-munca-in-{location-slug}?keywords={keywords}
    linkedin.com: ?keywords={keywords}&location={location}
- Three new migrations: AddRequireKeywordInAnchorAndLocation,
  ImproveKeywordsAndAddLocation, AddLocationToProviders

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 15:45:45 +03:00
parent 91b2baa445
commit 99e5cfb76b
19 changed files with 877 additions and 11 deletions
+16 -4
View File
@@ -33,6 +33,7 @@ public sealed class HtmlJobSearcher
public async Task<IReadOnlyList<string>> SearchJobUrlsAsync(
JobProviderConfig provider,
IReadOnlyList<string> cvKeywords,
string? location,
CancellationToken ct)
{
var allKeywords = provider.InitialKeywords
@@ -48,13 +49,23 @@ public sealed class HtmlJobSearcher
}
var keywordsEncoded = HttpUtility.UrlEncode(string.Join(" ", allKeywords));
var searchUrl = provider.SearchUrlTemplate.Replace("{keywords}", keywordsEncoded);
var locationEncoded = HttpUtility.UrlEncode(location ?? string.Empty);
var locationSlug = (location ?? string.Empty)
.ToLowerInvariant()
.Replace(",", "")
.Replace(" ", "-")
.Trim('-');
var searchUrl = provider.SearchUrlTemplate
.Replace("{keywords}", keywordsEncoded)
.Replace("{location}", locationEncoded)
.Replace("{location-slug}", locationSlug);
_logger.LogInformation(
"Provider {Provider}: fetching {Url} [{Mode}] | CV keywords: [{Keywords}]",
"Provider {Provider}: fetching {Url} [{Mode}] | CV keywords: [{Keywords}] | Location: {Location}",
provider.Name, searchUrl,
provider.UseHeadlessBrowser ? "headless" : "http",
string.Join(", ", cvKeywords));
string.Join(", ", cvKeywords),
location ?? "(none)");
string? html;
if (provider.UseHeadlessBrowser)
@@ -89,7 +100,8 @@ public sealed class HtmlJobSearcher
stage1Pass++;
if (!cvKeywords.Any(k => anchorText.Contains(k, StringComparison.OrdinalIgnoreCase)))
if (provider.RequireKeywordInAnchor &&
!cvKeywords.Any(k => anchorText.Contains(k, StringComparison.OrdinalIgnoreCase)))
{
_logger.LogDebug(
"Provider {Provider}: stage-2 reject | href={Href} | text={Text}",
+1 -1
View File
@@ -141,7 +141,7 @@ public sealed class CvSearchJobTask : IJobTask
foreach (var provider in providers)
{
var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, ct);
var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, session.Location, ct);
_logger.LogInformation("Session {SessionId}: provider {Provider} returned {Count} URLs", session.Id, provider.Name, urls.Count);
foreach (var url in urls) jobUrls.Add(url);
}