Fix job search: location filtering, keyword quality, anchor filter bypass

Closes #41

- Add RequireKeywordInAnchor per-provider flag (default true); set false for
  ejobs.ro and bestjobs.eu so Stage 2 anchor-text filter is skipped — their
  search URL already filters by relevance server-side
- Update AI system prompts (en + ro) to extract concise job-board-friendly
  keywords (role title + key tech, not abstract concepts) and candidate location
- Propagate location through JobMatchResponse -> CreateJobSearchTokenRequest ->
  JobSearchTokenEntity -> JobSearchSessionEntity
- Add {location} and {location-slug} substitution in HtmlJobSearcher
- Update provider SearchUrlTemplates to include location:
    ejobs.ro:    /locuri-de-munca/{location-slug}?q={keywords}
    bestjobs.eu: /ro/locuri-de-munca-in-{location-slug}?keywords={keywords}
    linkedin.com: ?keywords={keywords}&location={location}
- Three new migrations: AddRequireKeywordInAnchorAndLocation,
  ImproveKeywordsAndAddLocation, AddLocationToProviders

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 15:45:45 +03:00
parent 91b2baa445
commit 99e5cfb76b
19 changed files with 877 additions and 11 deletions
@@ -33,4 +33,10 @@ public sealed class JobProviderEntity
/// <summary>When true, the scraper renders the page with headless Chromium instead of a plain HTTP GET.</summary>
public bool UseHeadlessBrowser { get; set; }
/// <summary>
/// When false, the Stage 2 anchor-text keyword filter is skipped.
/// Set to false for providers whose search URL already filters by relevance server-side (ejobs.ro, bestjobs.eu).
/// </summary>
public bool RequireKeywordInAnchor { get; set; } = true;
}
@@ -9,6 +9,7 @@ public sealed class JobSearchSessionEntity : BaseEntity
public string Email { get; set; } = string.Empty;
public string Status { get; set; } = JobSearchStatus.Pending;
public string Keywords { get; set; } = string.Empty;
public string? Location { get; set; }
public string? ProviderConfigJson { get; set; }
public string Language { get; set; } = "en";
}
@@ -10,4 +10,5 @@ public sealed class JobSearchTokenEntity : BaseEntity
public DateTime ExpiresAt { get; set; }
public bool Used { get; set; }
public string Keywords { get; set; } = string.Empty;
public string? Location { get; set; }
}
@@ -0,0 +1,243 @@
// <auto-generated />
using System;
using CvSearch.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
#nullable disable
namespace CvSearch.Data.Migrations
{
[DbContext(typeof(CvSearchDbContext))]
[Migration("20260608124304_AddRequireKeywordInAnchorAndLocation")]
partial class AddRequireKeywordInAnchorAndLocation
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasDefaultSchema("cvSearch")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
modelBuilder.Entity("CvSearch.Data.Entities.JobProviderEntity", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("int");
SqlServerPropertyBuilderExtensions.UseIdentityColumn(b.Property<int>("Id"));
b.Property<int>("DisplayOrder")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(0);
b.Property<bool>("Enabled")
.HasColumnType("bit");
b.Property<string>("InitialKeywordsJson")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)")
.HasDefaultValue("[]");
b.Property<string>("JobLinkContains")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<int>("MaxResults")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(20);
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<bool>("RequireKeywordInAnchor")
.HasColumnType("bit");
b.Property<string>("SearchUrlTemplate")
.IsRequired()
.HasMaxLength(1024)
.HasColumnType("nvarchar(1024)");
b.Property<bool>("UseHeadlessBrowser")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobProviders", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchResultEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("JobText")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<string>("JobTitle")
.IsRequired()
.HasMaxLength(512)
.HasColumnType("nvarchar(512)");
b.Property<string>("JobUrl")
.IsRequired()
.HasMaxLength(2048)
.HasColumnType("nvarchar(2048)");
b.Property<string>("ProviderName")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<string>("ResultJson")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int>("Score")
.HasColumnType("int");
b.Property<string>("SessionId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("SessionId");
b.ToTable("JobSearchResults", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchSessionEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<string>("Keywords")
.IsRequired()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<string>("ProviderConfigJson")
.HasColumnType("nvarchar(max)");
b.Property<string>("Status")
.IsRequired()
.HasMaxLength(32)
.HasColumnType("nvarchar(32)");
b.Property<string>("TokenId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("Status");
b.ToTable("JobSearchSessions", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchTokenEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<DateTime>("ExpiresAt")
.HasColumnType("datetime2");
b.Property<string>("Keywords")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)")
.HasDefaultValue("");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<bool>("Used")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobSearchTokens", "cvSearch");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,74 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace CvSearch.Data.Migrations
{
/// <inheritdoc />
public partial class AddRequireKeywordInAnchorAndLocation : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.AddColumn<string>(
name: "Location",
schema: "cvSearch",
table: "JobSearchTokens",
type: "nvarchar(max)",
nullable: true);
migrationBuilder.AddColumn<string>(
name: "Location",
schema: "cvSearch",
table: "JobSearchSessions",
type: "nvarchar(max)",
nullable: true);
migrationBuilder.AddColumn<bool>(
name: "RequireKeywordInAnchor",
schema: "cvSearch",
table: "JobProviders",
type: "bit",
nullable: false,
defaultValue: true);
// ejobs.ro (Id=1) and bestjobs.eu (Id=2) do server-side keyword filtering via their
// search URL — the Stage 2 anchor-text filter rejects all Romanian job titles because
// they rarely contain abstract LLM keywords.
migrationBuilder.UpdateData(
schema: "cvSearch",
table: "JobProviders",
keyColumn: "Id",
keyValue: 1,
column: "RequireKeywordInAnchor",
value: false);
migrationBuilder.UpdateData(
schema: "cvSearch",
table: "JobProviders",
keyColumn: "Id",
keyValue: 2,
column: "RequireKeywordInAnchor",
value: false);
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropColumn(
name: "Location",
schema: "cvSearch",
table: "JobSearchTokens");
migrationBuilder.DropColumn(
name: "Location",
schema: "cvSearch",
table: "JobSearchSessions");
migrationBuilder.DropColumn(
name: "RequireKeywordInAnchor",
schema: "cvSearch",
table: "JobProviders");
}
}
}
@@ -0,0 +1,243 @@
// <auto-generated />
using System;
using CvSearch.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
#nullable disable
namespace CvSearch.Data.Migrations
{
[DbContext(typeof(CvSearchDbContext))]
[Migration("20260608124452_AddLocationToProviders")]
partial class AddLocationToProviders
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasDefaultSchema("cvSearch")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
modelBuilder.Entity("CvSearch.Data.Entities.JobProviderEntity", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("int");
SqlServerPropertyBuilderExtensions.UseIdentityColumn(b.Property<int>("Id"));
b.Property<int>("DisplayOrder")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(0);
b.Property<bool>("Enabled")
.HasColumnType("bit");
b.Property<string>("InitialKeywordsJson")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)")
.HasDefaultValue("[]");
b.Property<string>("JobLinkContains")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<int>("MaxResults")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(20);
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<bool>("RequireKeywordInAnchor")
.HasColumnType("bit");
b.Property<string>("SearchUrlTemplate")
.IsRequired()
.HasMaxLength(1024)
.HasColumnType("nvarchar(1024)");
b.Property<bool>("UseHeadlessBrowser")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobProviders", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchResultEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("JobText")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<string>("JobTitle")
.IsRequired()
.HasMaxLength(512)
.HasColumnType("nvarchar(512)");
b.Property<string>("JobUrl")
.IsRequired()
.HasMaxLength(2048)
.HasColumnType("nvarchar(2048)");
b.Property<string>("ProviderName")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<string>("ResultJson")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int>("Score")
.HasColumnType("int");
b.Property<string>("SessionId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("SessionId");
b.ToTable("JobSearchResults", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchSessionEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<string>("Keywords")
.IsRequired()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<string>("ProviderConfigJson")
.HasColumnType("nvarchar(max)");
b.Property<string>("Status")
.IsRequired()
.HasMaxLength(32)
.HasColumnType("nvarchar(32)");
b.Property<string>("TokenId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("Status");
b.ToTable("JobSearchSessions", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchTokenEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<DateTime>("ExpiresAt")
.HasColumnType("datetime2");
b.Property<string>("Keywords")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)")
.HasDefaultValue("");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<bool>("Used")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobSearchTokens", "cvSearch");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,71 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace CvSearch.Data.Migrations
{
/// <inheritdoc />
public partial class AddLocationToProviders : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
// ejobs.ro (Id=1): location in URL path as slug, keywords via q= param.
// Verified URL structure: /locuri-de-munca/{location-slug}?q={keywords}
migrationBuilder.UpdateData(
schema: MigrationConstants.SchemaName,
table: "JobProviders",
keyColumn: "Id",
keyValue: 1,
column: "SearchUrlTemplate",
value: "https://www.ejobs.ro/locuri-de-munca/{location-slug}?q={keywords}");
// bestjobs.eu (Id=2): location in URL path as slug, keywords via query param.
// Verified URL structure: /ro/locuri-de-munca-in-{location-slug}?keywords={keywords}
migrationBuilder.UpdateData(
schema: MigrationConstants.SchemaName,
table: "JobProviders",
keyColumn: "Id",
keyValue: 2,
column: "SearchUrlTemplate",
value: "https://bestjobs.eu/ro/locuri-de-munca-in-{location-slug}?keywords={keywords}");
// linkedin.com (Id=3): location as query parameter.
migrationBuilder.UpdateData(
schema: MigrationConstants.SchemaName,
table: "JobProviders",
keyColumn: "Id",
keyValue: 3,
column: "SearchUrlTemplate",
value: "https://www.linkedin.com/jobs/search/?keywords={keywords}&location={location}");
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.UpdateData(
schema: MigrationConstants.SchemaName,
table: "JobProviders",
keyColumn: "Id",
keyValue: 1,
column: "SearchUrlTemplate",
value: "https://www.ejobs.ro/locuri-de-munca?q={keywords}");
migrationBuilder.UpdateData(
schema: MigrationConstants.SchemaName,
table: "JobProviders",
keyColumn: "Id",
keyValue: 2,
column: "SearchUrlTemplate",
value: "https://www.bestjobs.eu/ro/locuri-de-munca?keywords={keywords}");
migrationBuilder.UpdateData(
schema: MigrationConstants.SchemaName,
table: "JobProviders",
keyColumn: "Id",
keyValue: 3,
column: "SearchUrlTemplate",
value: "https://www.linkedin.com/jobs/search/?keywords={keywords}");
}
}
}
@@ -61,6 +61,9 @@ namespace CvSearch.Data.Migrations
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<bool>("RequireKeywordInAnchor")
.HasColumnType("bit");
b.Property<string>("SearchUrlTemplate")
.IsRequired()
.HasMaxLength(1024)
@@ -158,6 +161,9 @@ namespace CvSearch.Data.Migrations
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<string>("ProviderConfigJson")
.HasColumnType("nvarchar(max)");
@@ -216,6 +222,9 @@ namespace CvSearch.Data.Migrations
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<bool>("Used")
.ValueGeneratedOnAdd()
.HasColumnType("bit")