Merge pull request 'Fix job search: location filtering, keyword quality, anchor filter bypass' (#42) from feature/job-search-location-keywords into main

Merge PR #42: Fix job search — location filtering, keyword quality, anchor filter bypass
This commit was merged in pull request #42.
This commit is contained in:
2026-06-08 12:51:16 +00:00
19 changed files with 877 additions and 11 deletions
+1 -1
View File
@@ -181,7 +181,7 @@ public sealed class CvMatcherController : ControllerBase
try
{
var tokenResp = await _jobSearchApi.CreateTokenAsync(
new CreateJobSearchTokenRequest { CvDocumentId = request.CvDocumentId, Email = request.Email, Language = language, Keywords = res.Keywords },
new CreateJobSearchTokenRequest { CvDocumentId = request.CvDocumentId, Email = request.Email, Language = language, Keywords = res.Keywords, Location = res.Location },
ct);
if (!string.IsNullOrWhiteSpace(tokenResp.TokenId))
{
@@ -6,4 +6,5 @@ public sealed class CreateJobSearchTokenRequest
public string Email { get; set; } = string.Empty;
public string Language { get; set; } = "en";
public List<string> Keywords { get; set; } = [];
public string? Location { get; set; }
}
@@ -9,6 +9,7 @@
public List<string> Recommendations { get; set; } = [];
public List<string> Evidence { get; set; } = [];
public List<string> Keywords { get; set; } = [];
public string? Location { get; set; }
public bool Cached { get; set; }
public string? JobDocumentId { get; set; }
public string? JobUrl { get; set; }
@@ -23,4 +23,9 @@ public sealed class JobProviderConfig
public int MaxResults { get; set; } = 20;
/// <summary>When true the scraper uses a headless Chromium browser to render JS-heavy pages.</summary>
public bool UseHeadlessBrowser { get; set; }
/// <summary>
/// When false, the Stage 2 anchor-text keyword filter is skipped.
/// Set to false for providers whose search URL already filters by relevance server-side.
/// </summary>
public bool RequireKeywordInAnchor { get; set; } = true;
}
@@ -53,7 +53,7 @@ public sealed class JobSearchController : ControllerBase
if (string.IsNullOrWhiteSpace(request.CvDocumentId) || string.IsNullOrWhiteSpace(request.Email))
return BadRequest(new ErrorResponse { Error = "CvDocumentId and Email are required.", Code = "invalid_request" });
var tokenId = await _tokenService.CreateTokenAsync(request.CvDocumentId, request.Email, request.Language, request.Keywords, ct);
var tokenId = await _tokenService.CreateTokenAsync(request.CvDocumentId, request.Email, request.Language, request.Keywords, request.Location, ct);
return Ok(new CreateJobSearchTokenResponse { TokenId = tokenId });
}
catch (Exception ex)
@@ -13,12 +13,13 @@ public interface IJobTokenService
/// <param name="email">Email address of the user who will receive the results.</param>
/// <param name="language">Preferred language for result emails (e.g. <c>"en"</c>, <c>"ro"</c>).</param>
/// <param name="keywords">Job search keywords extracted by the LLM during the match call.</param>
/// <param name="location">Candidate location extracted from the CV (e.g. "Cluj-Napoca, Romania"). Null or an empty string when no location was extracted.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// The generated token ID to embed in the one-click job search link,
/// or <c>null</c> when no job providers are currently enabled (link should be suppressed).
/// </returns>
Task<string?> CreateTokenAsync(string cvDocumentId, string email, string language, IReadOnlyList<string> keywords, CancellationToken ct);
Task<string?> CreateTokenAsync(string cvDocumentId, string email, string language, IReadOnlyList<string> keywords, string? location, CancellationToken ct);
/// <summary>
/// Validates the token and, if valid, marks it as used and creates a <c>Pending</c> job search session.
@@ -34,7 +34,7 @@ public sealed class JobTokenService : IJobTokenService
}
/// <inheritdoc />
public async Task<string?> CreateTokenAsync(string cvDocumentId, string email, string language, IReadOnlyList<string> keywords, CancellationToken ct)
public async Task<string?> CreateTokenAsync(string cvDocumentId, string email, string language, IReadOnlyList<string> keywords, string? location, CancellationToken ct)
{
var hasEnabledProviders = await _db.JobProviders.AnyAsync(p => p.Enabled, ct);
if (!hasEnabledProviders)
@@ -50,6 +50,7 @@ public sealed class JobTokenService : IJobTokenService
Email = email,
Language = language,
Keywords = string.Join(",", keywords),
Location = location,
ExpiresAt = DateTime.UtcNow.AddDays(_settings.TokenExpiryDays),
Used = false,
CreatedAt = DateTime.UtcNow
@@ -57,7 +58,7 @@ public sealed class JobTokenService : IJobTokenService
_db.JobSearchTokens.Add(token);
await _db.SaveChangesAsync(ct);
_logger.LogInformation("Job search token created. TokenId={TokenId}, CvDocumentId={CvDocumentId}, Keywords={Keywords}", token.Id, cvDocumentId, token.Keywords);
_logger.LogInformation("Job search token created. TokenId={TokenId}, CvDocumentId={CvDocumentId}, Keywords={Keywords}, Location={Location}", token.Id, cvDocumentId, token.Keywords, token.Location);
return token.Id;
}
@@ -92,6 +93,7 @@ public sealed class JobTokenService : IJobTokenService
Language = token.Language,
Status = JobSearchStatus.Pending,
Keywords = keywords,
Location = token.Location,
ProviderConfigJson = providerConfigJson,
CreatedAt = DateTime.UtcNow
};
@@ -126,7 +128,8 @@ public sealed class JobTokenService : IJobTokenService
JobLinkContains = entity.JobLinkContains,
InitialKeywords = keywords,
MaxResults = entity.MaxResults,
UseHeadlessBrowser = entity.UseHeadlessBrowser
UseHeadlessBrowser = entity.UseHeadlessBrowser,
RequireKeywordInAnchor = entity.RequireKeywordInAnchor
};
}
@@ -0,0 +1,130 @@
// <auto-generated />
using System;
using CvMatcher.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
#nullable disable
namespace CvMatcher.Data.Migrations
{
/// <summary>
/// Tool-generated designer half of the <c>20260608124331_ImproveKeywordsAndAddLocation</c> migration.
/// <see cref="BuildTargetModel"/> describes the <c>cvMatcher</c> model for this migration:
/// the prompt table, the match-result table and the chat-response cache.
/// Do not edit the code by hand; it is emitted by the EF Core tooling.
/// </summary>
[DbContext(typeof(CvMatcherDbContext))]
[Migration("20260608124331_ImproveKeywordsAndAddLocation")]
partial class ImproveKeywordsAndAddLocation
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
// Model-wide settings: default schema, EF Core product version, SQL Server identifier length limit.
modelBuilder
.HasDefaultSchema("cvMatcher")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
// AiPrompts: one row per (Key, Language) pair; this is the table the migration's UpdateData calls target.
modelBuilder.Entity("CvMatcher.Data.Entities.AiPromptEntity", b =>
{
b.Property<string>("Key")
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<string>("Language")
.HasMaxLength(8)
.HasColumnType("nvarchar(8)");
b.Property<string>("Description")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(500)
.HasColumnType("nvarchar(500)")
.HasDefaultValue("");
b.Property<DateTime>("UpdatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("Value")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.HasKey("Key", "Language");
b.ToTable("AiPrompts", "cvMatcher");
});
// Results: stored match results, unique per (CvDocumentId, JobDocumentId, Language).
modelBuilder.Entity("CvMatcher.Data.Entities.CvMatchResultEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("JobDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Language")
.IsRequired()
.HasColumnType("nvarchar(450)");
b.Property<string>("ResultJson")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int>("Score")
.HasColumnType("int");
b.HasKey("Id");
b.HasIndex("CvDocumentId", "JobDocumentId", "Language")
.IsUnique();
b.ToTable("Results", "cvMatcher");
});
// ChatCache: cached response text keyed by CacheKey, with the model name and temperature recorded per row.
modelBuilder.Entity("CvMatcher.Data.Entities.CvMatcherChatCacheEntity", b =>
{
b.Property<string>("CacheKey")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("Model")
.IsRequired()
.HasMaxLength(120)
.HasColumnType("nvarchar(120)");
b.Property<string>("ResponseText")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<decimal>("Temperature")
.HasColumnType("decimal(4,2)");
b.HasKey("CacheKey");
b.ToTable("ChatCache", "cvMatcher");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,65 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace CvMatcher.Data.Migrations
{
/// <summary>
/// Data-only migration that rewrites the <c>ai.cv-match.system-prompt</c> rows of the
/// <c>AiPrompts</c> table for English and Romanian. <see cref="Up"/> installs prompts that ask the
/// LLM for job-board style keywords and a <c>location</c> field; <see cref="Down"/> puts back the
/// previous prompts, which had neither.
/// </summary>
public partial class ImproveKeywordsAndAddLocation : Migration
{
    /// <summary>Key of the prompt row that both directions of this migration rewrite.</summary>
    private const string PromptKey = "ai.cv-match.system-prompt";

    /// <inheritdoc />
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        // English: keywords become concrete recruiter search terms (role title + technologies),
        // and the JSON shape gains "location" (city, country; empty string when not found).
        WritePrompt(
            migrationBuilder,
            "en",
            "You are a strict CV-to-job matching engine. Return JSON only. Score realistically from 0 to 100. Penalize missing required skills. Do not invent experience. Use concise business language. All text fields in the JSON response must be in English.\nJSON shape: {\"score\":number,\"summary\":\"one-line summary in English\",\"strengths\":[\"strength 1 in English\"],\"gaps\":[\"gap 1 in English\"],\"recommendations\":[\"recommendation 1 in English\"],\"evidence\":[\"evidence 1 in English\"],\"keywords\":[\"Senior .NET Developer\",\"C#\",\"Azure\"],\"location\":\"City, Country\"}.\nFor 'keywords': extract 2-4 short, concrete terms a recruiter would search for on a job board — the candidate's primary role title and key technologies (e.g. 'Senior .NET Developer', 'C#', 'Azure'). Avoid abstract concepts like 'leadership', 'cloud', or 'microservices'.\nFor 'location': extract the candidate's city and country from the CV (e.g. 'Cluj-Napoca, Romania'). Use an empty string if not found.",
            "System prompt for CV-to-job matching in English. Extracts job-board-friendly keywords (role title + key tech) and candidate location.");

        // Romanian: the same two changes, phrased in Romanian.
        WritePrompt(
            migrationBuilder,
            "ro",
            "Ești un motor strict de potrivire CV-job. Returnează doar JSON. Punctează realist între 0 și 100. Penalizează abilitățile lipsă necesare. Nu inventa experiență. Folosește limbaj profesional concis. Toate câmpurile text din răspunsul JSON trebuie să fie în limba română.\nJSON shape: {\"score\":number,\"summary\":\"rezumat pe o linie în română\",\"strengths\":[\"punct forte 1 în română\"],\"gaps\":[\"lipsă 1 în română\"],\"recommendations\":[\"recomandare 1 în română\"],\"evidence\":[\"dovadă 1 în română\"],\"keywords\":[\"Senior .NET Developer\",\"C#\",\"Azure\"],\"location\":\"Oraș, Țară\"}.\nPentru 'keywords': extrage 2-4 termeni scurți și concreți pe care un recrutor i-ar căuta pe un site de joburi — titlul principal al rolului și tehnologiile cheie (ex. 'Senior .NET Developer', 'C#', 'Azure'). Evită concepte abstracte precum 'leadership', 'cloud' sau 'microservicii'.\nPentru 'location': extrage orașul și țara candidatului din CV (ex. 'Cluj-Napoca, România'). Folosește string gol dacă nu se găsește.",
            "System prompt pentru potrivire CV-job în limba română. Extrage cuvinte cheie prietenoase pentru site-uri de joburi (titlu rol + tehnologii cheie) și locația candidatului.");
    }

    /// <inheritdoc />
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        // Put back the earlier prompts: generic keyword placeholders and no "location" field.
        WritePrompt(
            migrationBuilder,
            "en",
            "You are a strict CV-to-job matching engine. Return JSON only. Score realistically from 0 to 100. Penalize missing required skills. Do not invent experience. Use concise business language. All text fields in the JSON response must be in English.\nJSON shape: {\"score\":number,\"summary\":\"one-line summary in English\",\"strengths\":[\"strength 1 in English\"],\"gaps\":[\"gap 1 in English\"],\"recommendations\":[\"recommendation 1 in English\"],\"evidence\":[\"evidence 1 in English\"],\"keywords\":[\"keyword1\",\"keyword2\",\"keyword3\"]}",
            "System prompt for CV-to-job matching in English. Instructs LLM to return JSON with CV strengths, gaps, and recommendations relative to the job.");

        WritePrompt(
            migrationBuilder,
            "ro",
            "Ești un motor strict de potrivire CV-job. Returnează doar JSON. Punctează realist între 0 și 100. Penalizează abilitățile lipsă necesare. Nu inventa experiență. Folosește limbaj profesional concis. Toate câmpurile text din răspunsul JSON trebuie să fie în limba română.\nJSON shape: {\"score\":number,\"summary\":\"rezumat pe o linie în română\",\"strengths\":[\"punct forte 1 în română\"],\"gaps\":[\"lipsă 1 în română\"],\"recommendations\":[\"recomandare 1 în română\"],\"evidence\":[\"dovadă 1 în română\"],\"keywords\":[\"cuvant1\",\"cuvant2\",\"cuvant3\"]}",
            "System prompt pentru potrivire CV-job în limba română. Instruiește LLM-ul să returneze JSON cu punctele forte ale CV-ului, lacunele și recomandări relative la job.");
    }

    /// <summary>
    /// Overwrites the <c>Value</c> and <c>Description</c> columns of the match system-prompt row
    /// identified by (<see cref="PromptKey"/>, <paramref name="language"/>).
    /// </summary>
    /// <param name="migrationBuilder">Builder that collects the migration operations.</param>
    /// <param name="language">Language code of the row to update (<c>"en"</c> or <c>"ro"</c> here).</param>
    /// <param name="value">New prompt text.</param>
    /// <param name="description">New human-readable description of the prompt.</param>
    private static void WritePrompt(MigrationBuilder migrationBuilder, string language, string value, string description)
    {
        migrationBuilder.UpdateData(
            schema: MigrationConstants.SchemaName,
            table: "AiPrompts",
            keyColumns: ["Key", "Language"],
            keyValues: [PromptKey, language],
            columns: ["Value", "Description"],
            values: [value, description]);
    }
}
}
@@ -33,4 +33,10 @@ public sealed class JobProviderEntity
/// <summary>When true, the scraper renders the page with headless Chromium instead of a plain HTTP GET.</summary>
public bool UseHeadlessBrowser { get; set; }
/// <summary>
/// When false, the Stage 2 anchor-text keyword filter is skipped.
/// Set to false for providers whose search URL already filters by relevance server-side (ejobs.ro, bestjobs.eu).
/// </summary>
public bool RequireKeywordInAnchor { get; set; } = true;
}
@@ -9,6 +9,7 @@ public sealed class JobSearchSessionEntity : BaseEntity
public string Email { get; set; } = string.Empty;
public string Status { get; set; } = JobSearchStatus.Pending;
public string Keywords { get; set; } = string.Empty;
public string? Location { get; set; }
public string? ProviderConfigJson { get; set; }
public string Language { get; set; } = "en";
}
@@ -10,4 +10,5 @@ public sealed class JobSearchTokenEntity : BaseEntity
public DateTime ExpiresAt { get; set; }
public bool Used { get; set; }
public string Keywords { get; set; } = string.Empty;
public string? Location { get; set; }
}
@@ -0,0 +1,243 @@
// <auto-generated />
using System;
using CvSearch.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
#nullable disable
namespace CvSearch.Data.Migrations
{
/// <summary>
/// Tool-generated designer half of the <c>20260608124304_AddRequireKeywordInAnchorAndLocation</c>
/// migration. <see cref="BuildTargetModel"/> describes the <c>cvSearch</c> model for this migration:
/// job providers, job search results, job search sessions and job search tokens.
/// Do not edit the code by hand; it is emitted by the EF Core tooling.
/// </summary>
[DbContext(typeof(CvSearchDbContext))]
[Migration("20260608124304_AddRequireKeywordInAnchorAndLocation")]
partial class AddRequireKeywordInAnchorAndLocation
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
// Model-wide settings: default schema, EF Core product version, SQL Server identifier length limit.
modelBuilder
.HasDefaultSchema("cvSearch")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
// JobProviders: per-provider scraping configuration, including the RequireKeywordInAnchor flag.
modelBuilder.Entity("CvSearch.Data.Entities.JobProviderEntity", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("int");
SqlServerPropertyBuilderExtensions.UseIdentityColumn(b.Property<int>("Id"));
b.Property<int>("DisplayOrder")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(0);
b.Property<bool>("Enabled")
.HasColumnType("bit");
b.Property<string>("InitialKeywordsJson")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)")
.HasDefaultValue("[]");
b.Property<string>("JobLinkContains")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<int>("MaxResults")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(20);
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
// NOTE(review): no model-level default is recorded for this flag (compare UseHeadlessBrowser below);
// confirm that is intended given the column is created with a database default.
b.Property<bool>("RequireKeywordInAnchor")
.HasColumnType("bit");
b.Property<string>("SearchUrlTemplate")
.IsRequired()
.HasMaxLength(1024)
.HasColumnType("nvarchar(1024)");
b.Property<bool>("UseHeadlessBrowser")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobProviders", "cvSearch");
});
// JobSearchResults: one row per job found for a session; indexed by SessionId.
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchResultEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("JobText")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<string>("JobTitle")
.IsRequired()
.HasMaxLength(512)
.HasColumnType("nvarchar(512)");
b.Property<string>("JobUrl")
.IsRequired()
.HasMaxLength(2048)
.HasColumnType("nvarchar(2048)");
b.Property<string>("ProviderName")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<string>("ResultJson")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int>("Score")
.HasColumnType("int");
b.Property<string>("SessionId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("SessionId");
b.ToTable("JobSearchResults", "cvSearch");
});
// JobSearchSessions: one row per search run; indexed by Status. Location is optional and unbounded.
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchSessionEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<string>("Keywords")
.IsRequired()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<string>("ProviderConfigJson")
.HasColumnType("nvarchar(max)");
b.Property<string>("Status")
.IsRequired()
.HasMaxLength(32)
.HasColumnType("nvarchar(32)");
b.Property<string>("TokenId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("Status");
b.ToTable("JobSearchSessions", "cvSearch");
});
// JobSearchTokens: search tokens with expiry and a Used flag. Location is optional and unbounded.
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchTokenEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<DateTime>("ExpiresAt")
.HasColumnType("datetime2");
b.Property<string>("Keywords")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)")
.HasDefaultValue("");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<bool>("Used")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobSearchTokens", "cvSearch");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,74 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace CvSearch.Data.Migrations
{
/// <summary>
/// Schema migration for the <c>cvSearch</c> schema. It adds a nullable <c>Location</c> column to
/// <c>JobSearchTokens</c> and <c>JobSearchSessions</c>, adds the non-nullable
/// <c>RequireKeywordInAnchor</c> flag (database default <c>true</c>) to <c>JobProviders</c>, and then
/// turns that flag off for provider rows 1 and 2. <see cref="Down"/> drops the three columns again.
/// </summary>
public partial class AddRequireKeywordInAnchorAndLocation : Migration
{
    private const string Schema = "cvSearch";
    private const string ProvidersTable = "JobProviders";
    private const string LocationColumn = "Location";
    private const string AnchorFlagColumn = "RequireKeywordInAnchor";

    /// <summary>Tables that receive the <c>Location</c> column, in the order the operations are emitted.</summary>
    private static readonly string[] LocationTables = { "JobSearchTokens", "JobSearchSessions" };

    /// <summary>
    /// Provider ids whose anchor-text filter is switched off: ejobs.ro (Id=1) and bestjobs.eu (Id=2).
    /// </summary>
    private static readonly int[] ServerFilteredProviderIds = { 1, 2 };

    /// <inheritdoc />
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        foreach (var locationTable in LocationTables)
        {
            migrationBuilder.AddColumn<string>(
                name: LocationColumn,
                schema: Schema,
                table: locationTable,
                type: "nvarchar(max)",
                nullable: true);
        }

        // Non-nullable with a default of true, so provider rows that already exist keep the
        // stage-2 anchor filter unless they are explicitly opted out below.
        migrationBuilder.AddColumn<bool>(
            name: AnchorFlagColumn,
            schema: Schema,
            table: ProvidersTable,
            type: "bit",
            nullable: false,
            defaultValue: true);

        // ejobs.ro and bestjobs.eu already filter by keyword server-side through their search URL.
        // The stage-2 anchor-text filter rejected their Romanian job titles, which rarely contain
        // the abstract keywords produced by the LLM, so it is disabled for those two rows.
        foreach (var providerId in ServerFilteredProviderIds)
        {
            migrationBuilder.UpdateData(
                schema: Schema,
                table: ProvidersTable,
                keyColumn: "Id",
                keyValue: providerId,
                column: AnchorFlagColumn,
                value: false);
        }
    }

    /// <inheritdoc />
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        foreach (var locationTable in LocationTables)
        {
            migrationBuilder.DropColumn(
                name: LocationColumn,
                schema: Schema,
                table: locationTable);
        }

        migrationBuilder.DropColumn(
            name: AnchorFlagColumn,
            schema: Schema,
            table: ProvidersTable);
    }
}
}
@@ -0,0 +1,243 @@
// <auto-generated />
using System;
using CvSearch.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
#nullable disable
namespace CvSearch.Data.Migrations
{
/// <summary>
/// Tool-generated designer half of the <c>20260608124452_AddLocationToProviders</c> migration.
/// <see cref="BuildTargetModel"/> describes the <c>cvSearch</c> model for this migration:
/// job providers, job search results, job search sessions and job search tokens.
/// Do not edit the code by hand; it is emitted by the EF Core tooling.
/// </summary>
[DbContext(typeof(CvSearchDbContext))]
[Migration("20260608124452_AddLocationToProviders")]
partial class AddLocationToProviders
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
// Model-wide settings: default schema, EF Core product version, SQL Server identifier length limit.
modelBuilder
.HasDefaultSchema("cvSearch")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
// JobProviders: per-provider scraping configuration; SearchUrlTemplate is limited to 1024 characters.
modelBuilder.Entity("CvSearch.Data.Entities.JobProviderEntity", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("int");
SqlServerPropertyBuilderExtensions.UseIdentityColumn(b.Property<int>("Id"));
b.Property<int>("DisplayOrder")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(0);
b.Property<bool>("Enabled")
.HasColumnType("bit");
b.Property<string>("InitialKeywordsJson")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)")
.HasDefaultValue("[]");
b.Property<string>("JobLinkContains")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<int>("MaxResults")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(20);
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<bool>("RequireKeywordInAnchor")
.HasColumnType("bit");
b.Property<string>("SearchUrlTemplate")
.IsRequired()
.HasMaxLength(1024)
.HasColumnType("nvarchar(1024)");
b.Property<bool>("UseHeadlessBrowser")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobProviders", "cvSearch");
});
// JobSearchResults: one row per job found for a session; indexed by SessionId.
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchResultEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("JobText")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<string>("JobTitle")
.IsRequired()
.HasMaxLength(512)
.HasColumnType("nvarchar(512)");
b.Property<string>("JobUrl")
.IsRequired()
.HasMaxLength(2048)
.HasColumnType("nvarchar(2048)");
b.Property<string>("ProviderName")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<string>("ResultJson")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int>("Score")
.HasColumnType("int");
b.Property<string>("SessionId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("SessionId");
b.ToTable("JobSearchResults", "cvSearch");
});
// JobSearchSessions: one row per search run; indexed by Status. Location is optional and unbounded.
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchSessionEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<string>("Keywords")
.IsRequired()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<string>("ProviderConfigJson")
.HasColumnType("nvarchar(max)");
b.Property<string>("Status")
.IsRequired()
.HasMaxLength(32)
.HasColumnType("nvarchar(32)");
b.Property<string>("TokenId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("Status");
b.ToTable("JobSearchSessions", "cvSearch");
});
// JobSearchTokens: search tokens with expiry and a Used flag. Location is optional and unbounded.
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchTokenEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<DateTime>("ExpiresAt")
.HasColumnType("datetime2");
b.Property<string>("Keywords")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)")
.HasDefaultValue("");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<bool>("Used")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobSearchTokens", "cvSearch");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,71 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace CvSearch.Data.Migrations
{
/// <summary>
/// Data-only migration that rewrites <c>SearchUrlTemplate</c> for provider rows 1, 2 and 3 of the
/// <c>JobProviders</c> table so each template carries a location placeholder
/// (<c>{location-slug}</c> in the URL path, or <c>{location}</c> as a query parameter) next to
/// <c>{keywords}</c>. <see cref="Down"/> writes back keyword-only templates.
/// </summary>
public partial class AddLocationToProviders : Migration
{
    /// <inheritdoc />
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        // ejobs.ro (Id=1): location is a slug in the URL path, keywords go in the q= parameter.
        // Shape: /locuri-de-munca/{location-slug}?q={keywords}
        SetSearchUrlTemplate(migrationBuilder, 1, "https://www.ejobs.ro/locuri-de-munca/{location-slug}?q={keywords}");

        // bestjobs.eu (Id=2): location is a slug in the URL path, keywords go in a query parameter.
        // Shape: /ro/locuri-de-munca-in-{location-slug}?keywords={keywords}
        SetSearchUrlTemplate(migrationBuilder, 2, "https://bestjobs.eu/ro/locuri-de-munca-in-{location-slug}?keywords={keywords}");

        // linkedin.com (Id=3): location is passed as its own query parameter.
        SetSearchUrlTemplate(migrationBuilder, 3, "https://www.linkedin.com/jobs/search/?keywords={keywords}&location={location}");
    }

    /// <inheritdoc />
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        SetSearchUrlTemplate(migrationBuilder, 1, "https://www.ejobs.ro/locuri-de-munca?q={keywords}");

        // NOTE(review): this writes the "www." host for bestjobs.eu, while Up writes the bare host.
        // Confirm it matches the value the row held before this migration.
        SetSearchUrlTemplate(migrationBuilder, 2, "https://www.bestjobs.eu/ro/locuri-de-munca?keywords={keywords}");

        SetSearchUrlTemplate(migrationBuilder, 3, "https://www.linkedin.com/jobs/search/?keywords={keywords}");
    }

    /// <summary>
    /// Sets the <c>SearchUrlTemplate</c> column of the <c>JobProviders</c> row whose <c>Id</c>
    /// equals <paramref name="providerId"/>.
    /// </summary>
    /// <param name="migrationBuilder">Builder that collects the migration operations.</param>
    /// <param name="providerId">Primary-key value of the provider row to update.</param>
    /// <param name="template">New search URL template, including its placeholders.</param>
    private static void SetSearchUrlTemplate(MigrationBuilder migrationBuilder, int providerId, string template)
    {
        migrationBuilder.UpdateData(
            schema: MigrationConstants.SchemaName,
            table: "JobProviders",
            keyColumn: "Id",
            keyValue: providerId,
            column: "SearchUrlTemplate",
            value: template);
    }
}
}
@@ -61,6 +61,9 @@ namespace CvSearch.Data.Migrations
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<bool>("RequireKeywordInAnchor")
.HasColumnType("bit");
b.Property<string>("SearchUrlTemplate")
.IsRequired()
.HasMaxLength(1024)
@@ -158,6 +161,9 @@ namespace CvSearch.Data.Migrations
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<string>("ProviderConfigJson")
.HasColumnType("nvarchar(max)");
@@ -216,6 +222,9 @@ namespace CvSearch.Data.Migrations
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<bool>("Used")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
+16 -4
View File
@@ -33,6 +33,7 @@ public sealed class HtmlJobSearcher
public async Task<IReadOnlyList<string>> SearchJobUrlsAsync(
JobProviderConfig provider,
IReadOnlyList<string> cvKeywords,
string? location,
CancellationToken ct)
{
var allKeywords = provider.InitialKeywords
@@ -48,13 +49,23 @@ public sealed class HtmlJobSearcher
}
var keywordsEncoded = HttpUtility.UrlEncode(string.Join(" ", allKeywords));
var searchUrl = provider.SearchUrlTemplate.Replace("{keywords}", keywordsEncoded);
var locationEncoded = HttpUtility.UrlEncode(location ?? string.Empty);
var locationSlug = (location ?? string.Empty)
.ToLowerInvariant()
.Replace(",", "")
.Replace(" ", "-")
.Trim('-');
var searchUrl = provider.SearchUrlTemplate
.Replace("{keywords}", keywordsEncoded)
.Replace("{location}", locationEncoded)
.Replace("{location-slug}", locationSlug);
_logger.LogInformation(
"Provider {Provider}: fetching {Url} [{Mode}] | CV keywords: [{Keywords}]",
"Provider {Provider}: fetching {Url} [{Mode}] | CV keywords: [{Keywords}] | Location: {Location}",
provider.Name, searchUrl,
provider.UseHeadlessBrowser ? "headless" : "http",
string.Join(", ", cvKeywords));
string.Join(", ", cvKeywords),
location ?? "(none)");
string? html;
if (provider.UseHeadlessBrowser)
@@ -89,7 +100,8 @@ public sealed class HtmlJobSearcher
stage1Pass++;
if (!cvKeywords.Any(k => anchorText.Contains(k, StringComparison.OrdinalIgnoreCase)))
if (provider.RequireKeywordInAnchor &&
!cvKeywords.Any(k => anchorText.Contains(k, StringComparison.OrdinalIgnoreCase)))
{
_logger.LogDebug(
"Provider {Provider}: stage-2 reject | href={Href} | text={Text}",
+1 -1
View File
@@ -141,7 +141,7 @@ public sealed class CvSearchJobTask : IJobTask
foreach (var provider in providers)
{
var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, ct);
var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, session.Location, ct);
_logger.LogInformation("Session {SessionId}: provider {Provider} returned {Count} URLs", session.Id, provider.Name, urls.Count);
foreach (var url in urls) jobUrls.Add(url);
}