From 99e5cfb76bd508ad264feec74b44066c289b18e0 Mon Sep 17 00:00:00 2001 From: claude Date: Mon, 8 Jun 2026 15:45:45 +0300 Subject: [PATCH] Fix job search: location filtering, keyword quality, anchor filter bypass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #41 - Add RequireKeywordInAnchor per-provider flag (default true); set false for ejobs.ro and bestjobs.eu so Stage 2 anchor-text filter is skipped — their search URL already filters by relevance server-side - Update AI system prompts (en + ro) to extract concise job-board-friendly keywords (role title + key tech, not abstract concepts) and candidate location - Propagate location through JobMatchResponse -> CreateJobSearchTokenRequest -> JobSearchTokenEntity -> JobSearchSessionEntity - Add {location} and {location-slug} substitution in HtmlJobSearcher - Update provider SearchUrlTemplates to include location: ejobs.ro: /locuri-de-munca/{location-slug}?q={keywords} bestjobs.eu: /ro/locuri-de-munca-in-{location-slug}?keywords={keywords} linkedin.com: ?keywords={keywords}&location={location} - Three new migrations: AddRequireKeywordInAnchorAndLocation, ImproveKeywordsAndAddLocation, AddLocationToProviders Co-Authored-By: Claude Sonnet 4.6 --- Apis/api/Controllers/CvMatcherController.cs | 2 +- .../Requests/CreateJobSearchTokenRequest.cs | 1 + .../Responses/JobMatchResponse.cs | 1 + .../Settings/JobSearchSettings.cs | 5 + .../Controllers/JobSearchController.cs | 2 +- .../Services/Contracts/IJobTokenService.cs | 3 +- .../Services/JobTokenService.cs | 9 +- ..._ImproveKeywordsAndAddLocation.Designer.cs | 130 ++++++++++ ...608124331_ImproveKeywordsAndAddLocation.cs | 65 +++++ .../Data/Entities/JobProviderEntity.cs | 6 + .../Data/Entities/JobSearchSessionEntity.cs | 1 + .../Data/Entities/JobSearchTokenEntity.cs | 1 + ...uireKeywordInAnchorAndLocation.Designer.cs | 243 ++++++++++++++++++ ...04_AddRequireKeywordInAnchorAndLocation.cs | 74 ++++++ 
...8124452_AddLocationToProviders.Designer.cs | 243 ++++++++++++++++++ .../20260608124452_AddLocationToProviders.cs | 71 +++++ .../CvSearchDbContextModelSnapshot.cs | 9 + .../cv-search-job/Services/HtmlJobSearcher.cs | 20 +- Jobs/cv-search-job/Tasks/CvSearchJobTask.cs | 2 +- 19 files changed, 877 insertions(+), 11 deletions(-) create mode 100644 Apis/cv-matcher-data/Migrations/20260608124331_ImproveKeywordsAndAddLocation.Designer.cs create mode 100644 Apis/cv-matcher-data/Migrations/20260608124331_ImproveKeywordsAndAddLocation.cs create mode 100644 Apis/cv-search-data/Migrations/20260608124304_AddRequireKeywordInAnchorAndLocation.Designer.cs create mode 100644 Apis/cv-search-data/Migrations/20260608124304_AddRequireKeywordInAnchorAndLocation.cs create mode 100644 Apis/cv-search-data/Migrations/20260608124452_AddLocationToProviders.Designer.cs create mode 100644 Apis/cv-search-data/Migrations/20260608124452_AddLocationToProviders.cs diff --git a/Apis/api/Controllers/CvMatcherController.cs b/Apis/api/Controllers/CvMatcherController.cs index 3b0bcc1..5669c25 100644 --- a/Apis/api/Controllers/CvMatcherController.cs +++ b/Apis/api/Controllers/CvMatcherController.cs @@ -181,7 +181,7 @@ public sealed class CvMatcherController : ControllerBase try { var tokenResp = await _jobSearchApi.CreateTokenAsync( - new CreateJobSearchTokenRequest { CvDocumentId = request.CvDocumentId, Email = request.Email, Language = language, Keywords = res.Keywords }, + new CreateJobSearchTokenRequest { CvDocumentId = request.CvDocumentId, Email = request.Email, Language = language, Keywords = res.Keywords, Location = res.Location }, ct); if (!string.IsNullOrWhiteSpace(tokenResp.TokenId)) { diff --git a/Apis/cv-matcher-api-models/Requests/CreateJobSearchTokenRequest.cs b/Apis/cv-matcher-api-models/Requests/CreateJobSearchTokenRequest.cs index 6e1bcb4..6efe6e1 100644 --- a/Apis/cv-matcher-api-models/Requests/CreateJobSearchTokenRequest.cs +++ 
b/Apis/cv-matcher-api-models/Requests/CreateJobSearchTokenRequest.cs @@ -6,4 +6,5 @@ public sealed class CreateJobSearchTokenRequest public string Email { get; set; } = string.Empty; public string Language { get; set; } = "en"; public List Keywords { get; set; } = []; + public string? Location { get; set; } } diff --git a/Apis/cv-matcher-api-models/Responses/JobMatchResponse.cs b/Apis/cv-matcher-api-models/Responses/JobMatchResponse.cs index b7ffe7b..9be1af9 100644 --- a/Apis/cv-matcher-api-models/Responses/JobMatchResponse.cs +++ b/Apis/cv-matcher-api-models/Responses/JobMatchResponse.cs @@ -9,6 +9,7 @@ public List Recommendations { get; set; } = []; public List Evidence { get; set; } = []; public List Keywords { get; set; } = []; + public string? Location { get; set; } public bool Cached { get; set; } public string? JobDocumentId { get; set; } public string? JobUrl { get; set; } diff --git a/Apis/cv-matcher-api-models/Settings/JobSearchSettings.cs b/Apis/cv-matcher-api-models/Settings/JobSearchSettings.cs index 96ed11e..63db298 100644 --- a/Apis/cv-matcher-api-models/Settings/JobSearchSettings.cs +++ b/Apis/cv-matcher-api-models/Settings/JobSearchSettings.cs @@ -23,4 +23,9 @@ public sealed class JobProviderConfig public int MaxResults { get; set; } = 20; /// When true the scraper uses a headless Chromium browser to render JS-heavy pages. public bool UseHeadlessBrowser { get; set; } + /// + /// When false, the Stage 2 anchor-text keyword filter is skipped. + /// Set to false for providers whose search URL already filters by relevance server-side. 
+ /// + public bool RequireKeywordInAnchor { get; set; } = true; } diff --git a/Apis/cv-matcher-api/Controllers/JobSearchController.cs b/Apis/cv-matcher-api/Controllers/JobSearchController.cs index c44e7a5..2b058fa 100644 --- a/Apis/cv-matcher-api/Controllers/JobSearchController.cs +++ b/Apis/cv-matcher-api/Controllers/JobSearchController.cs @@ -53,7 +53,7 @@ public sealed class JobSearchController : ControllerBase if (string.IsNullOrWhiteSpace(request.CvDocumentId) || string.IsNullOrWhiteSpace(request.Email)) return BadRequest(new ErrorResponse { Error = "CvDocumentId and Email are required.", Code = "invalid_request" }); - var tokenId = await _tokenService.CreateTokenAsync(request.CvDocumentId, request.Email, request.Language, request.Keywords, ct); + var tokenId = await _tokenService.CreateTokenAsync(request.CvDocumentId, request.Email, request.Language, request.Keywords, request.Location, ct); return Ok(new CreateJobSearchTokenResponse { TokenId = tokenId }); } catch (Exception ex) diff --git a/Apis/cv-matcher-api/Services/Contracts/IJobTokenService.cs b/Apis/cv-matcher-api/Services/Contracts/IJobTokenService.cs index 4f8ba25..5a40aba 100644 --- a/Apis/cv-matcher-api/Services/Contracts/IJobTokenService.cs +++ b/Apis/cv-matcher-api/Services/Contracts/IJobTokenService.cs @@ -13,12 +13,13 @@ public interface IJobTokenService /// Email address of the user who will receive the results. /// Preferred language for result emails (e.g. "en", "ro"). /// Job search keywords extracted by the LLM during the match call. + /// Candidate location extracted from the CV (e.g. "Cluj-Napoca, Romania"). Null if not available. /// Cancellation token. /// /// The generated token ID to embed in the one-click job search link, /// or null when no job providers are currently enabled (link should be suppressed). 
/// - Task CreateTokenAsync(string cvDocumentId, string email, string language, IReadOnlyList keywords, CancellationToken ct); + Task CreateTokenAsync(string cvDocumentId, string email, string language, IReadOnlyList keywords, string? location, CancellationToken ct); /// /// Validates the token and, if valid, marks it as used and creates a Pending job search session. diff --git a/Apis/cv-matcher-api/Services/JobTokenService.cs b/Apis/cv-matcher-api/Services/JobTokenService.cs index d8856ac..5658b8f 100644 --- a/Apis/cv-matcher-api/Services/JobTokenService.cs +++ b/Apis/cv-matcher-api/Services/JobTokenService.cs @@ -34,7 +34,7 @@ public sealed class JobTokenService : IJobTokenService } /// - public async Task CreateTokenAsync(string cvDocumentId, string email, string language, IReadOnlyList keywords, CancellationToken ct) + public async Task CreateTokenAsync(string cvDocumentId, string email, string language, IReadOnlyList keywords, string? location, CancellationToken ct) { var hasEnabledProviders = await _db.JobProviders.AnyAsync(p => p.Enabled, ct); if (!hasEnabledProviders) @@ -50,6 +50,7 @@ public sealed class JobTokenService : IJobTokenService Email = email, Language = language, Keywords = string.Join(",", keywords), + Location = string.IsNullOrWhiteSpace(location) ? null : location.Trim(), /* the LLM prompt returns "" when no location is found; store null so the "Null if not available" contract holds and blank values never reach URL templates */ ExpiresAt = DateTime.UtcNow.AddDays(_settings.TokenExpiryDays), Used = false, CreatedAt = DateTime.UtcNow @@ -57,7 +58,7 @@ public sealed class JobTokenService : IJobTokenService _db.JobSearchTokens.Add(token); await _db.SaveChangesAsync(ct); - _logger.LogInformation("Job search token created. TokenId={TokenId}, CvDocumentId={CvDocumentId}, Keywords={Keywords}", token.Id, cvDocumentId, token.Keywords); + _logger.LogInformation("Job search token created. 
TokenId={TokenId}, CvDocumentId={CvDocumentId}, Keywords={Keywords}, Location={Location}", token.Id, cvDocumentId, token.Keywords, token.Location); return token.Id; } @@ -92,6 +93,7 @@ public sealed class JobTokenService : IJobTokenService Language = token.Language, Status = JobSearchStatus.Pending, Keywords = keywords, + Location = token.Location, ProviderConfigJson = providerConfigJson, CreatedAt = DateTime.UtcNow }; @@ -126,7 +128,8 @@ public sealed class JobTokenService : IJobTokenService JobLinkContains = entity.JobLinkContains, InitialKeywords = keywords, MaxResults = entity.MaxResults, - UseHeadlessBrowser = entity.UseHeadlessBrowser + UseHeadlessBrowser = entity.UseHeadlessBrowser, + RequireKeywordInAnchor = entity.RequireKeywordInAnchor }; } diff --git a/Apis/cv-matcher-data/Migrations/20260608124331_ImproveKeywordsAndAddLocation.Designer.cs b/Apis/cv-matcher-data/Migrations/20260608124331_ImproveKeywordsAndAddLocation.Designer.cs new file mode 100644 index 0000000..8529302 --- /dev/null +++ b/Apis/cv-matcher-data/Migrations/20260608124331_ImproveKeywordsAndAddLocation.Designer.cs @@ -0,0 +1,130 @@ +// +using System; +using CvMatcher.Data; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Metadata; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; + +#nullable disable + +namespace CvMatcher.Data.Migrations +{ + [DbContext(typeof(CvMatcherDbContext))] + [Migration("20260608124331_ImproveKeywordsAndAddLocation")] + partial class ImproveKeywordsAndAddLocation + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasDefaultSchema("cvMatcher") + .HasAnnotation("ProductVersion", "10.0.7") + .HasAnnotation("Relational:MaxIdentifierLength", 128); + + SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder); + + 
modelBuilder.Entity("CvMatcher.Data.Entities.AiPromptEntity", b => + { + b.Property("Key") + .HasMaxLength(128) + .HasColumnType("nvarchar(128)"); + + b.Property("Language") + .HasMaxLength(8) + .HasColumnType("nvarchar(8)"); + + b.Property("Description") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(500) + .HasColumnType("nvarchar(500)") + .HasDefaultValue(""); + + b.Property("UpdatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("Value") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.HasKey("Key", "Language"); + + b.ToTable("AiPrompts", "cvMatcher"); + }); + + modelBuilder.Entity("CvMatcher.Data.Entities.CvMatchResultEntity", b => + { + b.Property("Id") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("CvDocumentId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("JobDocumentId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("Language") + .IsRequired() + .HasColumnType("nvarchar(450)"); + + b.Property("ResultJson") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.Property("Score") + .HasColumnType("int"); + + b.HasKey("Id"); + + b.HasIndex("CvDocumentId", "JobDocumentId", "Language") + .IsUnique(); + + b.ToTable("Results", "cvMatcher"); + }); + + modelBuilder.Entity("CvMatcher.Data.Entities.CvMatcherChatCacheEntity", b => + { + b.Property("CacheKey") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("Model") + .IsRequired() + .HasMaxLength(120) + .HasColumnType("nvarchar(120)"); + + b.Property("ResponseText") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.Property("Temperature") + 
.HasColumnType("decimal(4,2)"); + + b.HasKey("CacheKey"); + + b.ToTable("ChatCache", "cvMatcher"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/Apis/cv-matcher-data/Migrations/20260608124331_ImproveKeywordsAndAddLocation.cs b/Apis/cv-matcher-data/Migrations/20260608124331_ImproveKeywordsAndAddLocation.cs new file mode 100644 index 0000000..235a28c --- /dev/null +++ b/Apis/cv-matcher-data/Migrations/20260608124331_ImproveKeywordsAndAddLocation.cs @@ -0,0 +1,65 @@ +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace CvMatcher.Data.Migrations +{ + /// + public partial class ImproveKeywordsAndAddLocation : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + // Update English prompt: tighter keywords instruction (job-board search terms, not abstract + // concepts) and add location field so the LLM extracts the candidate's city/country. + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "AiPrompts", + keyColumns: ["Key", "Language"], + keyValues: ["ai.cv-match.system-prompt", "en"], + columns: ["Value", "Description"], + values: [ + "You are a strict CV-to-job matching engine. Return JSON only. Score realistically from 0 to 100. Penalize missing required skills. Do not invent experience. Use concise business language. All text fields in the JSON response must be in English.\nJSON shape: {\"score\":number,\"summary\":\"one-line summary in English\",\"strengths\":[\"strength 1 in English\"],\"gaps\":[\"gap 1 in English\"],\"recommendations\":[\"recommendation 1 in English\"],\"evidence\":[\"evidence 1 in English\"],\"keywords\":[\"Senior .NET Developer\",\"C#\",\"Azure\"],\"location\":\"City, Country\"}.\nFor 'keywords': extract 2-4 short, concrete terms a recruiter would search for on a job board — the candidate's primary role title and key technologies (e.g. 'Senior .NET Developer', 'C#', 'Azure'). 
Avoid abstract concepts like 'leadership', 'cloud', or 'microservices'.\nFor 'location': extract the candidate's city and country from the CV (e.g. 'Cluj-Napoca, Romania'). Use an empty string if not found.", + "System prompt for CV-to-job matching in English. Extracts job-board-friendly keywords (role title + key tech) and candidate location." + ]); + + // Update Romanian prompt: same improvements. + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "AiPrompts", + keyColumns: ["Key", "Language"], + keyValues: ["ai.cv-match.system-prompt", "ro"], + columns: ["Value", "Description"], + values: [ + "Ești un motor strict de potrivire CV-job. Returnează doar JSON. Punctează realist între 0 și 100. Penalizează abilitățile lipsă necesare. Nu inventa experiență. Folosește limbaj profesional concis. Toate câmpurile text din răspunsul JSON trebuie să fie în limba română.\nJSON shape: {\"score\":number,\"summary\":\"rezumat pe o linie în română\",\"strengths\":[\"punct forte 1 în română\"],\"gaps\":[\"lipsă 1 în română\"],\"recommendations\":[\"recomandare 1 în română\"],\"evidence\":[\"dovadă 1 în română\"],\"keywords\":[\"Senior .NET Developer\",\"C#\",\"Azure\"],\"location\":\"Oraș, Țară\"}.\nPentru 'keywords': extrage 2-4 termeni scurți și concreți pe care un recrutor i-ar căuta pe un site de joburi — titlul principal al rolului și tehnologiile cheie (ex. 'Senior .NET Developer', 'C#', 'Azure'). Evită concepte abstracte precum 'leadership', 'cloud' sau 'microservicii'.\nPentru 'location': extrage orașul și țara candidatului din CV (ex. 'Cluj-Napoca, România'). Folosește string gol dacă nu se găsește.", + "System prompt pentru potrivire CV-job în limba română. Extrage cuvinte cheie prietenoase pentru site-uri de joburi (titlu rol + tehnologii cheie) și locația candidatului." 
+ ]); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "AiPrompts", + keyColumns: ["Key", "Language"], + keyValues: ["ai.cv-match.system-prompt", "en"], + columns: ["Value", "Description"], + values: [ + "You are a strict CV-to-job matching engine. Return JSON only. Score realistically from 0 to 100. Penalize missing required skills. Do not invent experience. Use concise business language. All text fields in the JSON response must be in English.\nJSON shape: {\"score\":number,\"summary\":\"one-line summary in English\",\"strengths\":[\"strength 1 in English\"],\"gaps\":[\"gap 1 in English\"],\"recommendations\":[\"recommendation 1 in English\"],\"evidence\":[\"evidence 1 in English\"],\"keywords\":[\"keyword1\",\"keyword2\",\"keyword3\"]}", + "System prompt for CV-to-job matching in English. Instructs LLM to return JSON with CV strengths, gaps, and recommendations relative to the job." + ]); + + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "AiPrompts", + keyColumns: ["Key", "Language"], + keyValues: ["ai.cv-match.system-prompt", "ro"], + columns: ["Value", "Description"], + values: [ + "Ești un motor strict de potrivire CV-job. Returnează doar JSON. Punctează realist între 0 și 100. Penalizează abilitățile lipsă necesare. Nu inventa experiență. Folosește limbaj profesional concis. Toate câmpurile text din răspunsul JSON trebuie să fie în limba română.\nJSON shape: {\"score\":number,\"summary\":\"rezumat pe o linie în română\",\"strengths\":[\"punct forte 1 în română\"],\"gaps\":[\"lipsă 1 în română\"],\"recommendations\":[\"recomandare 1 în română\"],\"evidence\":[\"dovadă 1 în română\"],\"keywords\":[\"cuvant1\",\"cuvant2\",\"cuvant3\"]}", + "System prompt pentru potrivire CV-job în limba română. Instruiește LLM-ul să returneze JSON cu punctele forte ale CV-ului, lacunele și recomandări relative la job." 
+ ]); + } + } +} diff --git a/Apis/cv-search-data/Data/Entities/JobProviderEntity.cs b/Apis/cv-search-data/Data/Entities/JobProviderEntity.cs index 2697be4..1bdc641 100644 --- a/Apis/cv-search-data/Data/Entities/JobProviderEntity.cs +++ b/Apis/cv-search-data/Data/Entities/JobProviderEntity.cs @@ -33,4 +33,10 @@ public sealed class JobProviderEntity /// When true, the scraper renders the page with headless Chromium instead of a plain HTTP GET. public bool UseHeadlessBrowser { get; set; } + + /// + /// When false, the Stage 2 anchor-text keyword filter is skipped. + /// Set to false for providers whose search URL already filters by relevance server-side (ejobs.ro, bestjobs.eu). + /// + public bool RequireKeywordInAnchor { get; set; } = true; } diff --git a/Apis/cv-search-data/Data/Entities/JobSearchSessionEntity.cs b/Apis/cv-search-data/Data/Entities/JobSearchSessionEntity.cs index 70102e4..0a7db20 100644 --- a/Apis/cv-search-data/Data/Entities/JobSearchSessionEntity.cs +++ b/Apis/cv-search-data/Data/Entities/JobSearchSessionEntity.cs @@ -9,6 +9,7 @@ public sealed class JobSearchSessionEntity : BaseEntity public string Email { get; set; } = string.Empty; public string Status { get; set; } = JobSearchStatus.Pending; public string Keywords { get; set; } = string.Empty; + public string? Location { get; set; } public string? ProviderConfigJson { get; set; } public string Language { get; set; } = "en"; } diff --git a/Apis/cv-search-data/Data/Entities/JobSearchTokenEntity.cs b/Apis/cv-search-data/Data/Entities/JobSearchTokenEntity.cs index 68bd984..6c581f2 100644 --- a/Apis/cv-search-data/Data/Entities/JobSearchTokenEntity.cs +++ b/Apis/cv-search-data/Data/Entities/JobSearchTokenEntity.cs @@ -10,4 +10,5 @@ public sealed class JobSearchTokenEntity : BaseEntity public DateTime ExpiresAt { get; set; } public bool Used { get; set; } public string Keywords { get; set; } = string.Empty; + public string? 
Location { get; set; } } diff --git a/Apis/cv-search-data/Migrations/20260608124304_AddRequireKeywordInAnchorAndLocation.Designer.cs b/Apis/cv-search-data/Migrations/20260608124304_AddRequireKeywordInAnchorAndLocation.Designer.cs new file mode 100644 index 0000000..c8720bc --- /dev/null +++ b/Apis/cv-search-data/Migrations/20260608124304_AddRequireKeywordInAnchorAndLocation.Designer.cs @@ -0,0 +1,243 @@ +// +using System; +using CvSearch.Data; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Metadata; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; + +#nullable disable + +namespace CvSearch.Data.Migrations +{ + [DbContext(typeof(CvSearchDbContext))] + [Migration("20260608124304_AddRequireKeywordInAnchorAndLocation")] + partial class AddRequireKeywordInAnchorAndLocation + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasDefaultSchema("cvSearch") + .HasAnnotation("ProductVersion", "10.0.7") + .HasAnnotation("Relational:MaxIdentifierLength", 128); + + SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder); + + modelBuilder.Entity("CvSearch.Data.Entities.JobProviderEntity", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("int"); + + SqlServerPropertyBuilderExtensions.UseIdentityColumn(b.Property("Id")); + + b.Property("DisplayOrder") + .ValueGeneratedOnAdd() + .HasColumnType("int") + .HasDefaultValue(0); + + b.Property("Enabled") + .HasColumnType("bit"); + + b.Property("InitialKeywordsJson") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(2000) + .HasColumnType("nvarchar(2000)") + .HasDefaultValue("[]"); + + b.Property("JobLinkContains") + .IsRequired() + .HasMaxLength(256) + .HasColumnType("nvarchar(256)"); + + b.Property("MaxResults") + .ValueGeneratedOnAdd() + .HasColumnType("int") + 
.HasDefaultValue(20); + + b.Property("Name") + .IsRequired() + .HasMaxLength(128) + .HasColumnType("nvarchar(128)"); + + b.Property("RequireKeywordInAnchor") + .HasColumnType("bit"); + + b.Property("SearchUrlTemplate") + .IsRequired() + .HasMaxLength(1024) + .HasColumnType("nvarchar(1024)"); + + b.Property("UseHeadlessBrowser") + .ValueGeneratedOnAdd() + .HasColumnType("bit") + .HasDefaultValue(false); + + b.HasKey("Id"); + + b.ToTable("JobProviders", "cvSearch"); + }); + + modelBuilder.Entity("CvSearch.Data.Entities.JobSearchResultEntity", b => + { + b.Property("Id") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("JobText") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.Property("JobTitle") + .IsRequired() + .HasMaxLength(512) + .HasColumnType("nvarchar(512)"); + + b.Property("JobUrl") + .IsRequired() + .HasMaxLength(2048) + .HasColumnType("nvarchar(2048)"); + + b.Property("ProviderName") + .IsRequired() + .HasMaxLength(128) + .HasColumnType("nvarchar(128)"); + + b.Property("ResultJson") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.Property("Score") + .HasColumnType("int"); + + b.Property("SessionId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.HasKey("Id"); + + b.HasIndex("SessionId"); + + b.ToTable("JobSearchResults", "cvSearch"); + }); + + modelBuilder.Entity("CvSearch.Data.Entities.JobSearchSessionEntity", b => + { + b.Property("Id") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("CvDocumentId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("Email") + .IsRequired() + .HasMaxLength(256) + .HasColumnType("nvarchar(256)"); + + b.Property("Keywords") + .IsRequired() + 
.HasMaxLength(1000) + .HasColumnType("nvarchar(1000)"); + + b.Property("Language") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(8) + .HasColumnType("nvarchar(8)") + .HasDefaultValue("en"); + + b.Property("Location") + .HasColumnType("nvarchar(max)"); + + b.Property("ProviderConfigJson") + .HasColumnType("nvarchar(max)"); + + b.Property("Status") + .IsRequired() + .HasMaxLength(32) + .HasColumnType("nvarchar(32)"); + + b.Property("TokenId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.HasKey("Id"); + + b.HasIndex("Status"); + + b.ToTable("JobSearchSessions", "cvSearch"); + }); + + modelBuilder.Entity("CvSearch.Data.Entities.JobSearchTokenEntity", b => + { + b.Property("Id") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("CvDocumentId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("Email") + .IsRequired() + .HasMaxLength(256) + .HasColumnType("nvarchar(256)"); + + b.Property("ExpiresAt") + .HasColumnType("datetime2"); + + b.Property("Keywords") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(1000) + .HasColumnType("nvarchar(1000)") + .HasDefaultValue(""); + + b.Property("Language") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(8) + .HasColumnType("nvarchar(8)") + .HasDefaultValue("en"); + + b.Property("Location") + .HasColumnType("nvarchar(max)"); + + b.Property("Used") + .ValueGeneratedOnAdd() + .HasColumnType("bit") + .HasDefaultValue(false); + + b.HasKey("Id"); + + b.ToTable("JobSearchTokens", "cvSearch"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/Apis/cv-search-data/Migrations/20260608124304_AddRequireKeywordInAnchorAndLocation.cs b/Apis/cv-search-data/Migrations/20260608124304_AddRequireKeywordInAnchorAndLocation.cs new file mode 100644 index 0000000..7e6a230 --- /dev/null +++ 
b/Apis/cv-search-data/Migrations/20260608124304_AddRequireKeywordInAnchorAndLocation.cs @@ -0,0 +1,74 @@ +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace CvSearch.Data.Migrations +{ + /// + public partial class AddRequireKeywordInAnchorAndLocation : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.AddColumn( + name: "Location", + schema: "cvSearch", + table: "JobSearchTokens", + type: "nvarchar(max)", + nullable: true); + + migrationBuilder.AddColumn( + name: "Location", + schema: "cvSearch", + table: "JobSearchSessions", + type: "nvarchar(max)", + nullable: true); + + migrationBuilder.AddColumn( + name: "RequireKeywordInAnchor", + schema: "cvSearch", + table: "JobProviders", + type: "bit", + nullable: false, + defaultValue: true); + + // ejobs.ro (Id=1) and bestjobs.eu (Id=2) do server-side keyword filtering via their + // search URL — the Stage 2 anchor-text filter rejects all Romanian job titles because + // they rarely contain abstract LLM keywords. 
+ migrationBuilder.UpdateData( + schema: "cvSearch", + table: "JobProviders", + keyColumn: "Id", + keyValue: 1, + column: "RequireKeywordInAnchor", + value: false); + + migrationBuilder.UpdateData( + schema: "cvSearch", + table: "JobProviders", + keyColumn: "Id", + keyValue: 2, + column: "RequireKeywordInAnchor", + value: false); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropColumn( + name: "Location", + schema: "cvSearch", + table: "JobSearchTokens"); + + migrationBuilder.DropColumn( + name: "Location", + schema: "cvSearch", + table: "JobSearchSessions"); + + migrationBuilder.DropColumn( + name: "RequireKeywordInAnchor", + schema: "cvSearch", + table: "JobProviders"); + } + } +} diff --git a/Apis/cv-search-data/Migrations/20260608124452_AddLocationToProviders.Designer.cs b/Apis/cv-search-data/Migrations/20260608124452_AddLocationToProviders.Designer.cs new file mode 100644 index 0000000..91c742b --- /dev/null +++ b/Apis/cv-search-data/Migrations/20260608124452_AddLocationToProviders.Designer.cs @@ -0,0 +1,243 @@ +// +using System; +using CvSearch.Data; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Metadata; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; + +#nullable disable + +namespace CvSearch.Data.Migrations +{ + [DbContext(typeof(CvSearchDbContext))] + [Migration("20260608124452_AddLocationToProviders")] + partial class AddLocationToProviders + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasDefaultSchema("cvSearch") + .HasAnnotation("ProductVersion", "10.0.7") + .HasAnnotation("Relational:MaxIdentifierLength", 128); + + SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder); + + modelBuilder.Entity("CvSearch.Data.Entities.JobProviderEntity", b => + { + 
b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("int"); + + SqlServerPropertyBuilderExtensions.UseIdentityColumn(b.Property("Id")); + + b.Property("DisplayOrder") + .ValueGeneratedOnAdd() + .HasColumnType("int") + .HasDefaultValue(0); + + b.Property("Enabled") + .HasColumnType("bit"); + + b.Property("InitialKeywordsJson") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(2000) + .HasColumnType("nvarchar(2000)") + .HasDefaultValue("[]"); + + b.Property("JobLinkContains") + .IsRequired() + .HasMaxLength(256) + .HasColumnType("nvarchar(256)"); + + b.Property("MaxResults") + .ValueGeneratedOnAdd() + .HasColumnType("int") + .HasDefaultValue(20); + + b.Property("Name") + .IsRequired() + .HasMaxLength(128) + .HasColumnType("nvarchar(128)"); + + b.Property("RequireKeywordInAnchor") + .HasColumnType("bit"); + + b.Property("SearchUrlTemplate") + .IsRequired() + .HasMaxLength(1024) + .HasColumnType("nvarchar(1024)"); + + b.Property("UseHeadlessBrowser") + .ValueGeneratedOnAdd() + .HasColumnType("bit") + .HasDefaultValue(false); + + b.HasKey("Id"); + + b.ToTable("JobProviders", "cvSearch"); + }); + + modelBuilder.Entity("CvSearch.Data.Entities.JobSearchResultEntity", b => + { + b.Property("Id") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("JobText") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.Property("JobTitle") + .IsRequired() + .HasMaxLength(512) + .HasColumnType("nvarchar(512)"); + + b.Property("JobUrl") + .IsRequired() + .HasMaxLength(2048) + .HasColumnType("nvarchar(2048)"); + + b.Property("ProviderName") + .IsRequired() + .HasMaxLength(128) + .HasColumnType("nvarchar(128)"); + + b.Property("ResultJson") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.Property("Score") + .HasColumnType("int"); + + b.Property("SessionId") + .IsRequired() + .HasMaxLength(64) + 
.HasColumnType("nvarchar(64)"); + + b.HasKey("Id"); + + b.HasIndex("SessionId"); + + b.ToTable("JobSearchResults", "cvSearch"); + }); + + modelBuilder.Entity("CvSearch.Data.Entities.JobSearchSessionEntity", b => + { + b.Property("Id") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("CvDocumentId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("Email") + .IsRequired() + .HasMaxLength(256) + .HasColumnType("nvarchar(256)"); + + b.Property("Keywords") + .IsRequired() + .HasMaxLength(1000) + .HasColumnType("nvarchar(1000)"); + + b.Property("Language") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(8) + .HasColumnType("nvarchar(8)") + .HasDefaultValue("en"); + + b.Property("Location") + .HasColumnType("nvarchar(max)"); + + b.Property("ProviderConfigJson") + .HasColumnType("nvarchar(max)"); + + b.Property("Status") + .IsRequired() + .HasMaxLength(32) + .HasColumnType("nvarchar(32)"); + + b.Property("TokenId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.HasKey("Id"); + + b.HasIndex("Status"); + + b.ToTable("JobSearchSessions", "cvSearch"); + }); + + modelBuilder.Entity("CvSearch.Data.Entities.JobSearchTokenEntity", b => + { + b.Property("Id") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("CvDocumentId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("Email") + .IsRequired() + .HasMaxLength(256) + .HasColumnType("nvarchar(256)"); + + b.Property("ExpiresAt") + .HasColumnType("datetime2"); + + b.Property("Keywords") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(1000) + .HasColumnType("nvarchar(1000)") + .HasDefaultValue(""); + + b.Property("Language") + 
.IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(8) + .HasColumnType("nvarchar(8)") + .HasDefaultValue("en"); + + b.Property("Location") + .HasColumnType("nvarchar(max)"); + + b.Property("Used") + .ValueGeneratedOnAdd() + .HasColumnType("bit") + .HasDefaultValue(false); + + b.HasKey("Id"); + + b.ToTable("JobSearchTokens", "cvSearch"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/Apis/cv-search-data/Migrations/20260608124452_AddLocationToProviders.cs b/Apis/cv-search-data/Migrations/20260608124452_AddLocationToProviders.cs new file mode 100644 index 0000000..01cca6d --- /dev/null +++ b/Apis/cv-search-data/Migrations/20260608124452_AddLocationToProviders.cs @@ -0,0 +1,71 @@ +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace CvSearch.Data.Migrations +{ + /// <summary>Data-only migration: rewrites the SearchUrlTemplate column of the JobProviders rows with Id 1, 2 and 3 so each template contains a location placeholder.</summary> + public partial class AddLocationToProviders : Migration + { + /// <summary>Switches the three provider templates to location-aware URLs: {location-slug} in the path for Id 1 and 2, {location} as a query parameter for Id 3.</summary> + protected override void Up(MigrationBuilder migrationBuilder) + { + // ejobs.ro (Id=1): location in URL path as slug, keywords via q= param. + // Verified URL structure: /locuri-de-munca/{location-slug}?q={keywords} + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "JobProviders", + keyColumn: "Id", + keyValue: 1, + column: "SearchUrlTemplate", + value: "https://www.ejobs.ro/locuri-de-munca/{location-slug}?q={keywords}"); + + // bestjobs.eu (Id=2): location in URL path as slug, keywords via query param. NOTE(review): HtmlJobSearcher substitutes an empty slug when the session has no location, leaving this path ending in "-in-"; confirm the site tolerates that. + // Verified URL structure: /ro/locuri-de-munca-in-{location-slug}?keywords={keywords} + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "JobProviders", + keyColumn: "Id", + keyValue: 2, + column: "SearchUrlTemplate", + value: "https://bestjobs.eu/ro/locuri-de-munca-in-{location-slug}?keywords={keywords}"); + + // linkedin.com (Id=3): location as query parameter, using {location} (URL-encoded by HtmlJobSearcher) rather than the slug form.
+ migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "JobProviders", + keyColumn: "Id", + keyValue: 3, + column: "SearchUrlTemplate", + value: "https://www.linkedin.com/jobs/search/?keywords={keywords}&location={location}"); + } + + /// <summary>Restores keyword-only templates for Id 1, 2 and 3. NOTE(review): Id=2 is reverted to host www.bestjobs.eu while Up writes bestjobs.eu; confirm which host the row held before this migration.</summary> + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "JobProviders", + keyColumn: "Id", + keyValue: 1, + column: "SearchUrlTemplate", + value: "https://www.ejobs.ro/locuri-de-munca?q={keywords}"); + + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "JobProviders", + keyColumn: "Id", + keyValue: 2, + column: "SearchUrlTemplate", + value: "https://www.bestjobs.eu/ro/locuri-de-munca?keywords={keywords}"); + + migrationBuilder.UpdateData( + schema: MigrationConstants.SchemaName, + table: "JobProviders", + keyColumn: "Id", + keyValue: 3, + column: "SearchUrlTemplate", + value: "https://www.linkedin.com/jobs/search/?keywords={keywords}"); + } + } +} diff --git a/Apis/cv-search-data/Migrations/CvSearchDbContextModelSnapshot.cs b/Apis/cv-search-data/Migrations/CvSearchDbContextModelSnapshot.cs index 6d5b927..389ecd0 100644 --- a/Apis/cv-search-data/Migrations/CvSearchDbContextModelSnapshot.cs +++ b/Apis/cv-search-data/Migrations/CvSearchDbContextModelSnapshot.cs @@ -61,6 +61,9 @@ namespace CvSearch.Data.Migrations .HasMaxLength(128) .HasColumnType("nvarchar(128)"); + b.Property("RequireKeywordInAnchor") + .HasColumnType("bit"); + b.Property("SearchUrlTemplate") .IsRequired() .HasMaxLength(1024) @@ -158,6 +161,9 @@ namespace CvSearch.Data.Migrations .HasColumnType("nvarchar(8)") .HasDefaultValue("en"); + b.Property("Location") + .HasColumnType("nvarchar(max)"); + b.Property("ProviderConfigJson") .HasColumnType("nvarchar(max)"); @@ -216,6 +222,9 @@ namespace CvSearch.Data.Migrations .HasColumnType("nvarchar(8)") .HasDefaultValue("en"); + b.Property("Location") +
.HasColumnType("nvarchar(max)"); + b.Property("Used") .ValueGeneratedOnAdd() .HasColumnType("bit") diff --git a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs index a958000..6c4cb78 100644 --- a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs +++ b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs @@ -33,6 +33,7 @@ public sealed class HtmlJobSearcher public async Task> SearchJobUrlsAsync( JobProviderConfig provider, IReadOnlyList cvKeywords, + string? location, CancellationToken ct) { var allKeywords = provider.InitialKeywords @@ -48,13 +49,23 @@ public sealed class HtmlJobSearcher } var keywordsEncoded = HttpUtility.UrlEncode(string.Join(" ", allKeywords)); - var searchUrl = provider.SearchUrlTemplate.Replace("{keywords}", keywordsEncoded); + var locationEncoded = HttpUtility.UrlEncode(location ?? string.Empty); + var locationSlug = (location ?? string.Empty) + .ToLowerInvariant() + .Replace(",", "") + .Replace(" ", "-") + .Trim('-'); + var searchUrl = provider.SearchUrlTemplate + .Replace("{keywords}", keywordsEncoded) + .Replace("{location}", locationEncoded) + .Replace("{location-slug}", locationSlug); _logger.LogInformation( - "Provider {Provider}: fetching {Url} [{Mode}] | CV keywords: [{Keywords}]", + "Provider {Provider}: fetching {Url} [{Mode}] | CV keywords: [{Keywords}] | Location: {Location}", provider.Name, searchUrl, provider.UseHeadlessBrowser ? "headless" : "http", - string.Join(", ", cvKeywords)); + string.Join(", ", cvKeywords), + location ?? "(none)"); string? 
html; if (provider.UseHeadlessBrowser) @@ -89,7 +100,8 @@ public sealed class HtmlJobSearcher stage1Pass++; - if (!cvKeywords.Any(k => anchorText.Contains(k, StringComparison.OrdinalIgnoreCase))) + if (provider.RequireKeywordInAnchor && + !cvKeywords.Any(k => anchorText.Contains(k, StringComparison.OrdinalIgnoreCase))) { _logger.LogDebug( "Provider {Provider}: stage-2 reject | href={Href} | text={Text}", diff --git a/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs b/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs index db92305..4b45d44 100644 --- a/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs +++ b/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs @@ -141,7 +141,7 @@ public sealed class CvSearchJobTask : IJobTask foreach (var provider in providers) { - var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, ct); + var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, session.Location, ct); _logger.LogInformation("Session {SessionId}: provider {Provider} returned {Count} URLs", session.Id, provider.Name, urls.Count); foreach (var url in urls) jobUrls.Add(url); }