7 Commits

Author SHA1 Message Date
claude 02d2b1e510 Add Email and ClientIpAddress audit fields to cvMatcher.Results
Threads the caller's email and client IP through the match pipeline so
every Results row records who triggered the match and from where.
Closes #45

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 18:56:36 +03:00
claude 3c3451b198 Merge branch 'main' into staging
Build and Push Docker Images Staging / build (push) Successful in 17m15s
2026-06-08 18:43:56 +03:00
claude a83f6f705f Remove UseHeadlessBrowser from JobProvider — all fetches now go via page-fetcher-api
page-fetcher-api always uses Playwright (networkidle by default), so the
per-provider flag that chose between headless and plain HTTP is obsolete.

- Removed from JobProviderEntity, CvSearchDbContext, JobProviderConfig, JobTokenService
- HtmlJobSearcher no longer passes WaitFor (uses page-fetcher-api default)
- EF migration drops the column from cvSearch.JobProviders

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 18:43:42 +03:00
claude b68cf942a8 Merge branch 'main' into staging
Build and Push Docker Images Staging / build (push) Successful in 28m0s
2026-06-08 18:37:00 +03:00
claude 61805e2fb5 Merge pull request 'feat: page-fetcher-api centralised Playwright page fetcher' (#44) from feature/page-fetcher-api into main
Merge feature/page-fetcher-api into main
2026-06-08 15:36:44 +00:00
claude 4de6f1db45 Merge branch 'main' into staging
Build and Push Docker Images Staging / build (push) Successful in 7m25s
2026-06-08 17:30:49 +03:00
claude 2e9069cbdb Fix file:// URL bug in HtmlJobSearcher — skip non-HTTP(S) URLs
Build and Push Docker Images Staging / build (push) Successful in 35s
After resolving relative hrefs against the base search URL, some ejobs.ro
links were producing file:/// URIs (e.g. file:///user/locuri-de-munca/...).
These were sent to cv-matcher-api and rejected with HTTP 400, causing 0 matches.

Added a scheme guard after URI resolution to skip any URL that is not
http:// or https://, preventing malformed URLs from reaching the matcher.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 16:57:52 +03:00
19 changed files with 478 additions and 19 deletions
@@ -10,4 +10,6 @@ public sealed class JobMatchRequest
public string? CaptchaToken { get; set; }
/// <summary>ISO 639-1 language code for the match result (e.g. "en", "ro"). Defaults to "en".</summary>
public string? Language { get; set; }
/// <summary>Client IP address — set by the api layer from the HTTP context before forwarding. Not supplied by the browser.</summary>
public string? ClientIpAddress { get; set; }
}
@@ -163,6 +163,7 @@ public sealed class CvMatcherController : ControllerBase
return BadRequest(new ErrorResponse { Error = "Captcha verification failed.", Code = "captcha_verification_failed" });
}
request.ClientIpAddress = userIp;
_logger.LogInformation("Proxying job match request to cv-matcher-api. CvDocumentId={CvDocumentId}, HasJobUrl={HasJobUrl}, HasJobDescription={HasJobDescription}",
request.CvDocumentId,
!string.IsNullOrWhiteSpace(request.JobUrl),
@@ -9,5 +9,7 @@
public string? Email { get; set; }
/// <summary>ISO 639-1 language code for the match result (e.g. "en", "ro"). Defaults to "en".</summary>
public string? Language { get; set; }
/// <summary>Client IP address forwarded by the api layer. Null when called from a background job.</summary>
public string? ClientIpAddress { get; set; }
}
}
@@ -21,8 +21,6 @@ public sealed class JobProviderConfig
public string JobLinkContains { get; set; } = string.Empty;
public List<string> InitialKeywords { get; set; } = [];
public int MaxResults { get; set; } = 20;
/// <summary>When true the scraper uses a headless Chromium browser to render JS-heavy pages.</summary>
public bool UseHeadlessBrowser { get; set; }
/// <summary>
/// When false, the Stage 2 anchor-text keyword filter is skipped.
/// Set to false for providers whose search URL already filters by relevance server-side.
@@ -77,7 +77,7 @@ public sealed class CvMatcherService : ICvMatcherService
{
var job = await _rag.GetDocumentAsync(result.DocumentId, ct);
if (job is null) continue;
jobs.Add(await ScorePairAsync(cv, job, result.MatchedChunks.Select(x => x.Text).ToArray(), request.Email, NormalizeLanguage(null), ct));
jobs.Add(await ScorePairAsync(cv, job, result.MatchedChunks.Select(x => x.Text).ToArray(), request.Email, null, NormalizeLanguage(null), ct));
}
return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
@@ -107,7 +107,7 @@ public sealed class CvMatcherService : ICvMatcherService
.FirstOrDefault(x => x.DocumentId == job.DocumentId)?
.MatchedChunks.Select(x => x.Text).ToArray() ?? [];
return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct);
return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, request.ClientIpAddress, NormalizeLanguage(request.Language), ct);
}
/// <summary>
@@ -115,7 +115,7 @@ public sealed class CvMatcherService : ICvMatcherService
/// Returns a cached result immediately when the same (CV, job, language) triple has been scored before.
/// When no evidence chunks are available from the vector search, falls back to the raw job text.
/// </summary>
private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string language, CancellationToken ct)
private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string? clientIpAddress, string language, CancellationToken ct)
{
var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
if (cached is not null) return cached;
@@ -145,7 +145,7 @@ public sealed class CvMatcherService : ICvMatcherService
result.JobDocumentId = job.Id;
result.JobUrl = job.SourceUrl;
result.Cached = false;
await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct);
await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, email, clientIpAddress, ct);
return result;
}
@@ -128,7 +128,6 @@ public sealed class JobTokenService : IJobTokenService
JobLinkContains = entity.JobLinkContains,
InitialKeywords = keywords,
MaxResults = entity.MaxResults,
UseHeadlessBrowser = entity.UseHeadlessBrowser,
RequireKeywordInAnchor = entity.RequireKeywordInAnchor
};
}
@@ -36,6 +36,8 @@ public sealed class CvMatcherDbContext : DbContext
entity.Property(x => x.JobDocumentId).HasMaxLength(64).IsRequired();
entity.Property(x => x.ResultJson).IsRequired();
entity.Property(x => x.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
entity.Property(x => x.Email).HasMaxLength(256);
entity.Property(x => x.ClientIpAddress).HasMaxLength(45);
entity.HasIndex(x => new { x.CvDocumentId, x.JobDocumentId, x.Language }).IsUnique();
});
@@ -9,4 +9,6 @@ public sealed class CvMatchResultEntity : BaseEntity
public string Language { get; set; } = "en";
public string ResultJson { get; set; } = string.Empty;
public int Score { get; set; }
public string? Email { get; set; }
public string? ClientIpAddress { get; set; }
}
@@ -0,0 +1,138 @@
// <auto-generated />
using System;
using CvMatcher.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
#nullable disable
namespace CvMatcher.Data.Migrations
{
[DbContext(typeof(CvMatcherDbContext))]
[Migration("20260608155310_AddEmailAndIpToResults")]
partial class AddEmailAndIpToResults
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasDefaultSchema("cvMatcher")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
modelBuilder.Entity("CvMatcher.Data.Entities.AiPromptEntity", b =>
{
b.Property<string>("Key")
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<string>("Language")
.HasMaxLength(8)
.HasColumnType("nvarchar(8)");
b.Property<string>("Description")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(500)
.HasColumnType("nvarchar(500)")
.HasDefaultValue("");
b.Property<DateTime>("UpdatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("Value")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.HasKey("Key", "Language");
b.ToTable("AiPrompts", "cvMatcher");
});
modelBuilder.Entity("CvMatcher.Data.Entities.CvMatchResultEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("ClientIpAddress")
.HasMaxLength(45)
.HasColumnType("nvarchar(45)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<string>("JobDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Language")
.IsRequired()
.HasColumnType("nvarchar(450)");
b.Property<string>("ResultJson")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int>("Score")
.HasColumnType("int");
b.HasKey("Id");
b.HasIndex("CvDocumentId", "JobDocumentId", "Language")
.IsUnique();
b.ToTable("Results", "cvMatcher");
});
modelBuilder.Entity("CvMatcher.Data.Entities.CvMatcherChatCacheEntity", b =>
{
b.Property<string>("CacheKey")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("Model")
.IsRequired()
.HasMaxLength(120)
.HasColumnType("nvarchar(120)");
b.Property<string>("ResponseText")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<decimal>("Temperature")
.HasColumnType("decimal(4,2)");
b.HasKey("CacheKey");
b.ToTable("ChatCache", "cvMatcher");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,45 @@
using CvMatcher.Data;
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace CvMatcher.Data.Migrations
{
/// <inheritdoc />
public partial class AddEmailAndIpToResults : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.AddColumn<string>(
name: "ClientIpAddress",
schema: MigrationConstants.SchemaName,
table: "Results",
type: "nvarchar(45)",
maxLength: 45,
nullable: true);
migrationBuilder.AddColumn<string>(
name: "Email",
schema: MigrationConstants.SchemaName,
table: "Results",
type: "nvarchar(256)",
maxLength: 256,
nullable: true);
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropColumn(
name: "ClientIpAddress",
schema: MigrationConstants.SchemaName,
table: "Results");
migrationBuilder.DropColumn(
name: "Email",
schema: MigrationConstants.SchemaName,
table: "Results");
}
}
}
@@ -60,6 +60,10 @@ namespace CvMatcher.Data.Migrations
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("ClientIpAddress")
.HasMaxLength(45)
.HasColumnType("nvarchar(45)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
@@ -70,6 +74,10 @@ namespace CvMatcher.Data.Migrations
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<string>("JobDocumentId")
.IsRequired()
.HasMaxLength(64)
@@ -6,7 +6,7 @@ public interface IMatcherRepository
{
Task InitializeAsync(CancellationToken ct);
Task<JobMatchResponse?> GetMatchAsync(string cvDocumentId, string jobDocumentId, string language, CancellationToken ct);
Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, string language, JobMatchResponse response, CancellationToken ct);
Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, string language, JobMatchResponse response, string? email, string? clientIpAddress, CancellationToken ct);
Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct);
Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct);
}
@@ -40,7 +40,7 @@ public sealed class EfMatcherRepository : IMatcherRepository
return result;
}
public async Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, string language, JobMatchResponse response, CancellationToken ct)
public async Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, string language, JobMatchResponse response, string? email, string? clientIpAddress, CancellationToken ct)
{
var exists = await _db.CvMatchResults.AnyAsync(
x => x.CvDocumentId == cvDocumentId && x.JobDocumentId == jobDocumentId && x.Language == language,
@@ -58,6 +58,8 @@ public sealed class EfMatcherRepository : IMatcherRepository
Language = language,
ResultJson = JsonSerializer.Serialize(response, new JsonSerializerOptions(JsonSerializerDefaults.Web)),
Score = response.Score,
Email = email,
ClientIpAddress = clientIpAddress,
CreatedAt = DateTime.UtcNow
});
@@ -79,7 +79,6 @@ public sealed class CvSearchDbContext : DbContext
entity.Property(x => x.InitialKeywordsJson).HasMaxLength(2000).HasDefaultValue("[]").IsRequired();
entity.Property(x => x.MaxResults).HasDefaultValue(20);
entity.Property(x => x.DisplayOrder).HasDefaultValue(0);
entity.Property(x => x.UseHeadlessBrowser).HasDefaultValue(false);
});
}
}
@@ -31,9 +31,6 @@ public sealed class JobProviderEntity
/// <summary>Controls display ordering in future admin UIs.</summary>
public int DisplayOrder { get; set; }
/// <summary>When true, the scraper renders the page with headless Chromium instead of a plain HTTP GET.</summary>
public bool UseHeadlessBrowser { get; set; }
/// <summary>
/// When false, the Stage 2 anchor-text keyword filter is skipped.
/// Set to false for providers whose search URL already filters by relevance server-side (ejobs.ro, bestjobs.eu).
@@ -0,0 +1,238 @@
// <auto-generated />
using System;
using CvSearch.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
#nullable disable
namespace CvSearch.Data.Migrations
{
[DbContext(typeof(CvSearchDbContext))]
[Migration("20260608154221_RemoveUseHeadlessBrowser")]
partial class RemoveUseHeadlessBrowser
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasDefaultSchema("cvSearch")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
modelBuilder.Entity("CvSearch.Data.Entities.JobProviderEntity", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("int");
SqlServerPropertyBuilderExtensions.UseIdentityColumn(b.Property<int>("Id"));
b.Property<int>("DisplayOrder")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(0);
b.Property<bool>("Enabled")
.HasColumnType("bit");
b.Property<string>("InitialKeywordsJson")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)")
.HasDefaultValue("[]");
b.Property<string>("JobLinkContains")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<int>("MaxResults")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasDefaultValue(20);
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<bool>("RequireKeywordInAnchor")
.HasColumnType("bit");
b.Property<string>("SearchUrlTemplate")
.IsRequired()
.HasMaxLength(1024)
.HasColumnType("nvarchar(1024)");
b.HasKey("Id");
b.ToTable("JobProviders", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchResultEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("JobText")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<string>("JobTitle")
.IsRequired()
.HasMaxLength(512)
.HasColumnType("nvarchar(512)");
b.Property<string>("JobUrl")
.IsRequired()
.HasMaxLength(2048)
.HasColumnType("nvarchar(2048)");
b.Property<string>("ProviderName")
.IsRequired()
.HasMaxLength(128)
.HasColumnType("nvarchar(128)");
b.Property<string>("ResultJson")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int>("Score")
.HasColumnType("int");
b.Property<string>("SessionId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("SessionId");
b.ToTable("JobSearchResults", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchSessionEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<string>("Keywords")
.IsRequired()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<string>("ProviderConfigJson")
.HasColumnType("nvarchar(max)");
b.Property<string>("Status")
.IsRequired()
.HasMaxLength(32)
.HasColumnType("nvarchar(32)");
b.Property<string>("TokenId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.HasKey("Id");
b.HasIndex("Status");
b.ToTable("JobSearchSessions", "cvSearch");
});
modelBuilder.Entity("CvSearch.Data.Entities.JobSearchTokenEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<string>("CvDocumentId")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("Email")
.IsRequired()
.HasMaxLength(256)
.HasColumnType("nvarchar(256)");
b.Property<DateTime>("ExpiresAt")
.HasColumnType("datetime2");
b.Property<string>("Keywords")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(1000)
.HasColumnType("nvarchar(1000)")
.HasDefaultValue("");
b.Property<string>("Language")
.IsRequired()
.ValueGeneratedOnAdd()
.HasMaxLength(8)
.HasColumnType("nvarchar(8)")
.HasDefaultValue("en");
b.Property<string>("Location")
.HasColumnType("nvarchar(max)");
b.Property<bool>("Used")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobSearchTokens", "cvSearch");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,32 @@
using CvSearch.Data;
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace CvSearch.Data.Migrations
{
/// <inheritdoc />
public partial class RemoveUseHeadlessBrowser : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropColumn(
name: "UseHeadlessBrowser",
schema: MigrationConstants.SchemaName,
table: "JobProviders");
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.AddColumn<bool>(
name: "UseHeadlessBrowser",
schema: MigrationConstants.SchemaName,
table: "JobProviders",
type: "bit",
nullable: false,
defaultValue: false);
}
}
}
@@ -69,11 +69,6 @@ namespace CvSearch.Data.Migrations
.HasMaxLength(1024)
.HasColumnType("nvarchar(1024)");
b.Property<bool>("UseHeadlessBrowser")
.ValueGeneratedOnAdd()
.HasColumnType("bit")
.HasDefaultValue(false);
b.HasKey("Id");
b.ToTable("JobProviders", "cvSearch");
@@ -72,7 +72,6 @@ public sealed class HtmlJobSearcher
var fetchResponse = await _pageFetcher.FetchAsync(new FetchPageRequest
{
Url = searchUrl,
WaitFor = provider.UseHeadlessBrowser ? "networkidle" : "domcontentloaded",
CallerService = "cv-search-job"
}, ct);