From 2e9069cbdb8e8550f3f204b88b7e5ed20528f25c Mon Sep 17 00:00:00 2001 From: claude Date: Mon, 8 Jun 2026 16:57:52 +0300 Subject: [PATCH 1/2] =?UTF-8?q?Fix=20file://=20URL=20bug=20in=20HtmlJobSea?= =?UTF-8?q?rcher=20=E2=80=94=20skip=20non-HTTP(S)=20URLs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After resolving relative hrefs against the base search URL, some ejobs.ro links were producing file:/// URIs (e.g. file:///user/locuri-de-munca/...). These were sent to cv-matcher-api and rejected with HTTP 400, causing 0 matches. Added a scheme guard after URI resolution to skip any URL that is not http:// or https://, preventing malformed URLs from reaching the matcher. Co-Authored-By: Claude Sonnet 4.6 --- Jobs/cv-search-job/Services/HtmlJobSearcher.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs index 6c4cb78..a4e40f6 100644 --- a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs +++ b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs @@ -117,6 +117,10 @@ public sealed class HtmlJobSearcher continue; } + // Skip non-HTTP(S) URLs (e.g. file:// or javascript: that can appear in scraped HTML) + if (absoluteUri.Scheme != Uri.UriSchemeHttp && absoluteUri.Scheme != Uri.UriSchemeHttps) + continue; + var url = absoluteUri.GetLeftPart(UriPartial.Path); if (seen.Add(url)) results.Add(url); -- 2.52.0 From 02d2b1e510a5dc4e1d9f2a093c0ac1789df8d26d Mon Sep 17 00:00:00 2001 From: claude Date: Mon, 8 Jun 2026 18:56:36 +0300 Subject: [PATCH 2/2] Add Email and ClientIpAddress audit fields to cvMatcher.Results Threads the caller's email and client IP through the match pipeline so every Results row records who triggered the match and from where. Closes #45 Co-Authored-By: Claude Sonnet 4.6 --- Apis/api-models/Requests/JobMatchRequest.cs | 2 + Apis/api/Controllers/CvMatcherController.cs | 1 + .../Requests/MatchJobRequest.cs | 2 + .../Services/CvMatcherService.cs | 8 +- Apis/cv-matcher-data/CvMatcherDbContext.cs | 2 + .../Entities/CvMatchResultEntity.cs | 2 + ...8155310_AddEmailAndIpToResults.Designer.cs | 138 ++++++++++++++++++ .../20260608155310_AddEmailAndIpToResults.cs | 45 ++++++ .../CvMatcherDbContextModelSnapshot.cs | 8 + .../Contracts/IMatcherRepository.cs | 2 +- .../Repositories/EfMatcherRepository.cs | 4 +- 11 files changed, 208 insertions(+), 6 deletions(-) create mode 100644 Apis/cv-matcher-data/Migrations/20260608155310_AddEmailAndIpToResults.Designer.cs create mode 100644 Apis/cv-matcher-data/Migrations/20260608155310_AddEmailAndIpToResults.cs diff --git a/Apis/api-models/Requests/JobMatchRequest.cs b/Apis/api-models/Requests/JobMatchRequest.cs index 35f9013..dbd5ae6 100644 --- a/Apis/api-models/Requests/JobMatchRequest.cs +++ b/Apis/api-models/Requests/JobMatchRequest.cs @@ -10,4 +10,6 @@ public sealed class JobMatchRequest public string? CaptchaToken { get; set; } /// ISO 639-1 language code for the match result (e.g. "en", "ro"). Defaults to "en". public string? Language { get; set; } + /// Client IP address — set by the api layer from the HTTP context before forwarding. Not supplied by the browser. + public string? ClientIpAddress { get; set; } } diff --git a/Apis/api/Controllers/CvMatcherController.cs b/Apis/api/Controllers/CvMatcherController.cs index 5669c25..d883c1f 100644 --- a/Apis/api/Controllers/CvMatcherController.cs +++ b/Apis/api/Controllers/CvMatcherController.cs @@ -163,6 +163,7 @@ public sealed class CvMatcherController : ControllerBase return BadRequest(new ErrorResponse { Error = "Captcha verification failed.", Code = "captcha_verification_failed" }); } + request.ClientIpAddress = userIp; _logger.LogInformation("Proxying job match request to cv-matcher-api. CvDocumentId={CvDocumentId}, HasJobUrl={HasJobUrl}, HasJobDescription={HasJobDescription}", request.CvDocumentId, !string.IsNullOrWhiteSpace(request.JobUrl), diff --git a/Apis/cv-matcher-api-models/Requests/MatchJobRequest.cs b/Apis/cv-matcher-api-models/Requests/MatchJobRequest.cs index d3366da..2a6abe2 100644 --- a/Apis/cv-matcher-api-models/Requests/MatchJobRequest.cs +++ b/Apis/cv-matcher-api-models/Requests/MatchJobRequest.cs @@ -9,5 +9,7 @@ public string? Email { get; set; } /// ISO 639-1 language code for the match result (e.g. "en", "ro"). Defaults to "en". public string? Language { get; set; } + /// Client IP address forwarded by the api layer. Null when called from a background job. + public string? ClientIpAddress { get; set; } } } diff --git a/Apis/cv-matcher-api/Services/CvMatcherService.cs b/Apis/cv-matcher-api/Services/CvMatcherService.cs index a35f8fe..65b7327 100644 --- a/Apis/cv-matcher-api/Services/CvMatcherService.cs +++ b/Apis/cv-matcher-api/Services/CvMatcherService.cs @@ -77,7 +77,7 @@ public sealed class CvMatcherService : ICvMatcherService { var job = await _rag.GetDocumentAsync(result.DocumentId, ct); if (job is null) continue; - jobs.Add(await ScorePairAsync(cv, job, result.MatchedChunks.Select(x => x.Text).ToArray(), request.Email, NormalizeLanguage(null), ct)); + jobs.Add(await ScorePairAsync(cv, job, result.MatchedChunks.Select(x => x.Text).ToArray(), request.Email, null, NormalizeLanguage(null), ct)); } return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs }; @@ -107,7 +107,7 @@ public sealed class CvMatcherService : ICvMatcherService .FirstOrDefault(x => x.DocumentId == job.DocumentId)? .MatchedChunks.Select(x => x.Text).ToArray() ?? []; - return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct); + return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, request.ClientIpAddress, NormalizeLanguage(request.Language), ct); } /// @@ -115,7 +115,7 @@ public sealed class CvMatcherService : ICvMatcherService /// Returns a cached result immediately when the same (CV, job, language) triple has been scored before. /// When no evidence chunks are available from the vector search, falls back to the raw job text. /// - private async Task ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList evidenceChunks, string? email, string language, CancellationToken ct) + private async Task ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList evidenceChunks, string? email, string? clientIpAddress, string language, CancellationToken ct) { var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct); if (cached is not null) return cached; @@ -145,7 +145,7 @@ public sealed class CvMatcherService : ICvMatcherService result.JobDocumentId = job.Id; result.JobUrl = job.SourceUrl; result.Cached = false; - await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct); + await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, email, clientIpAddress, ct); return result; } diff --git a/Apis/cv-matcher-data/CvMatcherDbContext.cs b/Apis/cv-matcher-data/CvMatcherDbContext.cs index d2cde82..941d0b7 100644 --- a/Apis/cv-matcher-data/CvMatcherDbContext.cs +++ b/Apis/cv-matcher-data/CvMatcherDbContext.cs @@ -36,6 +36,8 @@ public sealed class CvMatcherDbContext : DbContext entity.Property(x => x.JobDocumentId).HasMaxLength(64).IsRequired(); entity.Property(x => x.ResultJson).IsRequired(); entity.Property(x => x.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()"); + entity.Property(x => x.Email).HasMaxLength(256); + entity.Property(x => x.ClientIpAddress).HasMaxLength(45); entity.HasIndex(x => new { x.CvDocumentId, x.JobDocumentId, x.Language }).IsUnique(); }); diff --git a/Apis/cv-matcher-data/Entities/CvMatchResultEntity.cs b/Apis/cv-matcher-data/Entities/CvMatchResultEntity.cs index ac0a9c5..4c32054 100644 --- a/Apis/cv-matcher-data/Entities/CvMatchResultEntity.cs +++ b/Apis/cv-matcher-data/Entities/CvMatchResultEntity.cs @@ -9,4 +9,6 @@ public sealed class CvMatchResultEntity : BaseEntity public string Language { get; set; } = "en"; public string ResultJson { get; set; } = string.Empty; public int Score { get; set; } + public string? Email { get; set; } + public string? ClientIpAddress { get; set; } } diff --git a/Apis/cv-matcher-data/Migrations/20260608155310_AddEmailAndIpToResults.Designer.cs b/Apis/cv-matcher-data/Migrations/20260608155310_AddEmailAndIpToResults.Designer.cs new file mode 100644 index 0000000..f11d46b --- /dev/null +++ b/Apis/cv-matcher-data/Migrations/20260608155310_AddEmailAndIpToResults.Designer.cs @@ -0,0 +1,138 @@ +// +using System; +using CvMatcher.Data; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Metadata; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; + +#nullable disable + +namespace CvMatcher.Data.Migrations +{ + [DbContext(typeof(CvMatcherDbContext))] + [Migration("20260608155310_AddEmailAndIpToResults")] + partial class AddEmailAndIpToResults + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasDefaultSchema("cvMatcher") + .HasAnnotation("ProductVersion", "10.0.7") + .HasAnnotation("Relational:MaxIdentifierLength", 128); + + SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder); + + modelBuilder.Entity("CvMatcher.Data.Entities.AiPromptEntity", b => + { + b.Property("Key") + .HasMaxLength(128) + .HasColumnType("nvarchar(128)"); + + b.Property("Language") + .HasMaxLength(8) + .HasColumnType("nvarchar(8)"); + + b.Property("Description") + .IsRequired() + .ValueGeneratedOnAdd() + .HasMaxLength(500) + .HasColumnType("nvarchar(500)") + .HasDefaultValue(""); + + b.Property("UpdatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("Value") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.HasKey("Key", "Language"); + + b.ToTable("AiPrompts", "cvMatcher"); + }); + + modelBuilder.Entity("CvMatcher.Data.Entities.CvMatchResultEntity", b => + { + b.Property("Id") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("ClientIpAddress") + .HasMaxLength(45) + .HasColumnType("nvarchar(45)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("CvDocumentId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("Email") + .HasMaxLength(256) + .HasColumnType("nvarchar(256)"); + + b.Property("JobDocumentId") + .IsRequired() + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("Language") + .IsRequired() + .HasColumnType("nvarchar(450)"); + + b.Property("ResultJson") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.Property("Score") + .HasColumnType("int"); + + b.HasKey("Id"); + + b.HasIndex("CvDocumentId", "JobDocumentId", "Language") + .IsUnique(); + + b.ToTable("Results", "cvMatcher"); + }); + + modelBuilder.Entity("CvMatcher.Data.Entities.CvMatcherChatCacheEntity", b => + { + b.Property("CacheKey") + .HasMaxLength(64) + .HasColumnType("nvarchar(64)"); + + b.Property("CreatedAt") + .ValueGeneratedOnAdd() + .HasColumnType("datetime2") + .HasDefaultValueSql("SYSUTCDATETIME()"); + + b.Property("Model") + .IsRequired() + .HasMaxLength(120) + .HasColumnType("nvarchar(120)"); + + b.Property("ResponseText") + .IsRequired() + .HasColumnType("nvarchar(max)"); + + b.Property("Temperature") + .HasColumnType("decimal(4,2)"); + + b.HasKey("CacheKey"); + + b.ToTable("ChatCache", "cvMatcher"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/Apis/cv-matcher-data/Migrations/20260608155310_AddEmailAndIpToResults.cs b/Apis/cv-matcher-data/Migrations/20260608155310_AddEmailAndIpToResults.cs new file mode 100644 index 0000000..897bbda --- /dev/null +++ b/Apis/cv-matcher-data/Migrations/20260608155310_AddEmailAndIpToResults.cs @@ -0,0 +1,45 @@ +using CvMatcher.Data; +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace CvMatcher.Data.Migrations +{ + /// + public partial class AddEmailAndIpToResults : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.AddColumn( + name: "ClientIpAddress", + schema: MigrationConstants.SchemaName, + table: "Results", + type: "nvarchar(45)", + maxLength: 45, + nullable: true); + + migrationBuilder.AddColumn( + name: "Email", + schema: MigrationConstants.SchemaName, + table: "Results", + type: "nvarchar(256)", + maxLength: 256, + nullable: true); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropColumn( + name: "ClientIpAddress", + schema: MigrationConstants.SchemaName, + table: "Results"); + + migrationBuilder.DropColumn( + name: "Email", + schema: MigrationConstants.SchemaName, + table: "Results"); + } + } +} diff --git a/Apis/cv-matcher-data/Migrations/CvMatcherDbContextModelSnapshot.cs b/Apis/cv-matcher-data/Migrations/CvMatcherDbContextModelSnapshot.cs index 64a6262..6a8d37a 100644 --- a/Apis/cv-matcher-data/Migrations/CvMatcherDbContextModelSnapshot.cs +++ b/Apis/cv-matcher-data/Migrations/CvMatcherDbContextModelSnapshot.cs @@ -60,6 +60,10 @@ namespace CvMatcher.Data.Migrations .HasMaxLength(64) .HasColumnType("nvarchar(64)"); + b.Property("ClientIpAddress") + .HasMaxLength(45) + .HasColumnType("nvarchar(45)"); + b.Property("CreatedAt") .ValueGeneratedOnAdd() .HasColumnType("datetime2") @@ -70,6 +74,10 @@ namespace CvMatcher.Data.Migrations .HasMaxLength(64) .HasColumnType("nvarchar(64)"); + b.Property("Email") + .HasMaxLength(256) + .HasColumnType("nvarchar(256)"); + b.Property("JobDocumentId") .IsRequired() .HasMaxLength(64) diff --git a/Apis/cv-matcher-data/Repositories/Contracts/IMatcherRepository.cs b/Apis/cv-matcher-data/Repositories/Contracts/IMatcherRepository.cs index 6241862..069f93b 100644 --- a/Apis/cv-matcher-data/Repositories/Contracts/IMatcherRepository.cs +++ b/Apis/cv-matcher-data/Repositories/Contracts/IMatcherRepository.cs @@ -6,7 +6,7 @@ public interface IMatcherRepository { Task InitializeAsync(CancellationToken ct); Task GetMatchAsync(string cvDocumentId, string jobDocumentId, string language, CancellationToken ct); - Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, string language, JobMatchResponse response, CancellationToken ct); + Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, string language, JobMatchResponse response, string? email, string? clientIpAddress, CancellationToken ct); Task GetChatCompletionAsync(string cacheKey, CancellationToken ct); Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct); } diff --git a/Apis/cv-matcher-data/Repositories/EfMatcherRepository.cs b/Apis/cv-matcher-data/Repositories/EfMatcherRepository.cs index 651dcea..33b8efa 100644 --- a/Apis/cv-matcher-data/Repositories/EfMatcherRepository.cs +++ b/Apis/cv-matcher-data/Repositories/EfMatcherRepository.cs @@ -40,7 +40,7 @@ public sealed class EfMatcherRepository : IMatcherRepository return result; } - public async Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, string language, JobMatchResponse response, CancellationToken ct) + public async Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, string language, JobMatchResponse response, string? email, string? clientIpAddress, CancellationToken ct) { var exists = await _db.CvMatchResults.AnyAsync( x => x.CvDocumentId == cvDocumentId && x.JobDocumentId == jobDocumentId && x.Language == language, @@ -58,6 +58,8 @@ public sealed class EfMatcherRepository : IMatcherRepository Language = language, ResultJson = JsonSerializer.Serialize(response, new JsonSerializerOptions(JsonSerializerDefaults.Web)), Score = response.Score, + Email = email, + ClientIpAddress = clientIpAddress, CreatedAt = DateTime.UtcNow }); -- 2.52.0