Add internet job search feature (cv-search-job)
Build and Push Docker Images / build (push) Failing after 1m36s

- New cv-search-models shared library: EF entities + CvSearchDbContext for cvSearch schema (JobSearchTokens, JobSearchSessions, JobSearchResults tables)
- New cv-search-job worker service: polls DB for pending sessions, scrapes job boards via configurable HTML scraping, runs LLM scoring via cv-matcher-api, emails ranked results
- cv-matcher-api: JobTokenService creates one-time tokens; JobSearchController handles link clicks and creates sessions
- api: proxies job-search start endpoint, appends job search link to match result email
- CI workflow updated to build and push myai-cv-search-job:staging image
- CLAUDE.md documentation added for all affected services

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-22 17:56:23 +03:00
parent a0ae262afc
commit 6293fa89e3
38 changed files with 2074 additions and 18 deletions
+60
View File
@@ -0,0 +1,60 @@
# cv-matcher-api — Internal CV Match Engine
Internal port 8082. Only reachable from `api` and `cv-search-job` via `X-Internal-Api-Key`.
## Responsibilities
- Indexes CV PDFs into the RAG system via `rag-api`
- Matches a CV against a job posting URL (scrapes job HTML, scores pair with LLM)
- Manages job search tokens and sessions for the one-click job search feature
- Owns two EF DbContexts: `CvMatcherDbContext` (schema `cvMatcher`) and `CvSearchDbContext` (schema `cvSearch`)
- Runs EF migrations for both contexts on startup
## Key routes
| Method | Route | Description |
|--------|-------|-------------|
| POST | `/api/cv/upload` | Index CV PDF into RAG |
| POST | `/api/cv/match-job` | Score CV against a job URL (LLM call) |
| POST | `/api/cv/find-jobs` | Find matching jobs from the RAG index |
| POST | `/api/cv/job-search/token` | Create a job search token (called by api after a match) |
| POST | `/api/cv/job-search/token/{tokenId}/start` | Validate token, create Pending session (called by api on link click) |
| GET | `/api/health` | Health check |
## Core services
- `CvMatcherService` — orchestrates upload + match; calls `IRagApiClient` and `IMatcherAiClient`
- `JobTextExtractor` — fetches a job page URL and extracts plain text
- `JobTokenService` — creates tokens; validates + starts job search sessions; extracts CV keywords using simple heuristics (first 5 meaningful non-empty lines of CV text, split into words)
## AI providers
Configured under `Ai:Provider` (`OpenAI` or `Ollama`). Both providers implement `IMatcherAiClient`.
Default model: `gpt-4o-mini`. Timeout: 90 s.
## Database contexts
Both contexts use the same SQL Server connection string (from `Database:*` settings).
- `CvMatcherDbContext` — schema `cvMatcher`; migrations in `cv-matcher-api` assembly
- `CvSearchDbContext` — schema `cvSearch`; migrations in `cv-search-models` assembly (MigrationsAssembly = "cv-search-models")
## Keyword extraction (JobTokenService.ExtractKeywords)
No LLM call. Takes the first 5 non-empty lines of CV text that are:
- Longer than 5 and shorter than 200 characters (after trimming)
- Not made up solely of digits, whitespace, and phone/e-mail punctuation (`+ - ( ) @ .`)
Splits those lines into words, strips punctuation (hyphens are kept), drops words of 2 characters or fewer, de-duplicates case-insensitively, and returns up to 10 comma-separated keywords.
These keywords are stored in `JobSearchSessionEntity.Keywords` and used by `cv-search-job` for scraping.
## Settings
| Section | Notes |
|---------|-------|
| `Database` | Shared SQL Server connection |
| `RagApi` | BaseUrl + InternalApiKey for rag-api |
| `Ai` | Provider, model, timeout |
| `Matcher` | TopK, DeepScoreTopN, MaxJobTextChars |
| `JobSearch` | TokenExpiryDays, providers list (stored in session JSON) |
| `InternalApi` | ApiKey used by UseInternalApiKeyProtection middleware |
@@ -0,0 +1,56 @@
using Api.Services.Contracts;
using CvMatcher.Models.Requests;
using CvMatcher.Models.Responses;
using Microsoft.AspNetCore.Mvc;
using Shared.Models.Responses;
namespace Api.Controllers;
/// <summary>
/// Endpoints for the job search flow: issuing job search tokens and turning a
/// token into a job search session. All work is delegated to <see cref="IJobTokenService"/>.
/// </summary>
[ApiController]
[Route("api/cv/job-search")]
public sealed class JobSearchController : ControllerBase
{
    // Non-standard "client closed request" status. The caller has already gone away when
    // this is returned, so the value mainly keeps aborted requests out of the 5xx metrics.
    private const int ClientClosedRequest = 499;

    private readonly IJobTokenService _tokenService;
    private readonly ILogger<JobSearchController> _logger;

    public JobSearchController(IJobTokenService tokenService, ILogger<JobSearchController> logger)
    {
        _tokenService = tokenService;
        _logger = logger;
    }

    /// <summary>Creates a job search token for a CV document / e-mail pair.</summary>
    /// <param name="request">Must carry a non-blank <c>CvDocumentId</c> and <c>Email</c>.</param>
    /// <param name="ct">Request cancellation token.</param>
    /// <returns>
    /// 200 with the new token id; 400 (<c>invalid_request</c>) when a required field is blank;
    /// 500 (<c>token_create_failed</c>) on any other failure.
    /// </returns>
    [HttpPost("token")]
    public async Task<ActionResult<CreateJobSearchTokenResponse>> CreateToken(
        [FromBody] CreateJobSearchTokenRequest request,
        CancellationToken ct)
    {
        try
        {
            if (string.IsNullOrWhiteSpace(request.CvDocumentId) || string.IsNullOrWhiteSpace(request.Email))
            {
                return BadRequest(new ErrorResponse { Error = "CvDocumentId and Email are required.", Code = "invalid_request" });
            }

            var tokenId = await _tokenService.CreateTokenAsync(request.CvDocumentId, request.Email, ct);
            return Ok(new CreateJobSearchTokenResponse { TokenId = tokenId });
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // An aborted request is not a server fault: do not log it as an error or answer 500.
            _logger.LogInformation("Job search token creation was cancelled by the caller.");
            return StatusCode(ClientClosedRequest);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to create job search token.");
            return ServerError("Failed to create token.", "token_create_failed");
        }
    }

    /// <summary>Attempts to start a job search for the given token.</summary>
    /// <param name="tokenId">Token identifier taken from the route.</param>
    /// <param name="ct">Request cancellation token.</param>
    /// <returns>
    /// 200 with the status string returned by the token service (the outcome is reported in the
    /// body, not through the HTTP status); 500 (<c>start_failed</c>) on an unexpected failure.
    /// </returns>
    [HttpPost("token/{tokenId}/start")]
    public async Task<ActionResult<StartJobSearchResponse>> Start(string tokenId, CancellationToken ct)
    {
        try
        {
            var status = await _tokenService.TriggerStartAsync(tokenId, ct);
            return Ok(new StartJobSearchResponse { Status = status });
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Same reasoning as in CreateToken: a caller that went away is not an error.
            _logger.LogInformation("Job search start was cancelled by the caller for token {TokenId}.", tokenId);
            return StatusCode(ClientClosedRequest);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to start job search for token {TokenId}.", tokenId);
            return ServerError("Failed to start search.", "start_failed");
        }
    }

    // Builds the 500 response body shared by both actions.
    private ObjectResult ServerError(string error, string code) =>
        StatusCode(StatusCodes.Status500InternalServerError, new ErrorResponse { Error = error, Code = code });
}
+19
View File
@@ -8,6 +8,8 @@ using Api.Data.Repositories.Contracts;
using Api.Services;
using Api.Services.Contracts;
using CvMatcher.Models.Settings;
using CvSearch.Models.Data;
using CvSearch.Models.Settings;
using Microsoft.EntityFrameworkCore;
using Refit;
using Serilog;
@@ -34,6 +36,7 @@ try
builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));
builder.Services.Configure<CvMatcher.Models.Settings.AiSettings>(builder.Configuration.GetSection("Ai"));
builder.Services.Configure<MatcherSettings>(builder.Configuration.GetSection("Matcher"));
builder.Services.Configure<JobSearchSettings>(builder.Configuration.GetSection("JobSearch"));
builder.Services.AddRefitClient<IRefitRagApi>()
.ConfigureHttpClient((sp, c) =>
@@ -61,8 +64,19 @@ try
});
});
// Register CvSearchDbContext on SQL Server. Its migrations are looked up in the
// "cv-search-models" assembly, and the migrations-history table name and schema
// come from constants declared on the context itself.
builder.Services.AddDbContext<CvSearchDbContext>(options =>
{
var connectionString = builder.Services.GetConfiguredDbConnectionString(builder.Configuration);
options.UseSqlServer(connectionString, sql =>
{
sql.MigrationsAssembly("cv-search-models");
sql.MigrationsHistoryTable(CvSearchDbContext.MigrationTableName, CvSearchDbContext.SchemaName);
});
});
builder.Services.AddScoped<IMatcherRepository, EfMatcherRepository>();
builder.Services.AddScoped<ICvMatcherService, CvMatcherService>();
builder.Services.AddScoped<IJobTokenService, JobTokenService>();
builder.Services.AddControllers();
builder.Services.AddSwaggerWithXmlComments(Assembly.GetExecutingAssembly(), ServiceName);
@@ -90,6 +104,11 @@ try
var db = scope.ServiceProvider.GetRequiredService<CvMatcherDbContext>();
db.Database.Migrate();
}
// Apply pending CvSearchDbContext migrations at startup. The context is resolved
// from a short-lived scope that is disposed once the migration call returns.
using (var scope = app.Services.CreateScope())
{
var db = scope.ServiceProvider.GetRequiredService<CvSearchDbContext>();
db.Database.Migrate();
}
Log.Information("{Service} startup complete", ServiceName);
app.Run();
@@ -0,0 +1,7 @@
namespace Api.Services.Contracts;
/// <summary>
/// Creates job search tokens and starts job search sessions from them.
/// </summary>
public interface IJobTokenService
{
/// <summary>Creates a job search token for the given CV document and e-mail address.</summary>
/// <param name="cvDocumentId">Identifier of the CV document the token refers to.</param>
/// <param name="email">E-mail address stored with the token.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The identifier of the newly created token.</returns>
Task<string> CreateTokenAsync(string cvDocumentId, string email, CancellationToken ct);
/// <summary>Attempts to start a job search for the given token.</summary>
/// <param name="tokenId">Identifier of the token to start from.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A status string describing the outcome of the attempt.</returns>
Task<string> TriggerStartAsync(string tokenId, CancellationToken ct);
}
@@ -0,0 +1,107 @@
using System.Text.Json;
using System.Text.RegularExpressions;
using Api.Clients.Api.Contracts;
using Api.Services.Contracts;
using CvMatcher.Models.Responses;
using CvSearch.Models.Data;
using CvSearch.Models.Data.Entities;
using CvSearch.Models.Settings;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Issues job search tokens and converts a valid, unused, unexpired token into a
/// pending job search session stored through <see cref="CvSearchDbContext"/>.
/// </summary>
public sealed class JobTokenService : IJobTokenService
{
    private const int MaxKeywordLines = 5;
    private const int MaxKeywords = 10;

    // Lines consisting only of digits, whitespace and phone/e-mail punctuation.
    private static readonly Regex ContactOrNumericLine = new(@"^[\d\s\+\-\(\)\@\.]+$", RegexOptions.Compiled);

    // Everything that is neither a word character nor a hyphen.
    private static readonly Regex NonKeywordChars = new(@"[^\w\-]", RegexOptions.Compiled);

    // Cached: building JsonSerializerOptions per call discards its internal metadata cache.
    private static readonly JsonSerializerOptions ProviderJsonOptions = new(JsonSerializerDefaults.Web);

    private readonly CvSearchDbContext _db;
    private readonly IRagApiClient _rag;
    private readonly JobSearchSettings _settings;
    private readonly ILogger<JobTokenService> _logger;

    public JobTokenService(
        CvSearchDbContext db,
        IRagApiClient rag,
        IOptions<JobSearchSettings> settings,
        ILogger<JobTokenService> logger)
    {
        _db = db;
        _rag = rag;
        _settings = settings.Value;
        _logger = logger;
    }

    /// <summary>
    /// Persists a new unused token that expires <c>TokenExpiryDays</c> days from now.
    /// </summary>
    /// <param name="cvDocumentId">CV document the token refers to.</param>
    /// <param name="email">E-mail address stored with the token.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The id of the new token (a GUID in "N" format).</returns>
    public async Task<string> CreateTokenAsync(string cvDocumentId, string email, CancellationToken ct)
    {
        // Single clock read so CreatedAt and ExpiresAt are derived from the same instant.
        var now = DateTime.UtcNow;

        var token = new JobSearchTokenEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            CvDocumentId = cvDocumentId,
            Email = email,
            ExpiresAt = now.AddDays(_settings.TokenExpiryDays),
            Used = false,
            CreatedAt = now
        };

        _db.JobSearchTokens.Add(token);
        await _db.SaveChangesAsync(ct);

        _logger.LogInformation("Job search token created. TokenId={TokenId}, CvDocumentId={CvDocumentId}", token.Id, cvDocumentId);
        return token.Id;
    }

    /// <summary>
    /// Validates the token and, when it is usable, marks it as used and creates a
    /// <c>Pending</c> session carrying the CV keywords and the enabled provider configuration.
    /// </summary>
    /// <param name="tokenId">Identifier of the token to start from.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// <c>NotFound</c>, <c>AlreadyUsed</c>, <c>Expired</c> or <c>Started</c>
    /// (members of <c>StartJobSearchStatus</c>).
    /// </returns>
    public async Task<string> TriggerStartAsync(string tokenId, CancellationToken ct)
    {
        var token = await _db.JobSearchTokens.FirstOrDefaultAsync(x => x.Id == tokenId, ct);
        if (token is null)
        {
            return StartJobSearchStatus.NotFound;
        }

        if (token.Used)
        {
            return StartJobSearchStatus.AlreadyUsed;
        }

        if (token.ExpiresAt <= DateTime.UtcNow)
        {
            return StartJobSearchStatus.Expired;
        }

        // Do every fallible read BEFORE touching the token. If the RAG call (or anything else
        // below) throws, nothing has been written and the link can simply be retried.
        var cv = await _rag.GetDocumentAsync(token.CvDocumentId, ct);
        if (cv is null)
        {
            _logger.LogWarning(
                "CV document {CvDocumentId} was not found; the job search session will be created without keywords.",
                token.CvDocumentId);
        }

        var keywords = cv is not null ? ExtractKeywords(cv.Text) : string.Empty;

        var providerConfigJson = JsonSerializer.Serialize(
            _settings.Providers.Where(p => p.Enabled).ToList(),
            ProviderJsonOptions);

        var session = new JobSearchSessionEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            TokenId = token.Id,
            CvDocumentId = token.CvDocumentId,
            Email = token.Email,
            Status = JobSearchStatus.Pending,
            Keywords = keywords,
            ProviderConfigJson = providerConfigJson,
            CreatedAt = DateTime.UtcNow
        };

        // Consume the token and insert the session in ONE SaveChanges call so both changes are
        // committed together: a token can no longer end up "used" without a matching session.
        // NOTE(review): two concurrent requests for the same token can still both pass the
        // checks above; closing that gap needs a concurrency token or a conditional UPDATE.
        token.Used = true;
        _db.JobSearchSessions.Add(session);
        await _db.SaveChangesAsync(ct);

        _logger.LogInformation("Job search session created. SessionId={SessionId}, Keywords={Keywords}", session.Id, keywords);
        return StartJobSearchStatus.Started;
    }

    /// <summary>
    /// Derives up to 10 comma-separated keywords from the first 5 usable lines of the CV text.
    /// No LLM is involved.
    /// </summary>
    /// <param name="cvText">Plain CV text; null or blank yields an empty string.</param>
    /// <returns>Comma-separated keywords, or an empty string when none qualify.</returns>
    private static string ExtractKeywords(string cvText)
    {
        if (string.IsNullOrWhiteSpace(cvText))
        {
            return string.Empty;
        }

        var lines = cvText
            .Split(['\n', '\r'], StringSplitOptions.RemoveEmptyEntries)
            .Select(l => l.Trim())
            .Where(l => l.Length > 5 && l.Length < 200)
            .Where(l => !ContactOrNumericLine.IsMatch(l))
            .Take(MaxKeywordLines);

        var words = lines
            // A null separator array splits on every white-space character, so words separated
            // by tabs or non-breaking spaces are not glued together once punctuation is stripped.
            .SelectMany(l => l.Split(default(char[]), StringSplitOptions.RemoveEmptyEntries))
            .Select(w => NonKeywordChars.Replace(w, ""))
            .Where(w => w.Length > 2)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .Take(MaxKeywords);

        return string.Join(",", words);
    }
}
+33
View File
@@ -106,5 +106,38 @@
"TopK": 10,
"DeepScoreTopN": 5,
"MaxJobTextChars": 60000
},
"JobSearch": {
"Enabled": true,
"JobSearchLinkBaseUrl": "https://myai.ro",
"TokenExpiryDays": 7,
"MinMatchScore": 15,
"MaxJobsToMatch": 15,
"Providers": [
{
"Name": "ejobs.ro",
"Enabled": false,
"SearchUrlTemplate": "https://www.ejobs.ro/locuri-de-munca/{keywords}/",
"JobLinkContains": "/user/locuri-de-munca/job/",
"InitialKeywords": [],
"MaxResults": 20
},
{
"Name": "bestjobs.eu",
"Enabled": false,
"SearchUrlTemplate": "https://www.bestjobs.eu/ro/locuri-de-munca?q={keywords}",
"JobLinkContains": "/ro/locuri-de-munca/",
"InitialKeywords": [],
"MaxResults": 20
},
{
"Name": "linkedin.com",
"Enabled": false,
"SearchUrlTemplate": "https://www.linkedin.com/jobs/search/?keywords={keywords}&location=Romania",
"JobLinkContains": "/jobs/view/",
"InitialKeywords": [],
"MaxResults": 20
}
]
}
}
@@ -79,6 +79,7 @@
<ItemGroup>
<ProjectReference Include="..\..\Helpers\common-helpers\common-helpers.csproj" />
<ProjectReference Include="..\cv-matcher-api-models\cv-matcher-api-models.csproj" />
<ProjectReference Include="..\cv-search-models\cv-search-models.csproj" />
<ProjectReference Include="..\shared-models\shared-models.csproj" />
<ProjectReference Include="..\..\Helpers\startup-helpers\startup-helpers.csproj" />
</ItemGroup>