Add internet job search feature (cv-search-job)
Build and Push Docker Images / build (push) Failing after 1m36s
Build and Push Docker Images / build (push) Failing after 1m36s
- New cv-search-models shared library: EF entities + CvSearchDbContext for cvSearch schema (JobSearchTokens, JobSearchSessions, JobSearchResults tables) - New cv-search-job worker service: polls DB for pending sessions, scrapes job boards via configurable HTML scraping, runs LLM scoring via cv-matcher-api, emails ranked results - cv-matcher-api: JobTokenService creates one-time tokens; JobSearchController handles link clicks and creates sessions - api: proxies job-search start endpoint, appends job search link to match result email - CI workflow updated to build and push myai-cv-search-job:staging image - CLAUDE.md documentation added for all affected services Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
# cv-matcher-api — Internal CV Match Engine
|
||||
|
||||
Internal port 8082. Only reachable from `api` and `cv-search-job` via `X-Internal-Api-Key`.
|
||||
|
||||
## Responsibilities
|
||||
|
||||
- Indexes CV PDFs into the RAG system via `rag-api`
|
||||
- Matches a CV against a job posting URL (scrapes job HTML, scores pair with LLM)
|
||||
- Manages job search tokens and sessions for the one-click job search feature
|
||||
- Owns two EF DbContexts: `CvMatcherDbContext` (schema `cvMatcher`) and `CvSearchDbContext` (schema `cvSearch`)
|
||||
- Runs EF migrations for both contexts on startup
|
||||
|
||||
## Key routes
|
||||
|
||||
| Method | Route | Description |
|
||||
|--------|-------|-------------|
|
||||
| POST | `/api/cv/upload` | Index CV PDF into RAG |
|
||||
| POST | `/api/cv/match-job` | Score CV against a job URL (LLM call) |
|
||||
| POST | `/api/cv/find-jobs` | Find matching jobs from the RAG index |
|
||||
| POST | `/api/cv/job-search/token` | Create a job search token (called by api after a match) |
|
||||
| POST | `/api/cv/job-search/token/{tokenId}/start` | Validate token, create Pending session (called by api on link click) |
|
||||
| GET | `/api/health` | Health check |
|
||||
|
||||
## Core services
|
||||
|
||||
- `CvMatcherService` — orchestrates upload + match; calls `IRagApiClient` and `IMatcherAiClient`
|
||||
- `JobTextExtractor` — fetches a job page URL and extracts plain text
|
||||
- `JobTokenService` — creates tokens; validates + starts job search sessions; extracts CV keywords using simple heuristics (first 5 meaningful non-empty lines of CV text, split into words)
|
||||
|
||||
## AI providers
|
||||
|
||||
Configured under `Ai:Provider` (`OpenAI` or `Ollama`). Both providers implement `IMatcherAiClient`.
|
||||
Default model: `gpt-4o-mini`. Timeout: 90 s.
|
||||
|
||||
## Database contexts
|
||||
|
||||
Both contexts use the same SQL Server connection string (from `Database:*` settings).
|
||||
|
||||
- `CvMatcherDbContext` — schema `cvMatcher`; migrations in `cv-matcher-api` assembly
|
||||
- `CvSearchDbContext` — schema `cvSearch`; migrations in `cv-search-models` assembly (MigrationsAssembly = "cv-search-models")
|
||||
|
||||
## Keyword extraction (JobTokenService.ExtractKeywords)
|
||||
|
||||
No LLM call. Takes the first 5 non-empty lines of CV text that are:
|
||||
- Longer than 5 and shorter than 200 characters (after trimming)
|
||||
- Not made up solely of digits, whitespace and the characters `+ - ( ) @ .` (e.g. phone numbers or other purely numeric lines)
|
||||
|
||||
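Splits the selected lines into words, strips punctuation (hyphens are kept), drops words shorter than 3 characters, deduplicates case-insensitively, and returns up to 10 comma-separated keywords.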
|
||||
These keywords are stored in `JobSearchSessionEntity.Keywords` and used by `cv-search-job` for scraping.
|
||||
|
||||
## Settings
|
||||
|
||||
| Section | Notes |
|
||||
|---------|-------|
|
||||
| `Database` | Shared SQL Server connection |
|
||||
| `RagApi` | BaseUrl + InternalApiKey for rag-api |
|
||||
| `Ai` | Provider, model, timeout |
|
||||
| `Matcher` | TopK, DeepScoreTopN, MaxJobTextChars |
|
||||
| `JobSearch` | TokenExpiryDays, providers list (stored in session JSON) |
|
||||
| `InternalApi` | ApiKey used by UseInternalApiKeyProtection middleware |
|
||||
@@ -0,0 +1,56 @@
|
||||
using Api.Services.Contracts;
|
||||
using CvMatcher.Models.Requests;
|
||||
using CvMatcher.Models.Responses;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Shared.Models.Responses;
|
||||
|
||||
namespace Api.Controllers;
|
||||
|
||||
/// <summary>
/// HTTP endpoints for the job search feature: creating job search tokens and
/// starting a job search from a previously issued token.
/// </summary>
[ApiController]
[Route("api/cv/job-search")]
public sealed class JobSearchController : ControllerBase
{
    private readonly IJobTokenService _tokenService;
    private readonly ILogger<JobSearchController> _logger;

    /// <summary>Creates the controller with its token service and logger.</summary>
    public JobSearchController(IJobTokenService tokenService, ILogger<JobSearchController> logger)
        => (_tokenService, _logger) = (tokenService, logger);

    /// <summary>
    /// Creates a job search token for the CV document and e-mail in the request body.
    /// </summary>
    /// <param name="request">Body carrying <c>CvDocumentId</c> and <c>Email</c>; both must be non-blank.</param>
    /// <param name="ct">Cancellation token forwarded to the token service.</param>
    /// <returns>
    /// 200 with the new token id, 400 when a required field is blank,
    /// or 500 when anything in the action throws.
    /// </returns>
    [HttpPost("token")]
    public async Task<ActionResult<CreateJobSearchTokenResponse>> CreateToken(
        [FromBody] CreateJobSearchTokenRequest request,
        CancellationToken ct)
    {
        try
        {
            var hasCvDocumentId = !string.IsNullOrWhiteSpace(request.CvDocumentId);
            var hasEmail = !string.IsNullOrWhiteSpace(request.Email);

            if (!hasCvDocumentId || !hasEmail)
            {
                var validationError = new ErrorResponse
                {
                    Error = "CvDocumentId and Email are required.",
                    Code = "invalid_request"
                };
                return BadRequest(validationError);
            }

            var createdTokenId = await _tokenService.CreateTokenAsync(request.CvDocumentId, request.Email, ct);
            var payload = new CreateJobSearchTokenResponse { TokenId = createdTokenId };
            return Ok(payload);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to create job search token.");
            return ServerError("Failed to create token.", "token_create_failed");
        }
    }

    /// <summary>
    /// Asks the token service to start a job search for the given token.
    /// </summary>
    /// <param name="tokenId">Token identifier taken from the route.</param>
    /// <param name="ct">Cancellation token forwarded to the token service.</param>
    /// <returns>
    /// 200 with the status reported by the token service (whatever that status is),
    /// or 500 when the service throws.
    /// </returns>
    [HttpPost("token/{tokenId}/start")]
    public async Task<ActionResult<StartJobSearchResponse>> Start(string tokenId, CancellationToken ct)
    {
        try
        {
            var outcome = await _tokenService.TriggerStartAsync(tokenId, ct);
            var payload = new StartJobSearchResponse { Status = outcome };
            return Ok(payload);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to start job search for token {TokenId}.", tokenId);
            return ServerError("Failed to start search.", "start_failed");
        }
    }

    /// <summary>Builds the 500 response shared by both actions.</summary>
    private ObjectResult ServerError(string error, string code)
    {
        var body = new ErrorResponse { Error = error, Code = code };
        return StatusCode(StatusCodes.Status500InternalServerError, body);
    }
}
|
||||
@@ -8,6 +8,8 @@ using Api.Data.Repositories.Contracts;
|
||||
using Api.Services;
|
||||
using Api.Services.Contracts;
|
||||
using CvMatcher.Models.Settings;
|
||||
using CvSearch.Models.Data;
|
||||
using CvSearch.Models.Settings;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Refit;
|
||||
using Serilog;
|
||||
@@ -34,6 +36,7 @@ try
|
||||
builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));
|
||||
builder.Services.Configure<CvMatcher.Models.Settings.AiSettings>(builder.Configuration.GetSection("Ai"));
|
||||
builder.Services.Configure<MatcherSettings>(builder.Configuration.GetSection("Matcher"));
|
||||
builder.Services.Configure<JobSearchSettings>(builder.Configuration.GetSection("JobSearch"));
|
||||
|
||||
builder.Services.AddRefitClient<IRefitRagApi>()
|
||||
.ConfigureHttpClient((sp, c) =>
|
||||
@@ -61,8 +64,19 @@ try
|
||||
});
|
||||
});
|
||||
|
||||
builder.Services.AddDbContext<CvSearchDbContext>(options =>
|
||||
{
|
||||
var connectionString = builder.Services.GetConfiguredDbConnectionString(builder.Configuration);
|
||||
options.UseSqlServer(connectionString, sql =>
|
||||
{
|
||||
sql.MigrationsAssembly("cv-search-models");
|
||||
sql.MigrationsHistoryTable(CvSearchDbContext.MigrationTableName, CvSearchDbContext.SchemaName);
|
||||
});
|
||||
});
|
||||
|
||||
builder.Services.AddScoped<IMatcherRepository, EfMatcherRepository>();
|
||||
builder.Services.AddScoped<ICvMatcherService, CvMatcherService>();
|
||||
builder.Services.AddScoped<IJobTokenService, JobTokenService>();
|
||||
|
||||
builder.Services.AddControllers();
|
||||
builder.Services.AddSwaggerWithXmlComments(Assembly.GetExecutingAssembly(), ServiceName);
|
||||
@@ -90,6 +104,11 @@ try
|
||||
var db = scope.ServiceProvider.GetRequiredService<CvMatcherDbContext>();
|
||||
db.Database.Migrate();
|
||||
}
|
||||
using (var scope = app.Services.CreateScope())
|
||||
{
|
||||
var db = scope.ServiceProvider.GetRequiredService<CvSearchDbContext>();
|
||||
db.Database.Migrate();
|
||||
}
|
||||
|
||||
Log.Information("{Service} startup complete", ServiceName);
|
||||
app.Run();
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
/// Contract for issuing job search tokens and starting a job search from a token.
/// </summary>
public interface IJobTokenService
{
    /// <summary>Creates a job search token for a CV document and e-mail address.</summary>
    /// <param name="cvDocumentId">Identifier of the CV document the token refers to.</param>
    /// <param name="email">E-mail address associated with the token.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The identifier of the created token.</returns>
    Task<string> CreateTokenAsync(string cvDocumentId, string email, CancellationToken ct);

    /// <summary>Attempts to start a job search for the given token.</summary>
    /// <param name="tokenId">Identifier of a previously created token.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A status string describing the outcome of the attempt.</returns>
    Task<string> TriggerStartAsync(string tokenId, CancellationToken ct);
}
|
||||
@@ -0,0 +1,107 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using Api.Clients.Api.Contracts;
|
||||
using Api.Services.Contracts;
|
||||
using CvMatcher.Models.Responses;
|
||||
using CvSearch.Models.Data;
|
||||
using CvSearch.Models.Data.Entities;
|
||||
using CvSearch.Models.Settings;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// Creates one-time job search tokens and converts a valid token into a pending
/// job search session, including the keywords extracted from the CV text.
/// </summary>
public sealed class JobTokenService : IJobTokenService
{
    // Limits used by the keyword heuristic.
    private const int MinLineLength = 5;    // exclusive lower bound
    private const int MaxLineLength = 200;  // exclusive upper bound
    private const int MaxKeywordLines = 5;
    private const int MinWordLength = 2;    // exclusive lower bound
    private const int MaxKeywords = 10;

    // Cached: building serializer options or regexes on every call is wasted work.
    private static readonly JsonSerializerOptions ProviderJsonOptions = new(JsonSerializerDefaults.Web);

    // Lines made up only of digits, whitespace and "+-()@." (phone numbers, bare numbers).
    private static readonly Regex NumericOrContactLine = new(@"^[\d\s\+\-\(\)\@\.]+$", RegexOptions.Compiled);

    // Everything that is not a word character or a hyphen.
    private static readonly Regex NonKeywordChars = new(@"[^\w\-]", RegexOptions.Compiled);

    // Any run of whitespace; used to split a line into words.
    private static readonly Regex WhitespaceRun = new(@"\s+", RegexOptions.Compiled);

    private readonly CvSearchDbContext _db;
    private readonly IRagApiClient _rag;
    private readonly JobSearchSettings _settings;
    private readonly ILogger<JobTokenService> _logger;

    /// <summary>Creates the service with its database context, RAG client, settings and logger.</summary>
    public JobTokenService(
        CvSearchDbContext db,
        IRagApiClient rag,
        IOptions<JobSearchSettings> settings,
        ILogger<JobTokenService> logger)
    {
        _db = db;
        _rag = rag;
        _settings = settings.Value;
        _logger = logger;
    }

    /// <summary>
    /// Persists a new, unused token for the given CV document and e-mail address.
    /// </summary>
    /// <param name="cvDocumentId">Identifier of the CV document the token refers to.</param>
    /// <param name="email">E-mail address stored on the token.</param>
    /// <param name="ct">Cancellation token passed to the database save.</param>
    /// <returns>The identifier of the created token (a GUID in "N" format).</returns>
    public async Task<string> CreateTokenAsync(string cvDocumentId, string email, CancellationToken ct)
    {
        // Read the clock once so ExpiresAt is exactly CreatedAt + TokenExpiryDays.
        var now = DateTime.UtcNow;

        var token = new JobSearchTokenEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            CvDocumentId = cvDocumentId,
            Email = email,
            ExpiresAt = now.AddDays(_settings.TokenExpiryDays),
            Used = false,
            CreatedAt = now
        };

        _db.JobSearchTokens.Add(token);
        await _db.SaveChangesAsync(ct);

        _logger.LogInformation("Job search token created. TokenId={TokenId}, CvDocumentId={CvDocumentId}", token.Id, cvDocumentId);
        return token.Id;
    }

    /// <summary>
    /// Validates the token and, when it is valid, marks it as used and creates a
    /// pending job search session in a single database save.
    /// </summary>
    /// <param name="tokenId">Identifier of the token to consume.</param>
    /// <param name="ct">Cancellation token passed to the database and RAG calls.</param>
    /// <returns>
    /// <c>StartJobSearchStatus.NotFound</c>, <c>AlreadyUsed</c> or <c>Expired</c> when the
    /// token cannot be used; <c>StartJobSearchStatus.Started</c> once the session is stored.
    /// </returns>
    public async Task<string> TriggerStartAsync(string tokenId, CancellationToken ct)
    {
        var token = await _db.JobSearchTokens.FirstOrDefaultAsync(x => x.Id == tokenId, ct);
        if (token is null)
        {
            return StartJobSearchStatus.NotFound;
        }

        if (token.Used)
        {
            return StartJobSearchStatus.AlreadyUsed;
        }

        if (token.ExpiresAt <= DateTime.UtcNow)
        {
            return StartJobSearchStatus.Expired;
        }

        // Do the fallible remote call BEFORE touching persistent state. Previously the
        // token was saved as used first, so a failure here left a burnt token with no
        // session and the user could never retry the link.
        var cv = await _rag.GetDocumentAsync(token.CvDocumentId, ct);
        var keywords = cv is not null ? ExtractKeywords(cv.Text) : string.Empty;

        var providerConfigJson = JsonSerializer.Serialize(
            _settings.Providers.Where(p => p.Enabled).ToList(),
            ProviderJsonOptions);

        var session = new JobSearchSessionEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            TokenId = token.Id,
            CvDocumentId = token.CvDocumentId,
            Email = token.Email,
            Status = JobSearchStatus.Pending,
            Keywords = keywords,
            ProviderConfigJson = providerConfigJson,
            CreatedAt = DateTime.UtcNow
        };

        // One SaveChanges call: the token flag and the new session are persisted together,
        // so the token is only consumed if the session was actually created.
        // NOTE(review): concurrent calls for the same token are still not serialized here;
        // if duplicates matter, confirm a concurrency token or a unique index on the
        // session's TokenId exists.
        token.Used = true;
        _db.JobSearchSessions.Add(session);
        await _db.SaveChangesAsync(ct);

        _logger.LogInformation("Job search session created. SessionId={SessionId}, Keywords={Keywords}", session.Id, keywords);

        return StartJobSearchStatus.Started;
    }

    /// <summary>
    /// Derives up to ten comma-separated keywords from the first usable lines of the CV text.
    /// </summary>
    /// <param name="cvText">Plain CV text; null or blank input yields an empty string.</param>
    /// <returns>Comma-separated keywords, or an empty string when none were found.</returns>
    private static string ExtractKeywords(string cvText)
    {
        if (string.IsNullOrWhiteSpace(cvText))
        {
            return string.Empty;
        }

        var lines = cvText
            .Split(['\n', '\r'], StringSplitOptions.RemoveEmptyEntries)
            .Select(l => l.Trim())
            .Where(l => l.Length > MinLineLength && l.Length < MaxLineLength)
            .Where(l => !NumericOrContactLine.IsMatch(l))
            .Take(MaxKeywordLines);

        var words = lines
            // Split on any whitespace, not just ' ', so tab/NBSP separated words
            // are not glued together when punctuation is stripped below.
            .SelectMany(l => WhitespaceRun.Split(l))
            .Select(w => NonKeywordChars.Replace(w, ""))
            .Where(w => w.Length > MinWordLength)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .Take(MaxKeywords);

        return string.Join(",", words);
    }
}
|
||||
@@ -106,5 +106,38 @@
|
||||
"TopK": 10,
|
||||
"DeepScoreTopN": 5,
|
||||
"MaxJobTextChars": 60000
|
||||
},
|
||||
"JobSearch": {
|
||||
"Enabled": true,
|
||||
"JobSearchLinkBaseUrl": "https://myai.ro",
|
||||
"TokenExpiryDays": 7,
|
||||
"MinMatchScore": 15,
|
||||
"MaxJobsToMatch": 15,
|
||||
"Providers": [
|
||||
{
|
||||
"Name": "ejobs.ro",
|
||||
"Enabled": false,
|
||||
"SearchUrlTemplate": "https://www.ejobs.ro/locuri-de-munca/{keywords}/",
|
||||
"JobLinkContains": "/user/locuri-de-munca/job/",
|
||||
"InitialKeywords": [],
|
||||
"MaxResults": 20
|
||||
},
|
||||
{
|
||||
"Name": "bestjobs.eu",
|
||||
"Enabled": false,
|
||||
"SearchUrlTemplate": "https://www.bestjobs.eu/ro/locuri-de-munca?q={keywords}",
|
||||
"JobLinkContains": "/ro/locuri-de-munca/",
|
||||
"InitialKeywords": [],
|
||||
"MaxResults": 20
|
||||
},
|
||||
{
|
||||
"Name": "linkedin.com",
|
||||
"Enabled": false,
|
||||
"SearchUrlTemplate": "https://www.linkedin.com/jobs/search/?keywords={keywords}&location=Romania",
|
||||
"JobLinkContains": "/jobs/view/",
|
||||
"InitialKeywords": [],
|
||||
"MaxResults": 20
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,6 +79,7 @@
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\Helpers\common-helpers\common-helpers.csproj" />
|
||||
<ProjectReference Include="..\cv-matcher-api-models\cv-matcher-api-models.csproj" />
|
||||
<ProjectReference Include="..\cv-search-models\cv-search-models.csproj" />
|
||||
<ProjectReference Include="..\shared-models\shared-models.csproj" />
|
||||
<ProjectReference Include="..\..\Helpers\startup-helpers\startup-helpers.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user