Add internet job search feature (cv-search-job)
Build and Push Docker Images / build (push) Failing after 1m36s
Build and Push Docker Images / build (push) Failing after 1m36s
- New cv-search-models shared library: EF entities + CvSearchDbContext for cvSearch schema (JobSearchTokens, JobSearchSessions, JobSearchResults tables)
- New cv-search-job worker service: polls DB for pending sessions, scrapes job boards via configurable HTML scraping, runs LLM scoring via cv-matcher-api, emails ranked results
- cv-matcher-api: JobTokenService creates one-time tokens; JobSearchController handles link clicks and creates sessions
- api: proxies job-search start endpoint, appends job search link to match result email
- CI workflow updated to build and push myai-cv-search-job:staging image
- CLAUDE.md documentation added for all affected services

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,203 @@
|
||||
using System.Text.Json;
|
||||
using CvMatcher.Models.Requests;
|
||||
using CvSearch.Models.Data;
|
||||
using CvSearch.Models.Data.Entities;
|
||||
using CvSearch.Models.Settings;
|
||||
using CvSearchJob.Clients;
|
||||
using CvSearchJob.Services;
|
||||
using JobScheduler.Tasks;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace CvSearchJob.Tasks;
|
||||
|
||||
/// <summary>
/// Scheduled task that processes queued job-search sessions. Each run picks the oldest
/// Pending session, collects candidate job URLs from the configured providers, scores each
/// URL through the CV matcher API, stores the results that reach the minimum score and
/// e-mails them to the session's address.
/// </summary>
public sealed class CvSearchJobTask : IJobTask
{
    /// <summary>Age (measured from <c>CreatedAt</c>) after which a Processing session is treated as orphaned.</summary>
    private static readonly TimeSpan StuckSessionTimeout = TimeSpan.FromMinutes(10);

    /// <summary>Shared serializer options; creating a new instance per call defeats the serializer's metadata cache.</summary>
    private static readonly JsonSerializerOptions WebJsonOptions = new(JsonSerializerDefaults.Web);

    private readonly IServiceScopeFactory _scopeFactory;
    private readonly JobSearchSettings _settings;
    private readonly HtmlJobSearcher _searcher;
    private readonly ICvMatcherInternalApi _matcherApi;
    private readonly CvSearchEmailSender _emailSender;
    private readonly ILogger<CvSearchJobTask> _logger;
    private readonly string _fileStoragePath;

    /// <summary>Task type identifier exposed through <see cref="IJobTask"/>.</summary>
    public string TaskType => "CvSearch";

    /// <summary>
    /// Creates the task and resolves the CV file storage directory from the
    /// <c>FileStorage:Path</c> configuration key (default <c>Files</c>). A relative path is
    /// resolved against the current working directory.
    /// </summary>
    public CvSearchJobTask(
        IServiceScopeFactory scopeFactory,
        IOptions<JobSearchSettings> settings,
        HtmlJobSearcher searcher,
        ICvMatcherInternalApi matcherApi,
        CvSearchEmailSender emailSender,
        IConfiguration config,
        ILogger<CvSearchJobTask> logger)
    {
        _scopeFactory = scopeFactory;
        _settings = settings.Value;
        _searcher = searcher;
        _matcherApi = matcherApi;
        _emailSender = emailSender;
        _logger = logger;

        _fileStoragePath = config["FileStorage:Path"] ?? "Files";
        if (!Path.IsPathRooted(_fileStoragePath))
        {
            _fileStoragePath = Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), _fileStoragePath));
        }
    }

    /// <summary>
    /// Runs one processing cycle: recovers orphaned sessions, then processes at most one
    /// Pending session (oldest first). Does nothing when the feature is disabled.
    /// </summary>
    /// <param name="parametersSection">Scheduler-supplied parameters; not read by this task.</param>
    /// <param name="cancellationToken">Token that aborts the run.</param>
    /// <exception cref="OperationCanceledException">
    /// Rethrown when the run is cancelled while a session is being processed; the session is
    /// returned to Pending first.
    /// </exception>
    public async Task ExecuteAsync(IConfiguration parametersSection, CancellationToken cancellationToken)
    {
        if (!_settings.Enabled)
        {
            return;
        }

        using var scope = _scopeFactory.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<CvSearchDbContext>();

        await RecoverStuckSessionsAsync(db, cancellationToken);

        var pending = await db.JobSearchSessions
            .Where(s => s.Status == JobSearchStatus.Pending)
            .OrderBy(s => s.CreatedAt)
            .FirstOrDefaultAsync(cancellationToken);

        if (pending is null)
        {
            return;
        }

        _logger.LogInformation("Processing job search session {SessionId}", pending.Id);
        pending.Status = JobSearchStatus.Processing;
        await db.SaveChangesAsync(cancellationToken);

        try
        {
            var results = await RunSearchAsync(pending, db, cancellationToken);

            pending.Status = JobSearchStatus.Done;
            await db.SaveChangesAsync(cancellationToken);

            var attachmentPath = BuildCvPath(pending.CvDocumentId);
            await _emailSender.SendResultsAsync(pending.Email, attachmentPath, results, cancellationToken);
            _logger.LogInformation("Session {SessionId} done. {Count} results sent.", pending.Id, results.Count);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The run was interrupted rather than failed: hand the session back so a later
            // run retries it. The token is already cancelled, so the status must be saved
            // without it or the save itself would throw and strand the session.
            _logger.LogWarning("Session {SessionId} interrupted by cancellation; returning it to Pending.", pending.Id);
            pending.Status = JobSearchStatus.Pending;
            await db.SaveChangesAsync(CancellationToken.None);
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Session {SessionId} failed.", pending.Id);
            pending.Status = JobSearchStatus.Failed;

            // Persist the failure even if the token was cancelled in the meantime.
            await db.SaveChangesAsync(CancellationToken.None);
        }
    }

    /// <summary>
    /// Resets sessions that are still marked Processing but were created before the stuck
    /// cutoff back to Pending (e.g. the container crashed mid-run).
    /// </summary>
    private async Task RecoverStuckSessionsAsync(CvSearchDbContext db, CancellationToken ct)
    {
        // NOTE(review): the cutoff is measured from CreatedAt, not from the moment processing
        // started, so a session that waited long in the queue qualifies as soon as it is
        // picked up. That is only safe if runs of this task never overlap — confirm against
        // the scheduler.
        var stuckCutoff = DateTime.UtcNow - StuckSessionTimeout;
        var stuckSessions = await db.JobSearchSessions
            .Where(s => s.Status == JobSearchStatus.Processing && s.CreatedAt < stuckCutoff)
            .ToListAsync(ct);

        if (stuckSessions.Count == 0)
        {
            return;
        }

        foreach (var stuck in stuckSessions)
        {
            stuck.Status = JobSearchStatus.Pending;
            _logger.LogWarning("Reset stuck session {SessionId} back to Pending", stuck.Id);
        }

        await db.SaveChangesAsync(ct);
    }

    /// <summary>
    /// Collects job URLs from every provider, scores up to <c>MaxJobsToMatch</c> of them
    /// against the session's CV and stores those that reach <c>MinMatchScore</c>.
    /// </summary>
    /// <returns>The stored results, ordered by score descending.</returns>
    /// <remarks>
    /// A failure for a single URL is logged and skipped; cancellation of <paramref name="ct"/>
    /// aborts the whole search.
    /// </remarks>
    private async Task<List<JobSearchResultEntity>> RunSearchAsync(
        JobSearchSessionEntity session,
        CvSearchDbContext db,
        CancellationToken ct)
    {
        var cvKeywords = (session.Keywords ?? string.Empty)
            .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .ToList();

        var providers = GetProviders(session.ProviderConfigJson);

        // Case-insensitive set so the same posting found by several providers is matched once.
        var jobUrls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var provider in providers)
        {
            var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, ct);
            foreach (var url in urls)
            {
                jobUrls.Add(url);
            }
        }

        var candidates = jobUrls.Take(_settings.MaxJobsToMatch).ToList();
        _logger.LogInformation("Session {SessionId}: {Count} candidate job URLs to match", session.Id, candidates.Count);

        var results = new List<JobSearchResultEntity>();

        foreach (var url in candidates)
        {
            // Stop promptly instead of failing once per remaining URL after cancellation.
            ct.ThrowIfCancellationRequested();

            JobSearchResultEntity? entity = null;
            var persisted = false;

            try
            {
                var matchRequest = new MatchJobRequest
                {
                    CvDocumentId = session.CvDocumentId,
                    JobUrl = url,
                    GdprConsent = true
                };

                var matchResult = await _matcherApi.MatchJobAsync(matchRequest, ct);
                if (matchResult.Score < _settings.MinMatchScore)
                {
                    _logger.LogDebug("Session {SessionId}: {Url} scored {Score}% (below threshold)", session.Id, url, matchResult.Score);
                    continue;
                }

                // The first sentence of the summary doubles as the title; fall back to a
                // generic label when the summary is missing or starts with an empty sentence.
                var firstSentence = matchResult.Summary?.Split('.').FirstOrDefault()?.Trim();

                entity = new JobSearchResultEntity
                {
                    Id = Guid.NewGuid().ToString("N"),
                    SessionId = session.Id,
                    ProviderName = GuessProvider(url, providers),
                    JobUrl = url,
                    JobTitle = string.IsNullOrWhiteSpace(firstSentence) ? "Job" : firstSentence,
                    JobText = string.Empty,
                    Score = matchResult.Score,
                    ResultJson = JsonSerializer.Serialize(matchResult, WebJsonOptions),
                    CreatedAt = DateTime.UtcNow
                };

                db.JobSearchResults.Add(entity);
                await db.SaveChangesAsync(ct);
                persisted = true;
                results.Add(entity);
            }
            catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
            {
                // Per-URL failures (including HTTP timeouts, which also surface as
                // OperationCanceledException) are best-effort: log and move on.
                _logger.LogWarning(ex, "Session {SessionId}: match failed for {Url}", session.Id, url);
            }
            finally
            {
                // An entity whose insert failed stays tracked as Added and would be retried by
                // every later SaveChanges on this context, including the final status update.
                if (entity is not null && !persisted)
                {
                    db.Entry(entity).State = EntityState.Detached;
                }
            }
        }

        results.Sort((a, b) => b.Score.CompareTo(a.Score));
        return results;
    }

    /// <summary>
    /// Returns the providers to search: the session's own provider list when
    /// <paramref name="providerConfigJson"/> holds valid JSON, otherwise the enabled
    /// providers from settings.
    /// </summary>
    private List<JobProviderConfig> GetProviders(string? providerConfigJson)
    {
        if (!string.IsNullOrWhiteSpace(providerConfigJson))
        {
            try
            {
                var sessionProviders = JsonSerializer.Deserialize<List<JobProviderConfig>>(providerConfigJson, WebJsonOptions);
                if (sessionProviders is not null)
                {
                    // A JSON "null" array element would otherwise fail later during the search.
                    sessionProviders.RemoveAll(p => p is null);
                    return sessionProviders;
                }
            }
            catch (Exception ex) when (ex is JsonException or NotSupportedException)
            {
                _logger.LogWarning(ex, "Invalid per-session provider configuration; falling back to configured providers.");
            }
        }

        return _settings.Providers.Where(p => p.Enabled).ToList();
    }

    /// <summary>
    /// Determines which provider a job URL belongs to: the first provider whose
    /// <c>JobLinkContains</c> fragment occurs in the URL (case-insensitive), otherwise the
    /// URL's host, otherwise <c>"unknown"</c>.
    /// </summary>
    private static string GuessProvider(string url, List<JobProviderConfig> providers)
    {
        foreach (var p in providers)
        {
            if (!string.IsNullOrWhiteSpace(p.JobLinkContains) &&
                url.Contains(p.JobLinkContains, StringComparison.OrdinalIgnoreCase))
            {
                return p.Name;
            }
        }

        return Uri.TryCreate(url, UriKind.Absolute, out var uri) ? uri.Host : "unknown";
    }

    /// <summary>
    /// Builds the path of the CV PDF inside the file storage directory. Only letters and
    /// digits of the document id are kept, so the id cannot escape the storage directory;
    /// an id without any such character maps to <c>cv.pdf</c>.
    /// </summary>
    private string BuildCvPath(string cvDocumentId)
    {
        // NOTE(review): presumably mirrors the file naming used by the service that stores
        // the CV — confirm, since ids containing '-' are altered here.
        var safeId = string.Concat(cvDocumentId.Where(char.IsLetterOrDigit));
        if (string.IsNullOrWhiteSpace(safeId))
        {
            safeId = "cv";
        }

        return Path.Combine(_fileStoragePath, $"{safeId}.pdf");
    }
}
|
||||
Reference in New Issue
Block a user