Files
myAi/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs
T
claude e95ed36647 refactor: restructure solution into -models/-data/-api project taxonomy
Phases 1-10 of the planned refactoring:

Phase 1: rename shared-models -> common
  - namespace Shared.Models -> Common throughout
  - remove stale AspNetCore.Http.Features 5.0 reference

Phase 2: create shared-data with abstract BaseEntity
  - BaseEntity: required string Id { get; init; } + DateTime CreatedAt { get; init; }

Phase 3: rename myai-models -> myai-data
  - namespace MyAi.Models -> MyAi.Data
  - MigrationsAssembly("myai-data")

Phase 4: rename cv-search-models -> cv-search-data
  - namespace CvSearch.Models -> CvSearch.Data
  - move JobSearchSettings to cv-matcher-api-models
  - JobSearch*Entity now inherits BaseEntity

Phase 5: extract rag-data from rag-api
  - new project: Apis/rag-data with RagDbContext + entities + migrations
  - RagDocumentEntity inherits BaseEntity; cache entities use CacheKey PK
  - fix duplicate AddHttpClient<RagAiClient>/AddScoped registrations in rag-api
  - MigrationsAssembly("rag-data")

Phase 6: extract cv-matcher-data from cv-matcher-api
  - new project: Apis/cv-matcher-data with CvMatcherDbContext + entities + migrations
  - CvMatchResultEntity inherits BaseEntity; CvMatcherChatCacheEntity uses CacheKey PK
  - MigrationsAssembly("cv-matcher-data")

Phase 7: create empty cv-cleanup-job-models and cv-search-job-models

Phase 8: update all 5 Dockerfiles for renamed/new projects

Phase 9: reorganise .sln virtual folders (Apis/Jobs/Models/Data/Helpers)
  - update root CLAUDE.md with new project taxonomy and migration commands
  - update cv-matcher-api/CLAUDE.md and cv-search-job/CLAUDE.md

Phase 10: add Directory.Packages.props for centralised NuGet versions
  - remove Version= from all PackageReference elements in active .csproj files

No database changes. No runtime behaviour changes.
All MigrationId strings in __EFMigrationsHistory are unaffected.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-27 15:26:03 +03:00

204 lines
7.6 KiB
C#

using System.Text.Json;
using CvMatcher.Models.Requests;
using CvSearch.Data;
using CvSearch.Data.Entities;
using CvMatcher.Models.Settings;
using CvSearchJob.Clients;
using CvSearchJob.Services;
using JobScheduler.Tasks;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace CvSearchJob.Tasks;
/// <summary>
/// Job task that processes queued CV job-search sessions one at a time: it takes the oldest
/// pending session, collects candidate job URLs from the providers, scores each URL against the
/// session's CV through the matcher API, stores every result that reaches the minimum score and
/// e-mails the stored results to the session's address.
/// </summary>
public sealed class CvSearchJobTask : IJobTask
{
    /// <summary>Age (measured from <c>CreatedAt</c>) after which a Processing session is reset to Pending.</summary>
    private static readonly TimeSpan StuckSessionTimeout = TimeSpan.FromMinutes(10);

    /// <summary>
    /// Serializer options shared by every (de)serialization in this task. A single cached instance
    /// is used because each new <see cref="JsonSerializerOptions"/> rebuilds its metadata cache.
    /// </summary>
    private static readonly JsonSerializerOptions WebJsonOptions = new(JsonSerializerDefaults.Web);

    private readonly IServiceScopeFactory _scopeFactory;
    private readonly JobSearchSettings _settings;
    private readonly HtmlJobSearcher _searcher;
    private readonly ICvMatcherInternalApi _matcherApi;
    private readonly CvSearchEmailSender _emailSender;
    private readonly ILogger<CvSearchJobTask> _logger;
    private readonly string _fileStoragePath;

    /// <summary>Identifier under which this task is exposed.</summary>
    public string TaskType => "CvSearch";

    /// <summary>
    /// Creates the task and resolves the CV storage directory from the <c>FileStorage:Path</c>
    /// configuration key (default <c>Files</c>); a relative value is made absolute against the
    /// current working directory.
    /// </summary>
    public CvSearchJobTask(
        IServiceScopeFactory scopeFactory,
        IOptions<JobSearchSettings> settings,
        HtmlJobSearcher searcher,
        ICvMatcherInternalApi matcherApi,
        CvSearchEmailSender emailSender,
        IConfiguration config,
        ILogger<CvSearchJobTask> logger)
    {
        _scopeFactory = scopeFactory;
        _settings = settings.Value;
        _searcher = searcher;
        _matcherApi = matcherApi;
        _emailSender = emailSender;
        _logger = logger;

        var storagePath = config["FileStorage:Path"] ?? "Files";
        if (!Path.IsPathRooted(storagePath))
        {
            storagePath = Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), storagePath));
        }

        _fileStoragePath = storagePath;
    }

    /// <summary>
    /// Runs one iteration of the task: resets stuck sessions, then processes at most one pending
    /// session (the one with the oldest <c>CreatedAt</c>).
    /// </summary>
    /// <param name="parametersSection">Task parameters; not read by this task.</param>
    /// <param name="cancellationToken">Token that aborts the run.</param>
    /// <exception cref="OperationCanceledException">
    /// The run was cancelled. If the search had not yet been saved as Done, the session is put
    /// back to Pending before the exception is rethrown.
    /// </exception>
    public async Task ExecuteAsync(IConfiguration parametersSection, CancellationToken cancellationToken)
    {
        if (!_settings.Enabled)
        {
            return;
        }

        using var scope = _scopeFactory.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<CvSearchDbContext>();

        await RecoverStuckSessionsAsync(db, cancellationToken);

        var pending = await db.JobSearchSessions
            .Where(s => s.Status == JobSearchStatus.Pending)
            .OrderBy(s => s.CreatedAt)
            .FirstOrDefaultAsync(cancellationToken);
        if (pending is null)
        {
            return;
        }

        _logger.LogInformation("Processing job search session {SessionId}", pending.Id);
        pending.Status = JobSearchStatus.Processing;
        await db.SaveChangesAsync(cancellationToken);

        // True once the Done status has reached the database; from then on only the e-mail is outstanding.
        var searchPersisted = false;
        try
        {
            var results = await RunSearchAsync(pending, db, cancellationToken);
            pending.Status = JobSearchStatus.Done;
            await db.SaveChangesAsync(cancellationToken);
            searchPersisted = true;

            var attachmentPath = BuildCvPath(pending.CvDocumentId);
            await _emailSender.SendResultsAsync(pending.Email, attachmentPath, results, pending.Language, cancellationToken);
            _logger.LogInformation("Session {SessionId} done. {Count} results sent.", pending.Id, results.Count);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            if (!searchPersisted)
            {
                // The token is already cancelled, so the bookkeeping write must not use it;
                // otherwise the write itself throws and the session is stranded in Processing.
                pending.Status = JobSearchStatus.Pending;
                await db.SaveChangesAsync(CancellationToken.None);
                _logger.LogWarning("Session {SessionId} was cancelled and returned to Pending.", pending.Id);
            }

            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Session {SessionId} failed.", pending.Id);
            pending.Status = JobSearchStatus.Failed;

            // Persist the failure even if cancellation was requested in the meantime.
            await db.SaveChangesAsync(CancellationToken.None);
        }
    }

    /// <summary>
    /// Resets sessions that are still Processing although they were created more than
    /// <see cref="StuckSessionTimeout"/> ago (e.g. the container crashed mid-run) back to Pending.
    /// </summary>
    private async Task RecoverStuckSessionsAsync(CvSearchDbContext db, CancellationToken ct)
    {
        // NOTE(review): the cutoff is compared with CreatedAt (when the session was queued), not with
        // the moment processing started - confirm this cannot reset a session another runner is working on.
        var stuckCutoff = DateTime.UtcNow - StuckSessionTimeout;
        var stuckSessions = await db.JobSearchSessions
            .Where(s => s.Status == JobSearchStatus.Processing && s.CreatedAt < stuckCutoff)
            .ToListAsync(ct);
        if (stuckSessions.Count == 0)
        {
            return;
        }

        foreach (var stuck in stuckSessions)
        {
            stuck.Status = JobSearchStatus.Pending;
            _logger.LogWarning("Reset stuck session {SessionId} back to Pending", stuck.Id);
        }

        await db.SaveChangesAsync(ct);
    }

    /// <summary>
    /// Collects job URLs from every provider, matches up to <c>MaxJobsToMatch</c> of them against
    /// the session's CV and stores each match whose score is at least <c>MinMatchScore</c>.
    /// </summary>
    /// <param name="session">Session supplying keywords, CV document id and optional provider configuration.</param>
    /// <param name="db">Context used to persist the results.</param>
    /// <param name="ct">Token that aborts the search.</param>
    /// <returns>The stored results, ordered by descending score.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    private async Task<List<JobSearchResultEntity>> RunSearchAsync(
        JobSearchSessionEntity session,
        CvSearchDbContext db,
        CancellationToken ct)
    {
        var cvKeywords = session.Keywords
            .Split(',', StringSplitOptions.RemoveEmptyEntries)
            .Select(k => k.Trim())
            .Where(k => k.Length > 0)
            .ToList();

        var providers = GetProviders(session.ProviderConfigJson);

        // Case-insensitive set so the same URL returned by several providers is matched only once.
        var jobUrls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var provider in providers)
        {
            var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, ct);
            foreach (var url in urls)
            {
                jobUrls.Add(url);
            }
        }

        var candidates = jobUrls.Take(_settings.MaxJobsToMatch).ToList();
        _logger.LogInformation("Session {SessionId}: {Count} candidate job URLs to match", session.Id, candidates.Count);

        var results = new List<JobSearchResultEntity>();
        foreach (var url in candidates)
        {
            ct.ThrowIfCancellationRequested();

            // Stays null until an entity has been handed to the context, so the catch block can
            // tell whether there is something to untrack.
            JobSearchResultEntity? entity = null;
            try
            {
                var matchRequest = new MatchJobRequest
                {
                    CvDocumentId = session.CvDocumentId,
                    JobUrl = url,
                    GdprConsent = true
                };
                var matchResult = await _matcherApi.MatchJobAsync(matchRequest, ct);
                if (matchResult.Score < _settings.MinMatchScore)
                {
                    _logger.LogDebug("Session {SessionId}: {Url} scored {Score}% (below threshold)", session.Id, url, matchResult.Score);
                    continue;
                }

                entity = new JobSearchResultEntity
                {
                    Id = Guid.NewGuid().ToString("N"),
                    SessionId = session.Id,
                    ProviderName = GuessProvider(url, providers),
                    JobUrl = url,
                    JobTitle = DeriveJobTitle(matchResult.Summary),
                    JobText = string.Empty,
                    Score = matchResult.Score,
                    ResultJson = JsonSerializer.Serialize(matchResult, WebJsonOptions),
                    CreatedAt = DateTime.UtcNow
                };
                db.JobSearchResults.Add(entity);
                await db.SaveChangesAsync(ct);
                results.Add(entity);
            }
            catch (Exception ex)
            {
                // A result that could not be saved must leave the change tracker; otherwise every
                // later SaveChanges (including the final status update) would retry the same insert.
                if (entity is not null)
                {
                    db.Entry(entity).State = EntityState.Detached;
                }

                // Cancellation ends the whole run; it is not a per-URL failure.
                if (ex is OperationCanceledException && ct.IsCancellationRequested)
                {
                    throw;
                }

                _logger.LogWarning(ex, "Session {SessionId}: match failed for {Url}", session.Id, url);
            }
        }

        results.Sort((a, b) => b.Score.CompareTo(a.Score));
        return results;
    }

    /// <summary>
    /// Uses the text before the first '.' of the match summary as the job title and falls back to
    /// "Job" when the summary is missing or that text is blank.
    /// </summary>
    private static string DeriveJobTitle(string? summary)
    {
        var firstSentence = summary?.Split('.', 2)[0].Trim();
        return string.IsNullOrEmpty(firstSentence) ? "Job" : firstSentence;
    }

    /// <summary>
    /// Returns the providers described by the session's JSON configuration, or the enabled
    /// providers from settings when that JSON is absent, deserializes to null, or cannot be read.
    /// </summary>
    private List<JobProviderConfig> GetProviders(string? providerConfigJson)
    {
        if (!string.IsNullOrWhiteSpace(providerConfigJson))
        {
            try
            {
                var overrides = JsonSerializer.Deserialize<List<JobProviderConfig>>(providerConfigJson, WebJsonOptions);
                if (overrides is not null)
                {
                    return overrides;
                }
            }
            catch (Exception ex)
            {
                // Best effort: a broken override must not fail the session, but it should leave a trace.
                _logger.LogWarning(ex, "Session provider configuration could not be parsed; using the providers from settings");
            }
        }

        return _settings.Providers.Where(p => p.Enabled).ToList();
    }

    /// <summary>
    /// Names the provider whose <c>JobLinkContains</c> fragment occurs in <paramref name="url"/>
    /// (case-insensitive, first match wins); otherwise returns the URL's host, or "unknown" when
    /// the URL is not absolute.
    /// </summary>
    private static string GuessProvider(string url, List<JobProviderConfig> providers)
    {
        foreach (var p in providers)
        {
            if (!string.IsNullOrWhiteSpace(p.JobLinkContains) &&
                url.Contains(p.JobLinkContains, StringComparison.OrdinalIgnoreCase))
            {
                return p.Name;
            }
        }

        return Uri.TryCreate(url, UriKind.Absolute, out var uri) ? uri.Host : "unknown";
    }

    /// <summary>
    /// Builds the path of the CV PDF inside the storage directory. Only letters and digits of the
    /// document id are kept, so the id cannot inject path separators; an id with no such
    /// characters maps to "cv.pdf".
    /// </summary>
    private string BuildCvPath(string cvDocumentId)
    {
        var safeId = string.Concat(cvDocumentId.Where(char.IsLetterOrDigit));
        if (string.IsNullOrWhiteSpace(safeId))
        {
            safeId = "cv";
        }

        return Path.Combine(_fileStoragePath, $"{safeId}.pdf");
    }
}