This commit is contained in:
2026-05-13 09:38:52 +03:00
parent 24962fba03
commit d4805b06e6
15 changed files with 514 additions and 7 deletions
+9
View File
@@ -12,6 +12,7 @@ env:
CV_MATCHER_API_IMAGE: apps/myai-cv-matcher-api
RAG_API_IMAGE: apps/myai-rag-api
WEB_IMAGE: apps/myai-web
JOB_IMAGE: apps/myai-job
IMAGE_TAG: staging
jobs:
@@ -47,6 +48,10 @@ jobs:
run: |
docker build -f web/Dockerfile -t "${REGISTRY_HOST}/${WEB_IMAGE}:${IMAGE_TAG}" .
- name: Build Job worker image
run: |
docker build -f cv-cleanup-job/Dockerfile -t "${REGISTRY_HOST}/${JOB_IMAGE}:${IMAGE_TAG}" .
- name: Push API image
run: |
docker push "${REGISTRY_HOST}/${API_IMAGE}:${IMAGE_TAG}"
@@ -62,3 +67,7 @@ jobs:
- name: Push Web image
run: |
docker push "${REGISTRY_HOST}/${WEB_IMAGE}:${IMAGE_TAG}"
- name: Push Job worker image
run: |
docker push "${REGISTRY_HOST}/${JOB_IMAGE}:${IMAGE_TAG}"
+22
View File
@@ -0,0 +1,22 @@
# Build stage: restore and publish the CV cleanup worker with the .NET SDK image.
# The build context is the repository root, so all paths below are relative to it.
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
ARG BUILD_CONFIGURATION=Release
WORKDIR /src

# Copy only the project files first so the restore layer stays cached until a .csproj changes.
# Every project referenced by cv-cleanup-job.csproj has to be present here; job-scheduler is one of
# them, and without it the restore/publish steps fail on the missing project reference.
COPY cv-cleanup-job/cv-cleanup-job.csproj cv-cleanup-job/
COPY job-scheduler/job-scheduler.csproj job-scheduler/
COPY api-models/api-models.csproj api-models/
COPY shared-models/shared-models.csproj shared-models/
RUN dotnet restore cv-cleanup-job/cv-cleanup-job.csproj

# Copy the sources and publish a framework-dependent build (no native app host).
COPY cv-cleanup-job/ cv-cleanup-job/
COPY job-scheduler/ job-scheduler/
COPY api-models/ api-models/
COPY shared-models/ shared-models/
RUN dotnet publish cv-cleanup-job/cv-cleanup-job.csproj -c $BUILD_CONFIGURATION -o /app/publish /p:UseAppHost=false

# Runtime stage: only the published output on top of the .NET runtime image.
FROM mcr.microsoft.com/dotnet/runtime:10.0 AS final
WORKDIR /app
COPY --from=build /app/publish .
ENTRYPOINT ["dotnet", "cv-cleanup-job.dll"]
@@ -0,0 +1,19 @@
namespace CvCleanupJob.Models;
/// <summary>
/// Options for the CvStorageCleanup scheduled task, bound from the task's
/// <c>Jobs:Tasks:n:Parameters</c> configuration section.
/// </summary>
public sealed class CvStorageCleanupParameters
{
    /// <summary>Size budget, in MiB, for the CV files that are kept. Defaults to 40.</summary>
    public double MaxTotalSizeMegabytes { get; set; } = 40d;

    /// <summary>
    /// Glob applied inside the storage directory. The default, <c>*.pdf</c>, matches cached CV PDFs.
    /// </summary>
    public string SearchPattern { get; set; } = "*.pdf";

    /// <summary>
    /// When <c>true</c> (the default), only files whose base name is purely alphanumeric are considered,
    /// which is the naming convention of the API's CV cache. Set to <c>false</c> when other PDFs live in
    /// the same folder and the glob should apply to all of them.
    /// </summary>
    public bool RestrictToCvStyleFileNamesOnly { get; set; } = true;
}
+21
View File
@@ -0,0 +1,21 @@
using CvCleanupJob.Tasks;
using JobScheduler.Scheduling;
using JobScheduler.Tasks;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Models.Settings;
// Composition root of the CV cleanup worker: wires configuration, the cleanup task and the scheduler,
// then runs the host until it is shut down.
var appBuilder = Host.CreateApplicationBuilder(args);
var services = appBuilder.Services;

// Storage location is read from the "FileStorage" configuration section.
var fileStorageSection = appBuilder.Configuration.GetSection("FileStorage");
services.Configure<FileStorageSettings>(fileStorageSection);

// The scheduler receives the full set of available tasks as IEnumerable<IJobTask>.
services.AddSingleton<CvStorageCleanupJobTask>();
services.AddSingleton<IEnumerable<IJobTask>>(
    provider => new IJobTask[] { provider.GetRequiredService<CvStorageCleanupJobTask>() });

services.AddHostedService<JobSchedulerHostedService>();

await appBuilder.Build().RunAsync();
@@ -0,0 +1,134 @@
using CvCleanupJob.Models;
using JobScheduler.Tasks;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Models.Settings;
namespace CvCleanupJob.Tasks;
/// <summary>
/// Scheduled task that keeps the cached CV files within a size budget: when the matching files in the
/// storage directory add up to more than the configured maximum, the oldest ones (by last write time)
/// are deleted until the remaining total is at or below that maximum.
/// </summary>
public sealed class CvStorageCleanupJobTask : IJobTask
{
    /// <summary>Bytes per MiB; the "megabytes" setting is interpreted as multiples of 1024 * 1024 bytes.</summary>
    private const double BytesPerMebibyte = 1024d * 1024d;

    private readonly FileStorageSettings _fileStorage;
    private readonly ILogger<CvStorageCleanupJobTask> _logger;

    /// <summary>Creates the task from the bound file-storage settings and a logger.</summary>
    public CvStorageCleanupJobTask(IOptions<FileStorageSettings> fileStorage, ILogger<CvStorageCleanupJobTask> logger)
    {
        _fileStorage = fileStorage.Value;
        _logger = logger;
    }

    /// <summary>Task type name used to match this task against configuration entries.</summary>
    public string TaskType => "CvStorageCleanup";

    /// <summary>
    /// Runs one cleanup pass. The work is synchronous file-system I/O, so the returned task is already completed.
    /// </summary>
    /// <param name="parametersSection">Section bound to <see cref="CvStorageCleanupParameters"/>; defaults apply when it is empty.</param>
    /// <param name="cancellationToken">Checked on entry and before each deletion.</param>
    /// <returns>A completed task.</returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public Task ExecuteAsync(IConfiguration parametersSection, CancellationToken cancellationToken)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var parameters = parametersSection.Get<CvStorageCleanupParameters>()
            ?? new CvStorageCleanupParameters();

        // NaN compares false against everything, so it must be rejected explicitly; otherwise it would
        // slip past the "<= 0" check and produce an undefined byte budget.
        var budgetMegabytes = parameters.MaxTotalSizeMegabytes;
        if (double.IsNaN(budgetMegabytes) || budgetMegabytes <= 0)
        {
            _logger.LogWarning(
                "CvStorageCleanup skipped: MaxTotalSizeMegabytes must be positive (got {Value}).",
                budgetMegabytes);
            return Task.CompletedTask;
        }

        // A blank path would resolve to the process working directory, which must never be cleaned.
        var configuredPath = _fileStorage.Path;
        if (string.IsNullOrWhiteSpace(configuredPath))
        {
            _logger.LogWarning("CvStorageCleanup skipped: FileStorage:Path is not configured.");
            return Task.CompletedTask;
        }

        var root = ResolveStorageRoot(configuredPath);
        if (!Directory.Exists(root))
        {
            _logger.LogDebug("CvStorageCleanup: directory does not exist yet: {Path}", root);
            return Task.CompletedTask;
        }

        var maxBytes = ToByteBudget(budgetMegabytes);

        var candidates = Directory
            .EnumerateFiles(root, parameters.SearchPattern, SearchOption.TopDirectoryOnly)
            .Where(p => !parameters.RestrictToCvStyleFileNamesOnly || IsCvStyleFileName(Path.GetFileName(p)))
            .Select(p => new FileInfo(p))
            .Where(f => f.Exists)
            .ToList();

        if (candidates.Count == 0)
        {
            _logger.LogDebug("CvStorageCleanup: no files matched under {Path}.", root);
            return Task.CompletedTask;
        }

        long totalBytes = candidates.Sum(f => f.Length);
        if (totalBytes <= maxBytes)
        {
            _logger.LogInformation(
                "CvStorageCleanup: within budget ({TotalMb:F2} MiB / {MaxMb:F2} MiB). No files removed.",
                totalBytes / BytesPerMebibyte,
                budgetMegabytes);
            return Task.CompletedTask;
        }

        // Oldest first; the full name is a tie-breaker so the order is deterministic.
        var oldestFirst = candidates
            .OrderBy(f => f.LastWriteTimeUtc)
            .ThenBy(f => f.FullName, StringComparer.Ordinal);

        long remaining = totalBytes;
        long freedBytes = 0;
        var deleted = 0;

        foreach (var file in oldestFirst)
        {
            if (remaining <= maxBytes)
            {
                break;
            }

            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var len = file.Length;
                file.Delete();
                remaining -= len;
                freedBytes += len;
                deleted++;

                _logger.LogInformation(
                    "CvStorageCleanup: deleted oldest file {File} ({SizeKb} KiB, remaining aggregate ~{RemainMb:F2} MiB).",
                    file.Name,
                    len / 1024d,
                    remaining / BytesPerMebibyte);
            }
            catch (Exception ex)
            {
                // Best effort: a file that cannot be removed (locked, permissions) is skipped and the
                // next-oldest file is tried instead.
                _logger.LogWarning(ex, "CvStorageCleanup: could not delete {File}", file.FullName);
            }
        }

        _logger.LogInformation(
            "CvStorageCleanup finished: removed {Deleted} file(s), freed ~{FreedMb:F2} MiB; size now ~{RemainMb:F2} MiB (budget {MaxMb:F2} MiB).",
            deleted,
            freedBytes / BytesPerMebibyte,
            remaining / BytesPerMebibyte,
            budgetMegabytes);

        return Task.CompletedTask;
    }

    /// <summary>Matches API behavior for cached CV paths: the name without extension is non-empty and alphanumeric.</summary>
    internal static bool IsCvStyleFileName(string fileName)
    {
        var stem = Path.GetFileNameWithoutExtension(fileName);
        return stem.Length > 0 && stem.All(char.IsLetterOrDigit);
    }

    /// <summary>
    /// Returns <paramref name="configuredPath"/> unchanged when it is rooted; otherwise resolves it against
    /// the current working directory.
    /// </summary>
    internal static string ResolveStorageRoot(string configuredPath)
    {
        if (Path.IsPathRooted(configuredPath))
        {
            return configuredPath;
        }

        return Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), configuredPath));
    }

    /// <summary>
    /// Converts a positive, non-NaN MiB budget to whole bytes, clamping to <see cref="long.MaxValue"/> so
    /// that very large (or infinite) budgets mean "unlimited" rather than overflowing the cast.
    /// </summary>
    private static long ToByteBudget(double megabytes)
    {
        var bytes = Math.Round(megabytes * BytesPerMebibyte, MidpointRounding.AwayFromZero);
        return bytes >= (double)long.MaxValue ? long.MaxValue : (long)bytes;
    }
}
+25
View File
@@ -0,0 +1,25 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.Hosting.Lifetime": "Information"
}
},
"FileStorage": {
"Path": "Files"
},
"Jobs": {
"Tasks": [
{
"TaskType": "CvStorageCleanup",
"Enabled": true,
"Interval": "01:00:00",
"Parameters": {
"MaxTotalSizeMegabytes": 40,
"SearchPattern": "*.pdf",
"RestrictToCvStyleFileNamesOnly": true
}
}
]
}
}
+20
View File
@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk.Worker">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<RootNamespace>CvCleanupJob</RootNamespace>
<AssemblyName>cv-cleanup-job</AssemblyName>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Hosting" Version="10.0.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\api-models\api-models.csproj" />
<ProjectReference Include="..\job-scheduler\job-scheduler.csproj" />
</ItemGroup>
</Project>
+13 -6
View File
@@ -39,10 +39,6 @@ Database__User=sa
Database__Password=
Database__TrustServerCertificate=true
# Internal API protection
InternalApi__ApiKey=
InternalApi__RequireApiKey=false
# RAG settings
Rag__MaxFileSizeMb=8
Rag__ChunkSize=900
@@ -57,9 +53,15 @@ Matcher__TopK=10
Matcher__DeepScoreTopN=5
Matcher__MaxJobTextChars=60000
# RagApi used by cv-matcher
RagApi__BaseUrl=http://rag-api:8082
# RagApi credentials
RagApi__BaseUrl=http://rag-api:8080
RagApi__InternalApiKey=
RagApi__RequireApiKey=true
# CvMatcher credentials
CvMatcherApi__BaseUrl=http://cv-matcher-api:8080
CvMatcherApi__InternalApiKey=
CvMatcherApi__RequireApiKey=true
# Captcha
Captcha__Provider=Recaptcha
@@ -67,6 +69,11 @@ Captcha__SecretKey=
Captcha__PublicKey=
Captcha__MinimumScore=0.5
# Job worker (scheduled tasks: CV file storage cleanup, etc.)
Jobs__CvStorageCleanupEnabled=true
Jobs__CvStorageCleanupInterval=01:00:00
Jobs__CvStorageMaxTotalSizeMegabytes=40
# File Storage
FileStorage__Path=/opt/myai/files
FileStorage__DefaultFileName=
@@ -206,6 +206,28 @@ services:
labels:
- "com.centurylinklabs.watchtower.enable=true"
job:
  image: registry.easysoft.ro/apps/myai-job:production
  container_name: myai-job
  depends_on:
    - api
  environment:
    # The worker is built on the .NET Generic Host, which takes its environment name from
    # DOTNET_ENVIRONMENT; ASPNETCORE_ENVIRONMENT alone is not read by it.
    - DOTNET_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Production}
    - ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Production}
    - APP_ENVIRONMENT_NAME=${APP_ENVIRONMENT_NAME:-myai.production}
    # Relative to the image's working directory (/app), i.e. the volume mounted below.
    - FileStorage__Path=Files
    - Jobs__Tasks__0__Enabled=${Jobs__CvStorageCleanupEnabled:-true}
    - Jobs__Tasks__0__Interval=${Jobs__CvStorageCleanupInterval:-01:00:00}
    - Jobs__Tasks__0__Parameters__MaxTotalSizeMegabytes=${Jobs__CvStorageMaxTotalSizeMegabytes:-40}
    - Logging__LogLevel__Default=${Logging__LogLevel__Default:-Information}
    - Logging__LogLevel__Microsoft=${Logging__LogLevel__Microsoft:-Warning}
  volumes:
    - /opt/myai/files:/app/Files
  networks:
    - myai-network
  restart: unless-stopped
  labels:
    - "com.centurylinklabs.watchtower.enable=true"
web:
image: registry.easysoft.ro/apps/myai-web:production
container_name: myai-web
+22
View File
@@ -206,6 +206,28 @@ services:
labels:
- "com.centurylinklabs.watchtower.enable=true"
job:
  image: registry.easysoft.ro/apps/myai-job:staging
  container_name: myai-job
  depends_on:
    - api
  environment:
    # The worker is built on the .NET Generic Host, which takes its environment name from
    # DOTNET_ENVIRONMENT; with only ASPNETCORE_ENVIRONMENT set it would run as "Production".
    - DOTNET_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Staging}
    - ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Staging}
    - APP_ENVIRONMENT_NAME=${APP_ENVIRONMENT_NAME:-myai.staging}
    # Relative to the image's working directory (/app), i.e. the volume mounted below.
    - FileStorage__Path=Files
    - Jobs__Tasks__0__Enabled=${Jobs__CvStorageCleanupEnabled:-true}
    - Jobs__Tasks__0__Interval=${Jobs__CvStorageCleanupInterval:-01:00:00}
    - Jobs__Tasks__0__Parameters__MaxTotalSizeMegabytes=${Jobs__CvStorageMaxTotalSizeMegabytes:-40}
    - Logging__LogLevel__Default=${Logging__LogLevel__Default:-Information}
    - Logging__LogLevel__Microsoft=${Logging__LogLevel__Microsoft:-Warning}
  volumes:
    - /opt/myai/files:/app/Files
  networks:
    - myai-network
  restart: unless-stopped
  labels:
    - "com.centurylinklabs.watchtower.enable=true"
web:
image: registry.easysoft.ro/apps/myai-web:staging
container_name: myai-web
+26
View File
@@ -226,6 +226,32 @@ services:
labels:
- "com.centurylinklabs.watchtower.enable=true"
job:
  build:
    context: ..
    dockerfile: cv-cleanup-job/Dockerfile
  container_name: myai-job
  depends_on:
    - api
  env_file:
    - .env
  environment:
    # The worker is built on the .NET Generic Host, which takes its environment name from
    # DOTNET_ENVIRONMENT; with only ASPNETCORE_ENVIRONMENT set it would run as "Production".
    - DOTNET_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Development}
    - ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Development}
    - APP_ENVIRONMENT_NAME=${APP_ENVIRONMENT_NAME:-myai.local}
    # Inside the container the files always live at /app/Files (see the volume below), so the path is
    # fixed here. FileStorage__Path from .env is a host path and is only used as the volume source;
    # passing it through would point the job at a directory that does not exist in the container.
    - FileStorage__Path=Files
    - Jobs__Tasks__0__Enabled=${Jobs__CvStorageCleanupEnabled:-true}
    - Jobs__Tasks__0__Interval=${Jobs__CvStorageCleanupInterval:-01:00:00}
    - Jobs__Tasks__0__Parameters__MaxTotalSizeMegabytes=${Jobs__CvStorageMaxTotalSizeMegabytes:-40}
    - Logging__LogLevel__Default=${Logging__LogLevel__Default:-Information}
    - Logging__LogLevel__Microsoft=${Logging__LogLevel__Microsoft:-Warning}
  volumes:
    - ${FileStorage__Path:-../Files}:/app/Files
  networks:
    - myai-network
  restart: unless-stopped
  labels:
    - "com.centurylinklabs.watchtower.enable=true"
web:
build:
context: ..
@@ -0,0 +1,133 @@
using JobScheduler.Tasks;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
namespace JobScheduler.Scheduling;
/// <summary>
/// Background service that reads the <c>Jobs:Tasks</c> configuration section and runs every enabled task
/// in its own loop, waiting the task's configured interval between runs.
/// </summary>
public sealed class JobSchedulerHostedService : BackgroundService
{
    /// <summary>Interval used when a task has no usable <c>Interval</c> value.</summary>
    private static readonly TimeSpan DefaultInterval = TimeSpan.FromHours(1);

    /// <summary>
    /// Longest wait handed to a single <c>Task.Delay</c> call. <c>Task.Delay</c> throws for delays beyond
    /// its supported maximum, so longer intervals are waited in slices of this size.
    /// </summary>
    private static readonly TimeSpan MaxSingleDelay = TimeSpan.FromMilliseconds(int.MaxValue);

    private readonly IConfiguration _configuration;
    private readonly IReadOnlyDictionary<string, IJobTask> _tasksByType;
    private readonly ILogger<JobSchedulerHostedService> _logger;

    /// <summary>Creates the scheduler and indexes the registered tasks by their (case-insensitive) task type.</summary>
    public JobSchedulerHostedService(
        IConfiguration configuration,
        IEnumerable<IJobTask> tasks,
        ILogger<JobSchedulerHostedService> logger)
    {
        _configuration = configuration;
        _tasksByType = tasks.ToDictionary(t => t.TaskType, t => t, StringComparer.OrdinalIgnoreCase);
        _logger = logger;
    }

    /// <summary>
    /// Starts one loop per enabled, resolvable entry under <c>Jobs:Tasks</c> and completes when all loops end.
    /// When nothing can be run, the service stays idle until it is stopped.
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var section = _configuration.GetSection("Jobs:Tasks");
        var children = section.GetChildren().ToList();
        if (children.Count == 0)
        {
            _logger.LogWarning("No Jobs:Tasks configured; scheduler idle.");
            await Task.Delay(Timeout.InfiniteTimeSpan, stoppingToken);
            return;
        }

        var loops = new List<Task>();
        foreach (var taskSection in children)
        {
            if (!taskSection.GetValue("Enabled", true))
            {
                _logger.LogInformation("Job task disabled: {Section}", taskSection.Path);
                continue;
            }

            var taskType = taskSection["TaskType"];
            if (string.IsNullOrWhiteSpace(taskType))
            {
                _logger.LogWarning("Skipping task without TaskType at {Section}", taskSection.Path);
                continue;
            }

            if (!_tasksByType.TryGetValue(taskType, out var task))
            {
                _logger.LogError("No IJobTask registered for TaskType '{TaskType}'.", taskType);
                continue;
            }

            var interval = ResolveInterval(taskType, taskSection["Interval"]);
            var parameters = taskSection.GetSection("Parameters");
            loops.Add(RunTaskLoopAsync(taskType, task, parameters, interval, stoppingToken));
        }

        if (loops.Count == 0)
        {
            _logger.LogWarning("No enabled job tasks to run.");
            await Task.Delay(Timeout.InfiniteTimeSpan, stoppingToken);
            return;
        }

        await Task.WhenAll(loops);
    }

    /// <summary>
    /// Runs <paramref name="task"/> immediately and then once per <paramref name="interval"/> until the
    /// service is stopped. A failing run is logged and does not end the loop.
    /// </summary>
    private async Task RunTaskLoopAsync(
        string taskType,
        IJobTask task,
        IConfiguration parameters,
        TimeSpan interval,
        CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "Starting job loop for {TaskType} every {Interval}.",
            taskType,
            interval);

        try
        {
            while (!stoppingToken.IsCancellationRequested)
            {
                try
                {
                    await task.ExecuteAsync(parameters, stoppingToken);
                }
                catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
                {
                    // Shutdown: let the outer handler end the loop.
                    throw;
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "Job task {TaskType} failed.", taskType);
                }

                await DelayAsync(interval, stoppingToken);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            _logger.LogInformation("Job loop for {TaskType} cancelled.", taskType);
        }
    }

    /// <summary>
    /// Turns the raw <c>Interval</c> setting into a usable positive interval. A missing value silently uses
    /// the default; an unparsable or non-positive value is reported once and replaced by the default.
    /// </summary>
    private TimeSpan ResolveInterval(string taskType, string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return DefaultInterval;
        }

        // Configuration values are machine-written, so they are parsed with the invariant culture.
        if (!TimeSpan.TryParse(value, System.Globalization.CultureInfo.InvariantCulture, out var parsed))
        {
            _logger.LogWarning(
                "Job task {TaskType} has an unparsable Interval '{Interval}'; using {Fallback} instead.",
                taskType,
                value,
                DefaultInterval);
            return DefaultInterval;
        }

        if (parsed <= TimeSpan.Zero)
        {
            _logger.LogWarning(
                "Job task {TaskType} has non-positive Interval '{Interval}'; using {Fallback} instead.",
                taskType,
                value,
                DefaultInterval);
            return DefaultInterval;
        }

        return parsed;
    }

    /// <summary>Waits for <paramref name="delay"/>, splitting it into slices that <c>Task.Delay</c> accepts.</summary>
    private static async Task DelayAsync(TimeSpan delay, CancellationToken cancellationToken)
    {
        while (delay > MaxSingleDelay)
        {
            await Task.Delay(MaxSingleDelay, cancellationToken);
            delay -= MaxSingleDelay;
        }

        await Task.Delay(delay, cancellationToken);
    }
}
+14
View File
@@ -0,0 +1,14 @@
using Microsoft.Extensions.Configuration;
namespace JobScheduler.Tasks;
/// <summary>
/// Contract for a named unit of scheduled work. Implementations are looked up by <see cref="TaskType"/>
/// and invoked by <see cref="JobScheduler.Scheduling.JobSchedulerHostedService"/>.
/// </summary>
public interface IJobTask
{
    /// <summary>Identifier matched against <c>Jobs:Tasks:*:TaskType</c> in configuration.</summary>
    string TaskType { get; }

    /// <summary>Performs one run of the task.</summary>
    /// <param name="parametersSection">Configuration section that holds this task's parameters.</param>
    /// <param name="cancellationToken">Token used to request that the run stops.</param>
    /// <returns>A task that completes when the run has finished.</returns>
    Task ExecuteAsync(IConfiguration parametersSection, CancellationToken cancellationToken);
}
+17
View File
@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<RootNamespace>JobScheduler</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="10.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.0" />
</ItemGroup>
</Project>
+16
View File
@@ -30,6 +30,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "startup-helpers", "startup-
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "common-helpers", "common-helpers\common-helpers.csproj", "{4EDDEE9A-E9C7-4972-9C4A-3177611CCFE3}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Jobs", "Jobs", "{F1A2B3C4-D5E6-4789-ABCD-EF0123456789}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "cv-cleanup-job", "cv-cleanup-job\cv-cleanup-job.csproj", "{E7F21C94-6D88-4E9B-A12F-9C3E8D5B7A41}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "job-scheduler", "job-scheduler\job-scheduler.csproj", "{A19D2776-B935-BD35-4AB1-3FCE2092805A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -80,6 +86,14 @@ Global
{4EDDEE9A-E9C7-4972-9C4A-3177611CCFE3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4EDDEE9A-E9C7-4972-9C4A-3177611CCFE3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4EDDEE9A-E9C7-4972-9C4A-3177611CCFE3}.Release|Any CPU.Build.0 = Release|Any CPU
{E7F21C94-6D88-4E9B-A12F-9C3E8D5B7A41}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E7F21C94-6D88-4E9B-A12F-9C3E8D5B7A41}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E7F21C94-6D88-4E9B-A12F-9C3E8D5B7A41}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E7F21C94-6D88-4E9B-A12F-9C3E8D5B7A41}.Release|Any CPU.Build.0 = Release|Any CPU
{A19D2776-B935-BD35-4AB1-3FCE2092805A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A19D2776-B935-BD35-4AB1-3FCE2092805A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A19D2776-B935-BD35-4AB1-3FCE2092805A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A19D2776-B935-BD35-4AB1-3FCE2092805A}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -94,6 +108,8 @@ Global
{185A8BB0-344A-4856-AEB4-213866EB2EE7} = {E08A1D43-24A3-4F93-B66A-4230FD8261BA}
{7446D193-8636-4E58-96E4-0C8CB8790679} = {43E9CD21-25B6-4CB4-B94E-5B953B2E1284}
{4EDDEE9A-E9C7-4972-9C4A-3177611CCFE3} = {43E9CD21-25B6-4CB4-B94E-5B953B2E1284}
{E7F21C94-6D88-4E9B-A12F-9C3E8D5B7A41} = {F1A2B3C4-D5E6-4789-ABCD-EF0123456789}
{A19D2776-B935-BD35-4AB1-3FCE2092805A} = {F1A2B3C4-D5E6-4789-ABCD-EF0123456789}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {6246A67B-299E-4E64-8DBE-1A66771E7C67}