Changes
Build and Push Docker Images / build (push) Successful in 37s

This commit is contained in:
2026-05-04 21:02:35 +03:00
parent 34625ae242
commit fa1ef23c02
87 changed files with 3151 additions and 522 deletions
+4 -4
View File
@@ -1,9 +1,9 @@
namespace Api.Services.Contracts
{
public sealed record CaptchaVerdict(bool Success, string? Error, double? Score);
using Api.Services.Contracts.Models;
namespace Api.Services.Contracts
{
/// <summary>
/// Contract for validating a captcha token submitted by a client.
/// </summary>
public interface ICaptchaVerifier
{
// NOTE(review): the two declarations below differ only in return type and appear to be the
// before/after lines of a diff hunk (CaptchaVerdict -> CaptchaVerdictModel) — confirm only one survives.
/// <summary>Verifies <paramref name="token"/>; <paramref name="userIp"/> is optional caller context.</summary>
Task<CaptchaVerdict> VerifyAsync(string token, string? userIp, CancellationToken ct);
/// <summary>Verifies <paramref name="token"/>; <paramref name="userIp"/> is optional caller context.</summary>
/// <param name="token">Captcha token to verify.</param>
/// <param name="userIp">Client IP address, or <c>null</c> when it is not known.</param>
/// <param name="ct">Token used to cancel the verification.</param>
/// <returns>The verdict describing success, an error message and an optional score.</returns>
Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, CancellationToken ct);
}
}
+1 -1
View File
@@ -1,4 +1,4 @@
using Api.Models;
using Api.Requests;
namespace Api.Services.Contracts
{
@@ -0,0 +1,4 @@
namespace Api.Services.Contracts.Models
{
    /// <summary>
    /// Immutable outcome of a captcha verification.
    /// </summary>
    /// <param name="Success"><c>true</c> when the captcha was accepted.</param>
    /// <param name="Error">Short reason for a rejection; <c>null</c> when there is nothing to report.</param>
    /// <param name="Score">Score reported by the captcha provider, or <c>null</c> when none is available.</param>
    public sealed record CaptchaVerdictModel(
        bool Success,
        string? Error,
        double? Score);
}
@@ -1,7 +0,0 @@
namespace Api.Services.Contracts.Rag;
/// <summary>
/// Abstraction over the AI provider operations exposed here: creating an embedding and
/// creating a chat completion.
/// </summary>
public interface IAiRagClient
{
/// <summary>Creates an embedding vector for <paramref name="input"/>.</summary>
/// <param name="input">Text to embed.</param>
/// <param name="ct">Token used to cancel the request.</param>
/// <returns>The embedding as an array of floats.</returns>
Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct);
/// <summary>Requests a chat completion for the given system and user prompts.</summary>
/// <param name="systemPrompt">Instruction text sent as the system message.</param>
/// <param name="userPrompt">Text sent as the user message.</param>
/// <param name="ct">Token used to cancel the request.</param>
/// <returns>The completion content as a string.</returns>
Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, CancellationToken ct);
}
@@ -1,9 +0,0 @@
using Api.Models.Rag;
// NOTE(review): this namespace starts with lowercase "api", while the other Rag contracts are declared in
// "Api.Services.Contracts.Rag". It looks unintentional — confirm before renaming, since consumers import it.
namespace api.Services.Contracts.Rag;
/// <summary>
/// Entry points of the CV matching feature: ingesting an uploaded CV and matching it against a job.
/// </summary>
public interface ICvRagService
{
/// <summary>Ingests the uploaded CV <paramref name="file"/>.</summary>
/// <param name="file">Uploaded CV file.</param>
/// <param name="gdprConsent">Whether the user gave GDPR consent.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The ingest result.</returns>
Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct);
/// <summary>Matches a previously ingested CV against the job described by <paramref name="request"/>.</summary>
/// <param name="request">Match request.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The match result.</returns>
Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct);
}
@@ -1,6 +0,0 @@
namespace Api.Services.Contracts.Rag;
/// <summary>
/// Produces job text from a job URL and/or a job description supplied by the caller.
/// </summary>
public interface IJobTextExtractor
{
/// <summary>Extracts job text from the given inputs; both inputs are optional.</summary>
/// <param name="jobUrl">URL of the job posting, or <c>null</c>.</param>
/// <param name="jobDescription">Job description text, or <c>null</c>.</param>
/// <param name="ct">Token used to cancel the operation.</param>
/// <returns>The extracted job text.</returns>
Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct);
}
@@ -1,6 +0,0 @@
namespace Api.Services.Contracts.Rag;
/// <summary>
/// Extracts text from a PDF supplied as a stream.
/// </summary>
public interface IPdfTextExtractor
{
/// <summary>Reads <paramref name="pdfStream"/> and returns its text.</summary>
/// <param name="pdfStream">Stream containing the PDF.</param>
/// <returns>The extracted text.</returns>
string ExtractText(Stream pdfStream);
}
@@ -1,6 +0,0 @@
namespace Api.Services.Contracts.Rag;
/// <summary>
/// Splits text into a list of chunks.
/// </summary>
public interface ITextChunker
{
/// <summary>Splits <paramref name="text"/> into chunks.</summary>
/// <param name="text">Text to split.</param>
/// <param name="chunkSize">Requested chunk size.</param>
/// <param name="overlap">Requested overlap between consecutive chunks.</param>
/// <returns>The chunks, in order.</returns>
IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap);
}
-165
View File
@@ -1,165 +0,0 @@
using api.Services.Contracts.Rag;
using Api.Models.Rag;
using Api.Services.Contracts.Rag;
using Api.Settings;
using Microsoft.Extensions.Options;
using System.Text.Json;
namespace Api.Services.Rag;
/// <summary>
/// Orchestrates the CV matching flow: turns an uploaded CV PDF into embedded chunks held in the
/// vector store, and scores a job description against those chunks through the AI client.
/// </summary>
public sealed class CvRagService : ICvRagService
{
    // Shared instance: JsonSerializerOptions caches serialization metadata, so creating a new one
    // for every model response throws that cache away each time.
    private static readonly JsonSerializerOptions MatchJsonOptions = new() { PropertyNameCaseInsensitive = true };

    private readonly IPdfTextExtractor _pdfTextExtractor;
    private readonly ITextChunker _textChunker;
    private readonly IAiRagClient _openAi;
    private readonly ICvVectorStore _store;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly RagSettings _settings;
    private readonly ILogger<CvRagService> _logger;

    public CvRagService(
        IPdfTextExtractor pdfTextExtractor,
        ITextChunker textChunker,
        IAiRagClient openAi,
        ICvVectorStore store,
        IJobTextExtractor jobTextExtractor,
        IOptions<RagSettings> options,
        ILogger<CvRagService> logger)
    {
        _pdfTextExtractor = pdfTextExtractor;
        _textChunker = textChunker;
        _openAi = openAi;
        _store = store;
        _jobTextExtractor = jobTextExtractor;
        _settings = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Validates the upload, extracts its text, embeds every chunk and saves the chunks under a new document id.
    /// </summary>
    /// <param name="file">Uploaded CV; must be a non-empty <c>.pdf</c> within the configured size limit.</param>
    /// <param name="gdprConsent">Must be <c>true</c>.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The new document id, stored chunk count, extracted text length and a one-sentence summary.</returns>
    /// <exception cref="InvalidOperationException">
    /// Consent is missing, the file is empty, too large or not a PDF, or fewer than 80 characters were extracted.
    /// </exception>
    public async Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        if (!gdprConsent) throw new InvalidOperationException("GDPR consent is required.");
        if (file.Length == 0) throw new InvalidOperationException("CV PDF is empty.");
        if (file.Length > _settings.MaxPdfSizeMb * 1024L * 1024L) throw new InvalidOperationException($"PDF is too large. Max size is {_settings.MaxPdfSizeMb} MB.");
        if (!string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase)) throw new InvalidOperationException("Only PDF files are accepted.");

        await using var stream = file.OpenReadStream();
        var text = _pdfTextExtractor.ExtractText(stream);
        if (text.Length < 80) throw new InvalidOperationException("Could not extract enough text from this PDF.");

        var documentId = $"cv_{Guid.NewGuid():N}";
        // The configured TTL is floored at 10 minutes.
        var expiresAt = DateTimeOffset.UtcNow.AddMinutes(Math.Max(10, _settings.CvTtlMinutes));
        var chunks = _textChunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        var stored = new List<StoredCvChunk>();
        for (var i = 0; i < chunks.Count; i++)
        {
            ct.ThrowIfCancellationRequested();
            stored.Add(new StoredCvChunk
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = documentId,
                Text = chunks[i],
                Embedding = await _openAi.CreateEmbeddingAsync(chunks[i], ct),
                ChunkIndex = i,
                ExpiresAt = expiresAt
            });
        }

        _store.Save(documentId, stored);
        var summary = await SummarizeCvAsync(text, ct);
        return new CvIngestResponse(documentId, stored.Count, text.Length, summary);
    }

    /// <summary>
    /// Retrieves the stored CV chunks most similar to the job text and asks the model for a structured match.
    /// </summary>
    /// <param name="request">Match request carrying consent, the CV document id and the job URL or description.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The parsed match; when the model returns no evidence, truncated retrieved chunks are used instead.</returns>
    /// <exception cref="InvalidOperationException">
    /// Consent or the document id is missing, the CV is not in the store, or fewer than 80 characters of job text were obtained.
    /// </exception>
    public async Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct)
    {
        if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
        if (string.IsNullOrWhiteSpace(request.CvDocumentId)) throw new InvalidOperationException("Missing CV document id.");

        var cvChunks = _store.Get(request.CvDocumentId);
        if (cvChunks.Count == 0) throw new InvalidOperationException("CV context was not found or has expired. Upload the CV again.");

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText.Length < 80) throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");

        var jobEmbedding = await _openAi.CreateEmbeddingAsync(jobText, ct);
        var retrieved = _store.Search(request.CvDocumentId, jobEmbedding, _settings.TopK);
        var cvContext = string.Join("\n\n", retrieved.Select(x => $"CV chunk {x.ChunkIndex} | similarity {x.Score:0.000}:\n{x.Text}"));

        var systemPrompt = "You are a strict senior technical recruiter and AI CV matcher. Return only valid JSON. Do not invent candidate experience. Use only the supplied CV context and job text.";
        var userPrompt = $$"""
            Compare the candidate CV context with the job description.
            Return this JSON shape exactly:
            {
            "score": 0,
            "summary": "short direct assessment",
            "strengths": ["strength 1"],
            "gaps": ["gap 1"],
            "recommendations": ["action 1"],
            "evidence": ["short CV evidence quote or paraphrase"]
            }
            Score must be 0-100.
            CV CONTEXT:
            {{cvContext}}
            JOB DESCRIPTION:
            {{jobText}}
            """;

        var content = await _openAi.CreateChatCompletionAsync(systemPrompt, userPrompt, ct);
        var response = ParseMatchResponse(content);
        if (response.Evidence.Count == 0)
        {
            // Fall back to the retrieved chunks themselves, capped at 280 characters each.
            response.Evidence = retrieved.Select(x => x.Text.Length > 280 ? x.Text[..280] + "..." : x.Text).ToList();
        }

        return response;
    }

    /// <summary>
    /// Best-effort one-sentence summary of the CV; any failure yields the placeholder "CV indexed.".
    /// </summary>
    private async Task<string> SummarizeCvAsync(string cvText, CancellationToken ct)
    {
        try
        {
            // Only the first 8000 characters are sent to the model.
            var shortened = cvText.Length > 8000 ? cvText[..8000] : cvText;
            var content = await _openAi.CreateChatCompletionAsync(
                "Return only valid JSON.",
                $$"""
                Summarize this CV in one concise sentence. Return JSON: { "summary": "..." }
                CV:
                {{shortened}}
                """,
                ct);
            using var doc = JsonDocument.Parse(content);
            return doc.RootElement.TryGetProperty("summary", out var summary) ? summary.GetString() ?? "CV indexed." : "CV indexed.";
        }
        // When the caller cancelled, let the exception propagate instead of reporting a fake success.
        // Failures with a live token (including provider timeouts) stay best-effort.
        catch (Exception ex) when (!ct.IsCancellationRequested)
        {
            _logger.LogWarning(ex, "CV summary failed");
            return "CV indexed.";
        }
    }

    /// <summary>
    /// Deserializes the model output into a <see cref="JobMatchResponse"/>, clamping the score to 0-100 and
    /// replacing missing lists with empty ones. Unparseable output is logged and produces a score-0 fallback.
    /// </summary>
    private JobMatchResponse ParseMatchResponse(string content)
    {
        try
        {
            var response = JsonSerializer.Deserialize<JobMatchResponse>(content, MatchJsonOptions) ?? new JobMatchResponse();
            response.Score = Math.Clamp(response.Score, 0, 100);
            response.Strengths ??= [];
            response.Gaps ??= [];
            response.Recommendations ??= [];
            response.Evidence ??= [];
            return response;
        }
        catch (Exception ex)
        {
            // The fallback summary tells the user to check the logs, so actually write one.
            // The raw model output is deliberately not logged: it can contain CV text.
            _logger.LogWarning(ex, "AI match response could not be parsed");
            return new JobMatchResponse
            {
                Score = 0,
                Summary = "The AI response could not be parsed. Check logs and prompt output.",
                Strengths = [],
                Gaps = ["Invalid JSON returned by the model."],
                Recommendations = [],
                Evidence = []
            };
        }
    }
}
-79
View File
@@ -1,79 +0,0 @@
using Api.Models.Rag;
namespace Api.Services.Rag;
/// <summary>
/// Storage for embedded CV chunks, grouped by document id, with lookup and similarity search.
/// </summary>
public interface ICvVectorStore
{
/// <summary>Stores <paramref name="chunks"/> under <paramref name="documentId"/>.</summary>
void Save(string documentId, IEnumerable<StoredCvChunk> chunks);
/// <summary>Returns the chunks stored under <paramref name="documentId"/>.</summary>
IReadOnlyList<StoredCvChunk> Get(string documentId);
/// <summary>
/// Returns chunks of <paramref name="documentId"/> scored against <paramref name="queryEmbedding"/>,
/// limited by <paramref name="topK"/>.
/// </summary>
IReadOnlyList<RetrievedCvChunk> Search(string documentId, float[] queryEmbedding, int topK);
}
/// <summary>
/// <see cref="ICvVectorStore"/> kept in a dictionary keyed by document id (compared case-insensitively).
/// Each save and lookup first drops documents whose chunks have all expired.
/// </summary>
public sealed class InMemoryCvVectorStore : ICvVectorStore
{
    private readonly object _gate = new();
    private readonly Dictionary<string, List<StoredCvChunk>> _documents = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>Replaces whatever is stored under <paramref name="documentId"/> with a copy of <paramref name="chunks"/>.</summary>
    public void Save(string documentId, IEnumerable<StoredCvChunk> chunks)
    {
        lock (_gate)
        {
            EvictExpiredLocked();
            var snapshot = chunks.ToList();
            _documents[documentId] = snapshot;
        }
    }

    /// <summary>Returns a copy of the chunks stored under <paramref name="documentId"/>, or an empty list.</summary>
    public IReadOnlyList<StoredCvChunk> Get(string documentId)
    {
        lock (_gate)
        {
            EvictExpiredLocked();
            if (_documents.TryGetValue(documentId, out var found))
            {
                return found.ToList();
            }

            return new List<StoredCvChunk>();
        }
    }

    /// <summary>
    /// Scores every chunk of the document by cosine similarity to <paramref name="queryEmbedding"/> and
    /// returns the best ones, highest score first; <paramref name="topK"/> is clamped to 1-12.
    /// </summary>
    public IReadOnlyList<RetrievedCvChunk> Search(string documentId, float[] queryEmbedding, int topK)
    {
        var candidates = Get(documentId);
        if (candidates.Count == 0)
        {
            return [];
        }

        var limit = Math.Clamp(topK, 1, 12);
        var scored = new List<RetrievedCvChunk>(candidates.Count);
        foreach (var candidate in candidates)
        {
            scored.Add(new RetrievedCvChunk
            {
                Text = candidate.Text,
                ChunkIndex = candidate.ChunkIndex,
                Score = CosineSimilarity(queryEmbedding, candidate.Embedding)
            });
        }

        return scored.OrderByDescending(item => item.Score).Take(limit).ToList();
    }

    // Caller must hold _gate. Removes every document in which all chunks have ExpiresAt at or before now.
    private void EvictExpiredLocked()
    {
        var cutoff = DateTimeOffset.UtcNow;
        var staleIds = new List<string>();
        foreach (var (id, chunks) in _documents)
        {
            if (chunks.All(chunk => chunk.ExpiresAt <= cutoff))
            {
                staleIds.Add(id);
            }
        }

        foreach (var id in staleIds)
        {
            _documents.Remove(id);
        }
    }

    // Cosine similarity of two vectors; 0 when lengths differ, either is empty, or either has zero magnitude.
    private static double CosineSimilarity(float[] a, float[] b)
    {
        var size = a.Length;
        if (size == 0 || size != b.Length)
        {
            return 0;
        }

        double product = 0;
        double squaresA = 0;
        double squaresB = 0;
        for (var k = 0; k < size; k++)
        {
            product += a[k] * b[k];
            squaresA += a[k] * a[k];
            squaresB += b[k] * b[k];
        }

        if (squaresA == 0 || squaresB == 0)
        {
            return 0;
        }

        return product / (Math.Sqrt(squaresA) * Math.Sqrt(squaresB));
    }
}
-52
View File
@@ -1,52 +0,0 @@
using System.Net;
using System.Text.RegularExpressions;
using Api.Services.Contracts.Rag;
using Api.Settings;
using Microsoft.Extensions.Options;
namespace Api.Services.Rag;
/// <summary>
/// Produces normalized, length-limited job text, preferring a pasted description and otherwise
/// downloading the job page and stripping its markup.
/// </summary>
public sealed class JobTextExtractor : IJobTextExtractor
{
    // Built once instead of being re-resolved on every fetched page; patterns are unchanged.
    private static readonly Regex ScriptBlocks = new("<script[\\s\\S]*?</script>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex StyleBlocks = new("<style[\\s\\S]*?</style>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    private static readonly Regex Tags = new("<[^>]+>", RegexOptions.Compiled);

    private readonly HttpClient _httpClient;
    private readonly RagSettings _settings;

    /// <summary>Stores the dependencies and sets a 20-second timeout and a User-Agent on the injected client.</summary>
    public JobTextExtractor(HttpClient httpClient, IOptions<RagSettings> options)
    {
        _httpClient = httpClient;
        _settings = options.Value;
        _httpClient.Timeout = TimeSpan.FromSeconds(20);
        _httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
    }

    /// <summary>
    /// Returns the pasted description when it has content; otherwise fetches <paramref name="jobUrl"/>
    /// and returns its visible text. Returns an empty string when neither input is provided.
    /// </summary>
    /// <param name="jobUrl">Absolute http/https URL of the job posting, or <c>null</c>.</param>
    /// <param name="jobDescription">Pasted job description, or <c>null</c>.</param>
    /// <param name="ct">Token used to cancel the DNS lookup and the download.</param>
    /// <returns>Whitespace-normalized text, truncated to the configured limit.</returns>
    /// <exception cref="InvalidOperationException">
    /// The URL is not an absolute http/https URL, or its host resolves to a non-public address.
    /// </exception>
    public async Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct)
    {
        var pasted = Normalize(jobDescription ?? string.Empty);
        if (!string.IsNullOrWhiteSpace(pasted)) return Limit(pasted);
        if (string.IsNullOrWhiteSpace(jobUrl)) return string.Empty;

        if (!Uri.TryCreate(jobUrl, UriKind.Absolute, out var uri) || (uri.Scheme != "http" && uri.Scheme != "https"))
        {
            throw new InvalidOperationException("Invalid job URL.");
        }

        // The URL comes from the end user: refuse to fetch hosts inside our own network (SSRF).
        await EnsurePublicHostAsync(uri, ct);

        var html = await _httpClient.GetStringAsync(uri, ct);
        html = ScriptBlocks.Replace(html, " ");
        html = StyleBlocks.Replace(html, " ");
        html = Tags.Replace(html, " ");
        var text = WebUtility.HtmlDecode(html);
        return Limit(Normalize(text));
    }

    // Resolves the host and rejects it when any resolved address is loopback, private or link-local.
    // NOTE(review): this does not cover HTTP redirects or DNS changes between this lookup and the request;
    // the HttpClient handler registration should also disable or validate redirects.
    private static async Task EnsurePublicHostAsync(Uri uri, CancellationToken ct)
    {
        IPAddress[] addresses;
        try
        {
            addresses = await Dns.GetHostAddressesAsync(uri.DnsSafeHost, ct);
        }
        catch (System.Net.Sockets.SocketException ex)
        {
            // Keep the exception type an unresolvable host produced before, when the failure came from the request itself.
            throw new HttpRequestException($"Could not resolve job URL host '{uri.DnsSafeHost}'.", ex);
        }

        if (addresses.Length == 0 || Array.Exists(addresses, address => !IsPublicAddress(address)))
        {
            throw new InvalidOperationException("Invalid job URL. The host must be publicly reachable.");
        }
    }

    // True when the address is outside the loopback, private, link-local, carrier-grade NAT and unspecified ranges.
    private static bool IsPublicAddress(IPAddress address)
    {
        if (address.IsIPv4MappedToIPv6) address = address.MapToIPv4();
        if (IPAddress.IsLoopback(address)) return false;

        if (address.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
        {
            var b = address.GetAddressBytes();
            var isInternal =
                b[0] == 0 ||                                   // 0.0.0.0/8
                b[0] == 10 ||                                  // 10.0.0.0/8
                (b[0] == 100 && b[1] is >= 64 and <= 127) ||   // 100.64.0.0/10
                (b[0] == 169 && b[1] == 254) ||                // 169.254.0.0/16 (link-local, cloud metadata)
                (b[0] == 172 && b[1] is >= 16 and <= 31) ||    // 172.16.0.0/12
                (b[0] == 192 && b[1] == 168);                  // 192.168.0.0/16
            return !isInternal;
        }

        return !(address.IsIPv6LinkLocal
            || address.IsIPv6SiteLocal
            || address.IsIPv6UniqueLocal
            || address.Equals(IPAddress.IPv6Any));
    }

    // Truncates to the configured maximum, which is never treated as lower than 4000 characters.
    private string Limit(string value)
    {
        var max = Math.Max(4000, _settings.MaxJobTextChars);
        return value.Length <= max ? value : value[..max];
    }

    // Collapses every run of whitespace into a single space; blank input becomes an empty string.
    private static string Normalize(string value)
    {
        if (string.IsNullOrWhiteSpace(value)) return string.Empty;
        var parts = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        return string.Join(' ', parts).Trim();
    }
}
-99
View File
@@ -1,99 +0,0 @@
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Api.Services.Contracts.Rag;
using Api.Settings;
using Microsoft.Extensions.Options;
namespace Api.Services.Rag;
/// <summary>
/// <see cref="IAiRagClient"/> that calls the OpenAI HTTP API ("embeddings" and "chat/completions")
/// through the injected <see cref="HttpClient"/>.
/// </summary>
public sealed class OpenAiRagClient : IAiRagClient
{
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly HttpClient _httpClient;
    private readonly OpenAiSettings _settings;

    /// <summary>
    /// Configures the injected client: bearer token (when a key is set), timeout (at least 15 seconds)
    /// and the OpenAI base address.
    /// </summary>
    public OpenAiRagClient(HttpClient httpClient, IOptions<OpenAiSettings> options)
    {
        _httpClient = httpClient;
        _settings = options.Value;

        var hasKey = !string.IsNullOrWhiteSpace(_settings.ApiKey);
        if (hasKey)
        {
            _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", _settings.ApiKey);
        }

        var timeoutSeconds = Math.Max(15, _settings.TimeoutSeconds);
        _httpClient.Timeout = TimeSpan.FromSeconds(timeoutSeconds);
        _httpClient.BaseAddress = new Uri("https://api.openai.com/v1/");
    }

    /// <summary>Requests an embedding for <paramref name="input"/> and returns the first vector in the response.</summary>
    /// <exception cref="InvalidOperationException">The API key is not configured, or the API returned a non-success status.</exception>
    public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
    {
        EnsureConfigured();

        var request = new { model = _settings.EmbeddingModel, input };
        var (succeeded, statusCode, json) = await PostJsonAsync("embeddings", request, ct);
        if (!succeeded)
        {
            throw new InvalidOperationException($"OpenAI embeddings request failed: {statusCode} {json}");
        }

        using var parsed = JsonDocument.Parse(json);
        var values = parsed.RootElement.GetProperty("data")[0].GetProperty("embedding");
        var vector = new float[values.GetArrayLength()];
        var position = 0;
        foreach (var item in values.EnumerateArray())
        {
            vector[position] = item.GetSingle();
            position++;
        }

        return vector;
    }

    /// <summary>
    /// Requests a JSON-object chat completion (temperature 0.2) and returns the first choice's message content,
    /// or "{}" when that content is null.
    /// </summary>
    /// <exception cref="InvalidOperationException">The API key is not configured, or the API returned a non-success status.</exception>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, CancellationToken ct)
    {
        EnsureConfigured();

        var request = new
        {
            model = _settings.ChatModel,
            temperature = 0.2,
            response_format = new { type = "json_object" },
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            }
        };

        var (succeeded, statusCode, json) = await PostJsonAsync("chat/completions", request, ct);
        if (!succeeded)
        {
            throw new InvalidOperationException($"OpenAI chat request failed: {statusCode} {json}");
        }

        using var parsed = JsonDocument.Parse(json);
        var firstChoice = parsed.RootElement.GetProperty("choices")[0];
        var text = firstChoice.GetProperty("message").GetProperty("content").GetString();
        return text ?? "{}";
    }

    // Serializes the payload, POSTs it to the relative route and returns success flag, numeric status and body.
    private async Task<(bool Succeeded, int StatusCode, string Body)> PostJsonAsync<TPayload>(
        string route,
        TPayload payload,
        CancellationToken ct)
    {
        var content = new StringContent(
            JsonSerializer.Serialize(payload, JsonOptions),
            Encoding.UTF8,
            "application/json");

        using var response = await _httpClient.PostAsync(route, content, ct);
        var body = await response.Content.ReadAsStringAsync(ct);
        return (response.IsSuccessStatusCode, (int)response.StatusCode, body);
    }

    // Fails fast when no API key is configured.
    private void EnsureConfigured()
    {
        if (!string.IsNullOrWhiteSpace(_settings.ApiKey))
        {
            return;
        }

        throw new InvalidOperationException("OpenAI API key is not configured. Set OpenAI__ApiKey.");
    }
}
-29
View File
@@ -1,29 +0,0 @@
using Api.Services.Contracts.Rag;
using System.Text;
using UglyToad.PdfPig;
namespace Api.Services.Rag;
/// <summary>
/// <see cref="IPdfTextExtractor"/> that concatenates the text of every page of a PDF and
/// collapses all whitespace runs into single spaces.
/// </summary>
public sealed class PdfTextExtractor : IPdfTextExtractor
{
    /// <summary>Opens <paramref name="pdfStream"/> as a PDF and returns the whitespace-normalized text of all pages.</summary>
    /// <param name="pdfStream">Stream positioned at the PDF content.</param>
    /// <returns>The page texts joined into one line of space-separated words; empty when no text is found.</returns>
    public string ExtractText(Stream pdfStream)
    {
        using var pdf = PdfDocument.Open(pdfStream);

        var raw = new StringBuilder();
        foreach (var page in pdf.GetPages())
        {
            // Page text followed by a blank line keeps the words of adjacent pages apart.
            raw.Append(page.Text).AppendLine().AppendLine();
        }

        return CollapseWhitespace(raw.ToString());
    }

    // Splits on any whitespace and re-joins the words with single spaces; blank input yields an empty string.
    private static string CollapseWhitespace(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        var words = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        var joined = string.Join(' ', words);
        return joined.Trim();
    }
}
-24
View File
@@ -1,24 +0,0 @@
using Api.Services.Contracts.Rag;
namespace Api.Services.Rag;
/// <summary>
/// Splits text into fixed-size character windows that overlap by a configurable amount.
/// </summary>
public sealed class TextChunker : ITextChunker
{
    /// <summary>
    /// Cuts <paramref name="text"/> into trimmed chunks of at most <paramref name="chunkSize"/> characters,
    /// each starting <c>chunkSize - overlap</c> characters after the previous one.
    /// </summary>
    /// <param name="text">Text to split; blank input yields an empty list.</param>
    /// <param name="chunkSize">Requested chunk size; clamped to 300-3000.</param>
    /// <param name="overlap">Requested overlap; clamped to 0 through half of the effective chunk size.</param>
    /// <returns>The non-blank chunks in document order.</returns>
    public IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap)
    {
        if (string.IsNullOrWhiteSpace(text)) return [];

        chunkSize = Math.Clamp(chunkSize, 300, 3000);
        overlap = Math.Clamp(overlap, 0, chunkSize / 2);
        // At least chunkSize / 2, so the loop always advances.
        var step = chunkSize - overlap;

        var chunks = new List<string>();
        for (var start = 0; start < text.Length; start += step)
        {
            var length = Math.Min(chunkSize, text.Length - start);
            var chunk = text.Substring(start, length).Trim();
            if (chunk.Length > 0) chunks.Add(chunk);

            // This window already reaches the end of the text. Another step would only produce a tail
            // that lies entirely inside this chunk's overlap: a duplicate that costs an extra embedding.
            if (start + length >= text.Length) break;
        }

        return chunks;
    }
}
+10 -9
View File
@@ -1,3 +1,4 @@
using Api.Services.Contracts.Models;
using Api.Services.Contracts;
using Api.Settings;
using Microsoft.Extensions.Options;
@@ -17,14 +18,14 @@ namespace Api.Services
_log = log;
}
public async Task<CaptchaVerdict> VerifyAsync(string token, string? userIp, CancellationToken ct)
public async Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, CancellationToken ct)
{
_log.LogDebug("Verifying captcha token for IP {Ip}", userIp ?? "unknown");
if (string.IsNullOrWhiteSpace(_opt.SecretKey))
{
_log.LogWarning("Captcha verification attempted but SecretKey is not configured");
return new CaptchaVerdict(false, "Captcha not configured", null);
return new CaptchaVerdictModel(false, "Captcha not configured", null);
}
var form = new Dictionary<string, string>
@@ -45,21 +46,21 @@ namespace Api.Services
{
_log.LogWarning("Captcha HTTP request failed with status {StatusCode} for IP {Ip}",
(int)resp.StatusCode, userIp ?? "unknown");
return new CaptchaVerdict(false, $"Captcha HTTP {(int)resp.StatusCode}", null);
return new CaptchaVerdictModel(false, $"Captcha HTTP {(int)resp.StatusCode}", null);
}
var data = await resp.Content.ReadFromJsonAsync<RecaptchaResponse>(cancellationToken: ct);
if (data is null)
{
_log.LogError("Failed to parse captcha response for IP {Ip}", userIp ?? "unknown");
return new CaptchaVerdict(false, "Captcha parse error", null);
return new CaptchaVerdictModel(false, "Captcha parse error", null);
}
if (!data.success)
{
_log.LogWarning("Captcha verification failed for IP {Ip}. Score={Score}",
userIp ?? "unknown", data.score);
return new CaptchaVerdict(false, "Captcha failed", data.score);
return new CaptchaVerdictModel(false, "Captcha failed", data.score);
}
// v3 score check (score is typically null for v2)
@@ -67,7 +68,7 @@ namespace Api.Services
{
_log.LogWarning("Captcha score {Score} below minimum {MinScore} for IP {Ip}",
score, _opt.MinimumScore, userIp ?? "unknown");
return new CaptchaVerdict(false, "Captcha score too low", score);
return new CaptchaVerdictModel(false, "Captcha score too low", score);
}
// Optional strictness (usually v3): action/hostname checks
@@ -76,7 +77,7 @@ namespace Api.Services
{
_log.LogWarning("Captcha action mismatch. Expected={Expected}, Actual={Actual}, IP={Ip}",
_opt.ExpectedAction, data.action, userIp ?? "unknown");
return new CaptchaVerdict(false, "Captcha action mismatch", data.score);
return new CaptchaVerdictModel(false, "Captcha action mismatch", data.score);
}
if (!string.IsNullOrWhiteSpace(_opt.ExpectedHostname) &&
@@ -84,12 +85,12 @@ namespace Api.Services
{
_log.LogWarning("Captcha hostname mismatch. Expected={Expected}, Actual={Actual}, IP={Ip}",
_opt.ExpectedHostname, data.hostname, userIp ?? "unknown");
return new CaptchaVerdict(false, "Captcha hostname mismatch", data.score);
return new CaptchaVerdictModel(false, "Captcha hostname mismatch", data.score);
}
_log.LogInformation("Captcha verified successfully for IP {Ip}. Score={Score}",
userIp ?? "unknown", data.score);
return new CaptchaVerdict(true, null, data.score);
return new CaptchaVerdictModel(true, null, data.score);
}
private sealed class RecaptchaResponse
+1 -1
View File
@@ -1,5 +1,5 @@
using Api.Services.Contracts;
using Api.Models;
using Api.Requests;
using Microsoft.Extensions.Options;
using MailKit.Net.Smtp;
using MailKit.Security;