Improve comments and Swagger docs across services #27
@@ -115,7 +115,7 @@ namespace Api.Controllers
|
||||
catch (Exception ex)
|
||||
{
|
||||
_log.LogError(ex, "Subscription failed. ip={Ip} eMail={eMail}", userIp, req.Email);
|
||||
return StatusCode(StatusCodes.Status500InternalServerError, new ErrorResponse { Error = "Failed.", Code = "subscription_failed" });
|
||||
return StatusCode(StatusCodes.Status500InternalServerError, new ErrorResponse { Error = "Could not process subscription.", Code = "subscription_failed" });
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -44,10 +44,6 @@ namespace Api.Controllers
|
||||
/// </summary>
|
||||
/// <param name="fileName">The name of the file to download (optional - uses default from settings if not provided)</param>
|
||||
/// <returns>File stream with appropriate headers for resumable downloads</returns>
|
||||
/// <response code="200">Full file content</response>
|
||||
/// <response code="206">Partial file content (range request)</response>
|
||||
/// <response code="404">File not found</response>
|
||||
/// <response code="416">Requested range not satisfiable</response>
|
||||
[HttpGet("{fileName?}")]
|
||||
[SwaggerOperation(Summary = "Download file", Description = "Downloads a file with support for full and ranged (resumable) transfers.")]
|
||||
[SwaggerResponse(StatusCodes.Status200OK, "Full file content returned")]
|
||||
|
||||
@@ -1,9 +1,21 @@
|
||||
using Api.Services.Contracts.Models;
|
||||
using Api.Services.Contracts.Models;
|
||||
|
||||
namespace Api.Services.Contracts
|
||||
{
|
||||
/// <summary>
|
||||
/// Contract for verifying a client-supplied reCAPTCHA token against the Google verification API.
|
||||
/// </summary>
|
||||
public interface ICaptchaVerifier
|
||||
{
|
||||
/// <summary>
|
||||
/// Sends the token to the Google reCAPTCHA verification endpoint and
|
||||
/// returns a verdict indicating success, score, and any failure reason.
|
||||
/// </summary>
|
||||
/// <param name="token">The reCAPTCHA token provided by the client.</param>
|
||||
/// <param name="userIp">Optional remote IP address passed to Google for additional risk analysis; may be <c>null</c>.</param>
|
||||
/// <param name="expectedAction">Optional action name to validate against the token's embedded action (v3 only); may be <c>null</c>.</param>
|
||||
/// <param name="ct">Token used to cancel the verification call.</param>
|
||||
/// <returns>A task that resolves to a <see cref="CaptchaVerdictModel"/> with the verification outcome.</returns>
|
||||
Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, string? expectedAction, CancellationToken ct);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,65 @@
|
||||
using CvMatcher.Models.Responses;
|
||||
using CvMatcher.Models.Responses;
|
||||
using Models.Requests;
|
||||
|
||||
namespace Api.Services.Contracts
|
||||
{
|
||||
/// <summary>
|
||||
/// Abstraction for sending transactional emails from the public API and for building CV match email content.
|
||||
/// </summary>
|
||||
public interface IEmailSender
|
||||
{
|
||||
/// <summary>
|
||||
/// Sends a contact-form message to the configured operator address.
|
||||
/// </summary>
|
||||
/// <param name="req">Contact request containing name, email, subject, and message.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
/// <returns>A task representing the asynchronous send operation.</returns>
|
||||
Task SendContactAsync(ContactRequest req, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Notifies the configured operator address that a new email subscription was received.
|
||||
/// </summary>
|
||||
/// <param name="req">Subscription request containing the subscriber's email address.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
/// <returns>A task representing the asynchronous send operation.</returns>
|
||||
Task SendSubscribeAsync(SubscribeRequest req, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Sends a background notification when a file download is initiated.
|
||||
/// Does nothing when no notification address is configured.
|
||||
/// </summary>
|
||||
/// <param name="fileName">Name of the downloaded file.</param>
|
||||
/// <param name="userIp">Remote IP address of the downloader, or <c>null</c> if unavailable.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
/// <returns>A task representing the asynchronous send operation.</returns>
|
||||
Task SendFileDownloadNotificationAsync(string fileName, string? userIp, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Sends a CV match results email to the user and the operator copy address.
|
||||
/// </summary>
|
||||
/// <param name="explicitTo">Primary recipient email address, or <c>null</c> to send only the operator copy.</param>
|
||||
/// <param name="subject">Email subject line.</param>
|
||||
/// <param name="body">Pre-built HTML body fragment.</param>
|
||||
/// <param name="attachmentPath">Full path to a CV PDF to attach, or <c>null</c> for no attachment.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
/// <returns>A task representing the asynchronous send operation.</returns>
|
||||
Task SendMatchAsync(string? explicitTo, string subject, string body, string? attachmentPath, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Builds the localised subject line for a CV match email.
|
||||
/// </summary>
|
||||
/// <param name="score">Match score percentage (0–100).</param>
|
||||
/// <param name="jobLabel">Human-readable job title or label; may be <c>null</c>.</param>
|
||||
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>).</param>
|
||||
/// <returns>Rendered subject string.</returns>
|
||||
string BuildMatchEmailSubject(int score, string? jobLabel, string language);
|
||||
|
||||
/// <summary>
|
||||
/// Builds the full HTML body for a CV match email, including an optional job-search footer link.
|
||||
/// </summary>
|
||||
/// <param name="cvDocumentId">Identifier of the indexed CV document.</param>
|
||||
/// <param name="result">Structured match response from the CV matcher engine.</param>
|
||||
/// <param name="jobLabel">Human-readable job title or label; may be <c>null</c>.</param>
|
||||
/// <param name="language">Two-letter language code.</param>
|
||||
/// <param name="jobSearchLink">Optional one-click job-search URL to append as a footer CTA; defaults to <c>null</c>.</param>
|
||||
/// <param name="expiryDays">Number of days until the job-search link expires (shown in the footer copy); defaults to 7.</param>
|
||||
/// <returns>Rendered HTML body string.</returns>
|
||||
string BuildMatchEmailBody(string cvDocumentId, JobMatchResponse result, string? jobLabel, string language, string? jobSearchLink = null, int expiryDays = 7);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,9 @@ using Models.Settings;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Implements <see cref="IEmailSender"/> by delegating all email dispatch to the internal email-api service via Refit.
|
||||
/// </summary>
|
||||
public sealed class EmailApiEmailSender : IEmailSender
|
||||
{
|
||||
private readonly IEmailApiClient _emailApi;
|
||||
@@ -34,6 +37,7 @@ public sealed class EmailApiEmailSender : IEmailSender
|
||||
_log = log;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task SendContactAsync(ContactRequest req, CancellationToken ct)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(_contact.ToEmail))
|
||||
@@ -76,6 +80,7 @@ public sealed class EmailApiEmailSender : IEmailSender
|
||||
_log.LogInformation("Contact email sent successfully from {SenderEmail}", req.Email);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task SendSubscribeAsync(SubscribeRequest req, CancellationToken ct)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(_subscribe.ToEmail))
|
||||
@@ -108,6 +113,7 @@ public sealed class EmailApiEmailSender : IEmailSender
|
||||
_log.LogInformation("Subscription email sent successfully for {Email}", req.Email);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task SendFileDownloadNotificationAsync(string fileName, string? userIp, CancellationToken ct)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(_fileStorage.ToEmail))
|
||||
@@ -146,6 +152,7 @@ public sealed class EmailApiEmailSender : IEmailSender
|
||||
_log.LogInformation("File download notification sent successfully for {FileName}", fileName);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task SendMatchAsync(string? explicitTo, string subject, string body, string? attachmentPath, CancellationToken ct)
|
||||
{
|
||||
var operatorCopy = _emailTemplates.GetOperatorCopy("email.match.subject", "en");
|
||||
@@ -184,6 +191,7 @@ public sealed class EmailApiEmailSender : IEmailSender
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string BuildMatchEmailBody(string cvDocumentId, JobMatchResponse result, string? jobLabel, string language, string? jobSearchLink = null, int expiryDays = 7)
|
||||
{
|
||||
var strengths = result.Strengths?.Count > 0
|
||||
@@ -221,6 +229,7 @@ public sealed class EmailApiEmailSender : IEmailSender
|
||||
return body;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string BuildMatchEmailSubject(int score, string? jobLabel, string language) =>
|
||||
_emailTemplates.Render("email.match.subject", language,
|
||||
("score", score.ToString()),
|
||||
|
||||
@@ -5,6 +5,9 @@ using Models.Settings;
|
||||
|
||||
namespace Api.Services
|
||||
{
|
||||
/// <summary>
|
||||
/// Verifies reCAPTCHA v2/v3 tokens by calling the Google site-verify API.
|
||||
/// </summary>
|
||||
public sealed class RecaptchaVerifier : ICaptchaVerifier
|
||||
{
|
||||
private readonly HttpClient _http;
|
||||
@@ -18,6 +21,7 @@ namespace Api.Services
|
||||
_log = log;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, string? expectedAction, CancellationToken ct)
|
||||
{
|
||||
_log.LogDebug("Verifying captcha token for IP {Ip}", userIp ?? "unknown");
|
||||
|
||||
@@ -3,9 +3,34 @@ using CvMatcher.Models.Responses;
|
||||
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
|
||||
/// Orchestrates CV indexing, job matching, and job discovery operations.
|
||||
/// </summary>
|
||||
public interface ICvMatcherService
|
||||
{
|
||||
/// <summary>
|
||||
/// Indexes a CV PDF into the RAG system and returns document metadata.
|
||||
/// Returns cached metadata without re-indexing when the same text hash already exists.
|
||||
/// </summary>
|
||||
/// <param name="file">Uploaded CV PDF file.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>A task that resolves to the upload response with document ID, hash, and indexing statistics.</returns>
|
||||
Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Scores a CV against a specific job posting URL or pasted description using the LLM.
|
||||
/// Caches the result so repeat requests for the same (CV, job, language) triple are served instantly.
|
||||
/// </summary>
|
||||
/// <param name="request">Match request containing CV document ID, job URL or description, and language preference.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>A task that resolves to the structured match response with score, summary, strengths, gaps, and recommendations.</returns>
/// <exception cref="InvalidOperationException">Thrown by the <c>CvMatcherService</c> implementation when the request's <c>GdprConsent</c> flag is <c>false</c>.</exception>
|
||||
Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Searches the RAG index for job documents most similar to the given CV and scores the top candidates.
|
||||
/// </summary>
|
||||
/// <param name="request">Request containing the CV document ID and optional result count limit.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>A task that resolves to a response with the CV document ID and a list of ranked match results.</returns>
/// <exception cref="InvalidOperationException">Thrown by the <c>CvMatcherService</c> implementation when the CV document cannot be found.</exception>
|
||||
Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,17 @@
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
|
||||
/// Extracts plain text from a job posting, either from a pasted description or by fetching and parsing a URL.
|
||||
/// </summary>
|
||||
public interface IJobTextExtractor
|
||||
{
|
||||
/// <summary>
|
||||
/// Returns normalised plain text for the job posting.
|
||||
/// Prefers <paramref name="jobDescription"/> when provided; otherwise fetches and strips HTML from <paramref name="jobUrl"/>.
/// NOTE(review): the outcome when both arguments are <c>null</c> or blank is not specified by this contract — confirm against the implementation.
|
||||
/// </summary>
|
||||
/// <param name="jobUrl">URL of the job posting page, used when no description is pasted; may be <c>null</c>.</param>
|
||||
/// <param name="jobDescription">Pasted job description text; takes priority over URL fetching; may be <c>null</c>.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>A task that resolves to the normalised plain text, truncated to the configured maximum character limit.</returns>
|
||||
Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,29 @@
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
|
||||
/// Manages one-time job search tokens and the sessions they trigger.
|
||||
/// </summary>
|
||||
public interface IJobTokenService
|
||||
{
|
||||
/// <summary>
|
||||
/// Creates a new single-use job search token linked to the given CV document and user.
|
||||
/// The token expires after the number of days configured in <c>JobSearch:TokenExpiryDays</c>.
|
||||
/// </summary>
|
||||
/// <param name="cvDocumentId">Identifier of the indexed CV document.</param>
|
||||
/// <param name="email">Email address of the user who will receive the results.</param>
|
||||
/// <param name="language">Preferred language for result emails (e.g. <c>"en"</c>, <c>"ro"</c>).</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>A task that resolves to the generated token ID, to be embedded in the one-click job search link.</returns>
|
||||
Task<string> CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Validates the token and, if valid, marks it as used and creates a <c>Pending</c> job search session.
|
||||
/// </summary>
|
||||
/// <param name="tokenId">The token ID from the one-click link.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>
|
||||
/// A task that resolves to one of the <c>StartJobSearchStatus</c> string constants:
|
||||
/// <c>Started</c>, <c>AlreadyUsed</c>, <c>Expired</c>, or <c>NotFound</c>.
|
||||
/// </returns>
|
||||
Task<string> TriggerStartAsync(string tokenId, CancellationToken ct);
|
||||
}
|
||||
|
||||
@@ -10,6 +10,9 @@ using Microsoft.Extensions.Options;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Orchestrates CV upload, RAG indexing, job text extraction, LLM scoring, and result caching.
|
||||
/// </summary>
|
||||
public sealed class CvMatcherService : ICvMatcherService
|
||||
{
|
||||
private readonly IRagApiClient _rag;
|
||||
@@ -35,6 +38,7 @@ public sealed class CvMatcherService : ICvMatcherService
|
||||
_settings = options.Value;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct)
|
||||
{
|
||||
var response = await _rag.IndexCvPdfAsync(file, ct);
|
||||
@@ -51,6 +55,7 @@ public sealed class CvMatcherService : ICvMatcherService
|
||||
};
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
|
||||
{
|
||||
var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
|
||||
@@ -78,6 +83,7 @@ public sealed class CvMatcherService : ICvMatcherService
|
||||
return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
|
||||
{
|
||||
if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
|
||||
@@ -104,6 +110,11 @@ public sealed class CvMatcherService : ICvMatcherService
|
||||
return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Scores a (CV, job) pair with the LLM.
|
||||
/// Returns a cached result immediately when the same (CV, job, language) triple has been scored before.
|
||||
/// When no evidence chunks are available from the vector search, falls back to the raw job text.
|
||||
/// </summary>
|
||||
private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string language, CancellationToken ct)
|
||||
{
|
||||
var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
|
||||
@@ -135,16 +146,13 @@ public sealed class CvMatcherService : ICvMatcherService
|
||||
result.JobUrl = job.SourceUrl;
|
||||
result.Cached = false;
|
||||
await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct);
|
||||
|
||||
//await _email.SendMatchAsync(
|
||||
// email,
|
||||
// $"MyAi.ro CV Match: {result.Score}% - {job.Title}",
|
||||
// BuildEmailBody(cv, job, result),
|
||||
// ct);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Deserialises the LLM's JSON output into a <see cref="JobMatchResponse"/>.
|
||||
/// Returns a safe fallback response instead of throwing when the JSON cannot be parsed.
|
||||
/// </summary>
|
||||
private static JobMatchResponse ParseResult(string json)
|
||||
{
|
||||
try
|
||||
@@ -165,21 +173,29 @@ public sealed class CvMatcherService : ICvMatcherService
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Turns raw CV text into the query string used for vector similarity search:
/// a fixed descriptive prefix followed by at most the first 10,000 characters of the CV.
/// </summary>
/// <param name="cvText">Full CV text; only its leading 10,000 characters are kept.</param>
/// <returns>The prefixed, length-capped search query.</returns>
private static string BuildCvSearchProfile(string cvText) =>
    $"Candidate profile, skills, technologies, seniority, industry experience, project experience: {Limit(cvText, 10000)}";
|
||||
|
||||
/// <summary>
/// Picks a short job title out of the job text: the first fragment (split on '.', line feed,
/// or carriage return) whose trimmed length is between 9 and 139 characters inclusive.
/// </summary>
/// <param name="jobText">Plain job text to scan.</param>
/// <returns>The first qualifying trimmed fragment, or <c>"Job description"</c> when none qualifies.</returns>
private static string ExtractJobTitle(string jobText)
{
    foreach (var fragment in jobText.Split('.', '\n', '\r'))
    {
        var candidate = fragment.Trim();

        // Too-short fragments are noise; too-long ones are likely body text rather than a title.
        if (candidate.Length > 8 && candidate.Length < 140)
        {
            return candidate;
        }
    }

    return "Job description";
}
|
||||
|
||||
/// <summary>
/// Reduces a language tag to its lower-cased base code (the text before the first '-'),
/// e.g. <c>"EN-us"</c> becomes <c>"en"</c>.
/// </summary>
/// <param name="language">Language tag such as <c>"ro"</c> or <c>"en-US"</c>; may be <c>null</c> or blank.</param>
/// <returns>
/// The trimmed, lower-cased base code, or <c>"en"</c> when the input is <c>null</c>, blank,
/// or has an empty base part (e.g. <c>"-US"</c>).
/// </returns>
private static string NormalizeLanguage(string? language)
{
    if (string.IsNullOrWhiteSpace(language))
    {
        return "en";
    }

    var baseCode = language.ToLowerInvariant().Split('-')[0].Trim();

    // A tag that starts with '-' has no base part; fall back to the default instead of returning "".
    return baseCode.Length == 0 ? "en" : baseCode;
}
|
||||
|
||||
/// <summary>Maps a language code to its full English name for use in the LLM system prompt.</summary>
|
||||
private static string LanguageName(string language) => language switch
|
||||
{
|
||||
"ro" => "Romanian",
|
||||
@@ -187,26 +203,6 @@ public sealed class CvMatcherService : ICvMatcherService
|
||||
_ => "English"
|
||||
};
|
||||
|
||||
/// <summary>
/// Returns <paramref name="value"/> unchanged when it has at most <paramref name="max"/> characters;
/// otherwise returns its first <paramref name="max"/> characters.
/// </summary>
private static string Limit(string value, int max)
{
    if (value.Length > max)
    {
        return value[..max];
    }

    return value;
}
|
||||
|
||||
//private static string BuildEmailBody(RagDocumentDetails cv, RagDocumentDetails job, JobMatchResponse result) => $"""
|
||||
// CV Matcher result
|
||||
|
||||
// CV: {cv.Title}
|
||||
// Job: {job.Title}
|
||||
// Job URL: {job.SourceUrl ?? "N/A"}
|
||||
// Score: {result.Score}%
|
||||
|
||||
// Summary:
|
||||
// {result.Summary}
|
||||
|
||||
// Strengths:
|
||||
// - {string.Join("\n- ", result.Strengths)}
|
||||
|
||||
// Gaps:
|
||||
// - {string.Join("\n- ", result.Gaps)}
|
||||
|
||||
// Recommendations:
|
||||
// - {string.Join("\n- ", result.Recommendations)}
|
||||
// """;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,10 @@ using Microsoft.Extensions.Options;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Extracts normalised plain text from a job posting, either from a pasted description or by
|
||||
/// fetching and stripping the HTML of the job page URL.
|
||||
/// </summary>
|
||||
public sealed class JobTextExtractor : IJobTextExtractor
|
||||
{
|
||||
private readonly HttpClient _http;
|
||||
@@ -19,6 +23,7 @@ public sealed class JobTextExtractor : IJobTextExtractor
|
||||
_http.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct)
|
||||
{
|
||||
var pasted = Normalize(jobDescription ?? string.Empty);
|
||||
@@ -37,12 +42,14 @@ public sealed class JobTextExtractor : IJobTextExtractor
|
||||
return Limit(Normalize(WebUtility.HtmlDecode(html)));
|
||||
}
|
||||
|
||||
/// <summary>
/// Truncates text to the effective job-text limit: the configured
/// <c>MaxJobTextChars</c>, but never less than 4000 characters.
/// </summary>
/// <param name="value">Text to cap.</param>
/// <returns>
/// <paramref name="value"/> unchanged when it fits within the limit; otherwise its leading
/// characters up to the limit.
/// </returns>
private string Limit(string value)
{
    // A configured limit below 4000 is raised to 4000.
    var cap = Math.Max(4000, _settings.MaxJobTextChars);

    if (value.Length > cap)
    {
        return value[..cap];
    }

    return value;
}
|
||||
|
||||
/// <summary>Collapses all whitespace runs to single spaces and trims the result.</summary>
|
||||
private static string Normalize(string value)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(value)) return string.Empty;
|
||||
|
||||
@@ -11,6 +11,9 @@ using Microsoft.Extensions.Options;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Creates and validates one-time job search tokens, and creates the corresponding search sessions.
|
||||
/// </summary>
|
||||
public sealed class JobTokenService : IJobTokenService
|
||||
{
|
||||
private readonly CvSearchDbContext _db;
|
||||
@@ -30,6 +33,7 @@ public sealed class JobTokenService : IJobTokenService
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<string> CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct)
|
||||
{
|
||||
var token = new JobSearchTokenEntity
|
||||
@@ -49,6 +53,7 @@ public sealed class JobTokenService : IJobTokenService
|
||||
return token.Id;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<string> TriggerStartAsync(string tokenId, CancellationToken ct)
|
||||
{
|
||||
var token = await _db.JobSearchTokens.FirstOrDefaultAsync(x => x.Id == tokenId, ct);
|
||||
@@ -86,12 +91,17 @@ public sealed class JobTokenService : IJobTokenService
|
||||
return StartJobSearchStatus.Started;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Extracts up to 10 meaningful keywords from the CV text using simple heuristics (no LLM).
|
||||
/// Takes the first 5 usable lines, splits them into words, strips punctuation, and deduplicates.
|
||||
/// </summary>
|
||||
private static string ExtractKeywords(string cvText)
|
||||
{
|
||||
var lines = cvText
|
||||
.Split(['\n', '\r'], StringSplitOptions.RemoveEmptyEntries)
|
||||
.Select(l => l.Trim())
|
||||
.Where(l => l.Length > 5 && l.Length < 200)
|
||||
// Skip lines made up only of digits, whitespace, and the characters + - ( ) @ . (e.g. phone numbers); lines containing letters, such as email addresses, are not skipped by this filter.
|
||||
.Where(l => !Regex.IsMatch(l, @"^[\d\s\+\-\(\)\@\.]+$"))
|
||||
.Take(5)
|
||||
.ToList();
|
||||
|
||||
@@ -5,6 +5,11 @@ using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace EmailApi.Data.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Singleton implementation of <see cref="IEmailTemplateService"/> that caches all email templates
|
||||
/// from the database and refreshes them every 10 minutes.
|
||||
/// Uses <see cref="IServiceScopeFactory"/> to resolve the scoped repository from a singleton lifetime.
|
||||
/// </summary>
|
||||
public sealed class EmailTemplateService : IEmailTemplateService
|
||||
{
|
||||
private readonly IServiceScopeFactory _scopeFactory;
|
||||
@@ -20,6 +25,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Get(string key, string language = "en")
|
||||
{
|
||||
EnsureCacheLoaded();
|
||||
@@ -35,6 +41,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
|
||||
return key;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Render(string key, string language, params (string Key, string Value)[] placeholders)
|
||||
{
|
||||
var template = Get(key, language);
|
||||
@@ -43,6 +50,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
|
||||
return template;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string? GetOperatorCopy(string key, string language)
|
||||
{
|
||||
EnsureCacheLoaded();
|
||||
@@ -61,6 +69,10 @@ public sealed class EmailTemplateService : IEmailTemplateService
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reloads all templates from the database when the cache TTL has expired.
|
||||
/// Swaps both caches atomically; logs an error and continues serving the stale cache on failure.
|
||||
/// </summary>
|
||||
private void EnsureCacheLoaded()
|
||||
{
|
||||
if (DateTime.UtcNow - _loadedAt < CacheTtl) return;
|
||||
@@ -91,5 +103,6 @@ public sealed class EmailTemplateService : IEmailTemplateService
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Composes the dictionary key shared by both caches, in the form <c>key::language</c>.
/// </summary>
private static string CacheKey(string key, string language)
{
    return string.Concat(key, "::", language);
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,38 @@
|
||||
namespace EmailApi.Data.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Provides access to localised email templates stored in the <c>emailApi.EmailTemplates</c> table.
|
||||
/// Implementations are expected to cache templates and refresh periodically.
|
||||
/// </summary>
|
||||
public interface IEmailTemplateService
|
||||
{
|
||||
/// <summary>
|
||||
/// Returns the template value for the given key and language.
|
||||
/// Falls back to <c>"en"</c> when the requested language has no entry.
|
||||
/// Returns the raw key string when no matching template is found.
|
||||
/// </summary>
|
||||
/// <param name="key">Template key (e.g. <c>"email.match.subject"</c>).</param>
|
||||
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>); defaults to <c>"en"</c>.</param>
|
||||
/// <returns>Template value string, or <paramref name="key"/> itself when no template matches.</returns>
|
||||
string Get(string key, string language = "en");
|
||||
|
||||
/// <summary>
|
||||
/// Retrieves the template and substitutes <c>{{placeholder}}</c> tokens with the provided values.
|
||||
/// </summary>
|
||||
/// <param name="key">Template key.</param>
|
||||
/// <param name="language">Two-letter language code.</param>
|
||||
/// <param name="placeholders">Named replacement pairs in the form <c>("name", value)</c>.</param>
|
||||
/// <returns>Rendered template string with the supplied placeholders substituted.</returns>
|
||||
string Render(string key, string language, params (string Key, string Value)[] placeholders);
|
||||
|
||||
/// <summary>
|
||||
/// Returns the operator copy address for the given template key.
|
||||
/// Uses the specific row's <c>OperatorCopy</c> value when non-empty; otherwise falls back
|
||||
/// to the first non-empty <c>OperatorCopy</c> across all cached rows, so future template rows
|
||||
/// with an empty value automatically inherit the globally configured address.
|
||||
/// </summary>
|
||||
/// <param name="key">Template key used to look up the specific row (typically the subject key).</param>
|
||||
/// <param name="language">Two-letter language code.</param>
|
||||
/// <returns>Operator copy email address, or <c>null</c> when none is configured.</returns>
|
||||
string? GetOperatorCopy(string key, string language);
|
||||
}
|
||||
|
||||
@@ -5,6 +5,11 @@ using Swashbuckle.AspNetCore.Annotations;
|
||||
|
||||
namespace EmailApi.Controllers;
|
||||
|
||||
/// <summary>
|
||||
/// Internal email relay. Accepts an HTML body fragment from trusted callers
|
||||
/// (api, cv-search-job), wraps it in the branded HTML shell, and dispatches
|
||||
/// via SMTP. Protected by X-Internal-Api-Key.
|
||||
/// </summary>
|
||||
[ApiController]
|
||||
[Route("api/email")]
|
||||
public sealed class EmailController : ControllerBase
|
||||
@@ -13,9 +18,27 @@ public sealed class EmailController : ControllerBase
|
||||
|
||||
public EmailController(SmtpEmailDispatcher dispatcher) => _dispatcher = dispatcher;
|
||||
|
||||
/// <summary>
|
||||
/// Sends an HTML email via SMTP. The supplied body fragment is wrapped in
|
||||
/// the branded HTML shell before dispatch. Attachments are resolved from
|
||||
/// the shared file storage volume using the relative path in
|
||||
/// <see cref="SendEmailRequest.AttachmentPath"/>.
|
||||
/// </summary>
|
||||
/// <param name="request">Email payload: recipients, subject, HTML body fragment, optional attachment path.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>204 No Content on success.</returns>
|
||||
[HttpPost("send")]
|
||||
[SwaggerOperation(Summary = "Send an HTML email via SMTP")]
|
||||
[SwaggerOperation(
|
||||
Summary = "Send an HTML email via SMTP",
|
||||
Description = "Wraps the provided HTML body in the branded shell and sends via SMTP. " +
|
||||
"If AttachmentPath is set, resolves the file from the shared file-storage volume. " +
|
||||
"Returns 204 on success; 400 when the request body is invalid; 500 on SMTP failure.")]
|
||||
[SwaggerResponse(StatusCodes.Status204NoContent, "Email dispatched successfully")]
|
||||
[SwaggerResponse(StatusCodes.Status400BadRequest, "Request body is missing or invalid")]
|
||||
[SwaggerResponse(StatusCodes.Status500InternalServerError, "SMTP dispatch failed")]
|
||||
[ProducesResponseType(StatusCodes.Status204NoContent)]
|
||||
[ProducesResponseType(StatusCodes.Status400BadRequest)]
|
||||
[ProducesResponseType(StatusCodes.Status500InternalServerError)]
|
||||
public async Task<IActionResult> Send([FromBody] SendEmailRequest request, CancellationToken ct)
|
||||
{
|
||||
await _dispatcher.SendAsync(request, ct);
|
||||
|
||||
@@ -8,6 +8,10 @@ using Models.Settings;
|
||||
|
||||
namespace EmailApi.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Wraps an HTML body fragment in the branded HTML shell and sends the resulting email via SMTP using MailKit.
|
||||
/// Attaches files from the shared file-storage volume when an attachment path is provided.
|
||||
/// </summary>
|
||||
public sealed class SmtpEmailDispatcher
|
||||
{
|
||||
private readonly SmtpSettings _smtp;
|
||||
@@ -29,6 +33,13 @@ public sealed class SmtpEmailDispatcher
|
||||
_environmentName = Environment.GetEnvironmentVariable("APP_ENVIRONMENT_NAME") ?? "Development";
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Builds a <see cref="MimeMessage"/> from <paramref name="req"/>, wraps the body in the HTML shell,
|
||||
/// optionally attaches a file, and sends via the configured SMTP server.
|
||||
/// Logs a warning and returns without throwing when the SMTP host is not configured.
|
||||
/// </summary>
|
||||
/// <param name="req">Email payload containing recipients, subject, HTML body, and optional attachment path.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
public async Task SendAsync(SendEmailRequest req, CancellationToken ct)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(_smtp.Host))
|
||||
|
||||
@@ -6,6 +6,11 @@ using System.Collections.Concurrent;
|
||||
|
||||
namespace MyAi.Data.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Singleton implementation of <see cref="ITemplateService"/> that caches all templates from the
|
||||
/// <c>myAi.Templates</c> table and refreshes them every 10 minutes.
|
||||
/// Uses <see cref="IServiceScopeFactory"/> to resolve the scoped DbContext from a singleton lifetime.
|
||||
/// </summary>
|
||||
public sealed class DbTemplateService : ITemplateService
|
||||
{
|
||||
private readonly IServiceScopeFactory _scopeFactory;
|
||||
@@ -20,6 +25,7 @@ public sealed class DbTemplateService : ITemplateService
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Get(string key, string language = "en")
|
||||
{
|
||||
EnsureCacheLoaded();
|
||||
@@ -35,6 +41,7 @@ public sealed class DbTemplateService : ITemplateService
|
||||
return key;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Render(string key, string language, params (string Key, string Value)[] placeholders)
|
||||
{
|
||||
var template = Get(key, language);
|
||||
@@ -43,6 +50,10 @@ public sealed class DbTemplateService : ITemplateService
|
||||
return template;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reloads all templates from the database when the cache TTL has expired.
|
||||
/// Swaps the cache atomically; logs an error and continues serving the stale cache on failure.
|
||||
/// </summary>
|
||||
private void EnsureCacheLoaded()
|
||||
{
|
||||
if (DateTime.UtcNow - _loadedAt < CacheTtl) return;
|
||||
@@ -66,5 +77,6 @@ public sealed class DbTemplateService : ITemplateService
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the dictionary key used in the cache by joining the template key and the language
/// in the form <c>key::language</c>.
/// </summary>
|
||||
private static string CacheKey(string key, string language) => $"{key}::{language}";
|
||||
}
|
||||
|
||||
@@ -1,7 +1,27 @@
|
||||
namespace MyAi.Data.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Provides access to localised string templates stored in the <c>myAi.Templates</c> table.
|
||||
/// Implementations are expected to cache templates and refresh periodically.
|
||||
/// </summary>
|
||||
public interface ITemplateService
|
||||
{
|
||||
/// <summary>
|
||||
/// Returns the template value for the given key and language.
|
||||
/// Falls back to <c>"en"</c> when the requested language has no entry.
|
||||
/// Returns the raw key string when no matching template is found.
|
||||
/// </summary>
|
||||
/// <param name="key">Template key (e.g. <c>"html.job-search-start.title"</c>).</param>
|
||||
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>).</param>
|
||||
/// <returns>Template value string.</returns>
|
||||
string Get(string key, string language = "en");
|
||||
|
||||
/// <summary>
|
||||
/// Retrieves the template and substitutes <c>{{placeholder}}</c> tokens with the provided values.
|
||||
/// </summary>
|
||||
/// <param name="key">Template key.</param>
|
||||
/// <param name="language">Two-letter language code.</param>
|
||||
/// <param name="placeholders">Named replacement pairs in the form <c>("name", value)</c>.</param>
|
||||
/// <returns>Rendered template string with all placeholders replaced.</returns>
|
||||
string Render(string key, string language, params (string Key, string Value)[] placeholders);
|
||||
}
|
||||
|
||||
@@ -2,7 +2,20 @@ using Rag.Models;
|
||||
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
|
||||
/// Classifies a document into a known type (cv, job, contract, etc.) and extracts a title.
|
||||
/// </summary>
|
||||
public interface IDocumentClassifier
|
||||
{
|
||||
/// <summary>
|
||||
/// Determines the document type and title from the provided text.
|
||||
/// Uses <paramref name="providedType"/> and <paramref name="providedTitle"/> directly when supplied;
|
||||
/// otherwise falls back to a keyword-frequency heuristic over the text.
|
||||
/// </summary>
|
||||
/// <param name="text">Full document text to classify.</param>
|
||||
/// <param name="providedType">Caller-supplied document type hint; skips heuristic when non-empty.</param>
|
||||
/// <param name="providedTitle">Caller-supplied document title; skips title extraction when non-empty.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>A <see cref="DocumentClassification"/> with type, confidence score, and title.</returns>
|
||||
Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct);
|
||||
}
|
||||
|
||||
@@ -3,10 +3,46 @@ using Rag.Models.Responses;
|
||||
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
|
||||
/// Core RAG (Retrieval-Augmented Generation) operations: document indexing, vector search, and retrieval.
|
||||
/// </summary>
|
||||
public interface IRagService
|
||||
{
|
||||
/// <summary>
|
||||
/// Indexes a plain-text document by classifying it, chunking the text, generating embeddings,
|
||||
/// and persisting the document and its chunks. Returns cached metadata when the text hash already exists.
|
||||
/// </summary>
|
||||
/// <param name="request">Indexing request with text, optional document type, title, and source URL.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>Response with document ID, hash, type, and chunk/character counts.</returns>
|
||||
Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Extracts text from a PDF file, then indexes it the same way as <see cref="IndexTextAsync"/>.
|
||||
/// Returns cached metadata when the extracted text hash already exists.
|
||||
/// </summary>
|
||||
/// <param name="file">Uploaded PDF file (must be ≤ configured max size).</param>
|
||||
/// <param name="documentType">Optional document type hint; if omitted the classifier is used.</param>
|
||||
/// <param name="title">Optional title override; when <c>null</c>, <c>RagService</c> passes the uploaded file name as the title.</param>
|
||||
/// <param name="sourceUrl">Optional source URL to associate with the document.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>Response with document ID, hash, type, and chunk/character counts.</returns>
/// <exception cref="InvalidOperationException">Thrown by <c>RagService</c> when the uploaded file is empty.</exception>
|
||||
Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Performs a vector similarity search over indexed document chunks, groups results by document,
|
||||
/// and returns the top-K documents with their best-matching chunks.
|
||||
/// </summary>
|
||||
/// <param name="request">Search request with query text, optional document type filter, and top-K limit.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>Ranked list of matching documents with scored chunk excerpts.</returns>
|
||||
Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Retrieves full document details — including the original text — by document ID.
|
||||
/// </summary>
|
||||
/// <param name="documentId">The document's unique identifier.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>Document details, or <c>null</c> if no document with that ID exists.</returns>
|
||||
Task<RagDocumentDetailsResponse?> GetDocumentAsync(string documentId, CancellationToken ct);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,17 @@
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
|
||||
/// Splits document text into overlapping chunks suitable for embedding and vector search.
|
||||
/// </summary>
|
||||
public interface ITextChunker
|
||||
{
|
||||
/// <summary>
|
||||
/// Divides <paramref name="text"/> into a list of chunks using a sliding window.
|
||||
/// Adjacent chunks share <paramref name="overlap"/> characters to preserve cross-boundary context.
|
||||
/// </summary>
|
||||
/// <param name="text">The full document text to chunk.</param>
|
||||
/// <param name="chunkSize">Maximum character length per chunk (clamped to 300–3000).</param>
|
||||
/// <param name="overlap">Number of trailing characters from the previous chunk to repeat at the start of the next (clamped to 0–chunkSize/2).</param>
|
||||
/// <returns>
/// Ordered list of non-empty text chunks; <c>TextChunker</c> returns an empty list when
/// <paramref name="text"/> is null, empty, or whitespace.
/// </returns>
|
||||
IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,23 @@
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
|
||||
/// Extracts and normalises plain text from documents.
|
||||
/// </summary>
|
||||
public interface ITextExtractor
|
||||
{
|
||||
/// <summary>
|
||||
/// Reads all pages of a PDF stream and returns the concatenated, normalised plain text.
|
||||
/// </summary>
|
||||
/// <param name="stream">Readable stream positioned at the start of the PDF file.</param>
|
||||
/// <param name="ct">Cancellation token (checked between pages).</param>
|
||||
/// <returns>Normalised plain text extracted from the PDF.</returns>
|
||||
Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct);
|
||||
|
||||
/// <summary>
|
||||
/// Collapses all whitespace sequences in <paramref name="value"/> to single spaces and trims the result.
|
||||
/// Returns an empty string for null/whitespace input.
|
||||
/// </summary>
|
||||
/// <param name="value">Raw text to normalise.</param>
|
||||
/// <returns>Whitespace-normalised text.</returns>
|
||||
string Normalize(string value);
|
||||
}
|
||||
|
||||
@@ -4,6 +4,9 @@ using Rag.Models;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Classifies documents by type using a keyword-frequency heuristic and extracts a title from the text.
|
||||
/// </summary>
|
||||
public sealed class DocumentClassifier : IDocumentClassifier
|
||||
{
|
||||
private static readonly HashSet<string> KnownTypes = new(StringComparer.OrdinalIgnoreCase)
|
||||
@@ -11,6 +14,7 @@ public sealed class DocumentClassifier : IDocumentClassifier
|
||||
"cv", "job", "article", "contract", "invoice", "product", "documentation", "unknown"
|
||||
};
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(providedType))
|
||||
@@ -24,6 +28,8 @@ public sealed class DocumentClassifier : IDocumentClassifier
|
||||
});
|
||||
}
|
||||
|
||||
// Keyword-frequency heuristic: count how many characteristic terms each document
|
||||
// type contributes to the text, then pick the type with the highest hit count.
|
||||
var lower = text.ToLowerInvariant();
|
||||
var scores = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
@@ -37,6 +43,8 @@ public sealed class DocumentClassifier : IDocumentClassifier
|
||||
|
||||
var best = scores.OrderByDescending(x => x.Value).First();
|
||||
var type = best.Value <= 0 ? "unknown" : best.Key;
|
||||
// Confidence baseline 0.45 + 0.08 per matched keyword term, capped at 0.95.
|
||||
// Zero hits → 0.25 (effectively unknown).
|
||||
var confidence = best.Value <= 0 ? 0.25 : Math.Min(0.95, 0.45 + best.Value * 0.08);
|
||||
|
||||
return Task.FromResult(new DocumentClassification
|
||||
@@ -47,14 +55,20 @@ public sealed class DocumentClassifier : IDocumentClassifier
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>
/// Counts how many of the given <paramref name="terms"/> occur as substrings of the lower-cased text.
/// Each term contributes at most one to the total, however many times it occurs.
/// Matching is case-sensitive, so terms must be supplied in lower case.
/// </summary>
|
||||
private static int Count(string lower, params string[] terms) => terms.Count(term => lower.Contains(term));
|
||||
|
||||
/// <summary>
/// Produces a safe type slug: trims and lower-cases <paramref name="value"/>, then replaces every
/// character other than <c>a-z</c>, <c>0-9</c>, underscore, or hyphen with a hyphen.
/// Returns <c>"unknown"</c> when the result is empty.
/// </summary>
|
||||
private static string NormalizeType(string value)
|
||||
{
|
||||
var cleaned = Regex.Replace(value.Trim().ToLowerInvariant(), "[^a-z0-9_-]", "-");
|
||||
return string.IsNullOrWhiteSpace(cleaned) ? "unknown" : cleaned;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns <paramref name="providedTitle"/> when available; otherwise extracts the first sentence-like
|
||||
/// fragment from the text, or falls back to a generic "{type} document" label.
|
||||
/// </summary>
|
||||
private static string BuildTitle(string? providedTitle, string text, string documentType)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(providedTitle)) return providedTitle.Trim();
|
||||
|
||||
@@ -11,6 +11,9 @@ using CommonHelpers;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Implements the core RAG pipeline: document classification, chunking, embedding, vector search, and retrieval.
|
||||
/// </summary>
|
||||
public sealed class RagService : IRagService
|
||||
{
|
||||
private readonly ITextExtractor _textExtractor;
|
||||
@@ -36,6 +39,7 @@ public sealed class RagService : IRagService
|
||||
_settings = options.Value;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct)
|
||||
{
|
||||
var text = _textExtractor.Normalize(request.Text ?? string.Empty);
|
||||
@@ -44,6 +48,7 @@ public sealed class RagService : IRagService
|
||||
return await IndexNormalizedTextAsync(text, request.DocumentType, request.Title, request.SourceUrl, request.Metadata, ct);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct)
|
||||
{
|
||||
if (file.Length <= 0) throw new InvalidOperationException("Uploaded file is empty.");
|
||||
@@ -57,6 +62,7 @@ public sealed class RagService : IRagService
|
||||
return await IndexNormalizedTextAsync(text, documentType, title ?? file.FileName, sourceUrl, new Dictionary<string, string> { ["fileName"] = file.FileName }, ct);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct)
|
||||
{
|
||||
var query = _textExtractor.Normalize(request.QueryText);
|
||||
@@ -97,6 +103,7 @@ public sealed class RagService : IRagService
|
||||
return new SearchResponse { Results = results };
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<RagDocumentDetailsResponse?> GetDocumentAsync(string documentId, CancellationToken ct)
|
||||
{
|
||||
var document = await _repository.GetDocumentByIdAsync(documentId, ct);
|
||||
@@ -112,6 +119,11 @@ public sealed class RagService : IRagService
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Core indexing pipeline: computes a text hash for deduplication, classifies and chunks the text,
|
||||
/// generates embeddings for each chunk, and persists the document and chunks to the repository.
|
||||
/// Returns cached metadata without re-indexing when the same text hash and source URL already exist.
|
||||
/// </summary>
|
||||
private async Task<IndexDocumentResponse> IndexNormalizedTextAsync(
|
||||
string text,
|
||||
string? documentType,
|
||||
|
||||
@@ -2,14 +2,20 @@ using Api.Services.Contracts;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Splits text into overlapping fixed-size chunks using a sliding window for use in vector embedding pipelines.
|
||||
/// </summary>
|
||||
public sealed class TextChunker : ITextChunker
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(text)) return [];
|
||||
chunkSize = Math.Clamp(chunkSize, 300, 3000);
|
||||
overlap = Math.Clamp(overlap, 0, chunkSize / 2);
|
||||
|
||||
// Sliding window: step forward by (chunkSize - overlap) each iteration so
|
||||
// adjacent chunks share `overlap` characters, preserving cross-boundary context.
|
||||
var chunks = new List<string>();
|
||||
var start = 0;
|
||||
while (start < text.Length)
|
||||
|
||||
@@ -4,8 +4,12 @@ using UglyToad.PdfPig;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Extracts and normalises plain text from PDF files using PdfPig.
|
||||
/// </summary>
|
||||
public sealed class TextExtractor : ITextExtractor
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct)
|
||||
{
|
||||
using var document = PdfDocument.Open(stream);
|
||||
@@ -19,6 +23,7 @@ public sealed class TextExtractor : ITextExtractor
|
||||
return Task.FromResult(Normalize(builder.ToString()));
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string Normalize(string value)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(value)) return string.Empty;
|
||||
|
||||
@@ -212,8 +212,8 @@ Every service follows this structure:
|
||||
|
||||
## Coding conventions
|
||||
|
||||
- No XML doc comments on internal code; Swagger annotations on public controller actions
|
||||
- No explanatory inline comments — code should be self-describing
|
||||
- XML doc comments (`/// <summary>`) on all public methods, interfaces, and non-trivial private/protected helpers; Swagger annotations on public controller actions
|
||||
- Inline `//` comments for non-obvious logic; avoid restating what the code already says clearly
|
||||
- Use `$$"""..."""` raw string literals (not `$"""`) when the content contains CSS or other curly-brace-heavy text — avoids CS9006 brace-escaping errors
|
||||
- `sealed` on all concrete service classes
|
||||
- Settings classes injected via `IOptions<T>` — registered with `Configure<T>(config.GetSection("..."))`
|
||||
|
||||
@@ -7,6 +7,10 @@ using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace CvSearchJob.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Sends job search results emails to the session user and the operator copy address,
|
||||
/// with an optional CV PDF attachment.
|
||||
/// </summary>
|
||||
public sealed class CvSearchEmailSender
|
||||
{
|
||||
private readonly IEmailApiClient _emailApi;
|
||||
@@ -23,6 +27,16 @@ public sealed class CvSearchEmailSender
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Builds and sends the job search results email.
|
||||
/// Resolves the recipient list from <paramref name="toEmail"/> and the operator copy address
|
||||
/// stored in the email template. Does nothing when no recipients can be resolved.
|
||||
/// </summary>
|
||||
/// <param name="toEmail">Primary recipient (the user who triggered the search).</param>
|
||||
/// <param name="attachmentFileName">Relative filename of the CV PDF to attach, or <c>null</c>.</param>
|
||||
/// <param name="results">Ranked list of job search results to include in the email body.</param>
|
||||
/// <param name="language">Two-letter language code for template rendering.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
public async Task SendResultsAsync(
|
||||
string toEmail,
|
||||
string? attachmentFileName,
|
||||
@@ -64,6 +78,10 @@ public sealed class CvSearchEmailSender
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Renders the HTML email body from the results list.
|
||||
/// Returns the empty-results template when no results are present.
|
||||
/// </summary>
|
||||
private string BuildBody(IReadOnlyList<JobSearchResultEntity> results, string language)
|
||||
{
|
||||
if (results.Count == 0)
|
||||
@@ -92,6 +110,10 @@ public sealed class CvSearchEmailSender
|
||||
("items", items.ToString()));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Attempts to deserialise the stored result JSON into a <see cref="JobMatchResponse"/>.
|
||||
/// Returns <c>null</c> on parse failure so the email still renders without a summary.
|
||||
/// </summary>
|
||||
private static JobMatchResponse? TryParseResult(string json)
|
||||
{
|
||||
try
|
||||
|
||||
@@ -5,6 +5,11 @@ using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace CvSearchJob.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Config-driven HTML scraper that fetches a provider's job listing page and extracts matching job URLs.
|
||||
/// Uses a two-stage anchor filter: href must contain the provider's link pattern, and anchor text must
|
||||
/// contain at least one CV keyword.
|
||||
/// </summary>
|
||||
public sealed class HtmlJobSearcher
|
||||
{
|
||||
private readonly HttpClient _http;
|
||||
@@ -18,6 +23,15 @@ public sealed class HtmlJobSearcher
|
||||
_http.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; MyAi.ro CV-Search/1.0)");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Fetches the provider's search result page for the combined initial + CV keywords, parses all anchor
|
||||
/// tags, applies the two-stage filter, and returns up to <see cref="JobProviderConfig.MaxResults"/> absolute URLs.
|
||||
/// Returns an empty list when the HTTP request fails rather than throwing.
|
||||
/// </summary>
|
||||
/// <param name="provider">Provider configuration including search URL template, link filter, and result cap.</param>
|
||||
/// <param name="cvKeywords">Keywords extracted from the user's CV to inject into the search query.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>Deduplicated list of absolute job page URLs (query string stripped).</returns>
|
||||
public async Task<IReadOnlyList<string>> SearchJobUrlsAsync(
|
||||
JobProviderConfig provider,
|
||||
IReadOnlyList<string> cvKeywords,
|
||||
@@ -75,6 +89,7 @@ public sealed class HtmlJobSearcher
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strip query string and fragment so different tracking variants of the same URL collapse to one.
|
||||
var url = absoluteUri.GetLeftPart(UriPartial.Path);
|
||||
if (seen.Add(url))
|
||||
results.Add(url);
|
||||
|
||||
@@ -14,6 +14,10 @@ using Microsoft.Extensions.Options;
|
||||
|
||||
namespace CvSearchJob.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// Background job task that processes pending job search sessions: scrapes providers,
|
||||
/// scores each URL against the CV via the matcher API, persists results, and sends the results email.
|
||||
/// </summary>
|
||||
public sealed class CvSearchJobTask : IJobTask
|
||||
{
|
||||
private readonly IServiceScopeFactory _scopeFactory;
|
||||
@@ -41,6 +45,11 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Called by the scheduler on each tick. Resets orphaned sessions, picks the oldest pending session,
|
||||
/// runs the full search pipeline, and sends the results email.
|
||||
/// Does nothing when <c>JobSearch:Enabled</c> is <c>false</c>.
|
||||
/// </summary>
|
||||
public async Task ExecuteAsync(IConfiguration parametersSection, CancellationToken cancellationToken)
|
||||
{
|
||||
if (!_settings.Enabled) return;
|
||||
@@ -92,6 +101,10 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Runs the full search pipeline for a session: scrapes all providers, deduplicates URLs,
|
||||
/// scores each candidate via the matcher API, and persists results that meet the minimum score threshold.
|
||||
/// </summary>
|
||||
private async Task<List<JobSearchResultEntity>> RunSearchAsync(
|
||||
JobSearchSessionEntity session,
|
||||
CvSearchDbContext db,
|
||||
@@ -125,6 +138,7 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
{
|
||||
CvDocumentId = session.CvDocumentId,
|
||||
JobUrl = url,
|
||||
// User already gave GDPR consent when they clicked the one-time job search link
|
||||
GdprConsent = true
|
||||
};
|
||||
|
||||
@@ -162,6 +176,10 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
return results;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Deserialises the provider configuration snapshot stored on the session.
|
||||
/// Falls back to the current live config when the snapshot is absent or unparseable; the absent-snapshot fallback returns only providers with <c>Enabled</c> set.
|
||||
/// </summary>
|
||||
private List<JobProviderConfig> GetProviders(string? providerConfigJson)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(providerConfigJson)) return _settings.Providers.Where(p => p.Enabled).ToList();
|
||||
@@ -177,6 +195,10 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Infers the provider name from the job URL by matching against each provider's <c>JobLinkContains</c> pattern.
|
||||
/// Falls back to the URL hostname when no provider matches, or to <c>"unknown"</c> when the URL is not a valid absolute URI.
|
||||
/// </summary>
|
||||
private static string GuessProvider(string url, List<JobProviderConfig> providers)
|
||||
{
|
||||
foreach (var p in providers)
|
||||
@@ -189,8 +211,12 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
return Uri.TryCreate(url, UriKind.Absolute, out var uri) ? uri.Host : "unknown";
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Constructs the CV PDF filename from the document ID.
|
||||
/// </summary>
|
||||
private static string BuildCvFileName(string cvDocumentId)
|
||||
{
|
||||
// Strip non-alphanumeric characters so the filename is safe for all OS/email clients.
|
||||
var safeId = string.Concat(cvDocumentId.Where(char.IsLetterOrDigit));
|
||||
if (string.IsNullOrWhiteSpace(safeId)) safeId = "cv";
|
||||
return $"{safeId}.pdf";
|
||||
|
||||
Reference in New Issue
Block a user