Improve comments and Swagger docs across services #27

Merged
gelu merged 2 commits from feature/improve-comments-swagger-26 into main 2026-05-28 06:26:57 +00:00
30 changed files with 465 additions and 38 deletions
+1 -1
View File
@@ -115,7 +115,7 @@ namespace Api.Controllers
catch (Exception ex)
{
_log.LogError(ex, "Subscription failed. ip={Ip} eMail={eMail}", userIp, req.Email);
return StatusCode(StatusCodes.Status500InternalServerError, new ErrorResponse { Error = "Failed.", Code = "subscription_failed" });
return StatusCode(StatusCodes.Status500InternalServerError, new ErrorResponse { Error = "Could not process subscription.", Code = "subscription_failed" });
}
}
@@ -44,10 +44,6 @@ namespace Api.Controllers
/// </summary>
/// <param name="fileName">The name of the file to download (optional - uses default from settings if not provided)</param>
/// <returns>File stream with appropriate headers for resumable downloads</returns>
/// <response code="200">Full file content</response>
/// <response code="206">Partial file content (range request)</response>
/// <response code="404">File not found</response>
/// <response code="416">Requested range not satisfiable</response>
[HttpGet("{fileName?}")]
[SwaggerOperation(Summary = "Download file", Description = "Downloads a file with support for full and ranged (resumable) transfers.")]
[SwaggerResponse(StatusCodes.Status200OK, "Full file content returned")]
@@ -1,9 +1,21 @@
using Api.Services.Contracts.Models;
using Api.Services.Contracts.Models;
namespace Api.Services.Contracts
{
/// <summary>
/// Verifies a reCAPTCHA token against the Google verification API.
/// </summary>
public interface ICaptchaVerifier
{
/// <summary>
/// Sends the token to the Google reCAPTCHA verification endpoint and
/// returns a verdict indicating success, score, and any failure reason.
/// </summary>
/// <param name="token">The reCAPTCHA token provided by the client.</param>
/// <param name="userIp">Remote IP address passed to Google for additional risk analysis; may be <c>null</c> when the address is unknown.</param>
/// <param name="expectedAction">Action name to validate against the token's embedded action (v3 only); may be <c>null</c> to skip the check.</param>
/// <param name="ct">Token used to cancel the verification call.</param>
/// <returns>A <see cref="CaptchaVerdictModel"/> with the verification outcome.</returns>
Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, string? expectedAction, CancellationToken ct);
}
}
+51 -1
View File
@@ -1,15 +1,65 @@
using CvMatcher.Models.Responses;
using CvMatcher.Models.Responses;
using Models.Requests;
namespace Api.Services.Contracts
{
/// <summary>
/// Abstraction for sending transactional emails from the public API.
/// </summary>
public interface IEmailSender
{
/// <summary>
/// Sends a contact-form message to the configured operator address.
/// </summary>
/// <param name="req">Contact request containing name, email, subject, and message.</param>
/// <param name="ct">Cancellation token.</param>
Task SendContactAsync(ContactRequest req, CancellationToken ct);
/// <summary>
/// Notifies the configured operator address that a new email subscription was received.
/// </summary>
/// <param name="req">Subscription request containing the subscriber's email address.</param>
/// <param name="ct">Cancellation token.</param>
Task SendSubscribeAsync(SubscribeRequest req, CancellationToken ct);
/// <summary>
/// Sends a background notification when a file download is initiated.
/// Does nothing when no notification address is configured.
/// </summary>
/// <param name="fileName">Name of the downloaded file.</param>
/// <param name="userIp">Remote IP address of the downloader, or <c>null</c> if unavailable.</param>
/// <param name="ct">Cancellation token.</param>
Task SendFileDownloadNotificationAsync(string fileName, string? userIp, CancellationToken ct);
/// <summary>
/// Sends a CV match results email to the user and the operator copy address.
/// </summary>
/// <param name="explicitTo">Primary recipient email address, or <c>null</c> to send only the operator copy.</param>
/// <param name="subject">Email subject line.</param>
/// <param name="body">Pre-built HTML body fragment.</param>
/// <param name="attachmentPath">Full path to a CV PDF to attach, or <c>null</c> for no attachment.</param>
/// <param name="ct">Cancellation token.</param>
Task SendMatchAsync(string? explicitTo, string subject, string body, string? attachmentPath, CancellationToken ct);
/// <summary>
/// Builds the localised subject line for a CV match email.
/// </summary>
/// <param name="score">Match score percentage (0-100).</param>
/// <param name="jobLabel">Human-readable job title or label; may be <c>null</c>.</param>
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>).</param>
/// <returns>Rendered subject string.</returns>
string BuildMatchEmailSubject(int score, string? jobLabel, string language);
/// <summary>
/// Builds the full HTML body for a CV match email, including an optional job-search footer link.
/// </summary>
/// <param name="cvDocumentId">Identifier of the indexed CV document.</param>
/// <param name="result">Structured <see cref="JobMatchResponse"/> from the CV matcher engine.</param>
/// <param name="jobLabel">Human-readable job title or label; may be <c>null</c>.</param>
/// <param name="language">Two-letter language code.</param>
/// <param name="jobSearchLink">Optional one-click job-search URL to append as a footer CTA; defaults to <c>null</c> (no link).</param>
/// <param name="expiryDays">Number of days until the job-search link expires (shown in the footer copy); defaults to 7.</param>
/// <returns>Rendered HTML body string.</returns>
string BuildMatchEmailBody(string cvDocumentId, JobMatchResponse result, string? jobLabel, string language, string? jobSearchLink = null, int expiryDays = 7);
}
}
+9
View File
@@ -9,6 +9,9 @@ using Models.Settings;
namespace Api.Services;
/// <summary>
/// Implements <see cref="IEmailSender"/> by delegating all email dispatch to the internal email-api service via Refit.
/// </summary>
public sealed class EmailApiEmailSender : IEmailSender
{
private readonly IEmailApiClient _emailApi;
@@ -34,6 +37,7 @@ public sealed class EmailApiEmailSender : IEmailSender
_log = log;
}
/// <inheritdoc />
public async Task SendContactAsync(ContactRequest req, CancellationToken ct)
{
if (string.IsNullOrWhiteSpace(_contact.ToEmail))
@@ -76,6 +80,7 @@ public sealed class EmailApiEmailSender : IEmailSender
_log.LogInformation("Contact email sent successfully from {SenderEmail}", req.Email);
}
/// <inheritdoc />
public async Task SendSubscribeAsync(SubscribeRequest req, CancellationToken ct)
{
if (string.IsNullOrWhiteSpace(_subscribe.ToEmail))
@@ -108,6 +113,7 @@ public sealed class EmailApiEmailSender : IEmailSender
_log.LogInformation("Subscription email sent successfully for {Email}", req.Email);
}
/// <inheritdoc />
public async Task SendFileDownloadNotificationAsync(string fileName, string? userIp, CancellationToken ct)
{
if (string.IsNullOrWhiteSpace(_fileStorage.ToEmail))
@@ -146,6 +152,7 @@ public sealed class EmailApiEmailSender : IEmailSender
_log.LogInformation("File download notification sent successfully for {FileName}", fileName);
}
/// <inheritdoc />
public async Task SendMatchAsync(string? explicitTo, string subject, string body, string? attachmentPath, CancellationToken ct)
{
var operatorCopy = _emailTemplates.GetOperatorCopy("email.match.subject", "en");
@@ -184,6 +191,7 @@ public sealed class EmailApiEmailSender : IEmailSender
}
}
/// <inheritdoc />
public string BuildMatchEmailBody(string cvDocumentId, JobMatchResponse result, string? jobLabel, string language, string? jobSearchLink = null, int expiryDays = 7)
{
var strengths = result.Strengths?.Count > 0
@@ -221,6 +229,7 @@ public sealed class EmailApiEmailSender : IEmailSender
return body;
}
/// <inheritdoc />
public string BuildMatchEmailSubject(int score, string? jobLabel, string language) =>
_emailTemplates.Render("email.match.subject", language,
("score", score.ToString()),
+4
View File
@@ -5,6 +5,9 @@ using Models.Settings;
namespace Api.Services
{
/// <summary>
/// Verifies reCAPTCHA v2/v3 tokens by calling the Google site-verify API.
/// </summary>
public sealed class RecaptchaVerifier : ICaptchaVerifier
{
private readonly HttpClient _http;
@@ -18,6 +21,7 @@ namespace Api.Services
_log = log;
}
/// <inheritdoc />
public async Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, string? expectedAction, CancellationToken ct)
{
_log.LogDebug("Verifying captcha token for IP {Ip}", userIp ?? "unknown");
@@ -3,9 +3,34 @@ using CvMatcher.Models.Responses;
namespace Api.Services.Contracts;
/// <summary>
/// Orchestrates CV indexing, job matching, and job discovery operations.
/// </summary>
public interface ICvMatcherService
{
/// <summary>
/// Indexes a CV PDF into the RAG system and returns document metadata.
/// Returns cached metadata without re-indexing when the same text hash already exists.
/// </summary>
/// <param name="file">Uploaded CV PDF file.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Upload response with document ID, hash, and indexing statistics.</returns>
Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct);
/// <summary>
/// Scores a CV against a specific job posting URL or pasted description using the LLM.
/// Caches the result so repeat requests for the same (CV, job, language) triple are served instantly.
/// </summary>
/// <param name="request">Match request containing CV document ID, job URL or description, and language preference.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Structured match response with score, summary, strengths, gaps, and recommendations.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>CvMatcherService</c> when <c>request.GdprConsent</c> is <c>false</c>.
/// </exception>
Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct);
/// <summary>
/// Searches the RAG index for job documents most similar to the given CV and scores the top candidates.
/// </summary>
/// <param name="request">Request containing the CV document ID and optional result count limit.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Response with the CV document ID and a list of ranked match results.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>CvMatcherService</c> when no CV document is found for <c>request.CvDocumentId</c>.
/// </exception>
Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct);
}
@@ -1,6 +1,17 @@
namespace Api.Services.Contracts;
/// <summary>
/// Extracts plain text from a job posting, either from a pasted description or by fetching and parsing a URL.
/// </summary>
public interface IJobTextExtractor
{
/// <summary>
/// Returns normalised plain text for the job posting.
/// Prefers <paramref name="jobDescription"/> when provided; otherwise fetches and strips HTML from <paramref name="jobUrl"/>.
/// </summary>
/// <param name="jobUrl">URL of the job posting page, used when no description is pasted; may be <c>null</c>.</param>
/// <param name="jobDescription">Pasted job description text; takes priority over URL fetching. May be <c>null</c>.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// Normalised plain text, truncated to the configured maximum character limit.
/// The <c>JobTextExtractor</c> implementation never applies a limit lower than 4000 characters.
/// </returns>
Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct);
}
@@ -1,7 +1,29 @@
namespace Api.Services.Contracts;
/// <summary>
/// Manages one-time job search tokens and the sessions they trigger.
/// </summary>
public interface IJobTokenService
{
/// <summary>
/// Creates a new single-use job search token linked to the given CV document and user.
/// The token expires after the number of days configured in <c>JobSearch:TokenExpiryDays</c>.
/// </summary>
/// <param name="cvDocumentId">Identifier of the indexed CV document.</param>
/// <param name="email">Email address of the user who will receive the results.</param>
/// <param name="language">Preferred language for result emails (e.g. <c>"en"</c>, <c>"ro"</c>).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The generated token ID, to be embedded in the one-click job search link.</returns>
Task<string> CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct);
/// <summary>
/// Validates the token and, if valid, marks it as used and creates a <c>Pending</c> job search session.
/// </summary>
/// <param name="tokenId">The token ID from the one-click link.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// One of the <c>StartJobSearchStatus</c> string constants:
/// <c>Started</c>, <c>AlreadyUsed</c>, <c>Expired</c>, or <c>NotFound</c>.
/// The status is a plain string, so callers should compare it against those constants
/// rather than against hard-coded literals.
/// </returns>
Task<string> TriggerStartAsync(string tokenId, CancellationToken ct);
}
@@ -10,6 +10,9 @@ using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Orchestrates CV upload, RAG indexing, job text extraction, LLM scoring, and result caching.
/// </summary>
public sealed class CvMatcherService : ICvMatcherService
{
private readonly IRagApiClient _rag;
@@ -35,6 +38,7 @@ public sealed class CvMatcherService : ICvMatcherService
_settings = options.Value;
}
/// <inheritdoc />
public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct)
{
var response = await _rag.IndexCvPdfAsync(file, ct);
@@ -51,6 +55,7 @@ public sealed class CvMatcherService : ICvMatcherService
};
}
/// <inheritdoc />
public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
{
var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
@@ -78,6 +83,7 @@ public sealed class CvMatcherService : ICvMatcherService
return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
}
/// <inheritdoc />
public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
{
if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
@@ -104,6 +110,11 @@ public sealed class CvMatcherService : ICvMatcherService
return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct);
}
/// <summary>
/// Scores a (CV, job) pair with the LLM.
/// Returns a cached result immediately when the same (CV, job, language) triple has been scored before.
/// When no evidence chunks are available from the vector search, falls back to the raw job text.
/// </summary>
private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string language, CancellationToken ct)
{
var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
@@ -135,16 +146,13 @@ public sealed class CvMatcherService : ICvMatcherService
result.JobUrl = job.SourceUrl;
result.Cached = false;
await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct);
//await _email.SendMatchAsync(
// email,
// $"MyAi.ro CV Match: {result.Score}% - {job.Title}",
// BuildEmailBody(cv, job, result),
// ct);
return result;
}
/// <summary>
/// Deserialises the LLM's JSON output into a <see cref="JobMatchResponse"/>.
/// Returns a safe fallback response instead of throwing when the JSON cannot be parsed.
/// </summary>
private static JobMatchResponse ParseResult(string json)
{
try
@@ -165,21 +173,29 @@ public sealed class CvMatcherService : ICvMatcherService
};
}
/// <summary>
/// Produces the query string for vector similarity search from the CV text.
/// At most the first 10000 characters of <paramref name="cvText"/> are kept, and a fixed
/// lead-in naming the aspects of interest is placed in front of them.
/// </summary>
/// <param name="cvText">Plain text of the CV.</param>
/// <returns>The lead-in followed by the (possibly truncated) CV text.</returns>
private static string BuildCvSearchProfile(string cvText)
{
    const string leadIn = "Candidate profile, skills, technologies, seniority, industry experience, project experience: ";
    return leadIn + Limit(cvText, 10000);
}
/// <summary>
/// Picks a short job title out of the job text.
/// The text is split on full stops and line breaks; the first trimmed fragment that is
/// longer than 8 and shorter than 140 characters is used as the title.
/// </summary>
/// <param name="jobText">Plain text of the job posting.</param>
/// <returns>The first suitable fragment, or <c>"Job description"</c> when none qualifies.</returns>
private static string ExtractJobTitle(string jobText)
{
    foreach (var fragment in jobText.Split('.', '\n', '\r'))
    {
        var candidate = fragment.Trim();
        if (candidate.Length > 8 && candidate.Length < 140)
        {
            return candidate;
        }
    }

    return "Job description";
}
/// <summary>
/// Reduces a language tag such as <c>"en-US"</c> to its lower-cased base language code.
/// </summary>
/// <param name="language">Language tag supplied by the caller; may be <c>null</c> or blank.</param>
/// <returns>
/// The part of the tag before the first <c>'-'</c>, lower-cased and trimmed, or <c>"en"</c> when the
/// input is <c>null</c>, blank, or has an empty primary subtag (for example <c>"-US"</c>).
/// </returns>
private static string NormalizeLanguage(string? language)
{
    if (string.IsNullOrWhiteSpace(language))
    {
        return "en";
    }

    var baseCode = language.ToLowerInvariant().Split('-')[0].Trim();

    // A tag that starts with '-' has an empty primary subtag; use the default
    // rather than handing an empty language code to callers.
    return baseCode.Length == 0 ? "en" : baseCode;
}
/// <summary>Maps a language code to its full English name for use in the LLM system prompt.</summary>
private static string LanguageName(string language) => language switch
{
"ro" => "Romanian",
@@ -187,26 +203,6 @@ public sealed class CvMatcherService : ICvMatcherService
_ => "English"
};
/// <summary>
/// Caps <paramref name="value"/> at <paramref name="max"/> characters.
/// </summary>
/// <param name="value">Text to cap.</param>
/// <param name="max">Maximum number of characters to keep.</param>
/// <returns><paramref name="value"/> unchanged when it already fits; otherwise its first <paramref name="max"/> characters.</returns>
private static string Limit(string value, int max)
{
    if (value.Length > max)
    {
        return value.Substring(0, max);
    }

    return value;
}
//private static string BuildEmailBody(RagDocumentDetails cv, RagDocumentDetails job, JobMatchResponse result) => $"""
// CV Matcher result
// CV: {cv.Title}
// Job: {job.Title}
// Job URL: {job.SourceUrl ?? "N/A"}
// Score: {result.Score}%
// Summary:
// {result.Summary}
// Strengths:
// - {string.Join("\n- ", result.Strengths)}
// Gaps:
// - {string.Join("\n- ", result.Gaps)}
// Recommendations:
// - {string.Join("\n- ", result.Recommendations)}
// """;
}
@@ -6,6 +6,10 @@ using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Extracts normalised plain text from a job posting, either from a pasted description or by
/// fetching and stripping the HTML of the job page URL.
/// </summary>
public sealed class JobTextExtractor : IJobTextExtractor
{
private readonly HttpClient _http;
@@ -19,6 +23,7 @@ public sealed class JobTextExtractor : IJobTextExtractor
_http.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
}
/// <inheritdoc />
public async Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct)
{
var pasted = Normalize(jobDescription ?? string.Empty);
@@ -37,12 +42,14 @@ public sealed class JobTextExtractor : IJobTextExtractor
return Limit(Normalize(WebUtility.HtmlDecode(html)));
}
/// <summary>
/// Truncates text to the configured maximum character count.
/// The cap is <c>_settings.MaxJobTextChars</c>, but never less than 4000 characters.
/// </summary>
/// <param name="value">Text to cap.</param>
/// <returns><paramref name="value"/> unchanged when it fits within the cap; otherwise its leading characters up to the cap.</returns>
private string Limit(string value)
{
    var cap = Math.Max(4000, _settings.MaxJobTextChars);
    if (value.Length > cap)
    {
        return value.Substring(0, cap);
    }

    return value;
}
/// <summary>Collapses all whitespace runs to single spaces and trims the result.</summary>
private static string Normalize(string value)
{
if (string.IsNullOrWhiteSpace(value)) return string.Empty;
@@ -11,6 +11,9 @@ using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Creates and validates one-time job search tokens, and creates the corresponding search sessions.
/// </summary>
public sealed class JobTokenService : IJobTokenService
{
private readonly CvSearchDbContext _db;
@@ -30,6 +33,7 @@ public sealed class JobTokenService : IJobTokenService
_logger = logger;
}
/// <inheritdoc />
public async Task<string> CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct)
{
var token = new JobSearchTokenEntity
@@ -49,6 +53,7 @@ public sealed class JobTokenService : IJobTokenService
return token.Id;
}
/// <inheritdoc />
public async Task<string> TriggerStartAsync(string tokenId, CancellationToken ct)
{
var token = await _db.JobSearchTokens.FirstOrDefaultAsync(x => x.Id == tokenId, ct);
@@ -86,12 +91,17 @@ public sealed class JobTokenService : IJobTokenService
return StartJobSearchStatus.Started;
}
/// <summary>
/// Extracts up to 10 meaningful keywords from the CV text using simple heuristics (no LLM).
/// Takes the first 5 usable lines, splits them into words, strips punctuation, and deduplicates.
/// </summary>
private static string ExtractKeywords(string cvText)
{
var lines = cvText
.Split(['\n', '\r'], StringSplitOptions.RemoveEmptyEntries)
.Select(l => l.Trim())
.Where(l => l.Length > 5 && l.Length < 200)
// Skip lines that are purely digits, spaces, and phone/contact punctuation (phone numbers, emails, etc.)
.Where(l => !Regex.IsMatch(l, @"^[\d\s\+\-\(\)\@\.]+$"))
.Take(5)
.ToList();
@@ -5,6 +5,11 @@ using Microsoft.Extensions.Logging;
namespace EmailApi.Data.Services;
/// <summary>
/// Singleton implementation of <see cref="IEmailTemplateService"/> that caches all email templates
/// from the database and refreshes them every 10 minutes.
/// Uses <see cref="IServiceScopeFactory"/> to resolve the scoped repository from a singleton lifetime.
/// </summary>
public sealed class EmailTemplateService : IEmailTemplateService
{
private readonly IServiceScopeFactory _scopeFactory;
@@ -20,6 +25,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
_logger = logger;
}
/// <inheritdoc />
public string Get(string key, string language = "en")
{
EnsureCacheLoaded();
@@ -35,6 +41,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
return key;
}
/// <inheritdoc />
public string Render(string key, string language, params (string Key, string Value)[] placeholders)
{
var template = Get(key, language);
@@ -43,6 +50,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
return template;
}
/// <inheritdoc />
public string? GetOperatorCopy(string key, string language)
{
EnsureCacheLoaded();
@@ -61,6 +69,10 @@ public sealed class EmailTemplateService : IEmailTemplateService
return null;
}
/// <summary>
/// Reloads all templates from the database when the cache TTL has expired.
/// Swaps both caches atomically; logs an error and continues serving the stale cache on failure.
/// </summary>
private void EnsureCacheLoaded()
{
if (DateTime.UtcNow - _loadedAt < CacheTtl) return;
@@ -91,5 +103,6 @@ public sealed class EmailTemplateService : IEmailTemplateService
}
}
/// <summary>Builds the dictionary key used for both caches.</summary>
private static string CacheKey(string key, string language) => $"{key}::{language}";
}
@@ -1,8 +1,38 @@
namespace EmailApi.Data.Services;
/// <summary>
/// Provides access to localised email templates stored in the <c>emailApi.EmailTemplates</c> table.
/// </summary>
/// <remarks>
/// Implementations are expected to cache templates and refresh periodically.
/// </remarks>
public interface IEmailTemplateService
{
/// <summary>
/// Returns the template value for the given key and language.
/// </summary>
/// <remarks>
/// Falls back to <c>"en"</c> when the requested language has no entry.
/// Returns the raw key string when no matching template is found.
/// </remarks>
/// <param name="key">Template key (e.g. <c>"email.match.subject"</c>).</param>
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>); defaults to <c>"en"</c>.</param>
/// <returns>Template value string.</returns>
string Get(string key, string language = "en");
/// <summary>
/// Retrieves the template and substitutes <c>{{placeholder}}</c> tokens with the provided values.
/// </summary>
/// <param name="key">Template key.</param>
/// <param name="language">Two-letter language code.</param>
/// <param name="placeholders">Named replacement pairs in the form <c>("name", value)</c>.</param>
/// <returns>Rendered template string with all placeholders replaced.</returns>
string Render(string key, string language, params (string Key, string Value)[] placeholders);
/// <summary>
/// Returns the operator copy address for the given template key.
/// </summary>
/// <remarks>
/// Uses the specific row's <c>OperatorCopy</c> value when non-empty; otherwise falls back
/// to the first non-empty <c>OperatorCopy</c> across all cached rows, so future template rows
/// with an empty value automatically inherit the globally configured address.
/// </remarks>
/// <param name="key">Template key used to look up the specific row (typically the subject key).</param>
/// <param name="language">Two-letter language code.</param>
/// <returns>Operator copy email address, or <c>null</c> when none is configured.</returns>
string? GetOperatorCopy(string key, string language);
}
+24 -1
View File
@@ -5,6 +5,11 @@ using Swashbuckle.AspNetCore.Annotations;
namespace EmailApi.Controllers;
/// <summary>
/// Internal email relay. Accepts an HTML body fragment from trusted callers
/// (api, cv-search-job), wraps it in the branded HTML shell, and dispatches
/// via SMTP. Protected by X-Internal-Api-Key.
/// </summary>
[ApiController]
[Route("api/email")]
public sealed class EmailController : ControllerBase
@@ -13,9 +18,27 @@ public sealed class EmailController : ControllerBase
public EmailController(SmtpEmailDispatcher dispatcher) => _dispatcher = dispatcher;
/// <summary>
/// Sends an HTML email via SMTP. The supplied body fragment is wrapped in
/// the branded HTML shell before dispatch. Attachments are resolved from
/// the shared file storage volume using the relative path in
/// <see cref="SendEmailRequest.AttachmentPath"/>.
/// </summary>
/// <param name="request">Email payload: recipients, subject, HTML body fragment, optional attachment path.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>204 No Content on success.</returns>
[HttpPost("send")]
[SwaggerOperation(Summary = "Send an HTML email via SMTP")]
[SwaggerOperation(
Summary = "Send an HTML email via SMTP",
Description = "Wraps the provided HTML body in the branded shell and sends via SMTP. " +
"If AttachmentPath is set, resolves the file from the shared file-storage volume. " +
"Returns 204 on success; 400 when the request body is invalid; 500 on SMTP failure.")]
[SwaggerResponse(StatusCodes.Status204NoContent, "Email dispatched successfully")]
[SwaggerResponse(StatusCodes.Status400BadRequest, "Request body is missing or invalid")]
[SwaggerResponse(StatusCodes.Status500InternalServerError, "SMTP dispatch failed")]
[ProducesResponseType(StatusCodes.Status204NoContent)]
[ProducesResponseType(StatusCodes.Status400BadRequest)]
[ProducesResponseType(StatusCodes.Status500InternalServerError)]
public async Task<IActionResult> Send([FromBody] SendEmailRequest request, CancellationToken ct)
{
await _dispatcher.SendAsync(request, ct);
@@ -8,6 +8,10 @@ using Models.Settings;
namespace EmailApi.Services;
/// <summary>
/// Wraps an HTML body fragment in the branded HTML shell and sends the resulting email via SMTP using MailKit.
/// Attaches files from the shared file-storage volume when an attachment path is provided.
/// </summary>
public sealed class SmtpEmailDispatcher
{
private readonly SmtpSettings _smtp;
@@ -29,6 +33,13 @@ public sealed class SmtpEmailDispatcher
_environmentName = Environment.GetEnvironmentVariable("APP_ENVIRONMENT_NAME") ?? "Development";
}
/// <summary>
/// Builds a <see cref="MimeMessage"/> from <paramref name="req"/>, wraps the body in the HTML shell,
/// optionally attaches a file, and sends via the configured SMTP server.
/// Logs a warning and returns without throwing when the SMTP host is not configured.
/// </summary>
/// <param name="req">Email payload containing recipients, subject, HTML body, and optional attachment path.</param>
/// <param name="ct">Cancellation token.</param>
public async Task SendAsync(SendEmailRequest req, CancellationToken ct)
{
if (string.IsNullOrWhiteSpace(_smtp.Host))
@@ -6,6 +6,11 @@ using System.Collections.Concurrent;
namespace MyAi.Data.Services;
/// <summary>
/// Singleton implementation of <see cref="ITemplateService"/> that caches all templates from the
/// <c>myAi.Templates</c> table and refreshes them every 10 minutes.
/// Uses <see cref="IServiceScopeFactory"/> to resolve the scoped DbContext from a singleton lifetime.
/// </summary>
public sealed class DbTemplateService : ITemplateService
{
private readonly IServiceScopeFactory _scopeFactory;
@@ -20,6 +25,7 @@ public sealed class DbTemplateService : ITemplateService
_logger = logger;
}
/// <inheritdoc />
public string Get(string key, string language = "en")
{
EnsureCacheLoaded();
@@ -35,6 +41,7 @@ public sealed class DbTemplateService : ITemplateService
return key;
}
/// <inheritdoc />
public string Render(string key, string language, params (string Key, string Value)[] placeholders)
{
var template = Get(key, language);
@@ -43,6 +50,10 @@ public sealed class DbTemplateService : ITemplateService
return template;
}
/// <summary>
/// Reloads all templates from the database when the cache TTL has expired.
/// Swaps the cache atomically; logs an error and continues serving the stale cache on failure.
/// </summary>
private void EnsureCacheLoaded()
{
if (DateTime.UtcNow - _loadedAt < CacheTtl) return;
@@ -66,5 +77,6 @@ public sealed class DbTemplateService : ITemplateService
}
}
/// <summary>Builds the dictionary key used in the cache.</summary>
private static string CacheKey(string key, string language) => $"{key}::{language}";
}
@@ -1,7 +1,27 @@
namespace MyAi.Data.Services;
/// <summary>
/// Provides access to localised string templates stored in the <c>myAi.Templates</c> table.
/// </summary>
/// <remarks>
/// Implementations are expected to cache templates and refresh periodically.
/// </remarks>
public interface ITemplateService
{
/// <summary>
/// Returns the template value for the given key and language.
/// </summary>
/// <remarks>
/// Falls back to <c>"en"</c> when the requested language has no entry.
/// Returns the raw key string when no matching template is found.
/// </remarks>
/// <param name="key">Template key (e.g. <c>"html.job-search-start.title"</c>).</param>
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>); defaults to <c>"en"</c>.</param>
/// <returns>Template value string.</returns>
string Get(string key, string language = "en");
/// <summary>
/// Retrieves the template and substitutes <c>{{placeholder}}</c> tokens with the provided values.
/// </summary>
/// <param name="key">Template key.</param>
/// <param name="language">Two-letter language code.</param>
/// <param name="placeholders">Named replacement pairs in the form <c>("name", value)</c>.</param>
/// <returns>Rendered template string with all placeholders replaced.</returns>
string Render(string key, string language, params (string Key, string Value)[] placeholders);
}
@@ -2,7 +2,20 @@ using Rag.Models;
namespace Api.Services.Contracts;
/// <summary>
/// Classifies a document into a known type (cv, job, contract, etc.) and extracts a title.
/// </summary>
public interface IDocumentClassifier
{
/// <summary>
/// Determines the document type and title from the provided text.
/// </summary>
/// <remarks>
/// Uses <paramref name="providedType"/> and <paramref name="providedTitle"/> directly when supplied;
/// otherwise falls back to a keyword-frequency heuristic over the text.
/// </remarks>
/// <param name="text">Full document text to classify.</param>
/// <param name="providedType">Caller-supplied document type hint; skips heuristic when non-empty. May be <c>null</c>.</param>
/// <param name="providedTitle">Caller-supplied document title; skips title extraction when non-empty. May be <c>null</c>.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A <see cref="DocumentClassification"/> with type, confidence score, and title.</returns>
Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct);
}
@@ -3,10 +3,46 @@ using Rag.Models.Responses;
namespace Api.Services.Contracts;
/// <summary>
/// Core RAG (Retrieval-Augmented Generation) operations: document indexing, vector search, and retrieval.
/// </summary>
public interface IRagService
{
/// <summary>
/// Indexes a plain-text document by classifying it, chunking the text, generating embeddings,
/// and persisting the document and its chunks. Returns cached metadata when the text hash already exists.
/// </summary>
/// <param name="request">Indexing request with text, optional document type, title, and source URL.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>An <see cref="IndexDocumentResponse"/> with document ID, hash, type, and chunk/character counts.</returns>
Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct);
/// <summary>
/// Extracts text from a PDF file, then indexes it the same way as <see cref="IndexTextAsync"/>.
/// Returns cached metadata when the extracted text hash already exists.
/// </summary>
/// <param name="file">Uploaded PDF file (must be ≤ configured max size).</param>
/// <param name="documentType">Optional document type hint; if omitted the classifier is used.</param>
/// <param name="title">Optional title override; if omitted the title is extracted from the text.</param>
/// <param name="sourceUrl">Optional source URL to associate with the document.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>An <see cref="IndexDocumentResponse"/> with document ID, hash, type, and chunk/character counts.</returns>
Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct);
/// <summary>
/// Performs a vector similarity search over indexed document chunks, groups results by document,
/// and returns the top-K documents with their best-matching chunks.
/// </summary>
/// <param name="request">Search request with query text, optional document type filter, and top-K limit.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A <see cref="SearchResponse"/> holding the ranked list of matching documents with scored chunk excerpts.</returns>
Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct);
/// <summary>
/// Retrieves full document details — including the original text — by document ID.
/// </summary>
/// <param name="documentId">The document's unique identifier.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A <see cref="RagDocumentDetailsResponse"/>, or <c>null</c> if no document with that ID exists.</returns>
Task<RagDocumentDetailsResponse?> GetDocumentAsync(string documentId, CancellationToken ct);
}
@@ -1,6 +1,17 @@
namespace Api.Services.Contracts;
/// <summary>
/// Splits document text into overlapping chunks suitable for embedding and vector search.
/// </summary>
public interface ITextChunker
{
/// <summary>
/// Divides <paramref name="text"/> into a list of chunks using a sliding window.
/// Adjacent chunks share <paramref name="overlap"/> characters to preserve cross-boundary context.
/// Out-of-range sizes are clamped rather than rejected.
/// </summary>
/// <param name="text">The full document text to chunk.</param>
/// <param name="chunkSize">Maximum character length per chunk (clamped to the range 300 to 3000).</param>
/// <param name="overlap">Number of trailing characters from the previous chunk to repeat at the start of the next (clamped to the range 0 to chunkSize / 2, using the already-clamped chunk size).</param>
/// <returns>Ordered list of non-empty text chunks; an empty list when <paramref name="text"/> is null or whitespace.</returns>
IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap);
}
@@ -1,7 +1,23 @@
namespace Api.Services.Contracts;
/// <summary>
/// Extracts and normalises plain text from documents.
/// </summary>
public interface ITextExtractor
{
/// <summary>
/// Reads all pages of a PDF stream and returns the concatenated, normalised plain text.
/// </summary>
/// <remarks>
/// The PdfPig-based <c>TextExtractor</c> is not an <c>async</c> method: it does its work on the calling
/// thread and returns an already-completed task, so awaiting it does not yield.
/// </remarks>
/// <param name="stream">Readable stream positioned at the start of the PDF file.</param>
/// <param name="ct">Cancellation token (checked between pages).</param>
/// <returns>Normalised plain text extracted from the PDF.</returns>
Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct);
/// <summary>
/// Collapses all whitespace sequences in <paramref name="value"/> to single spaces and trims the result.
/// Returns an empty string for null/whitespace input.
/// </summary>
/// <param name="value">Raw text to normalise. Declared non-nullable, but <c>TextExtractor</c> guards with <c>string.IsNullOrWhiteSpace</c>, so a <c>null</c> argument yields an empty string instead of throwing.</param>
/// <returns>Whitespace-normalised text; never <c>null</c>.</returns>
string Normalize(string value);
}
@@ -4,6 +4,9 @@ using Rag.Models;
namespace Api.Services;
/// <summary>
/// Classifies documents by type using a keyword-frequency heuristic and extracts a title from the text.
/// </summary>
public sealed class DocumentClassifier : IDocumentClassifier
{
private static readonly HashSet<string> KnownTypes = new(StringComparer.OrdinalIgnoreCase)
@@ -11,6 +14,7 @@ public sealed class DocumentClassifier : IDocumentClassifier
"cv", "job", "article", "contract", "invoice", "product", "documentation", "unknown"
};
/// <inheritdoc />
public Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct)
{
if (!string.IsNullOrWhiteSpace(providedType))
@@ -24,6 +28,8 @@ public sealed class DocumentClassifier : IDocumentClassifier
});
}
// Keyword-frequency heuristic: count how many characteristic terms each document
// type contributes to the text, then pick the type with the highest hit count.
var lower = text.ToLowerInvariant();
var scores = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase)
{
@@ -37,6 +43,8 @@ public sealed class DocumentClassifier : IDocumentClassifier
var best = scores.OrderByDescending(x => x.Value).First();
var type = best.Value <= 0 ? "unknown" : best.Key;
// Confidence baseline 0.45 + 0.08 per matched keyword term, capped at 0.95.
// Zero hits → 0.25 (effectively unknown).
var confidence = best.Value <= 0 ? 0.25 : Math.Min(0.95, 0.45 + best.Value * 0.08);
return Task.FromResult(new DocumentClassification
@@ -47,14 +55,20 @@ public sealed class DocumentClassifier : IDocumentClassifier
});
}
/// <summary>
/// Tallies how many entries of <paramref name="terms"/> occur as substrings of <paramref name="lower"/>.
/// Each entry contributes at most one hit regardless of how often it occurs in the text.
/// Matching is ordinal and case-sensitive, which is why the caller passes already lower-cased text.
/// </summary>
private static int Count(string lower, params string[] terms)
{
    bool OccursInText(string term) => lower.Contains(term);

    return terms.Where(OccursInText).Count();
}
/// <summary>
/// Turns a caller-supplied type name into a slug: trims it, lower-cases it with the invariant culture,
/// and replaces every character other than <c>a-z</c>, <c>0-9</c>, <c>_</c> and <c>-</c> with a hyphen
/// (underscores and hyphens are kept as-is). Returns <c>"unknown"</c> when nothing remains, i.e. for blank input.
/// </summary>
private static string NormalizeType(string value)
{
    var lowered = value.Trim().ToLowerInvariant();
    var slug = Regex.Replace(lowered, "[^a-z0-9_-]", "-");

    if (string.IsNullOrWhiteSpace(slug))
    {
        return "unknown";
    }

    return slug;
}
/// <summary>
/// Returns <paramref name="providedTitle"/> when available; otherwise extracts the first sentence-like
/// fragment from the text, or falls back to a generic "{type} document" label.
/// </summary>
private static string BuildTitle(string? providedTitle, string text, string documentType)
{
if (!string.IsNullOrWhiteSpace(providedTitle)) return providedTitle.Trim();
+12
View File
@@ -11,6 +11,9 @@ using CommonHelpers;
namespace Api.Services;
/// <summary>
/// Implements the core RAG pipeline: document classification, chunking, embedding, vector search, and retrieval.
/// </summary>
public sealed class RagService : IRagService
{
private readonly ITextExtractor _textExtractor;
@@ -36,6 +39,7 @@ public sealed class RagService : IRagService
_settings = options.Value;
}
/// <inheritdoc />
public async Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct)
{
var text = _textExtractor.Normalize(request.Text ?? string.Empty);
@@ -44,6 +48,7 @@ public sealed class RagService : IRagService
return await IndexNormalizedTextAsync(text, request.DocumentType, request.Title, request.SourceUrl, request.Metadata, ct);
}
/// <inheritdoc />
public async Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct)
{
if (file.Length <= 0) throw new InvalidOperationException("Uploaded file is empty.");
@@ -57,6 +62,7 @@ public sealed class RagService : IRagService
return await IndexNormalizedTextAsync(text, documentType, title ?? file.FileName, sourceUrl, new Dictionary<string, string> { ["fileName"] = file.FileName }, ct);
}
/// <inheritdoc />
public async Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct)
{
var query = _textExtractor.Normalize(request.QueryText);
@@ -97,6 +103,7 @@ public sealed class RagService : IRagService
return new SearchResponse { Results = results };
}
/// <inheritdoc />
public async Task<RagDocumentDetailsResponse?> GetDocumentAsync(string documentId, CancellationToken ct)
{
var document = await _repository.GetDocumentByIdAsync(documentId, ct);
@@ -112,6 +119,11 @@ public sealed class RagService : IRagService
};
}
/// <summary>
/// Core indexing pipeline: computes a text hash for deduplication, classifies and chunks the text,
/// generates embeddings for each chunk, and persists the document and chunks to the repository.
/// Returns cached metadata without re-indexing when the same text hash and source URL already exist.
/// </summary>
private async Task<IndexDocumentResponse> IndexNormalizedTextAsync(
string text,
string? documentType,
+6
View File
@@ -2,14 +2,20 @@ using Api.Services.Contracts;
namespace Api.Services;
/// <summary>
/// Splits text into overlapping fixed-size chunks using a sliding window for use in vector embedding pipelines.
/// </summary>
public sealed class TextChunker : ITextChunker
{
/// <inheritdoc />
public IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap)
{
if (string.IsNullOrWhiteSpace(text)) return [];
chunkSize = Math.Clamp(chunkSize, 300, 3000);
overlap = Math.Clamp(overlap, 0, chunkSize / 2);
// Sliding window: step forward by (chunkSize - overlap) each iteration so
// adjacent chunks share `overlap` characters, preserving cross-boundary context.
var chunks = new List<string>();
var start = 0;
while (start < text.Length)
+5
View File
@@ -4,8 +4,12 @@ using UglyToad.PdfPig;
namespace Api.Services;
/// <summary>
/// Extracts and normalises plain text from PDF files using PdfPig.
/// </summary>
public sealed class TextExtractor : ITextExtractor
{
/// <inheritdoc />
public Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct)
{
using var document = PdfDocument.Open(stream);
@@ -19,6 +23,7 @@ public sealed class TextExtractor : ITextExtractor
return Task.FromResult(Normalize(builder.ToString()));
}
/// <inheritdoc />
public string Normalize(string value)
{
if (string.IsNullOrWhiteSpace(value)) return string.Empty;
+2 -2
View File
@@ -212,8 +212,8 @@ Every service follows this structure:
## Coding conventions
- No XML doc comments on internal code; Swagger annotations on public controller actions
- No explanatory inline comments — code should be self-describing
- XML doc comments (`/// <summary>`) on all public methods, interfaces, and non-trivial private/protected helpers; Swagger annotations on public controller actions
- Inline `//` comments for non-obvious logic; avoid restating what the code already says clearly
- Use `$$"""..."""` raw string literals (not `$"""`) when the content contains CSS or other curly-brace-heavy text — avoids CS9006 brace-escaping errors
- `sealed` on all concrete service classes
- Settings classes injected via `IOptions<T>` — registered with `Configure<T>(config.GetSection("..."))`
@@ -7,6 +7,10 @@ using Microsoft.Extensions.Logging;
namespace CvSearchJob.Services;
/// <summary>
/// Sends job search results emails to the session user and the operator copy address,
/// with an optional CV PDF attachment.
/// </summary>
public sealed class CvSearchEmailSender
{
private readonly IEmailApiClient _emailApi;
@@ -23,6 +27,16 @@ public sealed class CvSearchEmailSender
_logger = logger;
}
/// <summary>
/// Builds and sends the job search results email.
/// Resolves the recipient list from <paramref name="toEmail"/> and the operator copy address
/// stored in the email template. Does nothing when no recipients can be resolved.
/// </summary>
/// <param name="toEmail">Primary recipient (the user who triggered the search).</param>
/// <param name="attachmentFileName">Relative filename of the CV PDF to attach, or <c>null</c>.</param>
/// <param name="results">Ranked list of job search results to include in the email body.</param>
/// <param name="language">Two-letter language code for template rendering.</param>
/// <param name="ct">Cancellation token.</param>
public async Task SendResultsAsync(
string toEmail,
string? attachmentFileName,
@@ -64,6 +78,10 @@ public sealed class CvSearchEmailSender
}
}
/// <summary>
/// Renders the HTML email body from the results list.
/// Returns the empty-results template when no results are present.
/// </summary>
private string BuildBody(IReadOnlyList<JobSearchResultEntity> results, string language)
{
if (results.Count == 0)
@@ -92,6 +110,10 @@ public sealed class CvSearchEmailSender
("items", items.ToString()));
}
/// <summary>
/// Attempts to deserialise the stored result JSON into a <see cref="JobMatchResponse"/>.
/// Returns <c>null</c> on parse failure so the email still renders without a summary.
/// </summary>
private static JobMatchResponse? TryParseResult(string json)
{
try
@@ -5,6 +5,11 @@ using Microsoft.Extensions.Logging;
namespace CvSearchJob.Services;
/// <summary>
/// Config-driven HTML scraper that fetches a provider's job listing page and extracts matching job URLs.
/// Uses a two-stage anchor filter: href must contain the provider's link pattern, and anchor text must
/// contain at least one CV keyword.
/// </summary>
public sealed class HtmlJobSearcher
{
private readonly HttpClient _http;
@@ -18,6 +23,15 @@ public sealed class HtmlJobSearcher
_http.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; MyAi.ro CV-Search/1.0)");
}
/// <summary>
/// Fetches the provider's search result page for the combined initial + CV keywords, parses all anchor
/// tags, applies the two-stage filter, and returns up to <see cref="JobProviderConfig.MaxResults"/> absolute URLs.
/// Returns an empty list when the HTTP request fails rather than throwing.
/// </summary>
/// <param name="provider">Provider configuration including search URL template, link filter, and result cap.</param>
/// <param name="cvKeywords">Keywords extracted from the user's CV to inject into the search query.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Deduplicated list of absolute job page URLs (query string stripped).</returns>
public async Task<IReadOnlyList<string>> SearchJobUrlsAsync(
JobProviderConfig provider,
IReadOnlyList<string> cvKeywords,
@@ -75,6 +89,7 @@ public sealed class HtmlJobSearcher
continue;
}
// Strip query string and fragment so different tracking variants of the same URL collapse to one.
var url = absoluteUri.GetLeftPart(UriPartial.Path);
if (seen.Add(url))
results.Add(url);
@@ -14,6 +14,10 @@ using Microsoft.Extensions.Options;
namespace CvSearchJob.Tasks;
/// <summary>
/// Background job task that processes pending job search sessions: scrapes providers,
/// scores each URL against the CV via the matcher API, persists results, and sends the results email.
/// </summary>
public sealed class CvSearchJobTask : IJobTask
{
private readonly IServiceScopeFactory _scopeFactory;
@@ -41,6 +45,11 @@ public sealed class CvSearchJobTask : IJobTask
_logger = logger;
}
/// <summary>
/// Called by the scheduler on each tick. Resets orphaned sessions, picks the oldest pending session,
/// runs the full search pipeline, and sends the results email.
/// Does nothing when <c>JobSearch:Enabled</c> is <c>false</c>.
/// </summary>
public async Task ExecuteAsync(IConfiguration parametersSection, CancellationToken cancellationToken)
{
if (!_settings.Enabled) return;
@@ -92,6 +101,10 @@ public sealed class CvSearchJobTask : IJobTask
}
}
/// <summary>
/// Runs the full search pipeline for a session: scrapes all providers, deduplicates URLs,
/// scores each candidate via the matcher API, and persists results that meet the minimum score threshold.
/// </summary>
private async Task<List<JobSearchResultEntity>> RunSearchAsync(
JobSearchSessionEntity session,
CvSearchDbContext db,
@@ -125,6 +138,7 @@ public sealed class CvSearchJobTask : IJobTask
{
CvDocumentId = session.CvDocumentId,
JobUrl = url,
// User already gave GDPR consent when they clicked the one-time job search link
GdprConsent = true
};
@@ -162,6 +176,10 @@ public sealed class CvSearchJobTask : IJobTask
return results;
}
/// <summary>
/// Deserialises the provider configuration snapshot stored on the session.
/// Falls back to the current live config when the snapshot is absent or unparseable.
/// </summary>
private List<JobProviderConfig> GetProviders(string? providerConfigJson)
{
if (string.IsNullOrWhiteSpace(providerConfigJson)) return _settings.Providers.Where(p => p.Enabled).ToList();
@@ -177,6 +195,10 @@ public sealed class CvSearchJobTask : IJobTask
}
}
/// <summary>
/// Infers the provider name from the job URL by matching against each provider's <c>JobLinkContains</c> pattern.
/// Falls back to the URL hostname when no provider matches.
/// </summary>
private static string GuessProvider(string url, List<JobProviderConfig> providers)
{
foreach (var p in providers)
@@ -189,8 +211,12 @@ public sealed class CvSearchJobTask : IJobTask
return Uri.TryCreate(url, UriKind.Absolute, out var uri) ? uri.Host : "unknown";
}
/// <summary>
/// Constructs the CV PDF filename from the document ID.
/// </summary>
private static string BuildCvFileName(string cvDocumentId)
{
// Strip non-alphanumeric characters so the filename is safe for all OS/email clients.
var safeId = string.Concat(cvDocumentId.Where(char.IsLetterOrDigit));
if (string.IsNullOrWhiteSpace(safeId)) safeId = "cv";
return $"{safeId}.pdf";