Add XML doc to all service interfaces and implementations (#26)

- Update CLAUDE.md: replace incorrect 'no XML doc on internal code' rule
  with the correct convention (XML doc on all public methods and
  non-trivial private/protected helpers)
- Restore /// <summary> on FileDownloadController private helpers
  (HandleRangeRequest, StreamRangeAsync)
- Add full XML doc to all service contracts:
  ICaptchaVerifier, IEmailSender, ICvMatcherService, IJobTextExtractor,
  IJobTokenService, IDocumentClassifier, IRagService, ITextChunker,
  ITextExtractor, IEmailTemplateService, ITemplateService
- Add /// <summary> and /// <inheritdoc /> to all concrete service classes
  and their methods: RecaptchaVerifier, EmailApiEmailSender,
  SmtpEmailDispatcher, CvMatcherService, JobTextExtractor, JobTokenService,
  RagService, DocumentClassifier, TextChunker, TextExtractor,
  HtmlJobSearcher, CvSearchEmailSender, CvSearchJobTask,
  EmailTemplateService, DbTemplateService

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-28 09:17:42 +03:00
parent 4ee4a59b5e
commit 16bb195cb5
28 changed files with 436 additions and 6 deletions
@@ -131,7 +131,9 @@ namespace Api.Controllers
}
}
// Handles HTTP range requests for partial content downloads and resume support.
/// <summary>
/// Handles HTTP range requests for partial content downloads and resume support.
/// </summary>
private async Task<IActionResult> HandleRangeRequest(
string filePath,
long fileLength,
@@ -188,7 +190,9 @@ namespace Api.Controllers
}
}
// Efficiently streams a specific byte range from source to destination.
/// <summary>
/// Efficiently streams a specific byte range from source to destination.
/// </summary>
private static async Task StreamRangeAsync(Stream source, Stream destination, long bytesToRead)
{
var buffer = new byte[BufferSize];
@@ -1,9 +1,21 @@
using Api.Services.Contracts.Models;
using Api.Services.Contracts.Models;
namespace Api.Services.Contracts
{
/// <summary>
/// Contract for validating a client-supplied reCAPTCHA token against the Google verification API.
/// </summary>
public interface ICaptchaVerifier
{
/// <summary>
/// Sends the token to the Google reCAPTCHA verification endpoint and
/// returns a verdict indicating success, score, and any failure reason.
/// </summary>
/// <param name="token">The reCAPTCHA token provided by the client.</param>
/// <param name="userIp">Optional remote IP address passed to Google for additional risk analysis; may be <c>null</c>.</param>
/// <param name="expectedAction">Optional action name to validate against the token's embedded action (v3 only); may be <c>null</c>.</param>
/// <param name="ct">Token used to cancel the verification call.</param>
/// <returns>A task that resolves to a <see cref="CaptchaVerdictModel"/> with the verification outcome.</returns>
Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, string? expectedAction, CancellationToken ct);
}
}
+51 -1
View File
@@ -1,15 +1,65 @@
using CvMatcher.Models.Responses;
using CvMatcher.Models.Responses;
using Models.Requests;
namespace Api.Services.Contracts
{
/// <summary>
/// Abstraction for sending transactional emails from the public API and for
/// building the subject and body of CV match emails.
/// </summary>
public interface IEmailSender
{
/// <summary>
/// Sends a contact-form message to the configured operator address.
/// </summary>
/// <param name="req">Contact request containing name, email, subject, and message.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the send operation has finished.</returns>
Task SendContactAsync(ContactRequest req, CancellationToken ct);
/// <summary>
/// Notifies the configured operator address that a new email subscription was received.
/// </summary>
/// <param name="req">Subscription request containing the subscriber's email address.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the send operation has finished.</returns>
Task SendSubscribeAsync(SubscribeRequest req, CancellationToken ct);
/// <summary>
/// Sends a background notification when a file download is initiated.
/// Does nothing when no notification address is configured.
/// </summary>
/// <param name="fileName">Name of the downloaded file.</param>
/// <param name="userIp">Remote IP address of the downloader, or <c>null</c> if unavailable.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the send operation has finished.</returns>
Task SendFileDownloadNotificationAsync(string fileName, string? userIp, CancellationToken ct);
/// <summary>
/// Sends a CV match results email to the user and the operator copy address.
/// </summary>
/// <param name="explicitTo">Primary recipient email address, or <c>null</c> to send only the operator copy.</param>
/// <param name="subject">Email subject line.</param>
/// <param name="body">Pre-built HTML body fragment.</param>
/// <param name="attachmentPath">Full path to a CV PDF to attach, or <c>null</c> for no attachment.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that completes when the send operation has finished.</returns>
Task SendMatchAsync(string? explicitTo, string subject, string body, string? attachmentPath, CancellationToken ct);
/// <summary>
/// Builds the localised subject line for a CV match email.
/// </summary>
/// <param name="score">Match score percentage (0-100).</param>
/// <param name="jobLabel">Human-readable job title or label; may be <c>null</c>.</param>
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>).</param>
/// <returns>Rendered subject string.</returns>
string BuildMatchEmailSubject(int score, string? jobLabel, string language);
/// <summary>
/// Builds the full HTML body for a CV match email, including an optional job-search footer link.
/// </summary>
/// <param name="cvDocumentId">Identifier of the indexed CV document.</param>
/// <param name="result">Structured match response from the CV matcher engine.</param>
/// <param name="jobLabel">Human-readable job title or label; may be <c>null</c>.</param>
/// <param name="language">Two-letter language code.</param>
/// <param name="jobSearchLink">Optional one-click job-search URL to append as a footer CTA; defaults to <c>null</c>.</param>
/// <param name="expiryDays">Number of days until the job-search link expires (shown in the footer copy); defaults to 7.</param>
/// <returns>Rendered HTML body string.</returns>
string BuildMatchEmailBody(string cvDocumentId, JobMatchResponse result, string? jobLabel, string language, string? jobSearchLink = null, int expiryDays = 7);
}
}
+9
View File
@@ -9,6 +9,9 @@ using Models.Settings;
namespace Api.Services;
/// <summary>
/// Implements <see cref="IEmailSender"/> by delegating all email dispatch to the internal email-api service via Refit.
/// </summary>
public sealed class EmailApiEmailSender : IEmailSender
{
private readonly IEmailApiClient _emailApi;
@@ -34,6 +37,7 @@ public sealed class EmailApiEmailSender : IEmailSender
_log = log;
}
/// <inheritdoc />
public async Task SendContactAsync(ContactRequest req, CancellationToken ct)
{
if (string.IsNullOrWhiteSpace(_contact.ToEmail))
@@ -76,6 +80,7 @@ public sealed class EmailApiEmailSender : IEmailSender
_log.LogInformation("Contact email sent successfully from {SenderEmail}", req.Email);
}
/// <inheritdoc />
public async Task SendSubscribeAsync(SubscribeRequest req, CancellationToken ct)
{
if (string.IsNullOrWhiteSpace(_subscribe.ToEmail))
@@ -108,6 +113,7 @@ public sealed class EmailApiEmailSender : IEmailSender
_log.LogInformation("Subscription email sent successfully for {Email}", req.Email);
}
/// <inheritdoc />
public async Task SendFileDownloadNotificationAsync(string fileName, string? userIp, CancellationToken ct)
{
if (string.IsNullOrWhiteSpace(_fileStorage.ToEmail))
@@ -146,6 +152,7 @@ public sealed class EmailApiEmailSender : IEmailSender
_log.LogInformation("File download notification sent successfully for {FileName}", fileName);
}
/// <inheritdoc />
public async Task SendMatchAsync(string? explicitTo, string subject, string body, string? attachmentPath, CancellationToken ct)
{
var operatorCopy = _emailTemplates.GetOperatorCopy("email.match.subject", "en");
@@ -184,6 +191,7 @@ public sealed class EmailApiEmailSender : IEmailSender
}
}
/// <inheritdoc />
public string BuildMatchEmailBody(string cvDocumentId, JobMatchResponse result, string? jobLabel, string language, string? jobSearchLink = null, int expiryDays = 7)
{
var strengths = result.Strengths?.Count > 0
@@ -221,6 +229,7 @@ public sealed class EmailApiEmailSender : IEmailSender
return body;
}
/// <inheritdoc />
public string BuildMatchEmailSubject(int score, string? jobLabel, string language) =>
_emailTemplates.Render("email.match.subject", language,
("score", score.ToString()),
+4
View File
@@ -5,6 +5,9 @@ using Models.Settings;
namespace Api.Services
{
/// <summary>
/// Verifies reCAPTCHA v2/v3 tokens by calling the Google site-verify API.
/// </summary>
public sealed class RecaptchaVerifier : ICaptchaVerifier
{
private readonly HttpClient _http;
@@ -18,6 +21,7 @@ namespace Api.Services
_log = log;
}
/// <inheritdoc />
public async Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, string? expectedAction, CancellationToken ct)
{
_log.LogDebug("Verifying captcha token for IP {Ip}", userIp ?? "unknown");
@@ -3,9 +3,34 @@ using CvMatcher.Models.Responses;
namespace Api.Services.Contracts;
/// <summary>
/// Orchestrates CV indexing, job matching, and job discovery operations.
/// </summary>
public interface ICvMatcherService
{
/// <summary>
/// Indexes a CV PDF into the RAG system and returns document metadata.
/// Returns cached metadata without re-indexing when the same text hash already exists.
/// </summary>
/// <param name="file">Uploaded CV PDF file.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Upload response with document ID, hash, and indexing statistics.</returns>
Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct);
/// <summary>
/// Scores a CV against a specific job posting URL or pasted description using the LLM.
/// Caches the result so repeat requests for the same (CV, job, language) triple are served instantly.
/// </summary>
/// <param name="request">Match request containing CV document ID, job URL or description, and language preference.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Structured match response with score, summary, strengths, gaps, and recommendations.</returns>
/// <exception cref="System.InvalidOperationException">
/// Thrown by the <c>CvMatcherService</c> implementation when the request does not carry GDPR consent.
/// </exception>
Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct);
/// <summary>
/// Searches the RAG index for job documents most similar to the given CV and scores the top candidates.
/// </summary>
/// <param name="request">Request containing the CV document ID and optional result count limit.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Response with the CV document ID and a list of ranked match results.</returns>
/// <exception cref="System.InvalidOperationException">
/// Thrown by the <c>CvMatcherService</c> implementation when the CV document cannot be found.
/// </exception>
Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct);
}
@@ -1,6 +1,17 @@
namespace Api.Services.Contracts;
/// <summary>
/// Extracts plain text from a job posting, either from a pasted description or by fetching and parsing a URL.
/// </summary>
public interface IJobTextExtractor
{
/// <summary>
/// Returns normalised plain text for the job posting.
/// Prefers <paramref name="jobDescription"/> when provided; otherwise fetches and strips HTML from <paramref name="jobUrl"/>.
/// </summary>
/// <param name="jobUrl">URL of the job posting page, used when no description is pasted; may be <c>null</c>.</param>
/// <param name="jobDescription">Pasted job description text; takes priority over URL fetching; may be <c>null</c>.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that resolves to the normalised plain text, truncated to the configured maximum character limit.</returns>
Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct);
}
@@ -1,7 +1,29 @@
namespace Api.Services.Contracts;
/// <summary>
/// Manages one-time job search tokens and the sessions they trigger.
/// </summary>
public interface IJobTokenService
{
/// <summary>
/// Creates a new single-use job search token linked to the given CV document and user.
/// The token expires after the number of days configured in <c>JobSearch:TokenExpiryDays</c>.
/// </summary>
/// <param name="cvDocumentId">Identifier of the indexed CV document.</param>
/// <param name="email">Email address of the user who will receive the results.</param>
/// <param name="language">Preferred language for result emails (e.g. <c>"en"</c>, <c>"ro"</c>).</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that resolves to the generated token ID, to be embedded in the one-click job search link.</returns>
Task<string> CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct);
/// <summary>
/// Validates the token and, if valid, marks it as used and creates a <c>Pending</c> job search session.
/// </summary>
/// <param name="tokenId">The token ID from the one-click link.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>
/// A task that resolves to one of the <c>StartJobSearchStatus</c> string constants:
/// <c>Started</c>, <c>AlreadyUsed</c>, <c>Expired</c>, or <c>NotFound</c>.
/// The outcome is reported through this value rather than through an exception.
/// </returns>
Task<string> TriggerStartAsync(string tokenId, CancellationToken ct);
}
@@ -10,6 +10,9 @@ using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Orchestrates CV upload, RAG indexing, job text extraction, LLM scoring, and result caching.
/// </summary>
public sealed class CvMatcherService : ICvMatcherService
{
private readonly IRagApiClient _rag;
@@ -35,6 +38,7 @@ public sealed class CvMatcherService : ICvMatcherService
_settings = options.Value;
}
/// <inheritdoc />
public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, CancellationToken ct)
{
var response = await _rag.IndexCvPdfAsync(file, ct);
@@ -51,6 +55,7 @@ public sealed class CvMatcherService : ICvMatcherService
};
}
/// <inheritdoc />
public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
{
var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
@@ -78,6 +83,7 @@ public sealed class CvMatcherService : ICvMatcherService
return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
}
/// <inheritdoc />
public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
{
if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
@@ -104,6 +110,11 @@ public sealed class CvMatcherService : ICvMatcherService
return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct);
}
/// <summary>
/// Scores a (CV, job) pair with the LLM.
/// Returns a cached result immediately when the same (CV, job, language) triple has been scored before.
/// When no evidence chunks are available from the vector search, falls back to the raw job text.
/// </summary>
private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, string language, CancellationToken ct)
{
var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct);
@@ -138,6 +149,10 @@ public sealed class CvMatcherService : ICvMatcherService
return result;
}
/// <summary>
/// Deserialises the LLM's JSON output into a <see cref="JobMatchResponse"/>.
/// Returns a safe fallback response instead of throwing when the JSON cannot be parsed.
/// </summary>
private static JobMatchResponse ParseResult(string json)
{
try
@@ -158,21 +173,29 @@ public sealed class CvMatcherService : ICvMatcherService
};
}
/// <summary>
/// Produces the query text used for vector similarity search: a fixed descriptive prefix
/// followed by the CV text capped at 10,000 characters.
/// </summary>
/// <param name="cvText">Full CV text.</param>
/// <returns>The prefixed, length-limited search profile.</returns>
private static string BuildCvSearchProfile(string cvText)
{
    const string Prefix = "Candidate profile, skills, technologies, seniority, industry experience, project experience: ";
    return Prefix + Limit(cvText, 10000);
}
/// <summary>
/// Picks a short job title from the job text: the first fragment (split on '.', line feed or
/// carriage return, then trimmed) whose length is between 9 and 139 characters inclusive.
/// </summary>
/// <param name="jobText">Plain text of the job posting.</param>
/// <returns>The first qualifying fragment, or <c>"Job description"</c> when none qualifies.</returns>
private static string ExtractJobTitle(string jobText)
{
    foreach (var fragment in jobText.Split('.', '\n', '\r'))
    {
        var candidate = fragment.Trim();
        if (candidate.Length > 8 && candidate.Length < 140) return candidate;
    }

    return "Job description";
}
/// <summary>
/// Returns the base language code, lower-cased, defaulting to <c>"en"</c>.
/// </summary>
/// <remarks>
/// Accepts both hyphen- and underscore-separated tags (<c>"ro-RO"</c>, <c>"ro_RO"</c>).
/// Falls back to <c>"en"</c> when the input is null, blank, or has an empty primary subtag
/// (e.g. <c>"-US"</c>), so callers never receive an empty language code.
/// </remarks>
/// <param name="language">Language or culture tag supplied by the caller; may be <c>null</c>.</param>
/// <returns>The lower-case primary language subtag, or <c>"en"</c>.</returns>
private static string NormalizeLanguage(string? language)
{
    if (string.IsNullOrWhiteSpace(language)) return "en";

    // Only the primary subtag matters; region/script parts are discarded.
    var primary = language.Trim().ToLowerInvariant().Split('-', '_')[0].Trim();
    return primary.Length == 0 ? "en" : primary;
}
/// <summary>Maps a language code to its full English name for use in the LLM system prompt.</summary>
private static string LanguageName(string language) => language switch
{
"ro" => "Romanian",
@@ -180,5 +203,6 @@ public sealed class CvMatcherService : ICvMatcherService
_ => "English"
};
/// <summary>
/// Truncates <paramref name="value"/> to at most <paramref name="max"/> characters.
/// </summary>
/// <remarks>
/// When the cut would fall between the two halves of a UTF-16 surrogate pair, the dangling
/// high surrogate is dropped as well, so the result never ends in half a character.
/// </remarks>
/// <param name="value">Text to truncate.</param>
/// <param name="max">Maximum number of UTF-16 characters to keep.</param>
/// <returns><paramref name="value"/> unchanged when it already fits; otherwise its leading portion.</returns>
private static string Limit(string value, int max)
{
    if (value.Length <= max) return value;

    // Back off by one when the last kept char is the first half of a surrogate pair.
    var end = max > 0 && char.IsHighSurrogate(value[max - 1]) ? max - 1 : max;
    return value[..end];
}
}
@@ -6,6 +6,10 @@ using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Extracts normalised plain text from a job posting, either from a pasted description or by
/// fetching and stripping the HTML of the job page URL.
/// </summary>
public sealed class JobTextExtractor : IJobTextExtractor
{
private readonly HttpClient _http;
@@ -19,6 +23,7 @@ public sealed class JobTextExtractor : IJobTextExtractor
_http.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
}
/// <inheritdoc />
public async Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct)
{
var pasted = Normalize(jobDescription ?? string.Empty);
@@ -37,12 +42,14 @@ public sealed class JobTextExtractor : IJobTextExtractor
return Limit(Normalize(WebUtility.HtmlDecode(html)));
}
/// <summary>
/// Truncates text to the effective maximum character count, which is the configured
/// <c>MaxJobTextChars</c> setting but never less than 4000.
/// </summary>
/// <remarks>
/// When the cut would fall between the two halves of a UTF-16 surrogate pair, the dangling
/// high surrogate is dropped as well, so the result never ends in half a character.
/// </remarks>
/// <param name="value">Text to truncate.</param>
/// <returns><paramref name="value"/> unchanged when it already fits; otherwise its leading portion.</returns>
private string Limit(string value)
{
    // The configured limit is treated as a floor of 4000 characters.
    var max = Math.Max(4000, _settings.MaxJobTextChars);
    if (value.Length <= max) return value;

    // Back off by one when the last kept char is the first half of a surrogate pair.
    var end = char.IsHighSurrogate(value[max - 1]) ? max - 1 : max;
    return value[..end];
}
/// <summary>Collapses all whitespace runs to single spaces and trims the result.</summary>
private static string Normalize(string value)
{
if (string.IsNullOrWhiteSpace(value)) return string.Empty;
@@ -11,6 +11,9 @@ using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Creates and validates one-time job search tokens, and creates the corresponding search sessions.
/// </summary>
public sealed class JobTokenService : IJobTokenService
{
private readonly CvSearchDbContext _db;
@@ -30,6 +33,7 @@ public sealed class JobTokenService : IJobTokenService
_logger = logger;
}
/// <inheritdoc />
public async Task<string> CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct)
{
var token = new JobSearchTokenEntity
@@ -49,6 +53,7 @@ public sealed class JobTokenService : IJobTokenService
return token.Id;
}
/// <inheritdoc />
public async Task<string> TriggerStartAsync(string tokenId, CancellationToken ct)
{
var token = await _db.JobSearchTokens.FirstOrDefaultAsync(x => x.Id == tokenId, ct);
@@ -86,6 +91,10 @@ public sealed class JobTokenService : IJobTokenService
return StartJobSearchStatus.Started;
}
/// <summary>
/// Extracts up to 10 meaningful keywords from the CV text using simple heuristics (no LLM).
/// Takes the first 5 usable lines, splits them into words, strips punctuation, and deduplicates.
/// </summary>
private static string ExtractKeywords(string cvText)
{
var lines = cvText
@@ -5,6 +5,11 @@ using Microsoft.Extensions.Logging;
namespace EmailApi.Data.Services;
/// <summary>
/// Singleton implementation of <see cref="IEmailTemplateService"/> that caches all email templates
/// from the database and refreshes them every 10 minutes.
/// Uses <see cref="IServiceScopeFactory"/> to resolve the scoped repository from a singleton lifetime.
/// </summary>
public sealed class EmailTemplateService : IEmailTemplateService
{
private readonly IServiceScopeFactory _scopeFactory;
@@ -20,6 +25,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
_logger = logger;
}
/// <inheritdoc />
public string Get(string key, string language = "en")
{
EnsureCacheLoaded();
@@ -35,6 +41,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
return key;
}
/// <inheritdoc />
public string Render(string key, string language, params (string Key, string Value)[] placeholders)
{
var template = Get(key, language);
@@ -43,6 +50,7 @@ public sealed class EmailTemplateService : IEmailTemplateService
return template;
}
/// <inheritdoc />
public string? GetOperatorCopy(string key, string language)
{
EnsureCacheLoaded();
@@ -61,6 +69,10 @@ public sealed class EmailTemplateService : IEmailTemplateService
return null;
}
/// <summary>
/// Reloads all templates from the database when the cache TTL has expired.
/// Swaps both caches atomically; logs an error and continues serving the stale cache on failure.
/// </summary>
private void EnsureCacheLoaded()
{
if (DateTime.UtcNow - _loadedAt < CacheTtl) return;
@@ -91,5 +103,6 @@ public sealed class EmailTemplateService : IEmailTemplateService
}
}
/// <summary>
/// Composes the dictionary key used for both caches as <c>key::language</c>.
/// </summary>
/// <param name="key">Template key.</param>
/// <param name="language">Language code.</param>
/// <returns>The two parts joined by a double colon.</returns>
private static string CacheKey(string key, string language)
{
    return string.Concat(key, "::", language);
}
}
@@ -1,8 +1,38 @@
namespace EmailApi.Data.Services;
/// <summary>
/// Provides access to localised email templates stored in the <c>emailApi.EmailTemplates</c> table.
/// Implementations are expected to cache templates and refresh periodically.
/// </summary>
public interface IEmailTemplateService
{
/// <summary>
/// Returns the template value for the given key and language.
/// Falls back to <c>"en"</c> when the requested language has no entry.
/// Returns the raw key string when no matching template is found.
/// </summary>
/// <param name="key">Template key (e.g. <c>"email.match.subject"</c>).</param>
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>); defaults to <c>"en"</c>.</param>
/// <returns>Template value string, or <paramref name="key"/> itself when no template matches.</returns>
string Get(string key, string language = "en");
/// <summary>
/// Retrieves the template and substitutes <c>{{placeholder}}</c> tokens with the provided values.
/// </summary>
/// <param name="key">Template key.</param>
/// <param name="language">Two-letter language code.</param>
/// <param name="placeholders">Named replacement pairs in the form <c>("name", value)</c>.</param>
/// <returns>Rendered template string with the supplied placeholders replaced.</returns>
string Render(string key, string language, params (string Key, string Value)[] placeholders);
/// <summary>
/// Returns the operator copy address for the given template key.
/// Uses the specific row's <c>OperatorCopy</c> value when non-empty; otherwise falls back
/// to the first non-empty <c>OperatorCopy</c> across all cached rows, so future template rows
/// with an empty value automatically inherit the globally configured address.
/// </summary>
/// <param name="key">Template key used to look up the specific row (typically the subject key).</param>
/// <param name="language">Two-letter language code.</param>
/// <returns>Operator copy email address, or <c>null</c> when none is configured.</returns>
string? GetOperatorCopy(string key, string language);
}
@@ -8,6 +8,10 @@ using Models.Settings;
namespace EmailApi.Services;
/// <summary>
/// Wraps an HTML body fragment in the branded HTML shell and sends the resulting email via SMTP using MailKit.
/// Attaches files from the shared file-storage volume when an attachment path is provided.
/// </summary>
public sealed class SmtpEmailDispatcher
{
private readonly SmtpSettings _smtp;
@@ -29,6 +33,13 @@ public sealed class SmtpEmailDispatcher
_environmentName = Environment.GetEnvironmentVariable("APP_ENVIRONMENT_NAME") ?? "Development";
}
/// <summary>
/// Builds a <see cref="MimeMessage"/> from <paramref name="req"/>, wraps the body in the HTML shell,
/// optionally attaches a file, and sends via the configured SMTP server.
/// Logs a warning and returns without throwing when the SMTP host is not configured.
/// </summary>
/// <param name="req">Email payload containing recipients, subject, HTML body, and optional attachment path.</param>
/// <param name="ct">Cancellation token.</param>
public async Task SendAsync(SendEmailRequest req, CancellationToken ct)
{
if (string.IsNullOrWhiteSpace(_smtp.Host))
@@ -6,6 +6,11 @@ using System.Collections.Concurrent;
namespace MyAi.Data.Services;
/// <summary>
/// Singleton implementation of <see cref="ITemplateService"/> that caches all templates from the
/// <c>myAi.Templates</c> table and refreshes them every 10 minutes.
/// Uses <see cref="IServiceScopeFactory"/> to resolve the scoped DbContext from a singleton lifetime.
/// </summary>
public sealed class DbTemplateService : ITemplateService
{
private readonly IServiceScopeFactory _scopeFactory;
@@ -20,6 +25,7 @@ public sealed class DbTemplateService : ITemplateService
_logger = logger;
}
/// <inheritdoc />
public string Get(string key, string language = "en")
{
EnsureCacheLoaded();
@@ -35,6 +41,7 @@ public sealed class DbTemplateService : ITemplateService
return key;
}
/// <inheritdoc />
public string Render(string key, string language, params (string Key, string Value)[] placeholders)
{
var template = Get(key, language);
@@ -43,6 +50,10 @@ public sealed class DbTemplateService : ITemplateService
return template;
}
/// <summary>
/// Reloads all templates from the database when the cache TTL has expired.
/// Swaps the cache atomically; logs an error and continues serving the stale cache on failure.
/// </summary>
private void EnsureCacheLoaded()
{
if (DateTime.UtcNow - _loadedAt < CacheTtl) return;
@@ -66,5 +77,6 @@ public sealed class DbTemplateService : ITemplateService
}
}
/// <summary>
/// Composes the dictionary key used in the cache as <c>key::language</c>.
/// </summary>
/// <param name="key">Template key.</param>
/// <param name="language">Language code.</param>
/// <returns>The two parts joined by a double colon.</returns>
private static string CacheKey(string key, string language)
{
    return string.Concat(key, "::", language);
}
}
@@ -1,7 +1,27 @@
namespace MyAi.Data.Services;
/// <summary>
/// Provides access to localised string templates stored in the <c>myAi.Templates</c> table.
/// Implementations are expected to cache templates and refresh periodically.
/// </summary>
public interface ITemplateService
{
/// <summary>
/// Returns the template value for the given key and language.
/// Falls back to <c>"en"</c> when the requested language has no entry.
/// Returns the raw key string when no matching template is found.
/// </summary>
/// <param name="key">Template key (e.g. <c>"html.job-search-start.title"</c>).</param>
/// <param name="language">Two-letter language code (e.g. <c>"en"</c>, <c>"ro"</c>); defaults to <c>"en"</c>.</param>
/// <returns>Template value string, or <paramref name="key"/> itself when no template matches.</returns>
string Get(string key, string language = "en");
/// <summary>
/// Retrieves the template and substitutes <c>{{placeholder}}</c> tokens with the provided values.
/// </summary>
/// <param name="key">Template key.</param>
/// <param name="language">Two-letter language code.</param>
/// <param name="placeholders">Named replacement pairs in the form <c>("name", value)</c>.</param>
/// <returns>Rendered template string with the supplied placeholders replaced.</returns>
string Render(string key, string language, params (string Key, string Value)[] placeholders);
}
@@ -2,7 +2,20 @@ using Rag.Models;
namespace Api.Services.Contracts;
/// <summary>
/// Classifies a document into a known type (cv, job, contract, etc.) and extracts a title.
/// </summary>
public interface IDocumentClassifier
{
/// <summary>
/// Determines the document type and title from the provided text.
/// Uses <paramref name="providedType"/> and <paramref name="providedTitle"/> directly when supplied;
/// otherwise falls back to a keyword-frequency heuristic over the text.
/// </summary>
/// <param name="text">Full document text to classify.</param>
/// <param name="providedType">Caller-supplied document type hint; skips heuristic when non-empty. May be <c>null</c>.</param>
/// <param name="providedTitle">Caller-supplied document title; skips title extraction when non-empty. May be <c>null</c>.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that resolves to a <see cref="DocumentClassification"/> with type, confidence score, and title.</returns>
Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct);
}
@@ -3,10 +3,46 @@ using Rag.Models.Responses;
namespace Api.Services.Contracts;
/// <summary>
/// Core RAG (Retrieval-Augmented Generation) operations: document indexing, vector search, and retrieval.
/// </summary>
public interface IRagService
{
/// <summary>
/// Indexes a plain-text document by classifying it, chunking the text, generating embeddings,
/// and persisting the document and its chunks. Returns cached metadata when the text hash already exists.
/// </summary>
/// <param name="request">Indexing request with text, optional document type, title, and source URL.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Response with document ID, hash, type, and chunk/character counts.</returns>
Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct);
/// <summary>
/// Extracts text from a PDF file, then indexes it the same way as <see cref="IndexTextAsync"/>.
/// Returns cached metadata when the extracted text hash already exists.
/// </summary>
/// <param name="file">Uploaded PDF file (must not exceed the configured maximum size).</param>
/// <param name="documentType">Optional document type hint; if omitted (<c>null</c>) the classifier is used.</param>
/// <param name="title">Optional title override; if omitted (<c>null</c>) the title is extracted from the text.</param>
/// <param name="sourceUrl">Optional source URL to associate with the document; may be <c>null</c>.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Response with document ID, hash, type, and chunk/character counts.</returns>
Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct);
/// <summary>
/// Performs a vector similarity search over indexed document chunks, groups results by document,
/// and returns the top-K documents with their best-matching chunks.
/// </summary>
/// <param name="request">Search request with query text, optional document type filter, and top-K limit.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Ranked list of matching documents with scored chunk excerpts.</returns>
Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct);
/// <summary>
/// Retrieves full document details, including the original text, by document ID.
/// </summary>
/// <param name="documentId">The document's unique identifier.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Document details, or <c>null</c> if no document with that ID exists.</returns>
Task<RagDocumentDetailsResponse?> GetDocumentAsync(string documentId, CancellationToken ct);
}
@@ -1,6 +1,17 @@
namespace Api.Services.Contracts;
/// <summary>
/// Splits document text into overlapping chunks suitable for embedding and vector search.
/// </summary>
public interface ITextChunker
{
/// <summary>
/// Divides <paramref name="text"/> into a list of chunks using a sliding window.
/// Adjacent chunks share <paramref name="overlap"/> characters to preserve cross-boundary context.
/// </summary>
/// <param name="text">The full document text to chunk.</param>
/// <param name="chunkSize">Maximum character length per chunk (clamped to 300-3000).</param>
/// <param name="overlap">Number of trailing characters from the previous chunk to repeat at the start of the next (clamped to 0 through chunkSize/2).</param>
/// <returns>Ordered list of non-empty text chunks.</returns>
IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap);
}
@@ -1,7 +1,23 @@
namespace Api.Services.Contracts;
/// <summary>
/// Extracts and normalises plain text from documents.
/// </summary>
public interface ITextExtractor
{
/// <summary>
/// Reads all pages of a PDF stream and returns the concatenated, normalised plain text.
/// </summary>
/// <param name="stream">Readable stream positioned at the start of the PDF file.</param>
/// <param name="ct">Cancellation token (checked between pages).</param>
/// <returns>A task that resolves to the normalised plain text extracted from the PDF.</returns>
Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct);
/// <summary>
/// Collapses all whitespace sequences in <paramref name="value"/> to single spaces and trims the result.
/// Returns an empty string for null/whitespace input.
/// </summary>
/// <remarks>
/// NOTE(review): null input is documented as accepted, but the parameter is declared as
/// non-nullable <c>string</c>; confirm which one reflects the intended contract.
/// </remarks>
/// <param name="value">Raw text to normalise.</param>
/// <returns>Whitespace-normalised text.</returns>
string Normalize(string value);
}
@@ -4,6 +4,9 @@ using Rag.Models;
namespace Api.Services;
/// <summary>
/// Classifies documents by type using a keyword-frequency heuristic and extracts a title from the text.
/// </summary>
public sealed class DocumentClassifier : IDocumentClassifier
{
private static readonly HashSet<string> KnownTypes = new(StringComparer.OrdinalIgnoreCase)
@@ -11,6 +14,7 @@ public sealed class DocumentClassifier : IDocumentClassifier
"cv", "job", "article", "contract", "invoice", "product", "documentation", "unknown"
};
/// <inheritdoc />
public Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct)
{
if (!string.IsNullOrWhiteSpace(providedType))
@@ -51,14 +55,20 @@ public sealed class DocumentClassifier : IDocumentClassifier
});
}
/// <summary>
/// Counts how many of the supplied <paramref name="terms"/> occur as substrings of <paramref name="lower"/>.
/// Each term contributes at most one to the total, regardless of how often it occurs.
/// </summary>
/// <param name="lower">Text to search; matching uses <see cref="string.Contains(string)"/>, so it is case-sensitive.</param>
/// <param name="terms">Candidate terms to look for.</param>
/// <returns>The number of terms found in <paramref name="lower"/>.</returns>
private static int Count(string lower, params string[] terms)
{
    bool IsPresent(string candidate) => lower.Contains(candidate);

    return terms.Count(IsPresent);
}
/// <summary>
/// Produces a safe type slug: trims and lower-cases (invariant culture) <paramref name="value"/>, then replaces
/// every character other than <c>a-z</c>, <c>0-9</c>, underscore, or hyphen with a hyphen.
/// </summary>
/// <param name="value">Raw document type text.</param>
/// <returns>The slug, or <c>"unknown"</c> when the resulting slug is empty or whitespace.</returns>
private static string NormalizeType(string value)
{
    var lowered = value.Trim().ToLowerInvariant();
    var slug = Regex.Replace(lowered, "[^a-z0-9_-]", "-");

    if (string.IsNullOrWhiteSpace(slug))
    {
        return "unknown";
    }

    return slug;
}
/// <summary>
/// Returns <paramref name="providedTitle"/> when available; otherwise extracts the first sentence-like
/// fragment from the text, or falls back to a generic "{type} document" label.
/// </summary>
private static string BuildTitle(string? providedTitle, string text, string documentType)
{
if (!string.IsNullOrWhiteSpace(providedTitle)) return providedTitle.Trim();
+12
View File
@@ -11,6 +11,9 @@ using CommonHelpers;
namespace Api.Services;
/// <summary>
/// Implements the core RAG pipeline: document classification, chunking, embedding, vector search, and retrieval.
/// </summary>
public sealed class RagService : IRagService
{
private readonly ITextExtractor _textExtractor;
@@ -36,6 +39,7 @@ public sealed class RagService : IRagService
_settings = options.Value;
}
/// <inheritdoc />
public async Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct)
{
var text = _textExtractor.Normalize(request.Text ?? string.Empty);
@@ -44,6 +48,7 @@ public sealed class RagService : IRagService
return await IndexNormalizedTextAsync(text, request.DocumentType, request.Title, request.SourceUrl, request.Metadata, ct);
}
/// <inheritdoc />
public async Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct)
{
if (file.Length <= 0) throw new InvalidOperationException("Uploaded file is empty.");
@@ -57,6 +62,7 @@ public sealed class RagService : IRagService
return await IndexNormalizedTextAsync(text, documentType, title ?? file.FileName, sourceUrl, new Dictionary<string, string> { ["fileName"] = file.FileName }, ct);
}
/// <inheritdoc />
public async Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct)
{
var query = _textExtractor.Normalize(request.QueryText);
@@ -97,6 +103,7 @@ public sealed class RagService : IRagService
return new SearchResponse { Results = results };
}
/// <inheritdoc />
public async Task<RagDocumentDetailsResponse?> GetDocumentAsync(string documentId, CancellationToken ct)
{
var document = await _repository.GetDocumentByIdAsync(documentId, ct);
@@ -112,6 +119,11 @@ public sealed class RagService : IRagService
};
}
/// <summary>
/// Core indexing pipeline: computes a text hash for deduplication, classifies and chunks the text,
/// generates embeddings for each chunk, and persists the document and chunks to the repository.
/// Returns cached metadata without re-indexing when the same text hash and source URL already exist.
/// </summary>
private async Task<IndexDocumentResponse> IndexNormalizedTextAsync(
string text,
string? documentType,
+4
View File
@@ -2,8 +2,12 @@ using Api.Services.Contracts;
namespace Api.Services;
/// <summary>
/// Splits text into overlapping fixed-size chunks using a sliding window for use in vector embedding pipelines.
/// </summary>
public sealed class TextChunker : ITextChunker
{
/// <inheritdoc />
public IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap)
{
if (string.IsNullOrWhiteSpace(text)) return [];
+5
View File
@@ -4,8 +4,12 @@ using UglyToad.PdfPig;
namespace Api.Services;
/// <summary>
/// Extracts and normalises plain text from PDF files using PdfPig.
/// </summary>
public sealed class TextExtractor : ITextExtractor
{
/// <inheritdoc />
public Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct)
{
using var document = PdfDocument.Open(stream);
@@ -19,6 +23,7 @@ public sealed class TextExtractor : ITextExtractor
return Task.FromResult(Normalize(builder.ToString()));
}
/// <inheritdoc />
public string Normalize(string value)
{
if (string.IsNullOrWhiteSpace(value)) return string.Empty;