From 4ee4a59b5e9c8460304d42bbf446cc5e8ed327c1 Mon Sep 17 00:00:00 2001 From: claude Date: Thu, 28 May 2026 09:07:23 +0300 Subject: [PATCH 1/2] Improve comments and Swagger annotations across services (#26) - EmailController: add class summary, full SwaggerResponse/ProducesResponseType for 400 and 500, and Description on SwaggerOperation - ContactController: fix terse "Failed." error message to "Could not process subscription." - FileDownloadController: remove redundant XML tags from the public action doc block; convert private-method /// to // (project convention: no XML doc on internal code) - CvMatcherService: remove two dead commented-out blocks (old email send and BuildEmailBody helper) - JobTokenService: comment the phone/contact-line regex filter in ExtractKeywords - DocumentClassifier: comment the keyword-frequency scoring approach and the confidence formula - TextChunker: comment the sliding-window step (chunkSize - overlap) - CvSearchJobTask: comment the GdprConsent = true rationale and the BuildCvFileName sanitisation logic - HtmlJobSearcher: comment GetLeftPart(UriPartial.Path) query-strip dedup Closes #26 Co-Authored-By: Claude Sonnet 4.6 --- Apis/api/Controllers/ContactController.cs | 2 +- .../api/Controllers/FileDownloadController.cs | 12 ++------ .../Services/CvMatcherService.cs | 28 ------------------- .../Services/JobTokenService.cs | 1 + Apis/email-api/Controllers/EmailController.cs | 25 ++++++++++++++++- Apis/rag-api/Services/DocumentClassifier.cs | 4 +++ Apis/rag-api/Services/TextChunker.cs | 2 ++ .../cv-search-job/Services/HtmlJobSearcher.cs | 1 + Jobs/cv-search-job/Tasks/CvSearchJobTask.cs | 2 ++ 9 files changed, 37 insertions(+), 40 deletions(-) diff --git a/Apis/api/Controllers/ContactController.cs b/Apis/api/Controllers/ContactController.cs index b95e2ea..122d772 100644 --- a/Apis/api/Controllers/ContactController.cs +++ b/Apis/api/Controllers/ContactController.cs @@ -115,7 +115,7 @@ namespace Api.Controllers catch (Exception ex) { 
_log.LogError(ex, "Subscription failed. ip={Ip} eMail={eMail}", userIp, req.Email); - return StatusCode(StatusCodes.Status500InternalServerError, new ErrorResponse { Error = "Failed.", Code = "subscription_failed" }); + return StatusCode(StatusCodes.Status500InternalServerError, new ErrorResponse { Error = "Could not process subscription.", Code = "subscription_failed" }); } } diff --git a/Apis/api/Controllers/FileDownloadController.cs b/Apis/api/Controllers/FileDownloadController.cs index f12a5fb..e28223e 100644 --- a/Apis/api/Controllers/FileDownloadController.cs +++ b/Apis/api/Controllers/FileDownloadController.cs @@ -44,10 +44,6 @@ namespace Api.Controllers /// /// The name of the file to download (optional - uses default from settings if not provided) /// File stream with appropriate headers for resumable downloads - /// Full file content - /// Partial file content (range request) - /// File not found - /// Requested range not satisfiable [HttpGet("{fileName?}")] [SwaggerOperation(Summary = "Download file", Description = "Downloads a file with support for full and ranged (resumable) transfers.")] [SwaggerResponse(StatusCodes.Status200OK, "Full file content returned")] @@ -135,9 +131,7 @@ namespace Api.Controllers } } - /// - /// Handles HTTP range requests for partial content downloads and resume support. - /// + // Handles HTTP range requests for partial content downloads and resume support. private async Task HandleRangeRequest( string filePath, long fileLength, @@ -194,9 +188,7 @@ namespace Api.Controllers } } - /// - /// Efficiently streams a specific byte range from source to destination. - /// + // Efficiently streams a specific byte range from source to destination. 
private static async Task StreamRangeAsync(Stream source, Stream destination, long bytesToRead) { var buffer = new byte[BufferSize]; diff --git a/Apis/cv-matcher-api/Services/CvMatcherService.cs b/Apis/cv-matcher-api/Services/CvMatcherService.cs index 023de6d..1dd361d 100644 --- a/Apis/cv-matcher-api/Services/CvMatcherService.cs +++ b/Apis/cv-matcher-api/Services/CvMatcherService.cs @@ -135,13 +135,6 @@ public sealed class CvMatcherService : ICvMatcherService result.JobUrl = job.SourceUrl; result.Cached = false; await _repository.SaveMatchAsync(cv.Id, job.Id, language, result, ct); - - //await _email.SendMatchAsync( - // email, - // $"MyAi.ro CV Match: {result.Score}% - {job.Title}", - // BuildEmailBody(cv, job, result), - // ct); - return result; } @@ -188,25 +181,4 @@ public sealed class CvMatcherService : ICvMatcherService }; private static string Limit(string value, int max) => value.Length <= max ? value : value[..max]; - - //private static string BuildEmailBody(RagDocumentDetails cv, RagDocumentDetails job, JobMatchResponse result) => $""" - // CV Matcher result - - // CV: {cv.Title} - // Job: {job.Title} - // Job URL: {job.SourceUrl ?? "N/A"} - // Score: {result.Score}% - - // Summary: - // {result.Summary} - - // Strengths: - // - {string.Join("\n- ", result.Strengths)} - - // Gaps: - // - {string.Join("\n- ", result.Gaps)} - - // Recommendations: - // - {string.Join("\n- ", result.Recommendations)} - // """; } diff --git a/Apis/cv-matcher-api/Services/JobTokenService.cs b/Apis/cv-matcher-api/Services/JobTokenService.cs index 8b1f2d8..bf4036e 100644 --- a/Apis/cv-matcher-api/Services/JobTokenService.cs +++ b/Apis/cv-matcher-api/Services/JobTokenService.cs @@ -92,6 +92,7 @@ public sealed class JobTokenService : IJobTokenService .Split(['\n', '\r'], StringSplitOptions.RemoveEmptyEntries) .Select(l => l.Trim()) .Where(l => l.Length > 5 && l.Length < 200) + // Skip lines that are purely digits, spaces, and phone/contact punctuation (phone numbers, emails, etc.) 
.Where(l => !Regex.IsMatch(l, @"^[\d\s\+\-\(\)\@\.]+$")) .Take(5) .ToList(); diff --git a/Apis/email-api/Controllers/EmailController.cs b/Apis/email-api/Controllers/EmailController.cs index e7e628f..6cafe16 100644 --- a/Apis/email-api/Controllers/EmailController.cs +++ b/Apis/email-api/Controllers/EmailController.cs @@ -5,6 +5,11 @@ using Swashbuckle.AspNetCore.Annotations; namespace EmailApi.Controllers; +/// +/// Internal email relay. Accepts an HTML body fragment from trusted callers +/// (api, cv-search-job), wraps it in the branded HTML shell, and dispatches +/// via SMTP. Protected by X-Internal-Api-Key. +/// [ApiController] [Route("api/email")] public sealed class EmailController : ControllerBase @@ -13,9 +18,27 @@ public sealed class EmailController : ControllerBase public EmailController(SmtpEmailDispatcher dispatcher) => _dispatcher = dispatcher; + /// + /// Sends an HTML email via SMTP. The supplied body fragment is wrapped in + /// the branded HTML shell before dispatch. Attachments are resolved from + /// the shared file storage volume using the relative path in + /// . + /// + /// Email payload: recipients, subject, HTML body fragment, optional attachment path. + /// Cancellation token. + /// 204 No Content on success. [HttpPost("send")] - [SwaggerOperation(Summary = "Send an HTML email via SMTP")] + [SwaggerOperation( + Summary = "Send an HTML email via SMTP", + Description = "Wraps the provided HTML body in the branded shell and sends via SMTP. " + + "If AttachmentPath is set, resolves the file from the shared file-storage volume. 
" + + "Returns 204 on success; 400 when the request body is invalid; 500 on SMTP failure.")] + [SwaggerResponse(StatusCodes.Status204NoContent, "Email dispatched successfully")] + [SwaggerResponse(StatusCodes.Status400BadRequest, "Request body is missing or invalid")] + [SwaggerResponse(StatusCodes.Status500InternalServerError, "SMTP dispatch failed")] [ProducesResponseType(StatusCodes.Status204NoContent)] + [ProducesResponseType(StatusCodes.Status400BadRequest)] + [ProducesResponseType(StatusCodes.Status500InternalServerError)] public async Task Send([FromBody] SendEmailRequest request, CancellationToken ct) { await _dispatcher.SendAsync(request, ct); diff --git a/Apis/rag-api/Services/DocumentClassifier.cs b/Apis/rag-api/Services/DocumentClassifier.cs index ae64279..28c8b8c 100644 --- a/Apis/rag-api/Services/DocumentClassifier.cs +++ b/Apis/rag-api/Services/DocumentClassifier.cs @@ -24,6 +24,8 @@ public sealed class DocumentClassifier : IDocumentClassifier }); } + // Keyword-frequency heuristic: count how many characteristic terms each document + // type contributes to the text, then pick the type with the highest hit count. var lower = text.ToLowerInvariant(); var scores = new Dictionary(StringComparer.OrdinalIgnoreCase) { @@ -37,6 +39,8 @@ public sealed class DocumentClassifier : IDocumentClassifier var best = scores.OrderByDescending(x => x.Value).First(); var type = best.Value <= 0 ? "unknown" : best.Key; + // Confidence baseline 0.45 + 0.08 per matched keyword term, capped at 0.95. + // Zero hits → 0.25 (effectively unknown). var confidence = best.Value <= 0 ? 
0.25 : Math.Min(0.95, 0.45 + best.Value * 0.08); return Task.FromResult(new DocumentClassification diff --git a/Apis/rag-api/Services/TextChunker.cs b/Apis/rag-api/Services/TextChunker.cs index 434f2b9..0b011fb 100644 --- a/Apis/rag-api/Services/TextChunker.cs +++ b/Apis/rag-api/Services/TextChunker.cs @@ -10,6 +10,8 @@ public sealed class TextChunker : ITextChunker chunkSize = Math.Clamp(chunkSize, 300, 3000); overlap = Math.Clamp(overlap, 0, chunkSize / 2); + // Sliding window: step forward by (chunkSize - overlap) each iteration so + // adjacent chunks share `overlap` characters, preserving cross-boundary context. var chunks = new List(); var start = 0; while (start < text.Length) diff --git a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs index fe03132..7fba235 100644 --- a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs +++ b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs @@ -75,6 +75,7 @@ public sealed class HtmlJobSearcher continue; } + // Strip query string and fragment so different tracking variants of the same URL collapse to one. var url = absoluteUri.GetLeftPart(UriPartial.Path); if (seen.Add(url)) results.Add(url); diff --git a/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs b/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs index 593baf7..16b0087 100644 --- a/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs +++ b/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs @@ -125,6 +125,7 @@ public sealed class CvSearchJobTask : IJobTask { CvDocumentId = session.CvDocumentId, JobUrl = url, + // User already gave GDPR consent when they clicked the one-time job search link GdprConsent = true }; @@ -191,6 +192,7 @@ public sealed class CvSearchJobTask : IJobTask private static string BuildCvFileName(string cvDocumentId) { + // Strip non-alphanumeric characters so the filename is safe for all OS/email clients. 
var safeId = string.Concat(cvDocumentId.Where(char.IsLetterOrDigit)); if (string.IsNullOrWhiteSpace(safeId)) safeId = "cv"; return $"{safeId}.pdf"; -- 2.52.0 From 16bb195cb583177408d8476cadaeee9bacf5674d Mon Sep 17 00:00:00 2001 From: claude Date: Thu, 28 May 2026 09:17:42 +0300 Subject: [PATCH 2/2] Add XML doc to all service interfaces and implementations (#26) - Update CLAUDE.md: replace incorrect 'no XML doc on internal code' rule with the correct convention (XML doc on all public methods and non-trivial private/protected helpers) - Restore /// on FileDownloadController private helpers (HandleRangeRequest, StreamRangeAsync) - Add full XML doc to all service contracts: ICaptchaVerifier, IEmailSender, ICvMatcherService, IJobTextExtractor, IJobTokenService, IDocumentClassifier, IRagService, ITextChunker, ITextExtractor, IEmailTemplateService, ITemplateService - Add /// <summary> and /// <inheritdoc /> to all concrete service classes and their methods: RecaptchaVerifier, EmailApiEmailSender, SmtpEmailDispatcher, CvMatcherService, JobTextExtractor, JobTokenService, RagService, DocumentClassifier, TextChunker, TextExtractor, HtmlJobSearcher, CvSearchEmailSender, CvSearchJobTask, EmailTemplateService, DbTemplateService Co-Authored-By: Claude Sonnet 4.6 --- .../api/Controllers/FileDownloadController.cs | 8 ++- .../Services/Contracts/ICaptchaVerifier.cs | 14 ++++- Apis/api/Services/Contracts/IEmailSender.cs | 52 ++++++++++++++++++- Apis/api/Services/EmailApiEmailSender.cs | 9 ++++ Apis/api/Services/RecaptchaVerifier.cs | 4 ++ .../Services/Contracts/ICvMatcherService.cs | 25 +++++++++ .../Services/Contracts/IJobTextExtractor.cs | 11 ++++ .../Services/Contracts/IJobTokenService.cs | 22 ++++++++ .../Services/CvMatcherService.cs | 24 +++++++++ .../Services/JobTextExtractor.cs | 7 +++ .../Services/JobTokenService.cs | 9 ++++ .../Services/EmailTemplateService.cs | 13 +++++ .../Services/IEmailTemplateService.cs | 30 +++++++++++ .../email-api/Services/SmtpEmailDispatcher.cs | 11 ++++ 
Apis/myai-data/Services/DbTemplateService.cs | 12 +++++ Apis/myai-data/Services/ITemplateService.cs | 20 +++++++ .../Services/Contracts/IDocumentClassifier.cs | 13 +++++ .../rag-api/Services/Contracts/IRagService.cs | 36 +++++++++++++ .../Services/Contracts/ITextChunker.cs | 11 ++++ .../Services/Contracts/ITextExtractor.cs | 16 ++++++ Apis/rag-api/Services/DocumentClassifier.cs | 10 ++++ Apis/rag-api/Services/RagService.cs | 12 +++++ Apis/rag-api/Services/TextChunker.cs | 4 ++ Apis/rag-api/Services/TextExtractor.cs | 5 ++ CLAUDE.md | 4 +- .../Services/CvSearchEmailSender.cs | 22 ++++++++ .../cv-search-job/Services/HtmlJobSearcher.cs | 14 +++++ Jobs/cv-search-job/Tasks/CvSearchJobTask.cs | 24 +++++++++ 28 files changed, 436 insertions(+), 6 deletions(-) diff --git a/Apis/api/Controllers/FileDownloadController.cs b/Apis/api/Controllers/FileDownloadController.cs index e28223e..c34ee17 100644 --- a/Apis/api/Controllers/FileDownloadController.cs +++ b/Apis/api/Controllers/FileDownloadController.cs @@ -131,7 +131,9 @@ namespace Api.Controllers } } - // Handles HTTP range requests for partial content downloads and resume support. + /// + /// Handles HTTP range requests for partial content downloads and resume support. + /// private async Task HandleRangeRequest( string filePath, long fileLength, @@ -188,7 +190,9 @@ namespace Api.Controllers } } - // Efficiently streams a specific byte range from source to destination. + /// + /// Efficiently streams a specific byte range from source to destination. 
+ /// private static async Task StreamRangeAsync(Stream source, Stream destination, long bytesToRead) { var buffer = new byte[BufferSize]; diff --git a/Apis/api/Services/Contracts/ICaptchaVerifier.cs b/Apis/api/Services/Contracts/ICaptchaVerifier.cs index a97754c..a549e9d 100644 --- a/Apis/api/Services/Contracts/ICaptchaVerifier.cs +++ b/Apis/api/Services/Contracts/ICaptchaVerifier.cs @@ -1,9 +1,21 @@ -using Api.Services.Contracts.Models; +using Api.Services.Contracts.Models; namespace Api.Services.Contracts { + /// + /// Verifies a reCAPTCHA token against the Google verification API. + /// public interface ICaptchaVerifier { + /// + /// Sends the token to the Google reCAPTCHA verification endpoint and + /// returns a verdict indicating success, score, and any failure reason. + /// + /// The reCAPTCHA token provided by the client. + /// Optional remote IP address passed to Google for additional risk analysis. + /// Optional action name to validate against the token's embedded action (v3 only). + /// Cancellation token. + /// A with the verification outcome. Task VerifyAsync(string token, string? userIp, string? expectedAction, CancellationToken ct); } } diff --git a/Apis/api/Services/Contracts/IEmailSender.cs b/Apis/api/Services/Contracts/IEmailSender.cs index b6b2b27..8fd7502 100644 --- a/Apis/api/Services/Contracts/IEmailSender.cs +++ b/Apis/api/Services/Contracts/IEmailSender.cs @@ -1,15 +1,65 @@ -using CvMatcher.Models.Responses; +using CvMatcher.Models.Responses; using Models.Requests; namespace Api.Services.Contracts { + /// + /// Abstraction for sending transactional emails from the public API. + /// public interface IEmailSender { + /// + /// Sends a contact-form message to the configured operator address. + /// + /// Contact request containing name, email, subject, and message. + /// Cancellation token. 
Task SendContactAsync(ContactRequest req, CancellationToken ct); + + /// + /// Notifies the configured operator address that a new email subscription was received. + /// + /// Subscription request containing the subscriber's email address. + /// Cancellation token. Task SendSubscribeAsync(SubscribeRequest req, CancellationToken ct); + + /// + /// Sends a background notification when a file download is initiated. + /// Does nothing when no notification address is configured. + /// + /// Name of the downloaded file. + /// Remote IP address of the downloader, or null if unavailable. + /// Cancellation token. Task SendFileDownloadNotificationAsync(string fileName, string? userIp, CancellationToken ct); + + /// + /// Sends a CV match results email to the user and the operator copy address. + /// + /// Primary recipient email address, or null to send only the operator copy. + /// Email subject line. + /// Pre-built HTML body fragment. + /// Full path to a CV PDF to attach, or null for no attachment. + /// Cancellation token. Task SendMatchAsync(string? explicitTo, string subject, string body, string? attachmentPath, CancellationToken ct); + + /// + /// Builds the localised subject line for a CV match email. + /// + /// Match score percentage (0–100). + /// Human-readable job title or label. + /// Two-letter language code (e.g. "en", "ro"). + /// Rendered subject string. string BuildMatchEmailSubject(int score, string? jobLabel, string language); + + /// + /// Builds the full HTML body for a CV match email, including an optional job-search footer link. + /// + /// Identifier of the indexed CV document. + /// Structured match response from the CV matcher engine. + /// Human-readable job title or label. + /// Two-letter language code. + /// Optional one-click job-search URL to append as a footer CTA. + /// Number of days until the job-search link expires (shown in the footer copy). + /// Rendered HTML body string. 
string BuildMatchEmailBody(string cvDocumentId, JobMatchResponse result, string? jobLabel, string language, string? jobSearchLink = null, int expiryDays = 7); } } diff --git a/Apis/api/Services/EmailApiEmailSender.cs b/Apis/api/Services/EmailApiEmailSender.cs index 7d2a5a3..c979431 100644 --- a/Apis/api/Services/EmailApiEmailSender.cs +++ b/Apis/api/Services/EmailApiEmailSender.cs @@ -9,6 +9,9 @@ using Models.Settings; namespace Api.Services; +/// +/// Implements by delegating all email dispatch to the internal email-api service via Refit. +/// public sealed class EmailApiEmailSender : IEmailSender { private readonly IEmailApiClient _emailApi; @@ -34,6 +37,7 @@ public sealed class EmailApiEmailSender : IEmailSender _log = log; } + /// public async Task SendContactAsync(ContactRequest req, CancellationToken ct) { if (string.IsNullOrWhiteSpace(_contact.ToEmail)) @@ -76,6 +80,7 @@ public sealed class EmailApiEmailSender : IEmailSender _log.LogInformation("Contact email sent successfully from {SenderEmail}", req.Email); } + /// public async Task SendSubscribeAsync(SubscribeRequest req, CancellationToken ct) { if (string.IsNullOrWhiteSpace(_subscribe.ToEmail)) @@ -108,6 +113,7 @@ public sealed class EmailApiEmailSender : IEmailSender _log.LogInformation("Subscription email sent successfully for {Email}", req.Email); } + /// public async Task SendFileDownloadNotificationAsync(string fileName, string? userIp, CancellationToken ct) { if (string.IsNullOrWhiteSpace(_fileStorage.ToEmail)) @@ -146,6 +152,7 @@ public sealed class EmailApiEmailSender : IEmailSender _log.LogInformation("File download notification sent successfully for {FileName}", fileName); } + /// public async Task SendMatchAsync(string? explicitTo, string subject, string body, string? 
attachmentPath, CancellationToken ct) { var operatorCopy = _emailTemplates.GetOperatorCopy("email.match.subject", "en"); @@ -184,6 +191,7 @@ public sealed class EmailApiEmailSender : IEmailSender } } + /// public string BuildMatchEmailBody(string cvDocumentId, JobMatchResponse result, string? jobLabel, string language, string? jobSearchLink = null, int expiryDays = 7) { var strengths = result.Strengths?.Count > 0 @@ -221,6 +229,7 @@ public sealed class EmailApiEmailSender : IEmailSender return body; } + /// public string BuildMatchEmailSubject(int score, string? jobLabel, string language) => _emailTemplates.Render("email.match.subject", language, ("score", score.ToString()), diff --git a/Apis/api/Services/RecaptchaVerifier.cs b/Apis/api/Services/RecaptchaVerifier.cs index b5659be..517e2fb 100644 --- a/Apis/api/Services/RecaptchaVerifier.cs +++ b/Apis/api/Services/RecaptchaVerifier.cs @@ -5,6 +5,9 @@ using Models.Settings; namespace Api.Services { + /// + /// Verifies reCAPTCHA v2/v3 tokens by calling the Google site-verify API. + /// public sealed class RecaptchaVerifier : ICaptchaVerifier { private readonly HttpClient _http; @@ -18,6 +21,7 @@ namespace Api.Services _log = log; } + /// public async Task VerifyAsync(string token, string? userIp, string? expectedAction, CancellationToken ct) { _log.LogDebug("Verifying captcha token for IP {Ip}", userIp ?? "unknown"); diff --git a/Apis/cv-matcher-api/Services/Contracts/ICvMatcherService.cs b/Apis/cv-matcher-api/Services/Contracts/ICvMatcherService.cs index 9c483a5..32df0b8 100644 --- a/Apis/cv-matcher-api/Services/Contracts/ICvMatcherService.cs +++ b/Apis/cv-matcher-api/Services/Contracts/ICvMatcherService.cs @@ -3,9 +3,34 @@ using CvMatcher.Models.Responses; namespace Api.Services.Contracts; +/// +/// Orchestrates CV indexing, job matching, and job discovery operations. +/// public interface ICvMatcherService { + /// + /// Indexes a CV PDF into the RAG system and returns document metadata. 
+ /// Returns cached metadata without re-indexing when the same text hash already exists. + /// + /// Uploaded CV PDF file. + /// Cancellation token. + /// Upload response with document ID, hash, and indexing statistics. Task UploadCvAsync(IFormFile file, CancellationToken ct); + + /// + /// Scores a CV against a specific job posting URL or pasted description using the LLM. + /// Caches the result so repeat requests for the same (CV, job, language) triple are served instantly. + /// + /// Match request containing CV document ID, job URL or description, and language preference. + /// Cancellation token. + /// Structured match response with score, summary, strengths, gaps, and recommendations. Task MatchJobAsync(MatchJobRequest request, CancellationToken ct); + + /// + /// Searches the RAG index for job documents most similar to the given CV and scores the top candidates. + /// + /// Request containing the CV document ID and optional result count limit. + /// Cancellation token. + /// Response with the CV document ID and a list of ranked match results. Task FindJobsAsync(FindJobsRequest request, CancellationToken ct); } diff --git a/Apis/cv-matcher-api/Services/Contracts/IJobTextExtractor.cs b/Apis/cv-matcher-api/Services/Contracts/IJobTextExtractor.cs index 850521c..746fda6 100644 --- a/Apis/cv-matcher-api/Services/Contracts/IJobTextExtractor.cs +++ b/Apis/cv-matcher-api/Services/Contracts/IJobTextExtractor.cs @@ -1,6 +1,17 @@ namespace Api.Services.Contracts; +/// +/// Extracts plain text from a job posting, either from a pasted description or by fetching and parsing a URL. +/// public interface IJobTextExtractor { + /// + /// Returns normalised plain text for the job posting. + /// Prefers when provided; otherwise fetches and strips HTML from . + /// + /// URL of the job posting page, used when no description is pasted. + /// Pasted job description text; takes priority over URL fetching. + /// Cancellation token. 
+ /// Normalised plain text, truncated to the configured maximum character limit. Task ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct); } diff --git a/Apis/cv-matcher-api/Services/Contracts/IJobTokenService.cs b/Apis/cv-matcher-api/Services/Contracts/IJobTokenService.cs index 972aff3..195710b 100644 --- a/Apis/cv-matcher-api/Services/Contracts/IJobTokenService.cs +++ b/Apis/cv-matcher-api/Services/Contracts/IJobTokenService.cs @@ -1,7 +1,29 @@ namespace Api.Services.Contracts; +/// +/// Manages one-time job search tokens and the sessions they trigger. +/// public interface IJobTokenService { + /// + /// Creates a new single-use job search token linked to the given CV document and user. + /// The token expires after the number of days configured in JobSearch:TokenExpiryDays. + /// + /// Identifier of the indexed CV document. + /// Email address of the user who will receive the results. + /// Preferred language for result emails (e.g. "en", "ro"). + /// Cancellation token. + /// The generated token ID, to be embedded in the one-click job search link. Task CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct); + + /// + /// Validates the token and, if valid, marks it as used and creates a Pending job search session. + /// + /// The token ID from the one-click link. + /// Cancellation token. + /// + /// One of the StartJobSearchStatus string constants: + /// Started, AlreadyUsed, Expired, or NotFound. + /// Task TriggerStartAsync(string tokenId, CancellationToken ct); } diff --git a/Apis/cv-matcher-api/Services/CvMatcherService.cs b/Apis/cv-matcher-api/Services/CvMatcherService.cs index 1dd361d..02f0ddd 100644 --- a/Apis/cv-matcher-api/Services/CvMatcherService.cs +++ b/Apis/cv-matcher-api/Services/CvMatcherService.cs @@ -10,6 +10,9 @@ using Microsoft.Extensions.Options; namespace Api.Services; +/// +/// Orchestrates CV upload, RAG indexing, job text extraction, LLM scoring, and result caching. 
+/// public sealed class CvMatcherService : ICvMatcherService { private readonly IRagApiClient _rag; @@ -35,6 +38,7 @@ public sealed class CvMatcherService : ICvMatcherService _settings = options.Value; } + /// public async Task UploadCvAsync(IFormFile file, CancellationToken ct) { var response = await _rag.IndexCvPdfAsync(file, ct); @@ -51,6 +55,7 @@ public sealed class CvMatcherService : ICvMatcherService }; } + /// public async Task FindJobsAsync(FindJobsRequest request, CancellationToken ct) { var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found."); @@ -78,6 +83,7 @@ public sealed class CvMatcherService : ICvMatcherService return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs }; } + /// public async Task MatchJobAsync(MatchJobRequest request, CancellationToken ct) { if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required."); @@ -104,6 +110,11 @@ public sealed class CvMatcherService : ICvMatcherService return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, NormalizeLanguage(request.Language), ct); } + /// + /// Scores a (CV, job) pair with the LLM. + /// Returns a cached result immediately when the same (CV, job, language) triple has been scored before. + /// When no evidence chunks are available from the vector search, falls back to the raw job text. + /// private async Task ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList evidenceChunks, string? email, string language, CancellationToken ct) { var cached = await _repository.GetMatchAsync(cv.Id, job.Id, language, ct); @@ -138,6 +149,10 @@ public sealed class CvMatcherService : ICvMatcherService return result; } + /// + /// Deserialises the LLM's JSON output into a . + /// Returns a safe fallback response instead of throwing when the JSON cannot be parsed. 
+ /// private static JobMatchResponse ParseResult(string json) { try @@ -158,21 +173,29 @@ public sealed class CvMatcherService : ICvMatcherService }; } + /// + /// Builds a descriptive search query from the CV text for use in vector similarity search. + /// private static string BuildCvSearchProfile(string cvText) { var text = Limit(cvText, 10000); return $"Candidate profile, skills, technologies, seniority, industry experience, project experience: {text}"; } + /// + /// Extracts a short job title from the first sentence-like fragment of the job text. + /// private static string ExtractJobTitle(string jobText) { var first = jobText.Split('.', '\n', '\r').Select(x => x.Trim()).FirstOrDefault(x => x.Length is > 8 and < 140); return first ?? "Job description"; } + /// Returns the base language code, lower-cased, defaulting to "en". private static string NormalizeLanguage(string? language) => string.IsNullOrWhiteSpace(language) ? "en" : language.ToLowerInvariant().Split('-')[0].Trim(); + /// Maps a language code to its full English name for use in the LLM system prompt. private static string LanguageName(string language) => language switch { "ro" => "Romanian", @@ -180,5 +203,6 @@ public sealed class CvMatcherService : ICvMatcherService _ => "English" }; + /// Truncates to at most characters. private static string Limit(string value, int max) => value.Length <= max ? value : value[..max]; } diff --git a/Apis/cv-matcher-api/Services/JobTextExtractor.cs b/Apis/cv-matcher-api/Services/JobTextExtractor.cs index 668e018..f8e806b 100644 --- a/Apis/cv-matcher-api/Services/JobTextExtractor.cs +++ b/Apis/cv-matcher-api/Services/JobTextExtractor.cs @@ -6,6 +6,10 @@ using Microsoft.Extensions.Options; namespace Api.Services; +/// +/// Extracts normalised plain text from a job posting, either from a pasted description or by +/// fetching and stripping the HTML of the job page URL. 
+/// public sealed class JobTextExtractor : IJobTextExtractor { private readonly HttpClient _http; @@ -19,6 +23,7 @@ public sealed class JobTextExtractor : IJobTextExtractor _http.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0"); } + /// public async Task ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct) { var pasted = Normalize(jobDescription ?? string.Empty); @@ -37,12 +42,14 @@ public sealed class JobTextExtractor : IJobTextExtractor return Limit(Normalize(WebUtility.HtmlDecode(html))); } + /// Truncates text to the configured maximum character count. private string Limit(string value) { var max = Math.Max(4000, _settings.MaxJobTextChars); return value.Length <= max ? value : value[..max]; } + /// Collapses all whitespace runs to single spaces and trims the result. private static string Normalize(string value) { if (string.IsNullOrWhiteSpace(value)) return string.Empty; diff --git a/Apis/cv-matcher-api/Services/JobTokenService.cs b/Apis/cv-matcher-api/Services/JobTokenService.cs index bf4036e..421bccf 100644 --- a/Apis/cv-matcher-api/Services/JobTokenService.cs +++ b/Apis/cv-matcher-api/Services/JobTokenService.cs @@ -11,6 +11,9 @@ using Microsoft.Extensions.Options; namespace Api.Services; +/// +/// Creates and validates one-time job search tokens, and creates the corresponding search sessions. 
+/// public sealed class JobTokenService : IJobTokenService { private readonly CvSearchDbContext _db; @@ -30,6 +33,7 @@ public sealed class JobTokenService : IJobTokenService _logger = logger; } + /// public async Task CreateTokenAsync(string cvDocumentId, string email, string language, CancellationToken ct) { var token = new JobSearchTokenEntity @@ -49,6 +53,7 @@ public sealed class JobTokenService : IJobTokenService return token.Id; } + /// public async Task TriggerStartAsync(string tokenId, CancellationToken ct) { var token = await _db.JobSearchTokens.FirstOrDefaultAsync(x => x.Id == tokenId, ct); @@ -86,6 +91,10 @@ public sealed class JobTokenService : IJobTokenService return StartJobSearchStatus.Started; } + /// + /// Extracts up to 10 meaningful keywords from the CV text using simple heuristics (no LLM). + /// Takes the first 5 usable lines, splits them into words, strips punctuation, and deduplicates. + /// private static string ExtractKeywords(string cvText) { var lines = cvText diff --git a/Apis/email-api-data/Services/EmailTemplateService.cs b/Apis/email-api-data/Services/EmailTemplateService.cs index 79cdd56..3cc4674 100644 --- a/Apis/email-api-data/Services/EmailTemplateService.cs +++ b/Apis/email-api-data/Services/EmailTemplateService.cs @@ -5,6 +5,11 @@ using Microsoft.Extensions.Logging; namespace EmailApi.Data.Services; +/// +/// Singleton implementation of that caches all email templates +/// from the database and refreshes them every 10 minutes. +/// Uses to resolve the scoped repository from a singleton lifetime. 
+/// public sealed class EmailTemplateService : IEmailTemplateService { private readonly IServiceScopeFactory _scopeFactory; @@ -20,6 +25,7 @@ public sealed class EmailTemplateService : IEmailTemplateService _logger = logger; } + /// public string Get(string key, string language = "en") { EnsureCacheLoaded(); @@ -35,6 +41,7 @@ public sealed class EmailTemplateService : IEmailTemplateService return key; } + /// public string Render(string key, string language, params (string Key, string Value)[] placeholders) { var template = Get(key, language); @@ -43,6 +50,7 @@ public sealed class EmailTemplateService : IEmailTemplateService return template; } + /// public string? GetOperatorCopy(string key, string language) { EnsureCacheLoaded(); @@ -61,6 +69,10 @@ public sealed class EmailTemplateService : IEmailTemplateService return null; } + /// + /// Reloads all templates from the database when the cache TTL has expired. + /// Swaps both caches atomically; logs an error and continues serving the stale cache on failure. + /// private void EnsureCacheLoaded() { if (DateTime.UtcNow - _loadedAt < CacheTtl) return; @@ -91,5 +103,6 @@ public sealed class EmailTemplateService : IEmailTemplateService } } + /// Builds the dictionary key used for both caches. private static string CacheKey(string key, string language) => $"{key}::{language}"; } diff --git a/Apis/email-api-data/Services/IEmailTemplateService.cs b/Apis/email-api-data/Services/IEmailTemplateService.cs index 835e9eb..dfe0665 100644 --- a/Apis/email-api-data/Services/IEmailTemplateService.cs +++ b/Apis/email-api-data/Services/IEmailTemplateService.cs @@ -1,8 +1,38 @@ namespace EmailApi.Data.Services; +/// +/// Provides access to localised email templates stored in the emailApi.EmailTemplates table. +/// Implementations are expected to cache templates and refresh periodically. +/// public interface IEmailTemplateService { + /// + /// Returns the template value for the given key and language. 
+ /// Falls back to "en" when the requested language has no entry. + /// Returns the raw key string when no matching template is found. + /// + /// Template key (e.g. "email.match.subject"). + /// Two-letter language code (e.g. "en", "ro"). + /// Template value string. string Get(string key, string language = "en"); + + /// + /// Retrieves the template and substitutes {{placeholder}} tokens with the provided values. + /// + /// Template key. + /// Two-letter language code. + /// Named replacement pairs in the form ("name", value). + /// Rendered template string with all placeholders replaced. string Render(string key, string language, params (string Key, string Value)[] placeholders); + + /// + /// Returns the operator copy address for the given template key. + /// Uses the specific row's OperatorCopy value when non-empty; otherwise falls back + /// to the first non-empty OperatorCopy across all cached rows, so future template rows + /// with an empty value automatically inherit the globally configured address. + /// + /// Template key used to look up the specific row (typically the subject key). + /// Two-letter language code. + /// Operator copy email address, or null when none is configured. string? GetOperatorCopy(string key, string language); } diff --git a/Apis/email-api/Services/SmtpEmailDispatcher.cs b/Apis/email-api/Services/SmtpEmailDispatcher.cs index d0adc1f..1c65007 100644 --- a/Apis/email-api/Services/SmtpEmailDispatcher.cs +++ b/Apis/email-api/Services/SmtpEmailDispatcher.cs @@ -8,6 +8,10 @@ using Models.Settings; namespace EmailApi.Services; +/// +/// Wraps an HTML body fragment in the branded HTML shell and sends the resulting email via SMTP using MailKit. +/// Attaches files from the shared file-storage volume when an attachment path is provided. 
+/// public sealed class SmtpEmailDispatcher { private readonly SmtpSettings _smtp; @@ -29,6 +33,13 @@ public sealed class SmtpEmailDispatcher _environmentName = Environment.GetEnvironmentVariable("APP_ENVIRONMENT_NAME") ?? "Development"; } + /// + /// Builds a from , wraps the body in the HTML shell, + /// optionally attaches a file, and sends via the configured SMTP server. + /// Logs a warning and returns without throwing when the SMTP host is not configured. + /// + /// Email payload containing recipients, subject, HTML body, and optional attachment path. + /// Cancellation token. public async Task SendAsync(SendEmailRequest req, CancellationToken ct) { if (string.IsNullOrWhiteSpace(_smtp.Host)) diff --git a/Apis/myai-data/Services/DbTemplateService.cs b/Apis/myai-data/Services/DbTemplateService.cs index aa9bd50..0dd5b6d 100644 --- a/Apis/myai-data/Services/DbTemplateService.cs +++ b/Apis/myai-data/Services/DbTemplateService.cs @@ -6,6 +6,11 @@ using System.Collections.Concurrent; namespace MyAi.Data.Services; +/// +/// Singleton implementation of that caches all templates from the +/// myAi.Templates table and refreshes them every 10 minutes. +/// Uses to resolve the scoped DbContext from a singleton lifetime. +/// public sealed class DbTemplateService : ITemplateService { private readonly IServiceScopeFactory _scopeFactory; @@ -20,6 +25,7 @@ public sealed class DbTemplateService : ITemplateService _logger = logger; } + /// public string Get(string key, string language = "en") { EnsureCacheLoaded(); @@ -35,6 +41,7 @@ public sealed class DbTemplateService : ITemplateService return key; } + /// public string Render(string key, string language, params (string Key, string Value)[] placeholders) { var template = Get(key, language); @@ -43,6 +50,10 @@ public sealed class DbTemplateService : ITemplateService return template; } + /// + /// Reloads all templates from the database when the cache TTL has expired. 
+ /// Swaps the cache atomically; logs an error and continues serving the stale cache on failure. + /// private void EnsureCacheLoaded() { if (DateTime.UtcNow - _loadedAt < CacheTtl) return; @@ -66,5 +77,6 @@ public sealed class DbTemplateService : ITemplateService } } + /// Builds the dictionary key used in the cache. private static string CacheKey(string key, string language) => $"{key}::{language}"; } diff --git a/Apis/myai-data/Services/ITemplateService.cs b/Apis/myai-data/Services/ITemplateService.cs index 1c4f239..e457dbd 100644 --- a/Apis/myai-data/Services/ITemplateService.cs +++ b/Apis/myai-data/Services/ITemplateService.cs @@ -1,7 +1,27 @@ namespace MyAi.Data.Services; +/// +/// Provides access to localised string templates stored in the myAi.Templates table. +/// Implementations are expected to cache templates and refresh periodically. +/// public interface ITemplateService { + /// + /// Returns the template value for the given key and language. + /// Falls back to "en" when the requested language has no entry. + /// Returns the raw key string when no matching template is found. + /// + /// Template key (e.g. "html.job-search-start.title"). + /// Two-letter language code (e.g. "en", "ro"). + /// Template value string. string Get(string key, string language = "en"); + + /// + /// Retrieves the template and substitutes {{placeholder}} tokens with the provided values. + /// + /// Template key. + /// Two-letter language code. + /// Named replacement pairs in the form ("name", value). + /// Rendered template string with all placeholders replaced. 
string Render(string key, string language, params (string Key, string Value)[] placeholders); } diff --git a/Apis/rag-api/Services/Contracts/IDocumentClassifier.cs b/Apis/rag-api/Services/Contracts/IDocumentClassifier.cs index 00766ab..fbaa2ae 100644 --- a/Apis/rag-api/Services/Contracts/IDocumentClassifier.cs +++ b/Apis/rag-api/Services/Contracts/IDocumentClassifier.cs @@ -2,7 +2,20 @@ using Rag.Models; namespace Api.Services.Contracts; +/// +/// Classifies a document into a known type (cv, job, contract, etc.) and extracts a title. +/// public interface IDocumentClassifier { + /// + /// Determines the document type and title from the provided text. + /// Uses and directly when supplied; + /// otherwise falls back to a keyword-frequency heuristic over the text. + /// + /// Full document text to classify. + /// Caller-supplied document type hint; skips heuristic when non-empty. + /// Caller-supplied document title; skips title extraction when non-empty. + /// Cancellation token. + /// A with type, confidence score, and title. Task ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct); } diff --git a/Apis/rag-api/Services/Contracts/IRagService.cs b/Apis/rag-api/Services/Contracts/IRagService.cs index 3d68812..001e794 100644 --- a/Apis/rag-api/Services/Contracts/IRagService.cs +++ b/Apis/rag-api/Services/Contracts/IRagService.cs @@ -3,10 +3,46 @@ using Rag.Models.Responses; namespace Api.Services.Contracts; +/// +/// Core RAG (Retrieval-Augmented Generation) operations: document indexing, vector search, and retrieval. +/// public interface IRagService { + /// + /// Indexes a plain-text document by classifying it, chunking the text, generating embeddings, + /// and persisting the document and its chunks. Returns cached metadata when the text hash already exists. + /// + /// Indexing request with text, optional document type, title, and source URL. + /// Cancellation token. 
+ /// Response with document ID, hash, type, and chunk/character counts. Task IndexTextAsync(IndexDocumentRequest request, CancellationToken ct); + + /// + /// Extracts text from a PDF file, then indexes it the same way as . + /// Returns cached metadata when the extracted text hash already exists. + /// + /// Uploaded PDF file (must be ≤ configured max size). + /// Optional document type hint; if omitted the classifier is used. + /// Optional title override; if omitted the title is extracted from the text. + /// Optional source URL to associate with the document. + /// Cancellation token. + /// Response with document ID, hash, type, and chunk/character counts. Task IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct); + + /// + /// Performs a vector similarity search over indexed document chunks, groups results by document, + /// and returns the top-K documents with their best-matching chunks. + /// + /// Search request with query text, optional document type filter, and top-K limit. + /// Cancellation token. + /// Ranked list of matching documents with scored chunk excerpts. Task SearchAsync(SearchRequest request, CancellationToken ct); + + /// + /// Retrieves full document details — including the original text — by document ID. + /// + /// The document's unique identifier. + /// Cancellation token. + /// Document details, or null if no document with that ID exists. Task GetDocumentAsync(string documentId, CancellationToken ct); } diff --git a/Apis/rag-api/Services/Contracts/ITextChunker.cs b/Apis/rag-api/Services/Contracts/ITextChunker.cs index 6c7e660..eda76fe 100644 --- a/Apis/rag-api/Services/Contracts/ITextChunker.cs +++ b/Apis/rag-api/Services/Contracts/ITextChunker.cs @@ -1,6 +1,17 @@ namespace Api.Services.Contracts; +/// +/// Splits document text into overlapping chunks suitable for embedding and vector search. 
+/// public interface ITextChunker { + /// + /// Divides into a list of chunks using a sliding window. + /// Adjacent chunks share characters to preserve cross-boundary context. + /// + /// The full document text to chunk. + /// Maximum character length per chunk (clamped to 300–3000). + /// Number of trailing characters from the previous chunk to repeat at the start of the next (clamped to 0–chunkSize/2). + /// Ordered list of non-empty text chunks. IReadOnlyList Chunk(string text, int chunkSize, int overlap); } diff --git a/Apis/rag-api/Services/Contracts/ITextExtractor.cs b/Apis/rag-api/Services/Contracts/ITextExtractor.cs index 4241474..4c56657 100644 --- a/Apis/rag-api/Services/Contracts/ITextExtractor.cs +++ b/Apis/rag-api/Services/Contracts/ITextExtractor.cs @@ -1,7 +1,23 @@ namespace Api.Services.Contracts; +/// +/// Extracts and normalises plain text from documents. +/// public interface ITextExtractor { + /// + /// Reads all pages of a PDF stream and returns the concatenated, normalised plain text. + /// + /// Readable stream positioned at the start of the PDF file. + /// Cancellation token (checked between pages). + /// Normalised plain text extracted from the PDF. Task ExtractPdfAsync(Stream stream, CancellationToken ct); + + /// + /// Collapses all whitespace sequences in to single spaces and trims the result. + /// Returns an empty string for null/whitespace input. + /// + /// Raw text to normalise. + /// Whitespace-normalised text. string Normalize(string value); } diff --git a/Apis/rag-api/Services/DocumentClassifier.cs b/Apis/rag-api/Services/DocumentClassifier.cs index 28c8b8c..4262bfb 100644 --- a/Apis/rag-api/Services/DocumentClassifier.cs +++ b/Apis/rag-api/Services/DocumentClassifier.cs @@ -4,6 +4,9 @@ using Rag.Models; namespace Api.Services; +/// +/// Classifies documents by type using a keyword-frequency heuristic and extracts a title from the text. 
+/// public sealed class DocumentClassifier : IDocumentClassifier { private static readonly HashSet KnownTypes = new(StringComparer.OrdinalIgnoreCase) @@ -11,6 +14,7 @@ public sealed class DocumentClassifier : IDocumentClassifier "cv", "job", "article", "contract", "invoice", "product", "documentation", "unknown" }; + /// public Task ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct) { if (!string.IsNullOrWhiteSpace(providedType)) @@ -51,14 +55,20 @@ public sealed class DocumentClassifier : IDocumentClassifier }); } + /// Counts how many of the given appear in the lower-cased text. private static int Count(string lower, params string[] terms) => terms.Count(term => lower.Contains(term)); + /// Lowercases and replaces non-alphanumeric characters with hyphens to produce a safe type slug. private static string NormalizeType(string value) { var cleaned = Regex.Replace(value.Trim().ToLowerInvariant(), "[^a-z0-9_-]", "-"); return string.IsNullOrWhiteSpace(cleaned) ? "unknown" : cleaned; } + /// + /// Returns when available; otherwise extracts the first sentence-like + /// fragment from the text, or falls back to a generic "{type} document" label. + /// private static string BuildTitle(string? providedTitle, string text, string documentType) { if (!string.IsNullOrWhiteSpace(providedTitle)) return providedTitle.Trim(); diff --git a/Apis/rag-api/Services/RagService.cs b/Apis/rag-api/Services/RagService.cs index 9a8eab2..e1ba9d2 100644 --- a/Apis/rag-api/Services/RagService.cs +++ b/Apis/rag-api/Services/RagService.cs @@ -11,6 +11,9 @@ using CommonHelpers; namespace Api.Services; +/// +/// Implements the core RAG pipeline: document classification, chunking, embedding, vector search, and retrieval. 
+/// public sealed class RagService : IRagService { private readonly ITextExtractor _textExtractor; @@ -36,6 +39,7 @@ public sealed class RagService : IRagService _settings = options.Value; } + /// public async Task IndexTextAsync(IndexDocumentRequest request, CancellationToken ct) { var text = _textExtractor.Normalize(request.Text ?? string.Empty); @@ -44,6 +48,7 @@ public sealed class RagService : IRagService return await IndexNormalizedTextAsync(text, request.DocumentType, request.Title, request.SourceUrl, request.Metadata, ct); } + /// public async Task IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct) { if (file.Length <= 0) throw new InvalidOperationException("Uploaded file is empty."); @@ -57,6 +62,7 @@ public sealed class RagService : IRagService return await IndexNormalizedTextAsync(text, documentType, title ?? file.FileName, sourceUrl, new Dictionary { ["fileName"] = file.FileName }, ct); } + /// public async Task SearchAsync(SearchRequest request, CancellationToken ct) { var query = _textExtractor.Normalize(request.QueryText); @@ -97,6 +103,7 @@ public sealed class RagService : IRagService return new SearchResponse { Results = results }; } + /// public async Task GetDocumentAsync(string documentId, CancellationToken ct) { var document = await _repository.GetDocumentByIdAsync(documentId, ct); @@ -112,6 +119,11 @@ public sealed class RagService : IRagService }; } + /// + /// Core indexing pipeline: computes a text hash for deduplication, classifies and chunks the text, + /// generates embeddings for each chunk, and persists the document and chunks to the repository. + /// Returns cached metadata without re-indexing when the same text hash and source URL already exist. + /// private async Task IndexNormalizedTextAsync( string text, string? 
documentType, diff --git a/Apis/rag-api/Services/TextChunker.cs b/Apis/rag-api/Services/TextChunker.cs index 0b011fb..87c3812 100644 --- a/Apis/rag-api/Services/TextChunker.cs +++ b/Apis/rag-api/Services/TextChunker.cs @@ -2,8 +2,12 @@ using Api.Services.Contracts; namespace Api.Services; +/// +/// Splits text into overlapping fixed-size chunks using a sliding window for use in vector embedding pipelines. +/// public sealed class TextChunker : ITextChunker { + /// public IReadOnlyList Chunk(string text, int chunkSize, int overlap) { if (string.IsNullOrWhiteSpace(text)) return []; diff --git a/Apis/rag-api/Services/TextExtractor.cs b/Apis/rag-api/Services/TextExtractor.cs index 78e85ca..5c67830 100644 --- a/Apis/rag-api/Services/TextExtractor.cs +++ b/Apis/rag-api/Services/TextExtractor.cs @@ -4,8 +4,12 @@ using UglyToad.PdfPig; namespace Api.Services; +/// +/// Extracts and normalises plain text from PDF files using PdfPig. +/// public sealed class TextExtractor : ITextExtractor { + /// public Task ExtractPdfAsync(Stream stream, CancellationToken ct) { using var document = PdfDocument.Open(stream); @@ -19,6 +23,7 @@ public sealed class TextExtractor : ITextExtractor return Task.FromResult(Normalize(builder.ToString())); } + /// public string Normalize(string value) { if (string.IsNullOrWhiteSpace(value)) return string.Empty; diff --git a/CLAUDE.md b/CLAUDE.md index 948be91..a7786c6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -212,8 +212,8 @@ Every service follows this structure: ## Coding conventions -- No XML doc comments on internal code; Swagger annotations on public controller actions -- No explanatory inline comments — code should be self-describing +- XML doc comments (`/// `) on all public methods, interfaces, and non-trivial private/protected helpers; Swagger annotations on public controller actions +- Inline `//` comments for non-obvious logic; avoid restating what the code already says clearly - Use `$$"""..."""` raw string literals (not `$"""`) when the 
content contains CSS or other curly-brace-heavy text — avoids CS9006 brace-escaping errors - `sealed` on all concrete service classes - Settings classes injected via `IOptions` — registered with `Configure(config.GetSection("..."))` diff --git a/Jobs/cv-search-job/Services/CvSearchEmailSender.cs b/Jobs/cv-search-job/Services/CvSearchEmailSender.cs index 89be54f..2394cb5 100644 --- a/Jobs/cv-search-job/Services/CvSearchEmailSender.cs +++ b/Jobs/cv-search-job/Services/CvSearchEmailSender.cs @@ -7,6 +7,10 @@ using Microsoft.Extensions.Logging; namespace CvSearchJob.Services; +/// +/// Sends job search results emails to the session user and the operator copy address, +/// with an optional CV PDF attachment. +/// public sealed class CvSearchEmailSender { private readonly IEmailApiClient _emailApi; @@ -23,6 +27,16 @@ public sealed class CvSearchEmailSender _logger = logger; } + /// + /// Builds and sends the job search results email. + /// Resolves the recipient list from and the operator copy address + /// stored in the email template. Does nothing when no recipients can be resolved. + /// + /// Primary recipient (the user who triggered the search). + /// Relative filename of the CV PDF to attach, or null. + /// Ranked list of job search results to include in the email body. + /// Two-letter language code for template rendering. + /// Cancellation token. public async Task SendResultsAsync( string toEmail, string? attachmentFileName, @@ -64,6 +78,10 @@ public sealed class CvSearchEmailSender } } + /// + /// Renders the HTML email body from the results list. + /// Returns the empty-results template when no results are present. + /// private string BuildBody(IReadOnlyList results, string language) { if (results.Count == 0) @@ -92,6 +110,10 @@ public sealed class CvSearchEmailSender ("items", items.ToString())); } + /// + /// Attempts to deserialise the stored result JSON into a . + /// Returns null on parse failure so the email still renders without a summary. 
+ /// private static JobMatchResponse? TryParseResult(string json) { try diff --git a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs index 7fba235..d3dcd5d 100644 --- a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs +++ b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs @@ -5,6 +5,11 @@ using Microsoft.Extensions.Logging; namespace CvSearchJob.Services; +/// +/// Config-driven HTML scraper that fetches a provider's job listing page and extracts matching job URLs. +/// Uses a two-stage anchor filter: href must contain the provider's link pattern, and anchor text must +/// contain at least one CV keyword. +/// public sealed class HtmlJobSearcher { private readonly HttpClient _http; @@ -18,6 +23,15 @@ public sealed class HtmlJobSearcher _http.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; MyAi.ro CV-Search/1.0)"); } + /// + /// Fetches the provider's search result page for the combined initial + CV keywords, parses all anchor + /// tags, applies the two-stage filter, and returns up to absolute URLs. + /// Returns an empty list when the HTTP request fails rather than throwing. + /// + /// Provider configuration including search URL template, link filter, and result cap. + /// Keywords extracted from the user's CV to inject into the search query. + /// Cancellation token. + /// Deduplicated list of absolute job page URLs (query string stripped). 
public async Task> SearchJobUrlsAsync( JobProviderConfig provider, IReadOnlyList cvKeywords, diff --git a/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs b/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs index 16b0087..76791be 100644 --- a/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs +++ b/Jobs/cv-search-job/Tasks/CvSearchJobTask.cs @@ -14,6 +14,10 @@ using Microsoft.Extensions.Options; namespace CvSearchJob.Tasks; +/// +/// Background job task that processes pending job search sessions: scrapes providers, +/// scores each URL against the CV via the matcher API, persists results, and sends the results email. +/// public sealed class CvSearchJobTask : IJobTask { private readonly IServiceScopeFactory _scopeFactory; @@ -41,6 +45,11 @@ public sealed class CvSearchJobTask : IJobTask _logger = logger; } + /// + /// Called by the scheduler on each tick. Resets orphaned sessions, picks the oldest pending session, + /// runs the full search pipeline, and sends the results email. + /// Does nothing when JobSearch:Enabled is false. + /// public async Task ExecuteAsync(IConfiguration parametersSection, CancellationToken cancellationToken) { if (!_settings.Enabled) return; @@ -92,6 +101,10 @@ public sealed class CvSearchJobTask : IJobTask } } + /// + /// Runs the full search pipeline for a session: scrapes all providers, deduplicates URLs, + /// scores each candidate via the matcher API, and persists results that meet the minimum score threshold. + /// private async Task> RunSearchAsync( JobSearchSessionEntity session, CvSearchDbContext db, @@ -163,6 +176,10 @@ public sealed class CvSearchJobTask : IJobTask return results; } + /// + /// Deserialises the provider configuration snapshot stored on the session. + /// Falls back to the current live config when the snapshot is absent or unparseable. + /// private List GetProviders(string? 
providerConfigJson) { if (string.IsNullOrWhiteSpace(providerConfigJson)) return _settings.Providers.Where(p => p.Enabled).ToList(); @@ -178,6 +195,10 @@ public sealed class CvSearchJobTask : IJobTask } } + /// + /// Infers the provider name from the job URL by matching against each provider's JobLinkContains pattern. + /// Falls back to the URL hostname when no provider matches. + /// private static string GuessProvider(string url, List providers) { foreach (var p in providers) @@ -190,6 +211,9 @@ public sealed class CvSearchJobTask : IJobTask return Uri.TryCreate(url, UriKind.Absolute, out var uri) ? uri.Host : "unknown"; } + /// + /// Constructs the CV PDF filename from the document ID. + /// private static string BuildCvFileName(string cvDocumentId) { // Strip non-alphanumeric characters so the filename is safe for all OS/email clients. -- 2.52.0