Add XML doc to all service interfaces and implementations (#26)

- Update CLAUDE.md: replace incorrect 'no XML doc on internal code' rule with the correct convention (XML doc on all public methods and non-trivial private/protected helpers)
- Restore /// <summary> on FileDownloadController private helpers (HandleRangeRequest, StreamRangeAsync)
- Add full XML doc to all service contracts: ICaptchaVerifier, IEmailSender, ICvMatcherService, IJobTextExtractor, IJobTokenService, IDocumentClassifier, IRagService, ITextChunker, ITextExtractor, IEmailTemplateService, ITemplateService
- Add /// <summary> and /// <inheritdoc /> to all concrete service classes and their methods: RecaptchaVerifier, EmailApiEmailSender, SmtpEmailDispatcher, CvMatcherService, JobTextExtractor, JobTokenService, RagService, DocumentClassifier, TextChunker, TextExtractor, HtmlJobSearcher, CvSearchEmailSender, CvSearchJobTask, EmailTemplateService, DbTemplateService

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-28 09:17:42 +03:00
parent 4ee4a59b5e
commit 16bb195cb5
28 changed files with 436 additions and 6 deletions
@@ -11,6 +11,9 @@ using CommonHelpers;

 namespace Api.Services;

+/// <summary>
+/// Implements the core RAG pipeline: document classification, chunking, embedding, vector search, and retrieval.
+/// </summary>
 public sealed class RagService : IRagService
 {
    private readonly ITextExtractor _textExtractor;
@@ -36,6 +39,7 @@ public sealed class RagService : IRagService
        _settings = options.Value;
    }

+    /// <inheritdoc />
    public async Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct)
    {
        var text = _textExtractor.Normalize(request.Text ?? string.Empty);
@@ -44,6 +48,7 @@ public sealed class RagService : IRagService
        return await IndexNormalizedTextAsync(text, request.DocumentType, request.Title, request.SourceUrl, request.Metadata, ct);
    }

+    /// <inheritdoc />
    public async Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct)
    {
        if (file.Length <= 0) throw new InvalidOperationException("Uploaded file is empty.");
@@ -57,6 +62,7 @@ public sealed class RagService : IRagService
        return await IndexNormalizedTextAsync(text, documentType, title ?? file.FileName, sourceUrl, new Dictionary<string, string> { ["fileName"] = file.FileName }, ct);
    }

+    /// <inheritdoc />
    public async Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct)
    {
        var query = _textExtractor.Normalize(request.QueryText);
@@ -97,6 +103,7 @@ public sealed class RagService : IRagService
        return new SearchResponse { Results = results };
    }

+    /// <inheritdoc />
    public async Task<RagDocumentDetailsResponse?> GetDocumentAsync(string documentId, CancellationToken ct)
    {
        var document = await _repository.GetDocumentByIdAsync(documentId, ct);
@@ -112,6 +119,11 @@ public sealed class RagService : IRagService
        };
    }

+    /// <summary>
+    /// Core indexing pipeline: computes a text hash for deduplication, classifies and chunks the text,
+    /// generates embeddings for each chunk, and persists the document and chunks to the repository.
+    /// Returns cached metadata without re-indexing when the same text hash and source URL already exist.
+    /// </summary>
    private async Task<IndexDocumentResponse> IndexNormalizedTextAsync(
        string text,
        string? documentType,