16bb195cb5
- Update CLAUDE.md: replace incorrect 'no XML doc on internal code' rule with the correct convention (XML doc on all public methods and non-trivial private/protected helpers) - Restore /// <summary> on FileDownloadController private helpers (HandleRangeRequest, StreamRangeAsync) - Add full XML doc to all service contracts: ICaptchaVerifier, IEmailSender, ICvMatcherService, IJobTextExtractor, IJobTokenService, IDocumentClassifier, IRagService, ITextChunker, ITextExtractor, IEmailTemplateService, ITemplateService - Add /// <summary> and /// <inheritdoc /> to all concrete service classes and their methods: RecaptchaVerifier, EmailApiEmailSender, SmtpEmailDispatcher, CvMatcherService, JobTextExtractor, JobTokenService, RagService, DocumentClassifier, TextChunker, TextExtractor, HtmlJobSearcher, CvSearchEmailSender, CvSearchJobTask, EmailTemplateService, DbTemplateService Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
49 lines
2.7 KiB
C#
49 lines
2.7 KiB
C#
using Rag.Models.Requests;
|
|
using Rag.Models.Responses;
|
|
|
|
namespace Api.Services.Contracts;
|
|
|
|
/// <summary>
/// Contract for the core RAG (Retrieval-Augmented Generation) operations:
/// document indexing, vector search, and document retrieval.
/// </summary>
public interface IRagService
{
    /// <summary>
    /// Indexes a plain-text document: classifies it, splits the text into chunks, generates embeddings,
    /// and persists the document together with its chunks.
    /// If the text hash already exists, cached metadata is returned instead.
    /// </summary>
    /// <param name="request">Indexing request carrying the text and, optionally, a document type, title, and source URL.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A response containing the document ID, hash, type, and chunk/character counts.</returns>
    Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct);

    /// <summary>
    /// Extracts the text of a PDF file and indexes it in the same way as <see cref="IndexTextAsync"/>.
    /// If the hash of the extracted text already exists, cached metadata is returned instead.
    /// </summary>
    /// <param name="file">The uploaded PDF file; its size must not exceed the configured maximum.</param>
    /// <param name="documentType">Optional document type hint; when omitted, the classifier determines the type.</param>
    /// <param name="title">Optional title override; when omitted, the title is extracted from the text.</param>
    /// <param name="sourceUrl">Optional source URL to associate with the document.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A response containing the document ID, hash, type, and chunk/character counts.</returns>
    Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct);

    /// <summary>
    /// Runs a vector similarity search over the indexed document chunks, groups the hits by document,
    /// and returns the top-K documents along with their best-matching chunks.
    /// </summary>
    /// <param name="request">Search request carrying the query text, an optional document type filter, and the top-K limit.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A ranked list of matching documents with scored chunk excerpts.</returns>
    Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct);

    /// <summary>
    /// Looks up a document by its ID and returns its full details, including the original text.
    /// </summary>
    /// <param name="documentId">Unique identifier of the document to retrieve.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The document details, or <c>null</c> when no document with that ID exists.</returns>
    Task<RagDocumentDetailsResponse?> GetDocumentAsync(string documentId, CancellationToken ct);
}
|