16bb195cb5
- Update CLAUDE.md: replace incorrect 'no XML doc on internal code' rule with the correct convention (XML doc on all public methods and non-trivial private/protected helpers) - Restore /// <summary> on FileDownloadController private helpers (HandleRangeRequest, StreamRangeAsync) - Add full XML doc to all service contracts: ICaptchaVerifier, IEmailSender, ICvMatcherService, IJobTextExtractor, IJobTokenService, IDocumentClassifier, IRagService, ITextChunker, ITextExtractor, IEmailTemplateService, ITemplateService - Add /// <summary> and /// <inheritdoc /> to all concrete service classes and their methods: RecaptchaVerifier, EmailApiEmailSender, SmtpEmailDispatcher, CvMatcherService, JobTextExtractor, JobTokenService, RagService, DocumentClassifier, TextChunker, TextExtractor, HtmlJobSearcher, CvSearchEmailSender, CvSearchJobTask, EmailTemplateService, DbTemplateService Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
18 lines
881 B
C#
18 lines
881 B
C#
namespace Api.Services.Contracts;
|
||
|
||
/// <summary>
/// Splits document text into overlapping chunks suitable for embedding and vector search.
/// </summary>
public interface ITextChunker
{
    /// <summary>
    /// Divides <paramref name="text"/> into a list of chunks using a sliding window.
    /// Adjacent chunks share <paramref name="overlap"/> characters to preserve cross-boundary context.
    /// </summary>
    /// <param name="text">The full document text to chunk.</param>
    /// <param name="chunkSize">Maximum character length per chunk (clamped to 300–3000).</param>
    /// <param name="overlap">
    /// Number of trailing characters from the previous chunk to repeat at the start of the next
    /// (clamped to 0–<paramref name="chunkSize"/>/2).
    /// </param>
    /// <returns>Ordered list of non-empty text chunks.</returns>
    IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap);
}
|