Files
myAi/Apis/rag-api/Services/TextChunker.cs
T
claude 4ee4a59b5e Improve comments and Swagger annotations across services (#26)
- EmailController: add class summary, full SwaggerResponse/ProducesResponseType
  for 400 and 500, and Description on SwaggerOperation
- ContactController: fix terse "Failed." error message to
  "Could not process subscription."
- FileDownloadController: remove redundant XML <response code> tags from
  the public action doc block; convert private-method /// <summary> to //
  (project convention: no XML doc on internal code)
- CvMatcherService: remove two dead commented-out blocks (old email send
  and BuildEmailBody helper)
- JobTokenService: comment the phone/contact-line regex filter in
  ExtractKeywords
- DocumentClassifier: comment the keyword-frequency scoring approach and
  the confidence formula
- TextChunker: comment the sliding-window step (chunkSize - overlap)
- CvSearchJobTask: comment the GdprConsent = true rationale and the
  BuildCvFileName sanitisation logic
- HtmlJobSearcher: comment GetLeftPart(UriPartial.Path) query-strip dedup

Closes #26

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-28 09:07:23 +03:00

27 lines
924 B
C#

using Api.Services.Contracts;
namespace Api.Services;
public sealed class TextChunker : ITextChunker
{
public IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap)
{
if (string.IsNullOrWhiteSpace(text)) return [];
chunkSize = Math.Clamp(chunkSize, 300, 3000);
overlap = Math.Clamp(overlap, 0, chunkSize / 2);
// Sliding window: step forward by (chunkSize - overlap) each iteration so
// adjacent chunks share `overlap` characters, preserving cross-boundary context.
var chunks = new List<string>();
var start = 0;
while (start < text.Length)
{
var length = Math.Min(chunkSize, text.Length - start);
var chunk = text.Substring(start, length).Trim();
if (!string.IsNullOrWhiteSpace(chunk)) chunks.Add(chunk);
start += chunkSize - overlap;
}
return chunks;
}
}