using Api.Services.Contracts;
namespace Api.Services;
/// <summary>
/// Splits text into overlapping fixed-size chunks using a sliding window for use in vector embedding pipelines.
/// </summary>
public sealed class TextChunker : ITextChunker
{
    /// <summary>Lower bound applied to the requested chunk size; smaller requests are raised to this value.</summary>
    private const int MinChunkSize = 300;

    /// <summary>Upper bound applied to the requested chunk size; larger requests are lowered to this value.</summary>
    private const int MaxChunkSize = 3000;

    /// <summary>
    /// Splits <paramref name="text"/> into character windows of at most the (clamped) chunk size,
    /// where each window starts (chunk size - overlap) characters after the previous one.
    /// </summary>
    /// <param name="text">The text to split. Null, empty, or whitespace-only input produces an empty list.</param>
    /// <param name="chunkSize">
    /// Requested window length in characters. Values outside the range 300..3000 are silently
    /// clamped into it rather than rejected.
    /// </param>
    /// <param name="overlap">
    /// Requested number of characters shared by adjacent windows. Clamped to the range
    /// 0..(clamped chunk size / 2), so the window always advances by at least half a chunk.
    /// </param>
    /// <returns>
    /// The chunks in source order. Each chunk is trimmed of leading and trailing whitespace, so a
    /// chunk may be shorter than the window it was cut from; windows that are empty after trimming
    /// are omitted.
    /// </returns>
    public IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return [];
        }

        chunkSize = Math.Clamp(chunkSize, MinChunkSize, MaxChunkSize);
        overlap = Math.Clamp(overlap, 0, chunkSize / 2);

        // Sliding window: advance by (chunkSize - overlap) each iteration so adjacent
        // windows share `overlap` characters, preserving cross-boundary context.
        // The clamps above guarantee step >= chunkSize / 2 > 0, so the loop always advances.
        var step = chunkSize - overlap;
        var chunks = new List<string>();

        for (var start = 0; start < text.Length; start += step)
        {
            var length = Math.Min(chunkSize, text.Length - start);
            var chunk = text.Substring(start, length).Trim();

            // After Trim(), a whitespace-only window is exactly the empty string.
            if (chunk.Length > 0)
            {
                chunks.Add(chunk);
            }

            // Once a window reaches the end of the text, any further window would begin inside
            // this one and contain only a suffix of it. Stop here instead of emitting that
            // redundant tail chunk.
            if (start + length >= text.Length)
            {
                break;
            }
        }

        return chunks;
    }
}