@@ -0,0 +1,27 @@
|
||||
namespace Api.Services.Rag;
|
||||
|
||||
/// <summary>
/// Contract for components that split a text into an ordered list of chunk strings.
/// </summary>
public interface ITextChunker
{
    /// <summary>
    /// Splits <paramref name="text"/> into chunks.
    /// </summary>
    /// <param name="text">The text to split.</param>
    /// <param name="chunkSize">
    /// Requested size of each chunk. Units and permitted range are up to the implementation
    /// (<c>TextChunker</c> in this file counts characters and clamps the value).
    /// </param>
    /// <param name="overlap">
    /// Requested amount of content shared between consecutive chunks, in the same units as
    /// <paramref name="chunkSize"/>.
    /// </param>
    /// <returns>The chunks, in the order they occur in <paramref name="text"/>.</returns>
    IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap);
}
|
||||
|
||||
/// <summary>
/// Splits text into fixed-size character windows, where consecutive windows may share
/// a configurable number of characters.
/// </summary>
public sealed class TextChunker : ITextChunker
{
    // Bounds applied to the caller-supplied chunk size (in characters).
    private const int MinChunkSize = 300;
    private const int MaxChunkSize = 3000;

    /// <summary>
    /// Cuts <paramref name="text"/> into windows of up to <paramref name="chunkSize"/> characters,
    /// each starting <c>chunkSize - overlap</c> characters after the previous one.
    /// Every chunk is trimmed, and chunks that are empty after trimming are dropped.
    /// </summary>
    /// <param name="text">The text to split. Null, empty, or whitespace-only input yields an empty list.</param>
    /// <param name="chunkSize">Desired window length in characters; clamped to the range 300..3000.</param>
    /// <param name="overlap">
    /// Desired number of characters shared by consecutive windows; clamped to the range
    /// 0..(clamped chunkSize / 2).
    /// </param>
    /// <returns>
    /// The trimmed, non-empty chunks in document order. The last window is the first one that
    /// reaches the end of the text; no further chunk is emitted after it, because such a chunk
    /// would only repeat characters already contained in the previous chunk.
    /// </returns>
    public IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return [];
        }

        chunkSize = Math.Clamp(chunkSize, MinChunkSize, MaxChunkSize);

        // Capping the overlap at half the chunk size keeps the step strictly positive,
        // so every iteration advances through the text.
        overlap = Math.Clamp(overlap, 0, chunkSize / 2);
        var step = chunkSize - overlap;

        var chunks = new List<string>();
        var start = 0;

        while (start < text.Length)
        {
            var length = Math.Min(chunkSize, text.Length - start);
            var chunk = text.Substring(start, length).Trim();

            // A trimmed chunk is either empty or contains non-whitespace characters.
            if (chunk.Length > 0)
            {
                chunks.Add(chunk);
            }

            // This window already covers the end of the text. Advancing again would start
            // inside the overlap region and produce a chunk that merely duplicates the tail
            // of the one just captured.
            if (start + length >= text.Length)
            {
                break;
            }

            start += step;
        }

        return chunks;
    }
}
|
||||
Reference in New Issue
Block a user