Files
myAi/api/Services/Rag/PdfTextExtractor.cs
T
claude ab31d41d88
Build and Push Docker Images / build (push) Successful in 20s
Changes
2026-05-04 17:49:04 +03:00

30 lines
807 B
C#

using Api.Services.Contracts.Rag;
using System.Text;
using UglyToad.PdfPig;
namespace Api.Services.Rag;
/// <summary>
/// Extracts the text of a PDF document as a single whitespace-normalized string,
/// using PdfPig to read the pages.
/// </summary>
public sealed class PdfTextExtractor : IPdfTextExtractor
{
    /// <summary>
    /// Reads every page of the PDF in <paramref name="pdfStream"/> and returns the
    /// concatenated page text with all whitespace runs collapsed to single spaces.
    /// </summary>
    /// <param name="pdfStream">
    /// Stream containing the PDF. The stream is not closed by this method. If it is
    /// not seekable, its remaining content is first copied into memory.
    /// </param>
    /// <returns>
    /// The normalized text of all pages, or <see cref="string.Empty"/> when the
    /// document yields no non-whitespace text.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="pdfStream"/> is <see langword="null"/>.
    /// </exception>
    public string ExtractText(Stream pdfStream)
    {
        ArgumentNullException.ThrowIfNull(pdfStream);

        // NOTE(review): PdfPig is understood to need random access to its input, so a
        // forward-only stream (e.g. a raw request body) is buffered up front instead
        // of being handed over as-is. Confirm against the PdfPig version in use.
        using MemoryStream? buffered = pdfStream.CanSeek ? null : BufferToMemory(pdfStream);

        using var document = PdfDocument.Open(buffered ?? pdfStream);

        var builder = new StringBuilder();
        foreach (var page in document.GetPages())
        {
            // One line break per page is enough: NormalizeWhitespace collapses every
            // whitespace run to a single space, so extra blank lines never survive.
            builder.AppendLine(page.Text);
        }

        return NormalizeWhitespace(builder.ToString());
    }

    /// <summary>Copies the remaining content of <paramref name="source"/> into a rewound in-memory stream.</summary>
    private static MemoryStream BufferToMemory(Stream source)
    {
        var copy = new MemoryStream();
        source.CopyTo(copy);
        copy.Position = 0;
        return copy;
    }

    /// <summary>
    /// Collapses every run of whitespace (spaces, tabs, line breaks) in
    /// <paramref name="value"/> to a single space and drops leading/trailing whitespace.
    /// </summary>
    private static string NormalizeWhitespace(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        // A null separator array makes Split break on all white-space characters;
        // RemoveEmptyEntries discards the empty pieces between consecutive ones,
        // so the joined result needs no further trimming.
        var parts = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        return string.Join(' ', parts);
    }
}