using System.Text;
using Api.Services.Contracts;
using UglyToad.PdfPig;

namespace Api.Services;

/// <summary>
/// Extracts and normalises plain text from PDF files using PdfPig.
/// </summary>
public sealed class TextExtractor : ITextExtractor
{
    /// <summary>
    /// Reads the text of every page of the PDF in <paramref name="stream"/> and returns it
    /// with all whitespace normalised by <see cref="Normalize(string)"/>.
    /// </summary>
    /// <remarks>
    /// The extraction runs synchronously on the calling thread; the returned task is already
    /// completed. Failures (including cancellation) are therefore thrown directly from this
    /// call rather than being stored in the returned task.
    /// </remarks>
    /// <param name="stream">Stream containing the PDF document.</param>
    /// <param name="ct">Token checked before the document is opened and before each page is read.</param>
    /// <returns>A completed task whose result is the normalised text of the document.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    // NOTE(review): the declared return type is the non-generic Task although a Task<string> is
    // produced below; confirm this matches the ITextExtractor contract.
    public Task ExtractPdfAsync(Stream stream, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(stream);

        // Bail out before paying for document parsing when the caller has already cancelled.
        ct.ThrowIfCancellationRequested();

        using var document = PdfDocument.Open(stream);
        var builder = new StringBuilder();

        foreach (var page in document.GetPages())
        {
            ct.ThrowIfCancellationRequested();

            // One line break per page is enough to keep pages apart: Normalize collapses every
            // whitespace run into a single space, so extra blank lines would change nothing.
            builder.AppendLine(page.Text);
        }

        return Task.FromResult(Normalize(builder.ToString()));
    }

    /// <summary>
    /// Collapses every run of whitespace in <paramref name="value"/> into a single space and
    /// removes leading and trailing whitespace.
    /// </summary>
    /// <param name="value">Text to normalise.</param>
    /// <returns>
    /// The normalised text, or <see cref="string.Empty"/> when <paramref name="value"/> is
    /// <c>null</c>, empty, or consists only of whitespace.
    /// </returns>
    public string Normalize(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        // A null separator makes Split break on all whitespace characters; with empty entries
        // removed, the tokens contain no whitespace, so the joined result needs no trimming.
        var tokens = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        return string.Join(' ', tokens);
    }
}