namespace Api.Services.Contracts;

/// <summary>
/// Extracts and normalises plain text from documents.
/// </summary>
public interface ITextExtractor
{
    /// <summary>
    /// Reads all pages of a PDF stream and returns the concatenated, normalised plain text.
    /// </summary>
    /// <param name="stream">Readable stream positioned at the start of the PDF file.</param>
    /// <param name="ct">Cancellation token (checked between pages).</param>
    /// <returns>Normalised plain text extracted from the PDF.</returns>
    Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct);

    /// <summary>
    /// Collapses all whitespace sequences in <paramref name="value"/> to single spaces and trims the result.
    /// Returns an empty string for null/whitespace input.
    /// </summary>
    /// <param name="value">Raw text to normalise.</param>
    /// <returns>Whitespace-normalised text.</returns>
    string Normalize(string value);
}