using Microsoft.AspNetCore.Mvc;
using Api.Services.Contracts;
using Rag.Models.Requests;
using Rag.Models.Responses;
using Swashbuckle.AspNetCore.Annotations;
using Common.Responses;

namespace Api.Controllers;

/// <summary>
/// Internal endpoints for indexing documents into the vector store and performing semantic search.
/// Routes are prefixed with <c>api/rag</c>. Protected by the internal API key middleware — not reachable from the public internet.
/// </summary>
[ApiController]
[Route("api/rag")]
public sealed class RagController : ControllerBase
{
    /// <summary>Error code returned with every 400 response produced by this controller.</summary>
    private const string InvalidRequestCode = "invalid_request";

    private readonly IRagService _ragService;
    private readonly ILogger<RagController> _logger;

    /// <summary>
    /// Creates the controller with its collaborators.
    /// </summary>
    /// <param name="ragService">Service that performs indexing, search, and document lookup.</param>
    /// <param name="logger">Logger used for request and outcome diagnostics.</param>
    public RagController(IRagService ragService, ILogger<RagController> logger)
    {
        _ragService = ragService;
        _logger = logger;
    }

    /// <summary>
    /// Indexes a PDF file or plain-text document into the vector store via multipart/form-data.
    /// Chunks the content, generates embeddings, and stores them for semantic retrieval.
    /// Returns immediately from cache if an identical document was previously indexed.
    /// </summary>
    /// <param name="request">The indexing request: either a PDF file or raw text, plus optional title, source URL, and document type.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// 200 OK with an <see cref="IndexDocumentResponse"/> containing the document ID, chunk count, and cache status;
    /// 400 Bad Request if neither a file nor text is provided, or the request is otherwise invalid.
    /// </returns>
    /// <remarks>
    /// When <c>request.File</c> is present it takes precedence and <c>request.Text</c> is ignored.
    /// An <see cref="InvalidOperationException"/> thrown by the service is translated into the 400 response.
    /// </remarks>
    [HttpPost("documents")]
    [RequestSizeLimit(10 * 1024 * 1024)]
    [SwaggerOperation(Summary = "Index document (multipart)", Description = "Indexes a PDF or plain-text document via multipart/form-data. Returns from cache if the same content was previously indexed.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Document indexed successfully", typeof(IndexDocumentResponse))]
    [SwaggerResponse(StatusCodes.Status400BadRequest, "Neither file nor text provided, or request is invalid", typeof(ErrorResponse))]
    [ProducesResponseType(StatusCodes.Status200OK)]
    [ProducesResponseType(typeof(ErrorResponse), StatusCodes.Status400BadRequest)]
    public async Task<ActionResult<IndexDocumentResponse>> IndexDocument(
        [FromForm] IndexDocumentUploadRequest request,
        CancellationToken ct)
    {
        try
        {
            _logger.LogInformation(
                "Index document request received. HasFile={HasFile}, DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                request.File is not null,
                request.DocumentType,
                request.Title,
                request.SourceUrl);

            if (request.File is not null)
            {
                var result = await _ragService.IndexPdfAsync(request.File, request.DocumentType, request.Title, request.SourceUrl, ct);

                _logger.LogInformation(
                    "Indexed PDF document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                    result.DocumentId,
                    result.DocumentType,
                    result.Chunks,
                    result.Cached);

                return Ok(result);
            }

            // No file uploaded: forward the form fields to the plain-text indexing path.
            var textResult = await _ragService.IndexTextAsync(
                new IndexDocumentRequest
                {
                    Text = request.Text,
                    DocumentType = request.DocumentType,
                    Title = request.Title,
                    SourceUrl = request.SourceUrl
                },
                ct);

            _logger.LogInformation(
                "Indexed text document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                textResult.DocumentId,
                textResult.DocumentType,
                textResult.Chunks,
                textResult.Cached);

            return Ok(textResult);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid document indexing request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Indexes a plain-text document sent as JSON into the vector store.
    /// Returns immediately from cache if an identical document was previously indexed.
    /// </summary>
    /// <param name="request">The indexing request containing the raw text and optional title, source URL, and document type.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// 200 OK with an <see cref="IndexDocumentResponse"/> containing the document ID, chunk count, and cache status;
    /// 400 Bad Request if the text is empty or the request is otherwise invalid.
    /// </returns>
    /// <remarks>
    /// An <see cref="InvalidOperationException"/> thrown by the service is translated into the 400 response.
    /// </remarks>
    [HttpPost("documents/json")]
    [SwaggerOperation(Summary = "Index document (JSON)", Description = "Indexes a plain-text document sent as JSON. Returns from cache if the same content was previously indexed.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Document indexed successfully", typeof(IndexDocumentResponse))]
    [SwaggerResponse(StatusCodes.Status400BadRequest, "Text missing or request invalid", typeof(ErrorResponse))]
    [ProducesResponseType(StatusCodes.Status200OK)]
    [ProducesResponseType(typeof(ErrorResponse), StatusCodes.Status400BadRequest)]
    public async Task<ActionResult<IndexDocumentResponse>> IndexJsonDocument([FromBody] IndexDocumentRequest request, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation(
                "JSON document indexing request received. DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                request.DocumentType,
                request.Title,
                request.SourceUrl);

            var result = await _ragService.IndexTextAsync(request, ct);

            _logger.LogInformation(
                "Indexed JSON document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                result.DocumentId,
                result.DocumentType,
                result.Chunks,
                result.Cached);

            return Ok(result);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid JSON document indexing request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Performs semantic (vector) search over indexed documents.
    /// Embeds the query, retrieves the closest chunks by cosine similarity, and returns the ranked results.
    /// </summary>
    /// <param name="request">The search request: query text, optional document type filter, and maximum result count.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// 200 OK with a <see cref="SearchResponse"/> containing the ranked matching chunks with scores and metadata;
    /// 400 Bad Request if the query is empty or the request is otherwise invalid.
    /// </returns>
    /// <remarks>
    /// An <see cref="InvalidOperationException"/> thrown by the service is translated into the 400 response.
    /// </remarks>
    [HttpPost("search")]
    [SwaggerOperation(Summary = "Semantic search", Description = "Embeds the query and retrieves the closest document chunks by vector similarity.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Search results returned", typeof(SearchResponse))]
    [SwaggerResponse(StatusCodes.Status400BadRequest, "Query missing or request invalid", typeof(ErrorResponse))]
    [ProducesResponseType(StatusCodes.Status200OK)]
    [ProducesResponseType(typeof(ErrorResponse), StatusCodes.Status400BadRequest)]
    public async Task<ActionResult<SearchResponse>> Search([FromBody] SearchRequest request, CancellationToken ct)
    {
        try
        {
            // A missing filter is logged as an empty string, exactly as joining an empty
            // collection would; the null check avoids having to name the element type here.
            var targetTypes = request.TargetDocumentTypes is null
                ? string.Empty
                : string.Join(',', request.TargetDocumentTypes);

            _logger.LogInformation(
                "Semantic search request received. TargetTypes={TargetTypes}, TopK={TopK}",
                targetTypes,
                request.TopK);

            var result = await _ragService.SearchAsync(request, ct);

            _logger.LogInformation("Semantic search completed. ResultCount={ResultCount}", result.Results.Count);

            return Ok(result);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid semantic search request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Returns the stored details for a previously indexed document, including its extracted text and metadata.
    /// </summary>
    /// <param name="id">The document ID returned when the document was indexed.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// 200 OK with a <see cref="RagDocumentDetailsResponse"/> containing the document text and metadata;
    /// 404 Not Found if no document with the given ID exists in the store.
    /// </returns>
    [HttpGet("documents/{id}")]
    [SwaggerOperation(Summary = "Get document details", Description = "Returns the stored text and metadata for a previously indexed document.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Document details returned", typeof(RagDocumentDetailsResponse))]
    [SwaggerResponse(StatusCodes.Status404NotFound, "Document not found", typeof(ErrorResponse))]
    [ProducesResponseType(StatusCodes.Status200OK)]
    [ProducesResponseType(typeof(ErrorResponse), StatusCodes.Status404NotFound)]
    public async Task<ActionResult<RagDocumentDetailsResponse>> GetDocument(string id, CancellationToken ct)
    {
        _logger.LogInformation("Get document request received. DocumentId={DocumentId}", id);

        var document = await _ragService.GetDocumentAsync(id, ct);
        if (document is null)
        {
            _logger.LogWarning("Document not found. DocumentId={DocumentId}", id);
            return NotFound(new ErrorResponse { Error = "Document not found.", Code = "document_not_found" });
        }

        return Ok(document);
    }

    /// <summary>
    /// Builds the 400 Bad Request result shared by the indexing and search actions,
    /// carrying the exception message and the <c>invalid_request</c> error code.
    /// </summary>
    /// <param name="ex">The exception whose message is surfaced to the caller.</param>
    /// <returns>A 400 result wrapping an <see cref="ErrorResponse"/>.</returns>
    private BadRequestObjectResult InvalidRequest(InvalidOperationException ex) =>
        BadRequest(new ErrorResponse { Error = ex.Message, Code = InvalidRequestCode });
}