using Microsoft.AspNetCore.Mvc;
using Api.Services.Contracts;
using Rag.Models.Requests;
using Rag.Models.Responses;
using Swashbuckle.AspNetCore.Annotations;
using Common.Responses;
namespace Api.Controllers;
/// <summary>
/// Internal endpoints for indexing documents into the vector store and performing semantic search.
/// Routes are prefixed with <c>api/rag</c>. Protected by the internal API key middleware — not reachable from the public internet.
/// </summary>
[ApiController]
[Route("api/rag")]
public sealed class RagController : ControllerBase
{
    /// <summary>Machine-readable error code attached to every 400 response produced by this controller.</summary>
    private const string InvalidRequestCode = "invalid_request";

    private readonly IRagService _ragService;

    // The generic ILogger<T> is what the default DI container registers; the non-generic
    // ILogger cannot be resolved when the controller is activated.
    private readonly ILogger<RagController> _logger;

    /// <summary>
    /// Creates the controller with its service and logging dependencies.
    /// </summary>
    /// <param name="ragService">Service that performs indexing, search, and document lookup.</param>
    /// <param name="logger">Logger used for request and outcome diagnostics.</param>
    /// <exception cref="ArgumentNullException">Thrown when either dependency is <c>null</c>.</exception>
    public RagController(IRagService ragService, ILogger<RagController> logger)
    {
        _ragService = ragService ?? throw new ArgumentNullException(nameof(ragService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Indexes a PDF file or plain-text document into the vector store via multipart/form-data.
    /// Chunks the content, generates embeddings, and stores them for semantic retrieval.
    /// Returns immediately from cache if an identical document was previously indexed.
    /// </summary>
    /// <param name="request">The indexing request: either a PDF file or raw text, plus optional title, source URL, and document type.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// 200 OK with an <see cref="IndexDocumentResponse"/> containing the document ID, chunk count, and cache status;
    /// 400 Bad Request if neither a file nor text is provided, or the request is otherwise invalid.
    /// </returns>
    [HttpPost("documents")]
    [RequestSizeLimit(10 * 1024 * 1024)] // 10 MiB upper bound on the multipart body.
    [SwaggerOperation(Summary = "Index document (multipart)", Description = "Indexes a PDF or plain-text document via multipart/form-data. Returns from cache if the same content was previously indexed.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Document indexed successfully", typeof(IndexDocumentResponse))]
    [SwaggerResponse(StatusCodes.Status400BadRequest, "Neither file nor text provided, or request is invalid", typeof(ErrorResponse))]
    [ProducesResponseType(StatusCodes.Status200OK)]
    [ProducesResponseType(typeof(ErrorResponse), StatusCodes.Status400BadRequest)]
    public async Task<ActionResult<IndexDocumentResponse>> IndexDocument(
        [FromForm] IndexDocumentUploadRequest request,
        CancellationToken ct)
    {
        try
        {
            _logger.LogInformation("Index document request received. HasFile={HasFile}, DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                request.File is not null, request.DocumentType, request.Title, request.SourceUrl);

            // An uploaded file takes precedence over inline text.
            if (request.File is not null)
            {
                var result = await _ragService.IndexPdfAsync(request.File, request.DocumentType, request.Title, request.SourceUrl, ct);
                _logger.LogInformation("Indexed PDF document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                    result.DocumentId, result.DocumentType, result.Chunks, result.Cached);
                return Ok(result);
            }

            // No file: forward the form fields to the same text-indexing path the JSON endpoint uses.
            var textResult = await _ragService.IndexTextAsync(new IndexDocumentRequest
            {
                Text = request.Text,
                DocumentType = request.DocumentType,
                Title = request.Title,
                SourceUrl = request.SourceUrl
            }, ct);
            _logger.LogInformation("Indexed text document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                textResult.DocumentId, textResult.DocumentType, textResult.Chunks, textResult.Cached);
            return Ok(textResult);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid document indexing request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Indexes a plain-text document sent as JSON into the vector store.
    /// Returns immediately from cache if an identical document was previously indexed.
    /// </summary>
    /// <param name="request">The indexing request containing the raw text and optional title, source URL, and document type.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// 200 OK with an <see cref="IndexDocumentResponse"/> containing the document ID, chunk count, and cache status;
    /// 400 Bad Request if the text is empty or the request is otherwise invalid.
    /// </returns>
    [HttpPost("documents/json")]
    [SwaggerOperation(Summary = "Index document (JSON)", Description = "Indexes a plain-text document sent as JSON. Returns from cache if the same content was previously indexed.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Document indexed successfully", typeof(IndexDocumentResponse))]
    [SwaggerResponse(StatusCodes.Status400BadRequest, "Text missing or request invalid", typeof(ErrorResponse))]
    [ProducesResponseType(StatusCodes.Status200OK)]
    [ProducesResponseType(typeof(ErrorResponse), StatusCodes.Status400BadRequest)]
    public async Task<ActionResult<IndexDocumentResponse>> IndexJsonDocument([FromBody] IndexDocumentRequest request, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation("JSON document indexing request received. DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                request.DocumentType, request.Title, request.SourceUrl);

            var result = await _ragService.IndexTextAsync(request, ct);
            _logger.LogInformation("Indexed JSON document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                result.DocumentId, result.DocumentType, result.Chunks, result.Cached);
            return Ok(result);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid JSON document indexing request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Performs semantic (vector) search over indexed documents.
    /// Embeds the query, retrieves the closest chunks by cosine similarity, and returns the ranked results.
    /// </summary>
    /// <param name="request">The search request: query text, optional document type filter, and maximum result count.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// 200 OK with a <see cref="SearchResponse"/> containing the ranked matching chunks with scores and metadata;
    /// 400 Bad Request if the query is empty or the request is otherwise invalid.
    /// </returns>
    [HttpPost("search")]
    [SwaggerOperation(Summary = "Semantic search", Description = "Embeds the query and retrieves the closest document chunks by vector similarity.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Search results returned", typeof(SearchResponse))]
    [SwaggerResponse(StatusCodes.Status400BadRequest, "Query missing or request invalid", typeof(ErrorResponse))]
    [ProducesResponseType(StatusCodes.Status200OK)]
    [ProducesResponseType(typeof(ErrorResponse), StatusCodes.Status400BadRequest)]
    public async Task<ActionResult<SearchResponse>> Search([FromBody] SearchRequest request, CancellationToken ct)
    {
        try
        {
            // A missing document-type filter is logged as an empty string rather than "null".
            var targetTypes = request.TargetDocumentTypes is { } documentTypes
                ? string.Join(',', documentTypes)
                : string.Empty;
            _logger.LogInformation("Semantic search request received. TargetTypes={TargetTypes}, TopK={TopK}",
                targetTypes, request.TopK);

            var result = await _ragService.SearchAsync(request, ct);
            _logger.LogInformation("Semantic search completed. ResultCount={ResultCount}", result.Results.Count);
            return Ok(result);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid semantic search request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Returns the stored details for a previously indexed document, including its extracted text and metadata.
    /// </summary>
    /// <param name="id">The document ID returned when the document was indexed.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// 200 OK with a <see cref="RagDocumentDetailsResponse"/> containing the document text and metadata;
    /// 404 Not Found if no document with the given ID exists in the store.
    /// </returns>
    [HttpGet("documents/{id}")]
    [SwaggerOperation(Summary = "Get document details", Description = "Returns the stored text and metadata for a previously indexed document.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Document details returned", typeof(RagDocumentDetailsResponse))]
    [SwaggerResponse(StatusCodes.Status404NotFound, "Document not found", typeof(ErrorResponse))]
    [ProducesResponseType(StatusCodes.Status200OK)]
    [ProducesResponseType(typeof(ErrorResponse), StatusCodes.Status404NotFound)]
    public async Task<ActionResult<RagDocumentDetailsResponse>> GetDocument(string id, CancellationToken ct)
    {
        _logger.LogInformation("Get document request received. DocumentId={DocumentId}", id);

        var document = await _ragService.GetDocumentAsync(id, ct);
        if (document is null)
        {
            _logger.LogWarning("Document not found. DocumentId={DocumentId}", id);
            return NotFound(new ErrorResponse { Error = "Document not found.", Code = "document_not_found" });
        }

        return Ok(document);
    }

    /// <summary>
    /// Builds the 400 response shared by all actions: the exception message is surfaced as the
    /// error text together with the <c>invalid_request</c> code.
    /// </summary>
    /// <param name="ex">The exception raised while handling the request.</param>
    /// <returns>A 400 Bad Request result carrying an <see cref="ErrorResponse"/>.</returns>
    private BadRequestObjectResult InvalidRequest(InvalidOperationException ex) =>
        BadRequest(new ErrorResponse { Error = ex.Message, Code = InvalidRequestCode });
}