@@ -0,0 +1,110 @@
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Api.Services.Contracts;
|
||||
using Api.Requests;
|
||||
|
||||
namespace Api.Controllers;
|
||||
|
||||
/// <summary>
/// HTTP endpoints under <c>api/rag</c> for indexing documents, running semantic
/// search and fetching a stored document. Every action delegates the actual work
/// to <see cref="IRagService"/>; an <see cref="InvalidOperationException"/> thrown
/// by the service is translated into a 400 response carrying the exception message.
/// </summary>
[ApiController]
[Route("api/rag")]
public sealed class RagController : ControllerBase
{
    private readonly IRagService _ragService;
    private readonly ILogger<RagController> _logger;

    /// <summary>Creates the controller with its service and logger dependencies.</summary>
    public RagController(IRagService ragService, ILogger<RagController> logger)
    {
        _ragService = ragService;
        _logger = logger;
    }

    /// <summary>
    /// Indexes a document submitted as multipart form data. When <paramref name="file"/>
    /// is supplied it is indexed through the PDF path; otherwise <paramref name="text"/>
    /// is indexed as plain text. The request body is limited to 10 MiB.
    /// </summary>
    /// <returns>200 with the indexing result, or 400 when the service rejects the input.</returns>
    [HttpPost("documents")]
    [RequestSizeLimit(10 * 1024 * 1024)]
    public async Task<IActionResult> IndexDocument(
        [FromForm] IFormFile? file,
        [FromForm] string? text,
        [FromForm] string? documentType,
        [FromForm] string? title,
        [FromForm] string? sourceUrl,
        CancellationToken ct)
    {
        try
        {
            var hasFile = file is not null;
            _logger.LogInformation(
                "Index document request received. HasFile={HasFile}, DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                hasFile, documentType, title, sourceUrl);

            if (file is null)
            {
                // No upload: fall back to the text form field.
                var payload = new IndexDocumentRequest
                {
                    Text = text,
                    DocumentType = documentType,
                    Title = title,
                    SourceUrl = sourceUrl
                };

                var indexedText = await _ragService.IndexTextAsync(payload, ct);
                _logger.LogInformation(
                    "Indexed text document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                    indexedText.DocumentId, indexedText.DocumentType, indexedText.Chunks, indexedText.Cached);
                return Ok(indexedText);
            }

            var indexedPdf = await _ragService.IndexPdfAsync(file, documentType, title, sourceUrl, ct);
            _logger.LogInformation(
                "Indexed PDF document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                indexedPdf.DocumentId, indexedPdf.DocumentType, indexedPdf.Chunks, indexedPdf.Cached);
            return Ok(indexedPdf);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid document indexing request.");
            return BadRequest(new { error = ex.Message });
        }
    }

    /// <summary>Indexes a text document submitted as a JSON body.</summary>
    /// <returns>200 with the indexing result, or 400 when the service rejects the input.</returns>
    [HttpPost("documents/json")]
    public async Task<IActionResult> IndexJsonDocument([FromBody] IndexDocumentRequest request, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation(
                "JSON document indexing request received. DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                request.DocumentType, request.Title, request.SourceUrl);

            var indexed = await _ragService.IndexTextAsync(request, ct);

            _logger.LogInformation(
                "Indexed JSON document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                indexed.DocumentId, indexed.DocumentType, indexed.Chunks, indexed.Cached);
            return Ok(indexed);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid JSON document indexing request.");
            return BadRequest(new { error = ex.Message });
        }
    }

    /// <summary>Runs a semantic search described by <paramref name="request"/>.</summary>
    /// <returns>200 with the search response, or 400 when the service rejects the request.</returns>
    [HttpPost("search")]
    public async Task<IActionResult> Search([FromBody] SearchRequest request, CancellationToken ct)
    {
        try
        {
            // A missing type filter is logged as an empty string.
            var targetTypes = string.Join(',', request.TargetDocumentTypes ?? []);
            _logger.LogInformation(
                "Semantic search request received. TargetTypes={TargetTypes}, TopK={TopK}",
                targetTypes, request.TopK);

            var found = await _ragService.SearchAsync(request, ct);

            _logger.LogInformation("Semantic search completed. ResultCount={ResultCount}", found.Results.Count);
            return Ok(found);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid semantic search request.");
            return BadRequest(new { error = ex.Message });
        }
    }

    /// <summary>Returns the stored document with the given <paramref name="id"/>.</summary>
    /// <returns>200 with the document, or 404 when the service returns <c>null</c>.</returns>
    [HttpGet("documents/{id}")]
    public async Task<IActionResult> GetDocument(string id, CancellationToken ct)
    {
        _logger.LogInformation("Get document request received. DocumentId={DocumentId}", id);

        var stored = await _ragService.GetDocumentAsync(id, ct);
        if (stored is not null)
        {
            return Ok(stored);
        }

        _logger.LogWarning("Document not found. DocumentId={DocumentId}", id);
        return NotFound(new { error = "Document not found." });
    }
}
|
||||
@@ -0,0 +1,63 @@
|
||||
-- Idempotent schema script for the RAG store. Every batch checks for the object
-- it creates, so the script can be re-run against an existing database.

-- Text chunks of indexed documents, one row per chunk, with the chunk's embedding
-- stored as binary.
IF OBJECT_ID('dbo.RagChunks', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.RagChunks (
        Id NVARCHAR(64) NOT NULL CONSTRAINT PK_RagChunks PRIMARY KEY,
        DocumentId NVARCHAR(64) NOT NULL,
        ChunkIndex INT NOT NULL,
        Text NVARCHAR(MAX) NOT NULL,
        Embedding VARBINARY(MAX) NOT NULL
    );
END
GO

-- Indexed documents with their classification, raw text and text hash.
IF OBJECT_ID('dbo.RagDocuments', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.RagDocuments (
        Id NVARCHAR(64) NOT NULL CONSTRAINT PK_RagDocuments PRIMARY KEY,
        DocumentType NVARCHAR(80) NOT NULL,
        Title NVARCHAR(300) NOT NULL,
        SourceUrl NVARCHAR(1200) NULL,
        RawText NVARCHAR(MAX) NOT NULL,
        TextHash NVARCHAR(64) NOT NULL,
        TypeConfidence FLOAT NOT NULL,
        MetadataJson NVARCHAR(MAX) NOT NULL CONSTRAINT DF_RagDocuments_MetadataJson DEFAULT '{}',
        CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagDocuments_CreatedAt DEFAULT SYSUTCDATETIME()
    );

    CREATE INDEX IX_RagDocuments_TextHash ON dbo.RagDocuments(TextHash);
    CREATE INDEX IX_RagDocuments_DocumentType ON dbo.RagDocuments(DocumentType);
END
GO

-- Chunks belong to a document; deleting a document removes its chunks.
-- Added in its own batch because RagChunks is created before RagDocuments.
IF NOT EXISTS (SELECT 1 FROM sys.foreign_keys WHERE name = 'FK_RagChunks_RagDocuments')
BEGIN
    ALTER TABLE dbo.RagChunks
        ADD CONSTRAINT FK_RagChunks_RagDocuments FOREIGN KEY (DocumentId) REFERENCES dbo.RagDocuments(Id) ON DELETE CASCADE;
END
GO

-- SQL Server does not create an index for a foreign key column automatically.
-- Without this index, the cascading delete and any lookup of chunks by document
-- must scan the whole RagChunks table.
IF NOT EXISTS (
    SELECT 1
    FROM sys.indexes
    WHERE name = 'IX_RagChunks_DocumentId'
      AND object_id = OBJECT_ID('dbo.RagChunks')
)
BEGIN
    CREATE INDEX IX_RagChunks_DocumentId ON dbo.RagChunks(DocumentId);
END
GO

-- Cache of embedding vectors keyed by CacheKey.
IF OBJECT_ID('dbo.RagEmbeddingCache', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.RagEmbeddingCache (
        CacheKey NVARCHAR(64) NOT NULL CONSTRAINT PK_RagEmbeddingCache PRIMARY KEY,
        Model NVARCHAR(120) NOT NULL,
        TextHash NVARCHAR(64) NOT NULL,
        Vector VARBINARY(MAX) NOT NULL,
        CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagEmbeddingCache_CreatedAt DEFAULT SYSUTCDATETIME()
    );

    CREATE INDEX IX_RagEmbeddingCache_TextHash ON dbo.RagEmbeddingCache(TextHash);
END
GO

-- Cache of chat completion responses keyed by CacheKey.
IF OBJECT_ID('dbo.RagChatCompletionCache', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.RagChatCompletionCache (
        CacheKey NVARCHAR(64) NOT NULL CONSTRAINT PK_RagChatCompletionCache PRIMARY KEY,
        Model NVARCHAR(120) NOT NULL,
        Temperature DECIMAL(4,2) NOT NULL,
        ResponseText NVARCHAR(MAX) NOT NULL,
        CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagChatCompletionCache_CreatedAt DEFAULT SYSUTCDATETIME()
    );
END
GO
|
||||
@@ -0,0 +1,15 @@
|
||||
# ---- Runtime base: ASP.NET Core runtime only, no SDK ----
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS base
WORKDIR /app
EXPOSE 8080

# ---- Build stage: restore and publish with the full SDK ----
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
WORKDIR /src
# Copy only the project file first so the restore layer stays cached until
# the project file itself changes.
COPY rag-api.csproj ./
RUN dotnet restore rag-api.csproj
COPY . .
# UseAppHost=false: no native launcher is produced; the app is started via `dotnet rag-api.dll`.
RUN dotnet publish rag-api.csproj -c Release -o /app/publish -p:UseAppHost=false

# ---- Final image: runtime base plus the published output ----
# NOTE(review): no USER instruction is set, so the container runs as the base
# image's default user. Consider `USER $APP_UID` if the app needs no write
# access to /app - confirm against the deployment before changing.
FROM base AS final
WORKDIR /app
COPY --from=build /app/publish .
ENTRYPOINT ["dotnet", "rag-api.dll"]
|
||||
@@ -0,0 +1,282 @@
|
||||
using Azure.Identity;
|
||||
using Microsoft.AspNetCore.Diagnostics;
|
||||
using Api.Services;
|
||||
using Api.Services.Contracts;
|
||||
using Api.Settings;
|
||||
using Serilog;
|
||||
using System.Reflection;
|
||||
|
||||
DotNetEnv.Env.Load();
|
||||
|
||||
// Two-stage Serilog initialisation. Until the host is built, the static Log.Logger
// is Serilog's silent default, so the startup, Key Vault and fatal-error messages
// written through Log.* below would be dropped. The bootstrap logger makes them
// visible; UseSerilog(...) replaces its configuration once the host is built.
Log.Logger = new LoggerConfiguration()
    .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter())
    .CreateBootstrapLogger();

try
{
    var builder = WebApplication.CreateBuilder(args);

    // Prefer the informational version, then the assembly version, then "unknown".
    var appVersion = Assembly.GetExecutingAssembly()
        .GetCustomAttribute<AssemblyInformationalVersionAttribute>()?
        .InformationalVersion
        ?? Assembly.GetExecutingAssembly().GetName().Version?.ToString()
        ?? "unknown";

    builder.Host.UseSerilog((context, services, configuration) =>
    {
        configuration
            .ReadFrom.Configuration(context.Configuration)
            .ReadFrom.Services(services)
            .Enrich.FromLogContext()
            .Enrich.WithMachineName()
            .Enrich.WithEnvironmentName()
            .Enrich.WithProperty("Service", "rag-api")
            .Enrich.WithProperty("AppVersion", appVersion)
            .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter());
    });

    Log.Information("Starting {Service} version {AppVersion}", "rag-api", appVersion);

    // --------------------
    // Azure Key Vault Configuration
    // --------------------
    var keyVaultUri = builder.Configuration["KeyVault:VaultUri"];
    var keyVaultEnabled = builder.Configuration.GetValue<bool>("KeyVault:Enabled");

    if (keyVaultEnabled && !string.IsNullOrWhiteSpace(keyVaultUri))
    {
        Log.Information("Loading configuration from Azure Key Vault: {VaultUri}", keyVaultUri);

        try
        {
            builder.Configuration.AddAzureKeyVault(
                new Uri(keyVaultUri),
                new DefaultAzureCredential());

            Log.Information("Azure Key Vault configuration loaded successfully");
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: the service keeps starting with the remaining sources.
            Log.Warning(ex, "Failed to load Azure Key Vault configuration. Continuing with other configuration sources.");
        }
    }
    else
    {
        Log.Information("Azure Key Vault is disabled or not configured");
    }

    builder.Services.Configure<RagSettings>(builder.Configuration.GetSection("Rag"));
    builder.Services.Configure<AiSettings>(builder.Configuration.GetSection("Ai"));
    builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));

    builder.Services.AddHttpClient<RawAiClient>();
    builder.Services.AddSingleton<IRagRepository, SqlRagRepository>();
    builder.Services.AddScoped<IAiClient, CachedAiClient>();
    builder.Services.AddSingleton<ITextExtractor, TextExtractor>();
    builder.Services.AddSingleton<ITextChunker, TextChunker>();
    builder.Services.AddSingleton<IDocumentClassifier, DocumentClassifier>();
    builder.Services.AddScoped<IRagService, RagService>();

    builder.Services.AddControllers();
    builder.Services.AddEndpointsApiExplorer();
    builder.Services.AddSwaggerGen();

    var app = builder.Build();

    var logger = app.Services.GetRequiredService<ILogger<Program>>();
    logger.LogInformation("API starting up...");
    logger.LogInformation("Environment: {Environment}", app.Environment.EnvironmentName);

    // Log all environment variables and configuration settings at startup
    // Can be controlled via appsettings: "Logging:LogEnvironmentOnStartup": true
    // NOTE(review): this defaults to true, and masking only covers keys matched by
    // IsSensitiveKey - confirm that is acceptable for production logs.
    var logEnvironmentOnStartup = app.Configuration.GetValue<bool>("Logging:LogEnvironmentOnStartup", defaultValue: true);
    if (logEnvironmentOnStartup)
    {
        LogEnvironmentSettings(logger, app.Configuration, app.Environment);
    }

    // Initialise the repository before the first request is served.
    using (var scope = app.Services.CreateScope())
    {
        var repository = scope.ServiceProvider.GetRequiredService<IRagRepository>();
        await repository.InitializeAsync(CancellationToken.None);
    }

    app.UseSerilogRequestLogging(options =>
    {
        options.MessageTemplate = "HTTP {RequestMethod} {RequestPath} responded {StatusCode} in {Elapsed:0.0000} ms";
        options.EnrichDiagnosticContext = (diagnosticContext, httpContext) =>
        {
            diagnosticContext.Set("RequestHost", httpContext.Request.Host.Value);
            diagnosticContext.Set("RequestScheme", httpContext.Request.Scheme);
            diagnosticContext.Set("RemoteIP", httpContext.Connection.RemoteIpAddress?.ToString());
            diagnosticContext.Set("UserAgent", httpContext.Request.Headers.UserAgent.ToString());
        };
    });

    // Unhandled exceptions are logged and answered with a generic JSON 500.
    app.UseExceptionHandler(errorApp =>
    {
        errorApp.Run(async context =>
        {
            var feature = context.Features.Get<IExceptionHandlerFeature>();
            var errorLogger = context.RequestServices.GetRequiredService<ILogger<Program>>();
            if (feature?.Error is not null)
            {
                errorLogger.LogError(feature.Error, "Unhandled exception in {Service}", "rag-api");
            }

            context.Response.StatusCode = StatusCodes.Status500InternalServerError;
            context.Response.ContentType = "application/json";
            await context.Response.WriteAsJsonAsync(new { error = "Unexpected server error." });
        });
    });

    // Internal API key gate. When RequireApiKey is set, every request must carry the
    // configured key in X-Internal-Api-Key; an unset key rejects all requests.
    app.Use(async (context, next) =>
    {
        var settings = context.RequestServices.GetRequiredService<Microsoft.Extensions.Options.IOptions<InternalApiSettings>>().Value;
        if (settings.RequireApiKey)
        {
            var presentedKey = context.Request.Headers["X-Internal-Api-Key"].ToString();

            // Constant-time comparison so the response time does not reveal how many
            // leading characters of the key were guessed correctly.
            var authorized = !string.IsNullOrWhiteSpace(settings.ApiKey)
                && System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(
                    System.Text.Encoding.UTF8.GetBytes(presentedKey),
                    System.Text.Encoding.UTF8.GetBytes(settings.ApiKey));

            if (!authorized)
            {
                var authLogger = context.RequestServices.GetRequiredService<ILogger<Program>>();
                authLogger.LogWarning("Rejected unauthorized internal API call. Path={Path}, RemoteIP={RemoteIP}", context.Request.Path, context.Connection.RemoteIpAddress?.ToString());
                context.Response.StatusCode = StatusCodes.Status401Unauthorized;
                await context.Response.WriteAsJsonAsync(new { error = "Unauthorized internal API call." });
                return;
            }
        }

        await next();
    });

    // Swagger (typically only in Development)
    if (app.Environment.IsDevelopment())
    {
        app.UseSwagger();
        app.UseSwaggerUI(options =>
        {
            options.DocumentTitle = "rag-api";
            options.SwaggerEndpoint("/swagger/v1/swagger.json", "rag-api v1");
            options.RoutePrefix = "swagger";
        });
    }

    app.MapControllers();
    app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "rag-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));

    Log.Information("{Service} startup complete", "rag-api");
    app.Run();
}
catch (Exception ex)
{
    Log.Fatal(ex, "rag-api terminated unexpectedly");

    // Report the failure to the process supervisor; otherwise a crashed start
    // would exit with code 0 and look like a clean shutdown.
    Environment.ExitCode = 1;
}
finally
{
    Log.Information("Shutting down rag-api");
    Log.CloseAndFlush();
}
|
||||
|
||||
/// <summary>
/// Writes a startup diagnostics dump to <paramref name="logger"/>: host environment
/// details, every process environment variable sorted by name, and the whole
/// configuration tree. Values whose key is flagged by <c>IsSensitiveKey</c> are
/// masked before they are logged.
/// </summary>
static void LogEnvironmentSettings(Microsoft.Extensions.Logging.ILogger logger, IConfiguration configuration, IWebHostEnvironment environment)
{
    logger.LogInformation("==================== ENVIRONMENT SETTINGS ====================");

    // Host environment details.
    logger.LogInformation("Application Name: {ApplicationName}", environment.ApplicationName);
    logger.LogInformation("Environment Name: {EnvironmentName}", environment.EnvironmentName);
    logger.LogInformation("Content Root Path: {ContentRootPath}", environment.ContentRootPath);
    logger.LogInformation("Web Root Path: {WebRootPath}", environment.WebRootPath);

    // Process environment variables, collected into a sorted map so the output is ordered by name.
    logger.LogInformation("-------------- Environment Variables --------------");
    var displayByName = new SortedDictionary<string, string?>();

    foreach (System.Collections.DictionaryEntry variable in Environment.GetEnvironmentVariables())
    {
        var name = variable.Key?.ToString() ?? string.Empty;
        var raw = variable.Value?.ToString() ?? string.Empty;

        // Sensitive values are replaced by a mask that keeps only the last 4 characters.
        displayByName[name] = IsSensitiveKey(name) ? MaskValueWithLastChars(raw) : raw;
    }

    foreach (var (name, shown) in displayByName)
    {
        logger.LogInformation(" {Key} = {Value}", name, shown);
    }

    // Configuration tree, walked depth-first.
    logger.LogInformation("-------------- Configuration Settings --------------");
    LogConfigurationRecursive(logger, configuration.GetChildren(), "");

    logger.LogInformation("===========================================================");
}
|
||||
|
||||
/// <summary>
/// Logs each configuration section that has a value as <c>path = value</c>, where the
/// path is the colon-joined key chain starting at <paramref name="prefix"/>, then
/// descends into the section's children. Values under sensitive keys are masked.
/// </summary>
static void LogConfigurationRecursive(Microsoft.Extensions.Logging.ILogger logger, IEnumerable<IConfigurationSection> sections, string prefix)
{
    foreach (var node in sections)
    {
        var path = string.IsNullOrEmpty(prefix) ? node.Key : $"{prefix}:{node.Key}";

        // Only sections that carry a value produce a log line.
        var raw = node.Value;
        if (raw is not null)
        {
            var shown = IsSensitiveKey(path) ? MaskValueWithLastChars(raw) : raw;
            logger.LogInformation(" {Key} = {Value}", path, shown);
        }

        if (!node.GetChildren().Any())
        {
            continue;
        }

        // Recurse with the full path so nested keys keep their parent prefix.
        LogConfigurationRecursive(logger, node.GetChildren(), path);
    }
}
|
||||
|
||||
/// <summary>
/// Returns <c>true</c> when <paramref name="key"/> contains, ignoring case, any of the
/// markers "Password", "Secret", "Token", "Key" or "ConnectionString".
/// </summary>
static bool IsSensitiveKey(string key)
{
    string[] markers = ["Password", "Secret", "Token", "Key", "ConnectionString"];

    // Substring match, so e.g. "monkey" is also treated as sensitive.
    return markers.Any(marker => key.Contains(marker, StringComparison.OrdinalIgnoreCase));
}
|
||||
|
||||
/// <summary>
/// Masks a sensitive value while keeping its last 4 characters for verification.
/// </summary>
/// <param name="value">The value to mask.</param>
/// <returns>
/// "***NOT SET***" for a null or empty value, "***MASKED***" for values of at most
/// 4 characters, otherwise "***MASKED***..." followed by the last 4 characters.
/// </returns>
static string MaskValueWithLastChars(string value)
{
    return value switch
    {
        null or "" => "***NOT SET***",

        // Too short to reveal a tail without exposing the whole value.
        { Length: <= 4 } => "***MASKED***",

        _ => $"***MASKED***...{value[^4..]}",
    };
}
|
||||
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"profiles": {
|
||||
"rag-api": {
|
||||
"commandName": "Project",
|
||||
"launchBrowser": true,
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
},
|
||||
"applicationUrl": "https://localhost:58424;http://localhost:58426"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace Api.Requests
{
    /// <summary>
    /// Request payload for indexing a text document. All members are optional at
    /// the transport level; the service decides which values are acceptable.
    /// </summary>
    public sealed class IndexDocumentRequest
    {
        /// <summary>Document text to index.</summary>
        public string? Text { get; set; }

        /// <summary>URL the document originates from, if any.</summary>
        public string? SourceUrl { get; set; }

        /// <summary>Caller-supplied document type, if any.</summary>
        public string? DocumentType { get; set; }

        /// <summary>Caller-supplied title, if any.</summary>
        public string? Title { get; set; }

        /// <summary>Additional caller-supplied key/value metadata, if any.</summary>
        public Dictionary<string, string>? Metadata { get; set; }
    }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
namespace Api.Requests
{
    /// <summary>Request payload for a semantic search.</summary>
    public sealed class SearchRequest
    {
        /// <summary>The query text to search for. Required.</summary>
        public required string QueryText { get; init; }

        /// <summary>Optional list of document types to restrict the search to.</summary>
        public IReadOnlyList<string>? TargetDocumentTypes { get; init; }

        /// <summary>Optional number of results requested.</summary>
        public int? TopK { get; init; }
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
namespace Api.Responses
{
    /// <summary>Result returned after a document has been indexed.</summary>
    public sealed class IndexDocumentResponse
    {
        /// <summary>Identifier of the indexed document.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Hash of the document text.</summary>
        public required string TextHash { get; init; }

        /// <summary>Document type assigned to the document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Confidence attached to <see cref="DocumentType"/>.</summary>
        public double DocumentTypeConfidence { get; init; }

        /// <summary>Title assigned to the document.</summary>
        public required string Title { get; init; }

        /// <summary>Number of chunks.</summary>
        public int Chunks { get; init; }

        /// <summary>Number of characters.</summary>
        public int Characters { get; init; }

        /// <summary>Cache flag reported by the service.</summary>
        public bool Cached { get; init; }
    }
}
|
||||
@@ -0,0 +1,25 @@
|
||||
namespace Api.Responses
{
    /// <summary>Response of a semantic search: a list of matching documents.</summary>
    public sealed class SearchResponse
    {
        /// <summary>Matching documents; empty by default.</summary>
        public IReadOnlyList<SearchDocumentResult> Results { get; init; } = [];
    }

    /// <summary>One document in a search response together with its matched chunks.</summary>
    public sealed class SearchDocumentResult
    {
        /// <summary>Identifier of the document.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Type of the document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Title of the document.</summary>
        public required string Title { get; init; }

        /// <summary>Source URL of the document, if any.</summary>
        public string? SourceUrl { get; init; }

        /// <summary>Score of the document for the query.</summary>
        public double Score { get; init; }

        /// <summary>Chunks of the document that matched; empty by default.</summary>
        public IReadOnlyList<SearchChunkResult> MatchedChunks { get; init; } = [];
    }

    /// <summary>One matched chunk inside a <see cref="SearchDocumentResult"/>.</summary>
    public sealed class SearchChunkResult
    {
        /// <summary>Identifier of the chunk.</summary>
        public required string ChunkId { get; init; }

        /// <summary>Index of the chunk.</summary>
        public int ChunkIndex { get; init; }

        /// <summary>Text of the chunk.</summary>
        public required string Text { get; init; }

        /// <summary>Score of the chunk for the query.</summary>
        public double Score { get; init; }
    }
}
|
||||
@@ -0,0 +1,52 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
using Api.Services.Contracts;
|
||||
using Api.Settings;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// <see cref="IAiClient"/> that puts a repository-backed cache in front of
/// <see cref="RawAiClient"/>. Each call derives a cache key from the configured
/// provider, the model name and the input, returns the stored result when one
/// exists, and otherwise calls the raw client and stores its result.
/// </summary>
public sealed class CachedAiClient : IAiClient
{
    private readonly RawAiClient _raw;
    private readonly IRagRepository _repository;
    private readonly AiSettings _settings;

    /// <summary>Creates the caching client over the raw client, the cache store and the AI settings.</summary>
    public CachedAiClient(RawAiClient raw, IRagRepository repository, IOptions<AiSettings> options)
    {
        _raw = raw;
        _repository = repository;
        _settings = options.Value;
    }

    /// <summary>
    /// Returns the embedding for <paramref name="input"/>, from the cache when
    /// available, otherwise from the raw client (the result is then cached).
    /// </summary>
    public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
    {
        var model = GetEmbeddingModel();
        var textHash = HashHelper.Compute(input);
        var cacheKey = HashHelper.Compute($"embedding:{_settings.Provider}:{model}:{textHash}");

        var cached = await _repository.GetEmbeddingAsync(cacheKey, ct);
        if (cached is not null)
        {
            return cached;
        }

        var vector = await _raw.CreateEmbeddingAsync(input, ct);
        await _repository.SaveEmbeddingAsync(cacheKey, model, textHash, vector, ct);
        return vector;
    }

    /// <summary>
    /// Returns the chat completion for the given prompts and temperature, from the
    /// cache when available, otherwise from the raw client (the result is then cached).
    /// </summary>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        var model = GetChatModel();

        // Format with the invariant culture: an interpolated "{temperature:0.00}" uses the
        // current culture, so the same request would hash to different keys under
        // locales that use ',' as the decimal separator.
        var temperatureKey = temperature.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture);

        // NOTE(review): prompts are joined with ':' unescaped, so two different prompt
        // pairs can produce the same key text - confirm whether that matters here.
        var cacheKey = HashHelper.Compute($"chat:{_settings.Provider}:{model}:{temperatureKey}:{systemPrompt}:{userPrompt}");

        var cached = await _repository.GetChatCompletionAsync(cacheKey, ct);
        if (cached is not null)
        {
            return cached;
        }

        var response = await _raw.CreateChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);
        await _repository.SaveChatCompletionAsync(cacheKey, model, temperature, response, ct);
        return response;
    }

    /// <summary>Embedding model name for the configured provider (Ollama, otherwise OpenAI).</summary>
    private string GetEmbeddingModel() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase)
        ? _settings.Ollama.EmbeddingModel
        : _settings.OpenAI.EmbeddingModel;

    /// <summary>Chat model name for the configured provider (Ollama, otherwise OpenAI).</summary>
    private string GetChatModel() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase)
        ? _settings.Ollama.ChatModel
        : _settings.OpenAI.ChatModel;
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>Client for the AI operations used by the service: embeddings and chat completions.</summary>
public interface IAiClient
{
    /// <summary>Creates an embedding vector for <paramref name="input"/>.</summary>
    Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct);

    /// <summary>
    /// Creates a chat completion for the given system and user prompts at the
    /// requested <paramref name="temperature"/> and returns the response text.
    /// </summary>
    Task<string> CreateChatCompletionAsync(
        string systemPrompt,
        string userPrompt,
        decimal temperature,
        CancellationToken ct);
}
|
||||
@@ -0,0 +1,8 @@
|
||||
using Api.Services.Contracts.Models;
|
||||
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>Assigns a document type, confidence and title to a document's text.</summary>
public interface IDocumentClassifier
{
    /// <summary>
    /// Classifies <paramref name="text"/>, taking an optional caller-provided type
    /// and title into account.
    /// </summary>
    Task<DocumentClassification> ClassifyAsync(
        string text,
        string? providedType,
        string? providedTitle,
        CancellationToken ct);
}
|
||||
@@ -0,0 +1,16 @@
|
||||
using Api.Services.Contracts.Models;
|
||||
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>
/// Persistence for indexed documents and their chunks, plus the embedding and
/// chat-completion caches.
/// </summary>
public interface IRagRepository
{
    /// <summary>Prepares the store for use; called once at application startup.</summary>
    Task InitializeAsync(CancellationToken ct);

    /// <summary>Looks up a document by text hash and optional source URL; <c>null</c> when none is found.</summary>
    Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct);

    /// <summary>Looks up a document by identifier; <c>null</c> when none is found.</summary>
    Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct);

    /// <summary>Stores a document together with its chunks.</summary>
    Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct);

    /// <summary>
    /// Returns candidate chunks for <paramref name="queryEmbedding"/>, optionally
    /// restricted to <paramref name="targetTypes"/>, for a requested <paramref name="topK"/>.
    /// </summary>
    Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(
        float[] queryEmbedding,
        IReadOnlyList<string>? targetTypes,
        int topK,
        CancellationToken ct);

    /// <summary>Returns the cached embedding stored under <paramref name="cacheKey"/>, or <c>null</c>.</summary>
    Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct);

    /// <summary>Stores an embedding under <paramref name="cacheKey"/>.</summary>
    Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct);

    /// <summary>Returns the cached chat completion stored under <paramref name="cacheKey"/>, or <c>null</c>.</summary>
    Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct);

    /// <summary>Stores a chat completion under <paramref name="cacheKey"/>.</summary>
    Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct);
}
|
||||
@@ -0,0 +1,13 @@
|
||||
using Api.Requests;
|
||||
using Api.Responses;
|
||||
using Api.Services.Contracts.Models;
|
||||
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>Application service behind the RAG endpoints: indexing, search and document retrieval.</summary>
public interface IRagService
{
    /// <summary>Indexes the text document described by <paramref name="request"/>.</summary>
    Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct);

    /// <summary>Indexes an uploaded PDF file with optional caller-supplied type, title and source URL.</summary>
    Task<IndexDocumentResponse> IndexPdfAsync(
        IFormFile file,
        string? documentType,
        string? title,
        string? sourceUrl,
        CancellationToken ct);

    /// <summary>Runs the semantic search described by <paramref name="request"/>.</summary>
    Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct);

    /// <summary>Returns the document with the given identifier, or <c>null</c> when it does not exist.</summary>
    Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct);
}
|
||||
@@ -0,0 +1,6 @@
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>Splits text into chunks.</summary>
public interface ITextChunker
{
    /// <summary>
    /// Splits <paramref name="text"/> into chunks using the given
    /// <paramref name="chunkSize"/> and <paramref name="overlap"/>.
    /// </summary>
    IReadOnlyList<string> Chunk(
        string text,
        int chunkSize,
        int overlap);
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Api.Services.Contracts;
|
||||
|
||||
/// <summary>Extracts text from PDF content and normalizes text.</summary>
public interface ITextExtractor
{
    /// <summary>Extracts the text of the PDF read from <paramref name="stream"/>.</summary>
    Task<string> ExtractPdfAsync(
        Stream stream,
        CancellationToken ct);

    /// <summary>Returns the normalized form of <paramref name="value"/>.</summary>
    string Normalize(string value);
}
|
||||
@@ -0,0 +1,10 @@
|
||||
namespace Api.Services.Contracts.Models
{
    /// <summary>Outcome of classifying a document: its type, a confidence value and a title.</summary>
    public sealed class DocumentClassification
    {
        /// <summary>Assigned document type.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Confidence attached to <see cref="DocumentType"/>.</summary>
        public double Confidence { get; init; }

        /// <summary>Title chosen for the document.</summary>
        public required string Title { get; init; }

        /// <summary>Additional key/value metadata; empty by default.</summary>
        public Dictionary<string, string> Metadata { get; init; } = [];
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace Api.Services.Contracts.Models
{
    /// <summary>A stored chunk of a document together with its embedding vector.</summary>
    public sealed class RagChunkRecord
    {
        /// <summary>Identifier of the chunk.</summary>
        public required string Id { get; init; }

        /// <summary>Identifier of the document the chunk belongs to.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Index of the chunk.</summary>
        public int ChunkIndex { get; init; }

        /// <summary>Text of the chunk.</summary>
        public required string Text { get; init; }

        /// <summary>Embedding vector of the chunk.</summary>
        public required float[] Embedding { get; init; }
    }
}
|
||||
@@ -0,0 +1,13 @@
|
||||
namespace Api.Services.Contracts.Models
{
    /// <summary>Document details returned to API callers when a document is fetched.</summary>
    public sealed class RagDocumentDetails
    {
        /// <summary>Identifier of the document.</summary>
        public required string Id { get; init; }

        /// <summary>Type of the document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Title of the document.</summary>
        public required string Title { get; init; }

        /// <summary>Source URL of the document, if any.</summary>
        public string? SourceUrl { get; init; }

        /// <summary>Text of the document.</summary>
        public required string Text { get; init; }

        /// <summary>Hash of the document text.</summary>
        public required string TextHash { get; init; }

        /// <summary>Creation timestamp of the document.</summary>
        public DateTimeOffset CreatedAt { get; init; }
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
namespace Api.Services.Contracts.Models
{
    /// <summary>A document as stored by the repository.</summary>
    public sealed class RagDocumentRecord
    {
        /// <summary>Identifier of the document.</summary>
        public required string Id { get; init; }

        /// <summary>Type of the document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Title of the document.</summary>
        public required string Title { get; init; }

        /// <summary>Source URL of the document, if any.</summary>
        public string? SourceUrl { get; init; }

        /// <summary>Text of the document.</summary>
        public required string Text { get; init; }

        /// <summary>Hash of the document text.</summary>
        public required string TextHash { get; init; }

        /// <summary>Confidence attached to <see cref="DocumentType"/>.</summary>
        public double TypeConfidence { get; init; }

        /// <summary>Metadata serialized as JSON; defaults to an empty JSON object.</summary>
        public string MetadataJson { get; init; } = "{}";

        /// <summary>Creation timestamp of the document.</summary>
        public DateTimeOffset CreatedAt { get; init; }
    }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
namespace Api.Services.Contracts.Models
{
    /// <summary>A chunk returned by a repository search, paired with its document and score.</summary>
    public sealed class SearchCandidateChunk
    {
        /// <summary>The document the chunk belongs to.</summary>
        public required RagDocumentRecord Document { get; init; }

        /// <summary>The matching chunk.</summary>
        public required RagChunkRecord Chunk { get; init; }

        /// <summary>Score of the chunk for the query.</summary>
        public double Score { get; init; }
    }
}
|
||||
@@ -0,0 +1,65 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using Api.Services.Contracts;
|
||||
using Api.Services.Contracts.Models;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// Keyword-based <see cref="IDocumentClassifier"/>. A caller-provided type is
/// normalized and used as-is; otherwise the text is scored against a fixed list of
/// keywords per document type and the highest-scoring type wins.
/// </summary>
public sealed class DocumentClassifier : IDocumentClassifier
{
    private static readonly HashSet<string> KnownTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "cv", "job", "article", "contract", "invoice", "product", "documentation", "unknown"
    };

    // Keyword table in priority order. An array (rather than a Dictionary, whose
    // enumeration order is undefined) makes the tie-break explicit: when two types
    // score equally, the one listed first wins.
    private static readonly (string Type, string[] Terms)[] Heuristics =
    {
        ("cv", new[] { "curriculum vitae", "resume", "work experience", "professional experience", "education", "skills", "technologies", "linkedin", "github" }),
        ("job", new[] { "job description", "requirements", "responsibilities", "qualifications", "apply", "we are looking", "salary", "benefits", "remote", "hybrid" }),
        ("contract", new[] { "agreement", "contract", "party", "parties", "liability", "termination", "confidentiality", "governing law" }),
        ("invoice", new[] { "invoice", "vat", "subtotal", "total", "amount due", "due date", "billing" }),
        ("documentation", new[] { "api", "endpoint", "configuration", "install", "usage", "parameters", "response", "request" }),
        ("product", new[] { "features", "pricing", "sku", "product", "specification", "warranty" }),
    };

    /// <summary>
    /// Classifies <paramref name="text"/>. The work is synchronous; the returned task
    /// is already completed and <paramref name="ct"/> is not observed.
    /// </summary>
    /// <param name="text">Document text to classify.</param>
    /// <param name="providedType">Optional caller-supplied type; when non-blank it is used instead of the keyword heuristic.</param>
    /// <param name="providedTitle">Optional caller-supplied title; when non-blank it is used (trimmed) as the title.</param>
    /// <param name="ct">Cancellation token (unused).</param>
    public Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct)
    {
        if (!string.IsNullOrWhiteSpace(providedType))
        {
            var normalized = NormalizeType(providedType);
            return Task.FromResult(new DocumentClassification
            {
                DocumentType = normalized,
                // Full confidence only for a recognised type other than "unknown".
                Confidence = KnownTypes.Contains(normalized) && normalized != "unknown" ? 1.0 : 0.6,
                Title = BuildTitle(providedTitle, text, normalized)
            });
        }

        var lower = text.ToLowerInvariant();

        // OrderByDescending is a stable sort, so ties keep the table order.
        var best = Heuristics
            .Select(entry => (entry.Type, Score: Count(lower, entry.Terms)))
            .OrderByDescending(entry => entry.Score)
            .First();

        var type = best.Score <= 0 ? "unknown" : best.Type;
        var confidence = best.Score <= 0 ? 0.25 : Math.Min(0.95, 0.45 + best.Score * 0.08);

        return Task.FromResult(new DocumentClassification
        {
            DocumentType = type,
            Confidence = confidence,
            Title = BuildTitle(providedTitle, text, type)
        });
    }

    /// <summary>Number of <paramref name="terms"/> that occur as a substring of <paramref name="lower"/>.</summary>
    private static int Count(string lower, params string[] terms) =>
        terms.Count(term => lower.Contains(term, StringComparison.Ordinal));

    /// <summary>
    /// Trims and lower-cases <paramref name="value"/> and replaces every character
    /// outside <c>a-z</c>, <c>0-9</c>, <c>_</c> and <c>-</c> with <c>-</c>.
    /// </summary>
    private static string NormalizeType(string value)
    {
        var cleaned = Regex.Replace(value.Trim().ToLowerInvariant(), "[^a-z0-9_-]", "-");
        return string.IsNullOrWhiteSpace(cleaned) ? "unknown" : cleaned;
    }

    /// <summary>
    /// Picks a title: the provided title when non-blank; otherwise the first segment
    /// of the text (split on '.', '\n', '\r') longer than 20 characters, cut to 120
    /// characters; otherwise "<paramref name="documentType"/> document".
    /// </summary>
    private static string BuildTitle(string? providedTitle, string text, string documentType)
    {
        if (!string.IsNullOrWhiteSpace(providedTitle))
        {
            return providedTitle.Trim();
        }

        var firstLine = text.Split('.', '\n', '\r').Select(x => x.Trim()).FirstOrDefault(x => x.Length > 20);
        if (!string.IsNullOrWhiteSpace(firstLine))
        {
            return firstLine.Length <= 120 ? firstLine : firstLine[..120];
        }

        return $"{documentType} document";
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>Hashing helper used for text hashes and cache keys.</summary>
public static class HashHelper
{
    /// <summary>
    /// Computes the SHA-256 hash of the UTF-8 encoding of <paramref name="value"/>.
    /// </summary>
    /// <param name="value">Text to hash; <c>null</c> is hashed as the empty string.</param>
    /// <returns>The hash as a 64-character upper-case hexadecimal string.</returns>
    public static string Compute(string value)
    {
        // The static one-shot API avoids allocating and disposing a SHA256 instance per call.
        var digest = SHA256.HashData(Encoding.UTF8.GetBytes(value ?? string.Empty));
        return Convert.ToHexString(digest);
    }
}
|
||||
@@ -0,0 +1,179 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Api.Services.Contracts;
|
||||
using Api.Settings;
|
||||
using Api.Responses;
|
||||
using Api.Requests;
|
||||
using Api.Services.Contracts.Models;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// Coordinates document indexing and semantic search: normalizes or extracts text, classifies and
/// chunks it, requests one embedding per chunk and persists the result through the repository.
/// </summary>
public sealed class RagService : IRagService
{
    /// <summary>Minimum number of characters a document must contain to be indexed.</summary>
    private const int MinDocumentChars = 40;

    /// <summary>Minimum number of characters a normalized search query must contain.</summary>
    private const int MinQueryChars = 10;

    /// <summary>Maximum number of matched chunks reported per document in search results.</summary>
    private const int MaxMatchedChunksPerDocument = 3;

    private readonly ITextExtractor _textExtractor;
    private readonly ITextChunker _chunker;
    private readonly IDocumentClassifier _classifier;
    private readonly IAiClient _ai;
    private readonly IRagRepository _repository;
    private readonly RagSettings _settings;

    /// <summary>Creates the service from its collaborators and the bound <see cref="RagSettings"/>.</summary>
    public RagService(
        ITextExtractor textExtractor,
        ITextChunker chunker,
        IDocumentClassifier classifier,
        IAiClient ai,
        IRagRepository repository,
        IOptions<RagSettings> options)
    {
        _textExtractor = textExtractor;
        _chunker = chunker;
        _classifier = classifier;
        _ai = ai;
        _repository = repository;
        _settings = options.Value;
    }

    /// <summary>
    /// Indexes a plain-text document.
    /// </summary>
    /// <param name="request">Request carrying the text and optional type, title, source URL and metadata.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>Details of the stored document, or of an already indexed one.</returns>
    /// <exception cref="InvalidOperationException">The normalized text is shorter than 40 characters.</exception>
    public async Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct)
    {
        var text = _textExtractor.Normalize(request.Text ?? string.Empty);
        if (text.Length < MinDocumentChars) throw new InvalidOperationException("Document text is too short.");

        text = Truncate(text, _settings.MaxTextChars);
        return await IndexNormalizedTextAsync(text, request.DocumentType, request.Title, request.SourceUrl, request.Metadata, ct);
    }

    /// <summary>
    /// Extracts the text of an uploaded PDF and indexes it.
    /// </summary>
    /// <param name="file">Uploaded file; must be non-empty, within the configured size limit and end in <c>.pdf</c>.</param>
    /// <param name="documentType">Optional document type hint passed to the classifier.</param>
    /// <param name="title">Optional title; the file name is used when it is <c>null</c>.</param>
    /// <param name="sourceUrl">Optional source URL stored with the document.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>Details of the stored document, or of an already indexed one.</returns>
    /// <exception cref="InvalidOperationException">
    /// The file is empty, too large, not a <c>.pdf</c>, or yields fewer than 40 characters of text.
    /// </exception>
    public async Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct)
    {
        if (file.Length <= 0) throw new InvalidOperationException("Uploaded file is empty.");
        if (file.Length > _settings.MaxFileSizeMb * 1024L * 1024L) throw new InvalidOperationException($"File is too large. Max size is {_settings.MaxFileSizeMb} MB.");
        if (!string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase)) throw new InvalidOperationException("Only PDF files are supported by this endpoint.");

        await using var stream = file.OpenReadStream();
        var text = await _textExtractor.ExtractPdfAsync(stream, ct);
        text = Truncate(text, _settings.MaxTextChars);
        if (text.Length < MinDocumentChars) throw new InvalidOperationException("Could not extract enough text from the PDF.");

        return await IndexNormalizedTextAsync(text, documentType, title ?? file.FileName, sourceUrl, new Dictionary<string, string> { ["fileName"] = file.FileName }, ct);
    }

    /// <summary>
    /// Embeds the query, asks the repository for the closest chunks and groups them per document.
    /// </summary>
    /// <param name="request">Query text, optional document-type filter and optional result count.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>
    /// Up to <c>topK</c> documents ordered by their best chunk score, each with at most three matched chunks.
    /// <c>topK</c> is the requested (or default) value clamped to <c>1..MaxTopK</c>.
    /// </returns>
    /// <exception cref="InvalidOperationException">The normalized query is shorter than 10 characters.</exception>
    public async Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct)
    {
        var query = _textExtractor.Normalize(request.QueryText);
        if (query.Length < MinQueryChars) throw new InvalidOperationException("Search query is too short.");

        var topK = Math.Clamp(request.TopK ?? _settings.DefaultTopK, 1, Math.Max(1, _settings.MaxTopK));
        var queryEmbedding = await _ai.CreateEmbeddingAsync(query, ct);
        var candidates = await _repository.SearchChunksAsync(queryEmbedding, request.TargetDocumentTypes, topK, ct);

        var results = candidates
            .GroupBy(x => x.Document.Id)
            .Select(group =>
            {
                // Rank the group's chunks once; the head of the ranking is both the
                // representative document row and the document score.
                var ranked = group.OrderByDescending(x => x.Score).ToList();
                var best = ranked[0];
                return new SearchDocumentResult
                {
                    DocumentId = best.Document.Id,
                    DocumentType = best.Document.DocumentType,
                    Title = best.Document.Title,
                    SourceUrl = best.Document.SourceUrl,
                    Score = best.Score,
                    MatchedChunks = ranked
                        .Take(MaxMatchedChunksPerDocument)
                        .Select(x => new SearchChunkResult
                        {
                            ChunkId = x.Chunk.Id,
                            ChunkIndex = x.Chunk.ChunkIndex,
                            Text = x.Chunk.Text,
                            Score = x.Score
                        })
                        .ToList()
                };
            })
            .OrderByDescending(x => x.Score)
            .Take(topK)
            .ToList();

        return new SearchResponse { Results = results };
    }

    /// <summary>
    /// Loads a stored document by its identifier.
    /// </summary>
    /// <param name="documentId">Identifier of the document.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The document details, or <c>null</c> when the repository has no such document.</returns>
    public async Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct)
    {
        var document = await _repository.GetDocumentByIdAsync(documentId, ct);
        return document is null ? null : new RagDocumentDetails
        {
            Id = document.Id,
            DocumentType = document.DocumentType,
            Title = document.Title,
            SourceUrl = document.SourceUrl,
            Text = document.Text,
            TextHash = document.TextHash,
            CreatedAt = document.CreatedAt
        };
    }

    /// <summary>
    /// Limits <paramref name="text"/> to at most <paramref name="maxChars"/> UTF-16 code units without
    /// cutting a surrogate pair in half. A plain <c>text[..maxChars]</c> can leave a lone high surrogate
    /// at the end, which is then hashed, embedded and stored as an invalid character.
    /// </summary>
    private static string Truncate(string text, int maxChars)
    {
        if (text.Length <= maxChars) return text;

        var end = maxChars;
        if (end > 0 && char.IsHighSurrogate(text[end - 1]))
        {
            // The pair's low half would fall outside the cut; drop the high half as well.
            end--;
        }

        return text[..end];
    }

    /// <summary>
    /// Shared indexing pipeline for text that is already normalized and length-checked.
    /// Returns the existing document (with <c>Cached = true</c> and <c>Chunks = 0</c>) when the repository
    /// finds one for the same text hash and source URL; otherwise classifies, chunks, embeds each chunk
    /// sequentially and saves the document together with its chunks.
    /// </summary>
    private async Task<IndexDocumentResponse> IndexNormalizedTextAsync(
        string text,
        string? documentType,
        string? title,
        string? sourceUrl,
        Dictionary<string, string>? metadata,
        CancellationToken ct)
    {
        var textHash = HashHelper.Compute(text);
        var cached = await _repository.GetDocumentByTextHashAsync(textHash, sourceUrl, ct);
        if (cached is not null)
        {
            return new IndexDocumentResponse
            {
                DocumentId = cached.Id,
                TextHash = cached.TextHash,
                DocumentType = cached.DocumentType,
                DocumentTypeConfidence = cached.TypeConfidence,
                Title = cached.Title,
                Chunks = 0,
                Characters = cached.Text.Length,
                Cached = true
            };
        }

        var classification = await _classifier.ClassifyAsync(text, documentType, title, ct);
        var chunks = _chunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);
        var document = new RagDocumentRecord
        {
            Id = Guid.NewGuid().ToString("N"),
            DocumentType = classification.DocumentType,
            Title = classification.Title,
            SourceUrl = sourceUrl,
            Text = text,
            TextHash = textHash,
            TypeConfidence = classification.Confidence,
            // Caller-supplied metadata wins; the classifier's metadata is only a fallback.
            MetadataJson = JsonSerializer.Serialize(metadata ?? classification.Metadata),
            CreatedAt = DateTimeOffset.UtcNow
        };

        var records = new List<RagChunkRecord>(chunks.Count);
        for (var i = 0; i < chunks.Count; i++)
        {
            ct.ThrowIfCancellationRequested();
            records.Add(new RagChunkRecord
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = document.Id,
                ChunkIndex = i,
                Text = chunks[i],
                Embedding = await _ai.CreateEmbeddingAsync(chunks[i], ct)
            });
        }

        await _repository.SaveDocumentAsync(document, records, ct);
        return new IndexDocumentResponse
        {
            DocumentId = document.Id,
            TextHash = document.TextHash,
            DocumentType = document.DocumentType,
            DocumentTypeConfidence = document.TypeConfidence,
            Title = document.Title,
            Chunks = records.Count,
            Characters = text.Length,
            Cached = false
        };
    }
}
|
||||
@@ -0,0 +1,116 @@
|
||||
using System.Net.Http.Headers;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Api.Services.Contracts;
|
||||
using Api.Settings;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// <see cref="IAiClient"/> implementation that talks to the OpenAI or Ollama HTTP APIs directly.
/// The provider is chosen per call from <see cref="AiSettings.Provider"/>: the value "Ollama"
/// (case-insensitive) selects Ollama, anything else selects OpenAI.
/// </summary>
public sealed class RawAiClient : IAiClient
{
    private const string OpenAiEmbeddingsUrl = "https://api.openai.com/v1/embeddings";
    private const string OpenAiChatUrl = "https://api.openai.com/v1/chat/completions";

    /// <summary>Lower bound, in seconds, applied to the configured OpenAI timeout.</summary>
    private const int MinOpenAiTimeoutSeconds = 15;

    /// <summary>Lower bound, in seconds, applied to the configured Ollama timeout.</summary>
    private const int MinOllamaTimeoutSeconds = 30;

    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly HttpClient _http;
    private readonly AiSettings _settings;

    /// <summary>Creates the client from an <see cref="HttpClient"/> and the bound <see cref="AiSettings"/>.</summary>
    public RawAiClient(HttpClient http, IOptions<AiSettings> options)
    {
        _http = http;
        _settings = options.Value;
    }

    /// <summary>
    /// Requests an embedding vector for <paramref name="input"/> from the configured provider.
    /// </summary>
    /// <param name="input">Text to embed.</param>
    /// <param name="ct">Token used to cancel the request.</param>
    /// <returns>The embedding returned by the provider.</returns>
    /// <exception cref="InvalidOperationException">
    /// The OpenAI API key is missing, or the provider answered with a non-success status code.
    /// </exception>
    public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
    {
        return IsOllama() ? await CreateOllamaEmbeddingAsync(input, ct) : await CreateOpenAiEmbeddingAsync(input, ct);
    }

    /// <summary>
    /// Requests a JSON-formatted chat completion from the configured provider.
    /// </summary>
    /// <param name="systemPrompt">Content of the system message.</param>
    /// <param name="userPrompt">Content of the user message.</param>
    /// <param name="temperature">Sampling temperature forwarded to the provider.</param>
    /// <param name="ct">Token used to cancel the request.</param>
    /// <returns>The message content, or <c>"{}"</c> when the provider returns a JSON <c>null</c> content.</returns>
    /// <exception cref="InvalidOperationException">
    /// The OpenAI API key is missing, or the provider answered with a non-success status code.
    /// </exception>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        return IsOllama()
            ? await CreateOllamaChatCompletionAsync(systemPrompt, userPrompt, temperature, ct)
            : await CreateOpenAiChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);
    }

    private bool IsOllama() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase);

    private int OpenAiTimeoutSeconds => Math.Max(MinOpenAiTimeoutSeconds, _settings.OpenAI.TimeoutSeconds);

    private int OllamaTimeoutSeconds => Math.Max(MinOllamaTimeoutSeconds, _settings.Ollama.TimeoutSeconds);

    /// <summary>Calls the OpenAI embeddings endpoint and reads <c>data[0].embedding</c>.</summary>
    private async Task<float[]> CreateOpenAiEmbeddingAsync(string input, CancellationToken ct)
    {
        using var request = CreateOpenAiRequest(OpenAiEmbeddingsUrl, new { model = _settings.OpenAI.EmbeddingModel, input });
        using var doc = await SendForJsonAsync(request, "OpenAI embeddings", OpenAiTimeoutSeconds, ct);
        return ReadVector(doc.RootElement.GetProperty("data")[0].GetProperty("embedding"));
    }

    /// <summary>Calls the OpenAI chat completions endpoint in JSON-object mode and reads <c>choices[0].message.content</c>.</summary>
    private async Task<string> CreateOpenAiChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        using var request = CreateOpenAiRequest(OpenAiChatUrl, new
        {
            model = _settings.OpenAI.ChatModel,
            temperature,
            response_format = new { type = "json_object" },
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            }
        });
        using var doc = await SendForJsonAsync(request, "OpenAI chat", OpenAiTimeoutSeconds, ct);
        return doc.RootElement.GetProperty("choices")[0].GetProperty("message").GetProperty("content").GetString() ?? "{}";
    }

    /// <summary>Calls Ollama's <c>/api/embeddings</c> endpoint and reads the <c>embedding</c> array.</summary>
    private async Task<float[]> CreateOllamaEmbeddingAsync(string input, CancellationToken ct)
    {
        using var request = CreateOllamaRequest("/api/embeddings", new { model = _settings.Ollama.EmbeddingModel, prompt = input });
        using var doc = await SendForJsonAsync(request, "Ollama embeddings", OllamaTimeoutSeconds, ct);
        return ReadVector(doc.RootElement.GetProperty("embedding"));
    }

    /// <summary>Calls Ollama's <c>/api/chat</c> endpoint (non-streaming, JSON format) and reads <c>message.content</c>.</summary>
    private async Task<string> CreateOllamaChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        using var request = CreateOllamaRequest("/api/chat", new
        {
            model = _settings.Ollama.ChatModel,
            stream = false,
            format = "json",
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            },
            options = new { temperature = (float)temperature }
        });
        using var doc = await SendForJsonAsync(request, "Ollama chat", OllamaTimeoutSeconds, ct);
        return doc.RootElement.GetProperty("message").GetProperty("content").GetString() ?? "{}";
    }

    /// <summary>Builds an authenticated POST request for an OpenAI endpoint; fails fast when no API key is configured.</summary>
    private HttpRequestMessage CreateOpenAiRequest<T>(string url, T payload)
    {
        if (string.IsNullOrWhiteSpace(_settings.OpenAI.ApiKey)) throw new InvalidOperationException("OpenAI API key is missing.");

        var authorization = new AuthenticationHeaderValue("Bearer", _settings.OpenAI.ApiKey);
        var request = new HttpRequestMessage(HttpMethod.Post, url) { Content = ToJson(payload) };
        request.Headers.Authorization = authorization;
        return request;
    }

    /// <summary>
    /// Builds a POST request for an Ollama endpoint below the configured base URL. The client's default
    /// HTTP version settings are copied onto the message so it behaves like <c>HttpClient.PostAsync</c>.
    /// </summary>
    private HttpRequestMessage CreateOllamaRequest<T>(string path, T payload)
    {
        var baseUrl = _settings.Ollama.BaseUrl.TrimEnd('/');
        return new HttpRequestMessage(HttpMethod.Post, $"{baseUrl}{path}")
        {
            Content = ToJson(payload),
            Version = _http.DefaultRequestVersion,
            VersionPolicy = _http.DefaultVersionPolicy
        };
    }

    /// <summary>
    /// Sends <paramref name="request"/> under a timeout linked to <paramref name="ct"/>, reads the body and
    /// parses it as JSON. The caller owns (and must dispose) the returned document.
    /// </summary>
    /// <param name="request">Request to send.</param>
    /// <param name="operation">Label used as the prefix of the failure message, e.g. "OpenAI chat".</param>
    /// <param name="timeoutSeconds">Time after which the request is cancelled.</param>
    /// <param name="ct">Caller's cancellation token.</param>
    /// <exception cref="InvalidOperationException">The response status code does not indicate success.</exception>
    private async Task<JsonDocument> SendForJsonAsync(HttpRequestMessage request, string operation, int timeoutSeconds, CancellationToken ct)
    {
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(TimeSpan.FromSeconds(timeoutSeconds));

        using var response = await _http.SendAsync(request, cts.Token);
        var json = await response.Content.ReadAsStringAsync(cts.Token);
        if (!response.IsSuccessStatusCode)
        {
            throw new InvalidOperationException($"{operation} failed: {(int)response.StatusCode} {json}");
        }

        return JsonDocument.Parse(json);
    }

    /// <summary>Converts a JSON array of numbers into a <c>float[]</c>.</summary>
    private static float[] ReadVector(JsonElement array) => array.EnumerateArray().Select(x => x.GetSingle()).ToArray();

    /// <summary>Serializes <paramref name="payload"/> with the shared web options as UTF-8 <c>application/json</c> content.</summary>
    private static StringContent ToJson<T>(T payload) => new(JsonSerializer.Serialize(payload, JsonOptions), Encoding.UTF8, "application/json");
}
|
||||
@@ -0,0 +1,238 @@
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Api.Services.Contracts;
|
||||
using Api.Services.Contracts.Models;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// SQL Server implementation of <see cref="IRagRepository"/>. Every operation opens its own
/// connection from the <c>RagDb</c> connection string. Vector similarity is computed in process
/// after the candidate chunks have been read.
/// </summary>
public sealed class SqlRagRepository : IRagRepository
{
    private readonly string _connectionString;

    /// <summary>Reads the <c>RagDb</c> connection string.</summary>
    /// <exception cref="InvalidOperationException">The connection string is not configured.</exception>
    public SqlRagRepository(IConfiguration configuration)
    {
        _connectionString = configuration.GetConnectionString("RagDb")
            ?? throw new InvalidOperationException("Connection string 'RagDb' is missing.");
    }

    /// <summary>
    /// Creates the database when it does not exist and runs <c>Database/schema.sql</c> (resolved under
    /// <see cref="AppContext.BaseDirectory"/>) batch by batch.
    /// </summary>
    /// <param name="ct">Token used to cancel the operation.</param>
    public async Task InitializeAsync(CancellationToken ct)
    {
        await EnsureDatabaseExistsAsync(ct);
        var sql = await File.ReadAllTextAsync(Path.Combine(AppContext.BaseDirectory, "Database", "schema.sql"), ct);
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        foreach (var commandText in SplitSqlBatches(sql))
        {
            await using var command = new SqlCommand(commandText, connection);
            await command.ExecuteNonQueryAsync(ct);
        }
    }

    /// <summary>
    /// Finds the most recently created document with the given text hash.
    /// </summary>
    /// <param name="textHash">Hash of the document text.</param>
    /// <param name="sourceUrl">When not <c>null</c>, the document must also have this source URL; <c>null</c> matches any.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The matching document, or <c>null</c> when there is none.</returns>
    public async Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct)
    {
        const string sql = """
            SELECT TOP 1 Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt
            FROM RagDocuments
            WHERE TextHash = @TextHash AND (@SourceUrl IS NULL OR SourceUrl = @SourceUrl)
            ORDER BY CreatedAt DESC
            """;
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@TextHash", textHash);
        command.Parameters.AddWithValue("@SourceUrl", (object?)sourceUrl ?? DBNull.Value);
        await using var reader = await command.ExecuteReaderAsync(ct);
        return await reader.ReadAsync(ct) ? ReadDocument(reader) : null;
    }

    /// <summary>
    /// Loads a document by its identifier.
    /// </summary>
    /// <param name="id">Document identifier.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The document, or <c>null</c> when there is none.</returns>
    public async Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct)
    {
        const string sql = """
            SELECT Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt
            FROM RagDocuments
            WHERE Id = @Id
            """;
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@Id", id);
        await using var reader = await command.ExecuteReaderAsync(ct);
        return await reader.ReadAsync(ct) ? ReadDocument(reader) : null;
    }

    /// <summary>
    /// Inserts a document and all of its chunks in a single transaction.
    /// </summary>
    /// <param name="document">Document row to insert.</param>
    /// <param name="chunks">Chunk rows to insert; each is stored under <paramref name="document"/>'s identifier.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    public async Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct)
    {
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var tx = (SqlTransaction)await connection.BeginTransactionAsync(ct);
        try
        {
            const string insertDoc = """
                INSERT INTO RagDocuments (Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt)
                VALUES (@Id, @DocumentType, @Title, @SourceUrl, @RawText, @TextHash, @TypeConfidence, @MetadataJson, @CreatedAt)
                """;
            await using (var command = new SqlCommand(insertDoc, connection, tx))
            {
                command.Parameters.AddWithValue("@Id", document.Id);
                command.Parameters.AddWithValue("@DocumentType", document.DocumentType);
                command.Parameters.AddWithValue("@Title", document.Title);
                command.Parameters.AddWithValue("@SourceUrl", (object?)document.SourceUrl ?? DBNull.Value);
                command.Parameters.AddWithValue("@RawText", document.Text);
                command.Parameters.AddWithValue("@TextHash", document.TextHash);
                command.Parameters.AddWithValue("@TypeConfidence", document.TypeConfidence);
                command.Parameters.AddWithValue("@MetadataJson", document.MetadataJson);
                command.Parameters.AddWithValue("@CreatedAt", document.CreatedAt.UtcDateTime);
                await command.ExecuteNonQueryAsync(ct);
            }

            const string insertChunk = """
                INSERT INTO RagChunks (Id, DocumentId, ChunkIndex, Text, Embedding)
                VALUES (@Id, @DocumentId, @ChunkIndex, @Text, @Embedding)
                """;
            foreach (var chunk in chunks)
            {
                await using var command = new SqlCommand(insertChunk, connection, tx);
                command.Parameters.AddWithValue("@Id", chunk.Id);
                command.Parameters.AddWithValue("@DocumentId", document.Id);
                command.Parameters.AddWithValue("@ChunkIndex", chunk.ChunkIndex);
                command.Parameters.AddWithValue("@Text", chunk.Text);
                command.Parameters.AddWithValue("@Embedding", VectorSerializer.ToBytes(chunk.Embedding));
                await command.ExecuteNonQueryAsync(ct);
            }

            await tx.CommitAsync(ct);
        }
        catch
        {
            // Do not pass the caller's token: when cancellation caused the failure the token is
            // already cancelled, and the rollback would complete as cancelled without running
            // while hiding the original exception.
            await tx.RollbackAsync(CancellationToken.None);
            throw;
        }
    }

    /// <summary>
    /// Reads every chunk (optionally limited to the given document types), scores each one against the
    /// query embedding with cosine similarity and returns the best candidates.
    /// </summary>
    /// <param name="queryEmbedding">Embedding of the search query.</param>
    /// <param name="targetTypes">Optional document types; blank entries are ignored and matching is case-insensitive.</param>
    /// <param name="topK">Requested number of results; up to four times as many chunks are returned so the caller can group them per document.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>Candidates ordered by descending score.</returns>
    public async Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(float[] queryEmbedding, IReadOnlyList<string>? targetTypes, int topK, CancellationToken ct)
    {
        var types = targetTypes?.Where(x => !string.IsNullOrWhiteSpace(x)).Select(x => x.Trim().ToLowerInvariant()).Distinct().ToArray() ?? [];
        var sql = """
            SELECT d.Id, d.DocumentType, d.Title, d.SourceUrl, d.RawText, d.TextHash, d.TypeConfidence, d.MetadataJson, d.CreatedAt,
            c.Id, c.DocumentId, c.ChunkIndex, c.Text, c.Embedding
            FROM RagChunks c
            INNER JOIN RagDocuments d ON d.Id = c.DocumentId
            """;

        if (types.Length > 0)
        {
            // Only parameter names are concatenated into the SQL text; the values are bound below.
            sql += " WHERE LOWER(d.DocumentType) IN (" + string.Join(',', types.Select((_, i) => $"@Type{i}")) + ")";
        }

        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        for (var i = 0; i < types.Length; i++) command.Parameters.AddWithValue($"@Type{i}", types[i]);
        await using var reader = await command.ExecuteReaderAsync(ct);
        var candidates = new List<SearchCandidateChunk>();
        while (await reader.ReadAsync(ct))
        {
            // Columns 0-8 are the document, columns 9-13 the chunk (see the SELECT list).
            var doc = ReadDocument(reader, 0);
            var chunk = new RagChunkRecord
            {
                Id = reader.GetString(9),
                DocumentId = reader.GetString(10),
                ChunkIndex = reader.GetInt32(11),
                Text = reader.GetString(12),
                Embedding = VectorSerializer.FromBytes((byte[])reader[13])
            };
            candidates.Add(new SearchCandidateChunk
            {
                Document = doc,
                Chunk = chunk,
                Score = VectorSerializer.CosineSimilarity(queryEmbedding, chunk.Embedding)
            });
        }

        return candidates
            .OrderByDescending(x => x.Score)
            .Take(Math.Max(topK * 4, topK))
            .ToList();
    }

    /// <summary>
    /// Looks up a cached embedding.
    /// </summary>
    /// <param name="cacheKey">Key of the cache entry.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The cached vector, or <c>null</c> when the key is not cached.</returns>
    public async Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct)
    {
        const string sql = "SELECT Vector FROM RagEmbeddingCache WHERE CacheKey = @CacheKey";
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);
        var value = await command.ExecuteScalarAsync(ct);
        return value is byte[] bytes ? VectorSerializer.FromBytes(bytes) : null;
    }

    /// <summary>
    /// Stores an embedding in the cache unless an entry with the same key already exists.
    /// </summary>
    /// <param name="cacheKey">Key of the cache entry.</param>
    /// <param name="model">Model name stored with the entry.</param>
    /// <param name="textHash">Text hash stored with the entry.</param>
    /// <param name="vector">Embedding to store.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    public async Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct)
    {
        const string sql = """
            IF NOT EXISTS (SELECT 1 FROM RagEmbeddingCache WHERE CacheKey = @CacheKey)
            INSERT INTO RagEmbeddingCache (CacheKey, Model, TextHash, Vector, CreatedAt)
            VALUES (@CacheKey, @Model, @TextHash, @Vector, SYSUTCDATETIME())
            """;
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);
        command.Parameters.AddWithValue("@Model", model);
        command.Parameters.AddWithValue("@TextHash", textHash);
        command.Parameters.AddWithValue("@Vector", VectorSerializer.ToBytes(vector));
        await command.ExecuteNonQueryAsync(ct);
    }

    /// <summary>
    /// Looks up a cached chat completion.
    /// </summary>
    /// <param name="cacheKey">Key of the cache entry.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The cached response text, or <c>null</c> when the key is not cached.</returns>
    public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
    {
        const string sql = "SELECT ResponseText FROM RagChatCompletionCache WHERE CacheKey = @CacheKey";
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);
        return await command.ExecuteScalarAsync(ct) as string;
    }

    /// <summary>
    /// Stores a chat completion in the cache unless an entry with the same key already exists.
    /// </summary>
    /// <param name="cacheKey">Key of the cache entry.</param>
    /// <param name="model">Model name stored with the entry.</param>
    /// <param name="temperature">Temperature stored with the entry.</param>
    /// <param name="responseText">Response text to store.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
    {
        const string sql = """
            IF NOT EXISTS (SELECT 1 FROM RagChatCompletionCache WHERE CacheKey = @CacheKey)
            INSERT INTO RagChatCompletionCache (CacheKey, Model, Temperature, ResponseText, CreatedAt)
            VALUES (@CacheKey, @Model, @Temperature, @ResponseText, SYSUTCDATETIME())
            """;
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);
        command.Parameters.AddWithValue("@Model", model);
        command.Parameters.AddWithValue("@Temperature", temperature);
        command.Parameters.AddWithValue("@ResponseText", responseText);
        await command.ExecuteNonQueryAsync(ct);
    }

    /// <summary>
    /// Maps nine consecutive columns, starting at <paramref name="offset"/>, to a document record. The
    /// expected order is Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence,
    /// MetadataJson, CreatedAt. CreatedAt is interpreted as UTC.
    /// </summary>
    private static RagDocumentRecord ReadDocument(SqlDataReader reader, int offset = 0) => new()
    {
        Id = reader.GetString(offset),
        DocumentType = reader.GetString(offset + 1),
        Title = reader.GetString(offset + 2),
        SourceUrl = reader.IsDBNull(offset + 3) ? null : reader.GetString(offset + 3),
        Text = reader.GetString(offset + 4),
        TextHash = reader.GetString(offset + 5),
        TypeConfidence = Convert.ToDouble(reader.GetValue(offset + 6)),
        MetadataJson = reader.GetString(offset + 7),
        CreatedAt = new DateTimeOffset(reader.GetDateTime(offset + 8), TimeSpan.Zero)
    };

    /// <summary>
    /// Splits a script into batches at lines that contain only <c>GO</c> (case-insensitive, surrounding
    /// whitespace ignored). Splitting on the bare substring "GO" would also cut through identifiers such
    /// as <c>CATEGORY</c>. Empty batches are dropped.
    /// </summary>
    private static IReadOnlyList<string> SplitSqlBatches(string script)
    {
        var batches = new List<string>();
        var current = new List<string>();

        foreach (var line in script.Split('\n'))
        {
            if (string.Equals(line.Trim(), "GO", StringComparison.OrdinalIgnoreCase))
            {
                Flush();
            }
            else
            {
                current.Add(line);
            }
        }

        Flush();
        return batches;

        void Flush()
        {
            var batch = string.Join('\n', current).Trim();
            if (batch.Length > 0) batches.Add(batch);
            current.Clear();
        }
    }

    /// <summary>
    /// Connects to <c>master</c> and creates the database named in the connection string when it does
    /// not exist yet. Does nothing when the connection string names no database.
    /// </summary>
    private async Task EnsureDatabaseExistsAsync(CancellationToken ct)
    {
        var builder = new SqlConnectionStringBuilder(_connectionString);
        var databaseName = builder.InitialCatalog;
        if (string.IsNullOrWhiteSpace(databaseName)) return;

        builder.InitialCatalog = "master";
        await using var connection = new SqlConnection(builder.ConnectionString);
        await connection.OpenAsync(ct);

        // The name ends up inside [brackets] that are themselves inside a single-quoted EXEC string,
        // so both the closing bracket and the single quote have to be doubled.
        var safeName = databaseName.Replace("]", "]]").Replace("'", "''");
        await using var command = new SqlCommand($"IF DB_ID(@DatabaseName) IS NULL EXEC('CREATE DATABASE [{safeName}]')", connection);
        command.Parameters.AddWithValue("@DatabaseName", databaseName);
        await command.ExecuteNonQueryAsync(ct);
    }
}
|
||||
@@ -0,0 +1,24 @@
|
||||
using Api.Services.Contracts;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// Splits text into fixed-size character windows that overlap by a configurable amount.
/// </summary>
public sealed class TextChunker : ITextChunker
{
    /// <summary>
    /// Cuts <paramref name="text"/> into trimmed windows of at most <paramref name="chunkSize"/> characters.
    /// </summary>
    /// <param name="text">Text to split; blank text yields no chunks.</param>
    /// <param name="chunkSize">Window size in characters, clamped to 300..3000.</param>
    /// <param name="overlap">Characters shared by consecutive windows, clamped to 0..chunkSize/2.</param>
    /// <returns>
    /// The non-blank chunks in document order. The last chunk is the first window that reaches the end
    /// of the text, so no chunk is entirely contained in its predecessor.
    /// </returns>
    public IReadOnlyList<string> Chunk(string text, int chunkSize, int overlap)
    {
        if (string.IsNullOrWhiteSpace(text)) return [];

        chunkSize = Math.Clamp(chunkSize, 300, 3000);
        overlap = Math.Clamp(overlap, 0, chunkSize / 2);
        // overlap is at most half a window, so the step is always positive.
        var step = chunkSize - overlap;

        var chunks = new List<string>();
        var start = 0;
        while (start < text.Length)
        {
            var length = Math.Min(chunkSize, text.Length - start);
            var chunk = text.Substring(start, length).Trim();
            if (chunk.Length > 0) chunks.Add(chunk);

            // This window already covers the tail. Advancing again would only re-emit part of
            // the overlap as a redundant chunk (extra embedding call, duplicate search hit).
            if (start + length >= text.Length) break;

            start += step;
        }

        return chunks;
    }
}
|
||||
@@ -0,0 +1,27 @@
|
||||
using System.Text;
|
||||
using Api.Services.Contracts;
|
||||
using UglyToad.PdfPig;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// Extracts text from PDF streams with PdfPig and collapses whitespace.
/// </summary>
public sealed class TextExtractor : ITextExtractor
{
    /// <summary>
    /// Reads the text of every page of the PDF in <paramref name="stream"/> and normalizes it.
    /// The work is done synchronously; the result is wrapped in an already completed task.
    /// </summary>
    /// <param name="stream">Stream containing the PDF.</param>
    /// <param name="ct">Token checked before each page is read.</param>
    /// <returns>The normalized text of all pages.</returns>
    public Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct)
    {
        var pages = new StringBuilder();
        using (var pdf = PdfDocument.Open(stream))
        {
            foreach (var page in pdf.GetPages())
            {
                ct.ThrowIfCancellationRequested();
                // A blank line separates pages; Normalize collapses it to a single space afterwards.
                pages.AppendLine(page.Text).AppendLine();
            }
        }

        var normalized = Normalize(pages.ToString());
        return Task.FromResult(normalized);
    }

    /// <summary>
    /// Collapses every run of whitespace into a single space and removes leading and trailing whitespace.
    /// </summary>
    /// <param name="value">Text to normalize.</param>
    /// <returns>The normalized text, or the empty string when <paramref name="value"/> is <c>null</c> or blank.</returns>
    public string Normalize(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        var words = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        return string.Join(' ', words).Trim();
    }
}
|
||||
@@ -0,0 +1,31 @@
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
/// Converts embedding vectors to and from raw byte arrays and compares them.
/// </summary>
public static class VectorSerializer
{
    /// <summary>
    /// Copies the in-memory bytes of <paramref name="vector"/> into a new array.
    /// </summary>
    /// <param name="vector">Vector to serialize.</param>
    /// <returns>An array of <c>vector.Length * 4</c> bytes.</returns>
    public static byte[] ToBytes(float[] vector)
    {
        var byteCount = vector.Length * sizeof(float);
        var buffer = new byte[byteCount];
        Buffer.BlockCopy(vector, 0, buffer, 0, byteCount);
        return buffer;
    }

    /// <summary>
    /// Rebuilds a vector from bytes produced by <see cref="ToBytes"/>.
    /// </summary>
    /// <param name="bytes">Serialized vector.</param>
    /// <returns>A vector of <c>bytes.Length / 4</c> elements.</returns>
    public static float[] FromBytes(byte[] bytes)
    {
        var floats = new float[bytes.Length / sizeof(float)];
        Buffer.BlockCopy(bytes, 0, floats, 0, bytes.Length);
        return floats;
    }

    /// <summary>
    /// Computes the cosine similarity of two vectors.
    /// </summary>
    /// <param name="a">First vector.</param>
    /// <param name="b">Second vector.</param>
    /// <returns>
    /// The cosine of the angle between the vectors, or <c>0</c> when they are empty, differ in length,
    /// or either one has zero magnitude.
    /// </returns>
    public static double CosineSimilarity(float[] a, float[] b)
    {
        var dimensions = a.Length;
        if (dimensions == 0 || dimensions != b.Length)
        {
            return 0;
        }

        double dotProduct = 0;
        double sumSquaresA = 0;
        double sumSquaresB = 0;
        for (var index = 0; index < dimensions; index++)
        {
            var x = a[index];
            var y = b[index];
            dotProduct += x * y;
            sumSquaresA += x * x;
            sumSquaresB += y * y;
        }

        if (sumSquaresA == 0 || sumSquaresB == 0)
        {
            return 0;
        }

        return dotProduct / (Math.Sqrt(sumSquaresA) * Math.Sqrt(sumSquaresB));
    }
}
|
||||
@@ -0,0 +1,24 @@
|
||||
namespace Api.Settings;
|
||||
|
||||
/// <summary>
/// Selects the AI provider and carries the settings of each supported provider.
/// </summary>
public sealed class AiSettings
{
    /// <summary>
    /// Name of the active provider. <c>RawAiClient</c> uses Ollama when this equals "Ollama"
    /// (case-insensitive) and OpenAI for any other value. Defaults to "OpenAI".
    /// </summary>
    public string Provider { get; set; } = "OpenAI";

    /// <summary>Settings used when the OpenAI provider is active.</summary>
    public OpenAiProviderSettings OpenAI { get; set; } = new OpenAiProviderSettings();

    /// <summary>Settings used when the Ollama provider is active.</summary>
    public OllamaProviderSettings Ollama { get; set; } = new OllamaProviderSettings();
}
|
||||
|
||||
/// <summary>
/// Settings for calls to the OpenAI API.
/// </summary>
public sealed class OpenAiProviderSettings
{
    /// <summary>API key sent as a bearer token. <c>RawAiClient</c> refuses to call OpenAI while it is blank.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>Model name sent with chat completion requests.</summary>
    public string ChatModel { get; set; } = "gpt-4o-mini";

    /// <summary>Model name sent with embedding requests.</summary>
    public string EmbeddingModel { get; set; } = "text-embedding-3-small";

    /// <summary>Request timeout in seconds. <c>RawAiClient</c> never uses less than 15 seconds.</summary>
    public int TimeoutSeconds { get; set; } = 90;
}
|
||||
|
||||
/// <summary>
/// Settings for calls to an Ollama server.
/// </summary>
public sealed class OllamaProviderSettings
{
    /// <summary>Base URL of the Ollama server. A trailing slash is removed before endpoint paths are appended.</summary>
    public string BaseUrl { get; set; } = "http://localhost:11434";

    /// <summary>Model name sent with chat requests.</summary>
    public string ChatModel { get; set; } = "llama3.1:8b";

    /// <summary>Model name sent with embedding requests.</summary>
    public string EmbeddingModel { get; set; } = "nomic-embed-text";

    /// <summary>Request timeout in seconds. <c>RawAiClient</c> never uses less than 30 seconds.</summary>
    public int TimeoutSeconds { get; set; } = 180;
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Api.Settings;
|
||||
|
||||
/// <summary>
/// Settings bound from the <c>InternalApi</c> configuration section.
/// NOTE(review): the code that consumes these values is not part of this file; the member
/// descriptions below are inferred from the names and should be confirmed against it.
/// </summary>
public sealed class InternalApiSettings
{
    /// <summary>Presumably the key callers must present; empty by default.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>Presumably switches API-key enforcement on; off by default.</summary>
    public bool RequireApiKey { get; set; }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace Api.Settings;
|
||||
|
||||
/// <summary>
/// Tunable limits for document indexing and search.
/// </summary>
public sealed class RagSettings
{
    /// <summary>Largest PDF upload, in megabytes, that <c>RagService.IndexPdfAsync</c> accepts.</summary>
    public int MaxFileSizeMb { get; set; } = 8;

    /// <summary>Chunk size in characters passed to the chunker. <c>TextChunker</c> clamps it to 300..3000.</summary>
    public int ChunkSize { get; set; } = 900;

    /// <summary>Overlap in characters between consecutive chunks. <c>TextChunker</c> clamps it to 0..ChunkSize/2.</summary>
    public int ChunkOverlap { get; set; } = 150;

    /// <summary>Maximum number of characters of a document that are indexed; longer text is cut off.</summary>
    public int MaxTextChars { get; set; } = 60000;

    /// <summary>Number of search results returned when the request does not specify one.</summary>
    public int DefaultTopK { get; set; } = 20;

    /// <summary>Upper bound applied to the requested number of search results.</summary>
    public int MaxTopK { get; set; } = 50;

    /// <summary>
    /// NOTE(review): presumably enables AI-based document classification; its consumer is not
    /// part of this file, so confirm before relying on it. Off by default.
    /// </summary>
    public bool ClassifyWithAi { get; set; }
}
|
||||
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"AllowedHosts": "*",
|
||||
"Serilog": {
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"System.Net.Http.HttpClient": "Warning"
|
||||
}
|
||||
},
|
||||
"WriteTo": [
|
||||
{ "Name": "Console" }
|
||||
]
|
||||
},
|
||||
"ConnectionStrings": {
|
||||
"RagDb": "Server=localhost,1433;Database=MyAiRag;User Id=sa;Password=Your_strong_password123;TrustServerCertificate=True"
|
||||
},
|
||||
"InternalApi": {
|
||||
"ApiKey": "",
|
||||
"RequireApiKey": false
|
||||
},
|
||||
"Rag": {
|
||||
"MaxFileSizeMb": 8,
|
||||
"ChunkSize": 900,
|
||||
"ChunkOverlap": 150,
|
||||
"MaxTextChars": 60000,
|
||||
"DefaultTopK": 20,
|
||||
"MaxTopK": 50,
|
||||
"ClassifyWithAi": false
|
||||
},
|
||||
"Ai": {
|
||||
"Provider": "OpenAI",
|
||||
"OpenAI": {
|
||||
"ApiKey": "",
|
||||
"ChatModel": "gpt-4o-mini",
|
||||
"EmbeddingModel": "text-embedding-3-small",
|
||||
"TimeoutSeconds": 90
|
||||
},
|
||||
"Ollama": {
|
||||
"BaseUrl": "http://localhost:11434",
|
||||
"ChatModel": "llama3.1:8b",
|
||||
"EmbeddingModel": "nomic-embed-text",
|
||||
"TimeoutSeconds": 180
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk.Web">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
|
||||
<RootNamespace>Api</RootNamespace>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
|
||||
<PackageReference Include="Azure.Identity" Version="1.21.0" />
|
||||
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
||||
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.1.3" />
|
||||
<PackageReference Include="PdfPig" Version="0.1.14" />
|
||||
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
||||
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
|
||||
<PackageReference Include="Swashbuckle.AspNetCore" Version="10.1.7" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Update="Database/schema.sql">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
Reference in New Issue
Block a user