@@ -1,116 +0,0 @@
|
|||||||
# MyAi RAG split cleanup
|
|
||||||
|
|
||||||
## Public `api`
|
|
||||||
|
|
||||||
The existing `api` project is now only the public gateway for the existing frontend.
|
|
||||||
|
|
||||||
It keeps:
|
|
||||||
|
|
||||||
- contact API
|
|
||||||
- file download API
|
|
||||||
- Google/config APIs
|
|
||||||
- health API
|
|
||||||
- `api/rag/*` proxy endpoints
|
|
||||||
|
|
||||||
It no longer contains local RAG processing code. The removed responsibilities are:
|
|
||||||
|
|
||||||
- PDF extraction
|
|
||||||
- chunking
|
|
||||||
- embeddings
|
|
||||||
- vector storage
|
|
||||||
- OpenAI/Ollama calls
|
|
||||||
- job text extraction
|
|
||||||
- CV matching business logic
|
|
||||||
|
|
||||||
`api/Controllers/RagController.cs` is intentionally kept. It proxies the current frontend calls:
|
|
||||||
|
|
||||||
- `POST /api/rag/cv` -> `cv-matcher-api /api/cv/upload`
|
|
||||||
- `POST /api/rag/match-job` -> `cv-matcher-api /api/cv/match-job`
|
|
||||||
|
|
||||||
Required public API config:
|
|
||||||
|
|
||||||
```json
|
|
||||||
"CvMatcherApi": {
|
|
||||||
"BaseUrl": "http://cv-matcher-api:8080",
|
|
||||||
"InternalApiKey": "change-this-internal-key"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## `cv-matcher-api`
|
|
||||||
|
|
||||||
Business API for CV/job workflows.
|
|
||||||
|
|
||||||
Main endpoints:
|
|
||||||
|
|
||||||
- `POST /api/cv/upload`
|
|
||||||
- `POST /api/cv/match-job`
|
|
||||||
- `POST /api/cv/find-jobs`
|
|
||||||
- `GET /health`
|
|
||||||
- Swagger: `/swagger`
|
|
||||||
|
|
||||||
Responsibilities:
|
|
||||||
|
|
||||||
- CV matcher business logic
|
|
||||||
- job URL/text extraction
|
|
||||||
- final LLM scoring
|
|
||||||
- result persistence
|
|
||||||
- email sending
|
|
||||||
- calls `rag-api` for generic semantic indexing/search
|
|
||||||
|
|
||||||
## `rag-api`
|
|
||||||
|
|
||||||
Generic semantic search API.
|
|
||||||
|
|
||||||
Main endpoints:
|
|
||||||
|
|
||||||
- `POST /api/rag/documents`
|
|
||||||
- `POST /api/rag/documents/json`
|
|
||||||
- `POST /api/rag/search`
|
|
||||||
- `GET /api/rag/documents/{id}`
|
|
||||||
- `GET /health`
|
|
||||||
- Swagger: `/swagger`
|
|
||||||
|
|
||||||
Responsibilities:
|
|
||||||
|
|
||||||
- generic document indexing
|
|
||||||
- automatic document type classification when type is missing
|
|
||||||
- PDF/text extraction
|
|
||||||
- chunking
|
|
||||||
- embedding creation
|
|
||||||
- embedding and chat completion cache
|
|
||||||
- semantic search over generic documents
|
|
||||||
|
|
||||||
## Logging and Swagger
|
|
||||||
|
|
||||||
All three APIs now have:
|
|
||||||
|
|
||||||
- Serilog startup logging
|
|
||||||
- Serilog request logging
|
|
||||||
- structured JSON console logs
|
|
||||||
- health endpoint
|
|
||||||
- Swagger/OpenAPI support
|
|
||||||
|
|
||||||
Swagger is enabled by default and can be disabled per service with:
|
|
||||||
|
|
||||||
```json
|
|
||||||
"Swagger": {
|
|
||||||
"Enabled": false
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Internal API security
|
|
||||||
|
|
||||||
Both internal APIs support API-key protection:
|
|
||||||
|
|
||||||
```json
|
|
||||||
"InternalApi": {
|
|
||||||
"RequireApiKey": true,
|
|
||||||
"ApiKey": "change-this-internal-key"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Requests must include:
|
|
||||||
|
|
||||||
```http
|
|
||||||
X-Internal-Api-Key: change-this-internal-key
|
|
||||||
```
|
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
using Api.Data.Entities;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
|
namespace Api.Data;
|
||||||
|
|
||||||
|
public sealed class CvMatcherDbContext : DbContext
|
||||||
|
{
|
||||||
|
public CvMatcherDbContext(DbContextOptions<CvMatcherDbContext> options) : base(options)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public DbSet<CvMatchResultEntity> CvMatchResults => Set<CvMatchResultEntity>();
|
||||||
|
public DbSet<CvMatcherChatCacheEntity> CvMatcherChatCache => Set<CvMatcherChatCacheEntity>();
|
||||||
|
|
||||||
|
protected override void OnModelCreating(ModelBuilder modelBuilder)
|
||||||
|
{
|
||||||
|
modelBuilder.Entity<CvMatchResultEntity>(entity =>
|
||||||
|
{
|
||||||
|
entity.ToTable("CvMatchResults");
|
||||||
|
entity.HasKey(x => x.Id);
|
||||||
|
entity.Property(x => x.Id).HasMaxLength(64);
|
||||||
|
entity.Property(x => x.CvDocumentId).HasMaxLength(64).IsRequired();
|
||||||
|
entity.Property(x => x.JobDocumentId).HasMaxLength(64).IsRequired();
|
||||||
|
entity.Property(x => x.ResultJson).IsRequired();
|
||||||
|
entity.Property(x => x.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
|
||||||
|
entity.HasIndex(x => new { x.CvDocumentId, x.JobDocumentId }).IsUnique();
|
||||||
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity<CvMatcherChatCacheEntity>(entity =>
|
||||||
|
{
|
||||||
|
entity.ToTable("CvMatcherChatCache");
|
||||||
|
entity.HasKey(x => x.CacheKey);
|
||||||
|
entity.Property(x => x.CacheKey).HasMaxLength(64);
|
||||||
|
entity.Property(x => x.Model).HasMaxLength(120).IsRequired();
|
||||||
|
entity.Property(x => x.Temperature).HasColumnType("decimal(4,2)");
|
||||||
|
entity.Property(x => x.ResponseText).IsRequired();
|
||||||
|
entity.Property(x => x.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
namespace Api.Data.Entities;
|
||||||
|
|
||||||
|
public sealed class CvMatchResultEntity
|
||||||
|
{
|
||||||
|
public string Id { get; set; } = string.Empty;
|
||||||
|
public string CvDocumentId { get; set; } = string.Empty;
|
||||||
|
public string JobDocumentId { get; set; } = string.Empty;
|
||||||
|
public string ResultJson { get; set; } = string.Empty;
|
||||||
|
public int Score { get; set; }
|
||||||
|
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
|
||||||
|
}
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
namespace Api.Data.Entities;
|
||||||
|
|
||||||
|
public sealed class CvMatcherChatCacheEntity
|
||||||
|
{
|
||||||
|
public string CacheKey { get; set; } = string.Empty;
|
||||||
|
public string Model { get; set; } = string.Empty;
|
||||||
|
public decimal Temperature { get; set; }
|
||||||
|
public string ResponseText { get; set; } = string.Empty;
|
||||||
|
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
|
||||||
|
}
|
||||||
@@ -1,10 +1,12 @@
|
|||||||
using Azure.Identity;
|
using Azure.Identity;
|
||||||
|
using Api.Data;
|
||||||
using Api.Services;
|
using Api.Services;
|
||||||
using Api.Services.Contracts;
|
using Api.Services.Contracts;
|
||||||
using Api.Settings;
|
using Api.Settings;
|
||||||
using Microsoft.AspNetCore.Diagnostics;
|
using Microsoft.AspNetCore.Diagnostics;
|
||||||
using Serilog;
|
using Serilog;
|
||||||
using System.Reflection;
|
using System.Reflection;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
DotNetEnv.Env.Load();
|
DotNetEnv.Env.Load();
|
||||||
|
|
||||||
@@ -69,7 +71,10 @@ try
|
|||||||
builder.Services.AddHttpClient<IRagApiClient, RagApiClient>();
|
builder.Services.AddHttpClient<IRagApiClient, RagApiClient>();
|
||||||
builder.Services.AddHttpClient<IMatcherAiClient, MatcherAiClient>();
|
builder.Services.AddHttpClient<IMatcherAiClient, MatcherAiClient>();
|
||||||
builder.Services.AddHttpClient<IJobTextExtractor, JobTextExtractor>();
|
builder.Services.AddHttpClient<IJobTextExtractor, JobTextExtractor>();
|
||||||
builder.Services.AddSingleton<IMatcherRepository, SqlMatcherRepository>();
|
builder.Services.AddDbContext<CvMatcherDbContext>(options =>
|
||||||
|
options.UseSqlServer(builder.Configuration.GetConnectionString("CvMatcherDb")
|
||||||
|
?? throw new InvalidOperationException("Connection string 'CvMatcherDb' is missing.")));
|
||||||
|
builder.Services.AddScoped<IMatcherRepository, EfMatcherRepository>();
|
||||||
builder.Services.AddScoped<ICvMatcherService, CvMatcherService>();
|
builder.Services.AddScoped<ICvMatcherService, CvMatcherService>();
|
||||||
builder.Services.AddSingleton<IEmailService, EmailService>();
|
builder.Services.AddSingleton<IEmailService, EmailService>();
|
||||||
|
|
||||||
@@ -160,6 +165,14 @@ try
|
|||||||
app.MapControllers();
|
app.MapControllers();
|
||||||
app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "cv-matcher-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));
|
app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "cv-matcher-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));
|
||||||
|
|
||||||
|
|
||||||
|
Log.Information("Running EfCore DbMigrations if any");
|
||||||
|
using (var scope = app.Services.CreateScope())
|
||||||
|
{
|
||||||
|
var db = scope.ServiceProvider.GetRequiredService<CvMatcherDbContext>();
|
||||||
|
db.Database.Migrate();
|
||||||
|
}
|
||||||
|
|
||||||
Log.Information("{Service} startup complete", "cv-matcher-api");
|
Log.Information("{Service} startup complete", "cv-matcher-api");
|
||||||
app.Run();
|
app.Run();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,88 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using Api.Data;
|
||||||
|
using Api.Data.Entities;
|
||||||
|
using Api.Responses;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
public sealed class EfMatcherRepository : IMatcherRepository
|
||||||
|
{
|
||||||
|
private readonly CvMatcherDbContext _db;
|
||||||
|
private readonly ILogger<EfMatcherRepository> _logger;
|
||||||
|
|
||||||
|
public EfMatcherRepository(CvMatcherDbContext db, ILogger<EfMatcherRepository> logger)
|
||||||
|
{
|
||||||
|
_db = db;
|
||||||
|
_logger = logger;
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task InitializeAsync(CancellationToken ct)
|
||||||
|
{
|
||||||
|
_logger.LogInformation("Ensuring CV matcher database schema exists using EF Core");
|
||||||
|
await _db.Database.EnsureCreatedAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<JobMatchResponse?> GetMatchAsync(string cvDocumentId, string jobDocumentId, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var json = await _db.CvMatchResults
|
||||||
|
.AsNoTracking()
|
||||||
|
.Where(x => x.CvDocumentId == cvDocumentId && x.JobDocumentId == jobDocumentId)
|
||||||
|
.Select(x => x.ResultJson)
|
||||||
|
.FirstOrDefaultAsync(ct);
|
||||||
|
|
||||||
|
if (string.IsNullOrWhiteSpace(json)) return null;
|
||||||
|
|
||||||
|
var result = JsonSerializer.Deserialize<JobMatchResponse>(json, new JsonSerializerOptions(JsonSerializerDefaults.Web));
|
||||||
|
if (result is not null) result.Cached = true;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, JobMatchResponse response, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var exists = await _db.CvMatchResults.AnyAsync(
|
||||||
|
x => x.CvDocumentId == cvDocumentId && x.JobDocumentId == jobDocumentId,
|
||||||
|
ct);
|
||||||
|
|
||||||
|
if (exists) return;
|
||||||
|
|
||||||
|
_db.CvMatchResults.Add(new CvMatchResultEntity
|
||||||
|
{
|
||||||
|
Id = Guid.NewGuid().ToString("N"),
|
||||||
|
CvDocumentId = cvDocumentId,
|
||||||
|
JobDocumentId = jobDocumentId,
|
||||||
|
ResultJson = JsonSerializer.Serialize(response, new JsonSerializerOptions(JsonSerializerDefaults.Web)),
|
||||||
|
Score = response.Score,
|
||||||
|
CreatedAt = DateTime.UtcNow
|
||||||
|
});
|
||||||
|
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
|
||||||
|
{
|
||||||
|
return await _db.CvMatcherChatCache
|
||||||
|
.AsNoTracking()
|
||||||
|
.Where(x => x.CacheKey == cacheKey)
|
||||||
|
.Select(x => x.ResponseText)
|
||||||
|
.FirstOrDefaultAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var exists = await _db.CvMatcherChatCache.AnyAsync(x => x.CacheKey == cacheKey, ct);
|
||||||
|
if (exists) return;
|
||||||
|
|
||||||
|
_db.CvMatcherChatCache.Add(new CvMatcherChatCacheEntity
|
||||||
|
{
|
||||||
|
CacheKey = cacheKey,
|
||||||
|
Model = model,
|
||||||
|
Temperature = temperature,
|
||||||
|
ResponseText = responseText,
|
||||||
|
CreatedAt = DateTime.UtcNow
|
||||||
|
});
|
||||||
|
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,105 +0,0 @@
|
|||||||
using System.Text.Json;
|
|
||||||
using Api.Responses;
|
|
||||||
using Api.Services.Contracts;
|
|
||||||
using Microsoft.Data.SqlClient;
|
|
||||||
|
|
||||||
namespace Api.Services;
|
|
||||||
|
|
||||||
public sealed class SqlMatcherRepository : IMatcherRepository
|
|
||||||
{
|
|
||||||
private readonly string _connectionString;
|
|
||||||
|
|
||||||
public SqlMatcherRepository(IConfiguration configuration)
|
|
||||||
{
|
|
||||||
_connectionString = configuration.GetConnectionString("CvMatcherDb")
|
|
||||||
?? throw new InvalidOperationException("Connection string 'CvMatcherDb' is missing.");
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task InitializeAsync(CancellationToken ct)
|
|
||||||
{
|
|
||||||
await EnsureDatabaseExistsAsync(ct);
|
|
||||||
var sql = await File.ReadAllTextAsync(Path.Combine(AppContext.BaseDirectory, "Database", "schema.sql"), ct);
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
foreach (var commandText in sql.Split("GO", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
|
|
||||||
{
|
|
||||||
await using var command = new SqlCommand(commandText, connection);
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<JobMatchResponse?> GetMatchAsync(string cvDocumentId, string jobDocumentId, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = "SELECT ResultJson FROM CvMatchResults WHERE CvDocumentId = @CvDocumentId AND JobDocumentId = @JobDocumentId";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@CvDocumentId", cvDocumentId);
|
|
||||||
command.Parameters.AddWithValue("@JobDocumentId", jobDocumentId);
|
|
||||||
var json = await command.ExecuteScalarAsync(ct) as string;
|
|
||||||
if (string.IsNullOrWhiteSpace(json)) return null;
|
|
||||||
var result = JsonSerializer.Deserialize<JobMatchResponse>(json, new JsonSerializerOptions(JsonSerializerDefaults.Web));
|
|
||||||
if (result is not null) result.Cached = true;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, JobMatchResponse response, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = """
|
|
||||||
IF NOT EXISTS (SELECT 1 FROM CvMatchResults WHERE CvDocumentId = @CvDocumentId AND JobDocumentId = @JobDocumentId)
|
|
||||||
INSERT INTO CvMatchResults (Id, CvDocumentId, JobDocumentId, ResultJson, Score, CreatedAt)
|
|
||||||
VALUES (@Id, @CvDocumentId, @JobDocumentId, @ResultJson, @Score, SYSUTCDATETIME())
|
|
||||||
""";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@Id", Guid.NewGuid().ToString("N"));
|
|
||||||
command.Parameters.AddWithValue("@CvDocumentId", cvDocumentId);
|
|
||||||
command.Parameters.AddWithValue("@JobDocumentId", jobDocumentId);
|
|
||||||
command.Parameters.AddWithValue("@ResultJson", JsonSerializer.Serialize(response, new JsonSerializerOptions(JsonSerializerDefaults.Web)));
|
|
||||||
command.Parameters.AddWithValue("@Score", response.Score);
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = "SELECT ResponseText FROM CvMatcherChatCache WHERE CacheKey = @CacheKey";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@CacheKey", cacheKey);
|
|
||||||
return await command.ExecuteScalarAsync(ct) as string;
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = """
|
|
||||||
IF NOT EXISTS (SELECT 1 FROM CvMatcherChatCache WHERE CacheKey = @CacheKey)
|
|
||||||
INSERT INTO CvMatcherChatCache (CacheKey, Model, Temperature, ResponseText, CreatedAt)
|
|
||||||
VALUES (@CacheKey, @Model, @Temperature, @ResponseText, SYSUTCDATETIME())
|
|
||||||
""";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@CacheKey", cacheKey);
|
|
||||||
command.Parameters.AddWithValue("@Model", model);
|
|
||||||
command.Parameters.AddWithValue("@Temperature", temperature);
|
|
||||||
command.Parameters.AddWithValue("@ResponseText", responseText);
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
private async Task EnsureDatabaseExistsAsync(CancellationToken ct)
|
|
||||||
{
|
|
||||||
var builder = new SqlConnectionStringBuilder(_connectionString);
|
|
||||||
var databaseName = builder.InitialCatalog;
|
|
||||||
if (string.IsNullOrWhiteSpace(databaseName)) return;
|
|
||||||
|
|
||||||
builder.InitialCatalog = "master";
|
|
||||||
await using var connection = new SqlConnection(builder.ConnectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
var safeName = databaseName.Replace("]", "]]" );
|
|
||||||
await using var command = new SqlCommand($"IF DB_ID(@DatabaseName) IS NULL EXEC('CREATE DATABASE [{safeName}]')", connection);
|
|
||||||
command.Parameters.AddWithValue("@DatabaseName", databaseName);
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -11,8 +11,12 @@
|
|||||||
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
|
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
|
||||||
<PackageReference Include="Azure.Identity" Version="1.21.0" />
|
<PackageReference Include="Azure.Identity" Version="1.21.0" />
|
||||||
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
||||||
|
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="10.0.0" />
|
||||||
|
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="10.0.0">
|
||||||
|
<PrivateAssets>all</PrivateAssets>
|
||||||
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
</PackageReference>
|
||||||
<PackageReference Include="MailKit" Version="4.16.0" />
|
<PackageReference Include="MailKit" Version="4.16.0" />
|
||||||
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.1.3" />
|
|
||||||
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
||||||
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
||||||
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
|
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
|
||||||
|
|||||||
@@ -0,0 +1,10 @@
|
|||||||
|
namespace Api.Data.Entities;
|
||||||
|
|
||||||
|
public sealed class RagChatCompletionCacheEntity
|
||||||
|
{
|
||||||
|
public string CacheKey { get; set; } = string.Empty;
|
||||||
|
public string Model { get; set; } = string.Empty;
|
||||||
|
public decimal Temperature { get; set; }
|
||||||
|
public string ResponseText { get; set; } = string.Empty;
|
||||||
|
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
|
||||||
|
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
namespace Api.Data.Entities;
|
||||||
|
|
||||||
|
public sealed class RagChunkEntity
|
||||||
|
{
|
||||||
|
public string Id { get; set; } = string.Empty;
|
||||||
|
public string DocumentId { get; set; } = string.Empty;
|
||||||
|
public int ChunkIndex { get; set; }
|
||||||
|
public string Text { get; set; } = string.Empty;
|
||||||
|
public byte[] Embedding { get; set; } = [];
|
||||||
|
|
||||||
|
public RagDocumentEntity? Document { get; set; }
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
namespace Api.Data.Entities;
|
||||||
|
|
||||||
|
public sealed class RagDocumentEntity
|
||||||
|
{
|
||||||
|
public string Id { get; set; } = string.Empty;
|
||||||
|
public string DocumentType { get; set; } = string.Empty;
|
||||||
|
public string Title { get; set; } = string.Empty;
|
||||||
|
public string? SourceUrl { get; set; }
|
||||||
|
public string RawText { get; set; } = string.Empty;
|
||||||
|
public string TextHash { get; set; } = string.Empty;
|
||||||
|
public double TypeConfidence { get; set; }
|
||||||
|
public string MetadataJson { get; set; } = "{}";
|
||||||
|
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
|
||||||
|
|
||||||
|
public ICollection<RagChunkEntity> Chunks { get; set; } = [];
|
||||||
|
}
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
namespace Api.Data.Entities;
|
||||||
|
|
||||||
|
public sealed class RagEmbeddingCacheEntity
|
||||||
|
{
|
||||||
|
public string CacheKey { get; set; } = string.Empty;
|
||||||
|
public string Model { get; set; } = string.Empty;
|
||||||
|
public string TextHash { get; set; } = string.Empty;
|
||||||
|
public byte[] Vector { get; set; } = [];
|
||||||
|
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
|
||||||
|
}
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
using Api.Data.Entities;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
|
namespace Api.Data;
|
||||||
|
|
||||||
|
public sealed class RagDbContext : DbContext
|
||||||
|
{
|
||||||
|
public RagDbContext(DbContextOptions<RagDbContext> options) : base(options)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public DbSet<RagDocumentEntity> RagDocuments => Set<RagDocumentEntity>();
|
||||||
|
public DbSet<RagChunkEntity> RagChunks => Set<RagChunkEntity>();
|
||||||
|
public DbSet<RagEmbeddingCacheEntity> RagEmbeddingCache => Set<RagEmbeddingCacheEntity>();
|
||||||
|
public DbSet<RagChatCompletionCacheEntity> RagChatCompletionCache => Set<RagChatCompletionCacheEntity>();
|
||||||
|
|
||||||
|
protected override void OnModelCreating(ModelBuilder modelBuilder)
|
||||||
|
{
|
||||||
|
modelBuilder.Entity<RagDocumentEntity>(entity =>
|
||||||
|
{
|
||||||
|
entity.ToTable("RagDocuments");
|
||||||
|
entity.HasKey(x => x.Id);
|
||||||
|
entity.Property(x => x.Id).HasMaxLength(64);
|
||||||
|
entity.Property(x => x.DocumentType).HasMaxLength(80).IsRequired();
|
||||||
|
entity.Property(x => x.Title).HasMaxLength(300).IsRequired();
|
||||||
|
entity.Property(x => x.SourceUrl).HasMaxLength(1200);
|
||||||
|
entity.Property(x => x.RawText).IsRequired();
|
||||||
|
entity.Property(x => x.TextHash).HasMaxLength(64).IsRequired();
|
||||||
|
entity.Property(x => x.MetadataJson).HasDefaultValue("{}").IsRequired();
|
||||||
|
entity.Property(x => x.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
|
||||||
|
entity.HasIndex(x => x.TextHash);
|
||||||
|
entity.HasIndex(x => x.DocumentType);
|
||||||
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity<RagChunkEntity>(entity =>
|
||||||
|
{
|
||||||
|
entity.ToTable("RagChunks");
|
||||||
|
entity.HasKey(x => x.Id);
|
||||||
|
entity.Property(x => x.Id).HasMaxLength(64);
|
||||||
|
entity.Property(x => x.DocumentId).HasMaxLength(64).IsRequired();
|
||||||
|
entity.Property(x => x.Text).IsRequired();
|
||||||
|
entity.Property(x => x.Embedding).IsRequired();
|
||||||
|
entity.HasOne(x => x.Document)
|
||||||
|
.WithMany(x => x.Chunks)
|
||||||
|
.HasForeignKey(x => x.DocumentId)
|
||||||
|
.OnDelete(DeleteBehavior.Cascade);
|
||||||
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity<RagEmbeddingCacheEntity>(entity =>
|
||||||
|
{
|
||||||
|
entity.ToTable("RagEmbeddingCache");
|
||||||
|
entity.HasKey(x => x.CacheKey);
|
||||||
|
entity.Property(x => x.CacheKey).HasMaxLength(64);
|
||||||
|
entity.Property(x => x.Model).HasMaxLength(120).IsRequired();
|
||||||
|
entity.Property(x => x.TextHash).HasMaxLength(64).IsRequired();
|
||||||
|
entity.Property(x => x.Vector).IsRequired();
|
||||||
|
entity.Property(x => x.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
|
||||||
|
entity.HasIndex(x => x.TextHash);
|
||||||
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity<RagChatCompletionCacheEntity>(entity =>
|
||||||
|
{
|
||||||
|
entity.ToTable("RagChatCompletionCache");
|
||||||
|
entity.HasKey(x => x.CacheKey);
|
||||||
|
entity.Property(x => x.CacheKey).HasMaxLength(64);
|
||||||
|
entity.Property(x => x.Model).HasMaxLength(120).IsRequired();
|
||||||
|
entity.Property(x => x.Temperature).HasColumnType("decimal(4,2)");
|
||||||
|
entity.Property(x => x.ResponseText).IsRequired();
|
||||||
|
entity.Property(x => x.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
+14
-1
@@ -1,10 +1,12 @@
|
|||||||
using Azure.Identity;
|
using Azure.Identity;
|
||||||
using Microsoft.AspNetCore.Diagnostics;
|
using Microsoft.AspNetCore.Diagnostics;
|
||||||
|
using Api.Data;
|
||||||
using Api.Services;
|
using Api.Services;
|
||||||
using Api.Services.Contracts;
|
using Api.Services.Contracts;
|
||||||
using Api.Settings;
|
using Api.Settings;
|
||||||
using Serilog;
|
using Serilog;
|
||||||
using System.Reflection;
|
using System.Reflection;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
DotNetEnv.Env.Load();
|
DotNetEnv.Env.Load();
|
||||||
|
|
||||||
@@ -64,8 +66,12 @@ try
|
|||||||
builder.Services.Configure<AiSettings>(builder.Configuration.GetSection("Ai"));
|
builder.Services.Configure<AiSettings>(builder.Configuration.GetSection("Ai"));
|
||||||
builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));
|
builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));
|
||||||
|
|
||||||
|
builder.Services.AddDbContext<RagDbContext>(options =>
|
||||||
|
options.UseSqlServer(builder.Configuration.GetConnectionString("RagDb")
|
||||||
|
?? throw new InvalidOperationException("Connection string 'RagDb' is missing.")));
|
||||||
|
|
||||||
builder.Services.AddHttpClient<RawAiClient>();
|
builder.Services.AddHttpClient<RawAiClient>();
|
||||||
builder.Services.AddSingleton<IRagRepository, SqlRagRepository>();
|
builder.Services.AddScoped<IRagRepository, EfRagRepository>();
|
||||||
builder.Services.AddScoped<IAiClient, CachedAiClient>();
|
builder.Services.AddScoped<IAiClient, CachedAiClient>();
|
||||||
builder.Services.AddSingleton<ITextExtractor, TextExtractor>();
|
builder.Services.AddSingleton<ITextExtractor, TextExtractor>();
|
||||||
builder.Services.AddSingleton<ITextChunker, TextChunker>();
|
builder.Services.AddSingleton<ITextChunker, TextChunker>();
|
||||||
@@ -159,6 +165,13 @@ try
|
|||||||
app.MapControllers();
|
app.MapControllers();
|
||||||
app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "rag-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));
|
app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "rag-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));
|
||||||
|
|
||||||
|
Log.Information("Running EfCore DbMigrations if any");
|
||||||
|
using (var scope = app.Services.CreateScope())
|
||||||
|
{
|
||||||
|
var db = scope.ServiceProvider.GetRequiredService<RagDbContext>();
|
||||||
|
db.Database.Migrate();
|
||||||
|
}
|
||||||
|
|
||||||
Log.Information("{Service} startup complete", "rag-api");
|
Log.Information("{Service} startup complete", "rag-api");
|
||||||
app.Run();
|
app.Run();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,195 @@
|
|||||||
|
using Api.Data;
|
||||||
|
using Api.Data.Entities;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Services.Contracts.Models;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
public sealed class EfRagRepository : IRagRepository
|
||||||
|
{
|
||||||
|
private readonly RagDbContext _db;
|
||||||
|
private readonly ILogger<EfRagRepository> _logger;
|
||||||
|
|
||||||
|
public EfRagRepository(RagDbContext db, ILogger<EfRagRepository> logger)
|
||||||
|
{
|
||||||
|
_db = db;
|
||||||
|
_logger = logger;
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task InitializeAsync(CancellationToken ct)
|
||||||
|
{
|
||||||
|
_logger.LogInformation("Ensuring RAG database schema exists using EF Core");
|
||||||
|
await _db.Database.EnsureCreatedAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var query = _db.RagDocuments
|
||||||
|
.AsNoTracking()
|
||||||
|
.Where(x => x.TextHash == textHash);
|
||||||
|
|
||||||
|
if (!string.IsNullOrWhiteSpace(sourceUrl))
|
||||||
|
{
|
||||||
|
query = query.Where(x => x.SourceUrl == sourceUrl);
|
||||||
|
}
|
||||||
|
|
||||||
|
var entity = await query
|
||||||
|
.OrderByDescending(x => x.CreatedAt)
|
||||||
|
.FirstOrDefaultAsync(ct);
|
||||||
|
|
||||||
|
return entity is null ? null : ToRecord(entity);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var entity = await _db.RagDocuments
|
||||||
|
.AsNoTracking()
|
||||||
|
.FirstOrDefaultAsync(x => x.Id == id, ct);
|
||||||
|
|
||||||
|
return entity is null ? null : ToRecord(entity);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var exists = await _db.RagDocuments.AnyAsync(x => x.Id == document.Id, ct);
|
||||||
|
if (exists)
|
||||||
|
{
|
||||||
|
_logger.LogInformation("RAG document already exists. DocumentId={DocumentId}", document.Id);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var entity = new RagDocumentEntity
|
||||||
|
{
|
||||||
|
Id = document.Id,
|
||||||
|
DocumentType = document.DocumentType,
|
||||||
|
Title = document.Title,
|
||||||
|
SourceUrl = document.SourceUrl,
|
||||||
|
RawText = document.Text,
|
||||||
|
TextHash = document.TextHash,
|
||||||
|
TypeConfidence = document.TypeConfidence,
|
||||||
|
MetadataJson = document.MetadataJson,
|
||||||
|
CreatedAt = document.CreatedAt.UtcDateTime,
|
||||||
|
Chunks = chunks.Select(chunk => new RagChunkEntity
|
||||||
|
{
|
||||||
|
Id = chunk.Id,
|
||||||
|
DocumentId = chunk.DocumentId,
|
||||||
|
ChunkIndex = chunk.ChunkIndex,
|
||||||
|
Text = chunk.Text,
|
||||||
|
Embedding = VectorSerializer.ToBytes(chunk.Embedding)
|
||||||
|
}).ToList()
|
||||||
|
};
|
||||||
|
|
||||||
|
_db.RagDocuments.Add(entity);
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(
|
||||||
|
float[] queryEmbedding,
|
||||||
|
IReadOnlyList<string>? targetTypes,
|
||||||
|
int topK,
|
||||||
|
CancellationToken ct)
|
||||||
|
{
|
||||||
|
var types = targetTypes?
|
||||||
|
.Where(x => !string.IsNullOrWhiteSpace(x))
|
||||||
|
.Select(x => x.Trim().ToLowerInvariant())
|
||||||
|
.Distinct()
|
||||||
|
.ToArray() ?? [];
|
||||||
|
|
||||||
|
var query = _db.RagChunks
|
||||||
|
.AsNoTracking()
|
||||||
|
.Include(x => x.Document)
|
||||||
|
.AsQueryable();
|
||||||
|
|
||||||
|
if (types.Length > 0)
|
||||||
|
{
|
||||||
|
query = query.Where(x => x.Document != null && types.Contains(x.Document.DocumentType.ToLower()));
|
||||||
|
}
|
||||||
|
|
||||||
|
var rows = await query.ToListAsync(ct);
|
||||||
|
|
||||||
|
return rows
|
||||||
|
.Where(x => x.Document is not null)
|
||||||
|
.Select(x => new SearchCandidateChunk
|
||||||
|
{
|
||||||
|
Document = ToRecord(x.Document!),
|
||||||
|
Chunk = new RagChunkRecord
|
||||||
|
{
|
||||||
|
Id = x.Id,
|
||||||
|
DocumentId = x.DocumentId,
|
||||||
|
ChunkIndex = x.ChunkIndex,
|
||||||
|
Text = x.Text,
|
||||||
|
Embedding = VectorSerializer.FromBytes(x.Embedding)
|
||||||
|
},
|
||||||
|
Score = VectorSerializer.CosineSimilarity(queryEmbedding, VectorSerializer.FromBytes(x.Embedding))
|
||||||
|
})
|
||||||
|
.OrderByDescending(x => x.Score)
|
||||||
|
.Take(Math.Max(topK * 4, topK))
|
||||||
|
.ToList();
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var entry = await _db.RagEmbeddingCache
|
||||||
|
.AsNoTracking()
|
||||||
|
.FirstOrDefaultAsync(x => x.CacheKey == cacheKey, ct);
|
||||||
|
|
||||||
|
return entry is null ? null : VectorSerializer.FromBytes(entry.Vector);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var exists = await _db.RagEmbeddingCache.AnyAsync(x => x.CacheKey == cacheKey, ct);
|
||||||
|
if (exists) return;
|
||||||
|
|
||||||
|
_db.RagEmbeddingCache.Add(new RagEmbeddingCacheEntity
|
||||||
|
{
|
||||||
|
CacheKey = cacheKey,
|
||||||
|
Model = model,
|
||||||
|
TextHash = textHash,
|
||||||
|
Vector = VectorSerializer.ToBytes(vector),
|
||||||
|
CreatedAt = DateTime.UtcNow
|
||||||
|
});
|
||||||
|
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
|
||||||
|
{
|
||||||
|
return await _db.RagChatCompletionCache
|
||||||
|
.AsNoTracking()
|
||||||
|
.Where(x => x.CacheKey == cacheKey)
|
||||||
|
.Select(x => x.ResponseText)
|
||||||
|
.FirstOrDefaultAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
|
||||||
|
{
|
||||||
|
var exists = await _db.RagChatCompletionCache.AnyAsync(x => x.CacheKey == cacheKey, ct);
|
||||||
|
if (exists) return;
|
||||||
|
|
||||||
|
_db.RagChatCompletionCache.Add(new RagChatCompletionCacheEntity
|
||||||
|
{
|
||||||
|
CacheKey = cacheKey,
|
||||||
|
Model = model,
|
||||||
|
Temperature = temperature,
|
||||||
|
ResponseText = responseText,
|
||||||
|
CreatedAt = DateTime.UtcNow
|
||||||
|
});
|
||||||
|
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static RagDocumentRecord ToRecord(RagDocumentEntity entity) => new()
|
||||||
|
{
|
||||||
|
Id = entity.Id,
|
||||||
|
DocumentType = entity.DocumentType,
|
||||||
|
Title = entity.Title,
|
||||||
|
SourceUrl = entity.SourceUrl,
|
||||||
|
Text = entity.RawText,
|
||||||
|
TextHash = entity.TextHash,
|
||||||
|
TypeConfidence = entity.TypeConfidence,
|
||||||
|
MetadataJson = entity.MetadataJson,
|
||||||
|
CreatedAt = new DateTimeOffset(DateTime.SpecifyKind(entity.CreatedAt, DateTimeKind.Utc))
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -1,238 +0,0 @@
|
|||||||
using Microsoft.Data.SqlClient;
|
|
||||||
using Api.Services.Contracts;
|
|
||||||
using Api.Services.Contracts.Models;
|
|
||||||
|
|
||||||
namespace Api.Services;
|
|
||||||
|
|
||||||
public sealed class SqlRagRepository : IRagRepository
|
|
||||||
{
|
|
||||||
private readonly string _connectionString;
|
|
||||||
|
|
||||||
public SqlRagRepository(IConfiguration configuration)
|
|
||||||
{
|
|
||||||
_connectionString = configuration.GetConnectionString("RagDb")
|
|
||||||
?? throw new InvalidOperationException("Connection string 'RagDb' is missing.");
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task InitializeAsync(CancellationToken ct)
|
|
||||||
{
|
|
||||||
await EnsureDatabaseExistsAsync(ct);
|
|
||||||
var sql = await File.ReadAllTextAsync(Path.Combine(AppContext.BaseDirectory, "Database", "schema.sql"), ct);
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
foreach (var commandText in sql.Split("GO", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
|
|
||||||
{
|
|
||||||
await using var command = new SqlCommand(commandText, connection);
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = """
|
|
||||||
SELECT TOP 1 Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt
|
|
||||||
FROM RagDocuments
|
|
||||||
WHERE TextHash = @TextHash AND (@SourceUrl IS NULL OR SourceUrl = @SourceUrl)
|
|
||||||
ORDER BY CreatedAt DESC
|
|
||||||
""";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@TextHash", textHash);
|
|
||||||
command.Parameters.AddWithValue("@SourceUrl", (object?)sourceUrl ?? DBNull.Value);
|
|
||||||
await using var reader = await command.ExecuteReaderAsync(ct);
|
|
||||||
return await reader.ReadAsync(ct) ? ReadDocument(reader) : null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = """
|
|
||||||
SELECT Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt
|
|
||||||
FROM RagDocuments
|
|
||||||
WHERE Id = @Id
|
|
||||||
""";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@Id", id);
|
|
||||||
await using var reader = await command.ExecuteReaderAsync(ct);
|
|
||||||
return await reader.ReadAsync(ct) ? ReadDocument(reader) : null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct)
|
|
||||||
{
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var tx = (SqlTransaction)await connection.BeginTransactionAsync(ct);
|
|
||||||
try
|
|
||||||
{
|
|
||||||
const string insertDoc = """
|
|
||||||
INSERT INTO RagDocuments (Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt)
|
|
||||||
VALUES (@Id, @DocumentType, @Title, @SourceUrl, @RawText, @TextHash, @TypeConfidence, @MetadataJson, @CreatedAt)
|
|
||||||
""";
|
|
||||||
await using (var command = new SqlCommand(insertDoc, connection, tx))
|
|
||||||
{
|
|
||||||
command.Parameters.AddWithValue("@Id", document.Id);
|
|
||||||
command.Parameters.AddWithValue("@DocumentType", document.DocumentType);
|
|
||||||
command.Parameters.AddWithValue("@Title", document.Title);
|
|
||||||
command.Parameters.AddWithValue("@SourceUrl", (object?)document.SourceUrl ?? DBNull.Value);
|
|
||||||
command.Parameters.AddWithValue("@RawText", document.Text);
|
|
||||||
command.Parameters.AddWithValue("@TextHash", document.TextHash);
|
|
||||||
command.Parameters.AddWithValue("@TypeConfidence", document.TypeConfidence);
|
|
||||||
command.Parameters.AddWithValue("@MetadataJson", document.MetadataJson);
|
|
||||||
command.Parameters.AddWithValue("@CreatedAt", document.CreatedAt.UtcDateTime);
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
|
|
||||||
const string insertChunk = """
|
|
||||||
INSERT INTO RagChunks (Id, DocumentId, ChunkIndex, Text, Embedding)
|
|
||||||
VALUES (@Id, @DocumentId, @ChunkIndex, @Text, @Embedding)
|
|
||||||
""";
|
|
||||||
foreach (var chunk in chunks)
|
|
||||||
{
|
|
||||||
await using var command = new SqlCommand(insertChunk, connection, tx);
|
|
||||||
command.Parameters.AddWithValue("@Id", chunk.Id);
|
|
||||||
command.Parameters.AddWithValue("@DocumentId", document.Id);
|
|
||||||
command.Parameters.AddWithValue("@ChunkIndex", chunk.ChunkIndex);
|
|
||||||
command.Parameters.AddWithValue("@Text", chunk.Text);
|
|
||||||
command.Parameters.AddWithValue("@Embedding", VectorSerializer.ToBytes(chunk.Embedding));
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
await tx.CommitAsync(ct);
|
|
||||||
}
|
|
||||||
catch
|
|
||||||
{
|
|
||||||
await tx.RollbackAsync(ct);
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(float[] queryEmbedding, IReadOnlyList<string>? targetTypes, int topK, CancellationToken ct)
|
|
||||||
{
|
|
||||||
var types = targetTypes?.Where(x => !string.IsNullOrWhiteSpace(x)).Select(x => x.Trim().ToLowerInvariant()).Distinct().ToArray() ?? [];
|
|
||||||
var sql = """
|
|
||||||
SELECT d.Id, d.DocumentType, d.Title, d.SourceUrl, d.RawText, d.TextHash, d.TypeConfidence, d.MetadataJson, d.CreatedAt,
|
|
||||||
c.Id, c.DocumentId, c.ChunkIndex, c.Text, c.Embedding
|
|
||||||
FROM RagChunks c
|
|
||||||
INNER JOIN RagDocuments d ON d.Id = c.DocumentId
|
|
||||||
""";
|
|
||||||
|
|
||||||
if (types.Length > 0)
|
|
||||||
{
|
|
||||||
sql += " WHERE LOWER(d.DocumentType) IN (" + string.Join(',', types.Select((_, i) => $"@Type{i}")) + ")";
|
|
||||||
}
|
|
||||||
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
for (var i = 0; i < types.Length; i++) command.Parameters.AddWithValue($"@Type{i}", types[i]);
|
|
||||||
await using var reader = await command.ExecuteReaderAsync(ct);
|
|
||||||
var candidates = new List<SearchCandidateChunk>();
|
|
||||||
while (await reader.ReadAsync(ct))
|
|
||||||
{
|
|
||||||
var doc = ReadDocument(reader, 0);
|
|
||||||
var chunk = new RagChunkRecord
|
|
||||||
{
|
|
||||||
Id = reader.GetString(9),
|
|
||||||
DocumentId = reader.GetString(10),
|
|
||||||
ChunkIndex = reader.GetInt32(11),
|
|
||||||
Text = reader.GetString(12),
|
|
||||||
Embedding = VectorSerializer.FromBytes((byte[])reader[13])
|
|
||||||
};
|
|
||||||
candidates.Add(new SearchCandidateChunk
|
|
||||||
{
|
|
||||||
Document = doc,
|
|
||||||
Chunk = chunk,
|
|
||||||
Score = VectorSerializer.CosineSimilarity(queryEmbedding, chunk.Embedding)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return candidates
|
|
||||||
.OrderByDescending(x => x.Score)
|
|
||||||
.Take(Math.Max(topK * 4, topK))
|
|
||||||
.ToList();
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = "SELECT Vector FROM RagEmbeddingCache WHERE CacheKey = @CacheKey";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@CacheKey", cacheKey);
|
|
||||||
var value = await command.ExecuteScalarAsync(ct);
|
|
||||||
return value is byte[] bytes ? VectorSerializer.FromBytes(bytes) : null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = """
|
|
||||||
IF NOT EXISTS (SELECT 1 FROM RagEmbeddingCache WHERE CacheKey = @CacheKey)
|
|
||||||
INSERT INTO RagEmbeddingCache (CacheKey, Model, TextHash, Vector, CreatedAt)
|
|
||||||
VALUES (@CacheKey, @Model, @TextHash, @Vector, SYSUTCDATETIME())
|
|
||||||
""";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@CacheKey", cacheKey);
|
|
||||||
command.Parameters.AddWithValue("@Model", model);
|
|
||||||
command.Parameters.AddWithValue("@TextHash", textHash);
|
|
||||||
command.Parameters.AddWithValue("@Vector", VectorSerializer.ToBytes(vector));
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = "SELECT ResponseText FROM RagChatCompletionCache WHERE CacheKey = @CacheKey";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@CacheKey", cacheKey);
|
|
||||||
return await command.ExecuteScalarAsync(ct) as string;
|
|
||||||
}
|
|
||||||
|
|
||||||
public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
|
|
||||||
{
|
|
||||||
const string sql = """
|
|
||||||
IF NOT EXISTS (SELECT 1 FROM RagChatCompletionCache WHERE CacheKey = @CacheKey)
|
|
||||||
INSERT INTO RagChatCompletionCache (CacheKey, Model, Temperature, ResponseText, CreatedAt)
|
|
||||||
VALUES (@CacheKey, @Model, @Temperature, @ResponseText, SYSUTCDATETIME())
|
|
||||||
""";
|
|
||||||
await using var connection = new SqlConnection(_connectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
await using var command = new SqlCommand(sql, connection);
|
|
||||||
command.Parameters.AddWithValue("@CacheKey", cacheKey);
|
|
||||||
command.Parameters.AddWithValue("@Model", model);
|
|
||||||
command.Parameters.AddWithValue("@Temperature", temperature);
|
|
||||||
command.Parameters.AddWithValue("@ResponseText", responseText);
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static RagDocumentRecord ReadDocument(SqlDataReader reader, int offset = 0) => new()
|
|
||||||
{
|
|
||||||
Id = reader.GetString(offset),
|
|
||||||
DocumentType = reader.GetString(offset + 1),
|
|
||||||
Title = reader.GetString(offset + 2),
|
|
||||||
SourceUrl = reader.IsDBNull(offset + 3) ? null : reader.GetString(offset + 3),
|
|
||||||
Text = reader.GetString(offset + 4),
|
|
||||||
TextHash = reader.GetString(offset + 5),
|
|
||||||
TypeConfidence = Convert.ToDouble(reader.GetValue(offset + 6)),
|
|
||||||
MetadataJson = reader.GetString(offset + 7),
|
|
||||||
CreatedAt = new DateTimeOffset(reader.GetDateTime(offset + 8), TimeSpan.Zero)
|
|
||||||
};
|
|
||||||
private async Task EnsureDatabaseExistsAsync(CancellationToken ct)
|
|
||||||
{
|
|
||||||
var builder = new SqlConnectionStringBuilder(_connectionString);
|
|
||||||
var databaseName = builder.InitialCatalog;
|
|
||||||
if (string.IsNullOrWhiteSpace(databaseName)) return;
|
|
||||||
|
|
||||||
builder.InitialCatalog = "master";
|
|
||||||
await using var connection = new SqlConnection(builder.ConnectionString);
|
|
||||||
await connection.OpenAsync(ct);
|
|
||||||
var safeName = databaseName.Replace("]", "]]" );
|
|
||||||
await using var command = new SqlCommand($"IF DB_ID(@DatabaseName) IS NULL EXEC('CREATE DATABASE [{safeName}]')", connection);
|
|
||||||
command.Parameters.AddWithValue("@DatabaseName", databaseName);
|
|
||||||
await command.ExecuteNonQueryAsync(ct);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -11,7 +11,11 @@
|
|||||||
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
|
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
|
||||||
<PackageReference Include="Azure.Identity" Version="1.21.0" />
|
<PackageReference Include="Azure.Identity" Version="1.21.0" />
|
||||||
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
||||||
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.1.3" />
|
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="10.0.0" />
|
||||||
|
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="10.0.0">
|
||||||
|
<PrivateAssets>all</PrivateAssets>
|
||||||
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
</PackageReference>
|
||||||
<PackageReference Include="PdfPig" Version="0.1.14" />
|
<PackageReference Include="PdfPig" Version="0.1.14" />
|
||||||
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
||||||
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
||||||
|
|||||||
Reference in New Issue
Block a user