Move RAG repository from rag-api to rag-data — consolidate data layer ownership

- Move IRagRepository, EfRagRepository, and VectorSerializer from rag-api/Data to rag-data/Repositories
- Add rag-api-models ProjectReference to rag-data.csproj for model type availability
- Delete rag-api/Data folder (no longer needed; all data access is now in rag-data)
- This aligns RAG with email-api and other services: all data code in the data project

Pattern: rag-api (API logic) → rag-data (repository, EF entities, migrations)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-05-29 09:38:25 +03:00
parent 36759d8fee
commit 487924e345
25 changed files with 125 additions and 57 deletions
+11
View File
@@ -0,0 +1,11 @@
namespace Rag.Data;
/// <summary>
/// Schema constants used by RagDbContext and migrations.
/// Centralized to avoid hardcoded strings and ensure consistency.
/// </summary>
public static class MigrationConstants
{
public const string SchemaName = "rag";
public const string MigrationTableName = "_Migrations";
}
@@ -1,5 +1,6 @@
using System;
using Microsoft.EntityFrameworkCore.Migrations;
using Rag.Data;
#nullable disable
@@ -12,11 +13,11 @@ namespace Rag.Data.Migrations
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.EnsureSchema(
name: "rag");
name: MigrationConstants.SchemaName);
migrationBuilder.CreateTable(
name: "ChatCompletionCache",
schema: "rag",
schema: MigrationConstants.SchemaName,
columns: table => new
{
CacheKey = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
@@ -32,7 +33,7 @@ namespace Rag.Data.Migrations
migrationBuilder.CreateTable(
name: "Documents",
schema: "rag",
schema: MigrationConstants.SchemaName,
columns: table => new
{
Id = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
@@ -52,7 +53,7 @@ namespace Rag.Data.Migrations
migrationBuilder.CreateTable(
name: "EmbeddingCache",
schema: "rag",
schema: MigrationConstants.SchemaName,
columns: table => new
{
CacheKey = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
@@ -68,7 +69,7 @@ namespace Rag.Data.Migrations
migrationBuilder.CreateTable(
name: "Chunks",
schema: "rag",
schema: MigrationConstants.SchemaName,
columns: table => new
{
Id = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
@@ -91,25 +92,25 @@ namespace Rag.Data.Migrations
migrationBuilder.CreateIndex(
name: "IX_Chunks_DocumentId",
schema: "rag",
schema: MigrationConstants.SchemaName,
table: "Chunks",
column: "DocumentId");
migrationBuilder.CreateIndex(
name: "IX_Documents_DocumentType",
schema: "rag",
schema: MigrationConstants.SchemaName,
table: "Documents",
column: "DocumentType");
migrationBuilder.CreateIndex(
name: "IX_Documents_TextHash",
schema: "rag",
schema: MigrationConstants.SchemaName,
table: "Documents",
column: "TextHash");
migrationBuilder.CreateIndex(
name: "IX_EmbeddingCache_TextHash",
schema: "rag",
schema: MigrationConstants.SchemaName,
table: "EmbeddingCache",
column: "TextHash");
}
@@ -119,19 +120,19 @@ namespace Rag.Data.Migrations
{
migrationBuilder.DropTable(
name: "ChatCompletionCache",
schema: "rag");
schema: MigrationConstants.SchemaName);
migrationBuilder.DropTable(
name: "Chunks",
schema: "rag");
schema: MigrationConstants.SchemaName);
migrationBuilder.DropTable(
name: "EmbeddingCache",
schema: "rag");
schema: MigrationConstants.SchemaName);
migrationBuilder.DropTable(
name: "Documents",
schema: "rag");
schema: MigrationConstants.SchemaName);
}
}
}
+2 -2
View File
@@ -5,8 +5,8 @@ namespace Rag.Data;
public sealed class RagDbContext : DbContext
{
public const string SchemaName = "rag";
public const string MigrationTableName = "_Migrations";
public const string SchemaName = MigrationConstants.SchemaName;
public const string MigrationTableName = MigrationConstants.MigrationTableName;
public RagDbContext(DbContextOptions<RagDbContext> options) : base(options)
{
@@ -0,0 +1,16 @@
using Rag.Models;
namespace Rag.Data.Repositories.Contracts;
public interface IRagRepository
{
Task InitializeAsync(CancellationToken ct);
Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct);
Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct);
Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct);
Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(float[] queryEmbedding, IReadOnlyList<string>? targetTypes, int topK, CancellationToken ct);
Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct);
Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct);
Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct);
Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct);
}
@@ -0,0 +1,196 @@
using Rag.Data;
using Rag.Data.Entities;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using Rag.Data.Repositories.Contracts;
using Rag.Models;
namespace Rag.Data.Repositories;
public sealed class EfRagRepository : IRagRepository
{
private readonly RagDbContext _db;
private readonly ILogger<EfRagRepository> _logger;
public EfRagRepository(RagDbContext db, ILogger<EfRagRepository> logger)
{
_db = db;
_logger = logger;
}
public async Task InitializeAsync(CancellationToken ct)
{
_logger.LogInformation("Ensuring RAG database schema exists using EF Core");
//await _db.Database.EnsureCreatedAsync(ct);
}
public async Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct)
{
var query = _db.RagDocuments
.AsNoTracking()
.Where(x => x.TextHash == textHash);
if (!string.IsNullOrWhiteSpace(sourceUrl))
{
query = query.Where(x => x.SourceUrl == sourceUrl);
}
var entity = await query
.OrderByDescending(x => x.CreatedAt)
.FirstOrDefaultAsync(ct);
return entity is null ? null : ToRecord(entity);
}
public async Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct)
{
var entity = await _db.RagDocuments
.AsNoTracking()
.FirstOrDefaultAsync(x => x.Id == id, ct);
return entity is null ? null : ToRecord(entity);
}
public async Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct)
{
var exists = await _db.RagDocuments.AnyAsync(x => x.Id == document.Id, ct);
if (exists)
{
_logger.LogInformation("RAG document already exists. DocumentId={DocumentId}", document.Id);
return;
}
var entity = new RagDocumentEntity
{
Id = document.Id,
DocumentType = document.DocumentType,
Title = document.Title,
SourceUrl = document.SourceUrl,
RawText = document.Text,
TextHash = document.TextHash,
TypeConfidence = document.TypeConfidence,
MetadataJson = document.MetadataJson,
CreatedAt = document.CreatedAt.UtcDateTime,
Chunks = chunks.Select(chunk => new RagChunkEntity
{
Id = chunk.Id,
DocumentId = chunk.DocumentId,
ChunkIndex = chunk.ChunkIndex,
Text = chunk.Text,
Embedding = VectorSerializer.ToBytes(chunk.Embedding)
}).ToList()
};
_db.RagDocuments.Add(entity);
await _db.SaveChangesAsync(ct);
}
public async Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(
float[] queryEmbedding,
IReadOnlyList<string>? targetTypes,
int topK,
CancellationToken ct)
{
var types = targetTypes?
.Where(x => !string.IsNullOrWhiteSpace(x))
.Select(x => x.Trim().ToLowerInvariant())
.Distinct()
.ToArray() ?? System.Array.Empty<string>();
var query = _db.RagChunks
.AsNoTracking()
.Include(x => x.Document)
.AsQueryable();
if (types.Length > 0)
{
query = query.Where(x => x.Document != null && types.Contains(x.Document.DocumentType.ToLower()));
}
var rows = await query.ToListAsync(ct);
return rows
.Where(x => x.Document is not null)
.Select(x => new SearchCandidateChunk
{
Document = ToRecord(x.Document!),
Chunk = new RagChunkRecord
{
Id = x.Id,
DocumentId = x.DocumentId,
ChunkIndex = x.ChunkIndex,
Text = x.Text,
Embedding = VectorSerializer.FromBytes(x.Embedding)
},
Score = VectorSerializer.CosineSimilarity(queryEmbedding, VectorSerializer.FromBytes(x.Embedding))
})
.OrderByDescending(x => x.Score)
.Take(Math.Max(topK * 4, topK))
.ToList();
}
public async Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct)
{
var entry = await _db.RagEmbeddingCache
.AsNoTracking()
.FirstOrDefaultAsync(x => x.CacheKey == cacheKey, ct);
return entry is null ? null : VectorSerializer.FromBytes(entry.Vector);
}
public async Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct)
{
var exists = await _db.RagEmbeddingCache.AnyAsync(x => x.CacheKey == cacheKey, ct);
if (exists) return;
_db.RagEmbeddingCache.Add(new RagEmbeddingCacheEntity
{
CacheKey = cacheKey,
Model = model,
TextHash = textHash,
Vector = VectorSerializer.ToBytes(vector),
CreatedAt = DateTime.UtcNow
});
await _db.SaveChangesAsync(ct);
}
public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
{
return await _db.RagChatCompletionCache
.AsNoTracking()
.Where(x => x.CacheKey == cacheKey)
.Select(x => x.ResponseText)
.FirstOrDefaultAsync(ct);
}
public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
{
var exists = await _db.RagChatCompletionCache.AnyAsync(x => x.CacheKey == cacheKey, ct);
if (exists) return;
_db.RagChatCompletionCache.Add(new RagChatCompletionCacheEntity
{
CacheKey = cacheKey,
Model = model,
Temperature = temperature,
ResponseText = responseText,
CreatedAt = DateTime.UtcNow
});
await _db.SaveChangesAsync(ct);
}
private static RagDocumentRecord ToRecord(RagDocumentEntity entity) => new()
{
Id = entity.Id,
DocumentType = entity.DocumentType,
Title = entity.Title,
SourceUrl = entity.SourceUrl,
Text = entity.RawText,
TextHash = entity.TextHash,
TypeConfidence = entity.TypeConfidence,
MetadataJson = entity.MetadataJson,
CreatedAt = new DateTimeOffset(DateTime.SpecifyKind(entity.CreatedAt, DateTimeKind.Utc))
};
}
@@ -0,0 +1,31 @@
namespace Rag.Data.Repositories;
public static class VectorSerializer
{
public static byte[] ToBytes(float[] vector)
{
var bytes = new byte[vector.Length * sizeof(float)];
Buffer.BlockCopy(vector, 0, bytes, 0, bytes.Length);
return bytes;
}
public static float[] FromBytes(byte[] bytes)
{
var vector = new float[bytes.Length / sizeof(float)];
Buffer.BlockCopy(bytes, 0, vector, 0, bytes.Length);
return vector;
}
public static double CosineSimilarity(float[] a, float[] b)
{
if (a.Length == 0 || a.Length != b.Length) return 0;
double dot = 0, magA = 0, magB = 0;
for (var i = 0; i < a.Length; i++)
{
dot += a[i] * b[i];
magA += a[i] * a[i];
magB += b[i] * b[i];
}
return magA == 0 || magB == 0 ? 0 : dot / (Math.Sqrt(magA) * Math.Sqrt(magB));
}
}
+1
View File
@@ -18,6 +18,7 @@
<ItemGroup>
<ProjectReference Include="..\shared-data\shared-data.csproj" />
<ProjectReference Include="..\rag-api-models\rag-api-models.csproj" />
</ItemGroup>
</Project>