This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
namespace Api.Data.Entities;
|
||||
|
||||
/// <summary>
/// Persistence entity holding one cached chat-completion response.
/// </summary>
public sealed class RagChatCompletionCacheEntity
{
    /// <summary>Lookup key of the cache entry; empty until assigned.</summary>
    public string CacheKey { get; set; } = "";

    /// <summary>Name of the model recorded with the response; empty until assigned.</summary>
    public string Model { get; set; } = "";

    /// <summary>Temperature value recorded with the response.</summary>
    public decimal Temperature { get; set; }

    /// <summary>The cached response text; empty until assigned.</summary>
    public string ResponseText { get; set; } = "";

    /// <summary>Creation timestamp; initialised to the current UTC time when the object is constructed.</summary>
    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
}
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace Api.Data.Entities;
|
||||
|
||||
/// <summary>
/// Persistence entity for a single text chunk of a document, together with its
/// embedding stored as raw bytes.
/// </summary>
public sealed class RagChunkEntity
{
    /// <summary>Identifier of the chunk; empty until assigned.</summary>
    public string Id { get; set; } = "";

    /// <summary>Identifier of the document this chunk belongs to; empty until assigned.</summary>
    public string DocumentId { get; set; } = "";

    /// <summary>Index of the chunk (presumably its position within the document — confirm with the chunker).</summary>
    public int ChunkIndex { get; set; }

    /// <summary>Text content of the chunk; empty until assigned.</summary>
    public string Text { get; set; } = "";

    /// <summary>Embedding serialized to bytes; an empty array until assigned.</summary>
    public byte[] Embedding { get; set; } = System.Array.Empty<byte>();

    /// <summary>Navigation to the owning document; <c>null</c> when it has not been loaded or set.</summary>
    public RagDocumentEntity? Document { get; set; }
}
|
||||
@@ -0,0 +1,16 @@
|
||||
namespace Api.Data.Entities;
|
||||
|
||||
/// <summary>
/// Persistence entity for an ingested document and the chunks derived from it.
/// </summary>
public sealed class RagDocumentEntity
{
    /// <summary>Identifier of the document; empty until assigned.</summary>
    public string Id { get; set; } = "";

    /// <summary>Type label of the document; empty until assigned.</summary>
    public string DocumentType { get; set; } = "";

    /// <summary>Title of the document; empty until assigned.</summary>
    public string Title { get; set; } = "";

    /// <summary>Optional URL the document came from; may be <c>null</c>.</summary>
    public string? SourceUrl { get; set; }

    /// <summary>Full text of the document; empty until assigned.</summary>
    public string RawText { get; set; } = "";

    /// <summary>Hash of the document text (algorithm is not visible here); empty until assigned.</summary>
    public string TextHash { get; set; } = "";

    /// <summary>Confidence value associated with <see cref="DocumentType"/>.</summary>
    public double TypeConfidence { get; set; }

    /// <summary>Metadata serialized as JSON text; defaults to an empty JSON object.</summary>
    public string MetadataJson { get; set; } = "{}";

    /// <summary>Creation timestamp; initialised to the current UTC time when the object is constructed.</summary>
    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;

    /// <summary>Chunks belonging to this document; starts as an empty, mutable list.</summary>
    public ICollection<RagChunkEntity> Chunks { get; set; } = new List<RagChunkEntity>();
}
|
||||
@@ -0,0 +1,10 @@
|
||||
namespace Api.Data.Entities;
|
||||
|
||||
/// <summary>
/// Persistence entity holding one cached embedding vector.
/// </summary>
public sealed class RagEmbeddingCacheEntity
{
    /// <summary>Lookup key of the cache entry; empty until assigned.</summary>
    public string CacheKey { get; set; } = "";

    /// <summary>Name of the model recorded with the vector; empty until assigned.</summary>
    public string Model { get; set; } = "";

    /// <summary>Hash of the embedded text (algorithm is not visible here); empty until assigned.</summary>
    public string TextHash { get; set; } = "";

    /// <summary>Embedding vector serialized to bytes; an empty array until assigned.</summary>
    public byte[] Vector { get; set; } = System.Array.Empty<byte>();

    /// <summary>Creation timestamp; initialised to the current UTC time when the object is constructed.</summary>
    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
}
|
||||
@@ -0,0 +1,77 @@
|
||||
using Api.Data.Entities;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
|
||||
namespace Api.Data;
|
||||
|
||||
/// <summary>
/// EF Core context for the RAG tables. All entities are mapped into the
/// <see cref="SchemaName"/> schema.
/// </summary>
public sealed class RagDbContext : DbContext
{
    /// <summary>Default schema applied to every table configured by this context.</summary>
    public const string SchemaName = "rag";

    /// <summary>
    /// Name for the migrations table. It is not referenced inside this class;
    /// presumably consumed where the provider is configured (verify at the call site).
    /// </summary>
    public const string MigrationTableName = "_Migrations";

    /// <summary>Creates the context with the supplied options.</summary>
    /// <param name="options">Options forwarded to <see cref="DbContext"/>.</param>
    public RagDbContext(DbContextOptions<RagDbContext> options)
        : base(options)
    {
    }

    /// <summary>Documents set.</summary>
    public DbSet<RagDocumentEntity> RagDocuments => Set<RagDocumentEntity>();

    /// <summary>Chunks set.</summary>
    public DbSet<RagChunkEntity> RagChunks => Set<RagChunkEntity>();

    /// <summary>Embedding cache set.</summary>
    public DbSet<RagEmbeddingCacheEntity> RagEmbeddingCache => Set<RagEmbeddingCacheEntity>();

    /// <summary>Chat-completion cache set.</summary>
    public DbSet<RagChatCompletionCacheEntity> RagChatCompletionCache => Set<RagChatCompletionCacheEntity>();

    /// <summary>
    /// Applies the default schema and then configures each entity in turn.
    /// </summary>
    /// <param name="modelBuilder">Builder supplied by EF Core.</param>
    protected override void OnModelCreating(ModelBuilder modelBuilder)
    {
        modelBuilder.HasDefaultSchema(SchemaName);

        ConfigureDocuments(modelBuilder);
        ConfigureChunks(modelBuilder);
        ConfigureEmbeddingCache(modelBuilder);
        ConfigureChatCompletionCache(modelBuilder);
    }

    // Maps RagDocumentEntity to the "Documents" table: key, column limits, defaults and indexes.
    private static void ConfigureDocuments(ModelBuilder modelBuilder)
    {
        var document = modelBuilder.Entity<RagDocumentEntity>();

        document.ToTable("Documents");
        document.HasKey(d => d.Id);
        document.Property(d => d.Id).HasMaxLength(64);
        document.Property(d => d.DocumentType).HasMaxLength(80).IsRequired();
        document.Property(d => d.Title).HasMaxLength(300).IsRequired();
        document.Property(d => d.SourceUrl).HasMaxLength(1200);
        document.Property(d => d.RawText).IsRequired();
        document.Property(d => d.TextHash).HasMaxLength(64).IsRequired();
        document.Property(d => d.MetadataJson).HasDefaultValue("{}").IsRequired();
        // Database-side default; SYSUTCDATETIME() is a SQL Server function.
        document.Property(d => d.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
        document.HasIndex(d => d.TextHash);
        document.HasIndex(d => d.DocumentType);
    }

    // Maps RagChunkEntity to the "Chunks" table and wires the cascade-deleting link to its document.
    private static void ConfigureChunks(ModelBuilder modelBuilder)
    {
        var chunk = modelBuilder.Entity<RagChunkEntity>();

        chunk.ToTable("Chunks");
        chunk.HasKey(c => c.Id);
        chunk.Property(c => c.Id).HasMaxLength(64);
        chunk.Property(c => c.DocumentId).HasMaxLength(64).IsRequired();
        chunk.Property(c => c.Text).IsRequired();
        chunk.Property(c => c.Embedding).IsRequired();

        chunk
            .HasOne(c => c.Document)
            .WithMany(d => d.Chunks)
            .HasForeignKey(c => c.DocumentId)
            .OnDelete(DeleteBehavior.Cascade);
    }

    // Maps RagEmbeddingCacheEntity to the "EmbeddingCache" table, keyed by CacheKey.
    private static void ConfigureEmbeddingCache(ModelBuilder modelBuilder)
    {
        var cache = modelBuilder.Entity<RagEmbeddingCacheEntity>();

        cache.ToTable("EmbeddingCache");
        cache.HasKey(e => e.CacheKey);
        cache.Property(e => e.CacheKey).HasMaxLength(64);
        cache.Property(e => e.Model).HasMaxLength(120).IsRequired();
        cache.Property(e => e.TextHash).HasMaxLength(64).IsRequired();
        cache.Property(e => e.Vector).IsRequired();
        cache.Property(e => e.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
        cache.HasIndex(e => e.TextHash);
    }

    // Maps RagChatCompletionCacheEntity to the "ChatCompletionCache" table, keyed by CacheKey.
    private static void ConfigureChatCompletionCache(ModelBuilder modelBuilder)
    {
        var cache = modelBuilder.Entity<RagChatCompletionCacheEntity>();

        cache.ToTable("ChatCompletionCache");
        cache.HasKey(e => e.CacheKey);
        cache.Property(e => e.CacheKey).HasMaxLength(64);
        cache.Property(e => e.Model).HasMaxLength(120).IsRequired();
        cache.Property(e => e.Temperature).HasColumnType("decimal(4,2)");
        cache.Property(e => e.ResponseText).IsRequired();
        cache.Property(e => e.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");
    }
}
|
||||
@@ -0,0 +1,16 @@
|
||||
using Rag.Models;
|
||||
|
||||
namespace Api.Data.Repositories.Contracts;
|
||||
|
||||
/// <summary>
/// Storage contract for RAG documents, their chunks, and the embedding and
/// chat-completion caches.
/// </summary>
public interface IRagRepository
{
    /// <summary>Performs any start-up work the store needs.</summary>
    /// <param name="ct">Cancellation token.</param>
    Task InitializeAsync(CancellationToken ct);

    /// <summary>Looks up a document by text hash, optionally together with a source URL.</summary>
    /// <param name="textHash">Hash of the document text.</param>
    /// <param name="sourceUrl">Optional source URL; may be <c>null</c>.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The document, or <c>null</c> when none is found.</returns>
    Task<RagDocumentRecord?> GetDocumentByTextHashAsync(
        string textHash,
        string? sourceUrl,
        CancellationToken ct);

    /// <summary>Looks up a document by identifier.</summary>
    /// <param name="id">Document identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The document, or <c>null</c> when none is found.</returns>
    Task<RagDocumentRecord?> GetDocumentByIdAsync(
        string id,
        CancellationToken ct);

    /// <summary>Stores a document together with its chunks.</summary>
    /// <param name="document">Document to store.</param>
    /// <param name="chunks">Chunks belonging to the document.</param>
    /// <param name="ct">Cancellation token.</param>
    Task SaveDocumentAsync(
        RagDocumentRecord document,
        IReadOnlyList<RagChunkRecord> chunks,
        CancellationToken ct);

    /// <summary>Finds candidate chunks for a query embedding.</summary>
    /// <param name="queryEmbedding">Embedding of the query.</param>
    /// <param name="targetTypes">Optional document types to restrict the search to; may be <c>null</c>.</param>
    /// <param name="topK">Requested number of results (an implementation may return more candidates — confirm with the caller).</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The candidate chunks.</returns>
    Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(
        float[] queryEmbedding,
        IReadOnlyList<string>? targetTypes,
        int topK,
        CancellationToken ct);

    /// <summary>Reads a cached embedding.</summary>
    /// <param name="cacheKey">Cache key.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The cached vector, or <c>null</c> when there is no entry.</returns>
    Task<float[]?> GetEmbeddingAsync(
        string cacheKey,
        CancellationToken ct);

    /// <summary>Stores an embedding in the cache.</summary>
    /// <param name="cacheKey">Cache key.</param>
    /// <param name="model">Model name recorded with the vector.</param>
    /// <param name="textHash">Hash of the embedded text.</param>
    /// <param name="vector">Embedding vector.</param>
    /// <param name="ct">Cancellation token.</param>
    Task SaveEmbeddingAsync(
        string cacheKey,
        string model,
        string textHash,
        float[] vector,
        CancellationToken ct);

    /// <summary>Reads a cached chat-completion response.</summary>
    /// <param name="cacheKey">Cache key.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The cached response text, or <c>null</c> when there is no entry.</returns>
    Task<string?> GetChatCompletionAsync(
        string cacheKey,
        CancellationToken ct);

    /// <summary>Stores a chat-completion response in the cache.</summary>
    /// <param name="cacheKey">Cache key.</param>
    /// <param name="model">Model name recorded with the response.</param>
    /// <param name="temperature">Temperature recorded with the response.</param>
    /// <param name="responseText">Response text to cache.</param>
    /// <param name="ct">Cancellation token.</param>
    Task SaveChatCompletionAsync(
        string cacheKey,
        string model,
        decimal temperature,
        string responseText,
        CancellationToken ct);
}
|
||||
@@ -0,0 +1,195 @@
|
||||
using Api.Data;
|
||||
using Api.Data.Entities;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Api.Data.Repositories.Contracts;
|
||||
using Rag.Models;
|
||||
|
||||
namespace Api.Data.Repositories;
|
||||
|
||||
/// <summary>
/// <see cref="IRagRepository"/> implementation that reads and writes through
/// <see cref="RagDbContext"/>.
/// </summary>
public sealed class EfRagRepository : IRagRepository
{
    // SearchChunksAsync returns up to topK * this factor candidates
    // (presumably so the caller can re-rank or filter — confirm with the caller).
    private const int CandidateOversampleFactor = 4;

    private readonly RagDbContext _db;
    private readonly ILogger<EfRagRepository> _logger;

    /// <summary>Creates the repository.</summary>
    /// <param name="db">Context used for all queries and saves.</param>
    /// <param name="logger">Logger for informational messages.</param>
    public EfRagRepository(RagDbContext db, ILogger<EfRagRepository> logger)
    {
        _db = db;
        _logger = logger;
    }

    /// <summary>
    /// Logs the initialization message. No database work is performed: the
    /// <c>EnsureCreatedAsync</c> call is commented out.
    /// </summary>
    /// <param name="ct">Cancellation token (currently unused).</param>
    /// <returns>A completed task.</returns>
    public Task InitializeAsync(CancellationToken ct)
    {
        _logger.LogInformation("Ensuring RAG database schema exists using EF Core");
        //await _db.Database.EnsureCreatedAsync(ct);

        // Nothing is awaited, so return a completed task instead of using an
        // async state machine with no await.
        return Task.CompletedTask;
    }

    /// <summary>
    /// Returns the most recently created document with the given text hash. When
    /// <paramref name="sourceUrl"/> is not blank, the source URL must match too.
    /// </summary>
    /// <param name="textHash">Text hash to match.</param>
    /// <param name="sourceUrl">Optional source URL filter; ignored when null or whitespace.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The matching document, or <c>null</c> when there is none.</returns>
    public async Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct)
    {
        var query = _db.RagDocuments
            .AsNoTracking()
            .Where(x => x.TextHash == textHash);

        if (!string.IsNullOrWhiteSpace(sourceUrl))
        {
            query = query.Where(x => x.SourceUrl == sourceUrl);
        }

        var entity = await query
            .OrderByDescending(x => x.CreatedAt)
            .FirstOrDefaultAsync(ct);

        return entity is null ? null : ToRecord(entity);
    }

    /// <summary>Returns the document with the given identifier.</summary>
    /// <param name="id">Document identifier.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The document, or <c>null</c> when there is none.</returns>
    public async Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct)
    {
        var entity = await _db.RagDocuments
            .AsNoTracking()
            .FirstOrDefaultAsync(x => x.Id == id, ct);

        return entity is null ? null : ToRecord(entity);
    }

    /// <summary>
    /// Inserts the document and its chunks in a single save. When a document with
    /// the same identifier already exists, logs that fact and does nothing.
    /// </summary>
    /// <param name="document">Document to insert.</param>
    /// <param name="chunks">Chunks to insert with the document.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="document"/> or <paramref name="chunks"/> is <c>null</c>.
    /// </exception>
    public async Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(document);
        ArgumentNullException.ThrowIfNull(chunks);

        // NOTE(review): check-then-insert is not atomic; a concurrent insert of the
        // same id would surface as a save failure rather than being skipped.
        var exists = await _db.RagDocuments.AnyAsync(x => x.Id == document.Id, ct);
        if (exists)
        {
            _logger.LogInformation("RAG document already exists. DocumentId={DocumentId}", document.Id);
            return;
        }

        var entity = new RagDocumentEntity
        {
            Id = document.Id,
            DocumentType = document.DocumentType,
            Title = document.Title,
            SourceUrl = document.SourceUrl,
            RawText = document.Text,
            TextHash = document.TextHash,
            TypeConfidence = document.TypeConfidence,
            MetadataJson = document.MetadataJson,
            CreatedAt = document.CreatedAt.UtcDateTime,
            Chunks = chunks.Select(chunk => new RagChunkEntity
            {
                Id = chunk.Id,
                DocumentId = chunk.DocumentId,
                ChunkIndex = chunk.ChunkIndex,
                Text = chunk.Text,
                Embedding = VectorSerializer.ToBytes(chunk.Embedding)
            }).ToList()
        };

        _db.RagDocuments.Add(entity);
        await _db.SaveChangesAsync(ct);
    }

    /// <summary>
    /// Loads chunks (optionally restricted to the given document types), scores each
    /// against <paramref name="queryEmbedding"/> with cosine similarity in memory,
    /// and returns the highest-scoring candidates in descending score order.
    /// </summary>
    /// <param name="queryEmbedding">Embedding of the query.</param>
    /// <param name="targetTypes">
    /// Optional document types. Blank entries are dropped; the rest are trimmed,
    /// lower-cased and de-duplicated. No type filter is applied when none remain.
    /// </param>
    /// <param name="topK">
    /// Requested result count. Up to <c>topK * 4</c> candidates are returned; a
    /// non-positive value yields an empty list.
    /// </param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Scored candidate chunks, best first.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="queryEmbedding"/> is <c>null</c>.</exception>
    public async Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(
        float[] queryEmbedding,
        IReadOnlyList<string>? targetTypes,
        int topK,
        CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(queryEmbedding);

        // Take(n) with n <= 0 always produces nothing, so skip loading every chunk.
        if (topK <= 0)
        {
            return System.Array.Empty<SearchCandidateChunk>();
        }

        var types = targetTypes?
            .Where(x => !string.IsNullOrWhiteSpace(x))
            .Select(x => x.Trim().ToLowerInvariant())
            .Distinct()
            .ToArray() ?? System.Array.Empty<string>();

        var query = _db.RagChunks
            .AsNoTracking()
            .Include(x => x.Document)
            .AsQueryable();

        if (types.Length > 0)
        {
            query = query.Where(x => x.Document != null && types.Contains(x.Document.DocumentType.ToLower()));
        }

        // Similarity is computed client-side, so every matching chunk is materialized.
        var rows = await query.ToListAsync(ct);

        var candidates = new List<SearchCandidateChunk>(rows.Count);
        foreach (var row in rows)
        {
            if (row.Document is null)
            {
                continue;
            }

            // Deserialize once and reuse for both the record and the score.
            var embedding = VectorSerializer.FromBytes(row.Embedding);

            candidates.Add(new SearchCandidateChunk
            {
                Document = ToRecord(row.Document),
                Chunk = new RagChunkRecord
                {
                    Id = row.Id,
                    DocumentId = row.DocumentId,
                    ChunkIndex = row.ChunkIndex,
                    Text = row.Text,
                    Embedding = embedding
                },
                Score = VectorSerializer.CosineSimilarity(queryEmbedding, embedding)
            });
        }

        // Math.Max keeps the limit at least topK if the multiplication overflows.
        return candidates
            .OrderByDescending(x => x.Score)
            .Take(Math.Max(topK * CandidateOversampleFactor, topK))
            .ToList();
    }

    /// <summary>Reads a cached embedding vector.</summary>
    /// <param name="cacheKey">Cache key.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The deserialized vector, or <c>null</c> when there is no entry.</returns>
    public async Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct)
    {
        var entry = await _db.RagEmbeddingCache
            .AsNoTracking()
            .FirstOrDefaultAsync(x => x.CacheKey == cacheKey, ct);

        return entry is null ? null : VectorSerializer.FromBytes(entry.Vector);
    }

    /// <summary>
    /// Adds an embedding cache entry unless one with the same key already exists.
    /// </summary>
    /// <param name="cacheKey">Cache key.</param>
    /// <param name="model">Model name stored with the entry.</param>
    /// <param name="textHash">Text hash stored with the entry.</param>
    /// <param name="vector">Vector to serialize and store.</param>
    /// <param name="ct">Cancellation token.</param>
    public async Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct)
    {
        var exists = await _db.RagEmbeddingCache.AnyAsync(x => x.CacheKey == cacheKey, ct);
        if (exists)
        {
            return;
        }

        _db.RagEmbeddingCache.Add(new RagEmbeddingCacheEntity
        {
            CacheKey = cacheKey,
            Model = model,
            TextHash = textHash,
            Vector = VectorSerializer.ToBytes(vector),
            CreatedAt = DateTime.UtcNow
        });

        await _db.SaveChangesAsync(ct);
    }

    /// <summary>Reads a cached chat-completion response.</summary>
    /// <param name="cacheKey">Cache key.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The response text, or <c>null</c> when there is no entry.</returns>
    public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
    {
        return await _db.RagChatCompletionCache
            .AsNoTracking()
            .Where(x => x.CacheKey == cacheKey)
            .Select(x => x.ResponseText)
            .FirstOrDefaultAsync(ct);
    }

    /// <summary>
    /// Adds a chat-completion cache entry unless one with the same key already exists.
    /// </summary>
    /// <param name="cacheKey">Cache key.</param>
    /// <param name="model">Model name stored with the entry.</param>
    /// <param name="temperature">Temperature stored with the entry.</param>
    /// <param name="responseText">Response text to store.</param>
    /// <param name="ct">Cancellation token.</param>
    public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
    {
        var exists = await _db.RagChatCompletionCache.AnyAsync(x => x.CacheKey == cacheKey, ct);
        if (exists)
        {
            return;
        }

        _db.RagChatCompletionCache.Add(new RagChatCompletionCacheEntity
        {
            CacheKey = cacheKey,
            Model = model,
            Temperature = temperature,
            ResponseText = responseText,
            CreatedAt = DateTime.UtcNow
        });

        await _db.SaveChangesAsync(ct);
    }

    // Copies an entity into the record type used by callers. The stored CreatedAt
    // is stamped as UTC before being wrapped in a DateTimeOffset.
    private static RagDocumentRecord ToRecord(RagDocumentEntity entity) => new()
    {
        Id = entity.Id,
        DocumentType = entity.DocumentType,
        Title = entity.Title,
        SourceUrl = entity.SourceUrl,
        Text = entity.RawText,
        TextHash = entity.TextHash,
        TypeConfidence = entity.TypeConfidence,
        MetadataJson = entity.MetadataJson,
        CreatedAt = new DateTimeOffset(DateTime.SpecifyKind(entity.CreatedAt, DateTimeKind.Utc))
    };
}
|
||||
@@ -0,0 +1,31 @@
|
||||
namespace Api.Data.Repositories;
|
||||
|
||||
/// <summary>
/// Helpers for converting <see cref="float"/> vectors to and from byte arrays and
/// for comparing vectors by cosine similarity.
/// </summary>
public static class VectorSerializer
{
    /// <summary>
    /// Copies the raw bytes of <paramref name="vector"/> into a new array of
    /// <c>vector.Length * sizeof(float)</c> bytes.
    /// </summary>
    /// <param name="vector">Vector to serialize.</param>
    /// <returns>The byte representation; empty for an empty vector.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="vector"/> is <c>null</c>.</exception>
    public static byte[] ToBytes(float[] vector)
    {
        ArgumentNullException.ThrowIfNull(vector);

        // NOTE(review): Buffer.BlockCopy copies memory as-is, so the layout follows
        // the machine's byte order — confirm all readers/writers share it.
        var bytes = new byte[vector.Length * sizeof(float)];
        Buffer.BlockCopy(vector, 0, bytes, 0, bytes.Length);
        return bytes;
    }

    /// <summary>
    /// Rebuilds a vector from bytes produced by <see cref="ToBytes"/>.
    /// </summary>
    /// <param name="bytes">Serialized vector; its length must be a multiple of <c>sizeof(float)</c>.</param>
    /// <returns>The deserialized vector; empty for empty input.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">The length of <paramref name="bytes"/> is not a multiple of <c>sizeof(float)</c>.</exception>
    public static float[] FromBytes(byte[] bytes)
    {
        ArgumentNullException.ThrowIfNull(bytes);

        // A trailing partial float would overrun the destination in BlockCopy;
        // fail with a message that names the actual problem instead.
        if (bytes.Length % sizeof(float) != 0)
        {
            throw new ArgumentException(
                $"Byte length {bytes.Length} is not a multiple of {sizeof(float)}.",
                nameof(bytes));
        }

        var vector = new float[bytes.Length / sizeof(float)];
        Buffer.BlockCopy(bytes, 0, vector, 0, bytes.Length);
        return vector;
    }

    /// <summary>
    /// Computes the cosine similarity of two vectors.
    /// </summary>
    /// <param name="a">First vector.</param>
    /// <param name="b">Second vector.</param>
    /// <returns>
    /// The cosine similarity, or <c>0</c> when the vectors are empty, differ in
    /// length, or either has zero magnitude.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is <c>null</c>.</exception>
    public static double CosineSimilarity(float[] a, float[] b)
    {
        ArgumentNullException.ThrowIfNull(a);
        ArgumentNullException.ThrowIfNull(b);

        if (a.Length == 0 || a.Length != b.Length)
        {
            return 0;
        }

        double dot = 0, magA = 0, magB = 0;
        for (var i = 0; i < a.Length; i++)
        {
            // Widen before multiplying: float * float is rounded to single
            // precision and can overflow to Infinity before being accumulated.
            double x = a[i];
            double y = b[i];

            dot += x * y;
            magA += x * x;
            magB += y * y;
        }

        if (magA == 0 || magB == 0)
        {
            return 0;
        }

        return dot / (Math.Sqrt(magA) * Math.Sqrt(magB));
    }
}
|
||||
Reference in New Issue
Block a user