Changes
Build and Push Docker Images / build (push) Successful in 37s

This commit is contained in:
2026-05-04 21:02:35 +03:00
parent 34625ae242
commit fa1ef23c02
87 changed files with 3151 additions and 522 deletions
+116
View File
@@ -0,0 +1,116 @@
# MyAi RAG split cleanup
## Public `api`
The `api` project is now only the public gateway for the existing frontend.
It keeps:
- contact API
- file download API
- Google/config APIs
- health API
- `api/rag/*` proxy endpoints
It no longer contains local RAG processing code. The removed responsibilities are:
- PDF extraction
- chunking
- embeddings
- vector storage
- OpenAI/Ollama calls
- job text extraction
- CV matching business logic
`api/Controllers/RagController.cs` is intentionally kept. It proxies the current frontend calls:
- `POST /api/rag/cv` -> `cv-matcher-api /api/cv/upload`
- `POST /api/rag/match-job` -> `cv-matcher-api /api/cv/match-job`
Required public API config:
```json
"CvMatcherApi": {
"BaseUrl": "http://cv-matcher-api:8080",
"InternalApiKey": "change-this-internal-key"
}
```
## `cv-matcher-api`
Business API for CV/job workflows.
Main endpoints:
- `POST /api/cv/upload`
- `POST /api/cv/match-job`
- `POST /api/cv/find-jobs`
- `GET /health`
- Swagger: `/swagger`
Responsibilities:
- CV matcher business logic
- job URL/text extraction
- final LLM scoring
- result persistence
- email sending
- calls `rag-api` for generic semantic indexing/search
## `rag-api`
Generic semantic search API.
Main endpoints:
- `POST /api/rag/documents`
- `POST /api/rag/documents/json`
- `POST /api/rag/search`
- `GET /api/rag/documents/{id}`
- `GET /health`
- Swagger: `/swagger`
Responsibilities:
- generic document indexing
- automatic document type classification when type is missing
- PDF/text extraction
- chunking
- embedding creation
- embedding and chat completion cache
- semantic search over generic documents
## Logging and Swagger
All three APIs now have:
- Serilog startup logging
- Serilog request logging
- structured JSON console logs
- health endpoint
- Swagger/OpenAPI support
Swagger is enabled by default and can be disabled per service with:
```json
"Swagger": {
"Enabled": false
}
```
## Internal API security
Both internal APIs (`cv-matcher-api` and `rag-api`) support API-key protection:
```json
"InternalApi": {
"RequireApiKey": true,
"ApiKey": "change-this-internal-key"
}
```
When `RequireApiKey` is `true`, requests to an internal API must include this header:
```http
X-Internal-Api-Key: change-this-internal-key
```
+3 -2
View File
@@ -1,4 +1,5 @@
using Api.Models;
using Api.Services.Contracts.Models;
using Api.Requests;
using Api.Services.Contracts;
using Api.Settings;
using Microsoft.AspNetCore.Cors;
@@ -118,7 +119,7 @@ namespace Api.Controllers
/// <param name="token">Client-provided reCAPTCHA token.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Tuple containing the verification verdict and user IP.</returns>
private async Task<(CaptchaVerdict Verdict, string? UserIp)> ValidateCaptcha(string token, CancellationToken ct)
private async Task<(CaptchaVerdictModel Verdict, string? UserIp)> ValidateCaptcha(string token, CancellationToken ct)
{
var userIp = HttpContext.Connection.RemoteIpAddress?.ToString();
var verdict = await _captcha.VerifyAsync(token, userIp, ct);
+104 -20
View File
@@ -1,8 +1,9 @@
using api.Services.Contracts.Rag;
using Api.Models.Rag;
using Api.Services.Rag;
using Api.Requests;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.RateLimiting;
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
namespace Api.Controllers;
@@ -11,52 +12,135 @@ namespace Api.Controllers;
[EnableRateLimiting("rag")]
public sealed class RagController : ControllerBase
{
private readonly ICvRagService _cvRagService;
private readonly IHttpClientFactory _httpClientFactory;
private readonly IConfiguration _configuration;
private readonly ILogger<RagController> _logger;
public RagController(ICvRagService cvRagService, ILogger<RagController> logger)
public RagController(
IHttpClientFactory httpClientFactory,
IConfiguration configuration,
ILogger<RagController> logger)
{
_cvRagService = cvRagService;
_httpClientFactory = httpClientFactory;
_configuration = configuration;
_logger = logger;
}
[HttpPost("cv")]
[RequestSizeLimit(8 * 1024 * 1024)]
public async Task<IActionResult> UploadCv([FromForm(Name = "cv")] IFormFile? cv, [FromForm] bool gdprConsent, CancellationToken ct)
[ProducesResponseType(StatusCodes.Status200OK)]
[ProducesResponseType(StatusCodes.Status400BadRequest)]
[ProducesResponseType(StatusCodes.Status502BadGateway)]
public async Task<IActionResult> UploadCv(
[FromForm(Name = "cv")] IFormFile? cv,
[FromForm] bool gdprConsent,
CancellationToken ct)
{
if (cv is null)
{
return BadRequest(new { error = "Missing CV PDF." });
}
var baseUrl = GetCvMatcherBaseUrl();
if (string.IsNullOrWhiteSpace(baseUrl))
{
_logger.LogError("CvMatcherApi:BaseUrl is not configured. The public API cannot proxy CV upload requests.");
return StatusCode(StatusCodes.Status502BadGateway, new { error = "CV matcher API is not configured." });
}
try
{
if (cv is null) return BadRequest(new { error = "Missing CV PDF." });
var result = await _cvRagService.IngestCvAsync(cv, gdprConsent, ct);
return Ok(result);
_logger.LogInformation("Proxying CV upload to cv-matcher-api. FileName={FileName}, Size={SizeBytes}, GdprConsent={GdprConsent}",
cv.FileName, cv.Length, gdprConsent);
using var client = CreateCvMatcherClient(baseUrl);
using var form = new MultipartFormDataContent();
await using var stream = cv.OpenReadStream();
using var fileContent = new StreamContent(stream);
fileContent.Headers.ContentType = new MediaTypeHeaderValue("application/pdf");
form.Add(fileContent, "cv", cv.FileName);
form.Add(new StringContent(gdprConsent.ToString().ToLowerInvariant()), "gdprConsent");
using var response = await client.PostAsync("api/cv/upload", form, ct);
return await ProxyResponseAsync(response, ct);
}
catch (InvalidOperationException ex)
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
return BadRequest(new { error = ex.Message });
_logger.LogWarning("CV upload proxy request was cancelled by the client.");
return StatusCode(499, new { error = "Request cancelled." });
}
catch (Exception ex)
{
_logger.LogError(ex, "CV ingestion failed");
return StatusCode(500, new { error = "CV ingestion failed." });
_logger.LogError(ex, "CV upload proxy request failed.");
return StatusCode(StatusCodes.Status502BadGateway, new { error = "CV matcher API request failed." });
}
}
[HttpPost("match-job")]
[ProducesResponseType(StatusCodes.Status200OK)]
[ProducesResponseType(StatusCodes.Status400BadRequest)]
[ProducesResponseType(StatusCodes.Status502BadGateway)]
public async Task<IActionResult> MatchJob([FromBody] JobMatchRequest request, CancellationToken ct)
{
var baseUrl = GetCvMatcherBaseUrl();
if (string.IsNullOrWhiteSpace(baseUrl))
{
_logger.LogError("CvMatcherApi:BaseUrl is not configured. The public API cannot proxy job matching requests.");
return StatusCode(StatusCodes.Status502BadGateway, new { error = "CV matcher API is not configured." });
}
try
{
var result = await _cvRagService.MatchJobAsync(request, ct);
return Ok(result);
_logger.LogInformation("Proxying job match request to cv-matcher-api. CvDocumentId={CvDocumentId}, HasJobUrl={HasJobUrl}, HasJobDescription={HasJobDescription}",
request.CvDocumentId,
!string.IsNullOrWhiteSpace(request.JobUrl),
!string.IsNullOrWhiteSpace(request.JobDescription));
using var client = CreateCvMatcherClient(baseUrl);
var json = JsonSerializer.Serialize(request, new JsonSerializerOptions(JsonSerializerDefaults.Web));
using var response = await client.PostAsync(
"api/cv/match-job",
new StringContent(json, Encoding.UTF8, "application/json"),
ct);
return await ProxyResponseAsync(response, ct);
}
catch (InvalidOperationException ex)
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
return BadRequest(new { error = ex.Message });
_logger.LogWarning("Job match proxy request was cancelled by the client.");
return StatusCode(499, new { error = "Request cancelled." });
}
catch (Exception ex)
{
_logger.LogError(ex, "Job matching failed");
return StatusCode(500, new { error = "Job matching failed." });
_logger.LogError(ex, "Job match proxy request failed.");
return StatusCode(StatusCodes.Status502BadGateway, new { error = "CV matcher API request failed." });
}
}
/// <summary>
/// Reads the cv-matcher-api base URL from the <c>CvMatcherApi:BaseUrl</c> configuration key.
/// </summary>
/// <returns>The configured value, or an empty string when the key is not present.</returns>
private string GetCvMatcherBaseUrl()
{
    var configured = _configuration["CvMatcherApi:BaseUrl"];
    return configured ?? string.Empty;
}
/// <summary>
/// Creates the named <c>CvMatcherApi</c> HTTP client and points it at the supplied base URL.
/// </summary>
/// <param name="baseUrl">Base URL of cv-matcher-api; trailing slashes are normalised to exactly one.</param>
/// <returns>
/// A client whose <see cref="HttpClient.BaseAddress"/> is set and which carries the
/// <c>X-Internal-Api-Key</c> header when <c>CvMatcherApi:InternalApiKey</c> is configured.
/// </returns>
private HttpClient CreateCvMatcherClient(string baseUrl)
{
    const string apiKeyHeader = "X-Internal-Api-Key";

    var httpClient = _httpClientFactory.CreateClient("CvMatcherApi");

    // A single trailing slash keeps relative request paths (e.g. "api/cv/upload") under the base URL.
    var normalizedBase = baseUrl.TrimEnd('/') + "/";
    httpClient.BaseAddress = new Uri(normalizedBase);

    var internalKey = _configuration["CvMatcherApi:InternalApiKey"];

    // Nothing to add when no key is configured or the header is already present on the client.
    if (string.IsNullOrWhiteSpace(internalKey) || httpClient.DefaultRequestHeaders.Contains(apiKeyHeader))
    {
        return httpClient;
    }

    httpClient.DefaultRequestHeaders.Add(apiKeyHeader, internalKey);
    return httpClient;
}
/// <summary>
/// Converts an upstream HTTP response into a <see cref="ContentResult"/> that mirrors its
/// status code, body and content type.
/// </summary>
/// <param name="response">Response received from the upstream API.</param>
/// <param name="ct">Cancellation token used while reading the response body.</param>
/// <returns>
/// A result carrying the upstream status code and body; the content type falls back to
/// <c>application/json</c> when the upstream response does not declare one.
/// </returns>
private static async Task<ContentResult> ProxyResponseAsync(HttpResponseMessage response, CancellationToken ct)
{
    var payload = await response.Content.ReadAsStringAsync(ct);
    var declaredType = response.Content.Headers.ContentType;

    var result = new ContentResult();
    result.StatusCode = (int)response.StatusCode;
    result.Content = payload;
    result.ContentType = declaredType?.ToString() ?? "application/json";
    return result;
}
}
-43
View File
@@ -1,43 +0,0 @@
namespace Api.Models.Rag;
/// <summary>
/// Result returned by <c>ICvRagService.IngestCvAsync</c> after a CV has been indexed.
/// </summary>
/// <param name="DocumentId">Identifier generated for the ingested CV (<c>cv_</c> followed by a GUID).</param>
/// <param name="Chunks">Number of chunks that were embedded and stored.</param>
/// <param name="CharactersExtracted">Length of the text extracted from the PDF.</param>
/// <param name="Summary">One-sentence summary of the CV, or the fallback text "CV indexed.".</param>
public sealed record CvIngestResponse(
string DocumentId,
int Chunks,
int CharactersExtracted,
string Summary
);
/// <summary>
/// Request body for matching a previously ingested CV against a job posting.
/// </summary>
public sealed class JobMatchRequest
{
/// <summary>Identifier of the ingested CV; the matcher rejects the request when it is missing.</summary>
public string? CvDocumentId { get; set; }
/// <summary>Optional URL of the job posting, passed to the job text extractor.</summary>
public string? JobUrl { get; set; }
/// <summary>Optional pasted job description, passed to the job text extractor.</summary>
public string? JobDescription { get; set; }
/// <summary>Must be <c>true</c>; the matcher rejects requests without GDPR consent.</summary>
public bool GdprConsent { get; set; }
}
/// <summary>
/// Structured CV-vs-job assessment deserialized from the chat model's JSON answer.
/// </summary>
public sealed class JobMatchResponse
{
/// <summary>Match score; clamped to the range 0-100 when the model output is parsed.</summary>
public int Score { get; set; }
/// <summary>Short overall assessment.</summary>
public string Summary { get; set; } = string.Empty;
/// <summary>Points where the CV fits the job.</summary>
public List<string> Strengths { get; set; } = [];
/// <summary>Points where the CV falls short of the job.</summary>
public List<string> Gaps { get; set; } = [];
/// <summary>Suggested actions for the candidate.</summary>
public List<string> Recommendations { get; set; } = [];
/// <summary>CV evidence; filled from the retrieved chunks when the model supplies none.</summary>
public List<string> Evidence { get; set; } = [];
}
/// <summary>
/// One embedded chunk of CV text as kept in the vector store.
/// </summary>
public sealed class StoredCvChunk
{
/// <summary>Unique identifier of this chunk.</summary>
public required string Id { get; init; }
/// <summary>Identifier of the CV document the chunk belongs to.</summary>
public required string DocumentId { get; init; }
/// <summary>Chunk text.</summary>
public required string Text { get; init; }
/// <summary>Embedding vector computed for <see cref="Text"/>.</summary>
public required float[] Embedding { get; init; }
/// <summary>Zero-based position of the chunk within the document.</summary>
public required int ChunkIndex { get; init; }
/// <summary>Point in time after which the store treats the chunk as expired.</summary>
public DateTimeOffset ExpiresAt { get; init; }
}
/// <summary>
/// A CV chunk returned by a vector search together with its similarity to the query.
/// </summary>
public sealed class RetrievedCvChunk
{
/// <summary>Chunk text.</summary>
public required string Text { get; init; }
/// <summary>Zero-based position of the chunk within the document.</summary>
public required int ChunkIndex { get; init; }
/// <summary>Cosine similarity between the query embedding and the chunk embedding.</summary>
public double Score { get; init; }
}
+1 -11
View File
@@ -1,8 +1,5 @@
using api.Services.Contracts.Rag;
using Api.Services;
using Api.Services.Contracts;
using Api.Services.Contracts.Rag;
using Api.Services.Rag;
using Api.Settings;
using Azure.Identity;
using Microsoft.AspNetCore.HttpOverrides;
@@ -78,19 +75,12 @@ try
builder.Services.Configure<SmtpSettings>(builder.Configuration.GetSection("Smtp"));
builder.Services.Configure<CaptchaSettings>(builder.Configuration.GetSection("Captcha"));
builder.Services.Configure<FileStorageSettings>(builder.Configuration.GetSection("FileStorage"));
builder.Services.Configure<RagSettings>(builder.Configuration.GetSection("Rag"));
builder.Services.Configure<OpenAiSettings>(builder.Configuration.GetSection("OpenAI"));
// Services
builder.Services.AddHttpClient<ICaptchaVerifier, RecaptchaVerifier>();
builder.Services.AddSingleton<IEmailSender, SmtpEmailSender>();
builder.Services.AddSingleton<Microsoft.AspNetCore.StaticFiles.IContentTypeProvider, Microsoft.AspNetCore.StaticFiles.FileExtensionContentTypeProvider>();
builder.Services.AddSingleton<IPdfTextExtractor, PdfTextExtractor>();
builder.Services.AddSingleton<ITextChunker, TextChunker>();
builder.Services.AddSingleton<ICvVectorStore, InMemoryCvVectorStore>();
builder.Services.AddScoped<ICvRagService, CvRagService>();
builder.Services.AddHttpClient<IAiRagClient, OpenAiRagClient>();
builder.Services.AddHttpClient<IJobTextExtractor, JobTextExtractor>();
builder.Services.AddHttpClient("CvMatcherApi");
// Swagger
builder.Services.AddEndpointsApiExplorer();
@@ -1,6 +1,6 @@
using System.ComponentModel.DataAnnotations;
namespace Api.Models
namespace Api.Requests
{
public sealed class ContactRequest
{
+9
View File
@@ -0,0 +1,9 @@
namespace Api.Requests;
/// <summary>
/// Request body accepted by <c>POST api/rag/match-job</c>; the controller serializes it and
/// forwards it to cv-matcher-api.
/// </summary>
public sealed class JobMatchRequest
{
/// <summary>Identifier of the previously uploaded CV.</summary>
public string? CvDocumentId { get; set; }
/// <summary>Optional URL of the job posting.</summary>
public string? JobUrl { get; set; }
/// <summary>Optional pasted job description text.</summary>
public string? JobDescription { get; set; }
/// <summary>GDPR consent flag supplied by the caller.</summary>
public bool GdprConsent { get; set; }
}
@@ -1,6 +1,6 @@
using System.ComponentModel.DataAnnotations;
namespace Api.Models
namespace Api.Requests
{
public sealed class SubscribeRequest
{
+4 -4
View File
@@ -1,9 +1,9 @@
namespace Api.Services.Contracts
{
public sealed record CaptchaVerdict(bool Success, string? Error, double? Score);
using Api.Services.Contracts.Models;
namespace Api.Services.Contracts
{
public interface ICaptchaVerifier
{
Task<CaptchaVerdict> VerifyAsync(string token, string? userIp, CancellationToken ct);
Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, CancellationToken ct);
}
}
+1 -1
View File
@@ -1,4 +1,4 @@
using Api.Models;
using Api.Requests;
namespace Api.Services.Contracts
{
@@ -0,0 +1,4 @@
namespace Api.Services.Contracts.Models
{
/// <summary>
/// Outcome of a captcha verification as returned by <c>ICaptchaVerifier.VerifyAsync</c>.
/// </summary>
/// <param name="Success"><c>true</c> when the captcha passed every check.</param>
/// <param name="Error">Short failure reason; <c>null</c> on success.</param>
/// <param name="Score">Score reported by the captcha provider, or <c>null</c> when none is available.</param>
public sealed record CaptchaVerdictModel(bool Success, string? Error, double? Score);
}
@@ -1,9 +0,0 @@
using Api.Models.Rag;
namespace api.Services.Contracts.Rag;
public interface ICvRagService
{
Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct);
Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct);
}
@@ -1,6 +0,0 @@
namespace Api.Services.Contracts.Rag;
/// <summary>
/// Extracts plain text from a PDF document.
/// </summary>
public interface IPdfTextExtractor
{
/// <summary>Reads the PDF from <paramref name="pdfStream"/> and returns its text content.</summary>
/// <param name="pdfStream">Stream positioned at the start of the PDF data.</param>
string ExtractText(Stream pdfStream);
}
-165
View File
@@ -1,165 +0,0 @@
using api.Services.Contracts.Rag;
using Api.Models.Rag;
using Api.Services.Contracts.Rag;
using Api.Settings;
using Microsoft.Extensions.Options;
using System.Text.Json;
namespace Api.Services.Rag;
/// <summary>
/// CV matching pipeline: extracts text from an uploaded CV PDF, chunks and embeds it, stores
/// the chunks, and later scores the stored CV against a job description via a chat completion.
/// </summary>
public sealed class CvRagService : ICvRagService
{
    private readonly IPdfTextExtractor _pdfTextExtractor;
    private readonly ITextChunker _textChunker;
    private readonly IAiRagClient _openAi;
    private readonly ICvVectorStore _store;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly RagSettings _settings;
    private readonly ILogger<CvRagService> _logger;

    /// <summary>Creates the service from its collaborators and the bound <see cref="RagSettings"/>.</summary>
    public CvRagService(
        IPdfTextExtractor pdfTextExtractor,
        ITextChunker textChunker,
        IAiRagClient openAi,
        ICvVectorStore store,
        IJobTextExtractor jobTextExtractor,
        IOptions<RagSettings> options,
        ILogger<CvRagService> logger)
    {
        _pdfTextExtractor = pdfTextExtractor;
        _textChunker = textChunker;
        _openAi = openAi;
        _store = store;
        _jobTextExtractor = jobTextExtractor;
        _settings = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Validates and indexes an uploaded CV: extracts its text, embeds every chunk, saves the
    /// chunks under a new document id and produces a one-sentence summary.
    /// </summary>
    /// <param name="file">Uploaded CV; must be a non-empty <c>.pdf</c> within the configured size limit.</param>
    /// <param name="gdprConsent">Must be <c>true</c>.</param>
    /// <param name="ct">Cancellation token, checked before each chunk is embedded.</param>
    /// <returns>The new document id, chunk count, extracted character count and summary.</returns>
    /// <exception cref="InvalidOperationException">
    /// Consent is missing, the file is empty, too large or not a PDF, or fewer than 80 characters were extracted.
    /// </exception>
    public async Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        if (!gdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (file.Length == 0)
        {
            throw new InvalidOperationException("CV PDF is empty.");
        }

        if (file.Length > _settings.MaxPdfSizeMb * 1024L * 1024L)
        {
            throw new InvalidOperationException($"PDF is too large. Max size is {_settings.MaxPdfSizeMb} MB.");
        }

        if (!string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("Only PDF files are accepted.");
        }

        await using var stream = file.OpenReadStream();
        var text = _pdfTextExtractor.ExtractText(stream);
        if (text.Length < 80)
        {
            throw new InvalidOperationException("Could not extract enough text from this PDF.");
        }

        var documentId = $"cv_{Guid.NewGuid():N}";

        // The stored CV lives for at least 10 minutes regardless of the configured TTL.
        var expiresAt = DateTimeOffset.UtcNow.AddMinutes(Math.Max(10, _settings.CvTtlMinutes));
        var chunks = _textChunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        var stored = new List<StoredCvChunk>();
        for (var i = 0; i < chunks.Count; i++)
        {
            ct.ThrowIfCancellationRequested();
            stored.Add(new StoredCvChunk
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = documentId,
                Text = chunks[i],
                Embedding = await _openAi.CreateEmbeddingAsync(chunks[i], ct),
                ChunkIndex = i,
                ExpiresAt = expiresAt
            });
        }

        _store.Save(documentId, stored);
        var summary = await SummarizeCvAsync(text, ct);
        return new CvIngestResponse(documentId, stored.Count, text.Length, summary);
    }

    /// <summary>
    /// Scores a stored CV against a job: retrieves the CV chunks most similar to the job text
    /// and asks the chat model for a structured JSON assessment.
    /// </summary>
    /// <param name="request">CV document id, job URL and/or description, and the consent flag.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The parsed assessment; evidence falls back to truncated retrieved chunks when the model gives none.</returns>
    /// <exception cref="InvalidOperationException">
    /// Consent or document id is missing, the CV is unknown or expired, or fewer than 80 characters of job text were obtained.
    /// </exception>
    public async Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct)
    {
        if (!request.GdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
        {
            throw new InvalidOperationException("Missing CV document id.");
        }

        var cvChunks = _store.Get(request.CvDocumentId);
        if (cvChunks.Count == 0)
        {
            throw new InvalidOperationException("CV context was not found or has expired. Upload the CV again.");
        }

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText.Length < 80)
        {
            throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");
        }

        var jobEmbedding = await _openAi.CreateEmbeddingAsync(jobText, ct);
        var retrieved = _store.Search(request.CvDocumentId, jobEmbedding, _settings.TopK);
        var cvContext = string.Join("\n\n", retrieved.Select(x => $"CV chunk {x.ChunkIndex} | similarity {x.Score:0.000}:\n{x.Text}"));

        var systemPrompt = "You are a strict senior technical recruiter and AI CV matcher. Return only valid JSON. Do not invent candidate experience. Use only the supplied CV context and job text.";
        var userPrompt = $$"""
            Compare the candidate CV context with the job description.
            Return this JSON shape exactly:
            {
            "score": 0,
            "summary": "short direct assessment",
            "strengths": ["strength 1"],
            "gaps": ["gap 1"],
            "recommendations": ["action 1"],
            "evidence": ["short CV evidence quote or paraphrase"]
            }
            Score must be 0-100.
            CV CONTEXT:
            {{cvContext}}
            JOB DESCRIPTION:
            {{jobText}}
            """;

        var content = await _openAi.CreateChatCompletionAsync(systemPrompt, userPrompt, ct);
        var response = ParseMatchResponse(content);
        if (response.Evidence.Count == 0)
        {
            // Fall back to the retrieved chunks, truncated to 280 characters each.
            response.Evidence = retrieved.Select(x => x.Text.Length > 280 ? x.Text[..280] + "..." : x.Text).ToList();
        }

        return response;
    }

    /// <summary>
    /// Asks the chat model for a one-sentence CV summary. Summarisation is best-effort: any
    /// failure other than caller cancellation is logged and replaced by "CV indexed.".
    /// </summary>
    /// <param name="cvText">Full extracted CV text; only the first 8000 characters are sent.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="OperationCanceledException">The caller cancelled <paramref name="ct"/>.</exception>
    private async Task<string> SummarizeCvAsync(string cvText, CancellationToken ct)
    {
        try
        {
            var shortened = cvText.Length > 8000 ? cvText[..8000] : cvText;
            var content = await _openAi.CreateChatCompletionAsync(
                "Return only valid JSON.",
                $$"""
                Summarize this CV in one concise sentence. Return JSON: { "summary": "..." }
                CV:
                {{shortened}}
                """,
                ct);
            using var doc = JsonDocument.Parse(content);
            return doc.RootElement.TryGetProperty("summary", out var summary) ? summary.GetString() ?? "CV indexed." : "CV indexed.";
        }
        // Let caller-requested cancellation propagate instead of reporting a fake success;
        // other failures (including upstream timeouts) keep the best-effort fallback.
        catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
        {
            _logger.LogWarning(ex, "CV summary failed");
            return "CV indexed.";
        }
    }

    /// <summary>
    /// Deserializes the chat model's JSON answer, clamps the score to 0-100 and replaces null
    /// members with empty values. Unparseable content yields a placeholder response.
    /// </summary>
    /// <param name="content">Raw text returned by the chat model.</param>
    private static JobMatchResponse ParseMatchResponse(string content)
    {
        try
        {
            var response = JsonSerializer.Deserialize<JobMatchResponse>(content, new JsonSerializerOptions { PropertyNameCaseInsensitive = true }) ?? new JobMatchResponse();
            response.Score = Math.Clamp(response.Score, 0, 100);

            // An explicit JSON null overwrites the property initializers, so restore non-null values.
            response.Summary ??= string.Empty;
            response.Strengths ??= [];
            response.Gaps ??= [];
            response.Recommendations ??= [];
            response.Evidence ??= [];
            return response;
        }
        // These are the exceptions JsonSerializer.Deserialize documents; anything else is a real bug.
        catch (Exception ex) when (ex is JsonException or NotSupportedException or ArgumentNullException)
        {
            return new JobMatchResponse
            {
                Score = 0,
                Summary = "The AI response could not be parsed. Check logs and prompt output.",
                Gaps = ["Invalid JSON returned by the model."],
                Evidence = []
            };
        }
    }
}
-79
View File
@@ -1,79 +0,0 @@
using Api.Models.Rag;
namespace Api.Services.Rag;
/// <summary>
/// Storage and similarity search for embedded CV chunks, keyed by document id.
/// </summary>
public interface ICvVectorStore
{
/// <summary>Stores <paramref name="chunks"/> under <paramref name="documentId"/>.</summary>
void Save(string documentId, IEnumerable<StoredCvChunk> chunks);
/// <summary>Returns the chunks stored for <paramref name="documentId"/>.</summary>
IReadOnlyList<StoredCvChunk> Get(string documentId);
/// <summary>Returns up to <paramref name="topK"/> chunks of the document ranked against <paramref name="queryEmbedding"/>.</summary>
IReadOnlyList<RetrievedCvChunk> Search(string documentId, float[] queryEmbedding, int topK);
}
/// <summary>
/// <see cref="ICvVectorStore"/> backed by a lock-guarded dictionary keyed by document id
/// (case-insensitive). Documents whose chunks have all expired are purged on every save and read.
/// </summary>
public sealed class InMemoryCvVectorStore : ICvVectorStore
{
    private readonly object _lock = new();
    private readonly Dictionary<string, List<StoredCvChunk>> _store = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>Replaces whatever is stored under <paramref name="documentId"/> with a copy of <paramref name="chunks"/>.</summary>
    public void Save(string documentId, IEnumerable<StoredCvChunk> chunks)
    {
        lock (_lock)
        {
            CleanupExpiredUnsafe();
            _store[documentId] = new List<StoredCvChunk>(chunks);
        }
    }

    /// <summary>Returns a snapshot of the chunks stored for <paramref name="documentId"/>, or an empty list when none exist.</summary>
    public IReadOnlyList<StoredCvChunk> Get(string documentId)
    {
        lock (_lock)
        {
            CleanupExpiredUnsafe();

            if (_store.TryGetValue(documentId, out var found))
            {
                // Hand out a copy so callers never observe the internal list.
                return found.ToList();
            }

            return [];
        }
    }

    /// <summary>
    /// Ranks the document's chunks by cosine similarity to <paramref name="queryEmbedding"/>
    /// and returns the best ones; <paramref name="topK"/> is clamped to 1-12.
    /// </summary>
    public IReadOnlyList<RetrievedCvChunk> Search(string documentId, float[] queryEmbedding, int topK)
    {
        var candidates = Get(documentId);
        if (candidates.Count == 0)
        {
            return [];
        }

        var limit = Math.Clamp(topK, 1, 12);

        var scored = new List<RetrievedCvChunk>(candidates.Count);
        foreach (var candidate in candidates)
        {
            scored.Add(new RetrievedCvChunk
            {
                Text = candidate.Text,
                ChunkIndex = candidate.ChunkIndex,
                Score = CosineSimilarity(queryEmbedding, candidate.Embedding)
            });
        }

        return scored
            .OrderByDescending(x => x.Score)
            .Take(limit)
            .ToList();
    }

    /// <summary>Removes every document whose chunks have all expired. Callers must hold <c>_lock</c>.</summary>
    private void CleanupExpiredUnsafe()
    {
        var now = DateTimeOffset.UtcNow;

        // Collect keys first; the dictionary cannot be modified while it is being enumerated.
        var expiredKeys = new List<string>();
        foreach (var entry in _store)
        {
            if (entry.Value.TrueForAll(c => c.ExpiresAt <= now))
            {
                expiredKeys.Add(entry.Key);
            }
        }

        foreach (var key in expiredKeys)
        {
            _store.Remove(key);
        }
    }

    /// <summary>
    /// Cosine similarity of two vectors; returns 0 for mismatched lengths, empty vectors or a zero-magnitude vector.
    /// </summary>
    private static double CosineSimilarity(float[] a, float[] b)
    {
        if (a.Length != b.Length || a.Length == 0)
        {
            return 0;
        }

        double dot = 0, magA = 0, magB = 0;
        var index = 0;
        while (index < a.Length)
        {
            dot += a[index] * b[index];
            magA += a[index] * a[index];
            magB += b[index] * b[index];
            index++;
        }

        if (magA == 0 || magB == 0)
        {
            return 0;
        }

        var denominator = Math.Sqrt(magA) * Math.Sqrt(magB);
        return dot / denominator;
    }
}
-99
View File
@@ -1,99 +0,0 @@
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Api.Services.Contracts.Rag;
using Api.Settings;
using Microsoft.Extensions.Options;
namespace Api.Services.Rag;
/// <summary>
/// <see cref="IAiRagClient"/> implementation that calls the OpenAI REST API for embeddings
/// and JSON-mode chat completions.
/// </summary>
public sealed class OpenAiRagClient : IAiRagClient
{
    private readonly HttpClient _httpClient;
    private readonly OpenAiSettings _settings;

    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    /// <summary>
    /// Configures the injected client: bearer token (when a key is set), a timeout of at
    /// least 15 seconds, and the OpenAI v1 base address.
    /// </summary>
    public OpenAiRagClient(HttpClient httpClient, IOptions<OpenAiSettings> options)
    {
        _httpClient = httpClient;
        _settings = options.Value;

        var hasKey = !string.IsNullOrWhiteSpace(_settings.ApiKey);
        if (hasKey)
        {
            _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", _settings.ApiKey);
        }

        var timeoutSeconds = Math.Max(15, _settings.TimeoutSeconds);
        _httpClient.Timeout = TimeSpan.FromSeconds(timeoutSeconds);
        _httpClient.BaseAddress = new Uri("https://api.openai.com/v1/");
    }

    /// <summary>Requests an embedding for <paramref name="input"/> using the configured embedding model.</summary>
    /// <param name="input">Text to embed.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The first embedding vector in the response.</returns>
    /// <exception cref="InvalidOperationException">The API key is missing or the API returned a non-success status.</exception>
    public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
    {
        EnsureConfigured();

        var payload = new { model = _settings.EmbeddingModel, input };
        using var response = await _httpClient.PostAsync("embeddings", ToJson(payload), ct);
        var body = await response.Content.ReadAsStringAsync(ct);

        if (!response.IsSuccessStatusCode)
        {
            throw new InvalidOperationException($"OpenAI embeddings request failed: {(int)response.StatusCode} {body}");
        }

        using var document = JsonDocument.Parse(body);
        var vector = document.RootElement.GetProperty("data")[0].GetProperty("embedding");

        // Materialise before the JsonDocument is disposed.
        return vector.EnumerateArray().Select(element => element.GetSingle()).ToArray();
    }

    /// <summary>Runs a JSON-mode chat completion with one system and one user message.</summary>
    /// <param name="systemPrompt">System message content.</param>
    /// <param name="userPrompt">User message content.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The content of the first choice, or <c>{}</c> when it is null.</returns>
    /// <exception cref="InvalidOperationException">The API key is missing or the API returned a non-success status.</exception>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, CancellationToken ct)
    {
        EnsureConfigured();

        var messages = new[]
        {
            new { role = "system", content = systemPrompt },
            new { role = "user", content = userPrompt }
        };
        var payload = new
        {
            model = _settings.ChatModel,
            temperature = 0.2,
            response_format = new { type = "json_object" },
            messages
        };

        using var response = await _httpClient.PostAsync("chat/completions", ToJson(payload), ct);
        var body = await response.Content.ReadAsStringAsync(ct);

        if (!response.IsSuccessStatusCode)
        {
            throw new InvalidOperationException($"OpenAI chat request failed: {(int)response.StatusCode} {body}");
        }

        using var document = JsonDocument.Parse(body);
        var message = document.RootElement.GetProperty("choices")[0].GetProperty("message");
        var text = message.GetProperty("content").GetString();
        return text ?? "{}";
    }

    /// <summary>Throws when no API key has been configured.</summary>
    private void EnsureConfigured()
    {
        if (!string.IsNullOrWhiteSpace(_settings.ApiKey))
        {
            return;
        }

        throw new InvalidOperationException("OpenAI API key is not configured. Set OpenAI__ApiKey.");
    }

    /// <summary>Serializes <paramref name="payload"/> with the shared options into a UTF-8 JSON request body.</summary>
    private static StringContent ToJson<T>(T payload)
    {
        var serialized = JsonSerializer.Serialize(payload, JsonOptions);
        return new StringContent(serialized, Encoding.UTF8, "application/json");
    }
}
-29
View File
@@ -1,29 +0,0 @@
using Api.Services.Contracts.Rag;
using System.Text;
using UglyToad.PdfPig;
namespace Api.Services.Rag;
/// <summary>
/// <see cref="IPdfTextExtractor"/> implementation built on PdfPig.
/// </summary>
public sealed class PdfTextExtractor : IPdfTextExtractor
{
    /// <summary>
    /// Concatenates the text of every page and collapses all whitespace runs to single spaces.
    /// </summary>
    /// <param name="pdfStream">Stream containing the PDF document.</param>
    /// <returns>The normalised text, or an empty string when the document has no non-whitespace text.</returns>
    public string ExtractText(Stream pdfStream)
    {
        using var pdf = PdfDocument.Open(pdfStream);

        var raw = new StringBuilder();
        foreach (var page in pdf.GetPages())
        {
            // Page text followed by an empty line, so text from adjacent pages stays separated.
            raw.AppendLine(page.Text).AppendLine();
        }

        return NormalizeWhitespace(raw.ToString());
    }

    /// <summary>Replaces every run of whitespace with one space and trims the ends.</summary>
    private static string NormalizeWhitespace(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        // A null separator array makes Split break on any whitespace character.
        var words = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        var joined = string.Join(' ', words);
        return joined.Trim();
    }
}
+10 -9
View File
@@ -1,3 +1,4 @@
using Api.Services.Contracts.Models;
using Api.Services.Contracts;
using Api.Settings;
using Microsoft.Extensions.Options;
@@ -17,14 +18,14 @@ namespace Api.Services
_log = log;
}
public async Task<CaptchaVerdict> VerifyAsync(string token, string? userIp, CancellationToken ct)
public async Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, CancellationToken ct)
{
_log.LogDebug("Verifying captcha token for IP {Ip}", userIp ?? "unknown");
if (string.IsNullOrWhiteSpace(_opt.SecretKey))
{
_log.LogWarning("Captcha verification attempted but SecretKey is not configured");
return new CaptchaVerdict(false, "Captcha not configured", null);
return new CaptchaVerdictModel(false, "Captcha not configured", null);
}
var form = new Dictionary<string, string>
@@ -45,21 +46,21 @@ namespace Api.Services
{
_log.LogWarning("Captcha HTTP request failed with status {StatusCode} for IP {Ip}",
(int)resp.StatusCode, userIp ?? "unknown");
return new CaptchaVerdict(false, $"Captcha HTTP {(int)resp.StatusCode}", null);
return new CaptchaVerdictModel(false, $"Captcha HTTP {(int)resp.StatusCode}", null);
}
var data = await resp.Content.ReadFromJsonAsync<RecaptchaResponse>(cancellationToken: ct);
if (data is null)
{
_log.LogError("Failed to parse captcha response for IP {Ip}", userIp ?? "unknown");
return new CaptchaVerdict(false, "Captcha parse error", null);
return new CaptchaVerdictModel(false, "Captcha parse error", null);
}
if (!data.success)
{
_log.LogWarning("Captcha verification failed for IP {Ip}. Score={Score}",
userIp ?? "unknown", data.score);
return new CaptchaVerdict(false, "Captcha failed", data.score);
return new CaptchaVerdictModel(false, "Captcha failed", data.score);
}
// v3 score check (score is typically null for v2)
@@ -67,7 +68,7 @@ namespace Api.Services
{
_log.LogWarning("Captcha score {Score} below minimum {MinScore} for IP {Ip}",
score, _opt.MinimumScore, userIp ?? "unknown");
return new CaptchaVerdict(false, "Captcha score too low", score);
return new CaptchaVerdictModel(false, "Captcha score too low", score);
}
// Optional strictness (usually v3): action/hostname checks
@@ -76,7 +77,7 @@ namespace Api.Services
{
_log.LogWarning("Captcha action mismatch. Expected={Expected}, Actual={Actual}, IP={Ip}",
_opt.ExpectedAction, data.action, userIp ?? "unknown");
return new CaptchaVerdict(false, "Captcha action mismatch", data.score);
return new CaptchaVerdictModel(false, "Captcha action mismatch", data.score);
}
if (!string.IsNullOrWhiteSpace(_opt.ExpectedHostname) &&
@@ -84,12 +85,12 @@ namespace Api.Services
{
_log.LogWarning("Captcha hostname mismatch. Expected={Expected}, Actual={Actual}, IP={Ip}",
_opt.ExpectedHostname, data.hostname, userIp ?? "unknown");
return new CaptchaVerdict(false, "Captcha hostname mismatch", data.score);
return new CaptchaVerdictModel(false, "Captcha hostname mismatch", data.score);
}
_log.LogInformation("Captcha verified successfully for IP {Ip}. Score={Score}",
userIp ?? "unknown", data.score);
return new CaptchaVerdict(true, null, data.score);
return new CaptchaVerdictModel(true, null, data.score);
}
private sealed class RecaptchaResponse
+1 -1
View File
@@ -1,5 +1,5 @@
using Api.Services.Contracts;
using Api.Models;
using Api.Requests;
using Microsoft.Extensions.Options;
using MailKit.Net.Smtp;
using MailKit.Security;
+2 -2
View File
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
@@ -10,6 +10,7 @@
<InvariantGlobalization>false</InvariantGlobalization>
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
<DisableStaticWebAssets>true</DisableStaticWebAssets>
<RootNamespace>Api</RootNamespace>
</PropertyGroup>
<ItemGroup>
@@ -18,7 +19,6 @@
<PackageReference Include="DotNetEnv" Version="3.2.0" />
<PackageReference Include="MailKit" Version="4.16.0" />
<PackageReference Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.23.0" />
<PackageReference Include="PdfPig" Version="0.1.14" />
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
<PackageReference Include="Serilog.Sinks.Email" Version="4.2.1" />
+4 -14
View File
@@ -106,18 +106,8 @@
"FromEmail": "",
"SubjectPrefix": "[File Download]"
},
"OpenAI": {
"ApiKey": "",
"ChatModel": "gpt-4o-mini",
"EmbeddingModel": "text-embedding-3-small",
"TimeoutSeconds": 60
},
"Rag": {
"MaxPdfSizeMb": 5,
"ChunkSize": 900,
"ChunkOverlap": 150,
"CvTtlMinutes": 60,
"MaxJobTextChars": 20000,
"TopK": 6
"CvMatcherApi": {
"BaseUrl": "",
"InternalApiKey": ""
}
}
}
@@ -0,0 +1,73 @@
using Api.Requests;
using Api.Services.Contracts;
using Microsoft.AspNetCore.Mvc;
namespace Api.Controllers;
/// <summary>
/// HTTP endpoints under <c>api/cv</c> for CV upload, job search and job matching. Each action
/// delegates to <see cref="ICvMatcherService"/> and maps <see cref="InvalidOperationException"/>
/// to a 400 response carrying the exception message.
/// </summary>
[ApiController]
[Route("api/cv")]
public sealed class CvController(ICvMatcherService service, ILogger<CvController> logger) : ControllerBase
{
    private readonly ICvMatcherService _service = service;
    private readonly ILogger<CvController> _logger = logger;

    /// <summary>Accepts a CV file (form field <c>cv</c>, request size limit 10 MB) and hands it to the matcher service.</summary>
    [HttpPost("upload")]
    [RequestSizeLimit(10 * 1024 * 1024)]
    public async Task<IActionResult> Upload([FromForm(Name = "cv")] IFormFile? cv, [FromForm] bool gdprConsent, CancellationToken ct)
    {
        if (cv is null)
        {
            return BadRequest(new { error = "Missing CV PDF." });
        }

        try
        {
            _logger.LogInformation(
                "CV upload received. FileName={FileName}, Size={SizeBytes}, GdprConsent={GdprConsent}",
                cv.FileName,
                cv.Length,
                gdprConsent);

            var uploaded = await _service.UploadCvAsync(cv, gdprConsent, ct);

            _logger.LogInformation(
                "CV upload processed. CvDocumentId={CvDocumentId}, Cached={Cached}",
                uploaded.DocumentId,
                uploaded.Cached);
            return Ok(uploaded);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid CV upload request.");
            return BadRequest(new { error = ex.Message });
        }
    }

    /// <summary>Finds jobs for a previously uploaded CV.</summary>
    [HttpPost("find-jobs")]
    public async Task<IActionResult> FindJobs([FromBody] FindJobsRequest request, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation(
                "Find jobs request received. CvDocumentId={CvDocumentId}, TopK={TopK}",
                request.CvDocumentId,
                request.TopK);

            var found = await _service.FindJobsAsync(request, ct);

            _logger.LogInformation(
                "Find jobs completed. CvDocumentId={CvDocumentId}, ResultCount={ResultCount}",
                request.CvDocumentId,
                found.Jobs.Count);
            return Ok(found);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid find jobs request.");
            return BadRequest(new { error = ex.Message });
        }
    }

    /// <summary>Scores a previously uploaded CV against a job given by URL and/or description.</summary>
    [HttpPost("match-job")]
    public async Task<IActionResult> MatchJob([FromBody] MatchJobRequest request, CancellationToken ct)
    {
        try
        {
            // Log only whether the optional fields were supplied, never their contents.
            var hasJobUrl = !string.IsNullOrWhiteSpace(request.JobUrl);
            var hasJobDescription = !string.IsNullOrWhiteSpace(request.JobDescription);
            var emailRequested = !string.IsNullOrWhiteSpace(request.Email);

            _logger.LogInformation(
                "Match job request received. CvDocumentId={CvDocumentId}, HasJobUrl={HasJobUrl}, HasJobDescription={HasJobDescription}, EmailRequested={EmailRequested}",
                request.CvDocumentId,
                hasJobUrl,
                hasJobDescription,
                emailRequested);

            var match = await _service.MatchJobAsync(request, ct);

            _logger.LogInformation(
                "Match job completed. CvDocumentId={CvDocumentId}, Score={Score}, Cached={Cached}",
                request.CvDocumentId,
                match.Score,
                match.Cached);
            return Ok(match);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid match job request.");
            return BadRequest(new { error = ex.Message });
        }
    }
}
+25
View File
@@ -0,0 +1,25 @@
-- Stores one scored CV-to-job match per row.
-- The OBJECT_ID guard makes this batch re-runnable: the table and its index are
-- created only when dbo.CvMatchResults does not exist yet.
IF OBJECT_ID('dbo.CvMatchResults', 'U') IS NULL
BEGIN
CREATE TABLE dbo.CvMatchResults (
Id NVARCHAR(64) NOT NULL CONSTRAINT PK_CvMatchResults PRIMARY KEY,
CvDocumentId NVARCHAR(64) NOT NULL,
JobDocumentId NVARCHAR(64) NOT NULL,
-- Serialized match result (presumably the JobMatchResponse JSON; confirm against the repository code).
ResultJson NVARCHAR(MAX) NOT NULL,
Score INT NOT NULL,
-- Defaults to the UTC time of the insert.
CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_CvMatchResults_CreatedAt DEFAULT SYSUTCDATETIME()
);
-- Allows at most one stored result per (CV, job) pair.
CREATE UNIQUE INDEX UX_CvMatchResults_CvJob ON dbo.CvMatchResults(CvDocumentId, JobDocumentId);
END
GO
-- Cache of chat-completion responses, one row per cache key.
-- Guarded by OBJECT_ID so the batch can be run repeatedly without error.
IF OBJECT_ID('dbo.CvMatcherChatCache', 'U') IS NULL
BEGIN
CREATE TABLE dbo.CvMatcherChatCache (
-- 64 characters matches the length of a hex-encoded SHA-256 digest
-- (presumably the key built by the AI client; confirm against the repository code).
CacheKey NVARCHAR(64) NOT NULL CONSTRAINT PK_CvMatcherChatCache PRIMARY KEY,
Model NVARCHAR(120) NOT NULL,
Temperature DECIMAL(4,2) NOT NULL,
ResponseText NVARCHAR(MAX) NOT NULL,
-- Defaults to the UTC time of the insert.
CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_CvMatcherChatCache_CreatedAt DEFAULT SYSUTCDATETIME()
);
END
GO
+15
View File
@@ -0,0 +1,15 @@
# Multi-stage build for cv-matcher-api.
# Stage "base": runtime-only ASP.NET image that the final image is built from.
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS base
WORKDIR /app
# Documents the port the container is expected to serve on.
EXPOSE 8080
# Stage "build": full SDK image used to restore and publish the project.
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
WORKDIR /src
# Copy only the project file first so the restore layer is reused while the csproj is unchanged.
COPY ["cv-matcher-api.csproj", "./"]
RUN dotnet restore "cv-matcher-api.csproj"
COPY . .
# UseAppHost=false skips the native launcher; the app is started through "dotnet <dll>" below.
RUN dotnet publish "cv-matcher-api.csproj" -c Release -o /app/publish /p:UseAppHost=false
# Stage "final": runtime image plus the published output only.
FROM base AS final
WORKDIR /app
COPY --from=build /app/publish .
ENTRYPOINT ["dotnet", "cv-matcher-api.dll"]
+283
View File
@@ -0,0 +1,283 @@
using Azure.Identity;
using Api.Services;
using Api.Services.Contracts;
using Api.Settings;
using Microsoft.AspNetCore.Diagnostics;
using Serilog;
using System.Reflection;
// Entry point of cv-matcher-api: configures logging, configuration sources, DI, the HTTP
// pipeline (request logging, exception handling, internal API key check, Swagger) and runs the host.
DotNetEnv.Env.Load();

// Two-stage Serilog initialization. Without a bootstrap logger the static Log.Logger stays a
// silent no-op until the host is built, so the startup messages below and, more importantly,
// the Log.Fatal of a failure during startup would never be written anywhere.
Log.Logger = new LoggerConfiguration()
    .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter())
    .CreateBootstrapLogger();

try
{
    var builder = WebApplication.CreateBuilder(args);

    // Prefer the informational version, then the assembly version, then a fixed placeholder.
    var appVersion = Assembly.GetExecutingAssembly()
        .GetCustomAttribute<AssemblyInformationalVersionAttribute>()?
        .InformationalVersion
        ?? Assembly.GetExecutingAssembly().GetName().Version?.ToString()
        ?? "unknown";

    builder.Host.UseSerilog((context, services, configuration) =>
    {
        configuration
            .ReadFrom.Configuration(context.Configuration)
            .ReadFrom.Services(services)
            .Enrich.FromLogContext()
            .Enrich.WithMachineName()
            .Enrich.WithEnvironmentName()
            .Enrich.WithProperty("Service", "cv-matcher-api")
            .Enrich.WithProperty("AppVersion", appVersion)
            .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter());
    });

    Log.Information("Starting {Service} version {AppVersion}", "cv-matcher-api", appVersion);

    // --------------------
    // Azure Key Vault Configuration
    // --------------------
    var keyVaultUri = builder.Configuration["KeyVault:VaultUri"];
    var keyVaultEnabled = builder.Configuration.GetValue<bool>("KeyVault:Enabled");
    if (keyVaultEnabled && !string.IsNullOrWhiteSpace(keyVaultUri))
    {
        Log.Information("Loading configuration from Azure Key Vault: {VaultUri}", keyVaultUri);
        try
        {
            builder.Configuration.AddAzureKeyVault(
                new Uri(keyVaultUri),
                new DefaultAzureCredential());
            Log.Information("Azure Key Vault configuration loaded successfully");
        }
        catch (Exception ex)
        {
            // Key Vault is optional: keep starting with the remaining configuration sources.
            Log.Warning(ex, "Failed to load Azure Key Vault configuration. Continuing with other configuration sources.");
        }
    }
    else
    {
        Log.Information("Azure Key Vault is disabled or not configured");
    }

    // Strongly typed settings sections.
    builder.Services.Configure<RagApiSettings>(builder.Configuration.GetSection("RagApi"));
    builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));
    builder.Services.Configure<AiSettings>(builder.Configuration.GetSection("Ai"));
    builder.Services.Configure<MatcherSettings>(builder.Configuration.GetSection("Matcher"));
    builder.Services.Configure<SmtpSettings>(builder.Configuration.GetSection("Smtp"));

    // Typed HTTP clients and application services.
    builder.Services.AddHttpClient<IRagApiClient, RagApiClient>();
    builder.Services.AddHttpClient<IMatcherAiClient, MatcherAiClient>();
    builder.Services.AddHttpClient<IJobTextExtractor, JobTextExtractor>();
    builder.Services.AddSingleton<IMatcherRepository, SqlMatcherRepository>();
    builder.Services.AddScoped<ICvMatcherService, CvMatcherService>();
    builder.Services.AddSingleton<IEmailService, EmailService>();

    builder.Services.AddControllers();
    builder.Services.AddEndpointsApiExplorer();
    builder.Services.AddSwaggerGen();

    var app = builder.Build();

    var logger = app.Services.GetRequiredService<ILogger<Program>>();
    logger.LogInformation("API starting up...");
    logger.LogInformation("Environment: {Environment}", app.Environment.EnvironmentName);

    // Log all environment variables and configuration settings at startup
    // Can be controlled via appsettings: "Logging:LogEnvironmentOnStartup": true
    var logEnvironmentOnStartup = app.Configuration.GetValue<bool>("Logging:LogEnvironmentOnStartup", defaultValue: true);
    if (logEnvironmentOnStartup)
    {
        LogEnvironmentSettings(logger, app.Configuration, app.Environment);
    }

    // Let the repository prepare its storage before the first request is served.
    using (var scope = app.Services.CreateScope())
    {
        var repository = scope.ServiceProvider.GetRequiredService<IMatcherRepository>();
        await repository.InitializeAsync(CancellationToken.None);
    }

    app.UseSerilogRequestLogging(options =>
    {
        options.MessageTemplate = "HTTP {RequestMethod} {RequestPath} responded {StatusCode} in {Elapsed:0.0000} ms";
        options.EnrichDiagnosticContext = (diagnosticContext, httpContext) =>
        {
            diagnosticContext.Set("RequestHost", httpContext.Request.Host.Value);
            diagnosticContext.Set("RequestScheme", httpContext.Request.Scheme);
            diagnosticContext.Set("RemoteIP", httpContext.Connection.RemoteIpAddress?.ToString());
            diagnosticContext.Set("UserAgent", httpContext.Request.Headers.UserAgent.ToString());
        };
    });

    // Last-resort handler: log the exception and answer with a generic JSON 500.
    app.UseExceptionHandler(errorApp =>
    {
        errorApp.Run(async context =>
        {
            var feature = context.Features.Get<IExceptionHandlerFeature>();
            var errorLogger = context.RequestServices.GetRequiredService<ILogger<Program>>();
            if (feature?.Error is not null)
            {
                errorLogger.LogError(feature.Error, "Unhandled exception in {Service}", "cv-matcher-api");
            }

            context.Response.StatusCode = StatusCodes.Status500InternalServerError;
            context.Response.ContentType = "application/json";
            await context.Response.WriteAsJsonAsync(new { error = "Unexpected server error." });
        });
    });

    // Internal API key check. When RequireApiKey is on, every request must carry the configured
    // key in X-Internal-Api-Key; a missing/blank configured key rejects all requests.
    app.Use(async (context, next) =>
    {
        var settings = context.RequestServices.GetRequiredService<Microsoft.Extensions.Options.IOptions<InternalApiSettings>>().Value;
        if (settings.RequireApiKey)
        {
            var provided = context.Request.Headers["X-Internal-Api-Key"].ToString();

            // Compare in constant time so response timing does not reveal how much of the key matched.
            var authorized = !string.IsNullOrWhiteSpace(settings.ApiKey)
                && System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(
                    System.Text.Encoding.UTF8.GetBytes(provided),
                    System.Text.Encoding.UTF8.GetBytes(settings.ApiKey));

            if (!authorized)
            {
                var authLogger = context.RequestServices.GetRequiredService<ILogger<Program>>();
                authLogger.LogWarning("Rejected unauthorized internal API call. Path={Path}, RemoteIP={RemoteIP}", context.Request.Path, context.Connection.RemoteIpAddress?.ToString());
                context.Response.StatusCode = StatusCodes.Status401Unauthorized;
                await context.Response.WriteAsJsonAsync(new { error = "Unauthorized internal API call." });
                return;
            }
        }

        await next();
    });

    // Swagger (typically only in Development)
    if (app.Environment.IsDevelopment())
    {
        app.UseSwagger();
        app.UseSwaggerUI(options =>
        {
            options.DocumentTitle = "cv-matcher-api";
            options.SwaggerEndpoint("/swagger/v1/swagger.json", "cv-matcher-api v1");
            options.RoutePrefix = "swagger";
        });
    }

    app.MapControllers();
    app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "cv-matcher-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));

    Log.Information("{Service} startup complete", "cv-matcher-api");
    app.Run();
}
catch (Exception ex)
{
    Log.Fatal(ex, "cv-matcher-api terminated unexpectedly");
}
finally
{
    Log.Information("Shutting down cv-matcher-api");
    Log.CloseAndFlush();
}
/// <summary>
/// Writes a startup diagnostics dump: host environment details, every process environment
/// variable (sorted by name) and the full configuration tree. Values whose key looks
/// sensitive are masked before they are logged.
/// </summary>
/// <param name="logger">Logger that receives the dump.</param>
/// <param name="configuration">Configuration root whose sections are logged.</param>
/// <param name="environment">Host environment that supplies names and paths.</param>
static void LogEnvironmentSettings(Microsoft.Extensions.Logging.ILogger logger, IConfiguration configuration, IWebHostEnvironment environment)
{
    logger.LogInformation("==================== ENVIRONMENT SETTINGS ====================");

    // Environment Information
    logger.LogInformation("Application Name: {ApplicationName}", environment.ApplicationName);
    logger.LogInformation("Environment Name: {EnvironmentName}", environment.EnvironmentName);
    logger.LogInformation("Content Root Path: {ContentRootPath}", environment.ContentRootPath);
    logger.LogInformation("Web Root Path: {WebRootPath}", environment.WebRootPath);

    // Environment Variables
    logger.LogInformation("-------------- Environment Variables --------------");
    var ordered = new SortedDictionary<string, string?>();
    foreach (System.Collections.DictionaryEntry variable in Environment.GetEnvironmentVariables())
    {
        var name = variable.Key?.ToString() ?? string.Empty;
        var raw = variable.Value?.ToString() ?? string.Empty;

        // Sensitive values (passwords, secrets, tokens, keys) keep only their last 4 characters.
        ordered[name] = IsSensitiveKey(name) ? MaskValueWithLastChars(raw) : raw;
    }

    foreach (var (name, shown) in ordered)
    {
        logger.LogInformation(" {Key} = {Value}", name, shown);
    }

    // Configuration Settings
    logger.LogInformation("-------------- Configuration Settings --------------");
    LogConfigurationRecursive(logger, configuration.GetChildren(), "");
    logger.LogInformation("===========================================================");
}
/// <summary>
/// Walks the configuration tree depth-first and logs every section that carries a value,
/// using the colon-separated path as the key. Sensitive values are masked.
/// </summary>
/// <param name="logger">Logger that receives one entry per configured value.</param>
/// <param name="sections">Sections at the current level of the tree.</param>
/// <param name="prefix">Path of the parent section; empty at the root.</param>
static void LogConfigurationRecursive(Microsoft.Extensions.Logging.ILogger logger, IEnumerable<IConfigurationSection> sections, string prefix)
{
    foreach (var section in sections)
    {
        var path = string.IsNullOrEmpty(prefix) ? section.Key : $"{prefix}:{section.Key}";

        if (section.Value is { } raw)
        {
            // Sensitive configuration values keep only their last 4 characters.
            var shown = IsSensitiveKey(path) ? MaskValueWithLastChars(raw) : raw;
            logger.LogInformation(" {Key} = {Value}", path, shown);
        }

        // Descend into child sections, if there are any.
        var children = section.GetChildren();
        if (children.Any())
        {
            LogConfigurationRecursive(logger, children, path);
        }
    }
}
/// <summary>
/// Tells whether a key name suggests a secret value: it contains "Password", "Secret",
/// "Token", "Key" or "ConnectionString", compared case-insensitively.
/// </summary>
/// <param name="key">Environment variable name or configuration path.</param>
/// <returns><c>true</c> when the value for this key should be masked before logging.</returns>
static bool IsSensitiveKey(string key)
{
    string[] markers = ["Password", "Secret", "Token", "Key", "ConnectionString"];
    return markers.Any(marker => key.Contains(marker, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Hides a sensitive value while keeping its last 4 characters visible for verification.
/// </summary>
/// <param name="value">The value to mask.</param>
/// <returns>
/// "***NOT SET***" for a null or empty value, "***MASKED***" for values of up to 4 characters
/// (too short to reveal anything), otherwise "***MASKED***..." followed by the last 4 characters.
/// </returns>
static string MaskValueWithLastChars(string value) => value switch
{
    null or "" => "***NOT SET***",
    { Length: <= 4 } => "***MASKED***",
    _ => $"***MASKED***...{value[^4..]}"
};
@@ -0,0 +1,12 @@
{
"profiles": {
"cv-matcher-api": {
"commandName": "Project",
"launchBrowser": true,
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"applicationUrl": "https://localhost:58423;http://localhost:58425"
}
}
}
@@ -0,0 +1,9 @@
namespace Api.Requests
{
/// <summary>
/// Request body of the find-jobs endpoint: search indexed jobs for one CV.
/// </summary>
public sealed class FindJobsRequest
{
/// <summary>Identifier of the CV document the jobs are matched against.</summary>
public required string CvDocumentId { get; init; }
/// <summary>Optional number of search results to request; the service falls back to its configured TopK when null.</summary>
public int? TopK { get; init; }
/// <summary>Optional recipient for the result emails; the email service falls back to its configured address when blank.</summary>
public string? Email { get; init; }
}
}
@@ -0,0 +1,11 @@
namespace Api.Requests
{
/// <summary>
/// Request body of the match-job endpoint: score one CV against one job.
/// </summary>
public sealed class MatchJobRequest
{
/// <summary>Identifier of the CV document; the service rejects the request when it is missing or blank.</summary>
public string? CvDocumentId { get; set; }
/// <summary>Optional URL of the job posting; handed to the job text extractor and stored as the job's source URL.</summary>
public string? JobUrl { get; set; }
/// <summary>Optional pasted job description; handed to the job text extractor together with the URL.</summary>
public string? JobDescription { get; set; }
/// <summary>Must be true; the service rejects the request without GDPR consent.</summary>
public bool GdprConsent { get; set; }
/// <summary>Optional recipient for the result email; the email service falls back to its configured address when blank.</summary>
public string? Email { get; set; }
}
}
@@ -0,0 +1,9 @@
namespace Api.Requests
{
/// <summary>
/// Payload serialized as JSON and posted to the rag-api search endpoint.
/// </summary>
public sealed class RagSearchRequest
{
/// <summary>Text the semantic search is run for.</summary>
public required string QueryText { get; init; }
/// <summary>Optional document types to search in (the matcher service passes "job").</summary>
public IReadOnlyList<string>? TargetDocumentTypes { get; init; }
/// <summary>Optional number of results to request.</summary>
public int? TopK { get; init; }
}
}
@@ -0,0 +1,14 @@
namespace Api.Responses
{
/// <summary>
/// Result of a CV upload. The matcher service fills it from the rag-api index response.
/// </summary>
public sealed class CvUploadResponse
{
/// <summary>Identifier of the indexed CV document.</summary>
public required string DocumentId { get; init; }
/// <summary>Hash of the document text as reported by rag-api.</summary>
public required string TextHash { get; init; }
/// <summary>Document type as reported by rag-api.</summary>
public required string DocumentType { get; init; }
/// <summary>Document title as reported by rag-api.</summary>
public required string Title { get; init; }
/// <summary>Number of chunks as reported by rag-api.</summary>
public int Chunks { get; init; }
/// <summary>Number of characters as reported by rag-api.</summary>
public int Characters { get; init; }
/// <summary>Cached flag as reported by rag-api; the service uses it to pick the summary text.</summary>
public bool Cached { get; init; }
/// <summary>Human-readable outcome message.</summary>
public string Summary { get; init; } = "CV indexed successfully.";
}
}
@@ -0,0 +1,8 @@
namespace Api.Responses
{
/// <summary>
/// Result of a find-jobs request.
/// </summary>
public sealed class FindJobsResponse
{
/// <summary>Identifier of the CV the search was run for.</summary>
public required string CvDocumentId { get; init; }
/// <summary>Scored job matches; empty by default.</summary>
public IReadOnlyList<JobMatchResponse> Jobs { get; init; } = [];
}
}
@@ -0,0 +1,15 @@
namespace Api.Responses
{
/// <summary>
/// Score and explanation for one CV/job pair. The matcher service deserializes the model's
/// JSON reply into this type and then fills in the job fields.
/// </summary>
public sealed class JobMatchResponse
{
/// <summary>Match score; the scoring prompt asks the model for a value from 0 to 100.</summary>
public int Score { get; set; }
/// <summary>Short textual summary of the match.</summary>
public string Summary { get; set; } = string.Empty;
/// <summary>Strengths listed by the model.</summary>
public List<string> Strengths { get; set; } = [];
/// <summary>Gaps listed by the model.</summary>
public List<string> Gaps { get; set; } = [];
/// <summary>Recommendations listed by the model.</summary>
public List<string> Recommendations { get; set; } = [];
/// <summary>Evidence listed by the model.</summary>
public List<string> Evidence { get; set; } = [];
/// <summary>Set to false by the matcher service for a freshly computed result.</summary>
public bool Cached { get; set; }
/// <summary>Identifier of the job document that was scored.</summary>
public string? JobDocumentId { get; set; }
/// <summary>Source URL of the job document, when it has one.</summary>
public string? JobUrl { get; set; }
}
}
@@ -0,0 +1,14 @@
namespace Api.Responses
{
/// <summary>
/// Reply of the rag-api document indexing endpoint, as consumed by the RAG API client.
/// </summary>
public sealed class RagIndexResponse
{
/// <summary>Identifier of the indexed document.</summary>
public required string DocumentId { get; init; }
/// <summary>Hash of the document text.</summary>
public required string TextHash { get; init; }
/// <summary>Type of the document (the client sends "cv" or "job" when indexing).</summary>
public required string DocumentType { get; init; }
/// <summary>Confidence attached to the document type (presumably from rag-api's classifier; confirm against rag-api).</summary>
public double DocumentTypeConfidence { get; init; }
/// <summary>Title of the document.</summary>
public required string Title { get; init; }
/// <summary>Number of chunks reported for the document.</summary>
public int Chunks { get; init; }
/// <summary>Number of characters reported for the document.</summary>
public int Characters { get; init; }
/// <summary>Cached flag reported by rag-api; the matcher service reads it as "already indexed".</summary>
public bool Cached { get; init; }
}
}
@@ -0,0 +1,34 @@
namespace Api.Responses
{
/// <summary>
/// Reply of the rag-api search endpoint.
/// </summary>
public sealed class RagSearchResponse
{
/// <summary>Matching documents; empty by default.</summary>
public IReadOnlyList<RagSearchDocumentResult> Results { get; init; } = [];
}
/// <summary>
/// Full document as returned by the rag-api document lookup endpoint.
/// </summary>
public sealed class RagDocumentDetails
{
/// <summary>Identifier of the document.</summary>
public required string Id { get; init; }
/// <summary>Type of the document; the matcher service compares it with "cv" case-insensitively.</summary>
public required string DocumentType { get; init; }
/// <summary>Title of the document.</summary>
public required string Title { get; init; }
/// <summary>Source URL of the document, when it has one.</summary>
public string? SourceUrl { get; init; }
/// <summary>Text of the document; the matcher service truncates it before prompting the model.</summary>
public required string Text { get; init; }
/// <summary>Hash of the document text.</summary>
public required string TextHash { get; init; }
}
/// <summary>
/// One document hit inside a search reply.
/// </summary>
public sealed class RagSearchDocumentResult
{
/// <summary>Identifier of the matching document.</summary>
public required string DocumentId { get; init; }
/// <summary>Type of the matching document.</summary>
public required string DocumentType { get; init; }
/// <summary>Title of the matching document.</summary>
public required string Title { get; init; }
/// <summary>Source URL of the matching document, when it has one.</summary>
public string? SourceUrl { get; init; }
/// <summary>Score reported by rag-api for this document (scale not visible here; confirm against rag-api).</summary>
public double Score { get; init; }
/// <summary>Chunks of the document that matched; empty by default.</summary>
public IReadOnlyList<RagSearchChunkResult> MatchedChunks { get; init; } = [];
}
/// <summary>
/// One matched chunk of a document hit.
/// </summary>
public sealed class RagSearchChunkResult
{
/// <summary>Identifier of the chunk.</summary>
public required string ChunkId { get; init; }
/// <summary>Index of the chunk (presumably its position within the document; confirm against rag-api).</summary>
public int ChunkIndex { get; init; }
/// <summary>Text of the chunk; the matcher service passes it to the model as evidence.</summary>
public required string Text { get; init; }
/// <summary>Score reported by rag-api for this chunk.</summary>
public double Score { get; init; }
}
}
@@ -0,0 +1,11 @@
using Api.Requests;
using Api.Responses;
namespace Api.Services.Contracts;
/// <summary>
/// Business operations of the CV matcher exposed to the controller.
/// </summary>
public interface ICvMatcherService
{
/// <summary>Indexes an uploaded CV file and returns the resulting document details.</summary>
/// <param name="file">Uploaded CV file.</param>
/// <param name="gdprConsent">Whether the caller gave GDPR consent.</param>
/// <param name="ct">Cancellation token.</param>
Task<CvUploadResponse> UploadCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct);
/// <summary>Scores one CV against the single job described in the request.</summary>
/// <param name="request">CV document id, job source, consent and optional email.</param>
/// <param name="ct">Cancellation token.</param>
Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct);
/// <summary>Searches indexed jobs for the CV in the request and scores the best hits.</summary>
/// <param name="request">CV document id plus optional result limit and email.</param>
/// <param name="ct">Cancellation token.</param>
Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct);
}
@@ -0,0 +1,6 @@
namespace Api.Services.Contracts;
/// <summary>
/// Sends CV match results by email.
/// </summary>
public interface IEmailService
{
/// <summary>Sends one plain-text match email.</summary>
/// <param name="explicitTo">Recipient requested by the caller; may be null or blank.</param>
/// <param name="subject">Subject line of the message.</param>
/// <param name="body">Body text of the message.</param>
/// <param name="ct">Cancellation token.</param>
Task SendMatchAsync(string? explicitTo, string subject, string body, CancellationToken ct);
}
@@ -1,4 +1,4 @@
namespace Api.Services.Contracts.Rag;
namespace Api.Services.Contracts;
public interface IJobTextExtractor
{
@@ -0,0 +1,6 @@
namespace Api.Services.Contracts;
/// <summary>
/// Chat-completion client used by the matcher to score CV/job pairs.
/// </summary>
public interface IMatcherAiClient
{
/// <summary>Requests a chat completion and returns the text content of the model's reply.</summary>
/// <param name="systemPrompt">System message sent to the model.</param>
/// <param name="userPrompt">User message sent to the model.</param>
/// <param name="temperature">Sampling temperature passed to the model.</param>
/// <param name="ct">Cancellation token.</param>
Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct);
}
@@ -0,0 +1,12 @@
using Api.Responses;
namespace Api.Services.Contracts;
/// <summary>
/// Persistence for match results and cached chat completions.
/// </summary>
public interface IMatcherRepository
{
/// <summary>Prepares the repository; called once at application startup before requests are served.</summary>
/// <param name="ct">Cancellation token.</param>
Task InitializeAsync(CancellationToken ct);
/// <summary>Returns the stored match for a CV/job pair, or null when there is none.</summary>
/// <param name="cvDocumentId">Identifier of the CV document.</param>
/// <param name="jobDocumentId">Identifier of the job document.</param>
/// <param name="ct">Cancellation token.</param>
Task<JobMatchResponse?> GetMatchAsync(string cvDocumentId, string jobDocumentId, CancellationToken ct);
/// <summary>Stores the match result for a CV/job pair.</summary>
/// <param name="cvDocumentId">Identifier of the CV document.</param>
/// <param name="jobDocumentId">Identifier of the job document.</param>
/// <param name="response">Result to store.</param>
/// <param name="ct">Cancellation token.</param>
Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, JobMatchResponse response, CancellationToken ct);
/// <summary>Returns the cached chat completion for a cache key, or null when there is none.</summary>
/// <param name="cacheKey">Key identifying the chat request.</param>
/// <param name="ct">Cancellation token.</param>
Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct);
/// <summary>Stores a chat completion under a cache key together with the model and temperature used.</summary>
/// <param name="cacheKey">Key identifying the chat request.</param>
/// <param name="model">Name of the chat model that produced the response.</param>
/// <param name="temperature">Temperature the request was made with.</param>
/// <param name="responseText">Text of the model's reply.</param>
/// <param name="ct">Cancellation token.</param>
Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct);
}
@@ -0,0 +1,12 @@
using Api.Requests;
using Api.Responses;
namespace Api.Services.Contracts;
/// <summary>
/// Client for the rag-api service: document indexing, document lookup and semantic search.
/// </summary>
public interface IRagApiClient
{
/// <summary>Uploads a CV file to rag-api for indexing as a "cv" document.</summary>
/// <param name="file">Uploaded CV file.</param>
/// <param name="ct">Cancellation token.</param>
Task<RagIndexResponse> IndexCvPdfAsync(IFormFile file, CancellationToken ct);
/// <summary>Sends job text to rag-api for indexing as a "job" document.</summary>
/// <param name="text">Job text to index.</param>
/// <param name="url">Optional source URL of the job.</param>
/// <param name="title">Optional title of the job.</param>
/// <param name="ct">Cancellation token.</param>
Task<RagIndexResponse> IndexJobTextAsync(string text, string? url, string? title, CancellationToken ct);
/// <summary>Loads a document by id; returns null when rag-api answers 404.</summary>
/// <param name="documentId">Identifier of the document.</param>
/// <param name="ct">Cancellation token.</param>
Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct);
/// <summary>Runs a semantic search on rag-api.</summary>
/// <param name="request">Query text, optional target document types and optional result limit.</param>
/// <param name="ct">Cancellation token.</param>
Task<RagSearchResponse> SearchAsync(RagSearchRequest request, CancellationToken ct);
}
+201
View File
@@ -0,0 +1,201 @@
using System.Text.Json;
using Api.Requests;
using Api.Responses;
using Api.Services.Contracts;
using Api.Settings;
using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Business logic of the CV matcher: indexes CVs and jobs through rag-api, asks the chat
/// model to score CV/job pairs, persists the results and emails them.
/// </summary>
public sealed class CvMatcherService : ICvMatcherService
{
    /// <summary>Options for parsing the model's JSON reply; cached instead of rebuilt on every parse.</summary>
    private static readonly JsonSerializerOptions ResultJsonOptions = new(JsonSerializerDefaults.Web);

    private readonly IRagApiClient _rag;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly IMatcherAiClient _ai;
    private readonly IMatcherRepository _repository;
    private readonly IEmailService _email;
    private readonly MatcherSettings _settings;

    public CvMatcherService(
        IRagApiClient rag,
        IJobTextExtractor jobTextExtractor,
        IMatcherAiClient ai,
        IMatcherRepository repository,
        IEmailService email,
        IOptions<MatcherSettings> options)
    {
        _rag = rag;
        _jobTextExtractor = jobTextExtractor;
        _ai = ai;
        _repository = repository;
        _email = email;
        _settings = options.Value;
    }

    /// <summary>Indexes an uploaded CV through rag-api.</summary>
    /// <param name="file">Uploaded CV file.</param>
    /// <param name="gdprConsent">Must be true.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Details of the indexed CV document.</returns>
    /// <exception cref="InvalidOperationException">GDPR consent was not given.</exception>
    public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        if (!gdprConsent) throw new InvalidOperationException("GDPR consent is required.");

        var response = await _rag.IndexCvPdfAsync(file, ct);
        return new CvUploadResponse
        {
            DocumentId = response.DocumentId,
            TextHash = response.TextHash,
            DocumentType = response.DocumentType,
            Title = response.Title,
            Chunks = response.Chunks,
            Characters = response.Characters,
            Cached = response.Cached,
            Summary = response.Cached ? "CV already indexed. Cached data reused." : "CV indexed successfully."
        };
    }

    /// <summary>Searches indexed jobs for a CV and scores the best hits with the chat model.</summary>
    /// <param name="request">CV document id plus optional result limit and email.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The scored jobs for the CV.</returns>
    /// <exception cref="InvalidOperationException">The document does not exist or is not a CV.</exception>
    public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
    {
        var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
        if (!string.Equals(cv.DocumentType, "cv", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("The provided document is not a CV.");
        }

        var search = await _rag.SearchAsync(new RagSearchRequest
        {
            QueryText = BuildCvSearchProfile(cv.Text),
            TargetDocumentTypes = ["job"],
            TopK = request.TopK ?? _settings.TopK
        }, ct);

        // Only the first few hits get the (expensive) model scoring; the limit is kept within 1..10.
        var deepScoreLimit = Math.Clamp(_settings.DeepScoreTopN, 1, 10);
        var jobs = new List<JobMatchResponse>();
        foreach (var result in search.Results.Take(deepScoreLimit))
        {
            var job = await _rag.GetDocumentAsync(result.DocumentId, ct);
            if (job is null) continue;
            jobs.Add(await ScorePairAsync(cv, job, result.MatchedChunks.Select(x => x.Text).ToArray(), request.Email, ct));
        }

        return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
    }

    /// <summary>Indexes the job from the request and scores it against the CV.</summary>
    /// <param name="request">CV document id, job URL or description, consent and optional email.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The match result for the CV/job pair.</returns>
    /// <exception cref="InvalidOperationException">
    /// Consent or CV id is missing, a document cannot be found, or too little job text was extracted.
    /// </exception>
    public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
    {
        if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
        if (string.IsNullOrWhiteSpace(request.CvDocumentId)) throw new InvalidOperationException("Missing CV document id.");

        var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText.Length < 80) throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");

        var job = await _rag.IndexJobTextAsync(jobText, request.JobUrl, ExtractJobTitle(jobText), ct);
        var jobDocument = await _rag.GetDocumentAsync(job.DocumentId, ct) ?? throw new InvalidOperationException("Indexed job document not found.");

        var search = await _rag.SearchAsync(new RagSearchRequest
        {
            QueryText = BuildCvSearchProfile(cv.Text),
            TargetDocumentTypes = ["job"],
            TopK = Math.Max(5, _settings.TopK)
        }, ct);

        // Use the chunks matched for this job as evidence when it shows up in the search results.
        var matchedChunks = search.Results
            .FirstOrDefault(x => x.DocumentId == job.DocumentId)?
            .MatchedChunks.Select(x => x.Text).ToArray() ?? [];

        return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, ct);
    }

    /// <summary>
    /// Returns the stored match for the pair when there is one; otherwise asks the model for a
    /// score, saves the result and emails it.
    /// </summary>
    private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, CancellationToken ct)
    {
        var cached = await _repository.GetMatchAsync(cv.Id, job.Id, ct);
        if (cached is not null)
        {
            // The result comes from storage, so report it as cached regardless of the stored flag.
            cached.Cached = true;
            return cached;
        }

        var cvText = Limit(cv.Text, 18000);
        var jobText = Limit(job.Text, 14000);
        // Without matched chunks, fall back to the beginning of the job text as evidence.
        var evidence = evidenceChunks.Count > 0 ? string.Join("\n\n", evidenceChunks.Take(4)) : Limit(job.Text, 4000);

        const string systemPrompt = """
            You are a strict CV-to-job matching engine. Return JSON only. Score realistically from 0 to 100.
            Penalize missing required skills. Do not invent experience. Use concise business language.
            JSON shape: {"score":number,"summary":"...","strengths":["..."],"gaps":["..."],"recommendations":["..."],"evidence":["..."]}
            """;
        var userPrompt = $"""
            CV:
            {cvText}
            JOB:
            {jobText}
            SEMANTICALLY MATCHED JOB EVIDENCE:
            {evidence}
            """;

        var json = await _ai.CreateChatCompletionAsync(systemPrompt, userPrompt, 0.2m, ct);
        var result = ParseResult(json);
        result.JobDocumentId = job.Id;
        result.JobUrl = job.SourceUrl;
        result.Cached = false;

        await _repository.SaveMatchAsync(cv.Id, job.Id, result, ct);
        await _email.SendMatchAsync(
            email,
            $"MyAi.ro CV Match: {result.Score}% - {job.Title}",
            BuildEmailBody(cv, job, result),
            ct);

        return result;
    }

    /// <summary>
    /// Parses the model's JSON reply. A reply that is not valid JSON for the expected shape
    /// yields a zero-score placeholder instead of an error.
    /// </summary>
    private static JobMatchResponse ParseResult(string json)
    {
        try
        {
            var parsed = JsonSerializer.Deserialize<JobMatchResponse>(json, ResultJsonOptions);
            if (parsed is not null) return Normalize(parsed);
        }
        catch (JsonException)
        {
            // Fall through to safe response.
        }

        return new JobMatchResponse
        {
            Score = 0,
            Summary = "The AI response could not be parsed as structured JSON.",
            Recommendations = ["Inspect the raw model output and tune the scoring prompt."]
        };
    }

    /// <summary>
    /// Repairs a parsed model reply: a JSON null overwrites the property initializers, which
    /// would later break string.Join in the email body, and the score is kept within 0..100.
    /// </summary>
    private static JobMatchResponse Normalize(JobMatchResponse result)
    {
        result.Score = Math.Clamp(result.Score, 0, 100);
        result.Summary ??= string.Empty;
        result.Strengths ??= [];
        result.Gaps ??= [];
        result.Recommendations ??= [];
        result.Evidence ??= [];
        return result;
    }

    /// <summary>Builds the search query for a CV: a fixed profile prefix plus the first 10000 characters of its text.</summary>
    private static string BuildCvSearchProfile(string cvText)
    {
        var text = Limit(cvText, 10000);
        return $"Candidate profile, skills, technologies, seniority, industry experience, project experience: {text}";
    }

    /// <summary>Uses the first sentence or line of 9 to 139 characters as the job title, or a generic title.</summary>
    private static string ExtractJobTitle(string jobText)
    {
        var first = jobText.Split('.', '\n', '\r').Select(x => x.Trim()).FirstOrDefault(x => x.Length is > 8 and < 140);
        return first ?? "Job description";
    }

    /// <summary>Truncates <paramref name="value"/> to at most <paramref name="max"/> characters.</summary>
    private static string Limit(string value, int max) => value.Length <= max ? value : value[..max];

    /// <summary>Formats the plain-text email body for a match result.</summary>
    private static string BuildEmailBody(RagDocumentDetails cv, RagDocumentDetails job, JobMatchResponse result) => $"""
        CV Matcher result
        CV: {cv.Title}
        Job: {job.Title}
        Job URL: {job.SourceUrl ?? "N/A"}
        Score: {result.Score}%
        Summary:
        {result.Summary}
        Strengths:
        - {string.Join("\n- ", result.Strengths)}
        Gaps:
        - {string.Join("\n- ", result.Gaps)}
        Recommendations:
        - {string.Join("\n- ", result.Recommendations)}
        """;
}
+46
View File
@@ -0,0 +1,46 @@
using Api.Services.Contracts;
using Api.Settings;
using MailKit.Net.Smtp;
using MailKit.Security;
using Microsoft.Extensions.Options;
using MimeKit;
namespace Api.Services;
/// <summary>
/// Sends CV match results as plain-text emails over SMTP using the configured settings.
/// </summary>
public sealed class EmailService : IEmailService
{
    private readonly SmtpSettings _settings;
    private readonly ILogger<EmailService> _logger;

    public EmailService(IOptions<SmtpSettings> options, ILogger<EmailService> logger)
    {
        _settings = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Sends one match email. Sending is skipped, with a log entry, when no SMTP host or
    /// recipient is available or when the recipient is not a valid address.
    /// </summary>
    /// <param name="explicitTo">Recipient requested by the caller; the configured address is used when blank.</param>
    /// <param name="subject">Subject line of the message.</param>
    /// <param name="body">Plain-text body of the message.</param>
    /// <param name="ct">Cancellation token.</param>
    public async Task SendMatchAsync(string? explicitTo, string subject, string body, CancellationToken ct)
    {
        var to = !string.IsNullOrWhiteSpace(explicitTo) ? explicitTo : _settings.ToEmail;
        if (string.IsNullOrWhiteSpace(_settings.Host) || string.IsNullOrWhiteSpace(to))
        {
            _logger.LogInformation("SMTP is not configured. Skipping CV matcher email.");
            return;
        }

        // The recipient can come straight from the request. A malformed address must not fail
        // the whole match after it was scored and saved, so skip the email instead of throwing.
        if (!MailboxAddress.TryParse(to, out var recipient))
        {
            _logger.LogWarning("CV matcher email recipient is not a valid address. Skipping CV matcher email.");
            return;
        }

        var message = new MimeMessage();
        message.From.Add(MailboxAddress.Parse(_settings.FromEmail));
        message.To.Add(recipient);
        message.Subject = subject;
        message.Body = new TextPart("plain") { Text = body };

        using var client = new SmtpClient();
        var secureSocket = _settings.UseStartTls ? SecureSocketOptions.StartTls : SecureSocketOptions.Auto;
        await client.ConnectAsync(_settings.Host, _settings.Port, secureSocket, ct);

        // Authenticate only when a user name is configured.
        if (!string.IsNullOrWhiteSpace(_settings.Username))
        {
            await client.AuthenticateAsync(_settings.Username, _settings.Password, ct);
        }

        await client.SendAsync(message, ct);
        await client.DisconnectAsync(true, ct);
    }
}
+13
View File
@@ -0,0 +1,13 @@
using System.Security.Cryptography;
using System.Text;
namespace Api.Services;
/// <summary>
/// Hashing helper for building fixed-length keys from arbitrary text.
/// </summary>
public static class HashHelper
{
    /// <summary>
    /// Returns the SHA-256 digest of the UTF-8 bytes of <paramref name="value"/> as an
    /// uppercase hexadecimal string. A null value is hashed like the empty string.
    /// </summary>
    /// <param name="value">Text to hash.</param>
    /// <returns>64 hexadecimal characters.</returns>
    public static string Compute(string value)
    {
        var bytes = Encoding.UTF8.GetBytes(value ?? string.Empty);
        return Convert.ToHexString(SHA256.HashData(bytes));
    }
}
@@ -1,21 +1,22 @@
using System.Net;
using System.Text.RegularExpressions;
using Api.Services.Contracts.Rag;
using Api.Services.Contracts;
using Api.Settings;
using Microsoft.Extensions.Options;
namespace Api.Services.Rag;
namespace Api.Services;
public sealed class JobTextExtractor : IJobTextExtractor
{
private readonly HttpClient _httpClient;
private readonly RagSettings _settings;
private readonly HttpClient _http;
private readonly MatcherSettings _settings;
public JobTextExtractor(HttpClient httpClient, IOptions<RagSettings> options)
public JobTextExtractor(HttpClient http, IOptions<MatcherSettings> options)
{
_httpClient = httpClient;
_http = http;
_settings = options.Value;
_httpClient.Timeout = TimeSpan.FromSeconds(20);
_httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
_http.Timeout = TimeSpan.FromSeconds(25);
_http.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
}
public async Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct)
@@ -24,17 +25,16 @@ public sealed class JobTextExtractor : IJobTextExtractor
if (!string.IsNullOrWhiteSpace(pasted)) return Limit(pasted);
if (string.IsNullOrWhiteSpace(jobUrl)) return string.Empty;
if (!Uri.TryCreate(jobUrl, UriKind.Absolute, out var uri) || (uri.Scheme != "http" && uri.Scheme != "https"))
if (!Uri.TryCreate(jobUrl, UriKind.Absolute, out var uri) || uri.Scheme is not ("http" or "https"))
{
throw new InvalidOperationException("Invalid job URL.");
}
var html = await _httpClient.GetStringAsync(uri, ct);
var html = await _http.GetStringAsync(uri, ct);
html = Regex.Replace(html, "<script[\\s\\S]*?</script>", " ", RegexOptions.IgnoreCase);
html = Regex.Replace(html, "<style[\\s\\S]*?</style>", " ", RegexOptions.IgnoreCase);
html = Regex.Replace(html, "<[^>]+>", " ");
var text = WebUtility.HtmlDecode(html);
return Limit(Normalize(text));
return Limit(Normalize(WebUtility.HtmlDecode(html)));
}
private string Limit(string value)
@@ -46,7 +46,6 @@ public sealed class JobTextExtractor : IJobTextExtractor
private static string Normalize(string value)
{
if (string.IsNullOrWhiteSpace(value)) return string.Empty;
var parts = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
return string.Join(' ', parts).Trim();
return string.Join(' ', value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries)).Trim();
}
}
@@ -0,0 +1,95 @@
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Api.Services.Contracts;
using Api.Settings;
using Microsoft.Extensions.Options;
namespace Api.Services;
/// <summary>
/// Chat-completion client for the matcher. Talks to OpenAI or Ollama depending on the
/// configured provider and caches every reply in the matcher repository.
/// </summary>
public sealed class MatcherAiClient : IMatcherAiClient
{
    private readonly HttpClient _http;
    private readonly IMatcherRepository _repository;
    private readonly AiSettings _settings;

    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    public MatcherAiClient(HttpClient http, IMatcherRepository repository, IOptions<AiSettings> options)
    {
        _http = http;
        _repository = repository;
        _settings = options.Value;
    }

    /// <summary>
    /// Returns the model's reply for the given prompts, served from the cache when the same
    /// provider, model, temperature and prompts were requested before.
    /// </summary>
    /// <param name="systemPrompt">System message sent to the model.</param>
    /// <param name="userPrompt">User message sent to the model.</param>
    /// <param name="temperature">Sampling temperature.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Text content of the model's reply.</returns>
    /// <exception cref="InvalidOperationException">The provider answered with a non-success status or the OpenAI key is missing.</exception>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        var model = GetModel();

        // Format with the invariant culture: a culture-specific decimal separator would give the
        // same request different cache keys on differently configured hosts.
        var cacheKey = HashHelper.Compute(FormattableString.Invariant(
            $"chat:{_settings.Provider}:{model}:{temperature:0.00}:{systemPrompt}:{userPrompt}"));

        var cached = await _repository.GetChatCompletionAsync(cacheKey, ct);
        if (cached is not null) return cached;

        var response = IsOllama()
            ? await CreateOllamaChatCompletionAsync(systemPrompt, userPrompt, temperature, ct)
            : await CreateOpenAiChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);

        await _repository.SaveChatCompletionAsync(cacheKey, model, temperature, response, ct);
        return response;
    }

    /// <summary>True when the configured provider is "Ollama" (case-insensitive); anything else means OpenAI.</summary>
    private bool IsOllama() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase);

    /// <summary>Chat model name of the active provider.</summary>
    private string GetModel() => IsOllama() ? _settings.Ollama.ChatModel : _settings.OpenAI.ChatModel;

    /// <summary>Calls the OpenAI chat completions endpoint in JSON-object mode and returns the first choice's content.</summary>
    private async Task<string> CreateOpenAiChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(_settings.OpenAI.ApiKey)) throw new InvalidOperationException("OpenAI API key is missing.");

        using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/chat/completions");
        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _settings.OpenAI.ApiKey);
        request.Content = ToJson(new
        {
            model = _settings.OpenAI.ChatModel,
            temperature,
            response_format = new { type = "json_object" },
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            }
        });

        // Per-call timeout of at least 15 seconds, linked to the caller's token.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(TimeSpan.FromSeconds(Math.Max(15, _settings.OpenAI.TimeoutSeconds)));

        using var response = await _http.SendAsync(request, cts.Token);
        var json = await response.Content.ReadAsStringAsync(cts.Token);
        if (!response.IsSuccessStatusCode) throw new InvalidOperationException($"OpenAI chat failed: {(int)response.StatusCode} {json}");

        using var doc = JsonDocument.Parse(json);
        return doc.RootElement.GetProperty("choices")[0].GetProperty("message").GetProperty("content").GetString() ?? "{}";
    }

    /// <summary>Calls the Ollama chat endpoint (non-streaming, JSON format) and returns the message content.</summary>
    private async Task<string> CreateOllamaChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        var baseUrl = _settings.Ollama.BaseUrl.TrimEnd('/');

        // Per-call timeout of at least 30 seconds, linked to the caller's token.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(TimeSpan.FromSeconds(Math.Max(30, _settings.Ollama.TimeoutSeconds)));

        using var response = await _http.PostAsync($"{baseUrl}/api/chat", ToJson(new
        {
            model = _settings.Ollama.ChatModel,
            stream = false,
            format = "json",
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            },
            options = new { temperature = (float)temperature }
        }), cts.Token);

        var json = await response.Content.ReadAsStringAsync(cts.Token);
        if (!response.IsSuccessStatusCode) throw new InvalidOperationException($"Ollama chat failed: {(int)response.StatusCode} {json}");

        using var doc = JsonDocument.Parse(json);
        return doc.RootElement.GetProperty("message").GetProperty("content").GetString() ?? "{}";
    }

    /// <summary>Serializes a payload as UTF-8 JSON request content, omitting null properties.</summary>
    private static StringContent ToJson<T>(T payload) => new(JsonSerializer.Serialize(payload, JsonOptions), Encoding.UTF8, "application/json");
}
+80
View File
@@ -0,0 +1,80 @@
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using Api.Requests;
using Api.Responses;
using Api.Services.Contracts;
using Api.Settings;
using Microsoft.Extensions.Options;
namespace Api.Services;
public sealed class RagApiClient : IRagApiClient
{
private readonly HttpClient _http;
private readonly RagApiSettings _settings;
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web);
/// <summary>
/// Points the injected HTTP client at the configured rag-api base URL (always ending in a
/// single slash so relative paths resolve below it) and, when an internal API key is
/// configured, sends it with every request in the X-Internal-Api-Key header.
/// </summary>
/// <param name="http">HTTP client supplied by the typed-client registration.</param>
/// <param name="options">rag-api connection settings.</param>
public RagApiClient(HttpClient http, IOptions<RagApiSettings> options)
{
    _http = http;
    _settings = options.Value;

    var root = _settings.BaseUrl.TrimEnd('/');
    _http.BaseAddress = new Uri(root + "/");

    var apiKey = _settings.InternalApiKey;
    if (string.IsNullOrWhiteSpace(apiKey))
    {
        return;
    }

    _http.DefaultRequestHeaders.Add("X-Internal-Api-Key", apiKey);
}
/// <summary>
/// Posts the uploaded file to rag-api as a multipart form: the file itself (sent as
/// application/pdf), the document type "cv" and the file name as title.
/// </summary>
/// <param name="file">Uploaded CV file.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The index response read from the rag-api reply.</returns>
public async Task<RagIndexResponse> IndexCvPdfAsync(IFormFile file, CancellationToken ct)
{
    using var form = new MultipartFormDataContent();

    await using var pdfStream = file.OpenReadStream();
    using var pdfPart = new StreamContent(pdfStream);
    pdfPart.Headers.ContentType = new MediaTypeHeaderValue("application/pdf");

    var fileName = file.FileName;
    form.Add(pdfPart, "file", fileName);
    form.Add(new StringContent("cv"), "documentType");
    form.Add(new StringContent(fileName), "title");

    using var reply = await _http.PostAsync("api/rag/documents", form, ct);
    return await ReadJsonAsync<RagIndexResponse>(reply, ct);
}
public async Task<RagIndexResponse> IndexJobTextAsync(string text, string? url, string? title, CancellationToken ct)
{
using var content = new MultipartFormDataContent
{
{ new StringContent(text), "text" },
{ new StringContent("job"), "documentType" },
{ new StringContent(title ?? "Job description"), "title" }
};
if (!string.IsNullOrWhiteSpace(url)) content.Add(new StringContent(url), "sourceUrl");
using var response = await _http.PostAsync("api/rag/documents", content, ct);
return await ReadJsonAsync<RagIndexResponse>(response, ct);
}
public async Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct)
{
using var response = await _http.GetAsync($"api/rag/documents/{Uri.EscapeDataString(documentId)}", ct);
if (response.StatusCode == System.Net.HttpStatusCode.NotFound) return null;
return await ReadJsonAsync<RagDocumentDetails>(response, ct);
}
public async Task<RagSearchResponse> SearchAsync(RagSearchRequest request, CancellationToken ct)
{
using var response = await _http.PostAsync(
"api/rag/search",
new StringContent(JsonSerializer.Serialize(request, JsonOptions), Encoding.UTF8, "application/json"),
ct);
return await ReadJsonAsync<RagSearchResponse>(response, ct);
}
private static async Task<T> ReadJsonAsync<T>(HttpResponseMessage response, CancellationToken ct)
{
var json = await response.Content.ReadAsStringAsync(ct);
if (!response.IsSuccessStatusCode)
{
throw new InvalidOperationException($"RAG API failed: {(int)response.StatusCode} {json}");
}
return JsonSerializer.Deserialize<T>(json, JsonOptions) ?? throw new InvalidOperationException("RAG API returned invalid JSON.");
}
}
@@ -0,0 +1,105 @@
using System.Text.Json;
using Api.Responses;
using Api.Services.Contracts;
using Microsoft.Data.SqlClient;
namespace Api.Services;
/// <summary>
/// SQL Server persistence for the CV matcher: creates the database and schema on start-up,
/// and stores/reads cached match results and cached chat completions.
/// </summary>
public sealed class SqlMatcherRepository : IMatcherRepository
{
    // Shared serializer options; creating a new instance per call discards the serializer's metadata cache.
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web);

    // A batch separator is a line that contains only GO (any casing), optionally surrounded by
    // spaces or tabs. Matching whole lines avoids cutting a script at "GO" inside identifiers or text.
    private static readonly System.Text.RegularExpressions.Regex BatchSeparator = new(
        @"^[ \t]*GO[ \t]*\r?$",
        System.Text.RegularExpressions.RegexOptions.Multiline
        | System.Text.RegularExpressions.RegexOptions.IgnoreCase
        | System.Text.RegularExpressions.RegexOptions.CultureInvariant);

    private readonly string _connectionString;

    /// <summary>Reads the <c>CvMatcherDb</c> connection string.</summary>
    /// <exception cref="InvalidOperationException">The connection string is not configured.</exception>
    public SqlMatcherRepository(IConfiguration configuration)
    {
        _connectionString = configuration.GetConnectionString("CvMatcherDb")
            ?? throw new InvalidOperationException("Connection string 'CvMatcherDb' is missing.");
    }

    /// <summary>
    /// Ensures the database exists, then executes <c>Database/schema.sql</c> (read from the
    /// application base directory) one batch at a time.
    /// </summary>
    public async Task InitializeAsync(CancellationToken ct)
    {
        await EnsureDatabaseExistsAsync(ct);

        var sql = await File.ReadAllTextAsync(Path.Combine(AppContext.BaseDirectory, "Database", "schema.sql"), ct);
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);

        foreach (var batch in BatchSeparator.Split(sql))
        {
            var commandText = batch.Trim();
            if (commandText.Length == 0)
            {
                continue;
            }

            await using var command = new SqlCommand(commandText, connection);
            await command.ExecuteNonQueryAsync(ct);
        }
    }

    /// <summary>
    /// Returns the stored match result for the CV/job pair with <c>Cached</c> set to true,
    /// or <c>null</c> when nothing (or only blank JSON) is stored.
    /// </summary>
    public async Task<JobMatchResponse?> GetMatchAsync(string cvDocumentId, string jobDocumentId, CancellationToken ct)
    {
        const string sql = "SELECT ResultJson FROM CvMatchResults WHERE CvDocumentId = @CvDocumentId AND JobDocumentId = @JobDocumentId";
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CvDocumentId", cvDocumentId);
        command.Parameters.AddWithValue("@JobDocumentId", jobDocumentId);

        var json = await command.ExecuteScalarAsync(ct) as string;
        if (string.IsNullOrWhiteSpace(json))
        {
            return null;
        }

        var result = JsonSerializer.Deserialize<JobMatchResponse>(json, JsonOptions);
        if (result is not null)
        {
            result.Cached = true;
        }

        return result;
    }

    /// <summary>
    /// Stores the match result for the CV/job pair. The insert is skipped when a row for the
    /// pair already exists, so the first stored result is kept.
    /// </summary>
    public async Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, JobMatchResponse response, CancellationToken ct)
    {
        const string sql = """
            IF NOT EXISTS (SELECT 1 FROM CvMatchResults WHERE CvDocumentId = @CvDocumentId AND JobDocumentId = @JobDocumentId)
            INSERT INTO CvMatchResults (Id, CvDocumentId, JobDocumentId, ResultJson, Score, CreatedAt)
            VALUES (@Id, @CvDocumentId, @JobDocumentId, @ResultJson, @Score, SYSUTCDATETIME())
            """;
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@Id", Guid.NewGuid().ToString("N"));
        command.Parameters.AddWithValue("@CvDocumentId", cvDocumentId);
        command.Parameters.AddWithValue("@JobDocumentId", jobDocumentId);
        command.Parameters.AddWithValue("@ResultJson", JsonSerializer.Serialize(response, JsonOptions));
        command.Parameters.AddWithValue("@Score", response.Score);
        await command.ExecuteNonQueryAsync(ct);
    }

    /// <summary>Returns the cached chat response for <paramref name="cacheKey"/>, or <c>null</c> when absent.</summary>
    public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
    {
        const string sql = "SELECT ResponseText FROM CvMatcherChatCache WHERE CacheKey = @CacheKey";
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);
        return await command.ExecuteScalarAsync(ct) as string;
    }

    /// <summary>
    /// Caches a chat response under <paramref name="cacheKey"/>. The insert is skipped when the
    /// key is already present.
    /// </summary>
    public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
    {
        const string sql = """
            IF NOT EXISTS (SELECT 1 FROM CvMatcherChatCache WHERE CacheKey = @CacheKey)
            INSERT INTO CvMatcherChatCache (CacheKey, Model, Temperature, ResponseText, CreatedAt)
            VALUES (@CacheKey, @Model, @Temperature, @ResponseText, SYSUTCDATETIME())
            """;
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);
        command.Parameters.AddWithValue("@Model", model);
        command.Parameters.AddWithValue("@Temperature", temperature);
        command.Parameters.AddWithValue("@ResponseText", responseText);
        await command.ExecuteNonQueryAsync(ct);
    }

    /// <summary>
    /// Connects to <c>master</c> and creates the database named in the connection string when
    /// it does not exist yet. Does nothing when the connection string names no database.
    /// </summary>
    private async Task EnsureDatabaseExistsAsync(CancellationToken ct)
    {
        var builder = new SqlConnectionStringBuilder(_connectionString);
        var databaseName = builder.InitialCatalog;
        if (string.IsNullOrWhiteSpace(databaseName))
        {
            return;
        }

        builder.InitialCatalog = "master";
        await using var connection = new SqlConnection(builder.ConnectionString);
        await connection.OpenAsync(ct);

        // CREATE DATABASE cannot take the name as a parameter, so it is embedded in dynamic SQL.
        // Escape "]" for the bracketed identifier and "'" for the surrounding EXEC string literal.
        var safeName = databaseName.Replace("]", "]]").Replace("'", "''");
        await using var command = new SqlCommand($"IF DB_ID(@DatabaseName) IS NULL EXEC('CREATE DATABASE [{safeName}]')", connection);
        command.Parameters.AddWithValue("@DatabaseName", databaseName);
        await command.ExecuteNonQueryAsync(ct);
    }
}
+52
View File
@@ -0,0 +1,52 @@
namespace Api.Settings;
/// <summary>Where the rag-api service lives and which internal API key to present to it.</summary>
public sealed class RagApiSettings
{
    /// <summary>Base URL of rag-api. Defaults to the local development address.</summary>
    public string BaseUrl { get; set; } = "http://localhost:8081";

    /// <summary>Internal API key sent to rag-api. Empty by default.</summary>
    public string InternalApiKey { get; set; } = "";
}
/// <summary>Settings for the internal API key check applied to incoming requests.</summary>
public sealed class InternalApiSettings
{
    /// <summary>The expected internal API key. Empty by default.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>Whether incoming requests must present the key. Off by default.</summary>
    public bool RequireApiKey { get; set; }
}
/// <summary>Selects the AI provider and carries the per-provider settings.</summary>
public sealed class AiSettings
{
    /// <summary>Name of the provider to use. Defaults to "OpenAI".</summary>
    public string Provider { get; set; } = "OpenAI";

    /// <summary>OpenAI-specific settings; never null by default.</summary>
    public OpenAiSettings OpenAI { get; set; } = new OpenAiSettings();

    /// <summary>Ollama-specific settings; never null by default.</summary>
    public OllamaSettings Ollama { get; set; } = new OllamaSettings();
}
/// <summary>OpenAI connection settings.</summary>
public sealed class OpenAiSettings
{
    /// <summary>OpenAI API key. Empty by default.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>Chat model name. Defaults to "gpt-4o-mini".</summary>
    public string ChatModel { get; set; } = "gpt-4o-mini";

    /// <summary>Request timeout in seconds. Defaults to 90.</summary>
    public int TimeoutSeconds { get; set; } = 90;
}
/// <summary>Ollama connection settings.</summary>
public sealed class OllamaSettings
{
    /// <summary>Base URL of the Ollama server. Defaults to the local Ollama port.</summary>
    public string BaseUrl { get; set; } = "http://localhost:11434";

    /// <summary>Chat model name. Defaults to "llama3.1:8b".</summary>
    public string ChatModel { get; set; } = "llama3.1:8b";

    /// <summary>Request timeout in seconds. Defaults to 180.</summary>
    public int TimeoutSeconds { get; set; } = 180;
}
/// <summary>Tuning values for the matcher.</summary>
public sealed class MatcherSettings
{
    /// <summary>Defaults to 10. NOTE(review): presumably the number of search hits requested; confirm against the matcher service.</summary>
    public int TopK { get; set; } = 10;

    /// <summary>Defaults to 5. NOTE(review): presumably how many top candidates get LLM scoring; confirm against the matcher service.</summary>
    public int DeepScoreTopN { get; set; } = 5;

    /// <summary>Defaults to 60000. NOTE(review): presumably the cap on job text length in characters; confirm against the matcher service.</summary>
    public int MaxJobTextChars { get; set; } = 60_000;
}
/// <summary>SMTP settings for outgoing email.</summary>
public sealed class SmtpSettings
{
    /// <summary>SMTP server host name. Empty by default.</summary>
    public string Host { get; set; } = "";

    /// <summary>SMTP server port. Defaults to 587.</summary>
    public int Port { get; set; } = 587;

    /// <summary>SMTP user name. Empty by default.</summary>
    public string Username { get; set; } = "";

    /// <summary>SMTP password. Empty by default.</summary>
    public string Password { get; set; } = "";

    /// <summary>Whether to use STARTTLS. On by default.</summary>
    public bool UseStartTls { get; set; } = true;

    /// <summary>Sender address. Defaults to "noreply@myai.ro".</summary>
    public string FromEmail { get; set; } = "noreply@myai.ro";

    /// <summary>Recipient address. Empty by default.</summary>
    public string ToEmail { get; set; } = "";
}
+114
View File
@@ -0,0 +1,114 @@
{
"Serilog": {
"Using": [
"Serilog.Sinks.Console",
"Serilog.Sinks.File",
"Serilog.Sinks.Email"
],
"MinimumLevel": {
"Default": "Information",
"Override": {
"Microsoft.AspNetCore": "Warning",
"Microsoft.AspNetCore.Hosting": "Information",
"Microsoft.AspNetCore.Routing": "Warning",
"System.Net.Http.HttpClient": "Warning",
"Api": "Information"
}
},
"WriteTo": [
{
"Name": "Console",
"Args": {
"outputTemplate": "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext}: {Message:lj}{NewLine}{Exception}"
}
},
{
"Name": "File",
"Args": {
"path": "logs/api-.log",
"rollingInterval": "Day",
"retainedFileCountLimit": 30,
"outputTemplate": "{Timestamp:yyyy-MM-dd HH:mm:ss.fff zzz} [{Level:u3}] {SourceContext}: {Message:lj}{NewLine}{Exception}"
}
},
{
"Name": "Email",
"Args": {
"restrictedToMinimumLevel": "Error",
"fromEmail": "",
"toEmail": "",
"mailServer": "",
"networkCredential": {
"userName": "",
"password": ""
},
"port": 587,
"enableSsl": true,
"emailSubject": "[mihes.ro API] Error Alert",
"outputTemplate": "{Timestamp:yyyy-MM-dd HH:mm:ss.fff zzz} [{Level:u3}] {SourceContext}{NewLine}{Message:lj}{NewLine}{Exception}",
"batchPostingLimit": 10,
"period": "0.00:05:00"
}
}
],
"Enrich": [
"FromLogContext",
"WithMachineName",
"WithEnvironmentName"
]
},
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning",
"Microsoft.AspNetCore.Hosting": "Information",
"Microsoft.AspNetCore.Routing": "Warning",
"System.Net.Http.HttpClient": "Warning",
"Api": "Information"
},
"LogEnvironmentOnStartup": true
},
"AllowedHosts": "*",
"KeyVault": {
"VaultUri": "",
"Enabled": false
},
"ConnectionStrings": {
"CvMatcherDb": "Server=localhost,1433;Database=MyAiCvMatcher;User Id=sa;Password=Your_strong_password123;TrustServerCertificate=True"
},
"InternalApi": {
"ApiKey": "",
"RequireApiKey": false
},
"RagApi": {
"BaseUrl": "http://localhost:8081",
"InternalApiKey": ""
},
"Ai": {
"Provider": "OpenAI",
"OpenAI": {
"ApiKey": "",
"ChatModel": "gpt-4o-mini",
"TimeoutSeconds": 90
},
"Ollama": {
"BaseUrl": "http://localhost:11434",
"ChatModel": "llama3.1:8b",
"TimeoutSeconds": 180
}
},
"Matcher": {
"TopK": 10,
"DeepScoreTopN": 5,
"MaxJobTextChars": 60000
},
"Smtp": {
"Host": "",
"Port": 587,
"Username": "",
"Password": "",
"UseStartTls": true,
"FromEmail": "noreply@myai.ro",
"ToEmail": ""
}
}
+26
View File
@@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
<RootNamespace>Api</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
<PackageReference Include="Azure.Identity" Version="1.21.0" />
<PackageReference Include="DotNetEnv" Version="3.2.0" />
<PackageReference Include="MailKit" Version="4.16.0" />
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.1.3" />
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="10.1.7" />
</ItemGroup>
<ItemGroup>
<None Update="Database/schema.sql">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>
+72
View File
@@ -1,7 +1,74 @@
version: "3.8"
services:
mssql:
image: mcr.microsoft.com/mssql/server:2022-latest
container_name: myai-mssql
environment:
- ACCEPT_EULA=Y
- MSSQL_SA_PASSWORD=${MSSQL_SA_PASSWORD:-Your_strong_password123}
ports:
- "1433:1433"
volumes:
- myai-mssql-data:/var/opt/mssql
networks:
- myai-network
restart: unless-stopped
rag-api:
build:
context: ../rag-api
dockerfile: Dockerfile
container_name: myai-rag-api
depends_on:
- mssql
ports:
- "8081:8080"
env_file:
- .env
environment:
- ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Development}
- ASPNETCORE_URLS=http://+:8080
- ConnectionStrings__RagDb=Server=mssql,1433;Database=MyAiRag;User Id=sa;Password=${MSSQL_SA_PASSWORD:-Your_strong_password123};TrustServerCertificate=True
- InternalApi__RequireApiKey=true
- InternalApi__ApiKey=${INTERNAL_API_KEY:-change-this-internal-key}
- Ai__Provider=${AI_PROVIDER:-OpenAI}
- Ai__OpenAI__ApiKey=${OPENAI_API_KEY:-}
- Ai__Ollama__BaseUrl=${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
networks:
- myai-network
restart: unless-stopped
cv-matcher-api:
build:
context: ../cv-matcher-api
dockerfile: Dockerfile
container_name: myai-cv-matcher-api
depends_on:
- mssql
- rag-api
ports:
- "8082:8080"
env_file:
- .env
environment:
- ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Development}
- ASPNETCORE_URLS=http://+:8080
- ConnectionStrings__CvMatcherDb=Server=mssql,1433;Database=MyAiCvMatcher;User Id=sa;Password=${MSSQL_SA_PASSWORD:-Your_strong_password123};TrustServerCertificate=True
- InternalApi__RequireApiKey=true
- InternalApi__ApiKey=${INTERNAL_API_KEY:-change-this-internal-key}
- RagApi__BaseUrl=http://rag-api:8080
- RagApi__InternalApiKey=${INTERNAL_API_KEY:-change-this-internal-key}
- Ai__Provider=${AI_PROVIDER:-OpenAI}
- Ai__OpenAI__ApiKey=${OPENAI_API_KEY:-}
- Ai__Ollama__BaseUrl=${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
networks:
- myai-network
restart: unless-stopped
api:
depends_on:
- cv-matcher-api
build:
context: ../api
dockerfile: Dockerfile
@@ -16,6 +83,8 @@ services:
- ASPNETCORE_URLS=${ASPNETCORE_URLS:-http://+:8080}
- Cors__AllowedOrigins__0=http://localhost:5000
- Cors__AllowedOrigins__1=http://web:8080
- CvMatcherApi__BaseUrl=http://cv-matcher-api:8080
- CvMatcherApi__InternalApiKey=${INTERNAL_API_KEY:-change-this-internal-key}
volumes:
- ../api/logs:/app/logs
networks:
@@ -40,6 +109,9 @@ services:
- myai-network
restart: unless-stopped
volumes:
myai-mssql-data:
networks:
myai-network:
driver: bridge
+12
View File
@@ -6,6 +6,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "api", "api\api.csproj", "{1
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "web", "web\web.csproj", "{B0A3EAB7-759A-448A-A906-52DF75A70016}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "rag-api", "rag-api\rag-api.csproj", "{A63E1C1A-4A78-49F4-9F5C-D43783294861}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "cv-matcher-api", "cv-matcher-api\cv-matcher-api.csproj", "{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}"
EndProject
Project("{E53339B2-1760-4266-BCC7-CA923CBCF16C}") = "docker-compose", "docker-compose\docker-compose.dcproj", "{81DDED9D-158B-E303-5F62-77A2896D2A5A}"
EndProject
Global
@@ -22,6 +26,14 @@ Global
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Release|Any CPU.Build.0 = Release|Any CPU
{A63E1C1A-4A78-49F4-9F5C-D43783294861}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A63E1C1A-4A78-49F4-9F5C-D43783294861}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A63E1C1A-4A78-49F4-9F5C-D43783294861}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A63E1C1A-4A78-49F4-9F5C-D43783294861}.Release|Any CPU.Build.0 = Release|Any CPU
{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}.Release|Any CPU.Build.0 = Release|Any CPU
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Release|Any CPU.ActiveCfg = Release|Any CPU
+110
View File
@@ -0,0 +1,110 @@
using Microsoft.AspNetCore.Mvc;
using Api.Services.Contracts;
using Api.Requests;
namespace Api.Controllers;
/// <summary>
/// HTTP endpoints under <c>api/rag</c> for indexing documents (PDF upload, form text or JSON),
/// running semantic searches and fetching a document by id.
/// <see cref="InvalidOperationException"/> raised while indexing or searching is reported as 400.
/// </summary>
[ApiController]
[Route("api/rag")]
public sealed class RagController : ControllerBase
{
    private readonly IRagService _ragService;
    private readonly ILogger<RagController> _logger;

    public RagController(IRagService ragService, ILogger<RagController> logger)
        => (_ragService, _logger) = (ragService, logger);

    /// <summary>
    /// Indexes a document sent as multipart form data. When a file is present it is indexed as
    /// a PDF; otherwise the <paramref name="text"/> field is indexed. Request size is capped at 10 MiB.
    /// </summary>
    [HttpPost("documents")]
    [RequestSizeLimit(10 * 1024 * 1024)]
    public async Task<IActionResult> IndexDocument(
        [FromForm] IFormFile? file,
        [FromForm] string? text,
        [FromForm] string? documentType,
        [FromForm] string? title,
        [FromForm] string? sourceUrl,
        CancellationToken ct)
    {
        try
        {
            var hasFile = file is not null;
            _logger.LogInformation("Index document request received. HasFile={HasFile}, DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                hasFile, documentType, title, sourceUrl);

            if (file is null)
            {
                // No upload: index the text field instead.
                var textRequest = new IndexDocumentRequest
                {
                    Text = text,
                    DocumentType = documentType,
                    Title = title,
                    SourceUrl = sourceUrl
                };
                var indexedText = await _ragService.IndexTextAsync(textRequest, ct);
                _logger.LogInformation("Indexed text document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                    indexedText.DocumentId, indexedText.DocumentType, indexedText.Chunks, indexedText.Cached);
                return Ok(indexedText);
            }

            var indexedPdf = await _ragService.IndexPdfAsync(file, documentType, title, sourceUrl, ct);
            _logger.LogInformation("Indexed PDF document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                indexedPdf.DocumentId, indexedPdf.DocumentType, indexedPdf.Chunks, indexedPdf.Cached);
            return Ok(indexedPdf);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid document indexing request.");
            return BadRequest(new { error = ex.Message });
        }
    }

    /// <summary>Indexes a text document supplied as a JSON body.</summary>
    [HttpPost("documents/json")]
    public async Task<IActionResult> IndexJsonDocument([FromBody] IndexDocumentRequest request, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation("JSON document indexing request received. DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                request.DocumentType, request.Title, request.SourceUrl);

            var indexed = await _ragService.IndexTextAsync(request, ct);

            _logger.LogInformation("Indexed JSON document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                indexed.DocumentId, indexed.DocumentType, indexed.Chunks, indexed.Cached);
            return Ok(indexed);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid JSON document indexing request.");
            return BadRequest(new { error = ex.Message });
        }
    }

    /// <summary>Runs a semantic search and returns the service's result.</summary>
    [HttpPost("search")]
    public async Task<IActionResult> Search([FromBody] SearchRequest request, CancellationToken ct)
    {
        try
        {
            var targetTypes = string.Join(',', request.TargetDocumentTypes ?? []);
            _logger.LogInformation("Semantic search request received. TargetTypes={TargetTypes}, TopK={TopK}",
                targetTypes, request.TopK);

            var found = await _ragService.SearchAsync(request, ct);

            _logger.LogInformation("Semantic search completed. ResultCount={ResultCount}", found.Results.Count);
            return Ok(found);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid semantic search request.");
            return BadRequest(new { error = ex.Message });
        }
    }

    /// <summary>Returns the document with the given id, or 404 when the service finds none.</summary>
    [HttpGet("documents/{id}")]
    public async Task<IActionResult> GetDocument(string id, CancellationToken ct)
    {
        _logger.LogInformation("Get document request received. DocumentId={DocumentId}", id);

        var document = await _ragService.GetDocumentAsync(id, ct);
        if (document is not null)
        {
            return Ok(document);
        }

        _logger.LogWarning("Document not found. DocumentId={DocumentId}", id);
        return NotFound(new { error = "Document not found." });
    }
}
+63
View File
@@ -0,0 +1,63 @@
-- rag-api schema. Every step is guarded by an existence check, so the script can be re-run
-- safely on a database that already has these objects.

-- Chunk rows: one row per chunk of a document, with the chunk text and its embedding as raw bytes.
IF OBJECT_ID('dbo.RagChunks', 'U') IS NULL
BEGIN
CREATE TABLE dbo.RagChunks (
Id NVARCHAR(64) NOT NULL CONSTRAINT PK_RagChunks PRIMARY KEY,
DocumentId NVARCHAR(64) NOT NULL,
ChunkIndex INT NOT NULL,
Text NVARCHAR(MAX) NOT NULL,
Embedding VARBINARY(MAX) NOT NULL
);
END
GO
-- Indexed documents with their raw text, type, hash and metadata.
-- TextHash and DocumentType each have an index for lookups by hash and by type.
IF OBJECT_ID('dbo.RagDocuments', 'U') IS NULL
BEGIN
CREATE TABLE dbo.RagDocuments (
Id NVARCHAR(64) NOT NULL CONSTRAINT PK_RagDocuments PRIMARY KEY,
DocumentType NVARCHAR(80) NOT NULL,
Title NVARCHAR(300) NOT NULL,
SourceUrl NVARCHAR(1200) NULL,
RawText NVARCHAR(MAX) NOT NULL,
TextHash NVARCHAR(64) NOT NULL,
TypeConfidence FLOAT NOT NULL,
MetadataJson NVARCHAR(MAX) NOT NULL CONSTRAINT DF_RagDocuments_MetadataJson DEFAULT '{}',
CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagDocuments_CreatedAt DEFAULT SYSUTCDATETIME()
);
CREATE INDEX IX_RagDocuments_TextHash ON dbo.RagDocuments(TextHash);
CREATE INDEX IX_RagDocuments_DocumentType ON dbo.RagDocuments(DocumentType);
END
GO
-- The link from chunks to documents is added as its own step because RagChunks is created
-- before RagDocuments. Deleting a document also deletes its chunks (ON DELETE CASCADE).
IF NOT EXISTS (SELECT 1 FROM sys.foreign_keys WHERE name = 'FK_RagChunks_RagDocuments')
BEGIN
ALTER TABLE dbo.RagChunks
ADD CONSTRAINT FK_RagChunks_RagDocuments FOREIGN KEY (DocumentId) REFERENCES dbo.RagDocuments(Id) ON DELETE CASCADE;
END
GO
-- Cache of embedding vectors, keyed by CacheKey, with an index on TextHash.
IF OBJECT_ID('dbo.RagEmbeddingCache', 'U') IS NULL
BEGIN
CREATE TABLE dbo.RagEmbeddingCache (
CacheKey NVARCHAR(64) NOT NULL CONSTRAINT PK_RagEmbeddingCache PRIMARY KEY,
Model NVARCHAR(120) NOT NULL,
TextHash NVARCHAR(64) NOT NULL,
Vector VARBINARY(MAX) NOT NULL,
CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagEmbeddingCache_CreatedAt DEFAULT SYSUTCDATETIME()
);
CREATE INDEX IX_RagEmbeddingCache_TextHash ON dbo.RagEmbeddingCache(TextHash);
END
GO
-- Cache of chat completion responses, keyed by CacheKey.
IF OBJECT_ID('dbo.RagChatCompletionCache', 'U') IS NULL
BEGIN
CREATE TABLE dbo.RagChatCompletionCache (
CacheKey NVARCHAR(64) NOT NULL CONSTRAINT PK_RagChatCompletionCache PRIMARY KEY,
Model NVARCHAR(120) NOT NULL,
Temperature DECIMAL(4,2) NOT NULL,
ResponseText NVARCHAR(MAX) NOT NULL,
CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagChatCompletionCache_CreatedAt DEFAULT SYSUTCDATETIME()
);
END
GO
+15
View File
@@ -0,0 +1,15 @@
# Runtime stage: ASP.NET runtime image; the app is expected to listen on port 8080.
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS base
WORKDIR /app
EXPOSE 8080
# Build stage: the project file is copied and restored before the rest of the sources,
# so the restore layer is reused when only source files change.
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
WORKDIR /src
COPY ["rag-api.csproj", "./"]
RUN dotnet restore "rag-api.csproj"
COPY . .
RUN dotnet publish "rag-api.csproj" -c Release -o /app/publish /p:UseAppHost=false
# Final stage: only the published output is copied onto the runtime image.
FROM base AS final
WORKDIR /app
COPY --from=build /app/publish .
ENTRYPOINT ["dotnet", "rag-api.dll"]
+282
View File
@@ -0,0 +1,282 @@
using Azure.Identity;
using Microsoft.AspNetCore.Diagnostics;
using Api.Services;
using Api.Services.Contracts;
using Api.Settings;
using Serilog;
using System.Reflection;
DotNetEnv.Env.Load();

// Bootstrap logger: the static Log class is used below before the host (and with it the
// fully configured Serilog pipeline) exists. Without this, those messages - including the
// fatal error logged when start-up fails - are silently discarded. UseSerilog replaces the
// bootstrap logger with the configured one when the host is built.
Log.Logger = new LoggerConfiguration()
    .Enrich.WithProperty("Service", "rag-api")
    .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter())
    .CreateBootstrapLogger();

try
{
    var builder = WebApplication.CreateBuilder(args);

    // Prefer the informational version, then the assembly version, then "unknown".
    var appVersion = Assembly.GetExecutingAssembly()
        .GetCustomAttribute<AssemblyInformationalVersionAttribute>()?
        .InformationalVersion
        ?? Assembly.GetExecutingAssembly().GetName().Version?.ToString()
        ?? "unknown";

    // Full logger: settings from configuration plus fixed enrichers and a JSON console sink.
    builder.Host.UseSerilog((context, services, configuration) =>
    {
        configuration
            .ReadFrom.Configuration(context.Configuration)
            .ReadFrom.Services(services)
            .Enrich.FromLogContext()
            .Enrich.WithMachineName()
            .Enrich.WithEnvironmentName()
            .Enrich.WithProperty("Service", "rag-api")
            .Enrich.WithProperty("AppVersion", appVersion)
            .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter());
    });

    Log.Information("Starting {Service} version {AppVersion}", "rag-api", appVersion);
// --------------------
// Azure Key Vault Configuration
// --------------------
// Key Vault is added as a configuration source only when it is enabled AND a vault URI is set.
var keyVaultUri = builder.Configuration["KeyVault:VaultUri"];
var keyVaultEnabled = builder.Configuration.GetValue<bool>("KeyVault:Enabled");
if (keyVaultEnabled && !string.IsNullOrWhiteSpace(keyVaultUri))
{
Log.Information("Loading configuration from Azure Key Vault: {VaultUri}", keyVaultUri);
try
{
builder.Configuration.AddAzureKeyVault(
new Uri(keyVaultUri),
new DefaultAzureCredential());
Log.Information("Azure Key Vault configuration loaded successfully");
}
catch (Exception ex)
{
// Deliberately best-effort: a Key Vault failure is logged and start-up continues.
Log.Warning(ex, "Failed to load Azure Key Vault configuration. Continuing with other configuration sources.");
}
}
else
{
Log.Information("Azure Key Vault is disabled or not configured");
}
// Bind strongly typed settings from the "Rag", "Ai" and "InternalApi" configuration sections.
builder.Services.Configure<RagSettings>(builder.Configuration.GetSection("Rag"));
builder.Services.Configure<AiSettings>(builder.Configuration.GetSection("Ai"));
builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));
// Service registrations: RawAiClient as a typed HttpClient, the repository and text helpers
// as singletons, the AI client and RAG service per scope.
builder.Services.AddHttpClient<RawAiClient>();
builder.Services.AddSingleton<IRagRepository, SqlRagRepository>();
builder.Services.AddScoped<IAiClient, CachedAiClient>();
builder.Services.AddSingleton<ITextExtractor, TextExtractor>();
builder.Services.AddSingleton<ITextChunker, TextChunker>();
builder.Services.AddSingleton<IDocumentClassifier, DocumentClassifier>();
builder.Services.AddScoped<IRagService, RagService>();
builder.Services.AddControllers();
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen();
var app = builder.Build();
var logger = app.Services.GetRequiredService<ILogger<Program>>();
logger.LogInformation("API starting up...");
logger.LogInformation("Environment: {Environment}", app.Environment.EnvironmentName);
// Log all environment variables and configuration settings at startup
// Can be controlled via appsettings: "Logging:LogEnvironmentOnStartup": true
var logEnvironmentOnStartup = app.Configuration.GetValue<bool>("Logging:LogEnvironmentOnStartup", defaultValue: true);
if (logEnvironmentOnStartup)
{
LogEnvironmentSettings(logger, app.Configuration, app.Environment);
}
// Initialize the repository before the request pipeline is set up; a failure here aborts start-up.
using (var scope = app.Services.CreateScope())
{
var repository = scope.ServiceProvider.GetRequiredService<IRagRepository>();
await repository.InitializeAsync(CancellationToken.None);
}
// One summary log event per request, enriched with host, scheme, remote IP and user agent.
app.UseSerilogRequestLogging(options =>
{
options.MessageTemplate = "HTTP {RequestMethod} {RequestPath} responded {StatusCode} in {Elapsed:0.0000} ms";
options.EnrichDiagnosticContext = (diagnosticContext, httpContext) =>
{
diagnosticContext.Set("RequestHost", httpContext.Request.Host.Value);
diagnosticContext.Set("RequestScheme", httpContext.Request.Scheme);
diagnosticContext.Set("RemoteIP", httpContext.Connection.RemoteIpAddress?.ToString());
diagnosticContext.Set("UserAgent", httpContext.Request.Headers.UserAgent.ToString());
};
});
// Unhandled exceptions are logged and answered with a generic 500 JSON body,
// so exception details are not sent to the caller.
app.UseExceptionHandler(errorApp =>
{
errorApp.Run(async context =>
{
var feature = context.Features.Get<IExceptionHandlerFeature>();
var logger = context.RequestServices.GetRequiredService<ILogger<Program>>();
if (feature?.Error is not null)
{
logger.LogError(feature.Error, "Unhandled exception in {Service}", "rag-api");
}
context.Response.StatusCode = StatusCodes.Status500InternalServerError;
context.Response.ContentType = "application/json";
await context.Response.WriteAsJsonAsync(new { error = "Unexpected server error." });
});
});
// Internal API key check. When RequireApiKey is on, every request reaching this middleware must
// carry the configured key in the X-Internal-Api-Key header; a blank configured key rejects all calls.
app.Use(async (context, next) =>
{
    var settings = context.RequestServices.GetRequiredService<Microsoft.Extensions.Options.IOptions<InternalApiSettings>>().Value;
    if (settings.RequireApiKey)
    {
        var header = context.Request.Headers["X-Internal-Api-Key"].ToString();

        // Compare as bytes in fixed time so response timing does not reveal how much of the key matched.
        var expectedBytes = System.Text.Encoding.UTF8.GetBytes(settings.ApiKey ?? string.Empty);
        var providedBytes = System.Text.Encoding.UTF8.GetBytes(header);
        var authorized = !string.IsNullOrWhiteSpace(settings.ApiKey)
            && System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(providedBytes, expectedBytes);

        if (!authorized)
        {
            var logger = context.RequestServices.GetRequiredService<ILogger<Program>>();
            logger.LogWarning("Rejected unauthorized internal API call. Path={Path}, RemoteIP={RemoteIP}", context.Request.Path, context.Connection.RemoteIpAddress?.ToString());
            context.Response.StatusCode = StatusCodes.Status401Unauthorized;
            await context.Response.WriteAsJsonAsync(new { error = "Unauthorized internal API call." });
            return;
        }
    }

    await next();
});
// Swagger (typically only in Development)
if (app.Environment.IsDevelopment())
{
    app.UseSwagger();
    app.UseSwaggerUI(options =>
    {
        options.DocumentTitle = "rag-api";
        options.SwaggerEndpoint("/swagger/v1/swagger.json", "rag-api v1");
        options.RoutePrefix = "swagger";
    });
}

app.MapControllers();

// Health endpoint reporting the service name, version and current UTC time.
app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "rag-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));

Log.Information("{Service} startup complete", "rag-api");
app.Run();
}
catch (Exception ex)
{
    Log.Fatal(ex, "rag-api terminated unexpectedly");

    // The exception is handled here, so without this the process would exit with code 0
    // and a crash would look like a clean shutdown to whatever supervises the process.
    Environment.ExitCode = 1;
}
finally
{
    // Flush buffered log events before the process exits.
    Log.Information("Shutting down rag-api");
    Log.CloseAndFlush();
}
/// <summary>
/// Writes a start-up diagnostics dump: hosting environment details, every environment
/// variable (sorted by name) and every configuration value. Values whose key looks
/// sensitive are masked before they are logged.
/// </summary>
/// <param name="logger">Logger that receives the dump.</param>
/// <param name="configuration">Configuration root whose values are listed.</param>
/// <param name="environment">Hosting environment whose names and paths are listed.</param>
static void LogEnvironmentSettings(Microsoft.Extensions.Logging.ILogger logger, IConfiguration configuration, IWebHostEnvironment environment)
{
    logger.LogInformation("==================== ENVIRONMENT SETTINGS ====================");

    // Hosting environment
    logger.LogInformation("Application Name: {ApplicationName}", environment.ApplicationName);
    logger.LogInformation("Environment Name: {EnvironmentName}", environment.EnvironmentName);
    logger.LogInformation("Content Root Path: {ContentRootPath}", environment.ContentRootPath);
    logger.LogInformation("Web Root Path: {WebRootPath}", environment.WebRootPath);

    // Environment variables, collected into a sorted map so the output order is stable
    logger.LogInformation("-------------- Environment Variables --------------");
    var ordered = new SortedDictionary<string, string?>();
    foreach (System.Collections.DictionaryEntry variable in Environment.GetEnvironmentVariables())
    {
        var name = variable.Key?.ToString() ?? string.Empty;
        var raw = variable.Value?.ToString() ?? string.Empty;

        // Sensitive values (passwords, secrets, tokens, keys) are masked
        ordered[name] = IsSensitiveKey(name) ? MaskValueWithLastChars(raw) : raw;
    }

    foreach (var (name, shown) in ordered)
    {
        logger.LogInformation(" {Key} = {Value}", name, shown);
    }

    // Configuration values, walked recursively from the root
    logger.LogInformation("-------------- Configuration Settings --------------");
    LogConfigurationRecursive(logger, configuration.GetChildren(), "");
    logger.LogInformation("===========================================================");
}
/// <summary>
/// Logs each configuration section that has a value as "full:key = value", then descends
/// into its children. Values whose full key looks sensitive are masked.
/// </summary>
/// <param name="logger">Logger that receives the entries.</param>
/// <param name="sections">Sections at the current level.</param>
/// <param name="prefix">Colon-separated path of the parent section; empty at the root.</param>
static void LogConfigurationRecursive(Microsoft.Extensions.Logging.ILogger logger, IEnumerable<IConfigurationSection> sections, string prefix)
{
    foreach (var section in sections)
    {
        var fullKey = string.IsNullOrEmpty(prefix) ? section.Key : $"{prefix}:{section.Key}";

        if (section.Value is { } raw)
        {
            // Mask sensitive configuration values
            var shown = IsSensitiveKey(fullKey) ? MaskValueWithLastChars(raw) : raw;
            logger.LogInformation(" {Key} = {Value}", fullKey, shown);
        }

        // Descend only when there is something below this section
        var children = section.GetChildren();
        if (children.Any())
        {
            LogConfigurationRecursive(logger, children, fullKey);
        }
    }
}
/// <summary>
/// Tells whether a configuration or environment key looks sensitive, i.e. contains
/// "Password", "Secret", "Token", "Key" or "ConnectionString" (case-insensitive).
/// </summary>
/// <param name="key">Key name to inspect.</param>
/// <returns><c>true</c> when any marker occurs anywhere in the key.</returns>
static bool IsSensitiveKey(string key)
{
    var markers = new[] { "Password", "Secret", "Token", "Key", "ConnectionString" };
    foreach (var marker in markers)
    {
        if (key.Contains(marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Masks a sensitive value for logging. Long values keep their last 4 characters so an
/// operator can tell which secret is configured; shorter values are masked completely,
/// because 4 visible characters would expose a large share of a short secret.
/// </summary>
/// <param name="value">The value to mask.</param>
/// <returns>
/// "***NOT SET***" for a null or empty value, "***MASKED***" for values shorter than
/// 12 characters, otherwise "***MASKED***...abcd" ending in the last 4 characters.
/// </returns>
static string MaskValueWithLastChars(string value)
{
    // Number of trailing characters shown, and the minimum length at which showing them
    // reveals no more than a third of the value.
    const int VisibleSuffixLength = 4;
    const int MinLengthToShowSuffix = 12;

    if (string.IsNullOrEmpty(value))
    {
        return "***NOT SET***";
    }

    if (value.Length < MinLengthToShowSuffix)
    {
        return "***MASKED***";
    }

    return $"***MASKED***...{value[^VisibleSuffixLength..]}";
}
+12
View File
@@ -0,0 +1,12 @@
{
"profiles": {
"rag-api": {
"commandName": "Project",
"launchBrowser": true,
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"applicationUrl": "https://localhost:58424;http://localhost:58426"
}
}
}
+11
View File
@@ -0,0 +1,11 @@
namespace Api.Requests
{
/// <summary>
/// Request body for indexing a plain-text document.
/// </summary>
public sealed class IndexDocumentRequest
{
/// <summary>Document text to index. <c>RagService.IndexTextAsync</c> treats a missing value as empty and rejects text shorter than 40 characters after normalization.</summary>
public string? Text { get; set; }
/// <summary>Optional origin of the document; stored with it and passed to the lookup for an already indexed copy.</summary>
public string? SourceUrl { get; set; }
/// <summary>Optional document type; when blank, the classifier infers one from the text.</summary>
public string? DocumentType { get; set; }
/// <summary>Optional title; when blank, the classifier derives one from the text.</summary>
public string? Title { get; set; }
/// <summary>Optional key/value metadata; serialized to JSON and stored with the document.</summary>
public Dictionary<string, string>? Metadata { get; set; }
}
}
+9
View File
@@ -0,0 +1,9 @@
namespace Api.Requests
{
/// <summary>
/// Request body for a semantic search over indexed documents.
/// </summary>
public sealed class SearchRequest
{
/// <summary>Query text. <c>RagService.SearchAsync</c> rejects queries shorter than 10 characters after normalization.</summary>
public required string QueryText { get; init; }
/// <summary>Optional document types to restrict the search to; <c>null</c> or empty searches every type.</summary>
public IReadOnlyList<string>? TargetDocumentTypes { get; init; }
/// <summary>Optional maximum number of documents to return; when omitted the configured default is used, and the value is clamped to the configured maximum.</summary>
public int? TopK { get; init; }
}
}
@@ -0,0 +1,14 @@
namespace Api.Responses
{
/// <summary>
/// Result of an indexing request, for both newly indexed and already known documents.
/// </summary>
public sealed class IndexDocumentResponse
{
/// <summary>Identifier of the stored document.</summary>
public required string DocumentId { get; init; }
/// <summary>Hash of the normalized document text, as produced by <c>HashHelper.Compute</c>.</summary>
public required string TextHash { get; init; }
/// <summary>Document type, either supplied by the caller or inferred by the classifier.</summary>
public required string DocumentType { get; init; }
/// <summary>Confidence the classifier assigned to <see cref="DocumentType"/>.</summary>
public double DocumentTypeConfidence { get; init; }
/// <summary>Title stored for the document.</summary>
public required string Title { get; init; }
/// <summary>Number of chunks stored by this call; <c>RagService</c> reports 0 when <see cref="Cached"/> is true.</summary>
public int Chunks { get; init; }
/// <summary>Length of the stored text in characters.</summary>
public int Characters { get; init; }
/// <summary>True when an existing document with the same text hash was returned instead of indexing again.</summary>
public bool Cached { get; init; }
}
}
+25
View File
@@ -0,0 +1,25 @@
namespace Api.Responses
{
/// <summary>
/// Result of a semantic search.
/// </summary>
public sealed class SearchResponse
{
/// <summary>Matching documents; <c>RagService.SearchAsync</c> orders them by descending score.</summary>
public IReadOnlyList<SearchDocumentResult> Results { get; init; } = [];
}
/// <summary>
/// One document that matched a search, together with its best matching chunks.
/// </summary>
public sealed class SearchDocumentResult
{
/// <summary>Identifier of the matching document.</summary>
public required string DocumentId { get; init; }
/// <summary>Type of the matching document.</summary>
public required string DocumentType { get; init; }
/// <summary>Title of the matching document.</summary>
public required string Title { get; init; }
/// <summary>Origin of the document, when one was stored.</summary>
public string? SourceUrl { get; init; }
/// <summary>Highest score among the document's candidate chunks.</summary>
public double Score { get; init; }
/// <summary>Best matching chunks of the document; <c>RagService.SearchAsync</c> returns at most three, best first.</summary>
public IReadOnlyList<SearchChunkResult> MatchedChunks { get; init; } = [];
}
/// <summary>
/// One chunk of a document that matched a search.
/// </summary>
public sealed class SearchChunkResult
{
/// <summary>Identifier of the chunk.</summary>
public required string ChunkId { get; init; }
/// <summary>Zero-based position of the chunk within its document.</summary>
public int ChunkIndex { get; init; }
/// <summary>Text of the chunk.</summary>
public required string Text { get; init; }
/// <summary>Similarity score of the chunk against the query.</summary>
public double Score { get; init; }
}
}
+52
View File
@@ -0,0 +1,52 @@
using Microsoft.Extensions.Options;
using Api.Services.Contracts;
using Api.Settings;
namespace Api.Services;
/// <summary>
/// <see cref="IAiClient"/> decorator that answers embedding and chat-completion requests from
/// the repository-backed cache and calls the wrapped <see cref="RawAiClient"/> only on a miss.
/// </summary>
public sealed class CachedAiClient : IAiClient
{
    private readonly RawAiClient _raw;
    private readonly IRagRepository _repository;
    private readonly AiSettings _settings;

    /// <summary>Creates the decorator around the uncached client.</summary>
    /// <param name="raw">Client that performs the real provider calls.</param>
    /// <param name="repository">Store used to read and write cache entries.</param>
    /// <param name="options">AI settings; provider and model names become part of the cache keys.</param>
    public CachedAiClient(RawAiClient raw, IRagRepository repository, IOptions<AiSettings> options)
    {
        _raw = raw;
        _repository = repository;
        _settings = options.Value;
    }

    /// <summary>
    /// Returns the embedding for <paramref name="input"/>, from the cache when present,
    /// otherwise from the provider (the fresh vector is then cached).
    /// </summary>
    /// <param name="input">Text to embed.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The embedding vector.</returns>
    public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
    {
        var model = GetEmbeddingModel();
        var textHash = HashHelper.Compute(input);

        // The key covers provider and model so a configuration change never reuses stale vectors.
        var cacheKey = HashHelper.Compute($"embedding:{_settings.Provider}:{model}:{textHash}");

        var cached = await _repository.GetEmbeddingAsync(cacheKey, ct);
        if (cached is not null)
        {
            return cached;
        }

        var vector = await _raw.CreateEmbeddingAsync(input, ct);
        await _repository.SaveEmbeddingAsync(cacheKey, model, textHash, vector, ct);
        return vector;
    }

    /// <summary>
    /// Returns the chat completion for the given prompts and temperature, from the cache when
    /// present, otherwise from the provider (the fresh response is then cached).
    /// </summary>
    /// <param name="systemPrompt">System prompt.</param>
    /// <param name="userPrompt">User prompt.</param>
    /// <param name="temperature">Sampling temperature; part of the cache key with two decimals.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The completion text.</returns>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        var model = GetChatModel();

        // Interpolating a decimal uses the current culture, which would yield "0.20" on one host
        // and "0,20" on another and therefore different keys for the same request. Format it
        // with the invariant culture so the key is stable everywhere.
        var temperatureKey = temperature.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture);
        var cacheKey = HashHelper.Compute($"chat:{_settings.Provider}:{model}:{temperatureKey}:{systemPrompt}:{userPrompt}");

        var cached = await _repository.GetChatCompletionAsync(cacheKey, ct);
        if (cached is not null)
        {
            return cached;
        }

        var response = await _raw.CreateChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);
        await _repository.SaveChatCompletionAsync(cacheKey, model, temperature, response, ct);
        return response;
    }

    // True when the configured provider is Ollama (case-insensitive); anything else selects OpenAI.
    private bool IsOllama() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase);

    // Embedding model of the active provider.
    private string GetEmbeddingModel() => IsOllama()
        ? _settings.Ollama.EmbeddingModel
        : _settings.OpenAI.EmbeddingModel;

    // Chat model of the active provider.
    private string GetChatModel() => IsOllama()
        ? _settings.Ollama.ChatModel
        : _settings.OpenAI.ChatModel;
}
@@ -1,7 +1,7 @@
namespace Api.Services.Contracts.Rag;
namespace Api.Services.Contracts;
public interface IAiRagClient
public interface IAiClient
{
Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct);
Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, CancellationToken ct);
Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct);
}
@@ -0,0 +1,8 @@
using Api.Services.Contracts.Models;
namespace Api.Services.Contracts;
/// <summary>
/// Determines the type and title of a document.
/// </summary>
public interface IDocumentClassifier
{
/// <summary>
/// Classifies a document.
/// </summary>
/// <param name="text">Document text to classify.</param>
/// <param name="providedType">Type supplied by the caller, if any.</param>
/// <param name="providedTitle">Title supplied by the caller, if any.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The resulting type, its confidence, the title and any metadata.</returns>
Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct);
}
@@ -0,0 +1,16 @@
using Api.Services.Contracts.Models;
namespace Api.Services.Contracts;
/// <summary>
/// Persistence for indexed documents, their chunks, and the embedding / chat-completion caches.
/// </summary>
public interface IRagRepository
{
/// <summary>Prepares the backing store for use.</summary>
Task InitializeAsync(CancellationToken ct);
/// <summary>Looks up a stored document by the hash of its text, optionally narrowed by source URL; returns <c>null</c> when none matches.</summary>
Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct);
/// <summary>Looks up a stored document by identifier; returns <c>null</c> when it does not exist.</summary>
Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct);
/// <summary>Stores a document together with its chunks.</summary>
Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct);
/// <summary>Returns the chunks most similar to <paramref name="queryEmbedding"/>, optionally restricted to the given document types.</summary>
Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(float[] queryEmbedding, IReadOnlyList<string>? targetTypes, int topK, CancellationToken ct);
/// <summary>Reads a cached embedding; returns <c>null</c> on a cache miss.</summary>
Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct);
/// <summary>Stores an embedding in the cache under <paramref name="cacheKey"/>.</summary>
Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct);
/// <summary>Reads a cached chat completion; returns <c>null</c> on a cache miss.</summary>
Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct);
/// <summary>Stores a chat completion in the cache under <paramref name="cacheKey"/>.</summary>
Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct);
}
+13
View File
@@ -0,0 +1,13 @@
using Api.Requests;
using Api.Responses;
using Api.Services.Contracts.Models;
namespace Api.Services.Contracts;
/// <summary>
/// Application service for indexing documents and searching them semantically.
/// </summary>
public interface IRagService
{
/// <summary>Indexes a document supplied as plain text.</summary>
Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct);
/// <summary>Indexes an uploaded PDF file; type, title and source URL are optional hints.</summary>
Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct);
/// <summary>Searches the indexed documents for the given query.</summary>
Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct);
/// <summary>Returns a stored document by identifier, or <c>null</c> when it does not exist.</summary>
Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct);
}
@@ -1,4 +1,4 @@
namespace Api.Services.Contracts.Rag;
namespace Api.Services.Contracts;
public interface ITextChunker
{
@@ -0,0 +1,7 @@
namespace Api.Services.Contracts;
/// <summary>
/// Extracts and normalizes document text.
/// </summary>
public interface ITextExtractor
{
/// <summary>Reads the text content of a PDF from <paramref name="stream"/>.</summary>
Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct);
/// <summary>Returns a normalized form of <paramref name="value"/> suitable for hashing and indexing.</summary>
string Normalize(string value);
}
@@ -0,0 +1,10 @@
namespace Api.Services.Contracts.Models
{
/// <summary>
/// Outcome of classifying a document.
/// </summary>
public sealed class DocumentClassification
{
/// <summary>Type assigned to the document.</summary>
public required string DocumentType { get; init; }
/// <summary>Confidence in <see cref="DocumentType"/>.</summary>
public double Confidence { get; init; }
/// <summary>Title chosen for the document.</summary>
public required string Title { get; init; }
/// <summary>Additional metadata produced by the classifier; empty by default.</summary>
public Dictionary<string, string> Metadata { get; init; } = [];
}
}
@@ -0,0 +1,11 @@
namespace Api.Services.Contracts.Models
{
/// <summary>
/// One stored chunk of a document together with its embedding.
/// </summary>
public sealed class RagChunkRecord
{
/// <summary>Identifier of the chunk.</summary>
public required string Id { get; init; }
/// <summary>Identifier of the document the chunk belongs to.</summary>
public required string DocumentId { get; init; }
/// <summary>Zero-based position of the chunk within its document.</summary>
public int ChunkIndex { get; init; }
/// <summary>Text of the chunk.</summary>
public required string Text { get; init; }
/// <summary>Embedding vector created for <see cref="Text"/>.</summary>
public required float[] Embedding { get; init; }
}
}
@@ -0,0 +1,13 @@
namespace Api.Services.Contracts.Models
{
/// <summary>
/// Document view returned by <c>IRagService.GetDocumentAsync</c>.
/// </summary>
public sealed class RagDocumentDetails
{
/// <summary>Identifier of the document.</summary>
public required string Id { get; init; }
/// <summary>Type of the document.</summary>
public required string DocumentType { get; init; }
/// <summary>Title of the document.</summary>
public required string Title { get; init; }
/// <summary>Origin of the document, when one was stored.</summary>
public string? SourceUrl { get; init; }
/// <summary>Stored document text.</summary>
public required string Text { get; init; }
/// <summary>Hash of <see cref="Text"/>.</summary>
public required string TextHash { get; init; }
/// <summary>Time the document was stored.</summary>
public DateTimeOffset CreatedAt { get; init; }
}
}
@@ -0,0 +1,15 @@
namespace Api.Services.Contracts.Models
{
/// <summary>
/// A document as persisted by <c>IRagRepository</c>.
/// </summary>
public sealed class RagDocumentRecord
{
/// <summary>Identifier of the document.</summary>
public required string Id { get; init; }
/// <summary>Type of the document.</summary>
public required string DocumentType { get; init; }
/// <summary>Title of the document.</summary>
public required string Title { get; init; }
/// <summary>Origin of the document, when one was supplied.</summary>
public string? SourceUrl { get; init; }
/// <summary>Normalized document text.</summary>
public required string Text { get; init; }
/// <summary>Hash of <see cref="Text"/>, used to detect already indexed documents.</summary>
public required string TextHash { get; init; }
/// <summary>Confidence the classifier assigned to <see cref="DocumentType"/>.</summary>
public double TypeConfidence { get; init; }
/// <summary>Document metadata serialized as JSON; defaults to an empty JSON object.</summary>
public string MetadataJson { get; init; } = "{}";
/// <summary>Time the document was created.</summary>
public DateTimeOffset CreatedAt { get; init; }
}
}
@@ -0,0 +1,9 @@
namespace Api.Services.Contracts.Models
{
/// <summary>
/// A chunk returned by a repository search, paired with its document and similarity score.
/// </summary>
public sealed class SearchCandidateChunk
{
/// <summary>Document the chunk belongs to.</summary>
public required RagDocumentRecord Document { get; init; }
/// <summary>The matching chunk.</summary>
public required RagChunkRecord Chunk { get; init; }
/// <summary>Similarity of the chunk to the query embedding.</summary>
public double Score { get; init; }
}
}
+65
View File
@@ -0,0 +1,65 @@
using System.Text.RegularExpressions;
using Api.Services.Contracts;
using Api.Services.Contracts.Models;
namespace Api.Services;
/// <summary>
/// Keyword-based <see cref="IDocumentClassifier"/>. A caller-supplied type is normalized and
/// accepted as-is; otherwise the type with the most matching keywords in the text wins.
/// </summary>
public sealed class DocumentClassifier : IDocumentClassifier
{
    private static readonly HashSet<string> KnownTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "cv", "job", "article", "contract", "invoice", "product", "documentation", "unknown"
    };

    // Keyword lists per document type, in the order in which ties are resolved (first wins).
    private static readonly (string Type, string[] Terms)[] Signals =
    {
        ("cv", new[] { "curriculum vitae", "resume", "work experience", "professional experience", "education", "skills", "technologies", "linkedin", "github" }),
        ("job", new[] { "job description", "requirements", "responsibilities", "qualifications", "apply", "we are looking", "salary", "benefits", "remote", "hybrid" }),
        ("contract", new[] { "agreement", "contract", "party", "parties", "liability", "termination", "confidentiality", "governing law" }),
        ("invoice", new[] { "invoice", "vat", "subtotal", "total", "amount due", "due date", "billing" }),
        ("documentation", new[] { "api", "endpoint", "configuration", "install", "usage", "parameters", "response", "request" }),
        ("product", new[] { "features", "pricing", "sku", "product", "specification", "warranty" })
    };

    /// <summary>
    /// Classifies <paramref name="text"/>. The work is synchronous; the token is not observed.
    /// </summary>
    /// <param name="text">Document text.</param>
    /// <param name="providedType">Caller-supplied type; when non-blank it is used after normalization.</param>
    /// <param name="providedTitle">Caller-supplied title; when non-blank it is used after trimming.</param>
    /// <param name="ct">Cancellation token (unused).</param>
    /// <returns>A completed task holding the classification.</returns>
    public Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct)
    {
        string type;
        double confidence;

        if (!string.IsNullOrWhiteSpace(providedType))
        {
            // Trust the caller: full confidence for a known concrete type, reduced otherwise.
            type = NormalizeType(providedType);
            var isKnownConcreteType = KnownTypes.Contains(type) && type != "unknown";
            confidence = isKnownConcreteType ? 1.0 : 0.6;
        }
        else
        {
            // Score every type by the number of its keywords present in the lower-cased text.
            var haystack = text.ToLowerInvariant();
            var bestType = "unknown";
            var bestHits = 0;
            foreach (var (candidate, terms) in Signals)
            {
                var hits = terms.Count(term => haystack.Contains(term));
                if (hits > bestHits)
                {
                    bestType = candidate;
                    bestHits = hits;
                }
            }

            type = bestType;
            confidence = bestHits <= 0 ? 0.25 : Math.Min(0.95, 0.45 + bestHits * 0.08);
        }

        return Task.FromResult(new DocumentClassification
        {
            DocumentType = type,
            Confidence = confidence,
            Title = BuildTitle(providedTitle, text, type)
        });
    }

    // Lower-cases and trims the type and replaces every character outside [a-z0-9_-] with '-'.
    private static string NormalizeType(string value)
    {
        var lowered = value.Trim().ToLowerInvariant();
        var cleaned = Regex.Replace(lowered, "[^a-z0-9_-]", "-");
        if (string.IsNullOrWhiteSpace(cleaned))
        {
            return "unknown";
        }

        return cleaned;
    }

    // Picks the supplied title, else the first text segment longer than 20 characters
    // (cut to 120), else a generic "<type> document" label.
    private static string BuildTitle(string? providedTitle, string text, string documentType)
    {
        if (!string.IsNullOrWhiteSpace(providedTitle))
        {
            return providedTitle.Trim();
        }

        foreach (var segment in text.Split('.', '\n', '\r'))
        {
            var candidate = segment.Trim();
            if (candidate.Length > 20)
            {
                return candidate.Length <= 120 ? candidate : candidate[..120];
            }
        }

        return $"{documentType} document";
    }
}
+14
View File
@@ -0,0 +1,14 @@
using System.Security.Cryptography;
using System.Text;
namespace Api.Services;
/// <summary>
/// Hashing helper used for cache keys and for detecting already indexed text.
/// </summary>
public static class HashHelper
{
    /// <summary>
    /// Computes the SHA-256 digest of the UTF-8 encoding of <paramref name="value"/>.
    /// </summary>
    /// <param name="value">Text to hash; <c>null</c> is treated as the empty string.</param>
    /// <returns>The digest as a 64-character upper-case hexadecimal string.</returns>
    public static string Compute(string value)
    {
        // The static one-shot API avoids creating and disposing a SHA256 instance per call.
        var digest = SHA256.HashData(Encoding.UTF8.GetBytes(value ?? string.Empty));
        return Convert.ToHexString(digest);
    }
}
+179
View File
@@ -0,0 +1,179 @@
using System.Text.Json;
using Microsoft.Extensions.Options;
using Api.Services.Contracts;
using Api.Settings;
using Api.Responses;
using Api.Requests;
using Api.Services.Contracts.Models;
namespace Api.Services;
/// <summary>
/// Implements document indexing and semantic search by combining text extraction,
/// classification, chunking, embedding creation and persistence.
/// </summary>
public sealed class RagService : IRagService
{
    // Shortest normalized document text that is accepted for indexing.
    private const int MinDocumentChars = 40;

    // Shortest normalized query that is accepted for searching.
    private const int MinQueryChars = 10;

    // Number of best chunks reported per matching document.
    private const int MaxChunksPerDocument = 3;

    private readonly ITextExtractor _textExtractor;
    private readonly ITextChunker _chunker;
    private readonly IDocumentClassifier _classifier;
    private readonly IAiClient _ai;
    private readonly IRagRepository _repository;
    private readonly RagSettings _settings;

    /// <summary>Creates the service from its collaborators and the RAG settings.</summary>
    public RagService(
        ITextExtractor textExtractor,
        ITextChunker chunker,
        IDocumentClassifier classifier,
        IAiClient ai,
        IRagRepository repository,
        IOptions<RagSettings> options)
    {
        _textExtractor = textExtractor;
        _chunker = chunker;
        _classifier = classifier;
        _ai = ai;
        _repository = repository;
        _settings = options.Value;
    }

    /// <summary>
    /// Indexes a plain-text document: normalizes the text, rejects it when too short,
    /// truncates it to the configured maximum and stores it.
    /// </summary>
    /// <exception cref="InvalidOperationException">The normalized text has fewer than 40 characters.</exception>
    public async Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct)
    {
        var normalized = _textExtractor.Normalize(request.Text ?? string.Empty);
        if (normalized.Length < MinDocumentChars)
        {
            throw new InvalidOperationException("Document text is too short.");
        }

        return await IndexNormalizedTextAsync(
            Truncate(normalized),
            request.DocumentType,
            request.Title,
            request.SourceUrl,
            request.Metadata,
            ct);
    }

    /// <summary>
    /// Indexes an uploaded PDF: validates size and extension, extracts its text, truncates it
    /// to the configured maximum and stores it. The file name becomes the default title and
    /// is recorded as metadata.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The file is empty, too large, not a PDF, or yields fewer than 40 characters of text.
    /// </exception>
    public async Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct)
    {
        if (file.Length <= 0)
        {
            throw new InvalidOperationException("Uploaded file is empty.");
        }

        if (file.Length > _settings.MaxFileSizeMb * 1024L * 1024L)
        {
            throw new InvalidOperationException($"File is too large. Max size is {_settings.MaxFileSizeMb} MB.");
        }

        var extension = Path.GetExtension(file.FileName);
        if (!string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("Only PDF files are supported by this endpoint.");
        }

        await using var stream = file.OpenReadStream();
        var extracted = Truncate(await _textExtractor.ExtractPdfAsync(stream, ct));
        if (extracted.Length < MinDocumentChars)
        {
            throw new InvalidOperationException("Could not extract enough text from the PDF.");
        }

        var metadata = new Dictionary<string, string> { ["fileName"] = file.FileName };
        return await IndexNormalizedTextAsync(extracted, documentType, title ?? file.FileName, sourceUrl, metadata, ct);
    }

    /// <summary>
    /// Embeds the query, fetches candidate chunks from the repository, groups them by document
    /// and returns the best documents ordered by descending score.
    /// </summary>
    /// <exception cref="InvalidOperationException">The normalized query has fewer than 10 characters.</exception>
    public async Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct)
    {
        var query = _textExtractor.Normalize(request.QueryText);
        if (query.Length < MinQueryChars)
        {
            throw new InvalidOperationException("Search query is too short.");
        }

        // Requested size, or the default, limited to [1, MaxTopK] (with MaxTopK itself at least 1).
        var upperBound = Math.Max(1, _settings.MaxTopK);
        var topK = Math.Clamp(request.TopK ?? _settings.DefaultTopK, 1, upperBound);

        var queryEmbedding = await _ai.CreateEmbeddingAsync(query, ct);
        var candidates = await _repository.SearchChunksAsync(queryEmbedding, request.TargetDocumentTypes, topK, ct);

        var documents = new List<SearchDocumentResult>();
        foreach (var chunksOfDocument in candidates.GroupBy(candidate => candidate.Document.Id))
        {
            documents.Add(ToDocumentResult(chunksOfDocument));
        }

        return new SearchResponse
        {
            Results = documents.OrderByDescending(document => document.Score).Take(topK).ToList()
        };
    }

    /// <summary>Returns the stored document with the given identifier, or <c>null</c> when it is unknown.</summary>
    public async Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct)
    {
        var record = await _repository.GetDocumentByIdAsync(documentId, ct);
        if (record is null)
        {
            return null;
        }

        return new RagDocumentDetails
        {
            Id = record.Id,
            DocumentType = record.DocumentType,
            Title = record.Title,
            SourceUrl = record.SourceUrl,
            Text = record.Text,
            TextHash = record.TextHash,
            CreatedAt = record.CreatedAt
        };
    }

    // Indexes already normalized text: returns the existing document when the same text hash is
    // known, otherwise classifies, chunks, embeds every chunk and stores the result.
    private async Task<IndexDocumentResponse> IndexNormalizedTextAsync(
        string text,
        string? documentType,
        string? title,
        string? sourceUrl,
        Dictionary<string, string>? metadata,
        CancellationToken ct)
    {
        var textHash = HashHelper.Compute(text);

        var existing = await _repository.GetDocumentByTextHashAsync(textHash, sourceUrl, ct);
        if (existing is not null)
        {
            // Nothing is stored for a known document, so no chunks are reported.
            return ToResponse(existing, chunkCount: 0, characters: existing.Text.Length, cached: true);
        }

        var classification = await _classifier.ClassifyAsync(text, documentType, title, ct);
        var chunks = _chunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        var document = new RagDocumentRecord
        {
            Id = Guid.NewGuid().ToString("N"),
            DocumentType = classification.DocumentType,
            Title = classification.Title,
            SourceUrl = sourceUrl,
            Text = text,
            TextHash = textHash,
            TypeConfidence = classification.Confidence,
            MetadataJson = JsonSerializer.Serialize(metadata ?? classification.Metadata),
            CreatedAt = DateTimeOffset.UtcNow
        };

        // Chunks are embedded one after another, in document order.
        var records = new List<RagChunkRecord>();
        var index = 0;
        foreach (var chunkText in chunks)
        {
            ct.ThrowIfCancellationRequested();
            records.Add(new RagChunkRecord
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = document.Id,
                ChunkIndex = index,
                Text = chunkText,
                Embedding = await _ai.CreateEmbeddingAsync(chunkText, ct)
            });
            index++;
        }

        await _repository.SaveDocumentAsync(document, records, ct);
        return ToResponse(document, chunkCount: records.Count, characters: text.Length, cached: false);
    }

    // Cuts the text to the configured maximum length when it is longer.
    private string Truncate(string text) =>
        text.Length > _settings.MaxTextChars ? text[.._settings.MaxTextChars] : text;

    // Builds the API response for a stored document.
    private static IndexDocumentResponse ToResponse(RagDocumentRecord document, int chunkCount, int characters, bool cached) => new()
    {
        DocumentId = document.Id,
        TextHash = document.TextHash,
        DocumentType = document.DocumentType,
        DocumentTypeConfidence = document.TypeConfidence,
        Title = document.Title,
        Chunks = chunkCount,
        Characters = characters,
        Cached = cached
    };

    // Collapses the candidate chunks of one document into a single search result.
    private static SearchDocumentResult ToDocumentResult(IEnumerable<SearchCandidateChunk> chunksOfDocument)
    {
        var ranked = chunksOfDocument.OrderByDescending(candidate => candidate.Score).ToList();
        var top = ranked[0];

        return new SearchDocumentResult
        {
            DocumentId = top.Document.Id,
            DocumentType = top.Document.DocumentType,
            Title = top.Document.Title,
            SourceUrl = top.Document.SourceUrl,
            Score = ranked.Max(candidate => candidate.Score),
            MatchedChunks = ranked
                .Take(MaxChunksPerDocument)
                .Select(candidate => new SearchChunkResult
                {
                    ChunkId = candidate.Chunk.Id,
                    ChunkIndex = candidate.Chunk.ChunkIndex,
                    Text = candidate.Chunk.Text,
                    Score = candidate.Score
                })
                .ToList()
        };
    }
}
+116
View File
@@ -0,0 +1,116 @@
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Options;
using Api.Services.Contracts;
using Api.Settings;
namespace Api.Services;
/// <summary>
/// Uncached <see cref="IAiClient"/> that talks to either OpenAI or Ollama over HTTP,
/// depending on the configured provider.
/// </summary>
public sealed class RawAiClient : IAiClient
{
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly HttpClient _http;
    private readonly AiSettings _settings;

    /// <summary>Creates the client from an HTTP client and the AI settings.</summary>
    public RawAiClient(HttpClient http, IOptions<AiSettings> options)
    {
        _http = http;
        _settings = options.Value;
    }

    /// <summary>Requests an embedding for <paramref name="input"/> from the configured provider.</summary>
    /// <exception cref="InvalidOperationException">The provider returned a non-success status, or the OpenAI key is missing.</exception>
    public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
    {
        if (IsOllama())
        {
            return await CreateOllamaEmbeddingAsync(input, ct);
        }

        return await CreateOpenAiEmbeddingAsync(input, ct);
    }

    /// <summary>Requests a JSON-formatted chat completion from the configured provider.</summary>
    /// <exception cref="InvalidOperationException">The provider returned a non-success status, or the OpenAI key is missing.</exception>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        if (IsOllama())
        {
            return await CreateOllamaChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);
        }

        return await CreateOpenAiChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);
    }

    private bool IsOllama() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase);

    private async Task<float[]> CreateOpenAiEmbeddingAsync(string input, CancellationToken ct)
    {
        EnsureOpenAiKey();
        using var request = BuildOpenAiRequest(
            "https://api.openai.com/v1/embeddings",
            new { model = _settings.OpenAI.EmbeddingModel, input });

        // The configured timeout applies, but never less than 15 seconds.
        using var timeout = LinkedTimeout(ct, TimeSpan.FromSeconds(Math.Max(15, _settings.OpenAI.TimeoutSeconds)));
        using var response = await _http.SendAsync(request, timeout.Token);
        using var doc = await ReadJsonAsync(response, "OpenAI embeddings failed", timeout.Token);

        return ToVector(doc.RootElement.GetProperty("data")[0].GetProperty("embedding"));
    }

    private async Task<string> CreateOpenAiChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        EnsureOpenAiKey();
        var payload = new
        {
            model = _settings.OpenAI.ChatModel,
            temperature,
            response_format = new { type = "json_object" },
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            }
        };
        using var request = BuildOpenAiRequest("https://api.openai.com/v1/chat/completions", payload);

        // The configured timeout applies, but never less than 15 seconds.
        using var timeout = LinkedTimeout(ct, TimeSpan.FromSeconds(Math.Max(15, _settings.OpenAI.TimeoutSeconds)));
        using var response = await _http.SendAsync(request, timeout.Token);
        using var doc = await ReadJsonAsync(response, "OpenAI chat failed", timeout.Token);

        var content = doc.RootElement.GetProperty("choices")[0].GetProperty("message").GetProperty("content").GetString();
        return content ?? "{}";
    }

    private async Task<float[]> CreateOllamaEmbeddingAsync(string input, CancellationToken ct)
    {
        var baseUrl = _settings.Ollama.BaseUrl.TrimEnd('/');

        // The configured timeout applies, but never less than 30 seconds.
        using var timeout = LinkedTimeout(ct, TimeSpan.FromSeconds(Math.Max(30, _settings.Ollama.TimeoutSeconds)));
        var body = ToJson(new { model = _settings.Ollama.EmbeddingModel, prompt = input });
        using var response = await _http.PostAsync($"{baseUrl}/api/embeddings", body, timeout.Token);
        using var doc = await ReadJsonAsync(response, "Ollama embeddings failed", timeout.Token);

        return ToVector(doc.RootElement.GetProperty("embedding"));
    }

    private async Task<string> CreateOllamaChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        var baseUrl = _settings.Ollama.BaseUrl.TrimEnd('/');

        // The configured timeout applies, but never less than 30 seconds.
        using var timeout = LinkedTimeout(ct, TimeSpan.FromSeconds(Math.Max(30, _settings.Ollama.TimeoutSeconds)));
        var body = ToJson(new
        {
            model = _settings.Ollama.ChatModel,
            stream = false,
            format = "json",
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            },
            options = new { temperature = (float)temperature }
        });
        using var response = await _http.PostAsync($"{baseUrl}/api/chat", body, timeout.Token);
        using var doc = await ReadJsonAsync(response, "Ollama chat failed", timeout.Token);

        var content = doc.RootElement.GetProperty("message").GetProperty("content").GetString();
        return content ?? "{}";
    }

    // Fails fast when no OpenAI API key is configured.
    private void EnsureOpenAiKey()
    {
        if (string.IsNullOrWhiteSpace(_settings.OpenAI.ApiKey))
        {
            throw new InvalidOperationException("OpenAI API key is missing.");
        }
    }

    // Builds an authenticated JSON POST request for the OpenAI API.
    private HttpRequestMessage BuildOpenAiRequest<T>(string url, T payload)
    {
        var request = new HttpRequestMessage(HttpMethod.Post, url);
        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _settings.OpenAI.ApiKey);
        request.Content = ToJson(payload);
        return request;
    }

    // Creates a token source that is cancelled by the caller's token or after the given time.
    private static CancellationTokenSource LinkedTimeout(CancellationToken ct, TimeSpan after)
    {
        var source = CancellationTokenSource.CreateLinkedTokenSource(ct);
        source.CancelAfter(after);
        return source;
    }

    // Reads the response body and parses it as JSON; a non-success status is reported as an
    // InvalidOperationException carrying the status code and the raw body.
    private static async Task<JsonDocument> ReadJsonAsync(HttpResponseMessage response, string failurePrefix, CancellationToken ct)
    {
        var json = await response.Content.ReadAsStringAsync(ct);
        if (!response.IsSuccessStatusCode)
        {
            throw new InvalidOperationException($"{failurePrefix}: {(int)response.StatusCode} {json}");
        }

        return JsonDocument.Parse(json);
    }

    // Converts a JSON array of numbers into a float vector.
    private static float[] ToVector(JsonElement array) =>
        array.EnumerateArray().Select(item => item.GetSingle()).ToArray();

    private static StringContent ToJson<T>(T payload) =>
        new(JsonSerializer.Serialize(payload, JsonOptions), Encoding.UTF8, "application/json");
}
+238
View File
@@ -0,0 +1,238 @@
using Microsoft.Data.SqlClient;
using Api.Services.Contracts;
using Api.Services.Contracts.Models;
namespace Api.Services;
/// <summary>
/// SQL Server implementation of <see cref="IRagRepository"/>: stores documents, their chunks
/// with embeddings, and the embedding / chat-completion caches.
/// </summary>
public sealed class SqlRagRepository : IRagRepository
{
    // Matches a line that consists only of the GO batch separator (optionally followed by ';').
    // Splitting on the bare substring "GO" would also cut through identifiers or literals that
    // merely contain those two letters.
    private static readonly System.Text.RegularExpressions.Regex BatchSeparator = new(
        @"^[ \t]*GO[ \t]*;?[ \t]*\r?$",
        System.Text.RegularExpressions.RegexOptions.Multiline
        | System.Text.RegularExpressions.RegexOptions.IgnoreCase
        | System.Text.RegularExpressions.RegexOptions.CultureInvariant);

    private readonly string _connectionString;

    /// <summary>Reads the "RagDb" connection string from configuration.</summary>
    /// <exception cref="InvalidOperationException">The connection string is not configured.</exception>
    public SqlRagRepository(IConfiguration configuration)
    {
        _connectionString = configuration.GetConnectionString("RagDb")
            ?? throw new InvalidOperationException("Connection string 'RagDb' is missing.");
    }

    /// <summary>
    /// Creates the database when it does not exist and runs every batch of
    /// <c>Database/schema.sql</c> (located next to the application binaries).
    /// </summary>
    public async Task InitializeAsync(CancellationToken ct)
    {
        await EnsureDatabaseExistsAsync(ct);

        var schemaPath = Path.Combine(AppContext.BaseDirectory, "Database", "schema.sql");
        var script = await File.ReadAllTextAsync(schemaPath, ct);

        await using var connection = await OpenConnectionAsync(ct);
        foreach (var batch in BatchSeparator.Split(script))
        {
            var commandText = batch.Trim();
            if (commandText.Length == 0)
            {
                continue;
            }

            await using var command = new SqlCommand(commandText, connection);
            await command.ExecuteNonQueryAsync(ct);
        }
    }

    /// <summary>
    /// Returns the most recently created document with the given text hash, or <c>null</c>.
    /// A <c>null</c> <paramref name="sourceUrl"/> matches documents from any source.
    /// </summary>
    public async Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct)
    {
        const string sql = """
            SELECT TOP 1 Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt
            FROM RagDocuments
            WHERE TextHash = @TextHash AND (@SourceUrl IS NULL OR SourceUrl = @SourceUrl)
            ORDER BY CreatedAt DESC
            """;

        await using var connection = await OpenConnectionAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@TextHash", textHash);
        command.Parameters.AddWithValue("@SourceUrl", (object?)sourceUrl ?? DBNull.Value);

        await using var reader = await command.ExecuteReaderAsync(ct);
        return await reader.ReadAsync(ct) ? ReadDocument(reader) : null;
    }

    /// <summary>Returns the document with the given identifier, or <c>null</c> when it does not exist.</summary>
    public async Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct)
    {
        const string sql = """
            SELECT Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt
            FROM RagDocuments
            WHERE Id = @Id
            """;

        await using var connection = await OpenConnectionAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@Id", id);

        await using var reader = await command.ExecuteReaderAsync(ct);
        return await reader.ReadAsync(ct) ? ReadDocument(reader) : null;
    }

    /// <summary>
    /// Inserts the document and all of its chunks in a single transaction; on any failure the
    /// transaction is rolled back and the original exception is rethrown.
    /// </summary>
    public async Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct)
    {
        await using var connection = await OpenConnectionAsync(ct);
        await using var tx = (SqlTransaction)await connection.BeginTransactionAsync(ct);
        try
        {
            const string insertDoc = """
                INSERT INTO RagDocuments (Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt)
                VALUES (@Id, @DocumentType, @Title, @SourceUrl, @RawText, @TextHash, @TypeConfidence, @MetadataJson, @CreatedAt)
                """;
            await using (var command = new SqlCommand(insertDoc, connection, tx))
            {
                command.Parameters.AddWithValue("@Id", document.Id);
                command.Parameters.AddWithValue("@DocumentType", document.DocumentType);
                command.Parameters.AddWithValue("@Title", document.Title);
                command.Parameters.AddWithValue("@SourceUrl", (object?)document.SourceUrl ?? DBNull.Value);
                command.Parameters.AddWithValue("@RawText", document.Text);
                command.Parameters.AddWithValue("@TextHash", document.TextHash);
                command.Parameters.AddWithValue("@TypeConfidence", document.TypeConfidence);
                command.Parameters.AddWithValue("@MetadataJson", document.MetadataJson);
                command.Parameters.AddWithValue("@CreatedAt", document.CreatedAt.UtcDateTime);
                await command.ExecuteNonQueryAsync(ct);
            }

            const string insertChunk = """
                INSERT INTO RagChunks (Id, DocumentId, ChunkIndex, Text, Embedding)
                VALUES (@Id, @DocumentId, @ChunkIndex, @Text, @Embedding)
                """;
            foreach (var chunk in chunks)
            {
                await using var command = new SqlCommand(insertChunk, connection, tx);
                command.Parameters.AddWithValue("@Id", chunk.Id);
                command.Parameters.AddWithValue("@DocumentId", document.Id);
                command.Parameters.AddWithValue("@ChunkIndex", chunk.ChunkIndex);
                command.Parameters.AddWithValue("@Text", chunk.Text);
                command.Parameters.AddWithValue("@Embedding", VectorSerializer.ToBytes(chunk.Embedding));
                await command.ExecuteNonQueryAsync(ct);
            }

            await tx.CommitAsync(ct);
        }
        catch
        {
            // The failure may itself be a cancellation of ct; rolling back with that token would
            // throw immediately and replace the original exception, so roll back unconditionally.
            await tx.RollbackAsync(CancellationToken.None);
            throw;
        }
    }

    /// <summary>
    /// Loads every chunk (optionally restricted to the given document types), scores each one
    /// against <paramref name="queryEmbedding"/> in memory and returns the best candidates.
    /// </summary>
    /// <returns>Up to four times <paramref name="topK"/> chunks, ordered by descending score.</returns>
    public async Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(float[] queryEmbedding, IReadOnlyList<string>? targetTypes, int topK, CancellationToken ct)
    {
        // Blank entries are dropped; the rest is trimmed, lower-cased and de-duplicated.
        var types = targetTypes?.Where(x => !string.IsNullOrWhiteSpace(x)).Select(x => x.Trim().ToLowerInvariant()).Distinct().ToArray() ?? [];

        var sql = """
            SELECT d.Id, d.DocumentType, d.Title, d.SourceUrl, d.RawText, d.TextHash, d.TypeConfidence, d.MetadataJson, d.CreatedAt,
            c.Id, c.DocumentId, c.ChunkIndex, c.Text, c.Embedding
            FROM RagChunks c
            INNER JOIN RagDocuments d ON d.Id = c.DocumentId
            """;
        if (types.Length > 0)
        {
            // Only parameter names are concatenated; the values are bound below.
            sql += " WHERE LOWER(d.DocumentType) IN (" + string.Join(',', types.Select((_, i) => $"@Type{i}")) + ")";
        }

        await using var connection = await OpenConnectionAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        for (var i = 0; i < types.Length; i++)
        {
            command.Parameters.AddWithValue($"@Type{i}", types[i]);
        }

        await using var reader = await command.ExecuteReaderAsync(ct);
        var candidates = new List<SearchCandidateChunk>();
        while (await reader.ReadAsync(ct))
        {
            // Columns 0-8 hold the document, columns 9-13 the chunk.
            var doc = ReadDocument(reader, 0);
            var chunk = new RagChunkRecord
            {
                Id = reader.GetString(9),
                DocumentId = reader.GetString(10),
                ChunkIndex = reader.GetInt32(11),
                Text = reader.GetString(12),
                Embedding = VectorSerializer.FromBytes((byte[])reader[13])
            };
            candidates.Add(new SearchCandidateChunk
            {
                Document = doc,
                Chunk = chunk,
                Score = VectorSerializer.CosineSimilarity(queryEmbedding, chunk.Embedding)
            });
        }

        // More chunks than documents requested are returned so the caller can group them by document.
        return candidates
            .OrderByDescending(x => x.Score)
            .Take(Math.Max(topK * 4, topK))
            .ToList();
    }

    /// <summary>Returns the cached embedding stored under <paramref name="cacheKey"/>, or <c>null</c>.</summary>
    public async Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct)
    {
        const string sql = "SELECT Vector FROM RagEmbeddingCache WHERE CacheKey = @CacheKey";

        await using var connection = await OpenConnectionAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);

        var value = await command.ExecuteScalarAsync(ct);
        return value is byte[] bytes ? VectorSerializer.FromBytes(bytes) : null;
    }

    /// <summary>Stores an embedding under <paramref name="cacheKey"/> unless an entry already exists.</summary>
    public async Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct)
    {
        const string sql = """
            IF NOT EXISTS (SELECT 1 FROM RagEmbeddingCache WHERE CacheKey = @CacheKey)
            INSERT INTO RagEmbeddingCache (CacheKey, Model, TextHash, Vector, CreatedAt)
            VALUES (@CacheKey, @Model, @TextHash, @Vector, SYSUTCDATETIME())
            """;

        await using var connection = await OpenConnectionAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);
        command.Parameters.AddWithValue("@Model", model);
        command.Parameters.AddWithValue("@TextHash", textHash);
        command.Parameters.AddWithValue("@Vector", VectorSerializer.ToBytes(vector));
        await command.ExecuteNonQueryAsync(ct);
    }

    /// <summary>Returns the cached chat completion stored under <paramref name="cacheKey"/>, or <c>null</c>.</summary>
    public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
    {
        const string sql = "SELECT ResponseText FROM RagChatCompletionCache WHERE CacheKey = @CacheKey";

        await using var connection = await OpenConnectionAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);

        return await command.ExecuteScalarAsync(ct) as string;
    }

    /// <summary>Stores a chat completion under <paramref name="cacheKey"/> unless an entry already exists.</summary>
    public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
    {
        const string sql = """
            IF NOT EXISTS (SELECT 1 FROM RagChatCompletionCache WHERE CacheKey = @CacheKey)
            INSERT INTO RagChatCompletionCache (CacheKey, Model, Temperature, ResponseText, CreatedAt)
            VALUES (@CacheKey, @Model, @Temperature, @ResponseText, SYSUTCDATETIME())
            """;

        await using var connection = await OpenConnectionAsync(ct);
        await using var command = new SqlCommand(sql, connection);
        command.Parameters.AddWithValue("@CacheKey", cacheKey);
        command.Parameters.AddWithValue("@Model", model);
        command.Parameters.AddWithValue("@Temperature", temperature);
        command.Parameters.AddWithValue("@ResponseText", responseText);
        await command.ExecuteNonQueryAsync(ct);
    }

    // Maps nine consecutive columns, starting at offset, to a document record. CreatedAt is
    // written as a UTC DateTime by SaveDocumentAsync and is therefore read back with a zero offset.
    private static RagDocumentRecord ReadDocument(SqlDataReader reader, int offset = 0) => new()
    {
        Id = reader.GetString(offset),
        DocumentType = reader.GetString(offset + 1),
        Title = reader.GetString(offset + 2),
        SourceUrl = reader.IsDBNull(offset + 3) ? null : reader.GetString(offset + 3),
        Text = reader.GetString(offset + 4),
        TextHash = reader.GetString(offset + 5),
        TypeConfidence = Convert.ToDouble(reader.GetValue(offset + 6)),
        MetadataJson = reader.GetString(offset + 7),
        CreatedAt = new DateTimeOffset(reader.GetDateTime(offset + 8), TimeSpan.Zero)
    };

    // Opens a connection to the configured database; the connection is disposed when opening fails.
    private async Task<SqlConnection> OpenConnectionAsync(CancellationToken ct)
    {
        var connection = new SqlConnection(_connectionString);
        try
        {
            await connection.OpenAsync(ct);
            return connection;
        }
        catch
        {
            await connection.DisposeAsync();
            throw;
        }
    }

    // Creates the database named in the connection string when it does not exist yet, by
    // connecting to "master". Does nothing when the connection string names no database.
    private async Task EnsureDatabaseExistsAsync(CancellationToken ct)
    {
        var builder = new SqlConnectionStringBuilder(_connectionString);
        var databaseName = builder.InitialCatalog;
        if (string.IsNullOrWhiteSpace(databaseName))
        {
            return;
        }

        builder.InitialCatalog = "master";
        await using var connection = new SqlConnection(builder.ConnectionString);
        await connection.OpenAsync(ct);

        // The name is embedded twice-quoted: as a bracketed identifier (']' doubled) inside a
        // single-quoted EXEC literal (''' doubled). Both escapes are needed for the statement
        // to stay well-formed for any database name.
        var safeName = databaseName.Replace("]", "]]").Replace("'", "''");
        await using var command = new SqlCommand($"IF DB_ID(@DatabaseName) IS NULL EXEC('CREATE DATABASE [{safeName}]')", connection);
        command.Parameters.AddWithValue("@DatabaseName", databaseName);
        await command.ExecuteNonQueryAsync(ct);
    }
}
@@ -1,6 +1,6 @@
using Api.Services.Contracts.Rag;
using Api.Services.Contracts;
namespace Api.Services.Rag;
namespace Api.Services;
public sealed class TextChunker : ITextChunker
{
@@ -15,10 +15,10 @@ public sealed class TextChunker : ITextChunker
while (start < text.Length)
{
var length = Math.Min(chunkSize, text.Length - start);
chunks.Add(text.Substring(start, length).Trim());
var chunk = text.Substring(start, length).Trim();
if (!string.IsNullOrWhiteSpace(chunk)) chunks.Add(chunk);
start += chunkSize - overlap;
}
return chunks.Where(x => !string.IsNullOrWhiteSpace(x)).ToList();
return chunks;
}
}
+27
View File
@@ -0,0 +1,27 @@
using System.Text;
using Api.Services.Contracts;
using UglyToad.PdfPig;
namespace Api.Services;
/// <summary>
/// Extracts text from PDF streams and collapses whitespace in arbitrary text.
/// </summary>
public sealed class TextExtractor : ITextExtractor
{
    /// <summary>
    /// Reads every page of the PDF in <paramref name="stream"/> and returns the combined text
    /// after <see cref="Normalize"/> has been applied.
    /// </summary>
    /// <param name="stream">Stream containing the PDF document.</param>
    /// <param name="ct">Checked once per page before that page's text is read.</param>
    /// <returns>An already-completed task holding the normalized text.</returns>
    /// <remarks>
    /// The work is done synchronously, so failures (including cancellation) are thrown
    /// directly from this call rather than surfaced through the returned task.
    /// </remarks>
    public Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct)
    {
        using var pdf = PdfDocument.Open(stream);
        var pageText = new StringBuilder();

        foreach (var page in pdf.GetPages())
        {
            ct.ThrowIfCancellationRequested();

            // Each page is followed by a blank line; Normalize collapses it to a single space.
            pageText.AppendLine(page.Text).AppendLine();
        }

        var normalized = Normalize(pageText.ToString());
        return Task.FromResult(normalized);
    }

    /// <summary>
    /// Collapses every run of whitespace in <paramref name="value"/> to a single space and
    /// removes leading and trailing whitespace.
    /// </summary>
    /// <param name="value">Text to normalize.</param>
    /// <returns>The normalized text, or an empty string for null, empty or whitespace-only input.</returns>
    public string Normalize(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        // A null separator array makes Split break on whitespace characters.
        var tokens = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        return string.Join(' ', tokens).Trim();
    }
}
+31
View File
@@ -0,0 +1,31 @@
namespace Api.Services;
/// <summary>
/// Converts embedding vectors to and from raw byte arrays and compares them by cosine similarity.
/// </summary>
public static class VectorSerializer
{
    /// <summary>
    /// Copies the in-memory bytes of <paramref name="vector"/> into a new array,
    /// four bytes per element.
    /// </summary>
    /// <param name="vector">Vector to serialize.</param>
    /// <returns>A byte array of length <c>vector.Length * sizeof(float)</c>.</returns>
    public static byte[] ToBytes(float[] vector)
    {
        var bytes = new byte[vector.Length * sizeof(float)];
        Buffer.BlockCopy(vector, 0, bytes, 0, bytes.Length);
        return bytes;
    }

    /// <summary>
    /// Rebuilds a vector from bytes produced by <see cref="ToBytes"/>.
    /// </summary>
    /// <param name="bytes">Serialized vector; its length must be a multiple of <c>sizeof(float)</c>.</param>
    /// <returns>The deserialized vector.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="bytes"/> does not contain a whole number of floats.
    /// </exception>
    public static float[] FromBytes(byte[] bytes)
    {
        // Fail with a descriptive message instead of the generic bounds error Buffer.BlockCopy
        // raises when the payload has trailing bytes.
        if (bytes.Length % sizeof(float) != 0)
        {
            throw new ArgumentException(
                $"Byte length {bytes.Length} is not a multiple of {sizeof(float)}.",
                nameof(bytes));
        }

        var vector = new float[bytes.Length / sizeof(float)];
        Buffer.BlockCopy(bytes, 0, vector, 0, bytes.Length);
        return vector;
    }

    /// <summary>
    /// Computes the cosine similarity of two vectors.
    /// </summary>
    /// <param name="a">First vector.</param>
    /// <param name="b">Second vector.</param>
    /// <returns>
    /// The cosine of the angle between the vectors, or 0 when the vectors are empty,
    /// differ in length, or either one has zero magnitude.
    /// </returns>
    public static double CosineSimilarity(float[] a, float[] b)
    {
        if (a.Length == 0 || a.Length != b.Length)
        {
            return 0;
        }

        double dot = 0, magA = 0, magB = 0;
        for (var i = 0; i < a.Length; i++)
        {
            // Widen before multiplying: a float * float product is rounded to single precision
            // and overflows to Infinity for large components, which turned the result into NaN.
            double x = a[i];
            double y = b[i];
            dot += x * y;
            magA += x * x;
            magB += y * y;
        }

        if (magA == 0 || magB == 0)
        {
            return 0;
        }

        return dot / (Math.Sqrt(magA) * Math.Sqrt(magB));
    }
}
+24
View File
@@ -0,0 +1,24 @@
namespace Api.Settings;
/// <summary>
/// Root AI configuration: the selected provider name plus one settings object per provider.
/// </summary>
public sealed class AiSettings
{
    /// <summary>
    /// Name of the provider to use. Defaults to <c>"OpenAI"</c>.
    /// NOTE(review): presumably chooses between the <see cref="OpenAI"/> and
    /// <see cref="Ollama"/> sections; confirm against the consumer of this class.
    /// </summary>
    public string Provider { get; set; } = "OpenAI";

    /// <summary>OpenAI provider settings. Never null by default.</summary>
    public OpenAiProviderSettings OpenAI { get; set; } = new OpenAiProviderSettings();

    /// <summary>Ollama provider settings. Never null by default.</summary>
    public OllamaProviderSettings Ollama { get; set; } = new OllamaProviderSettings();
}
/// <summary>
/// Settings for the OpenAI provider.
/// </summary>
public sealed class OpenAiProviderSettings
{
    /// <summary>API key. Empty by default, so it must be supplied through configuration.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>Chat model name. Defaults to <c>gpt-4o-mini</c>.</summary>
    public string ChatModel { get; set; } = "gpt-4o-mini";

    /// <summary>Embedding model name. Defaults to <c>text-embedding-3-small</c>.</summary>
    public string EmbeddingModel { get; set; } = "text-embedding-3-small";

    /// <summary>
    /// Timeout in seconds. Defaults to 90.
    /// NOTE(review): presumably applied to outgoing HTTP requests; confirm at the call site.
    /// </summary>
    public int TimeoutSeconds { get; set; } = 90;
}
/// <summary>
/// Settings for the Ollama provider.
/// </summary>
public sealed class OllamaProviderSettings
{
    /// <summary>Base URL of the Ollama server. Defaults to <c>http://localhost:11434</c>.</summary>
    public string BaseUrl { get; set; } = "http://localhost:11434";

    /// <summary>Chat model name. Defaults to <c>llama3.1:8b</c>.</summary>
    public string ChatModel { get; set; } = "llama3.1:8b";

    /// <summary>Embedding model name. Defaults to <c>nomic-embed-text</c>.</summary>
    public string EmbeddingModel { get; set; } = "nomic-embed-text";

    /// <summary>
    /// Timeout in seconds. Defaults to 180.
    /// NOTE(review): presumably applied to outgoing HTTP requests; confirm at the call site.
    /// </summary>
    public int TimeoutSeconds { get; set; } = 180;
}
+7
View File
@@ -0,0 +1,7 @@
namespace Api.Settings;
/// <summary>
/// Settings for the internal API key.
/// </summary>
public sealed class InternalApiSettings
{
    /// <summary>The API key value. Empty by default.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>
    /// Whether an API key is required. Defaults to <c>false</c>.
    /// NOTE(review): with the defaults no key is demanded; confirm that deployments
    /// enable this where the service is reachable by untrusted callers.
    /// </summary>
    public bool RequireApiKey { get; set; }
}
+12
View File
@@ -0,0 +1,12 @@
namespace Api.Settings;
/// <summary>
/// Tunable limits and defaults for document indexing and search.
/// NOTE(review): the exact enforcement points are outside this class; the per-property
/// descriptions below are inferred from the names and should be confirmed at the call sites.
/// </summary>
public sealed class RagSettings
{
    /// <summary>Maximum file size in megabytes. Defaults to 8.</summary>
    public int MaxFileSizeMb { get; set; } = 8;

    /// <summary>Chunk size used when splitting text. Defaults to 900.</summary>
    public int ChunkSize { get; set; } = 900;

    /// <summary>Overlap between consecutive chunks. Defaults to 150.</summary>
    public int ChunkOverlap { get; set; } = 150;

    /// <summary>Maximum number of text characters. Defaults to 60000.</summary>
    public int MaxTextChars { get; set; } = 60000;

    /// <summary>Default number of top results. Defaults to 20.</summary>
    public int DefaultTopK { get; set; } = 20;

    /// <summary>Upper bound for the number of top results. Defaults to 50.</summary>
    public int MaxTopK { get; set; } = 50;

    /// <summary>Whether AI is used for classification. Defaults to <c>false</c>.</summary>
    public bool ClassifyWithAi { get; set; }
}
+46
View File
@@ -0,0 +1,46 @@
{
"AllowedHosts": "*",
"Serilog": {
"MinimumLevel": {
"Default": "Information",
"Override": {
"Microsoft.AspNetCore": "Warning",
"System.Net.Http.HttpClient": "Warning"
}
},
"WriteTo": [
{ "Name": "Console" }
]
},
"ConnectionStrings": {
"RagDb": "Server=localhost,1433;Database=MyAiRag;User Id=sa;Password=Your_strong_password123;TrustServerCertificate=True"
},
"InternalApi": {
"ApiKey": "",
"RequireApiKey": false
},
"Rag": {
"MaxFileSizeMb": 8,
"ChunkSize": 900,
"ChunkOverlap": 150,
"MaxTextChars": 60000,
"DefaultTopK": 20,
"MaxTopK": 50,
"ClassifyWithAi": false
},
"Ai": {
"Provider": "OpenAI",
"OpenAI": {
"ApiKey": "",
"ChatModel": "gpt-4o-mini",
"EmbeddingModel": "text-embedding-3-small",
"TimeoutSeconds": 90
},
"Ollama": {
"BaseUrl": "http://localhost:11434",
"ChatModel": "llama3.1:8b",
"EmbeddingModel": "nomic-embed-text",
"TimeoutSeconds": 180
}
}
}
+26
View File
@@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<!-- Web SDK project targeting net10.0 with nullable reference types and implicit usings enabled. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
<!-- Code in this project lives under the "Api" root namespace. -->
<RootNamespace>Api</RootNamespace>
</PropertyGroup>
<!-- NuGet dependencies, pinned to exact versions. -->
<ItemGroup>
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
<PackageReference Include="Azure.Identity" Version="1.21.0" />
<PackageReference Include="DotNetEnv" Version="3.2.0" />
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.1.3" />
<PackageReference Include="PdfPig" Version="0.1.14" />
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="10.1.7" />
</ItemGroup>
<!-- Copy Database/schema.sql to the build output whenever the source file is newer. -->
<!-- NOTE(review): presumably read at runtime to initialize the database; confirm against the startup code. -->
<ItemGroup>
<None Update="Database/schema.sql">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>
+2 -2
View File
@@ -89,7 +89,7 @@
"cv.noConsent": "GDPR consent is required.",
"cv.processing": "Processing...",
"cv.extracting": "Extracting CV and matching job...",
"cv.processingLong": "Processing CV PDF and job input.",
"cv.processingLong": "Processing CV PDF and job input. Backend endpoints must be available.",
"cv.cvFailed": "CV extraction failed",
"cv.matchFailed": "Job matching failed",
"cv.completed": "Match completed.",
@@ -182,7 +182,7 @@
"cv.noConsent": "Consimțământul GDPR este obligatoriu.",
"cv.processing": "Se procesează...",
"cv.extracting": "Se extrage CV-ul și se compară jobul...",
"cv.processingLong": "Se procesează PDF-ul și informațiile despre job.",
"cv.processingLong": "Se procesează PDF-ul și informațiile despre job. Endpoint-urile backend trebuie să fie disponibile.",
"cv.cvFailed": "Extragerea CV-ului a eșuat",
"cv.matchFailed": "Matching-ul jobului a eșuat",
"cv.completed": "Matching finalizat.",