@@ -0,0 +1,116 @@
|
|||||||
|
# MyAi RAG split cleanup
|
||||||
|
|
||||||
|
## Public `api`
|
||||||
|
|
||||||
|
The existing `api` project is now only the public gateway for the existing frontend.
|
||||||
|
|
||||||
|
It keeps:
|
||||||
|
|
||||||
|
- contact API
|
||||||
|
- file download API
|
||||||
|
- Google/config APIs
|
||||||
|
- health API
|
||||||
|
- `api/rag/*` proxy endpoints
|
||||||
|
|
||||||
|
It no longer contains local RAG processing code. The removed responsibilities are:
|
||||||
|
|
||||||
|
- PDF extraction
|
||||||
|
- chunking
|
||||||
|
- embeddings
|
||||||
|
- vector storage
|
||||||
|
- OpenAI/Ollama calls
|
||||||
|
- job text extraction
|
||||||
|
- CV matching business logic
|
||||||
|
|
||||||
|
`api/Controllers/RagController.cs` is intentionally kept. It proxies the current frontend calls:
|
||||||
|
|
||||||
|
- `POST /api/rag/cv` -> `cv-matcher-api /api/cv/upload`
|
||||||
|
- `POST /api/rag/match-job` -> `cv-matcher-api /api/cv/match-job`
|
||||||
|
|
||||||
|
Required public API config:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"CvMatcherApi": {
|
||||||
|
"BaseUrl": "http://cv-matcher-api:8080",
|
||||||
|
"InternalApiKey": "change-this-internal-key"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## `cv-matcher-api`
|
||||||
|
|
||||||
|
Business API for CV/job workflows.
|
||||||
|
|
||||||
|
Main endpoints:
|
||||||
|
|
||||||
|
- `POST /api/cv/upload`
|
||||||
|
- `POST /api/cv/match-job`
|
||||||
|
- `POST /api/cv/find-jobs`
|
||||||
|
- `GET /health`
|
||||||
|
- Swagger: `/swagger`
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
|
|
||||||
|
- CV matcher business logic
|
||||||
|
- job URL/text extraction
|
||||||
|
- final LLM scoring
|
||||||
|
- result persistence
|
||||||
|
- email sending
|
||||||
|
- calls `rag-api` for generic semantic indexing/search
|
||||||
|
|
||||||
|
## `rag-api`
|
||||||
|
|
||||||
|
Generic semantic search API.
|
||||||
|
|
||||||
|
Main endpoints:
|
||||||
|
|
||||||
|
- `POST /api/rag/documents`
|
||||||
|
- `POST /api/rag/documents/json`
|
||||||
|
- `POST /api/rag/search`
|
||||||
|
- `GET /api/rag/documents/{id}`
|
||||||
|
- `GET /health`
|
||||||
|
- Swagger: `/swagger`
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
|
|
||||||
|
- generic document indexing
|
||||||
|
- automatic document type classification when type is missing
|
||||||
|
- PDF/text extraction
|
||||||
|
- chunking
|
||||||
|
- embedding creation
|
||||||
|
- embedding and chat completion cache
|
||||||
|
- semantic search over generic documents
|
||||||
|
|
||||||
|
## Logging and Swagger
|
||||||
|
|
||||||
|
All three APIs now have:
|
||||||
|
|
||||||
|
- Serilog startup logging
|
||||||
|
- Serilog request logging
|
||||||
|
- structured JSON console logs
|
||||||
|
- health endpoint
|
||||||
|
- Swagger/OpenAPI support
|
||||||
|
|
||||||
|
Swagger is enabled by default and can be disabled per service with:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"Swagger": {
|
||||||
|
"Enabled": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Internal API security
|
||||||
|
|
||||||
|
Both internal APIs (`cv-matcher-api` and `rag-api`) support API-key protection:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"InternalApi": {
|
||||||
|
"RequireApiKey": true,
|
||||||
|
"ApiKey": "change-this-internal-key"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
When `RequireApiKey` is `true`, requests to these internal APIs must include the configured key in this header:
|
||||||
|
|
||||||
|
```http
|
||||||
|
X-Internal-Api-Key: change-this-internal-key
|
||||||
|
```
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
using Api.Models;
|
using Api.Services.Contracts.Models;
|
||||||
|
using Api.Requests;
|
||||||
using Api.Services.Contracts;
|
using Api.Services.Contracts;
|
||||||
using Api.Settings;
|
using Api.Settings;
|
||||||
using Microsoft.AspNetCore.Cors;
|
using Microsoft.AspNetCore.Cors;
|
||||||
@@ -118,7 +119,7 @@ namespace Api.Controllers
|
|||||||
/// <param name="token">Client-provided reCAPTCHA token.</param>
|
/// <param name="token">Client-provided reCAPTCHA token.</param>
|
||||||
/// <param name="ct">Cancellation token.</param>
|
/// <param name="ct">Cancellation token.</param>
|
||||||
/// <returns>Tuple containing the verification verdict and user IP.</returns>
|
/// <returns>Tuple containing the verification verdict and user IP.</returns>
|
||||||
private async Task<(CaptchaVerdict Verdict, string? UserIp)> ValidateCaptcha(string token, CancellationToken ct)
|
private async Task<(CaptchaVerdictModel Verdict, string? UserIp)> ValidateCaptcha(string token, CancellationToken ct)
|
||||||
{
|
{
|
||||||
var userIp = HttpContext.Connection.RemoteIpAddress?.ToString();
|
var userIp = HttpContext.Connection.RemoteIpAddress?.ToString();
|
||||||
var verdict = await _captcha.VerifyAsync(token, userIp, ct);
|
var verdict = await _captcha.VerifyAsync(token, userIp, ct);
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
using api.Services.Contracts.Rag;
|
using Api.Requests;
|
||||||
using Api.Models.Rag;
|
|
||||||
using Api.Services.Rag;
|
|
||||||
using Microsoft.AspNetCore.Mvc;
|
using Microsoft.AspNetCore.Mvc;
|
||||||
using Microsoft.AspNetCore.RateLimiting;
|
using Microsoft.AspNetCore.RateLimiting;
|
||||||
|
using System.Net.Http.Headers;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
|
||||||
namespace Api.Controllers;
|
namespace Api.Controllers;
|
||||||
|
|
||||||
@@ -11,52 +12,135 @@ namespace Api.Controllers;
|
|||||||
[EnableRateLimiting("rag")]
|
[EnableRateLimiting("rag")]
|
||||||
public sealed class RagController : ControllerBase
|
public sealed class RagController : ControllerBase
|
||||||
{
|
{
|
||||||
private readonly ICvRagService _cvRagService;
|
private readonly IHttpClientFactory _httpClientFactory;
|
||||||
|
private readonly IConfiguration _configuration;
|
||||||
private readonly ILogger<RagController> _logger;
|
private readonly ILogger<RagController> _logger;
|
||||||
|
|
||||||
public RagController(ICvRagService cvRagService, ILogger<RagController> logger)
|
public RagController(
|
||||||
|
IHttpClientFactory httpClientFactory,
|
||||||
|
IConfiguration configuration,
|
||||||
|
ILogger<RagController> logger)
|
||||||
{
|
{
|
||||||
_cvRagService = cvRagService;
|
_httpClientFactory = httpClientFactory;
|
||||||
|
_configuration = configuration;
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
}
|
}
|
||||||
|
|
||||||
[HttpPost("cv")]
|
[HttpPost("cv")]
|
||||||
[RequestSizeLimit(8 * 1024 * 1024)]
|
[RequestSizeLimit(8 * 1024 * 1024)]
|
||||||
public async Task<IActionResult> UploadCv([FromForm(Name = "cv")] IFormFile? cv, [FromForm] bool gdprConsent, CancellationToken ct)
|
[ProducesResponseType(StatusCodes.Status200OK)]
|
||||||
|
[ProducesResponseType(StatusCodes.Status400BadRequest)]
|
||||||
|
[ProducesResponseType(StatusCodes.Status502BadGateway)]
|
||||||
|
public async Task<IActionResult> UploadCv(
|
||||||
|
[FromForm(Name = "cv")] IFormFile? cv,
|
||||||
|
[FromForm] bool gdprConsent,
|
||||||
|
CancellationToken ct)
|
||||||
{
|
{
|
||||||
|
if (cv is null)
|
||||||
|
{
|
||||||
|
return BadRequest(new { error = "Missing CV PDF." });
|
||||||
|
}
|
||||||
|
|
||||||
|
var baseUrl = GetCvMatcherBaseUrl();
|
||||||
|
if (string.IsNullOrWhiteSpace(baseUrl))
|
||||||
|
{
|
||||||
|
_logger.LogError("CvMatcherApi:BaseUrl is not configured. The public API cannot proxy CV upload requests.");
|
||||||
|
return StatusCode(StatusCodes.Status502BadGateway, new { error = "CV matcher API is not configured." });
|
||||||
|
}
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if (cv is null) return BadRequest(new { error = "Missing CV PDF." });
|
_logger.LogInformation("Proxying CV upload to cv-matcher-api. FileName={FileName}, Size={SizeBytes}, GdprConsent={GdprConsent}",
|
||||||
var result = await _cvRagService.IngestCvAsync(cv, gdprConsent, ct);
|
cv.FileName, cv.Length, gdprConsent);
|
||||||
return Ok(result);
|
|
||||||
|
using var client = CreateCvMatcherClient(baseUrl);
|
||||||
|
using var form = new MultipartFormDataContent();
|
||||||
|
await using var stream = cv.OpenReadStream();
|
||||||
|
using var fileContent = new StreamContent(stream);
|
||||||
|
fileContent.Headers.ContentType = new MediaTypeHeaderValue("application/pdf");
|
||||||
|
form.Add(fileContent, "cv", cv.FileName);
|
||||||
|
form.Add(new StringContent(gdprConsent.ToString().ToLowerInvariant()), "gdprConsent");
|
||||||
|
|
||||||
|
using var response = await client.PostAsync("api/cv/upload", form, ct);
|
||||||
|
return await ProxyResponseAsync(response, ct);
|
||||||
}
|
}
|
||||||
catch (InvalidOperationException ex)
|
catch (OperationCanceledException) when (ct.IsCancellationRequested)
|
||||||
{
|
{
|
||||||
return BadRequest(new { error = ex.Message });
|
_logger.LogWarning("CV upload proxy request was cancelled by the client.");
|
||||||
|
return StatusCode(499, new { error = "Request cancelled." });
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
_logger.LogError(ex, "CV ingestion failed");
|
_logger.LogError(ex, "CV upload proxy request failed.");
|
||||||
return StatusCode(500, new { error = "CV ingestion failed." });
|
return StatusCode(StatusCodes.Status502BadGateway, new { error = "CV matcher API request failed." });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[HttpPost("match-job")]
|
[HttpPost("match-job")]
|
||||||
|
[ProducesResponseType(StatusCodes.Status200OK)]
|
||||||
|
[ProducesResponseType(StatusCodes.Status400BadRequest)]
|
||||||
|
[ProducesResponseType(StatusCodes.Status502BadGateway)]
|
||||||
public async Task<IActionResult> MatchJob([FromBody] JobMatchRequest request, CancellationToken ct)
|
public async Task<IActionResult> MatchJob([FromBody] JobMatchRequest request, CancellationToken ct)
|
||||||
{
|
{
|
||||||
|
var baseUrl = GetCvMatcherBaseUrl();
|
||||||
|
if (string.IsNullOrWhiteSpace(baseUrl))
|
||||||
|
{
|
||||||
|
_logger.LogError("CvMatcherApi:BaseUrl is not configured. The public API cannot proxy job matching requests.");
|
||||||
|
return StatusCode(StatusCodes.Status502BadGateway, new { error = "CV matcher API is not configured." });
|
||||||
|
}
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
var result = await _cvRagService.MatchJobAsync(request, ct);
|
_logger.LogInformation("Proxying job match request to cv-matcher-api. CvDocumentId={CvDocumentId}, HasJobUrl={HasJobUrl}, HasJobDescription={HasJobDescription}",
|
||||||
return Ok(result);
|
request.CvDocumentId,
|
||||||
|
!string.IsNullOrWhiteSpace(request.JobUrl),
|
||||||
|
!string.IsNullOrWhiteSpace(request.JobDescription));
|
||||||
|
|
||||||
|
using var client = CreateCvMatcherClient(baseUrl);
|
||||||
|
var json = JsonSerializer.Serialize(request, new JsonSerializerOptions(JsonSerializerDefaults.Web));
|
||||||
|
using var response = await client.PostAsync(
|
||||||
|
"api/cv/match-job",
|
||||||
|
new StringContent(json, Encoding.UTF8, "application/json"),
|
||||||
|
ct);
|
||||||
|
|
||||||
|
return await ProxyResponseAsync(response, ct);
|
||||||
}
|
}
|
||||||
catch (InvalidOperationException ex)
|
catch (OperationCanceledException) when (ct.IsCancellationRequested)
|
||||||
{
|
{
|
||||||
return BadRequest(new { error = ex.Message });
|
_logger.LogWarning("Job match proxy request was cancelled by the client.");
|
||||||
|
return StatusCode(499, new { error = "Request cancelled." });
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
_logger.LogError(ex, "Job matching failed");
|
_logger.LogError(ex, "Job match proxy request failed.");
|
||||||
return StatusCode(500, new { error = "Job matching failed." });
|
return StatusCode(StatusCodes.Status502BadGateway, new { error = "CV matcher API request failed." });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the cv-matcher-api base URL from the <c>CvMatcherApi:BaseUrl</c> configuration key.
/// </summary>
/// <returns>The configured value, or an empty string when the key has no value.</returns>
private string GetCvMatcherBaseUrl()
{
    var configuredUrl = _configuration["CvMatcherApi:BaseUrl"];
    return configuredUrl ?? string.Empty;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates the named <c>CvMatcherApi</c> HTTP client, points it at <paramref name="baseUrl"/> and
/// attaches the internal API key header when a key is configured.
/// </summary>
/// <param name="baseUrl">Base URL of cv-matcher-api; any trailing slashes are normalized to exactly one.</param>
/// <returns>A client whose <see cref="HttpClient.BaseAddress"/> ends with a single '/'.</returns>
private HttpClient CreateCvMatcherClient(string baseUrl)
{
    const string apiKeyHeader = "X-Internal-Api-Key";

    var httpClient = _httpClientFactory.CreateClient("CvMatcherApi");

    // The base address always ends with '/', so the relative paths used by the actions
    // (for example "api/cv/upload") are appended to it rather than replacing its last segment.
    var normalizedBase = baseUrl.TrimEnd('/') + "/";
    httpClient.BaseAddress = new Uri(normalizedBase);

    var internalKey = _configuration["CvMatcherApi:InternalApiKey"];
    if (string.IsNullOrWhiteSpace(internalKey))
    {
        // No key configured: requests are sent without the internal header.
        return httpClient;
    }

    var defaultHeaders = httpClient.DefaultRequestHeaders;
    if (!defaultHeaders.Contains(apiKeyHeader))
    {
        defaultHeaders.Add(apiKeyHeader, internalKey);
    }

    return httpClient;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts an upstream HTTP response into a <see cref="ContentResult"/> that carries the same
/// status code, body text and content type.
/// </summary>
/// <param name="response">Response received from the upstream API.</param>
/// <param name="ct">Token used to cancel reading the response body.</param>
/// <returns>
/// A result mirroring the upstream response; the content type falls back to
/// <c>application/json</c> when the upstream response does not declare one.
/// </returns>
private static async Task<ContentResult> ProxyResponseAsync(HttpResponseMessage response, CancellationToken ct)
{
    var upstreamContent = response.Content;
    var payload = await upstreamContent.ReadAsStringAsync(ct);
    var mediaType = upstreamContent.Headers.ContentType?.ToString() ?? "application/json";

    var result = new ContentResult();
    result.StatusCode = (int)response.StatusCode;
    result.Content = payload;
    result.ContentType = mediaType;
    return result;
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,43 +0,0 @@
|
|||||||
namespace Api.Models.Rag;
|
|
||||||
|
|
||||||
/// <summary>
/// Result of ingesting a CV, as produced by <c>CvRagService.IngestCvAsync</c>.
/// </summary>
/// <param name="DocumentId">Generated identifier of the stored CV (a <c>cv_</c> prefix followed by a GUID).</param>
/// <param name="Chunks">Number of chunks that were stored for the CV.</param>
/// <param name="CharactersExtracted">Length of the text extracted from the PDF.</param>
/// <param name="Summary">Model-generated summary, or the fallback text "CV indexed." when summarization fails.</param>
public sealed record CvIngestResponse(
|
|
||||||
string DocumentId,
|
|
||||||
int Chunks,
|
|
||||||
int CharactersExtracted,
|
|
||||||
string Summary
|
|
||||||
);
|
|
||||||
|
|
||||||
/// <summary>
/// Input for matching a previously ingested CV against a job posting.
/// </summary>
public sealed class JobMatchRequest
|
|
||||||
{
|
|
||||||
/// <summary>Identifier of the stored CV; <c>CvRagService.MatchJobAsync</c> rejects a missing or blank value.</summary>
public string? CvDocumentId { get; set; }
|
|
||||||
/// <summary>Optional job URL, passed to the job text extractor together with <see cref="JobDescription"/>.</summary>
public string? JobUrl { get; set; }
|
|
||||||
/// <summary>Optional job description text, passed to the job text extractor together with <see cref="JobUrl"/>.</summary>
public string? JobDescription { get; set; }
|
|
||||||
/// <summary>Must be <c>true</c>; <c>CvRagService.MatchJobAsync</c> throws when consent is not given.</summary>
public bool GdprConsent { get; set; }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
/// Outcome of a CV/job comparison, deserialized from the JSON returned by the chat model.
/// </summary>
public sealed class JobMatchResponse
|
|
||||||
{
|
|
||||||
/// <summary>Match score; <c>CvRagService</c> clamps it to the range 0-100 after parsing.</summary>
public int Score { get; set; }
|
|
||||||
/// <summary>Short assessment text.</summary>
public string Summary { get; set; } = string.Empty;
|
|
||||||
/// <summary>Strengths listed by the model.</summary>
public List<string> Strengths { get; set; } = [];
|
|
||||||
/// <summary>Gaps listed by the model.</summary>
public List<string> Gaps { get; set; } = [];
|
|
||||||
/// <summary>Recommended actions listed by the model.</summary>
public List<string> Recommendations { get; set; } = [];
|
|
||||||
/// <summary>
/// Evidence from the CV; when the model returns none, <c>CvRagService</c> fills this with
/// excerpts of the retrieved CV chunks.
/// </summary>
public List<string> Evidence { get; set; } = [];
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
/// One chunk of CV text together with its embedding, as kept in the vector store.
/// </summary>
public sealed class StoredCvChunk
|
|
||||||
{
|
|
||||||
/// <summary>Unique identifier of this chunk.</summary>
public required string Id { get; init; }
|
|
||||||
/// <summary>Identifier of the CV document this chunk belongs to.</summary>
public required string DocumentId { get; init; }
|
|
||||||
/// <summary>Chunk text.</summary>
public required string Text { get; init; }
|
|
||||||
/// <summary>Embedding vector created for <see cref="Text"/>.</summary>
public required float[] Embedding { get; init; }
|
|
||||||
/// <summary>Zero-based position of the chunk within the document.</summary>
public required int ChunkIndex { get; init; }
|
|
||||||
/// <summary>Point in time after which the chunk counts as expired for store cleanup.</summary>
public DateTimeOffset ExpiresAt { get; init; }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
/// A CV chunk returned by a vector store search, with its similarity to the query.
/// </summary>
public sealed class RetrievedCvChunk
|
|
||||||
{
|
|
||||||
/// <summary>Chunk text.</summary>
public required string Text { get; init; }
|
|
||||||
/// <summary>Zero-based position of the chunk within its document.</summary>
public required int ChunkIndex { get; init; }
|
|
||||||
/// <summary>Cosine similarity between the query embedding and the chunk embedding.</summary>
public double Score { get; init; }
|
|
||||||
}
|
|
||||||
+1
-11
@@ -1,8 +1,5 @@
|
|||||||
using api.Services.Contracts.Rag;
|
|
||||||
using Api.Services;
|
using Api.Services;
|
||||||
using Api.Services.Contracts;
|
using Api.Services.Contracts;
|
||||||
using Api.Services.Contracts.Rag;
|
|
||||||
using Api.Services.Rag;
|
|
||||||
using Api.Settings;
|
using Api.Settings;
|
||||||
using Azure.Identity;
|
using Azure.Identity;
|
||||||
using Microsoft.AspNetCore.HttpOverrides;
|
using Microsoft.AspNetCore.HttpOverrides;
|
||||||
@@ -78,19 +75,12 @@ try
|
|||||||
builder.Services.Configure<SmtpSettings>(builder.Configuration.GetSection("Smtp"));
|
builder.Services.Configure<SmtpSettings>(builder.Configuration.GetSection("Smtp"));
|
||||||
builder.Services.Configure<CaptchaSettings>(builder.Configuration.GetSection("Captcha"));
|
builder.Services.Configure<CaptchaSettings>(builder.Configuration.GetSection("Captcha"));
|
||||||
builder.Services.Configure<FileStorageSettings>(builder.Configuration.GetSection("FileStorage"));
|
builder.Services.Configure<FileStorageSettings>(builder.Configuration.GetSection("FileStorage"));
|
||||||
builder.Services.Configure<RagSettings>(builder.Configuration.GetSection("Rag"));
|
|
||||||
builder.Services.Configure<OpenAiSettings>(builder.Configuration.GetSection("OpenAI"));
|
|
||||||
|
|
||||||
// Services
|
// Services
|
||||||
builder.Services.AddHttpClient<ICaptchaVerifier, RecaptchaVerifier>();
|
builder.Services.AddHttpClient<ICaptchaVerifier, RecaptchaVerifier>();
|
||||||
builder.Services.AddSingleton<IEmailSender, SmtpEmailSender>();
|
builder.Services.AddSingleton<IEmailSender, SmtpEmailSender>();
|
||||||
builder.Services.AddSingleton<Microsoft.AspNetCore.StaticFiles.IContentTypeProvider, Microsoft.AspNetCore.StaticFiles.FileExtensionContentTypeProvider>();
|
builder.Services.AddSingleton<Microsoft.AspNetCore.StaticFiles.IContentTypeProvider, Microsoft.AspNetCore.StaticFiles.FileExtensionContentTypeProvider>();
|
||||||
builder.Services.AddSingleton<IPdfTextExtractor, PdfTextExtractor>();
|
builder.Services.AddHttpClient("CvMatcherApi");
|
||||||
builder.Services.AddSingleton<ITextChunker, TextChunker>();
|
|
||||||
builder.Services.AddSingleton<ICvVectorStore, InMemoryCvVectorStore>();
|
|
||||||
builder.Services.AddScoped<ICvRagService, CvRagService>();
|
|
||||||
builder.Services.AddHttpClient<IAiRagClient, OpenAiRagClient>();
|
|
||||||
builder.Services.AddHttpClient<IJobTextExtractor, JobTextExtractor>();
|
|
||||||
|
|
||||||
// Swagger
|
// Swagger
|
||||||
builder.Services.AddEndpointsApiExplorer();
|
builder.Services.AddEndpointsApiExplorer();
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
using System.ComponentModel.DataAnnotations;
|
using System.ComponentModel.DataAnnotations;
|
||||||
|
|
||||||
namespace Api.Models
|
namespace Api.Requests
|
||||||
{
|
{
|
||||||
public sealed class ContactRequest
|
public sealed class ContactRequest
|
||||||
{
|
{
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
namespace Api.Requests;
|
||||||
|
|
||||||
|
/// <summary>
/// Request body of the public job-match endpoint. <c>RagController.MatchJob</c> binds it from the
/// request body and forwards it as JSON to cv-matcher-api.
/// </summary>
public sealed class JobMatchRequest
|
||||||
|
{
|
||||||
|
/// <summary>Identifier of the CV document to match. NOTE(review): presumably the id returned by the CV upload endpoint - confirm.</summary>
public string? CvDocumentId { get; set; }
|
||||||
|
/// <summary>Optional URL of the job posting.</summary>
public string? JobUrl { get; set; }
|
||||||
|
/// <summary>Optional job description text.</summary>
public string? JobDescription { get; set; }
|
||||||
|
/// <summary>GDPR consent flag supplied by the caller; it is forwarded unchanged, validation is not done in this class.</summary>
public bool GdprConsent { get; set; }
|
||||||
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
using System.ComponentModel.DataAnnotations;
|
using System.ComponentModel.DataAnnotations;
|
||||||
|
|
||||||
namespace Api.Models
|
namespace Api.Requests
|
||||||
{
|
{
|
||||||
public sealed class SubscribeRequest
|
public sealed class SubscribeRequest
|
||||||
{
|
{
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
namespace Api.Services.Contracts
|
using Api.Services.Contracts.Models;
|
||||||
{
|
|
||||||
public sealed record CaptchaVerdict(bool Success, string? Error, double? Score);
|
|
||||||
|
|
||||||
|
namespace Api.Services.Contracts
|
||||||
|
{
|
||||||
public interface ICaptchaVerifier
|
public interface ICaptchaVerifier
|
||||||
{
|
{
|
||||||
Task<CaptchaVerdict> VerifyAsync(string token, string? userIp, CancellationToken ct);
|
Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, CancellationToken ct);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
using Api.Models;
|
using Api.Requests;
|
||||||
|
|
||||||
namespace Api.Services.Contracts
|
namespace Api.Services.Contracts
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
namespace Api.Services.Contracts.Models
|
||||||
|
{
|
||||||
|
/// <summary>
/// Result of a captcha verification, returned by <c>ICaptchaVerifier.VerifyAsync</c>.
/// </summary>
/// <param name="Success">Whether the verification succeeded.</param>
/// <param name="Error">Error description, if any.</param>
/// <param name="Score">Optional score. NOTE(review): presumably the provider's confidence score - confirm against the verifier implementation.</param>
public sealed record CaptchaVerdictModel(bool Success, string? Error, double? Score);
|
||||||
|
}
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
using Api.Models.Rag;
|
|
||||||
|
|
||||||
namespace api.Services.Contracts.Rag;
|
|
||||||
|
|
||||||
/// <summary>
/// CV ingestion and CV-to-job matching operations.
/// </summary>
public interface ICvRagService
|
|
||||||
{
|
|
||||||
/// <summary>Ingests an uploaded CV file and returns the identifier and statistics of the stored document.</summary>
/// <param name="file">Uploaded CV file.</param>
/// <param name="gdprConsent">Whether the user gave GDPR consent.</param>
/// <param name="ct">Cancellation token.</param>
Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct);
|
|
||||||
/// <summary>Compares a previously ingested CV with a job posting and returns the match assessment.</summary>
/// <param name="request">CV identifier plus job URL and/or description.</param>
/// <param name="ct">Cancellation token.</param>
Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct);
|
|
||||||
}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
namespace Api.Services.Contracts.Rag;
|
|
||||||
|
|
||||||
/// <summary>
/// Extracts text from a PDF document.
/// </summary>
public interface IPdfTextExtractor
|
|
||||||
{
|
|
||||||
/// <summary>Returns the text extracted from the PDF read from <paramref name="pdfStream"/>.</summary>
/// <param name="pdfStream">Stream containing the PDF. NOTE(review): whether the stream must be seekable is not visible here - confirm with the implementation.</param>
string ExtractText(Stream pdfStream);
|
|
||||||
}
|
|
||||||
@@ -1,165 +0,0 @@
|
|||||||
using api.Services.Contracts.Rag;
|
|
||||||
using Api.Models.Rag;
|
|
||||||
using Api.Services.Contracts.Rag;
|
|
||||||
using Api.Settings;
|
|
||||||
using Microsoft.Extensions.Options;
|
|
||||||
using System.Text.Json;
|
|
||||||
|
|
||||||
namespace Api.Services.Rag;
|
|
||||||
|
|
||||||
/// <summary>
/// Implements CV ingestion (PDF text extraction, chunking, embedding and storage) and
/// CV-to-job matching (retrieval of the most similar CV chunks followed by a chat-model assessment).
/// </summary>
/// <remarks>
/// Validation failures are reported as <see cref="InvalidOperationException"/> with a user-readable message.
/// </remarks>
public sealed class CvRagService : ICvRagService
{
    /// <summary>Minimum number of characters that extracted CV or job text must contain.</summary>
    private const int MinUsableTextLength = 80;

    /// <summary>Maximum number of CV characters sent to the model for the one-sentence summary.</summary>
    private const int SummaryInputMaxLength = 8000;

    /// <summary>Maximum length of a chunk excerpt used as fallback evidence.</summary>
    private const int EvidenceExcerptLength = 280;

    /// <summary>Summary returned when the model summary cannot be produced.</summary>
    private const string FallbackSummary = "CV indexed.";

    // Cached: creating JsonSerializerOptions per call discards the serializer's metadata cache every time.
    private static readonly JsonSerializerOptions MatchResponseJsonOptions = new() { PropertyNameCaseInsensitive = true };

    private readonly IPdfTextExtractor _pdfTextExtractor;
    private readonly ITextChunker _textChunker;
    private readonly IAiRagClient _openAi;
    private readonly ICvVectorStore _store;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly RagSettings _settings;
    private readonly ILogger<CvRagService> _logger;

    public CvRagService(
        IPdfTextExtractor pdfTextExtractor,
        ITextChunker textChunker,
        IAiRagClient openAi,
        ICvVectorStore store,
        IJobTextExtractor jobTextExtractor,
        IOptions<RagSettings> options,
        ILogger<CvRagService> logger)
    {
        _pdfTextExtractor = pdfTextExtractor;
        _textChunker = textChunker;
        _openAi = openAi;
        _store = store;
        _jobTextExtractor = jobTextExtractor;
        _settings = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Validates the uploaded PDF, extracts and chunks its text, embeds every chunk, stores the
    /// chunks under a new document id and returns that id with a short summary.
    /// </summary>
    /// <param name="file">Uploaded CV; must be a non-empty <c>.pdf</c> within the configured size limit.</param>
    /// <param name="gdprConsent">Must be <c>true</c>.</param>
    /// <param name="ct">Cancellation token, checked before each chunk is embedded.</param>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">Consent is missing, the file is invalid, or too little text was extracted.</exception>
    public async Task<CvIngestResponse> IngestCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(file);

        if (!gdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (file.Length == 0)
        {
            throw new InvalidOperationException("CV PDF is empty.");
        }

        if (file.Length > _settings.MaxPdfSizeMb * 1024L * 1024L)
        {
            throw new InvalidOperationException($"PDF is too large. Max size is {_settings.MaxPdfSizeMb} MB.");
        }

        if (!string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("Only PDF files are accepted.");
        }

        await using var stream = file.OpenReadStream();
        var text = _pdfTextExtractor.ExtractText(stream);
        if (text.Length < MinUsableTextLength)
        {
            throw new InvalidOperationException("Could not extract enough text from this PDF.");
        }

        var documentId = $"cv_{Guid.NewGuid():N}";

        // The stored CV lives for at least 10 minutes, even when a smaller TTL is configured.
        var expiresAt = DateTimeOffset.UtcNow.AddMinutes(Math.Max(10, _settings.CvTtlMinutes));
        var chunks = _textChunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        var stored = new List<StoredCvChunk>();
        for (var i = 0; i < chunks.Count; i++)
        {
            ct.ThrowIfCancellationRequested();
            stored.Add(new StoredCvChunk
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = documentId,
                Text = chunks[i],
                Embedding = await _openAi.CreateEmbeddingAsync(chunks[i], ct),
                ChunkIndex = i,
                ExpiresAt = expiresAt
            });
        }

        _store.Save(documentId, stored);
        var summary = await SummarizeCvAsync(text, ct);
        return new CvIngestResponse(documentId, stored.Count, text.Length, summary);
    }

    /// <summary>
    /// Retrieves the CV chunks most similar to the job text and asks the chat model for a structured assessment.
    /// </summary>
    /// <param name="request">CV document id plus a job URL and/or job description; consent must be given.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">Consent or CV id is missing, the CV is unknown or expired, or too little job text was extracted.</exception>
    public async Task<JobMatchResponse> MatchJobAsync(JobMatchRequest request, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(request);

        if (!request.GdprConsent)
        {
            throw new InvalidOperationException("GDPR consent is required.");
        }

        if (string.IsNullOrWhiteSpace(request.CvDocumentId))
        {
            throw new InvalidOperationException("Missing CV document id.");
        }

        var cvChunks = _store.Get(request.CvDocumentId);
        if (cvChunks.Count == 0)
        {
            throw new InvalidOperationException("CV context was not found or has expired. Upload the CV again.");
        }

        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText.Length < MinUsableTextLength)
        {
            throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");
        }

        var jobEmbedding = await _openAi.CreateEmbeddingAsync(jobText, ct);
        var retrieved = _store.Search(request.CvDocumentId, jobEmbedding, _settings.TopK);
        var cvContext = string.Join("\n\n", retrieved.Select(x => $"CV chunk {x.ChunkIndex} | similarity {x.Score:0.000}:\n{x.Text}"));

        var systemPrompt = "You are a strict senior technical recruiter and AI CV matcher. Return only valid JSON. Do not invent candidate experience. Use only the supplied CV context and job text.";
        var userPrompt = $$"""
            Compare the candidate CV context with the job description.
            Return this JSON shape exactly:
            {
            "score": 0,
            "summary": "short direct assessment",
            "strengths": ["strength 1"],
            "gaps": ["gap 1"],
            "recommendations": ["action 1"],
            "evidence": ["short CV evidence quote or paraphrase"]
            }
            Score must be 0-100.

            CV CONTEXT:
            {{cvContext}}

            JOB DESCRIPTION:
            {{jobText}}
            """;

        var content = await _openAi.CreateChatCompletionAsync(systemPrompt, userPrompt, ct);
        var response = ParseMatchResponse(content);
        if (response.Evidence.Count == 0)
        {
            // The model gave no evidence: fall back to excerpts of the chunks it was shown.
            response.Evidence = retrieved
                .Select(x => x.Text.Length > EvidenceExcerptLength ? x.Text[..EvidenceExcerptLength] + "..." : x.Text)
                .ToList();
        }

        return response;
    }

    /// <summary>
    /// Asks the chat model for a one-sentence CV summary. This is best-effort: any failure other than
    /// cancellation requested through <paramref name="ct"/> is logged and replaced by a fallback text.
    /// </summary>
    private async Task<string> SummarizeCvAsync(string cvText, CancellationToken ct)
    {
        try
        {
            var shortened = cvText.Length > SummaryInputMaxLength ? cvText[..SummaryInputMaxLength] : cvText;
            var content = await _openAi.CreateChatCompletionAsync(
                "Return only valid JSON.",
                $$"""
                Summarize this CV in one concise sentence. Return JSON: { "summary": "..." }

                CV:
                {{shortened}}
                """,
                ct);

            using var doc = JsonDocument.Parse(content);
            return doc.RootElement.TryGetProperty("summary", out var summary)
                ? summary.GetString() ?? FallbackSummary
                : FallbackSummary;
        }
        catch (Exception ex) when (!ct.IsCancellationRequested)
        {
            // The filter lets caller-requested cancellation propagate instead of being reported
            // as a successful ingestion; timeouts and model errors still fall back.
            _logger.LogWarning(ex, "CV summary failed");
            return FallbackSummary;
        }
    }

    /// <summary>
    /// Deserializes the model output into a <see cref="JobMatchResponse"/>, clamping the score and
    /// replacing missing lists with empty ones. Unparseable output yields a zero-score response and
    /// is logged, so the "Check logs" hint in that response has something to point at.
    /// </summary>
    private JobMatchResponse ParseMatchResponse(string content)
    {
        try
        {
            var response = JsonSerializer.Deserialize<JobMatchResponse>(content, MatchResponseJsonOptions) ?? new JobMatchResponse();
            response.Score = Math.Clamp(response.Score, 0, 100);
            response.Summary ??= string.Empty; // a JSON null would otherwise leak into a non-nullable property
            response.Strengths ??= [];
            response.Gaps ??= [];
            response.Recommendations ??= [];
            response.Evidence ??= [];
            return response;
        }
        catch (Exception ex)
        {
            // Deliberately broad: a malformed model answer must never fail the whole request.
            _logger.LogWarning(ex, "Job match response from the model could not be parsed");
            return new JobMatchResponse
            {
                Score = 0,
                Summary = "The AI response could not be parsed. Check logs and prompt output.",
                Gaps = ["Invalid JSON returned by the model."],
                Evidence = []
            };
        }
    }
}
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
using Api.Models.Rag;
|
|
||||||
|
|
||||||
namespace Api.Services.Rag;
|
|
||||||
|
|
||||||
/// <summary>
/// Storage and similarity search for embedded CV chunks, keyed by document id.
/// </summary>
public interface ICvVectorStore
|
|
||||||
{
|
|
||||||
/// <summary>Stores <paramref name="chunks"/> under <paramref name="documentId"/>.</summary>
void Save(string documentId, IEnumerable<StoredCvChunk> chunks);
|
|
||||||
/// <summary>Returns the chunks stored for <paramref name="documentId"/>.</summary>
IReadOnlyList<StoredCvChunk> Get(string documentId);
|
|
||||||
/// <summary>Returns up to <paramref name="topK"/> chunks of the document ranked by similarity to <paramref name="queryEmbedding"/>.</summary>
IReadOnlyList<RetrievedCvChunk> Search(string documentId, float[] queryEmbedding, int topK);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
/// <see cref="ICvVectorStore"/> backed by an in-process dictionary. Every dictionary access happens
/// under a private lock, and each <see cref="Save"/> / <see cref="Get"/> first removes documents
/// whose chunks have all expired.
/// </summary>
public sealed class InMemoryCvVectorStore : ICvVectorStore
{
    private readonly object _lock = new();
    private readonly Dictionary<string, List<StoredCvChunk>> _store = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>Stores a snapshot of <paramref name="chunks"/>, replacing anything saved earlier under the same id.</summary>
    public void Save(string documentId, IEnumerable<StoredCvChunk> chunks)
    {
        lock (_lock)
        {
            CleanupExpiredUnsafe();
            var snapshot = chunks.ToList();
            _store[documentId] = snapshot;
        }
    }

    /// <summary>Returns a copy of the chunks stored for <paramref name="documentId"/>, or an empty list when none exist.</summary>
    public IReadOnlyList<StoredCvChunk> Get(string documentId)
    {
        lock (_lock)
        {
            CleanupExpiredUnsafe();

            if (!_store.TryGetValue(documentId, out var existing))
            {
                return new List<StoredCvChunk>();
            }

            return existing.ToList();
        }
    }

    /// <summary>
    /// Ranks the document's chunks by cosine similarity to <paramref name="queryEmbedding"/> and
    /// returns the best ones; <paramref name="topK"/> is clamped to the range 1-12.
    /// </summary>
    public IReadOnlyList<RetrievedCvChunk> Search(string documentId, float[] queryEmbedding, int topK)
    {
        // Get() hands back a copy, so scoring below runs outside the lock.
        var candidates = Get(documentId);
        if (candidates.Count == 0)
        {
            return [];
        }

        var limit = Math.Clamp(topK, 1, 12);

        var ranked =
            from candidate in candidates
            let similarity = CosineSimilarity(queryEmbedding, candidate.Embedding)
            orderby similarity descending
            select new RetrievedCvChunk
            {
                Text = candidate.Text,
                ChunkIndex = candidate.ChunkIndex,
                Score = similarity
            };

        return ranked.Take(limit).ToList();
    }

    /// <summary>
    /// Removes every document whose chunks are all expired (a document saved with no chunks also
    /// qualifies). The caller must hold <c>_lock</c>.
    /// </summary>
    private void CleanupExpiredUnsafe()
    {
        var now = DateTimeOffset.UtcNow;

        // Collect first: the dictionary cannot be modified while it is being enumerated.
        var expiredKeys = new List<string>();
        foreach (var (key, chunks) in _store)
        {
            if (chunks.All(c => c.ExpiresAt <= now))
            {
                expiredKeys.Add(key);
            }
        }

        foreach (var key in expiredKeys)
        {
            _store.Remove(key);
        }
    }

    /// <summary>
    /// Cosine similarity of two vectors; returns 0 when the lengths differ, a vector is empty,
    /// or either vector has zero magnitude.
    /// </summary>
    private static double CosineSimilarity(float[] a, float[] b)
    {
        if (a.Length == 0 || a.Length != b.Length)
        {
            return 0;
        }

        double dot = 0, squaredA = 0, squaredB = 0;
        for (var i = 0; i < a.Length; i++)
        {
            dot += a[i] * b[i];
            squaredA += a[i] * a[i];
            squaredB += b[i] * b[i];
        }

        if (squaredA == 0 || squaredB == 0)
        {
            return 0;
        }

        return dot / (Math.Sqrt(squaredA) * Math.Sqrt(squaredB));
    }
}
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
using System.Net.Http.Headers;
|
|
||||||
using System.Text;
|
|
||||||
using System.Text.Json;
|
|
||||||
using System.Text.Json.Serialization;
|
|
||||||
using Api.Services.Contracts.Rag;
|
|
||||||
using Api.Settings;
|
|
||||||
using Microsoft.Extensions.Options;
|
|
||||||
|
|
||||||
namespace Api.Services.Rag;
|
|
||||||
|
|
||||||
/// <summary>
/// <see cref="IAiRagClient"/> that calls the OpenAI REST API (<c>embeddings</c> and
/// <c>chat/completions</c>) through the injected <see cref="HttpClient"/>.
/// </summary>
public sealed class OpenAiRagClient : IAiRagClient
{
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly HttpClient _httpClient;
    private readonly OpenAiSettings _settings;

    /// <summary>
    /// Configures the injected client: bearer authorization when an API key is set, a timeout of at
    /// least 15 seconds, and the OpenAI v1 base address.
    /// </summary>
    public OpenAiRagClient(HttpClient httpClient, IOptions<OpenAiSettings> options)
    {
        _httpClient = httpClient;
        _settings = options.Value;

        var apiKey = _settings.ApiKey;
        if (!string.IsNullOrWhiteSpace(apiKey))
        {
            _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
        }

        var timeoutSeconds = Math.Max(15, _settings.TimeoutSeconds);
        _httpClient.Timeout = TimeSpan.FromSeconds(timeoutSeconds);
        _httpClient.BaseAddress = new Uri("https://api.openai.com/v1/");
    }

    /// <summary>Creates an embedding vector for <paramref name="input"/> with the configured embedding model.</summary>
    /// <exception cref="InvalidOperationException">The API key is not configured or the request failed.</exception>
    public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
    {
        EnsureConfigured();

        var payload = new { model = _settings.EmbeddingModel, input };
        var json = await SendJsonAsync("embeddings", payload, "embeddings", ct);

        using var document = JsonDocument.Parse(json);
        var embedding = document.RootElement.GetProperty("data")[0].GetProperty("embedding");

        var vector = new float[embedding.GetArrayLength()];
        var position = 0;
        foreach (var component in embedding.EnumerateArray())
        {
            vector[position] = component.GetSingle();
            position++;
        }

        return vector;
    }

    /// <summary>
    /// Sends a system and a user message to the configured chat model, requesting a JSON object,
    /// and returns the first choice's message content (or <c>"{}"</c> when that content is null).
    /// </summary>
    /// <exception cref="InvalidOperationException">The API key is not configured or the request failed.</exception>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, CancellationToken ct)
    {
        EnsureConfigured();

        var payload = new
        {
            model = _settings.ChatModel,
            temperature = 0.2,
            response_format = new { type = "json_object" },
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            }
        };

        var json = await SendJsonAsync("chat/completions", payload, "chat", ct);

        using var document = JsonDocument.Parse(json);
        var firstChoice = document.RootElement.GetProperty("choices")[0];
        var messageContent = firstChoice.GetProperty("message").GetProperty("content");
        return messageContent.GetString() ?? "{}";
    }

    /// <summary>
    /// Posts <paramref name="payload"/> as JSON to <paramref name="relativePath"/> and returns the
    /// response body; a non-success status is turned into an <see cref="InvalidOperationException"/>
    /// that names the <paramref name="operation"/> and includes the status code and body.
    /// </summary>
    private async Task<string> SendJsonAsync<TPayload>(string relativePath, TPayload payload, string operation, CancellationToken ct)
    {
        var requestBody = new StringContent(
            JsonSerializer.Serialize(payload, JsonOptions),
            Encoding.UTF8,
            "application/json");

        using var response = await _httpClient.PostAsync(relativePath, requestBody, ct);
        var json = await response.Content.ReadAsStringAsync(ct);

        if (response.IsSuccessStatusCode)
        {
            return json;
        }

        throw new InvalidOperationException($"OpenAI {operation} request failed: {(int)response.StatusCode} {json}");
    }

    /// <summary>Throws when no API key is configured.</summary>
    private void EnsureConfigured()
    {
        if (!string.IsNullOrWhiteSpace(_settings.ApiKey))
        {
            return;
        }

        throw new InvalidOperationException("OpenAI API key is not configured. Set OpenAI__ApiKey.");
    }
}
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
using Api.Services.Contracts.Rag;
|
|
||||||
using System.Text;
|
|
||||||
using UglyToad.PdfPig;
|
|
||||||
|
|
||||||
namespace Api.Services.Rag;
|
|
||||||
|
|
||||||
/// <summary>
/// Extracts the text of every page of a PDF and collapses all whitespace runs to single spaces.
/// </summary>
public sealed class PdfTextExtractor : IPdfTextExtractor
{
    /// <summary>Reads <paramref name="pdfStream"/> as a PDF and returns its normalized text.</summary>
    /// <param name="pdfStream">Stream positioned at the PDF content.</param>
    /// <returns>Page texts joined in page order, whitespace-normalized; empty when there is no text.</returns>
    public string ExtractText(Stream pdfStream)
    {
        using var pdf = PdfDocument.Open(pdfStream);

        var text = new StringBuilder();
        foreach (var page in pdf.GetPages())
        {
            // A blank line after each page keeps words on a page boundary from fusing together.
            text.AppendLine(page.Text).AppendLine();
        }

        return NormalizeWhitespace(text.ToString());
    }

    /// <summary>Replaces every run of whitespace with one space and trims the ends.</summary>
    private static string NormalizeWhitespace(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        // A null separator array splits on any Unicode whitespace.
        var words = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        return string.Join(' ', words).Trim();
    }
}
|
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
using Api.Services.Contracts.Models;
|
||||||
using Api.Services.Contracts;
|
using Api.Services.Contracts;
|
||||||
using Api.Settings;
|
using Api.Settings;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
@@ -17,14 +18,14 @@ namespace Api.Services
|
|||||||
_log = log;
|
_log = log;
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<CaptchaVerdict> VerifyAsync(string token, string? userIp, CancellationToken ct)
|
public async Task<CaptchaVerdictModel> VerifyAsync(string token, string? userIp, CancellationToken ct)
|
||||||
{
|
{
|
||||||
_log.LogDebug("Verifying captcha token for IP {Ip}", userIp ?? "unknown");
|
_log.LogDebug("Verifying captcha token for IP {Ip}", userIp ?? "unknown");
|
||||||
|
|
||||||
if (string.IsNullOrWhiteSpace(_opt.SecretKey))
|
if (string.IsNullOrWhiteSpace(_opt.SecretKey))
|
||||||
{
|
{
|
||||||
_log.LogWarning("Captcha verification attempted but SecretKey is not configured");
|
_log.LogWarning("Captcha verification attempted but SecretKey is not configured");
|
||||||
return new CaptchaVerdict(false, "Captcha not configured", null);
|
return new CaptchaVerdictModel(false, "Captcha not configured", null);
|
||||||
}
|
}
|
||||||
|
|
||||||
var form = new Dictionary<string, string>
|
var form = new Dictionary<string, string>
|
||||||
@@ -45,21 +46,21 @@ namespace Api.Services
|
|||||||
{
|
{
|
||||||
_log.LogWarning("Captcha HTTP request failed with status {StatusCode} for IP {Ip}",
|
_log.LogWarning("Captcha HTTP request failed with status {StatusCode} for IP {Ip}",
|
||||||
(int)resp.StatusCode, userIp ?? "unknown");
|
(int)resp.StatusCode, userIp ?? "unknown");
|
||||||
return new CaptchaVerdict(false, $"Captcha HTTP {(int)resp.StatusCode}", null);
|
return new CaptchaVerdictModel(false, $"Captcha HTTP {(int)resp.StatusCode}", null);
|
||||||
}
|
}
|
||||||
|
|
||||||
var data = await resp.Content.ReadFromJsonAsync<RecaptchaResponse>(cancellationToken: ct);
|
var data = await resp.Content.ReadFromJsonAsync<RecaptchaResponse>(cancellationToken: ct);
|
||||||
if (data is null)
|
if (data is null)
|
||||||
{
|
{
|
||||||
_log.LogError("Failed to parse captcha response for IP {Ip}", userIp ?? "unknown");
|
_log.LogError("Failed to parse captcha response for IP {Ip}", userIp ?? "unknown");
|
||||||
return new CaptchaVerdict(false, "Captcha parse error", null);
|
return new CaptchaVerdictModel(false, "Captcha parse error", null);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!data.success)
|
if (!data.success)
|
||||||
{
|
{
|
||||||
_log.LogWarning("Captcha verification failed for IP {Ip}. Score={Score}",
|
_log.LogWarning("Captcha verification failed for IP {Ip}. Score={Score}",
|
||||||
userIp ?? "unknown", data.score);
|
userIp ?? "unknown", data.score);
|
||||||
return new CaptchaVerdict(false, "Captcha failed", data.score);
|
return new CaptchaVerdictModel(false, "Captcha failed", data.score);
|
||||||
}
|
}
|
||||||
|
|
||||||
// v3 score check (score is typically null for v2)
|
// v3 score check (score is typically null for v2)
|
||||||
@@ -67,7 +68,7 @@ namespace Api.Services
|
|||||||
{
|
{
|
||||||
_log.LogWarning("Captcha score {Score} below minimum {MinScore} for IP {Ip}",
|
_log.LogWarning("Captcha score {Score} below minimum {MinScore} for IP {Ip}",
|
||||||
score, _opt.MinimumScore, userIp ?? "unknown");
|
score, _opt.MinimumScore, userIp ?? "unknown");
|
||||||
return new CaptchaVerdict(false, "Captcha score too low", score);
|
return new CaptchaVerdictModel(false, "Captcha score too low", score);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optional strictness (usually v3): action/hostname checks
|
// Optional strictness (usually v3): action/hostname checks
|
||||||
@@ -76,7 +77,7 @@ namespace Api.Services
|
|||||||
{
|
{
|
||||||
_log.LogWarning("Captcha action mismatch. Expected={Expected}, Actual={Actual}, IP={Ip}",
|
_log.LogWarning("Captcha action mismatch. Expected={Expected}, Actual={Actual}, IP={Ip}",
|
||||||
_opt.ExpectedAction, data.action, userIp ?? "unknown");
|
_opt.ExpectedAction, data.action, userIp ?? "unknown");
|
||||||
return new CaptchaVerdict(false, "Captcha action mismatch", data.score);
|
return new CaptchaVerdictModel(false, "Captcha action mismatch", data.score);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!string.IsNullOrWhiteSpace(_opt.ExpectedHostname) &&
|
if (!string.IsNullOrWhiteSpace(_opt.ExpectedHostname) &&
|
||||||
@@ -84,12 +85,12 @@ namespace Api.Services
|
|||||||
{
|
{
|
||||||
_log.LogWarning("Captcha hostname mismatch. Expected={Expected}, Actual={Actual}, IP={Ip}",
|
_log.LogWarning("Captcha hostname mismatch. Expected={Expected}, Actual={Actual}, IP={Ip}",
|
||||||
_opt.ExpectedHostname, data.hostname, userIp ?? "unknown");
|
_opt.ExpectedHostname, data.hostname, userIp ?? "unknown");
|
||||||
return new CaptchaVerdict(false, "Captcha hostname mismatch", data.score);
|
return new CaptchaVerdictModel(false, "Captcha hostname mismatch", data.score);
|
||||||
}
|
}
|
||||||
|
|
||||||
_log.LogInformation("Captcha verified successfully for IP {Ip}. Score={Score}",
|
_log.LogInformation("Captcha verified successfully for IP {Ip}. Score={Score}",
|
||||||
userIp ?? "unknown", data.score);
|
userIp ?? "unknown", data.score);
|
||||||
return new CaptchaVerdict(true, null, data.score);
|
return new CaptchaVerdictModel(true, null, data.score);
|
||||||
}
|
}
|
||||||
|
|
||||||
private sealed class RecaptchaResponse
|
private sealed class RecaptchaResponse
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
using Api.Services.Contracts;
|
using Api.Services.Contracts;
|
||||||
using Api.Models;
|
using Api.Requests;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
using MailKit.Net.Smtp;
|
using MailKit.Net.Smtp;
|
||||||
using MailKit.Security;
|
using MailKit.Security;
|
||||||
|
|||||||
+2
-2
@@ -1,4 +1,4 @@
|
|||||||
<Project Sdk="Microsoft.NET.Sdk.Web">
|
<Project Sdk="Microsoft.NET.Sdk.Web">
|
||||||
|
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<TargetFramework>net10.0</TargetFramework>
|
<TargetFramework>net10.0</TargetFramework>
|
||||||
@@ -10,6 +10,7 @@
|
|||||||
<InvariantGlobalization>false</InvariantGlobalization>
|
<InvariantGlobalization>false</InvariantGlobalization>
|
||||||
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
|
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
|
||||||
<DisableStaticWebAssets>true</DisableStaticWebAssets>
|
<DisableStaticWebAssets>true</DisableStaticWebAssets>
|
||||||
|
<RootNamespace>Api</RootNamespace>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
@@ -18,7 +19,6 @@
|
|||||||
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
||||||
<PackageReference Include="MailKit" Version="4.16.0" />
|
<PackageReference Include="MailKit" Version="4.16.0" />
|
||||||
<PackageReference Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.23.0" />
|
<PackageReference Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.23.0" />
|
||||||
<PackageReference Include="PdfPig" Version="0.1.14" />
|
|
||||||
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
||||||
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
||||||
<PackageReference Include="Serilog.Sinks.Email" Version="4.2.1" />
|
<PackageReference Include="Serilog.Sinks.Email" Version="4.2.1" />
|
||||||
|
|||||||
+3
-13
@@ -106,18 +106,8 @@
|
|||||||
"FromEmail": "",
|
"FromEmail": "",
|
||||||
"SubjectPrefix": "[File Download]"
|
"SubjectPrefix": "[File Download]"
|
||||||
},
|
},
|
||||||
"OpenAI": {
|
"CvMatcherApi": {
|
||||||
"ApiKey": "",
|
"BaseUrl": "",
|
||||||
"ChatModel": "gpt-4o-mini",
|
"InternalApiKey": ""
|
||||||
"EmbeddingModel": "text-embedding-3-small",
|
|
||||||
"TimeoutSeconds": 60
|
|
||||||
},
|
|
||||||
"Rag": {
|
|
||||||
"MaxPdfSizeMb": 5,
|
|
||||||
"ChunkSize": 900,
|
|
||||||
"ChunkOverlap": 150,
|
|
||||||
"CvTtlMinutes": 60,
|
|
||||||
"MaxJobTextChars": 20000,
|
|
||||||
"TopK": 6
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
using Api.Requests;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Microsoft.AspNetCore.Mvc;
|
||||||
|
|
||||||
|
namespace Api.Controllers;
|
||||||
|
|
||||||
|
/// <summary>
/// HTTP endpoints under <c>api/cv</c>: CV upload, job search for a CV, and CV-to-job matching.
/// Each action delegates to <see cref="ICvMatcherService"/> and turns an
/// <see cref="InvalidOperationException"/> into a 400 response carrying the exception message.
/// </summary>
[ApiController]
[Route("api/cv")]
public sealed class CvController : ControllerBase
{
    private readonly ICvMatcherService _service;
    private readonly ILogger<CvController> _logger;

    public CvController(ICvMatcherService service, ILogger<CvController> logger)
        => (_service, _logger) = (service, logger);

    /// <summary>Accepts a CV file (form field <c>cv</c>, request capped at 10 MiB) and indexes it.</summary>
    /// <returns>200 with the service result; 400 when the file is missing or the service rejects it.</returns>
    [HttpPost("upload")]
    [RequestSizeLimit(10 * 1024 * 1024)]
    public async Task<IActionResult> Upload([FromForm(Name = "cv")] IFormFile? cv, [FromForm] bool gdprConsent, CancellationToken ct)
    {
        if (cv is null)
        {
            return BadRequest(new { error = "Missing CV PDF." });
        }

        try
        {
            _logger.LogInformation("CV upload received. FileName={FileName}, Size={SizeBytes}, GdprConsent={GdprConsent}", cv.FileName, cv.Length, gdprConsent);

            var uploaded = await _service.UploadCvAsync(cv, gdprConsent, ct);

            _logger.LogInformation("CV upload processed. CvDocumentId={CvDocumentId}, Cached={Cached}", uploaded.DocumentId, uploaded.Cached);
            return Ok(uploaded);
        }
        catch (InvalidOperationException invalid)
        {
            _logger.LogWarning(invalid, "Invalid CV upload request.");
            return BadRequest(new { error = invalid.Message });
        }
    }

    /// <summary>Finds jobs for a previously uploaded CV.</summary>
    /// <returns>200 with the service result; 400 when the service rejects the request.</returns>
    [HttpPost("find-jobs")]
    public async Task<IActionResult> FindJobs([FromBody] FindJobsRequest request, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation("Find jobs request received. CvDocumentId={CvDocumentId}, TopK={TopK}", request.CvDocumentId, request.TopK);

            var found = await _service.FindJobsAsync(request, ct);

            _logger.LogInformation("Find jobs completed. CvDocumentId={CvDocumentId}, ResultCount={ResultCount}", request.CvDocumentId, found.Jobs.Count);
            return Ok(found);
        }
        catch (InvalidOperationException invalid)
        {
            _logger.LogWarning(invalid, "Invalid find jobs request.");
            return BadRequest(new { error = invalid.Message });
        }
    }

    /// <summary>Matches a CV against one job given by URL or description.</summary>
    /// <returns>200 with the service result; 400 when the service rejects the request.</returns>
    [HttpPost("match-job")]
    public async Task<IActionResult> MatchJob([FromBody] MatchJobRequest request, CancellationToken ct)
    {
        try
        {
            // Only presence flags are logged; the job text and e-mail address stay out of the log.
            var hasJobUrl = !string.IsNullOrWhiteSpace(request.JobUrl);
            var hasJobDescription = !string.IsNullOrWhiteSpace(request.JobDescription);
            var emailRequested = !string.IsNullOrWhiteSpace(request.Email);

            _logger.LogInformation("Match job request received. CvDocumentId={CvDocumentId}, HasJobUrl={HasJobUrl}, HasJobDescription={HasJobDescription}, EmailRequested={EmailRequested}",
                request.CvDocumentId, hasJobUrl, hasJobDescription, emailRequested);

            var match = await _service.MatchJobAsync(request, ct);

            _logger.LogInformation("Match job completed. CvDocumentId={CvDocumentId}, Score={Score}, Cached={Cached}", request.CvDocumentId, match.Score, match.Cached);
            return Ok(match);
        }
        catch (InvalidOperationException invalid)
        {
            _logger.LogWarning(invalid, "Invalid match job request.");
            return BadRequest(new { error = invalid.Message });
        }
    }
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
-- Idempotent schema setup: each table is created only when it does not exist yet.

-- Match results; the unique index allows at most one row per (CV, job) pair.
IF OBJECT_ID('dbo.CvMatchResults', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.CvMatchResults
    (
        Id            NVARCHAR(64)  NOT NULL,
        CvDocumentId  NVARCHAR(64)  NOT NULL,
        JobDocumentId NVARCHAR(64)  NOT NULL,
        ResultJson    NVARCHAR(MAX) NOT NULL,
        Score         INT           NOT NULL,
        CreatedAt     DATETIME2     NOT NULL
            CONSTRAINT DF_CvMatchResults_CreatedAt DEFAULT SYSUTCDATETIME(),
        CONSTRAINT PK_CvMatchResults PRIMARY KEY (Id)
    );

    CREATE UNIQUE INDEX UX_CvMatchResults_CvJob
        ON dbo.CvMatchResults (CvDocumentId, JobDocumentId);
END
GO

-- Cached chat responses keyed by CacheKey, stored with the model and temperature used.
IF OBJECT_ID('dbo.CvMatcherChatCache', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.CvMatcherChatCache
    (
        CacheKey     NVARCHAR(64)  NOT NULL,
        Model        NVARCHAR(120) NOT NULL,
        Temperature  DECIMAL(4,2)  NOT NULL,
        ResponseText NVARCHAR(MAX) NOT NULL,
        CreatedAt    DATETIME2     NOT NULL
            CONSTRAINT DF_CvMatcherChatCache_CreatedAt DEFAULT SYSUTCDATETIME(),
        CONSTRAINT PK_CvMatcherChatCache PRIMARY KEY (CacheKey)
    );
END
GO
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Runtime stage: ASP.NET runtime image; the service is exposed on port 8080.
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS base
WORKDIR /app
EXPOSE 8080

# Build stage: the project file is copied and restored before the sources so the
# restore layer is reused as long as the project file is unchanged.
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
WORKDIR /src
COPY cv-matcher-api.csproj ./
RUN dotnet restore cv-matcher-api.csproj
COPY . .
RUN dotnet publish cv-matcher-api.csproj -c Release -o /app/publish /p:UseAppHost=false

# Final stage: runtime image plus the published output only.
FROM base AS final
WORKDIR /app
COPY --from=build /app/publish .
ENTRYPOINT ["dotnet", "cv-matcher-api.dll"]
|
||||||
@@ -0,0 +1,283 @@
|
|||||||
|
using Azure.Identity;
|
||||||
|
using Api.Services;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
using Microsoft.AspNetCore.Diagnostics;
|
||||||
|
using Serilog;
|
||||||
|
using System.Reflection;
|
||||||
|
|
||||||
|
DotNetEnv.Env.Load();

// Bootstrap logger: Serilog's static logger is silent until the host is built, so without this
// the startup messages, the Key Vault warning and any pre-build fatal error would be dropped.
// UseSerilog below replaces this configuration once the host is built.
Log.Logger = new LoggerConfiguration()
    .Enrich.FromLogContext()
    .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter())
    .CreateBootstrapLogger();

try
{
    var builder = WebApplication.CreateBuilder(args);

    // Prefer the informational version, then the assembly version.
    var appVersion = Assembly.GetExecutingAssembly()
        .GetCustomAttribute<AssemblyInformationalVersionAttribute>()?
        .InformationalVersion
        ?? Assembly.GetExecutingAssembly().GetName().Version?.ToString()
        ?? "unknown";

    builder.Host.UseSerilog((context, services, configuration) =>
    {
        configuration
            .ReadFrom.Configuration(context.Configuration)
            .ReadFrom.Services(services)
            .Enrich.FromLogContext()
            .Enrich.WithMachineName()
            .Enrich.WithEnvironmentName()
            .Enrich.WithProperty("Service", "cv-matcher-api")
            .Enrich.WithProperty("AppVersion", appVersion)
            .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter());
    });

    Log.Information("Starting {Service} version {AppVersion}", "cv-matcher-api", appVersion);

    // --------------------
    // Azure Key Vault Configuration
    // --------------------
    var keyVaultUri = builder.Configuration["KeyVault:VaultUri"];
    var keyVaultEnabled = builder.Configuration.GetValue<bool>("KeyVault:Enabled");

    if (keyVaultEnabled && !string.IsNullOrWhiteSpace(keyVaultUri))
    {
        Log.Information("Loading configuration from Azure Key Vault: {VaultUri}", keyVaultUri);

        try
        {
            builder.Configuration.AddAzureKeyVault(
                new Uri(keyVaultUri),
                new DefaultAzureCredential());

            Log.Information("Azure Key Vault configuration loaded successfully");
        }
        catch (Exception ex)
        {
            // Best effort by design: the service keeps starting with the remaining sources.
            Log.Warning(ex, "Failed to load Azure Key Vault configuration. Continuing with other configuration sources.");
        }
    }
    else
    {
        Log.Information("Azure Key Vault is disabled or not configured");
    }

    builder.Services.Configure<RagApiSettings>(builder.Configuration.GetSection("RagApi"));
    builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));
    builder.Services.Configure<AiSettings>(builder.Configuration.GetSection("Ai"));
    builder.Services.Configure<MatcherSettings>(builder.Configuration.GetSection("Matcher"));
    builder.Services.Configure<SmtpSettings>(builder.Configuration.GetSection("Smtp"));

    builder.Services.AddHttpClient<IRagApiClient, RagApiClient>();
    builder.Services.AddHttpClient<IMatcherAiClient, MatcherAiClient>();
    builder.Services.AddHttpClient<IJobTextExtractor, JobTextExtractor>();
    builder.Services.AddSingleton<IMatcherRepository, SqlMatcherRepository>();
    builder.Services.AddScoped<ICvMatcherService, CvMatcherService>();
    builder.Services.AddSingleton<IEmailService, EmailService>();

    builder.Services.AddControllers();
    builder.Services.AddEndpointsApiExplorer();
    builder.Services.AddSwaggerGen();

    var app = builder.Build();

    var logger = app.Services.GetRequiredService<ILogger<Program>>();
    logger.LogInformation("API starting up...");
    logger.LogInformation("Environment: {Environment}", app.Environment.EnvironmentName);

    // Log all environment variables and configuration settings at startup
    // Can be controlled via appsettings: "Logging:LogEnvironmentOnStartup": true
    // NOTE(review): this defaults to true and sensitive values still expose their last four
    // characters; consider switching it off explicitly outside diagnostics.
    var logEnvironmentOnStartup = app.Configuration.GetValue<bool>("Logging:LogEnvironmentOnStartup", defaultValue: true);
    if (logEnvironmentOnStartup)
    {
        LogEnvironmentSettings(logger, app.Configuration, app.Environment);
    }

    // Initialize the repository before the first request is served.
    using (var scope = app.Services.CreateScope())
    {
        var repository = scope.ServiceProvider.GetRequiredService<IMatcherRepository>();
        await repository.InitializeAsync(CancellationToken.None);
    }

    app.UseSerilogRequestLogging(options =>
    {
        options.MessageTemplate = "HTTP {RequestMethod} {RequestPath} responded {StatusCode} in {Elapsed:0.0000} ms";
        options.EnrichDiagnosticContext = (diagnosticContext, httpContext) =>
        {
            diagnosticContext.Set("RequestHost", httpContext.Request.Host.Value);
            diagnosticContext.Set("RequestScheme", httpContext.Request.Scheme);
            diagnosticContext.Set("RemoteIP", httpContext.Connection.RemoteIpAddress?.ToString());
            diagnosticContext.Set("UserAgent", httpContext.Request.Headers.UserAgent.ToString());
        };
    });

    // Unhandled exceptions are logged and answered with a generic JSON 500.
    app.UseExceptionHandler(errorApp =>
    {
        errorApp.Run(async context =>
        {
            var feature = context.Features.Get<IExceptionHandlerFeature>();
            var errorLogger = context.RequestServices.GetRequiredService<ILogger<Program>>();
            if (feature?.Error is not null)
            {
                errorLogger.LogError(feature.Error, "Unhandled exception in {Service}", "cv-matcher-api");
            }

            context.Response.StatusCode = StatusCodes.Status500InternalServerError;
            context.Response.ContentType = "application/json";
            await context.Response.WriteAsJsonAsync(new { error = "Unexpected server error." });
        });
    });

    // Internal API key gate: when enabled, every request must carry the configured key in
    // the X-Internal-Api-Key header. An unset key rejects every request.
    app.Use(async (context, next) =>
    {
        var settings = context.RequestServices.GetRequiredService<Microsoft.Extensions.Options.IOptions<InternalApiSettings>>().Value;
        if (settings.RequireApiKey)
        {
            var header = context.Request.Headers["X-Internal-Api-Key"].ToString();

            // Fixed-time comparison so the response time does not reveal how much of the key matched.
            var authorized = !string.IsNullOrWhiteSpace(settings.ApiKey)
                && System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(
                    System.Text.Encoding.UTF8.GetBytes(header),
                    System.Text.Encoding.UTF8.GetBytes(settings.ApiKey));

            if (!authorized)
            {
                var authLogger = context.RequestServices.GetRequiredService<ILogger<Program>>();
                authLogger.LogWarning("Rejected unauthorized internal API call. Path={Path}, RemoteIP={RemoteIP}", context.Request.Path, context.Connection.RemoteIpAddress?.ToString());
                context.Response.StatusCode = StatusCodes.Status401Unauthorized;
                await context.Response.WriteAsJsonAsync(new { error = "Unauthorized internal API call." });
                return;
            }
        }

        await next();
    });

    // Swagger (typically only in Development)
    if (app.Environment.IsDevelopment())
    {
        app.UseSwagger();
        app.UseSwaggerUI(options =>
        {
            options.DocumentTitle = "cv-matcher-api";
            options.SwaggerEndpoint("/swagger/v1/swagger.json", "cv-matcher-api v1");
            options.RoutePrefix = "swagger";
        });
    }

    app.MapControllers();
    app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "cv-matcher-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));

    Log.Information("{Service} startup complete", "cv-matcher-api");

    // The entry point is already asynchronous, so await the host instead of blocking a thread.
    await app.RunAsync();
}
catch (Exception ex)
{
    Log.Fatal(ex, "cv-matcher-api terminated unexpectedly");
}
finally
{
    Log.Information("Shutting down cv-matcher-api");
    Log.CloseAndFlush();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Writes a startup diagnostics dump: hosting environment details, all process environment
/// variables in sorted order, and the full configuration tree. Values whose key looks
/// sensitive are masked before they are logged.
/// </summary>
/// <param name="logger">Logger that receives the dump.</param>
/// <param name="configuration">Configuration root whose sections are logged.</param>
/// <param name="environment">Hosting environment providing names and paths.</param>
static void LogEnvironmentSettings(Microsoft.Extensions.Logging.ILogger logger, IConfiguration configuration, IWebHostEnvironment environment)
{
    logger.LogInformation("==================== ENVIRONMENT SETTINGS ====================");

    // Hosting environment
    logger.LogInformation("Application Name: {ApplicationName}", environment.ApplicationName);
    logger.LogInformation("Environment Name: {EnvironmentName}", environment.EnvironmentName);
    logger.LogInformation("Content Root Path: {ContentRootPath}", environment.ContentRootPath);
    logger.LogInformation("Web Root Path: {WebRootPath}", environment.WebRootPath);

    // Process environment variables, collected into a sorted map so the output order is stable
    logger.LogInformation("-------------- Environment Variables --------------");
    var ordered = new SortedDictionary<string, string?>();
    foreach (System.Collections.DictionaryEntry variable in Environment.GetEnvironmentVariables())
    {
        var name = variable.Key?.ToString() ?? string.Empty;
        var raw = variable.Value?.ToString() ?? string.Empty;

        ordered[name] = IsSensitiveKey(name) ? MaskValueWithLastChars(raw) : raw;
    }

    foreach (var (name, shown) in ordered)
    {
        logger.LogInformation(" {Key} = {Value}", name, shown);
    }

    // Configuration tree
    logger.LogInformation("-------------- Configuration Settings --------------");
    LogConfigurationRecursive(logger, configuration.GetChildren(), "");

    logger.LogInformation("===========================================================");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Logs every configuration value found under <paramref name="sections"/>, depth first,
/// using the colon-separated full key. Values whose key looks sensitive are masked.
/// </summary>
/// <param name="logger">Logger that receives one entry per configuration value.</param>
/// <param name="sections">Sections at the current level.</param>
/// <param name="prefix">Full key of the parent section; empty at the root.</param>
static void LogConfigurationRecursive(Microsoft.Extensions.Logging.ILogger logger, IEnumerable<IConfigurationSection> sections, string prefix)
{
    foreach (var section in sections)
    {
        var fullKey = string.IsNullOrEmpty(prefix) ? section.Key : $"{prefix}:{section.Key}";

        // Pure container sections have no value of their own and are only descended into.
        if (section.Value is { } raw)
        {
            var shown = IsSensitiveKey(fullKey) ? MaskValueWithLastChars(raw) : raw;
            logger.LogInformation(" {Key} = {Value}", fullKey, shown);
        }

        var children = section.GetChildren();
        if (children.Any())
        {
            LogConfigurationRecursive(logger, children, fullKey);
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Tells whether a configuration or environment key should have its value masked in logs.
/// </summary>
/// <param name="key">Key name to inspect.</param>
/// <returns>
/// <c>true</c> when the key contains, ignoring case, any of: Password, Secret, Token, Key,
/// ConnectionString.
/// </returns>
static bool IsSensitiveKey(string key)
{
    string[] markers = { "Password", "Secret", "Token", "Key", "ConnectionString" };

    foreach (var marker in markers)
    {
        if (key.Contains(marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Masks a sensitive value while keeping its last four characters visible for verification.
/// </summary>
/// <param name="value">The value to mask.</param>
/// <returns>
/// <c>***NOT SET***</c> for a null or empty value, <c>***MASKED***</c> for values of four
/// characters or fewer, otherwise <c>***MASKED***...</c> followed by the last four characters.
/// </returns>
static string MaskValueWithLastChars(string value) => value switch
{
    null or "" => "***NOT SET***",

    // Too short to reveal a suffix without giving away the whole value.
    { Length: <= 4 } => "***MASKED***",

    _ => $"***MASKED***...{value[^4..]}",
};
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"profiles": {
|
||||||
|
"cv-matcher-api": {
|
||||||
|
"commandName": "Project",
|
||||||
|
"launchBrowser": true,
|
||||||
|
"environmentVariables": {
|
||||||
|
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||||
|
},
|
||||||
|
"applicationUrl": "https://localhost:58423;http://localhost:58425"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
namespace Api.Requests
{
    /// <summary>
    /// Body of a find-jobs request: identifies the CV to search for and optionally limits
    /// the result count.
    /// </summary>
    public sealed class FindJobsRequest
    {
        /// <summary>Identifier of the CV document. Required.</summary>
        public required string CvDocumentId { get; init; }

        /// <summary>Optional result limit; <c>null</c> when the caller did not specify one.</summary>
        public int? TopK { get; init; }

        /// <summary>Optional e-mail address. NOTE(review): presumably where results are sent; confirm against the service.</summary>
        public string? Email { get; init; }
    }
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
namespace Api.Requests
{
    /// <summary>
    /// Body of a match-job request: a CV reference plus the job to compare it with, given
    /// as a URL and/or as raw description text.
    /// </summary>
    public sealed class MatchJobRequest
    {
        /// <summary>Identifier of the CV document.</summary>
        public string? CvDocumentId { get; set; }

        /// <summary>URL of the job posting, when the job is given by link.</summary>
        public string? JobUrl { get; set; }

        /// <summary>Job description text, when the job is given inline.</summary>
        public string? JobDescription { get; set; }

        /// <summary>GDPR consent flag supplied by the caller; defaults to <c>false</c>.</summary>
        public bool GdprConsent { get; set; }

        /// <summary>Optional e-mail address. NOTE(review): presumably where the result is sent; confirm against the service.</summary>
        public string? Email { get; set; }
    }
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
namespace Api.Requests
{
    /// <summary>
    /// Parameters of a semantic search: the query text, an optional document-type filter
    /// and an optional result limit.
    /// </summary>
    public sealed class RagSearchRequest
    {
        /// <summary>Text to search for. Required.</summary>
        public required string QueryText { get; init; }

        /// <summary>Document types to restrict the search to; <c>null</c> when no filter is given.</summary>
        public IReadOnlyList<string>? TargetDocumentTypes { get; init; }

        /// <summary>Optional result limit; <c>null</c> when the caller did not specify one.</summary>
        public int? TopK { get; init; }
    }
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
namespace Api.Responses
{
    /// <summary>Result of a CV upload: identifies the indexed document and reports its size.</summary>
    public sealed class CvUploadResponse
    {
        /// <summary>Identifier of the CV document. Required.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Hash of the document text. Required.</summary>
        public required string TextHash { get; init; }

        /// <summary>Document type label. Required.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Document title. Required.</summary>
        public required string Title { get; init; }

        /// <summary>Number of chunks.</summary>
        public int Chunks { get; init; }

        /// <summary>Number of characters.</summary>
        public int Characters { get; init; }

        /// <summary>Cache flag. NOTE(review): presumably true when an earlier indexing result was reused; confirm.</summary>
        public bool Cached { get; init; }

        /// <summary>Human-readable status line; has a fixed default text.</summary>
        public string Summary { get; init; } = "CV indexed successfully.";
    }
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
namespace Api.Responses
{
    /// <summary>Result of a find-jobs request: the CV searched for and the jobs found.</summary>
    public sealed class FindJobsResponse
    {
        /// <summary>Identifier of the CV document. Required.</summary>
        public required string CvDocumentId { get; init; }

        /// <summary>Job results; an empty list, never <c>null</c>, when not set.</summary>
        public IReadOnlyList<JobMatchResponse> Jobs { get; init; } = [];
    }
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
namespace Api.Responses
{
    /// <summary>
    /// Outcome of matching a CV against one job: a score with its explanation lists.
    /// All list properties start empty rather than <c>null</c>.
    /// </summary>
    public sealed class JobMatchResponse
    {
        /// <summary>Match score. NOTE(review): the scale is not visible here; confirm the range.</summary>
        public int Score { get; set; }

        /// <summary>Summary text of the match.</summary>
        public string Summary { get; set; } = string.Empty;

        /// <summary>Strength entries.</summary>
        public List<string> Strengths { get; set; } = new();

        /// <summary>Gap entries.</summary>
        public List<string> Gaps { get; set; } = new();

        /// <summary>Recommendation entries.</summary>
        public List<string> Recommendations { get; set; } = new();

        /// <summary>Evidence entries.</summary>
        public List<string> Evidence { get; set; } = new();

        /// <summary>Cache flag. NOTE(review): presumably true when a stored result was reused; confirm.</summary>
        public bool Cached { get; set; }

        /// <summary>Identifier of the job document, when known.</summary>
        public string? JobDocumentId { get; set; }

        /// <summary>URL of the job posting, when known.</summary>
        public string? JobUrl { get; set; }
    }
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
namespace Api.Responses
{
    /// <summary>
    /// Response deserialized from the RAG API after a document has been submitted for indexing.
    /// </summary>
    public sealed class RagIndexResponse
    {
        /// <summary>Identifier of the indexed document.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Hash of the document text.</summary>
        public required string TextHash { get; init; }

        /// <summary>Document type of the indexed document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Confidence value that accompanies <see cref="DocumentType"/>.</summary>
        public double DocumentTypeConfidence { get; init; }

        /// <summary>Title of the indexed document.</summary>
        public required string Title { get; init; }

        /// <summary>Chunk count reported for the document.</summary>
        public int Chunks { get; init; }

        /// <summary>Character count reported for the document.</summary>
        public int Characters { get; init; }

        /// <summary>Whether previously indexed data was reused.</summary>
        public bool Cached { get; init; }
    }
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
namespace Api.Responses
{
    /// <summary>
    /// Response deserialized from the RAG API search endpoint.
    /// </summary>
    public sealed class RagSearchResponse
    {
        /// <summary>Matching documents; empty when nothing matched.</summary>
        public IReadOnlyList<RagSearchDocumentResult> Results { get; init; } = [];
    }

    /// <summary>
    /// Full details of a single document stored in the RAG API, including its text.
    /// </summary>
    public sealed class RagDocumentDetails
    {
        /// <summary>Document identifier.</summary>
        public required string Id { get; init; }

        /// <summary>Document type (for example "cv" or "job").</summary>
        public required string DocumentType { get; init; }

        /// <summary>Document title.</summary>
        public required string Title { get; init; }

        /// <summary>URL the document came from, when one is known.</summary>
        public string? SourceUrl { get; init; }

        /// <summary>Document text.</summary>
        public required string Text { get; init; }

        /// <summary>Hash of <see cref="Text"/>.</summary>
        public required string TextHash { get; init; }
    }

    /// <summary>
    /// One document returned by a search, together with the chunks that matched.
    /// </summary>
    public sealed class RagSearchDocumentResult
    {
        /// <summary>Identifier of the matched document.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Document type of the matched document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Title of the matched document.</summary>
        public required string Title { get; init; }

        /// <summary>URL the document came from, when one is known.</summary>
        public string? SourceUrl { get; init; }

        /// <summary>Score of the document for this search.</summary>
        public double Score { get; init; }

        /// <summary>Chunks of the document that matched; empty when none are reported.</summary>
        public IReadOnlyList<RagSearchChunkResult> MatchedChunks { get; init; } = [];
    }

    /// <summary>
    /// One matched chunk of a document returned by a search.
    /// </summary>
    public sealed class RagSearchChunkResult
    {
        /// <summary>Chunk identifier.</summary>
        public required string ChunkId { get; init; }

        /// <summary>Index of the chunk.</summary>
        public int ChunkIndex { get; init; }

        /// <summary>Chunk text.</summary>
        public required string Text { get; init; }

        /// <summary>Score of the chunk for this search.</summary>
        public double Score { get; init; }
    }
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
using Api.Requests;
|
||||||
|
using Api.Responses;
|
||||||
|
|
||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Business operations of the CV matcher: uploading a CV, matching it against
/// one job, and finding jobs for it.
/// </summary>
public interface ICvMatcherService
{
    /// <summary>Uploads a CV file and returns the details of the indexed document.</summary>
    /// <param name="file">Uploaded CV file.</param>
    /// <param name="gdprConsent">Whether the user gave GDPR consent.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<CvUploadResponse> UploadCvAsync(
        IFormFile file,
        bool gdprConsent,
        CancellationToken ct);

    /// <summary>Scores a CV against the job described by <paramref name="request"/>.</summary>
    /// <param name="request">Match request.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<JobMatchResponse> MatchJobAsync(
        MatchJobRequest request,
        CancellationToken ct);

    /// <summary>Finds and scores jobs for the CV referenced by <paramref name="request"/>.</summary>
    /// <param name="request">Find-jobs request.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<FindJobsResponse> FindJobsAsync(
        FindJobsRequest request,
        CancellationToken ct);
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Sends CV matcher result emails.
/// </summary>
public interface IEmailService
{
    /// <summary>Sends a match notification email.</summary>
    /// <param name="explicitTo">Optional explicit recipient address; may be <c>null</c>.</param>
    /// <param name="subject">Email subject.</param>
    /// <param name="body">Email body text.</param>
    /// <param name="ct">Cancellation token.</param>
    Task SendMatchAsync(
        string? explicitTo,
        string subject,
        string body,
        CancellationToken ct);
}
|
||||||
+1
-1
@@ -1,4 +1,4 @@
|
|||||||
namespace Api.Services.Contracts.Rag;
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
public interface IJobTextExtractor
|
public interface IJobTextExtractor
|
||||||
{
|
{
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Chat-completion client used by the matcher for LLM scoring.
/// </summary>
public interface IMatcherAiClient
{
    /// <summary>Runs a chat completion and returns the text produced by the model.</summary>
    /// <param name="systemPrompt">System message.</param>
    /// <param name="userPrompt">User message.</param>
    /// <param name="temperature">Sampling temperature passed to the model.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<string> CreateChatCompletionAsync(
        string systemPrompt,
        string userPrompt,
        decimal temperature,
        CancellationToken ct);
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
using Api.Responses;
|
||||||
|
|
||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Persistence for match results and cached chat completions.
/// </summary>
public interface IMatcherRepository
{
    /// <summary>Prepares the underlying store for use.</summary>
    /// <param name="ct">Cancellation token.</param>
    Task InitializeAsync(CancellationToken ct);

    /// <summary>Returns the stored match for the CV/job pair, or <c>null</c> when there is none.</summary>
    /// <param name="cvDocumentId">Identifier of the CV document.</param>
    /// <param name="jobDocumentId">Identifier of the job document.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<JobMatchResponse?> GetMatchAsync(
        string cvDocumentId,
        string jobDocumentId,
        CancellationToken ct);

    /// <summary>Stores the match result for the CV/job pair.</summary>
    /// <param name="cvDocumentId">Identifier of the CV document.</param>
    /// <param name="jobDocumentId">Identifier of the job document.</param>
    /// <param name="response">Match result to store.</param>
    /// <param name="ct">Cancellation token.</param>
    Task SaveMatchAsync(
        string cvDocumentId,
        string jobDocumentId,
        JobMatchResponse response,
        CancellationToken ct);

    /// <summary>Returns the cached chat completion for the key, or <c>null</c> when there is none.</summary>
    /// <param name="cacheKey">Cache key of the completion.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<string?> GetChatCompletionAsync(
        string cacheKey,
        CancellationToken ct);

    /// <summary>Stores a chat completion under the key.</summary>
    /// <param name="cacheKey">Cache key of the completion.</param>
    /// <param name="model">Model that produced the completion.</param>
    /// <param name="temperature">Temperature the completion was produced with.</param>
    /// <param name="responseText">Completion text.</param>
    /// <param name="ct">Cancellation token.</param>
    Task SaveChatCompletionAsync(
        string cacheKey,
        string model,
        decimal temperature,
        string responseText,
        CancellationToken ct);
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
using Api.Requests;
|
||||||
|
using Api.Responses;
|
||||||
|
|
||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Client for the RAG API: document indexing, document lookup and semantic search.
/// </summary>
public interface IRagApiClient
{
    /// <summary>Submits a CV PDF for indexing.</summary>
    /// <param name="file">Uploaded CV file.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<RagIndexResponse> IndexCvPdfAsync(
        IFormFile file,
        CancellationToken ct);

    /// <summary>Submits job text for indexing.</summary>
    /// <param name="text">Job text.</param>
    /// <param name="url">Optional source URL of the job.</param>
    /// <param name="title">Optional title of the job.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<RagIndexResponse> IndexJobTextAsync(
        string text,
        string? url,
        string? title,
        CancellationToken ct);

    /// <summary>Fetches a document by id; <c>null</c> when it does not exist.</summary>
    /// <param name="documentId">Identifier of the document.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<RagDocumentDetails?> GetDocumentAsync(
        string documentId,
        CancellationToken ct);

    /// <summary>Runs a semantic search.</summary>
    /// <param name="request">Search request.</param>
    /// <param name="ct">Cancellation token.</param>
    Task<RagSearchResponse> SearchAsync(
        RagSearchRequest request,
        CancellationToken ct);
}
|
||||||
@@ -0,0 +1,201 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using Api.Requests;
|
||||||
|
using Api.Responses;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Implements the CV matcher workflows on top of the RAG API client, the job text
/// extractor, the AI client, the match repository and the email service.
/// </summary>
public sealed class CvMatcherService : ICvMatcherService
{
    // Shared instance: creating JsonSerializerOptions per call discards the serializer's metadata cache.
    private static readonly JsonSerializerOptions ResultJsonOptions = new(JsonSerializerDefaults.Web);

    private readonly IRagApiClient _rag;
    private readonly IJobTextExtractor _jobTextExtractor;
    private readonly IMatcherAiClient _ai;
    private readonly IMatcherRepository _repository;
    private readonly IEmailService _email;
    private readonly MatcherSettings _settings;

    public CvMatcherService(
        IRagApiClient rag,
        IJobTextExtractor jobTextExtractor,
        IMatcherAiClient ai,
        IMatcherRepository repository,
        IEmailService email,
        IOptions<MatcherSettings> options)
    {
        _rag = rag;
        _jobTextExtractor = jobTextExtractor;
        _ai = ai;
        _repository = repository;
        _email = email;
        _settings = options.Value;
    }

    /// <summary>Indexes the uploaded CV through the RAG API and maps the result.</summary>
    /// <exception cref="InvalidOperationException">GDPR consent was not given.</exception>
    public async Task<CvUploadResponse> UploadCvAsync(IFormFile file, bool gdprConsent, CancellationToken ct)
    {
        if (!gdprConsent) throw new InvalidOperationException("GDPR consent is required.");

        var response = await _rag.IndexCvPdfAsync(file, ct);
        return new CvUploadResponse
        {
            DocumentId = response.DocumentId,
            TextHash = response.TextHash,
            DocumentType = response.DocumentType,
            Title = response.Title,
            Chunks = response.Chunks,
            Characters = response.Characters,
            Cached = response.Cached,
            Summary = response.Cached ? "CV already indexed. Cached data reused." : "CV indexed successfully."
        };
    }

    /// <summary>
    /// Searches indexed jobs with a profile built from the CV text and scores the
    /// best candidates (between 1 and 10 of them) with the AI client.
    /// </summary>
    /// <exception cref="InvalidOperationException">The CV is missing or the document is not a CV.</exception>
    public async Task<FindJobsResponse> FindJobsAsync(FindJobsRequest request, CancellationToken ct)
    {
        var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
        if (!string.Equals(cv.DocumentType, "cv", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException("The provided document is not a CV.");
        }

        var search = await _rag.SearchAsync(new RagSearchRequest
        {
            QueryText = BuildCvSearchProfile(cv.Text),
            TargetDocumentTypes = ["job"],
            TopK = request.TopK ?? _settings.TopK
        }, ct);

        var deepScoreLimit = Math.Clamp(_settings.DeepScoreTopN, 1, 10);
        var jobs = new List<JobMatchResponse>();
        foreach (var result in search.Results.Take(deepScoreLimit))
        {
            var job = await _rag.GetDocumentAsync(result.DocumentId, ct);
            if (job is null) continue; // listed by the search but no longer retrievable
            jobs.Add(await ScorePairAsync(cv, job, result.MatchedChunks.Select(x => x.Text).ToArray(), request.Email, ct));
        }

        return new FindJobsResponse { CvDocumentId = request.CvDocumentId, Jobs = jobs };
    }

    /// <summary>
    /// Extracts the job text, indexes it as a job document and scores it against the CV.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Consent or the CV id is missing, a document cannot be found, or too little job text was extracted.
    /// </exception>
    public async Task<JobMatchResponse> MatchJobAsync(MatchJobRequest request, CancellationToken ct)
    {
        if (!request.GdprConsent) throw new InvalidOperationException("GDPR consent is required.");
        if (string.IsNullOrWhiteSpace(request.CvDocumentId)) throw new InvalidOperationException("Missing CV document id.");

        var cv = await _rag.GetDocumentAsync(request.CvDocumentId, ct) ?? throw new InvalidOperationException("CV document not found.");
        var jobText = await _jobTextExtractor.ExtractAsync(request.JobUrl, request.JobDescription, ct);
        if (jobText.Length < 80) throw new InvalidOperationException("Could not extract enough job text. Paste the job description manually.");

        var job = await _rag.IndexJobTextAsync(jobText, request.JobUrl, ExtractJobTitle(jobText), ct);
        var jobDocument = await _rag.GetDocumentAsync(job.DocumentId, ct) ?? throw new InvalidOperationException("Indexed job document not found.");

        var search = await _rag.SearchAsync(new RagSearchRequest
        {
            QueryText = BuildCvSearchProfile(cv.Text),
            TargetDocumentTypes = ["job"],
            TopK = Math.Max(5, _settings.TopK)
        }, ct);

        // Only the chunks of the job that was just indexed are used as evidence.
        var matchedChunks = search.Results
            .FirstOrDefault(x => x.DocumentId == job.DocumentId)?
            .MatchedChunks.Select(x => x.Text).ToArray() ?? [];

        return await ScorePairAsync(cv, jobDocument, matchedChunks, request.Email, ct);
    }

    /// <summary>
    /// Returns the stored match for the pair when there is one; otherwise asks the
    /// AI client for a score, stores the result and sends the notification email.
    /// A model answer that cannot be parsed is returned to the caller but is
    /// neither stored nor emailed, so it is not replayed as a genuine 0% match.
    /// </summary>
    private async Task<JobMatchResponse> ScorePairAsync(RagDocumentDetails cv, RagDocumentDetails job, IReadOnlyList<string> evidenceChunks, string? email, CancellationToken ct)
    {
        var cached = await _repository.GetMatchAsync(cv.Id, job.Id, ct);
        if (cached is not null) return cached;

        var cvText = Limit(cv.Text, 18000);
        var jobText = Limit(job.Text, 14000);
        // Without matched chunks, fall back to the beginning of the job text.
        var evidence = evidenceChunks.Count > 0 ? string.Join("\n\n", evidenceChunks.Take(4)) : Limit(job.Text, 4000);

        const string systemPrompt = """
            You are a strict CV-to-job matching engine. Return JSON only. Score realistically from 0 to 100.
            Penalize missing required skills. Do not invent experience. Use concise business language.
            JSON shape: {"score":number,"summary":"...","strengths":["..."],"gaps":["..."],"recommendations":["..."],"evidence":["..."]}
            """;

        var userPrompt = $"""
            CV:
            {cvText}

            JOB:
            {jobText}

            SEMANTICALLY MATCHED JOB EVIDENCE:
            {evidence}
            """;

        var json = await _ai.CreateChatCompletionAsync(systemPrompt, userPrompt, 0.2m, ct);
        var parsed = TryParseResult(json);
        var result = parsed ?? CreateUnparsableResult();
        result.JobDocumentId = job.Id;
        result.JobUrl = job.SourceUrl;
        result.Cached = false;

        // Persisting the placeholder would make every later request for this pair return it.
        if (parsed is null) return result;

        await _repository.SaveMatchAsync(cv.Id, job.Id, result, ct);

        await _email.SendMatchAsync(
            email,
            $"MyAi.ro CV Match: {result.Score}% - {job.Title}",
            BuildEmailBody(cv, job, result),
            ct);

        return result;
    }

    /// <summary>
    /// Parses the model output. Returns <c>null</c> when it is not a JSON object of
    /// the expected shape; otherwise returns the result with the score clamped to
    /// 0-100 and JSON nulls replaced by empty values.
    /// </summary>
    private static JobMatchResponse? TryParseResult(string json)
    {
        JobMatchResponse? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<JobMatchResponse>(json, ResultJsonOptions);
        }
        catch (JsonException)
        {
            return null;
        }

        if (parsed is null) return null;

        parsed.Score = Math.Clamp(parsed.Score, 0, 100);
        // An explicit JSON null overwrites the property initializers; string.Join would throw on a null list.
        parsed.Summary ??= string.Empty;
        parsed.Strengths ??= new List<string>();
        parsed.Gaps ??= new List<string>();
        parsed.Recommendations ??= new List<string>();
        parsed.Evidence ??= new List<string>();
        return parsed;
    }

    /// <summary>Builds the placeholder result reported when the model output cannot be parsed.</summary>
    private static JobMatchResponse CreateUnparsableResult() => new()
    {
        Score = 0,
        Summary = "The AI response could not be parsed as structured JSON.",
        Recommendations = ["Inspect the raw model output and tune the scoring prompt."]
    };

    /// <summary>Builds the search query from (at most) the first 10000 characters of the CV text.</summary>
    private static string BuildCvSearchProfile(string cvText)
    {
        var text = Limit(cvText, 10000);
        return $"Candidate profile, skills, technologies, seniority, industry experience, project experience: {text}";
    }

    /// <summary>
    /// Uses the first fragment (split on '.', line feed or carriage return) that is
    /// 9 to 139 characters long as the title; "Job description" when there is none.
    /// </summary>
    private static string ExtractJobTitle(string jobText)
    {
        var first = jobText.Split('.', '\n', '\r').Select(x => x.Trim()).FirstOrDefault(x => x.Length is > 8 and < 140);
        return first ?? "Job description";
    }

    /// <summary>
    /// Truncates <paramref name="value"/> to at most <paramref name="max"/> characters
    /// without leaving half of a surrogate pair at the end.
    /// </summary>
    private static string Limit(string value, int max)
    {
        if (value.Length <= max) return value;

        // A cut right after a high surrogate would leave an invalid lone code unit behind.
        var length = max > 0 && char.IsHighSurrogate(value[max - 1]) ? max - 1 : max;
        return value[..length];
    }

    /// <summary>Formats the plain-text body of the match notification email.</summary>
    private static string BuildEmailBody(RagDocumentDetails cv, RagDocumentDetails job, JobMatchResponse result) => $"""
        CV Matcher result

        CV: {cv.Title}
        Job: {job.Title}
        Job URL: {job.SourceUrl ?? "N/A"}
        Score: {result.Score}%

        Summary:
        {result.Summary}

        Strengths:
        - {string.Join("\n- ", result.Strengths)}

        Gaps:
        - {string.Join("\n- ", result.Gaps)}

        Recommendations:
        - {string.Join("\n- ", result.Recommendations)}
        """;
}
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
using MailKit.Net.Smtp;
|
||||||
|
using MailKit.Security;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using MimeKit;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Sends CV matcher result emails over SMTP using the configured <see cref="SmtpSettings"/>.
/// </summary>
public sealed class EmailService : IEmailService
{
    private readonly SmtpSettings _settings;
    private readonly ILogger<EmailService> _logger;

    public EmailService(IOptions<SmtpSettings> options, ILogger<EmailService> logger)
    {
        _logger = logger;
        _settings = options.Value;
    }

    /// <summary>
    /// Sends a plain-text email to <paramref name="explicitTo"/>, or to the configured
    /// default recipient when it is blank. Does nothing except log when no SMTP host
    /// or no recipient is available.
    /// </summary>
    public async Task SendMatchAsync(string? explicitTo, string subject, string body, CancellationToken ct)
    {
        var recipient = string.IsNullOrWhiteSpace(explicitTo) ? _settings.ToEmail : explicitTo;
        if (string.IsNullOrWhiteSpace(recipient) || string.IsNullOrWhiteSpace(_settings.Host))
        {
            _logger.LogInformation("SMTP is not configured. Skipping CV matcher email.");
            return;
        }

        var mail = new MimeMessage();
        mail.From.Add(MailboxAddress.Parse(_settings.FromEmail));
        mail.To.Add(MailboxAddress.Parse(recipient));
        mail.Subject = subject;
        mail.Body = new TextPart("plain") { Text = body };

        SecureSocketOptions socketOptions;
        if (_settings.UseStartTls)
        {
            socketOptions = SecureSocketOptions.StartTls;
        }
        else
        {
            socketOptions = SecureSocketOptions.Auto;
        }

        using var smtp = new SmtpClient();
        await smtp.ConnectAsync(_settings.Host, _settings.Port, socketOptions, ct);

        // Authentication is attempted only when a user name is configured.
        var hasCredentials = !string.IsNullOrWhiteSpace(_settings.Username);
        if (hasCredentials)
        {
            await smtp.AuthenticateAsync(_settings.Username, _settings.Password, ct);
        }

        await smtp.SendAsync(mail, ct);
        await smtp.DisconnectAsync(true, ct);
    }
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
using System.Security.Cryptography;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Hashing helper used to derive stable keys from text.
/// </summary>
public static class HashHelper
{
    /// <summary>
    /// Returns the SHA-256 hash of the UTF-8 bytes of <paramref name="value"/> as an
    /// uppercase hexadecimal string. A <c>null</c> value is hashed as the empty string.
    /// </summary>
    public static string Compute(string value)
    {
        var utf8 = Encoding.UTF8.GetBytes(value ?? string.Empty);
        var digest = SHA256.HashData(utf8);
        return Convert.ToHexString(digest);
    }
}
|
||||||
+13
-14
@@ -1,21 +1,22 @@
|
|||||||
using System.Net;
|
using System.Net;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
using Api.Services.Contracts.Rag;
|
using Api.Services.Contracts;
|
||||||
using Api.Settings;
|
using Api.Settings;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
|
|
||||||
namespace Api.Services.Rag;
|
namespace Api.Services;
|
||||||
|
|
||||||
public sealed class JobTextExtractor : IJobTextExtractor
|
public sealed class JobTextExtractor : IJobTextExtractor
|
||||||
{
|
{
|
||||||
private readonly HttpClient _httpClient;
|
private readonly HttpClient _http;
|
||||||
private readonly RagSettings _settings;
|
private readonly MatcherSettings _settings;
|
||||||
|
|
||||||
public JobTextExtractor(HttpClient httpClient, IOptions<RagSettings> options)
|
public JobTextExtractor(HttpClient http, IOptions<MatcherSettings> options)
|
||||||
{
|
{
|
||||||
_httpClient = httpClient;
|
_http = http;
|
||||||
_settings = options.Value;
|
_settings = options.Value;
|
||||||
_httpClient.Timeout = TimeSpan.FromSeconds(20);
|
_http.Timeout = TimeSpan.FromSeconds(25);
|
||||||
_httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
|
_http.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct)
|
public async Task<string> ExtractAsync(string? jobUrl, string? jobDescription, CancellationToken ct)
|
||||||
@@ -24,17 +25,16 @@ public sealed class JobTextExtractor : IJobTextExtractor
|
|||||||
if (!string.IsNullOrWhiteSpace(pasted)) return Limit(pasted);
|
if (!string.IsNullOrWhiteSpace(pasted)) return Limit(pasted);
|
||||||
|
|
||||||
if (string.IsNullOrWhiteSpace(jobUrl)) return string.Empty;
|
if (string.IsNullOrWhiteSpace(jobUrl)) return string.Empty;
|
||||||
if (!Uri.TryCreate(jobUrl, UriKind.Absolute, out var uri) || (uri.Scheme != "http" && uri.Scheme != "https"))
|
if (!Uri.TryCreate(jobUrl, UriKind.Absolute, out var uri) || uri.Scheme is not ("http" or "https"))
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException("Invalid job URL.");
|
throw new InvalidOperationException("Invalid job URL.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var html = await _httpClient.GetStringAsync(uri, ct);
|
var html = await _http.GetStringAsync(uri, ct);
|
||||||
html = Regex.Replace(html, "<script[\\s\\S]*?</script>", " ", RegexOptions.IgnoreCase);
|
html = Regex.Replace(html, "<script[\\s\\S]*?</script>", " ", RegexOptions.IgnoreCase);
|
||||||
html = Regex.Replace(html, "<style[\\s\\S]*?</style>", " ", RegexOptions.IgnoreCase);
|
html = Regex.Replace(html, "<style[\\s\\S]*?</style>", " ", RegexOptions.IgnoreCase);
|
||||||
html = Regex.Replace(html, "<[^>]+>", " ");
|
html = Regex.Replace(html, "<[^>]+>", " ");
|
||||||
var text = WebUtility.HtmlDecode(html);
|
return Limit(Normalize(WebUtility.HtmlDecode(html)));
|
||||||
return Limit(Normalize(text));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private string Limit(string value)
|
private string Limit(string value)
|
||||||
@@ -46,7 +46,6 @@ public sealed class JobTextExtractor : IJobTextExtractor
|
|||||||
private static string Normalize(string value)
|
private static string Normalize(string value)
|
||||||
{
|
{
|
||||||
if (string.IsNullOrWhiteSpace(value)) return string.Empty;
|
if (string.IsNullOrWhiteSpace(value)) return string.Empty;
|
||||||
var parts = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
|
return string.Join(' ', value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries)).Trim();
|
||||||
return string.Join(' ', parts).Trim();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
using System.Net.Http.Headers;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Chat-completion client that talks to either OpenAI or Ollama, depending on the
/// configured provider, and caches completions in the matcher repository.
/// </summary>
public sealed class MatcherAiClient : IMatcherAiClient
{
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
    };

    private readonly HttpClient _http;
    private readonly IMatcherRepository _repository;
    private readonly AiSettings _settings;

    public MatcherAiClient(HttpClient http, IMatcherRepository repository, IOptions<AiSettings> options)
    {
        _http = http;
        _repository = repository;
        _settings = options.Value;
    }

    /// <summary>
    /// Returns the cached completion for this provider, model, temperature and prompt
    /// pair when there is one; otherwise calls the provider and caches its answer.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The provider is misconfigured, answered with an error status, or returned a
    /// response that does not have the expected shape.
    /// </exception>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        var model = GetModel();
        // Invariant formatting keeps the key independent of the server's culture ("0.20" vs "0,20").
        var temperatureKey = temperature.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture);
        var cacheKey = HashHelper.Compute($"chat:{_settings.Provider}:{model}:{temperatureKey}:{systemPrompt}:{userPrompt}");
        var cached = await _repository.GetChatCompletionAsync(cacheKey, ct);
        if (cached is not null) return cached;

        var response = IsOllama()
            ? await CreateOllamaChatCompletionAsync(systemPrompt, userPrompt, temperature, ct)
            : await CreateOpenAiChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);

        await _repository.SaveChatCompletionAsync(cacheKey, model, temperature, response, ct);
        return response;
    }

    private bool IsOllama() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase);

    private string GetModel() => IsOllama() ? _settings.Ollama.ChatModel : _settings.OpenAI.ChatModel;

    /// <summary>Calls the OpenAI chat completions endpoint in JSON-object mode.</summary>
    private async Task<string> CreateOpenAiChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(_settings.OpenAI.ApiKey)) throw new InvalidOperationException("OpenAI API key is missing.");

        using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/chat/completions");
        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _settings.OpenAI.ApiKey);
        request.Content = ToJson(new
        {
            model = _settings.OpenAI.ChatModel,
            temperature,
            response_format = new { type = "json_object" },
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            }
        });

        // The configured timeout is applied on top of the caller's token, with a 15 second floor.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(TimeSpan.FromSeconds(Math.Max(15, _settings.OpenAI.TimeoutSeconds)));

        using var response = await _http.SendAsync(request, cts.Token);
        var json = await response.Content.ReadAsStringAsync(cts.Token);
        if (!response.IsSuccessStatusCode) throw new InvalidOperationException($"OpenAI chat failed: {(int)response.StatusCode} {json}");

        return ExtractContent("OpenAI", json, root => root.GetProperty("choices")[0].GetProperty("message").GetProperty("content"));
    }

    /// <summary>Calls the Ollama chat endpoint with JSON output and streaming disabled.</summary>
    private async Task<string> CreateOllamaChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        var baseUrl = _settings.Ollama.BaseUrl.TrimEnd('/');

        // The configured timeout is applied on top of the caller's token, with a 30 second floor.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(TimeSpan.FromSeconds(Math.Max(30, _settings.Ollama.TimeoutSeconds)));

        using var response = await _http.PostAsync($"{baseUrl}/api/chat", ToJson(new
        {
            model = _settings.Ollama.ChatModel,
            stream = false,
            format = "json",
            messages = new[]
            {
                new { role = "system", content = systemPrompt },
                new { role = "user", content = userPrompt }
            },
            options = new { temperature = (float)temperature }
        }), cts.Token);

        var json = await response.Content.ReadAsStringAsync(cts.Token);
        if (!response.IsSuccessStatusCode) throw new InvalidOperationException($"Ollama chat failed: {(int)response.StatusCode} {json}");

        return ExtractContent("Ollama", json, root => root.GetProperty("message").GetProperty("content"));
    }

    /// <summary>
    /// Reads the message content out of a provider response; "{}" when the content is JSON null.
    /// Any shape or parse problem is reported as <see cref="InvalidOperationException"/>,
    /// the same exception type this class uses for its other provider failures.
    /// </summary>
    private static string ExtractContent(string provider, string json, Func<JsonElement, JsonElement> selectContent)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            return selectContent(doc.RootElement).GetString() ?? "{}";
        }
        catch (Exception ex) when (ex is JsonException or KeyNotFoundException or IndexOutOfRangeException or InvalidOperationException)
        {
            throw new InvalidOperationException($"{provider} chat returned an unexpected response.", ex);
        }
    }

    private static StringContent ToJson<T>(T payload) => new(JsonSerializer.Serialize(payload, JsonOptions), Encoding.UTF8, "application/json");
}
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
using System.Net.Http.Headers;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
using Api.Requests;
|
||||||
|
using Api.Responses;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// HTTP client for the RAG API. Sets the base address and, when configured, the
/// internal API key header on the injected <see cref="HttpClient"/>.
/// </summary>
public sealed class RagApiClient : IRagApiClient
{
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web);

    private readonly HttpClient _http;
    private readonly RagApiSettings _settings;

    public RagApiClient(HttpClient http, IOptions<RagApiSettings> options)
    {
        _http = http;
        _settings = options.Value;
        // Trailing slash so the relative request paths below resolve under the base URL.
        _http.BaseAddress = new Uri(_settings.BaseUrl.TrimEnd('/') + "/");
        if (!string.IsNullOrWhiteSpace(_settings.InternalApiKey))
        {
            _http.DefaultRequestHeaders.Add("X-Internal-Api-Key", _settings.InternalApiKey);
        }
    }

    /// <summary>Posts the uploaded file as a PDF with document type "cv"; the file name doubles as the title.</summary>
    /// <exception cref="InvalidOperationException">The RAG API answered with an error status or invalid JSON.</exception>
    public async Task<RagIndexResponse> IndexCvPdfAsync(IFormFile file, CancellationToken ct)
    {
        using var content = new MultipartFormDataContent();
        await using var stream = file.OpenReadStream();
        using var fileContent = new StreamContent(stream);
        fileContent.Headers.ContentType = new MediaTypeHeaderValue("application/pdf");
        content.Add(fileContent, "file", file.FileName);
        content.Add(new StringContent("cv"), "documentType");
        content.Add(new StringContent(file.FileName), "title");

        using var response = await _http.PostAsync("api/rag/documents", content, ct);
        return await ReadJsonAsync<RagIndexResponse>(response, ct);
    }

    /// <summary>
    /// Posts job text with document type "job". The title defaults to "Job description";
    /// the source URL is sent only when it is not blank.
    /// </summary>
    /// <exception cref="InvalidOperationException">The RAG API answered with an error status or invalid JSON.</exception>
    public async Task<RagIndexResponse> IndexJobTextAsync(string text, string? url, string? title, CancellationToken ct)
    {
        using var content = new MultipartFormDataContent
        {
            { new StringContent(text), "text" },
            { new StringContent("job"), "documentType" },
            { new StringContent(title ?? "Job description"), "title" }
        };
        if (!string.IsNullOrWhiteSpace(url)) content.Add(new StringContent(url), "sourceUrl");

        using var response = await _http.PostAsync("api/rag/documents", content, ct);
        return await ReadJsonAsync<RagIndexResponse>(response, ct);
    }

    /// <summary>Fetches a document by id; returns <c>null</c> on HTTP 404.</summary>
    /// <exception cref="InvalidOperationException">The RAG API answered with another error status or invalid JSON.</exception>
    public async Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct)
    {
        using var response = await _http.GetAsync($"api/rag/documents/{Uri.EscapeDataString(documentId)}", ct);
        if (response.StatusCode == System.Net.HttpStatusCode.NotFound) return null;
        return await ReadJsonAsync<RagDocumentDetails>(response, ct);
    }

    /// <summary>Posts the search request as JSON and returns the deserialized results.</summary>
    /// <exception cref="InvalidOperationException">The RAG API answered with an error status or invalid JSON.</exception>
    public async Task<RagSearchResponse> SearchAsync(RagSearchRequest request, CancellationToken ct)
    {
        using var body = new StringContent(JsonSerializer.Serialize(request, JsonOptions), Encoding.UTF8, "application/json");
        using var response = await _http.PostAsync("api/rag/search", body, ct);
        return await ReadJsonAsync<RagSearchResponse>(response, ct);
    }

    /// <summary>
    /// Reads the response body and deserializes it. Error statuses, a JSON null body and
    /// malformed or incomplete JSON are all reported as <see cref="InvalidOperationException"/>.
    /// </summary>
    private static async Task<T> ReadJsonAsync<T>(HttpResponseMessage response, CancellationToken ct)
    {
        var json = await response.Content.ReadAsStringAsync(ct);
        if (!response.IsSuccessStatusCode)
        {
            throw new InvalidOperationException($"RAG API failed: {(int)response.StatusCode} {json}");
        }

        try
        {
            return JsonSerializer.Deserialize<T>(json, JsonOptions) ?? throw new InvalidOperationException("RAG API returned invalid JSON.");
        }
        catch (JsonException ex)
        {
            // Also covers payloads that omit members declared 'required' on the response types.
            throw new InvalidOperationException("RAG API returned invalid JSON.", ex);
        }
    }
}
|
||||||
@@ -0,0 +1,105 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using Api.Responses;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Microsoft.Data.SqlClient;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
public sealed class SqlMatcherRepository : IMatcherRepository
|
||||||
|
{
|
||||||
|
private readonly string _connectionString;
|
||||||
|
|
||||||
|
/// <summary>Reads the "CvMatcherDb" connection string from configuration.</summary>
/// <exception cref="InvalidOperationException">The connection string is not configured.</exception>
public SqlMatcherRepository(IConfiguration configuration)
{
    var configured = configuration.GetConnectionString("CvMatcherDb");
    if (configured is null)
    {
        throw new InvalidOperationException("Connection string 'CvMatcherDb' is missing.");
    }

    _connectionString = configured;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Ensures the database exists, then runs Database/schema.sql (located under the
/// application base directory) batch by batch.
/// </summary>
/// <param name="ct">Cancellation token.</param>
public async Task InitializeAsync(CancellationToken ct)
{
    await EnsureDatabaseExistsAsync(ct);
    var script = await File.ReadAllTextAsync(Path.Combine(AppContext.BaseDirectory, "Database", "schema.sql"), ct);

    // "GO" separates batches only when it stands alone on a line. Splitting on the bare
    // substring would also cut through identifiers and literals that contain "GO".
    var batches = System.Text.RegularExpressions.Regex.Split(
        script,
        @"^[ \t]*GO[ \t]*\r?$",
        System.Text.RegularExpressions.RegexOptions.Multiline
            | System.Text.RegularExpressions.RegexOptions.IgnoreCase
            | System.Text.RegularExpressions.RegexOptions.CultureInvariant);

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);
    foreach (var batch in batches)
    {
        var commandText = batch.Trim();
        if (commandText.Length == 0) continue; // nothing between two separators

        await using var command = new SqlCommand(commandText, connection);
        await command.ExecuteNonQueryAsync(ct);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Loads the stored result JSON for the CV/job pair and deserializes it.
/// Returns <c>null</c> when nothing (or only blank text) is stored; a returned
/// result is flagged as cached.
/// </summary>
public async Task<JobMatchResponse?> GetMatchAsync(string cvDocumentId, string jobDocumentId, CancellationToken ct)
{
    const string query = "SELECT ResultJson FROM CvMatchResults WHERE CvDocumentId = @CvDocumentId AND JobDocumentId = @JobDocumentId";

    await using var db = new SqlConnection(_connectionString);
    await db.OpenAsync(ct);

    await using var lookup = new SqlCommand(query, db);
    lookup.Parameters.AddWithValue("@CvDocumentId", cvDocumentId);
    lookup.Parameters.AddWithValue("@JobDocumentId", jobDocumentId);

    if (await lookup.ExecuteScalarAsync(ct) is not string storedJson || string.IsNullOrWhiteSpace(storedJson))
    {
        return null;
    }

    var match = JsonSerializer.Deserialize<JobMatchResponse>(storedJson, new JsonSerializerOptions(JsonSerializerDefaults.Web));
    if (match is null)
    {
        return null;
    }

    match.Cached = true;
    return match;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Inserts the serialized match result for the CV/job pair unless a row for
/// that pair already exists.
/// </summary>
public async Task SaveMatchAsync(string cvDocumentId, string jobDocumentId, JobMatchResponse response, CancellationToken ct)
{
    const string insertIfMissing = """
        IF NOT EXISTS (SELECT 1 FROM CvMatchResults WHERE CvDocumentId = @CvDocumentId AND JobDocumentId = @JobDocumentId)
        INSERT INTO CvMatchResults (Id, CvDocumentId, JobDocumentId, ResultJson, Score, CreatedAt)
        VALUES (@Id, @CvDocumentId, @JobDocumentId, @ResultJson, @Score, SYSUTCDATETIME())
        """;

    var rowId = Guid.NewGuid().ToString("N");
    var resultJson = JsonSerializer.Serialize(response, new JsonSerializerOptions(JsonSerializerDefaults.Web));

    await using var db = new SqlConnection(_connectionString);
    await db.OpenAsync(ct);

    await using var insert = new SqlCommand(insertIfMissing, db);
    var parameters = insert.Parameters;
    parameters.AddWithValue("@Id", rowId);
    parameters.AddWithValue("@CvDocumentId", cvDocumentId);
    parameters.AddWithValue("@JobDocumentId", jobDocumentId);
    parameters.AddWithValue("@ResultJson", resultJson);
    parameters.AddWithValue("@Score", response.Score);

    await insert.ExecuteNonQueryAsync(ct);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the cached chat response stored under <paramref name="cacheKey"/>.
/// </summary>
/// <param name="cacheKey">Key matched against the <c>CacheKey</c> column of <c>CvMatcherChatCache</c>.</param>
/// <param name="ct">Cancellation token passed to the connection open and the query.</param>
/// <returns>The <c>ResponseText</c> value, or <c>null</c> when the query does not yield a string.</returns>
public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
{
    await using var db = new SqlConnection(_connectionString);
    await db.OpenAsync(ct);

    await using var lookup = new SqlCommand(
        "SELECT ResponseText FROM CvMatcherChatCache WHERE CacheKey = @CacheKey",
        db);
    lookup.Parameters.AddWithValue("@CacheKey", cacheKey);

    // A missing row (null) and a NULL column (DBNull) both fail the cast and come back as null.
    object? scalar = await lookup.ExecuteScalarAsync(ct);
    return scalar as string;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Caches a chat response under <paramref name="cacheKey"/>; does nothing when the key is already present.
/// </summary>
/// <param name="cacheKey">Primary lookup key for the cache row.</param>
/// <param name="model">Model name stored alongside the response.</param>
/// <param name="temperature">Temperature value stored alongside the response.</param>
/// <param name="responseText">Response text to cache.</param>
/// <param name="ct">Cancellation token passed to the connection open and command execution.</param>
public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
{
    const string insertIfAbsent = """
        IF NOT EXISTS (SELECT 1 FROM CvMatcherChatCache WHERE CacheKey = @CacheKey)
        INSERT INTO CvMatcherChatCache (CacheKey, Model, Temperature, ResponseText, CreatedAt)
        VALUES (@CacheKey, @Model, @Temperature, @ResponseText, SYSUTCDATETIME())
        """;

    await using var db = new SqlConnection(_connectionString);
    await db.OpenAsync(ct);

    await using var insert = new SqlCommand(insertIfAbsent, db);
    var parameters = insert.Parameters;
    parameters.AddWithValue("@CacheKey", cacheKey);
    parameters.AddWithValue("@Model", model);
    parameters.AddWithValue("@Temperature", temperature);
    parameters.AddWithValue("@ResponseText", responseText);

    _ = await insert.ExecuteNonQueryAsync(ct);
}
|
||||||
|
/// <summary>
/// Creates the database named by the connection string's initial catalog when it does not exist.
/// </summary>
/// <remarks>
/// Does nothing when the connection string names no database. Otherwise connects to
/// <c>master</c> on the same server and issues <c>CREATE DATABASE</c> only if <c>DB_ID</c> is null.
/// </remarks>
/// <param name="ct">Cancellation token passed to the connection open and command execution.</param>
private async Task EnsureDatabaseExistsAsync(CancellationToken ct)
{
    var builder = new SqlConnectionStringBuilder(_connectionString);
    var databaseName = builder.InitialCatalog;
    if (string.IsNullOrWhiteSpace(databaseName))
    {
        return;
    }

    // The target database may not exist yet, so the check has to run against master.
    builder.InitialCatalog = "master";

    // CREATE DATABASE cannot take a parameter for the name, so the statement is assembled on the
    // server. QUOTENAME brackets and escapes the identifier; the name never passes through a
    // C#-built string literal, so quotes or brackets in it cannot alter the statement.
    const string sql = """
        IF DB_ID(@DatabaseName) IS NULL
        BEGIN
            DECLARE @CreateSql nvarchar(max) = N'CREATE DATABASE ' + QUOTENAME(@DatabaseName);
            EXEC (@CreateSql);
        END
        """;

    await using var connection = new SqlConnection(builder.ConnectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(sql, connection);
    command.Parameters.AddWithValue("@DatabaseName", databaseName);
    await command.ExecuteNonQueryAsync(ct);
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
namespace Api.Settings;
|
||||||
|
|
||||||
|
/// <summary>
/// Address and internal API key for the RAG API.
/// </summary>
public sealed class RagApiSettings
{
    /// <summary>Base URL of the RAG API; defaults to <c>http://localhost:8081</c>.</summary>
    public string BaseUrl { get; set; } = "http://localhost:8081";

    /// <summary>Internal API key associated with the RAG API; empty by default.</summary>
    public string InternalApiKey { get; set; } = "";
}
|
||||||
|
|
||||||
|
/// <summary>
/// Internal API key settings for this service.
/// </summary>
public sealed class InternalApiSettings
{
    /// <summary>The internal API key; empty by default.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>Whether an API key is required; off by default.</summary>
    public bool RequireApiKey { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// AI provider selection plus the per-provider settings.
/// </summary>
public sealed class AiSettings
{
    /// <summary>Name of the selected provider; defaults to <c>OpenAI</c>.</summary>
    public string Provider { get; set; } = "OpenAI";

    /// <summary>OpenAI-specific settings.</summary>
    public OpenAiSettings OpenAI { get; set; } = new OpenAiSettings();

    /// <summary>Ollama-specific settings.</summary>
    public OllamaSettings Ollama { get; set; } = new OllamaSettings();
}
|
||||||
|
|
||||||
|
/// <summary>
/// OpenAI settings: API key, chat model and timeout.
/// </summary>
public sealed class OpenAiSettings
{
    /// <summary>OpenAI API key; empty by default.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>Chat model name; defaults to <c>gpt-4o-mini</c>.</summary>
    public string ChatModel { get; set; } = "gpt-4o-mini";

    /// <summary>Timeout in seconds; defaults to 90.</summary>
    public int TimeoutSeconds { get; set; } = 90;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Ollama settings: server address, chat model and timeout.
/// </summary>
public sealed class OllamaSettings
{
    /// <summary>Base URL of the Ollama server; defaults to <c>http://localhost:11434</c>.</summary>
    public string BaseUrl { get; set; } = "http://localhost:11434";

    /// <summary>Chat model name; defaults to <c>llama3.1:8b</c>.</summary>
    public string ChatModel { get; set; } = "llama3.1:8b";

    /// <summary>Timeout in seconds; defaults to 180.</summary>
    public int TimeoutSeconds { get; set; } = 180;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Numeric limits for the matcher.
/// </summary>
/// <remarks>
/// NOTE(review): how each limit is applied is decided by the consuming service, which is not
/// part of this file; the property summaries below only restate the names and defaults.
/// </remarks>
public sealed class MatcherSettings
{
    /// <summary>Top-K value; defaults to 10.</summary>
    public int TopK { get; set; } = 10;

    /// <summary>Deep-score top-N value; defaults to 5.</summary>
    public int DeepScoreTopN { get; set; } = 5;

    /// <summary>Maximum job text length in characters; defaults to 60000.</summary>
    public int MaxJobTextChars { get; set; } = 60_000;
}
|
||||||
|
|
||||||
|
/// <summary>
/// SMTP server, credentials and addresses for outgoing mail.
/// </summary>
public sealed class SmtpSettings
{
    /// <summary>SMTP server host name; empty by default.</summary>
    public string Host { get; set; } = "";

    /// <summary>SMTP server port; defaults to 587.</summary>
    public int Port { get; set; } = 587;

    /// <summary>SMTP user name; empty by default.</summary>
    public string Username { get; set; } = "";

    /// <summary>SMTP password; empty by default.</summary>
    public string Password { get; set; } = "";

    /// <summary>Whether STARTTLS is used; on by default.</summary>
    public bool UseStartTls { get; set; } = true;

    /// <summary>Sender address; defaults to <c>noreply@myai.ro</c>.</summary>
    public string FromEmail { get; set; } = "noreply@myai.ro";

    /// <summary>Recipient address; empty by default.</summary>
    public string ToEmail { get; set; } = "";
}
|
||||||
@@ -0,0 +1,114 @@
|
|||||||
|
{
|
||||||
|
"Serilog": {
|
||||||
|
"Using": [
|
||||||
|
"Serilog.Sinks.Console",
|
||||||
|
"Serilog.Sinks.File",
|
||||||
|
"Serilog.Sinks.Email"
|
||||||
|
],
|
||||||
|
"MinimumLevel": {
|
||||||
|
"Default": "Information",
|
||||||
|
"Override": {
|
||||||
|
"Microsoft.AspNetCore": "Warning",
|
||||||
|
"Microsoft.AspNetCore.Hosting": "Information",
|
||||||
|
"Microsoft.AspNetCore.Routing": "Warning",
|
||||||
|
"System.Net.Http.HttpClient": "Warning",
|
||||||
|
"Api": "Information"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"WriteTo": [
|
||||||
|
{
|
||||||
|
"Name": "Console",
|
||||||
|
"Args": {
|
||||||
|
"outputTemplate": "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext}: {Message:lj}{NewLine}{Exception}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Name": "File",
|
||||||
|
"Args": {
|
||||||
|
"path": "logs/api-.log",
|
||||||
|
"rollingInterval": "Day",
|
||||||
|
"retainedFileCountLimit": 30,
|
||||||
|
"outputTemplate": "{Timestamp:yyyy-MM-dd HH:mm:ss.fff zzz} [{Level:u3}] {SourceContext}: {Message:lj}{NewLine}{Exception}"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Name": "Email",
|
||||||
|
"Args": {
|
||||||
|
"restrictedToMinimumLevel": "Error",
|
||||||
|
"fromEmail": "",
|
||||||
|
"toEmail": "",
|
||||||
|
"mailServer": "",
|
||||||
|
"networkCredential": {
|
||||||
|
"userName": "",
|
||||||
|
"password": ""
|
||||||
|
},
|
||||||
|
"port": 587,
|
||||||
|
"enableSsl": true,
|
||||||
|
"emailSubject": "[mihes.ro API] Error Alert",
|
||||||
|
"outputTemplate": "{Timestamp:yyyy-MM-dd HH:mm:ss.fff zzz} [{Level:u3}] {SourceContext}{NewLine}{Message:lj}{NewLine}{Exception}",
|
||||||
|
"batchPostingLimit": 10,
|
||||||
|
"period": "0.00:05:00"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"Enrich": [
|
||||||
|
"FromLogContext",
|
||||||
|
"WithMachineName",
|
||||||
|
"WithEnvironmentName"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Logging": {
|
||||||
|
"LogLevel": {
|
||||||
|
"Default": "Information",
|
||||||
|
"Microsoft.AspNetCore": "Warning",
|
||||||
|
"Microsoft.AspNetCore.Hosting": "Information",
|
||||||
|
"Microsoft.AspNetCore.Routing": "Warning",
|
||||||
|
"System.Net.Http.HttpClient": "Warning",
|
||||||
|
"Api": "Information"
|
||||||
|
},
|
||||||
|
"LogEnvironmentOnStartup": true
|
||||||
|
},
|
||||||
|
"AllowedHosts": "*",
|
||||||
|
"KeyVault": {
|
||||||
|
"VaultUri": "",
|
||||||
|
"Enabled": false
|
||||||
|
},
|
||||||
|
"ConnectionStrings": {
|
||||||
|
"CvMatcherDb": "Server=localhost,1433;Database=MyAiCvMatcher;User Id=sa;Password=Your_strong_password123;TrustServerCertificate=True"
|
||||||
|
},
|
||||||
|
"InternalApi": {
|
||||||
|
"ApiKey": "",
|
||||||
|
"RequireApiKey": false
|
||||||
|
},
|
||||||
|
"RagApi": {
|
||||||
|
"BaseUrl": "http://localhost:8081",
|
||||||
|
"InternalApiKey": ""
|
||||||
|
},
|
||||||
|
"Ai": {
|
||||||
|
"Provider": "OpenAI",
|
||||||
|
"OpenAI": {
|
||||||
|
"ApiKey": "",
|
||||||
|
"ChatModel": "gpt-4o-mini",
|
||||||
|
"TimeoutSeconds": 90
|
||||||
|
},
|
||||||
|
"Ollama": {
|
||||||
|
"BaseUrl": "http://localhost:11434",
|
||||||
|
"ChatModel": "llama3.1:8b",
|
||||||
|
"TimeoutSeconds": 180
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Matcher": {
|
||||||
|
"TopK": 10,
|
||||||
|
"DeepScoreTopN": 5,
|
||||||
|
"MaxJobTextChars": 60000
|
||||||
|
},
|
||||||
|
"Smtp": {
|
||||||
|
"Host": "",
|
||||||
|
"Port": 587,
|
||||||
|
"Username": "",
|
||||||
|
"Password": "",
|
||||||
|
"UseStartTls": true,
|
||||||
|
"FromEmail": "noreply@myai.ro",
|
||||||
|
"ToEmail": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk.Web">
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net10.0</TargetFramework>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
|
||||||
|
<RootNamespace>Api</RootNamespace>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
|
||||||
|
<PackageReference Include="Azure.Identity" Version="1.21.0" />
|
||||||
|
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
||||||
|
<PackageReference Include="MailKit" Version="4.16.0" />
|
||||||
|
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.1.3" />
|
||||||
|
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
||||||
|
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
||||||
|
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
|
||||||
|
<PackageReference Include="Swashbuckle.AspNetCore" Version="10.1.7" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<None Update="Database/schema.sql">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</None>
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
||||||
@@ -1,7 +1,74 @@
|
|||||||
version: "3.8"
|
version: "3.8"
|
||||||
|
|
||||||
services:
|
services:
|
||||||
|
|
||||||
|
mssql:
|
||||||
|
image: mcr.microsoft.com/mssql/server:2022-latest
|
||||||
|
container_name: myai-mssql
|
||||||
|
environment:
|
||||||
|
- ACCEPT_EULA=Y
|
||||||
|
- MSSQL_SA_PASSWORD=${MSSQL_SA_PASSWORD:-Your_strong_password123}
|
||||||
|
ports:
|
||||||
|
- "1433:1433"
|
||||||
|
volumes:
|
||||||
|
- myai-mssql-data:/var/opt/mssql
|
||||||
|
networks:
|
||||||
|
- myai-network
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
rag-api:
|
||||||
|
build:
|
||||||
|
context: ../rag-api
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: myai-rag-api
|
||||||
|
depends_on:
|
||||||
|
- mssql
|
||||||
|
ports:
|
||||||
|
- "8081:8080"
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
- ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Development}
|
||||||
|
- ASPNETCORE_URLS=http://+:8080
|
||||||
|
- ConnectionStrings__RagDb=Server=mssql,1433;Database=MyAiRag;User Id=sa;Password=${MSSQL_SA_PASSWORD:-Your_strong_password123};TrustServerCertificate=True
|
||||||
|
- InternalApi__RequireApiKey=true
|
||||||
|
- InternalApi__ApiKey=${INTERNAL_API_KEY:-change-this-internal-key}
|
||||||
|
- Ai__Provider=${AI_PROVIDER:-OpenAI}
|
||||||
|
- Ai__OpenAI__ApiKey=${OPENAI_API_KEY:-}
|
||||||
|
- Ai__Ollama__BaseUrl=${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
|
||||||
|
networks:
|
||||||
|
- myai-network
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
cv-matcher-api:
|
||||||
|
build:
|
||||||
|
context: ../cv-matcher-api
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: myai-cv-matcher-api
|
||||||
|
depends_on:
|
||||||
|
- mssql
|
||||||
|
- rag-api
|
||||||
|
ports:
|
||||||
|
- "8082:8080"
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
- ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Development}
|
||||||
|
- ASPNETCORE_URLS=http://+:8080
|
||||||
|
- ConnectionStrings__CvMatcherDb=Server=mssql,1433;Database=MyAiCvMatcher;User Id=sa;Password=${MSSQL_SA_PASSWORD:-Your_strong_password123};TrustServerCertificate=True
|
||||||
|
- InternalApi__RequireApiKey=true
|
||||||
|
- InternalApi__ApiKey=${INTERNAL_API_KEY:-change-this-internal-key}
|
||||||
|
- RagApi__BaseUrl=http://rag-api:8080
|
||||||
|
- RagApi__InternalApiKey=${INTERNAL_API_KEY:-change-this-internal-key}
|
||||||
|
- Ai__Provider=${AI_PROVIDER:-OpenAI}
|
||||||
|
- Ai__OpenAI__ApiKey=${OPENAI_API_KEY:-}
|
||||||
|
- Ai__Ollama__BaseUrl=${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
|
||||||
|
networks:
|
||||||
|
- myai-network
|
||||||
|
restart: unless-stopped
|
||||||
api:
|
api:
|
||||||
|
depends_on:
|
||||||
|
- cv-matcher-api
|
||||||
build:
|
build:
|
||||||
context: ../api
|
context: ../api
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
@@ -16,6 +83,8 @@ services:
|
|||||||
- ASPNETCORE_URLS=${ASPNETCORE_URLS:-http://+:8080}
|
- ASPNETCORE_URLS=${ASPNETCORE_URLS:-http://+:8080}
|
||||||
- Cors__AllowedOrigins__0=http://localhost:5000
|
- Cors__AllowedOrigins__0=http://localhost:5000
|
||||||
- Cors__AllowedOrigins__1=http://web:8080
|
- Cors__AllowedOrigins__1=http://web:8080
|
||||||
|
- CvMatcherApi__BaseUrl=http://cv-matcher-api:8080
|
||||||
|
- CvMatcherApi__InternalApiKey=${INTERNAL_API_KEY:-change-this-internal-key}
|
||||||
volumes:
|
volumes:
|
||||||
- ../api/logs:/app/logs
|
- ../api/logs:/app/logs
|
||||||
networks:
|
networks:
|
||||||
@@ -40,6 +109,9 @@ services:
|
|||||||
- myai-network
|
- myai-network
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
myai-mssql-data:
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
myai-network:
|
myai-network:
|
||||||
driver: bridge
|
driver: bridge
|
||||||
@@ -6,6 +6,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "api", "api\api.csproj", "{1
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "web", "web\web.csproj", "{B0A3EAB7-759A-448A-A906-52DF75A70016}"
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "web", "web\web.csproj", "{B0A3EAB7-759A-448A-A906-52DF75A70016}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "rag-api", "rag-api\rag-api.csproj", "{A63E1C1A-4A78-49F4-9F5C-D43783294861}"
|
||||||
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "cv-matcher-api", "cv-matcher-api\cv-matcher-api.csproj", "{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}"
|
||||||
|
EndProject
|
||||||
Project("{E53339B2-1760-4266-BCC7-CA923CBCF16C}") = "docker-compose", "docker-compose\docker-compose.dcproj", "{81DDED9D-158B-E303-5F62-77A2896D2A5A}"
|
Project("{E53339B2-1760-4266-BCC7-CA923CBCF16C}") = "docker-compose", "docker-compose\docker-compose.dcproj", "{81DDED9D-158B-E303-5F62-77A2896D2A5A}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
@@ -22,6 +26,14 @@ Global
|
|||||||
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Release|Any CPU.Build.0 = Release|Any CPU
|
{B0A3EAB7-759A-448A-A906-52DF75A70016}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{A63E1C1A-4A78-49F4-9F5C-D43783294861}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{A63E1C1A-4A78-49F4-9F5C-D43783294861}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{A63E1C1A-4A78-49F4-9F5C-D43783294861}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{A63E1C1A-4A78-49F4-9F5C-D43783294861}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{C40F5025-B0A6-4B25-B4A2-7EA568E06C40}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{81DDED9D-158B-E303-5F62-77A2896D2A5A}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
|||||||
@@ -0,0 +1,110 @@
|
|||||||
|
using Microsoft.AspNetCore.Mvc;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Requests;
|
||||||
|
|
||||||
|
namespace Api.Controllers;
|
||||||
|
|
||||||
|
/// <summary>
/// HTTP endpoints under <c>api/rag</c> for indexing documents, semantic search and document lookup.
/// All work is delegated to <see cref="IRagService"/>; the controller only logs and shapes responses.
/// </summary>
[ApiController]
[Route("api/rag")]
public sealed class RagController : ControllerBase
{
    private readonly IRagService _ragService;
    private readonly ILogger<RagController> _logger;

    public RagController(IRagService ragService, ILogger<RagController> logger)
        => (_ragService, _logger) = (ragService, logger);

    /// <summary>
    /// Indexes a form-posted document: the uploaded file when one is present, otherwise the posted text.
    /// </summary>
    /// <returns>200 with the indexing result, or 400 when the service throws <see cref="InvalidOperationException"/>.</returns>
    [HttpPost("documents")]
    [RequestSizeLimit(10 * 1024 * 1024)]
    public async Task<IActionResult> IndexDocument(
        [FromForm] IFormFile? file,
        [FromForm] string? text,
        [FromForm] string? documentType,
        [FromForm] string? title,
        [FromForm] string? sourceUrl,
        CancellationToken ct)
    {
        try
        {
            _logger.LogInformation("Index document request received. HasFile={HasFile}, DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                file is not null, documentType, title, sourceUrl);

            if (file is null)
            {
                // No upload: index the posted text fields instead.
                var textRequest = new IndexDocumentRequest
                {
                    Text = text,
                    DocumentType = documentType,
                    Title = title,
                    SourceUrl = sourceUrl
                };
                var indexedText = await _ragService.IndexTextAsync(textRequest, ct);
                _logger.LogInformation("Indexed text document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                    indexedText.DocumentId, indexedText.DocumentType, indexedText.Chunks, indexedText.Cached);
                return Ok(indexedText);
            }

            var indexedPdf = await _ragService.IndexPdfAsync(file, documentType, title, sourceUrl, ct);
            _logger.LogInformation("Indexed PDF document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                indexedPdf.DocumentId, indexedPdf.DocumentType, indexedPdf.Chunks, indexedPdf.Cached);
            return Ok(indexedPdf);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid document indexing request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Indexes a document supplied as a JSON body.
    /// </summary>
    /// <returns>200 with the indexing result, or 400 when the service throws <see cref="InvalidOperationException"/>.</returns>
    [HttpPost("documents/json")]
    public async Task<IActionResult> IndexJsonDocument([FromBody] IndexDocumentRequest request, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation("JSON document indexing request received. DocumentType={DocumentType}, Title={Title}, SourceUrl={SourceUrl}",
                request.DocumentType, request.Title, request.SourceUrl);

            var indexed = await _ragService.IndexTextAsync(request, ct);

            _logger.LogInformation("Indexed JSON document. DocumentId={DocumentId}, DocumentType={DocumentType}, Chunks={Chunks}, Cached={Cached}",
                indexed.DocumentId, indexed.DocumentType, indexed.Chunks, indexed.Cached);
            return Ok(indexed);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid JSON document indexing request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Runs a semantic search.
    /// </summary>
    /// <returns>200 with the search result, or 400 when the service throws <see cref="InvalidOperationException"/>.</returns>
    [HttpPost("search")]
    public async Task<IActionResult> Search([FromBody] SearchRequest request, CancellationToken ct)
    {
        try
        {
            // A missing type filter is logged as an empty list.
            var targetTypes = string.Join(',', request.TargetDocumentTypes ?? []);
            _logger.LogInformation("Semantic search request received. TargetTypes={TargetTypes}, TopK={TopK}",
                targetTypes, request.TopK);

            var found = await _ragService.SearchAsync(request, ct);

            _logger.LogInformation("Semantic search completed. ResultCount={ResultCount}", found.Results.Count);
            return Ok(found);
        }
        catch (InvalidOperationException ex)
        {
            _logger.LogWarning(ex, "Invalid semantic search request.");
            return InvalidRequest(ex);
        }
    }

    /// <summary>
    /// Returns the document with the given id.
    /// </summary>
    /// <returns>200 with the document, or 404 when the service returns <c>null</c>.</returns>
    [HttpGet("documents/{id}")]
    public async Task<IActionResult> GetDocument(string id, CancellationToken ct)
    {
        _logger.LogInformation("Get document request received. DocumentId={DocumentId}", id);

        var document = await _ragService.GetDocumentAsync(id, ct);
        if (document is not null)
        {
            return Ok(document);
        }

        _logger.LogWarning("Document not found. DocumentId={DocumentId}", id);
        return NotFound(new { error = "Document not found." });
    }

    /// <summary>Builds the 400 response body shared by the endpoints that reject invalid input.</summary>
    private BadRequestObjectResult InvalidRequest(InvalidOperationException ex)
        => BadRequest(new { error = ex.Message });
}
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
-- RAG schema. Every statement is guarded so the script can be re-run against an existing database.

-- Chunks: one row per chunk of a document, holding the chunk text and its embedding bytes.
IF OBJECT_ID('dbo.RagChunks', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.RagChunks (
        Id NVARCHAR(64) NOT NULL CONSTRAINT PK_RagChunks PRIMARY KEY,
        DocumentId NVARCHAR(64) NOT NULL,
        ChunkIndex INT NOT NULL,
        Text NVARCHAR(MAX) NOT NULL,
        Embedding VARBINARY(MAX) NOT NULL
    );
END

-- SQL Server does not index foreign key columns automatically. Without this index the cascading
-- delete from RagDocuments, and any read of one document's chunks, has to scan the whole table.
-- Guarded separately from the table so databases created by an earlier version also receive it.
IF NOT EXISTS (SELECT 1 FROM sys.indexes WHERE name = 'IX_RagChunks_DocumentId' AND object_id = OBJECT_ID('dbo.RagChunks'))
BEGIN
    CREATE INDEX IX_RagChunks_DocumentId ON dbo.RagChunks(DocumentId, ChunkIndex);
END
GO

-- Documents: the raw text and metadata of each indexed document.
IF OBJECT_ID('dbo.RagDocuments', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.RagDocuments (
        Id NVARCHAR(64) NOT NULL CONSTRAINT PK_RagDocuments PRIMARY KEY,
        DocumentType NVARCHAR(80) NOT NULL,
        Title NVARCHAR(300) NOT NULL,
        SourceUrl NVARCHAR(1200) NULL,
        RawText NVARCHAR(MAX) NOT NULL,
        TextHash NVARCHAR(64) NOT NULL,
        TypeConfidence FLOAT NOT NULL,
        MetadataJson NVARCHAR(MAX) NOT NULL CONSTRAINT DF_RagDocuments_MetadataJson DEFAULT '{}',
        CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagDocuments_CreatedAt DEFAULT SYSUTCDATETIME()
    );

    CREATE INDEX IX_RagDocuments_TextHash ON dbo.RagDocuments(TextHash);
    CREATE INDEX IX_RagDocuments_DocumentType ON dbo.RagDocuments(DocumentType);
END
GO

-- Chunks belong to a document; deleting the document removes its chunks.
-- Added after both tables exist because RagChunks is created first.
IF NOT EXISTS (SELECT 1 FROM sys.foreign_keys WHERE name = 'FK_RagChunks_RagDocuments')
BEGIN
    ALTER TABLE dbo.RagChunks
    ADD CONSTRAINT FK_RagChunks_RagDocuments FOREIGN KEY (DocumentId) REFERENCES dbo.RagDocuments(Id) ON DELETE CASCADE;
END
GO

-- Embedding cache: stored vectors keyed by CacheKey, with the model name and text hash.
IF OBJECT_ID('dbo.RagEmbeddingCache', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.RagEmbeddingCache (
        CacheKey NVARCHAR(64) NOT NULL CONSTRAINT PK_RagEmbeddingCache PRIMARY KEY,
        Model NVARCHAR(120) NOT NULL,
        TextHash NVARCHAR(64) NOT NULL,
        Vector VARBINARY(MAX) NOT NULL,
        CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagEmbeddingCache_CreatedAt DEFAULT SYSUTCDATETIME()
    );

    CREATE INDEX IX_RagEmbeddingCache_TextHash ON dbo.RagEmbeddingCache(TextHash);
END
GO

-- Chat completion cache: stored response text keyed by CacheKey, with model and temperature.
IF OBJECT_ID('dbo.RagChatCompletionCache', 'U') IS NULL
BEGIN
    CREATE TABLE dbo.RagChatCompletionCache (
        CacheKey NVARCHAR(64) NOT NULL CONSTRAINT PK_RagChatCompletionCache PRIMARY KEY,
        Model NVARCHAR(120) NOT NULL,
        Temperature DECIMAL(4,2) NOT NULL,
        ResponseText NVARCHAR(MAX) NOT NULL,
        CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_RagChatCompletionCache_CreatedAt DEFAULT SYSUTCDATETIME()
    );
END
GO
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS base
|
||||||
|
WORKDIR /app
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
|
||||||
|
WORKDIR /src
|
||||||
|
COPY ["rag-api.csproj", "./"]
|
||||||
|
RUN dotnet restore "rag-api.csproj"
|
||||||
|
COPY . .
|
||||||
|
RUN dotnet publish "rag-api.csproj" -c Release -o /app/publish /p:UseAppHost=false
|
||||||
|
|
||||||
|
FROM base AS final
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --from=build /app/publish .
|
||||||
|
ENTRYPOINT ["dotnet", "rag-api.dll"]
|
||||||
@@ -0,0 +1,282 @@
|
|||||||
|
using Azure.Identity;
|
||||||
|
using Microsoft.AspNetCore.Diagnostics;
|
||||||
|
using Api.Services;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
using Serilog;
|
||||||
|
using System.Reflection;
|
||||||
|
|
||||||
|
DotNetEnv.Env.Load();

// Two-stage Serilog initialization. Until the host is built, the static Log.Logger is Serilog's
// silent default, so the startup messages below and any Log.Fatal for a failure before
// builder.Build() would be dropped. The bootstrap logger writes them to the console; the
// UseSerilog callback replaces its configuration once the host starts.
Log.Logger = new LoggerConfiguration()
    .Enrich.WithProperty("Service", "rag-api")
    .WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter())
    .CreateBootstrapLogger();
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var builder = WebApplication.CreateBuilder(args);
|
||||||
|
var appVersion = Assembly.GetExecutingAssembly()
|
||||||
|
.GetCustomAttribute<AssemblyInformationalVersionAttribute>()?
|
||||||
|
.InformationalVersion
|
||||||
|
?? Assembly.GetExecutingAssembly().GetName().Version?.ToString()
|
||||||
|
?? "unknown";
|
||||||
|
|
||||||
|
builder.Host.UseSerilog((context, services, configuration) =>
|
||||||
|
{
|
||||||
|
configuration
|
||||||
|
.ReadFrom.Configuration(context.Configuration)
|
||||||
|
.ReadFrom.Services(services)
|
||||||
|
.Enrich.FromLogContext()
|
||||||
|
.Enrich.WithMachineName()
|
||||||
|
.Enrich.WithEnvironmentName()
|
||||||
|
.Enrich.WithProperty("Service", "rag-api")
|
||||||
|
.Enrich.WithProperty("AppVersion", appVersion)
|
||||||
|
.WriteTo.Console(new Serilog.Formatting.Json.JsonFormatter());
|
||||||
|
});
|
||||||
|
|
||||||
|
Log.Information("Starting {Service} version {AppVersion}", "rag-api", appVersion);
|
||||||
|
|
||||||
|
// Azure Key Vault: added as a configuration source only when KeyVault:Enabled is true and
// KeyVault:VaultUri is set. A failure to add it is logged and startup continues.
var keyVaultUri = builder.Configuration["KeyVault:VaultUri"];
var keyVaultEnabled = builder.Configuration.GetValue<bool>("KeyVault:Enabled");

if (!keyVaultEnabled || string.IsNullOrWhiteSpace(keyVaultUri))
{
    Log.Information("Azure Key Vault is disabled or not configured");
}
else
{
    Log.Information("Loading configuration from Azure Key Vault: {VaultUri}", keyVaultUri);

    try
    {
        var vaultAddress = new Uri(keyVaultUri);
        builder.Configuration.AddAzureKeyVault(vaultAddress, new DefaultAzureCredential());

        Log.Information("Azure Key Vault configuration loaded successfully");
    }
    catch (Exception vaultError)
    {
        Log.Warning(vaultError, "Failed to load Azure Key Vault configuration. Continuing with other configuration sources.");
    }
}
|
||||||
|
|
||||||
|
builder.Services.Configure<RagSettings>(builder.Configuration.GetSection("Rag"));
|
||||||
|
builder.Services.Configure<AiSettings>(builder.Configuration.GetSection("Ai"));
|
||||||
|
builder.Services.Configure<InternalApiSettings>(builder.Configuration.GetSection("InternalApi"));
|
||||||
|
|
||||||
|
builder.Services.AddHttpClient<RawAiClient>();
|
||||||
|
builder.Services.AddSingleton<IRagRepository, SqlRagRepository>();
|
||||||
|
builder.Services.AddScoped<IAiClient, CachedAiClient>();
|
||||||
|
builder.Services.AddSingleton<ITextExtractor, TextExtractor>();
|
||||||
|
builder.Services.AddSingleton<ITextChunker, TextChunker>();
|
||||||
|
builder.Services.AddSingleton<IDocumentClassifier, DocumentClassifier>();
|
||||||
|
builder.Services.AddScoped<IRagService, RagService>();
|
||||||
|
|
||||||
|
builder.Services.AddControllers();
|
||||||
|
builder.Services.AddEndpointsApiExplorer();
|
||||||
|
builder.Services.AddSwaggerGen();
|
||||||
|
|
||||||
|
var app = builder.Build();
|
||||||
|
|
||||||
|
var logger = app.Services.GetRequiredService<ILogger<Program>>();
|
||||||
|
logger.LogInformation("API starting up...");
|
||||||
|
logger.LogInformation("Environment: {Environment}", app.Environment.EnvironmentName);
|
||||||
|
|
||||||
|
// Log all environment variables and configuration settings at startup
|
||||||
|
// Can be controlled via appsettings: "Logging:LogEnvironmentOnStartup": true
|
||||||
|
var logEnvironmentOnStartup = app.Configuration.GetValue<bool>("Logging:LogEnvironmentOnStartup", defaultValue: true);
|
||||||
|
if (logEnvironmentOnStartup)
|
||||||
|
{
|
||||||
|
LogEnvironmentSettings(logger, app.Configuration, app.Environment);
|
||||||
|
}
|
||||||
|
|
||||||
|
using (var scope = app.Services.CreateScope())
|
||||||
|
{
|
||||||
|
var repository = scope.ServiceProvider.GetRequiredService<IRagRepository>();
|
||||||
|
await repository.InitializeAsync(CancellationToken.None);
|
||||||
|
}
|
||||||
|
|
||||||
|
app.UseSerilogRequestLogging(options =>
|
||||||
|
{
|
||||||
|
options.MessageTemplate = "HTTP {RequestMethod} {RequestPath} responded {StatusCode} in {Elapsed:0.0000} ms";
|
||||||
|
options.EnrichDiagnosticContext = (diagnosticContext, httpContext) =>
|
||||||
|
{
|
||||||
|
diagnosticContext.Set("RequestHost", httpContext.Request.Host.Value);
|
||||||
|
diagnosticContext.Set("RequestScheme", httpContext.Request.Scheme);
|
||||||
|
diagnosticContext.Set("RemoteIP", httpContext.Connection.RemoteIpAddress?.ToString());
|
||||||
|
diagnosticContext.Set("UserAgent", httpContext.Request.Headers.UserAgent.ToString());
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
// Last-resort error handling: log the unhandled exception and answer with a generic JSON 500
// so exception details are not returned to the caller.
app.UseExceptionHandler(errorApp => errorApp.Run(async context =>
{
    var unhandled = context.Features.Get<IExceptionHandlerFeature>()?.Error;
    var errorLogger = context.RequestServices.GetRequiredService<ILogger<Program>>();
    if (unhandled is not null)
    {
        errorLogger.LogError(unhandled, "Unhandled exception in {Service}", "rag-api");
    }

    var response = context.Response;
    response.StatusCode = StatusCodes.Status500InternalServerError;
    response.ContentType = "application/json";
    await response.WriteAsJsonAsync(new { error = "Unexpected server error." });
}));
|
||||||
|
|
||||||
|
// Internal API-key gate. When InternalApi:RequireApiKey is true, every request must carry an
// X-Internal-Api-Key header equal to InternalApi:ApiKey; anything else gets a JSON 401.
// An empty configured key rejects all requests rather than accepting an empty header.
app.Use(async (context, next) =>
{
    var settings = context.RequestServices.GetRequiredService<Microsoft.Extensions.Options.IOptions<InternalApiSettings>>().Value;
    if (settings.RequireApiKey)
    {
        var presentedKey = context.Request.Headers["X-Internal-Api-Key"].ToString();

        // Compare in constant time so response timing does not reveal how much of a guessed
        // key matched. FixedTimeEquals returns false immediately only when the lengths differ.
        var authorized = !string.IsNullOrWhiteSpace(settings.ApiKey)
            && System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(
                System.Text.Encoding.UTF8.GetBytes(presentedKey),
                System.Text.Encoding.UTF8.GetBytes(settings.ApiKey));

        if (!authorized)
        {
            var logger = context.RequestServices.GetRequiredService<ILogger<Program>>();
            logger.LogWarning("Rejected unauthorized internal API call. Path={Path}, RemoteIP={RemoteIP}", context.Request.Path, context.Connection.RemoteIpAddress?.ToString());
            context.Response.StatusCode = StatusCodes.Status401Unauthorized;
            await context.Response.WriteAsJsonAsync(new { error = "Unauthorized internal API call." });
            return;
        }
    }

    await next();
});
|
||||||
|
|
||||||
|
// Swagger (typically only in Development)
|
||||||
|
if (app.Environment.IsDevelopment())
|
||||||
|
{
|
||||||
|
app.UseSwagger();
|
||||||
|
app.UseSwaggerUI(options =>
|
||||||
|
{
|
||||||
|
options.DocumentTitle = "rag-api";
|
||||||
|
options.SwaggerEndpoint("/swagger/v1/swagger.json", "rag-api v1");
|
||||||
|
options.RoutePrefix = "swagger";
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
app.MapControllers();
|
||||||
|
app.MapGet("/health", () => Results.Ok(new { status = "ok", service = "rag-api", version = appVersion, timeUtc = DateTimeOffset.UtcNow }));
|
||||||
|
|
||||||
|
Log.Information("{Service} startup complete", "rag-api");
|
||||||
|
app.Run();
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
Log.Fatal(ex, "rag-api terminated unexpectedly");
|
||||||
|
}
|
||||||
|
finally
|
||||||
|
{
|
||||||
|
Log.Information("Shutting down rag-api");
|
||||||
|
Log.CloseAndFlush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Writes a startup diagnostics dump to <paramref name="logger"/>: host environment details,
/// every process environment variable (sorted by name) and every configuration value.
/// A value whose key is flagged by IsSensitiveKey is passed through MaskValueWithLastChars before it is logged.
/// </summary>
/// <param name="logger">Destination for the diagnostic lines.</param>
/// <param name="configuration">Configuration root whose sections are walked recursively.</param>
/// <param name="environment">Host environment supplying the application name, environment name and paths.</param>
static void LogEnvironmentSettings(Microsoft.Extensions.Logging.ILogger logger, IConfiguration configuration, IWebHostEnvironment environment)
{
    logger.LogInformation("==================== ENVIRONMENT SETTINGS ====================");

    // Host environment
    logger.LogInformation("Application Name: {ApplicationName}", environment.ApplicationName);
    logger.LogInformation("Environment Name: {EnvironmentName}", environment.EnvironmentName);
    logger.LogInformation("Content Root Path: {ContentRootPath}", environment.ContentRootPath);
    logger.LogInformation("Web Root Path: {WebRootPath}", environment.WebRootPath);

    // Process environment variables, collected into a sorted map so the output order is stable.
    logger.LogInformation("-------------- Environment Variables --------------");

    var variablesByName = new SortedDictionary<string, string?>();
    foreach (System.Collections.DictionaryEntry variable in Environment.GetEnvironmentVariables())
    {
        var name = variable.Key?.ToString() ?? string.Empty;
        var raw = variable.Value?.ToString() ?? string.Empty;

        // Secrets (passwords, tokens, keys, ...) are masked before they reach the log.
        variablesByName[name] = IsSensitiveKey(name) ? MaskValueWithLastChars(raw) : raw;
    }

    foreach (var (name, shown) in variablesByName)
    {
        logger.LogInformation(" {Key} = {Value}", name, shown);
    }

    // Configuration tree
    logger.LogInformation("-------------- Configuration Settings --------------");
    LogConfigurationRecursive(logger, configuration.GetChildren(), "");

    logger.LogInformation("===========================================================");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Walks a configuration tree depth-first and logs every section that carries a value as
/// "parent:child = value". Values under a key flagged by IsSensitiveKey are masked first.
/// </summary>
/// <param name="logger">Destination for the diagnostic lines.</param>
/// <param name="sections">Sections at the current level of the tree.</param>
/// <param name="prefix">Colon-separated path of the parent section; empty at the root.</param>
static void LogConfigurationRecursive(Microsoft.Extensions.Logging.ILogger logger, IEnumerable<IConfigurationSection> sections, string prefix)
{
    foreach (var node in sections)
    {
        var path = string.IsNullOrEmpty(prefix) ? node.Key : prefix + ":" + node.Key;

        // Only sections that hold a value are printed; pure container sections are just descended into.
        if (node.Value is { } raw)
        {
            var shown = IsSensitiveKey(path) ? MaskValueWithLastChars(raw) : raw;
            logger.LogInformation(" {Key} = {Value}", path, shown);
        }

        var children = node.GetChildren();
        if (children.Any())
        {
            LogConfigurationRecursive(logger, children, path);
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Tells whether a configuration or environment key looks like it holds a secret.
/// The check is a case-insensitive substring match, so it deliberately over-matches
/// (any key containing "key", e.g. "Monkey", is treated as sensitive).
/// </summary>
/// <param name="key">Key name or colon-separated configuration path.</param>
/// <returns><c>true</c> when the key contains one of the sensitive markers.</returns>
static bool IsSensitiveKey(string key)
{
    string[] markers = ["Password", "Secret", "Token", "Key", "ConnectionString"];

    foreach (var marker in markers)
    {
        if (key.Contains(marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Masks a sensitive value for logging. The last 4 characters are revealed for verification
/// only when the value is at least 12 characters long, so that no more than a third of a
/// secret ever reaches the log; shorter values are masked completely.
/// </summary>
/// <param name="value">The value to mask.</param>
/// <returns>
/// "***NOT SET***" for a null or empty value, "***MASKED***" for a value shorter than 12 characters,
/// otherwise the mask followed by the last 4 characters (e.g., "***MASKED***...abcd").
/// </returns>
static string MaskValueWithLastChars(string value)
{
    const int visibleSuffixLength = 4;

    // Below this length a 4-character suffix would expose too large a share of the secret
    // (e.g. 4 of 5 characters of a short password).
    const int minLengthForSuffix = 3 * visibleSuffixLength;

    if (string.IsNullOrEmpty(value))
    {
        return "***NOT SET***";
    }

    if (value.Length < minLengthForSuffix)
    {
        return "***MASKED***";
    }

    return $"***MASKED***...{value[^visibleSuffixLength..]}";
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"profiles": {
|
||||||
|
"rag-api": {
|
||||||
|
"commandName": "Project",
|
||||||
|
"launchBrowser": true,
|
||||||
|
"environmentVariables": {
|
||||||
|
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||||
|
},
|
||||||
|
"applicationUrl": "https://localhost:58424;http://localhost:58426"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
namespace Api.Requests
{
    /// <summary>
    /// Request payload for indexing a plain-text document. Every member is nullable at the
    /// binding level; length validation of <see cref="Text"/> happens in the service that consumes it.
    /// </summary>
    public sealed class IndexDocumentRequest
    {
        /// <summary>Raw document text to index.</summary>
        public string? Text { get; set; }

        /// <summary>Optional URL the document came from.</summary>
        public string? SourceUrl { get; set; }

        /// <summary>Optional caller-supplied document type.</summary>
        public string? DocumentType { get; set; }

        /// <summary>Optional caller-supplied title.</summary>
        public string? Title { get; set; }

        /// <summary>Optional free-form string metadata to store with the document.</summary>
        public Dictionary<string, string>? Metadata { get; set; }
    }
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
namespace Api.Requests
{
    /// <summary>
    /// Request payload for a semantic search.
    /// </summary>
    public sealed class SearchRequest
    {
        /// <summary>Free-text query; must be supplied when the object is initialized.</summary>
        public required string QueryText { get; init; }

        /// <summary>Optional list of document types to restrict the search to.</summary>
        public IReadOnlyList<string>? TargetDocumentTypes { get; init; }

        /// <summary>Optional maximum number of results; the consuming service applies its own default and bounds.</summary>
        public int? TopK { get; init; }
    }
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
namespace Api.Responses
{
    /// <summary>
    /// Result of an indexing call: identifies the stored document and summarizes what was indexed.
    /// </summary>
    public sealed class IndexDocumentResponse
    {
        /// <summary>Identifier of the stored document.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Hash of the indexed text.</summary>
        public required string TextHash { get; init; }

        /// <summary>Document type assigned to the document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Confidence attached to <see cref="DocumentType"/>.</summary>
        public double DocumentTypeConfidence { get; init; }

        /// <summary>Title assigned to the document.</summary>
        public required string Title { get; init; }

        /// <summary>Number of chunks written by this call.</summary>
        public int Chunks { get; init; }

        /// <summary>Length of the indexed text in characters.</summary>
        public int Characters { get; init; }

        /// <summary><c>true</c> when an already-indexed document was returned instead of indexing again.</summary>
        public bool Cached { get; init; }
    }
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
namespace Api.Responses
{
    /// <summary>
    /// Result of a semantic search: matching documents, each with its matched chunks.
    /// </summary>
    public sealed class SearchResponse
    {
        /// <summary>Matching documents; empty when nothing matched.</summary>
        public IReadOnlyList<SearchDocumentResult> Results { get; init; } = [];
    }

    /// <summary>
    /// One document in a search result.
    /// </summary>
    public sealed class SearchDocumentResult
    {
        /// <summary>Identifier of the matched document.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Document type of the matched document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Title of the matched document.</summary>
        public required string Title { get; init; }

        /// <summary>Source URL of the document, when one was recorded.</summary>
        public string? SourceUrl { get; init; }

        /// <summary>Relevance score of the document.</summary>
        public double Score { get; init; }

        /// <summary>Chunks of the document that matched the query; empty by default.</summary>
        public IReadOnlyList<SearchChunkResult> MatchedChunks { get; init; } = [];
    }

    /// <summary>
    /// One matched chunk of a document in a search result.
    /// </summary>
    public sealed class SearchChunkResult
    {
        /// <summary>Identifier of the chunk.</summary>
        public required string ChunkId { get; init; }

        /// <summary>Position of the chunk within its document.</summary>
        public int ChunkIndex { get; init; }

        /// <summary>Text of the chunk.</summary>
        public required string Text { get; init; }

        /// <summary>Relevance score of the chunk.</summary>
        public double Score { get; init; }
    }
}
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// <see cref="IAiClient"/> decorator that looks up embeddings and chat completions in
/// <see cref="IRagRepository"/> before delegating to <see cref="RawAiClient"/>, and stores
/// every freshly computed result under a key derived from provider, model and input.
/// </summary>
public sealed class CachedAiClient : IAiClient
{
    private readonly RawAiClient _raw;
    private readonly IRagRepository _repository;
    private readonly AiSettings _settings;

    public CachedAiClient(RawAiClient raw, IRagRepository repository, IOptions<AiSettings> options)
    {
        _raw = raw;
        _repository = repository;
        _settings = options.Value;
    }

    /// <summary>
    /// Returns the embedding for <paramref name="input"/>, served from the repository when an
    /// entry for the same provider, model and text hash exists; otherwise computed and stored.
    /// </summary>
    public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
    {
        var model = GetEmbeddingModel();
        var textHash = HashHelper.Compute(input);
        var cacheKey = HashHelper.Compute($"embedding:{_settings.Provider}:{model}:{textHash}");

        var cached = await _repository.GetEmbeddingAsync(cacheKey, ct);
        if (cached is not null)
        {
            return cached;
        }

        var vector = await _raw.CreateEmbeddingAsync(input, ct);
        await _repository.SaveEmbeddingAsync(cacheKey, model, textHash, vector, ct);
        return vector;
    }

    /// <summary>
    /// Returns the chat completion for the given prompts and temperature, served from the
    /// repository when an entry for the same provider, model, temperature and prompts exists;
    /// otherwise computed and stored.
    /// </summary>
    public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
    {
        var model = GetChatModel();
        var cacheKey = BuildChatCacheKey(model, systemPrompt, userPrompt, temperature);

        var cached = await _repository.GetChatCompletionAsync(cacheKey, ct);
        if (cached is not null)
        {
            return cached;
        }

        var response = await _raw.CreateChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);
        await _repository.SaveChatCompletionAsync(cacheKey, model, temperature, response, ct);
        return response;
    }

    // Builds the chat cache key. Each prompt is hashed on its own before being joined: joining the
    // raw prompts with ':' would let ("a:b", "c") and ("a", "b:c") share a key and return each
    // other's cached answer. The temperature is formatted with the invariant culture so the key
    // does not change with the server's locale ("0.20" vs "0,20").
    private string BuildChatCacheKey(string model, string systemPrompt, string userPrompt, decimal temperature)
    {
        var temperatureKey = temperature.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture);
        var systemHash = HashHelper.Compute(systemPrompt);
        var userHash = HashHelper.Compute(userPrompt);
        return HashHelper.Compute($"chat:{_settings.Provider}:{model}:{temperatureKey}:{systemHash}:{userHash}");
    }

    // The Ollama model is used when the provider is "Ollama" (case-insensitive); any other provider value selects OpenAI.
    private string GetEmbeddingModel() => IsOllama() ? _settings.Ollama.EmbeddingModel : _settings.OpenAI.EmbeddingModel;

    private string GetChatModel() => IsOllama() ? _settings.Ollama.ChatModel : _settings.OpenAI.ChatModel;

    private bool IsOllama() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase);
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
namespace Api.Services.Contracts.Rag;
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
public interface IAiRagClient
|
public interface IAiClient
|
||||||
{
|
{
|
||||||
Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct);
|
Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct);
|
||||||
Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, CancellationToken ct);
|
Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct);
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
using Api.Services.Contracts.Models;
|
||||||
|
|
||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Assigns a document type, a confidence and a title to a piece of text.
/// </summary>
public interface IDocumentClassifier
{
    /// <summary>
    /// Classifies <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Document text to classify.</param>
    /// <param name="providedType">Optional caller-supplied document type.</param>
    /// <param name="providedTitle">Optional caller-supplied title.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The classification for the document.</returns>
    Task<DocumentClassification> ClassifyAsync(
        string text,
        string? providedType,
        string? providedTitle,
        CancellationToken ct);
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
using Api.Services.Contracts.Models;
|
||||||
|
|
||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Persistence contract for indexed documents, their chunks, and the embedding and
/// chat-completion caches.
/// </summary>
public interface IRagRepository
{
    /// <summary>Prepares the underlying store for use. NOTE(review): the exact work done is implementation-defined and not visible here.</summary>
    Task InitializeAsync(CancellationToken ct);

    // ----- Documents -----

    /// <summary>Looks up a document by text hash and source URL; <c>null</c> when none is stored.</summary>
    Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct);

    /// <summary>Looks up a document by identifier; <c>null</c> when none is stored.</summary>
    Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct);

    /// <summary>Stores a document together with its chunks.</summary>
    Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct);

    /// <summary>Returns candidate chunks for a query embedding, optionally restricted to the given document types.</summary>
    Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(
        float[] queryEmbedding,
        IReadOnlyList<string>? targetTypes,
        int topK,
        CancellationToken ct);

    // ----- Embedding cache -----

    /// <summary>Returns the cached embedding for <paramref name="cacheKey"/>, or <c>null</c> on a miss.</summary>
    Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct);

    /// <summary>Stores an embedding under <paramref name="cacheKey"/>.</summary>
    Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct);

    // ----- Chat-completion cache -----

    /// <summary>Returns the cached chat completion for <paramref name="cacheKey"/>, or <c>null</c> on a miss.</summary>
    Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct);

    /// <summary>Stores a chat completion under <paramref name="cacheKey"/>.</summary>
    Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct);
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
using Api.Requests;
|
||||||
|
using Api.Responses;
|
||||||
|
using Api.Services.Contracts.Models;
|
||||||
|
|
||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Application-level contract for indexing documents and searching them.
/// </summary>
public interface IRagService
{
    /// <summary>Indexes the text carried by <paramref name="request"/>.</summary>
    Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct);

    /// <summary>Indexes the text of an uploaded PDF file.</summary>
    Task<IndexDocumentResponse> IndexPdfAsync(
        IFormFile file,
        string? documentType,
        string? title,
        string? sourceUrl,
        CancellationToken ct);

    /// <summary>Runs a search for <paramref name="request"/>.</summary>
    Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct);

    /// <summary>Returns the stored document with the given identifier, or <c>null</c> when it does not exist.</summary>
    Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct);
}
|
||||||
+1
-1
@@ -1,4 +1,4 @@
|
|||||||
namespace Api.Services.Contracts.Rag;
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
public interface ITextChunker
|
public interface ITextChunker
|
||||||
{
|
{
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
namespace Api.Services.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Text extraction and normalization contract.
/// </summary>
public interface ITextExtractor
{
    /// <summary>Extracts the text of a PDF read from <paramref name="stream"/>.</summary>
    Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct);

    /// <summary>Normalizes <paramref name="value"/>. NOTE(review): the normalization rules are implementation-defined and not visible here.</summary>
    string Normalize(string value);
}
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
namespace Api.Services.Contracts.Models
{
    /// <summary>
    /// Outcome of classifying a document: its type, the confidence in that type, and a title.
    /// </summary>
    public sealed class DocumentClassification
    {
        /// <summary>Document type assigned by the classifier.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Confidence attached to <see cref="DocumentType"/>.</summary>
        public double Confidence { get; init; }

        /// <summary>Title chosen for the document.</summary>
        public required string Title { get; init; }

        /// <summary>Additional string metadata; empty by default.</summary>
        public Dictionary<string, string> Metadata { get; init; } = [];
    }
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
namespace Api.Services.Contracts.Models
{
    /// <summary>
    /// Storage record for one chunk of a document together with its embedding vector.
    /// </summary>
    public sealed class RagChunkRecord
    {
        /// <summary>Identifier of the chunk.</summary>
        public required string Id { get; init; }

        /// <summary>Identifier of the document the chunk belongs to.</summary>
        public required string DocumentId { get; init; }

        /// <summary>Position of the chunk within its document.</summary>
        public int ChunkIndex { get; init; }

        /// <summary>Text of the chunk.</summary>
        public required string Text { get; init; }

        /// <summary>Embedding vector of <see cref="Text"/>.</summary>
        public required float[] Embedding { get; init; }
    }
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
namespace Api.Services.Contracts.Models
{
    /// <summary>
    /// Read model returned when a single stored document is fetched by identifier.
    /// </summary>
    public sealed class RagDocumentDetails
    {
        /// <summary>Identifier of the document.</summary>
        public required string Id { get; init; }

        /// <summary>Document type of the document.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Title of the document.</summary>
        public required string Title { get; init; }

        /// <summary>Source URL, when one was recorded.</summary>
        public string? SourceUrl { get; init; }

        /// <summary>Stored text of the document.</summary>
        public required string Text { get; init; }

        /// <summary>Hash of <see cref="Text"/>.</summary>
        public required string TextHash { get; init; }

        /// <summary>Creation timestamp of the document.</summary>
        public DateTimeOffset CreatedAt { get; init; }
    }
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
namespace Api.Services.Contracts.Models
{
    /// <summary>
    /// Storage record for an indexed document.
    /// </summary>
    public sealed class RagDocumentRecord
    {
        /// <summary>Identifier of the document.</summary>
        public required string Id { get; init; }

        /// <summary>Document type assigned at indexing time.</summary>
        public required string DocumentType { get; init; }

        /// <summary>Title of the document.</summary>
        public required string Title { get; init; }

        /// <summary>Source URL, when one was recorded.</summary>
        public string? SourceUrl { get; init; }

        /// <summary>Stored text of the document.</summary>
        public required string Text { get; init; }

        /// <summary>Hash of <see cref="Text"/>.</summary>
        public required string TextHash { get; init; }

        /// <summary>Confidence attached to <see cref="DocumentType"/>.</summary>
        public double TypeConfidence { get; init; }

        /// <summary>Document metadata serialized as JSON; an empty JSON object by default.</summary>
        public string MetadataJson { get; init; } = "{}";

        /// <summary>Creation timestamp of the document.</summary>
        public DateTimeOffset CreatedAt { get; init; }
    }
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
namespace Api.Services.Contracts.Models
{
    /// <summary>
    /// A chunk returned by a repository search, paired with its owning document and its score.
    /// </summary>
    public sealed class SearchCandidateChunk
    {
        /// <summary>Document the chunk belongs to.</summary>
        public required RagDocumentRecord Document { get; init; }

        /// <summary>The matched chunk.</summary>
        public required RagChunkRecord Chunk { get; init; }

        /// <summary>Score of the chunk for the query; higher scores are ranked first by the search service.</summary>
        public double Score { get; init; }
    }
}
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
using System.Text.RegularExpressions;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Services.Contracts.Models;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Keyword-based <see cref="IDocumentClassifier"/>. A caller-supplied type is normalized and
/// trusted; otherwise the text is scored against a fixed keyword list per document type and
/// the type with the most distinct keyword hits wins.
/// </summary>
public sealed class DocumentClassifier : IDocumentClassifier
{
    // Upper bound for a title derived from the document text.
    private const int MaxDerivedTitleLength = 120;

    private static readonly HashSet<string> KnownTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "cv", "job", "article", "contract", "invoice", "product", "documentation", "unknown"
    };

    // Keyword signals per document type. Declaration order is the tie-break order: when two
    // types have the same number of hits, the one listed first wins.
    private static readonly (string Type, Regex[] Signals)[] TypeSignals =
    {
        ("cv", CompileSignals("curriculum vitae", "resume", "work experience", "professional experience", "education", "skills", "technologies", "linkedin", "github")),
        ("job", CompileSignals("job description", "requirements", "responsibilities", "qualifications", "apply", "we are looking", "salary", "benefits", "remote", "hybrid")),
        ("contract", CompileSignals("agreement", "contract", "party", "parties", "liability", "termination", "confidentiality", "governing law")),
        ("invoice", CompileSignals("invoice", "vat", "subtotal", "total", "amount due", "due date", "billing")),
        ("documentation", CompileSignals("api", "endpoint", "configuration", "install", "usage", "parameters", "response", "request")),
        ("product", CompileSignals("features", "pricing", "sku", "product", "specification", "warranty"))
    };

    /// <summary>
    /// Classifies <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Document text.</param>
    /// <param name="providedType">
    /// Optional caller-supplied type. When present it is normalized and used as-is, with
    /// confidence 1.0 for a known type other than "unknown" and 0.6 otherwise.
    /// </param>
    /// <param name="providedTitle">Optional caller-supplied title; used trimmed when present.</param>
    /// <param name="ct">Not observed: classification is synchronous and completes immediately.</param>
    /// <returns>
    /// The classification. Without a provided type, the confidence is 0.25 for "unknown" and
    /// 0.45 + 0.08 per keyword hit (capped at 0.95) otherwise.
    /// </returns>
    public Task<DocumentClassification> ClassifyAsync(string text, string? providedType, string? providedTitle, CancellationToken ct)
    {
        if (!string.IsNullOrWhiteSpace(providedType))
        {
            var normalized = NormalizeType(providedType);
            return Task.FromResult(new DocumentClassification
            {
                DocumentType = normalized,
                Confidence = KnownTypes.Contains(normalized) && normalized != "unknown" ? 1.0 : 0.6,
                Title = BuildTitle(providedTitle, text, normalized)
            });
        }

        var lower = text.ToLowerInvariant();

        // OrderByDescending is a stable sort, so ties keep the declaration order of TypeSignals.
        var best = TypeSignals
            .Select(entry => (entry.Type, Hits: entry.Signals.Count(signal => signal.IsMatch(lower))))
            .OrderByDescending(candidate => candidate.Hits)
            .First();

        var type = best.Hits <= 0 ? "unknown" : best.Type;
        var confidence = best.Hits <= 0 ? 0.25 : Math.Min(0.95, 0.45 + best.Hits * 0.08);

        return Task.FromResult(new DocumentClassification
        {
            DocumentType = type,
            Confidence = confidence,
            Title = BuildTitle(providedTitle, text, type)
        });
    }

    // Compiles each keyword into a pattern anchored at a word start. A plain substring test
    // produced false hits inside unrelated words ("vat" in "private", "api" in "rapid",
    // "resume" in "presume"); the trailing edge is left open so plurals and inflections
    // ("invoices", "contracts") still count.
    private static Regex[] CompileSignals(params string[] terms) =>
        terms
            .Select(term => new Regex(@"\b" + Regex.Escape(term), RegexOptions.CultureInvariant))
            .ToArray();

    // Lower-cases and trims the type, replacing every character outside [a-z0-9_-] with '-'.
    private static string NormalizeType(string value)
    {
        var cleaned = Regex.Replace(value.Trim().ToLowerInvariant(), "[^a-z0-9_-]", "-");
        return string.IsNullOrWhiteSpace(cleaned) ? "unknown" : cleaned;
    }

    // Title preference: the provided title, else the first sentence/line longer than 20
    // characters (truncated to MaxDerivedTitleLength), else "<type> document".
    private static string BuildTitle(string? providedTitle, string text, string documentType)
    {
        if (!string.IsNullOrWhiteSpace(providedTitle))
        {
            return providedTitle.Trim();
        }

        var firstLine = text.Split('.', '\n', '\r').Select(x => x.Trim()).FirstOrDefault(x => x.Length > 20);
        if (!string.IsNullOrWhiteSpace(firstLine))
        {
            if (firstLine.Length <= MaxDerivedTitleLength)
            {
                return firstLine;
            }

            // Do not cut between the two halves of a surrogate pair; that would leave an
            // unpaired surrogate (invalid UTF-16) at the end of the title.
            var cut = char.IsHighSurrogate(firstLine[MaxDerivedTitleLength - 1])
                ? MaxDerivedTitleLength - 1
                : MaxDerivedTitleLength;
            return firstLine[..cut];
        }

        return $"{documentType} document";
    }
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
using System.Security.Cryptography;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Hashing helper used for text fingerprints and cache keys.
/// </summary>
public static class HashHelper
{
    /// <summary>
    /// Computes the SHA-256 hash of the UTF-8 encoding of <paramref name="value"/>.
    /// </summary>
    /// <param name="value">Text to hash; <c>null</c> is hashed as the empty string.</param>
    /// <returns>The 64-character upper-case hexadecimal digest.</returns>
    public static string Compute(string value)
    {
        var payload = Encoding.UTF8.GetBytes(value ?? string.Empty);
        var digest = SHA256.HashData(payload);
        return Convert.ToHexString(digest);
    }
}
|
||||||
@@ -0,0 +1,179 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
using Api.Responses;
|
||||||
|
using Api.Requests;
|
||||||
|
using Api.Services.Contracts.Models;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Coordinates document indexing and semantic search: normalizes or extracts text, classifies
/// and chunks it, embeds every chunk through <see cref="IAiClient"/>, and persists and queries
/// through <see cref="IRagRepository"/>.
/// </summary>
public sealed class RagService : IRagService
{
    private const int MinDocumentChars = 40;
    private const int MinQueryChars = 10;
    private const int MaxChunksPerDocument = 3;

    private readonly ITextExtractor _textExtractor;
    private readonly ITextChunker _chunker;
    private readonly IDocumentClassifier _classifier;
    private readonly IAiClient _ai;
    private readonly IRagRepository _repository;
    private readonly RagSettings _settings;

    public RagService(
        ITextExtractor textExtractor,
        ITextChunker chunker,
        IDocumentClassifier classifier,
        IAiClient ai,
        IRagRepository repository,
        IOptions<RagSettings> options)
    {
        (_textExtractor, _chunker, _classifier) = (textExtractor, chunker, classifier);
        (_ai, _repository, _settings) = (ai, repository, options.Value);
    }

    /// <summary>
    /// Indexes the text of <paramref name="request"/> after normalizing it and cutting it to the configured maximum length.
    /// </summary>
    /// <exception cref="InvalidOperationException">The normalized text is shorter than 40 characters.</exception>
    public async Task<IndexDocumentResponse> IndexTextAsync(IndexDocumentRequest request, CancellationToken ct)
    {
        var normalized = _textExtractor.Normalize(request.Text ?? string.Empty);
        if (normalized.Length < MinDocumentChars)
        {
            throw new InvalidOperationException("Document text is too short.");
        }

        return await IndexNormalizedTextAsync(
            ClampToMaxChars(normalized),
            request.DocumentType,
            request.Title,
            request.SourceUrl,
            request.Metadata,
            ct);
    }

    /// <summary>
    /// Indexes the text extracted from an uploaded PDF. The file name is used as the title when
    /// none is given and is stored as the "fileName" metadata entry.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The file is empty, exceeds the configured size limit, does not have a .pdf extension,
    /// or yields fewer than 40 characters of text.
    /// </exception>
    public async Task<IndexDocumentResponse> IndexPdfAsync(IFormFile file, string? documentType, string? title, string? sourceUrl, CancellationToken ct)
    {
        if (file.Length <= 0)
        {
            throw new InvalidOperationException("Uploaded file is empty.");
        }

        var maxBytes = _settings.MaxFileSizeMb * 1024L * 1024L;
        if (file.Length > maxBytes)
        {
            throw new InvalidOperationException($"File is too large. Max size is {_settings.MaxFileSizeMb} MB.");
        }

        var isPdf = string.Equals(Path.GetExtension(file.FileName), ".pdf", StringComparison.OrdinalIgnoreCase);
        if (!isPdf)
        {
            throw new InvalidOperationException("Only PDF files are supported by this endpoint.");
        }

        await using var pdf = file.OpenReadStream();
        var extracted = ClampToMaxChars(await _textExtractor.ExtractPdfAsync(pdf, ct));
        if (extracted.Length < MinDocumentChars)
        {
            throw new InvalidOperationException("Could not extract enough text from the PDF.");
        }

        var metadata = new Dictionary<string, string> { ["fileName"] = file.FileName };
        return await IndexNormalizedTextAsync(extracted, documentType, title ?? file.FileName, sourceUrl, metadata, ct);
    }

    /// <summary>
    /// Embeds the normalized query, fetches candidate chunks from the repository and groups them
    /// by document. Each document is scored by its best chunk and carries its top 3 chunks.
    /// </summary>
    /// <exception cref="InvalidOperationException">The normalized query is shorter than 10 characters.</exception>
    public async Task<SearchResponse> SearchAsync(SearchRequest request, CancellationToken ct)
    {
        var query = _textExtractor.Normalize(request.QueryText);
        if (query.Length < MinQueryChars)
        {
            throw new InvalidOperationException("Search query is too short.");
        }

        // The requested (or default) TopK is bounded to [1, MaxTopK]; MaxTopK itself is floored at 1.
        var upperBound = Math.Max(1, _settings.MaxTopK);
        var topK = Math.Clamp(request.TopK ?? _settings.DefaultTopK, 1, upperBound);

        var queryEmbedding = await _ai.CreateEmbeddingAsync(query, ct);
        var candidates = await _repository.SearchChunksAsync(queryEmbedding, request.TargetDocumentTypes, topK, ct);

        var documents = new List<SearchDocumentResult>();
        foreach (var group in candidates.GroupBy(candidate => candidate.Document.Id))
        {
            // Ranked once, best chunk first; the head of the ranking supplies the document fields and score.
            var ranked = group.OrderByDescending(candidate => candidate.Score).ToList();
            var top = ranked[0];

            var matched = new List<SearchChunkResult>();
            foreach (var hit in ranked.Take(MaxChunksPerDocument))
            {
                matched.Add(new SearchChunkResult
                {
                    ChunkId = hit.Chunk.Id,
                    ChunkIndex = hit.Chunk.ChunkIndex,
                    Text = hit.Chunk.Text,
                    Score = hit.Score
                });
            }

            documents.Add(new SearchDocumentResult
            {
                DocumentId = top.Document.Id,
                DocumentType = top.Document.DocumentType,
                Title = top.Document.Title,
                SourceUrl = top.Document.SourceUrl,
                Score = top.Score,
                MatchedChunks = matched
            });
        }

        var results = documents
            .OrderByDescending(document => document.Score)
            .Take(topK)
            .ToList();

        return new SearchResponse { Results = results };
    }

    /// <summary>
    /// Returns the stored document with the given identifier, or <c>null</c> when the repository has none.
    /// </summary>
    public async Task<RagDocumentDetails?> GetDocumentAsync(string documentId, CancellationToken ct)
    {
        var stored = await _repository.GetDocumentByIdAsync(documentId, ct);
        if (stored is null)
        {
            return null;
        }

        return new RagDocumentDetails
        {
            Id = stored.Id,
            DocumentType = stored.DocumentType,
            Title = stored.Title,
            SourceUrl = stored.SourceUrl,
            Text = stored.Text,
            TextHash = stored.TextHash,
            CreatedAt = stored.CreatedAt
        };
    }

    // Shared indexing path. A document already stored under the same text hash and source URL is
    // returned as a cache hit (Cached = true, Chunks = 0) without re-embedding; otherwise the text
    // is classified, chunked, embedded chunk by chunk and saved.
    private async Task<IndexDocumentResponse> IndexNormalizedTextAsync(
        string text,
        string? documentType,
        string? title,
        string? sourceUrl,
        Dictionary<string, string>? metadata,
        CancellationToken ct)
    {
        var textHash = HashHelper.Compute(text);

        var existing = await _repository.GetDocumentByTextHashAsync(textHash, sourceUrl, ct);
        if (existing is not null)
        {
            return BuildResponse(existing, chunkCount: 0, cached: true);
        }

        var classification = await _classifier.ClassifyAsync(text, documentType, title, ct);
        var pieces = _chunker.Chunk(text, _settings.ChunkSize, _settings.ChunkOverlap);

        var document = new RagDocumentRecord
        {
            Id = Guid.NewGuid().ToString("N"),
            DocumentType = classification.DocumentType,
            Title = classification.Title,
            SourceUrl = sourceUrl,
            Text = text,
            TextHash = textHash,
            TypeConfidence = classification.Confidence,
            // Caller-supplied metadata wins over whatever the classifier produced.
            MetadataJson = JsonSerializer.Serialize(metadata ?? classification.Metadata),
            CreatedAt = DateTimeOffset.UtcNow
        };

        var records = new List<RagChunkRecord>();
        for (var index = 0; index < pieces.Count; index++)
        {
            ct.ThrowIfCancellationRequested();

            var piece = pieces[index];
            var embedding = await _ai.CreateEmbeddingAsync(piece, ct);
            records.Add(new RagChunkRecord
            {
                Id = Guid.NewGuid().ToString("N"),
                DocumentId = document.Id,
                ChunkIndex = index,
                Text = piece,
                Embedding = embedding
            });
        }

        await _repository.SaveDocumentAsync(document, records, ct);
        return BuildResponse(document, records.Count, cached: false);
    }

    // Cuts the text to the configured maximum number of characters.
    private string ClampToMaxChars(string text) =>
        text.Length > _settings.MaxTextChars ? text[.._settings.MaxTextChars] : text;

    // Maps a stored document to the indexing response returned to the caller.
    private static IndexDocumentResponse BuildResponse(RagDocumentRecord document, int chunkCount, bool cached) =>
        new()
        {
            DocumentId = document.Id,
            TextHash = document.TextHash,
            DocumentType = document.DocumentType,
            DocumentTypeConfidence = document.TypeConfidence,
            Title = document.Title,
            Chunks = chunkCount,
            Characters = document.Text.Length,
            Cached = cached
        };
}
|
||||||
@@ -0,0 +1,116 @@
|
|||||||
|
using System.Net.Http.Headers;
|
||||||
|
using System.Text;
|
||||||
|
using System.Text.Json;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Settings;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
public sealed class RawAiClient : IAiClient
|
||||||
|
{
|
||||||
|
private readonly HttpClient _http;
|
||||||
|
private readonly AiSettings _settings;
|
||||||
|
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
|
||||||
|
{
|
||||||
|
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
|
||||||
|
};
|
||||||
|
|
||||||
|
public RawAiClient(HttpClient http, IOptions<AiSettings> options)
|
||||||
|
{
|
||||||
|
_http = http;
|
||||||
|
_settings = options.Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<float[]> CreateEmbeddingAsync(string input, CancellationToken ct)
|
||||||
|
{
|
||||||
|
return IsOllama() ? await CreateOllamaEmbeddingAsync(input, ct) : await CreateOpenAiEmbeddingAsync(input, ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async Task<string> CreateChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
|
||||||
|
{
|
||||||
|
return IsOllama()
|
||||||
|
? await CreateOllamaChatCompletionAsync(systemPrompt, userPrompt, temperature, ct)
|
||||||
|
: await CreateOpenAiChatCompletionAsync(systemPrompt, userPrompt, temperature, ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
private bool IsOllama() => string.Equals(_settings.Provider, "Ollama", StringComparison.OrdinalIgnoreCase);
|
||||||
|
|
||||||
|
private async Task<float[]> CreateOpenAiEmbeddingAsync(string input, CancellationToken ct)
|
||||||
|
{
|
||||||
|
if (string.IsNullOrWhiteSpace(_settings.OpenAI.ApiKey)) throw new InvalidOperationException("OpenAI API key is missing.");
|
||||||
|
using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/embeddings");
|
||||||
|
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _settings.OpenAI.ApiKey);
|
||||||
|
request.Content = ToJson(new { model = _settings.OpenAI.EmbeddingModel, input });
|
||||||
|
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
|
||||||
|
cts.CancelAfter(TimeSpan.FromSeconds(Math.Max(15, _settings.OpenAI.TimeoutSeconds)));
|
||||||
|
using var response = await _http.SendAsync(request, cts.Token);
|
||||||
|
var json = await response.Content.ReadAsStringAsync(cts.Token);
|
||||||
|
if (!response.IsSuccessStatusCode) throw new InvalidOperationException($"OpenAI embeddings failed: {(int)response.StatusCode} {json}");
|
||||||
|
using var doc = JsonDocument.Parse(json);
|
||||||
|
return doc.RootElement.GetProperty("data")[0].GetProperty("embedding").EnumerateArray().Select(x => x.GetSingle()).ToArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
private async Task<string> CreateOpenAiChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
|
||||||
|
{
|
||||||
|
if (string.IsNullOrWhiteSpace(_settings.OpenAI.ApiKey)) throw new InvalidOperationException("OpenAI API key is missing.");
|
||||||
|
using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/chat/completions");
|
||||||
|
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _settings.OpenAI.ApiKey);
|
||||||
|
request.Content = ToJson(new
|
||||||
|
{
|
||||||
|
model = _settings.OpenAI.ChatModel,
|
||||||
|
temperature,
|
||||||
|
response_format = new { type = "json_object" },
|
||||||
|
messages = new[]
|
||||||
|
{
|
||||||
|
new { role = "system", content = systemPrompt },
|
||||||
|
new { role = "user", content = userPrompt }
|
||||||
|
}
|
||||||
|
});
|
||||||
|
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
|
||||||
|
cts.CancelAfter(TimeSpan.FromSeconds(Math.Max(15, _settings.OpenAI.TimeoutSeconds)));
|
||||||
|
using var response = await _http.SendAsync(request, cts.Token);
|
||||||
|
var json = await response.Content.ReadAsStringAsync(cts.Token);
|
||||||
|
if (!response.IsSuccessStatusCode) throw new InvalidOperationException($"OpenAI chat failed: {(int)response.StatusCode} {json}");
|
||||||
|
using var doc = JsonDocument.Parse(json);
|
||||||
|
return doc.RootElement.GetProperty("choices")[0].GetProperty("message").GetProperty("content").GetString() ?? "{}";
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Requests an embedding for <paramref name="input"/> from the Ollama
/// <c>/api/embeddings</c> endpoint under the configured base URL.
/// </summary>
/// <param name="input">Text sent as the <c>prompt</c> field.</param>
/// <param name="ct">Caller token; it is linked with a timeout of at least 30 seconds.</param>
/// <returns>The values of the response's <c>embedding</c> array as single-precision floats.</returns>
/// <exception cref="InvalidOperationException">The endpoint answered with a non-success status code.</exception>
private async Task<float[]> CreateOllamaEmbeddingAsync(string input, CancellationToken ct)
{
    var ollama = _settings.Ollama;
    var endpoint = $"{ollama.BaseUrl.TrimEnd('/')}/api/embeddings";

    using var timeoutSource = CancellationTokenSource.CreateLinkedTokenSource(ct);
    timeoutSource.CancelAfter(TimeSpan.FromSeconds(Math.Max(30, ollama.TimeoutSeconds)));

    var requestBody = ToJson(new { model = ollama.EmbeddingModel, prompt = input });
    using var response = await _http.PostAsync(endpoint, requestBody, timeoutSource.Token);

    var body = await response.Content.ReadAsStringAsync(timeoutSource.Token);
    if (!response.IsSuccessStatusCode)
    {
        var status = (int)response.StatusCode;
        throw new InvalidOperationException($"Ollama embeddings failed: {status} {body}");
    }

    using var parsed = JsonDocument.Parse(body);
    var embedding = parsed.RootElement.GetProperty("embedding");

    var vector = new float[embedding.GetArrayLength()];
    var position = 0;
    foreach (var component in embedding.EnumerateArray())
    {
        vector[position++] = component.GetSingle();
    }

    return vector;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Posts a non-streaming, JSON-formatted chat request to the Ollama <c>/api/chat</c>
/// endpoint and returns the content of the response message.
/// </summary>
/// <param name="systemPrompt">Content sent as the <c>system</c> message.</param>
/// <param name="userPrompt">Content sent as the <c>user</c> message.</param>
/// <param name="temperature">Sampling temperature; converted to <c>float</c> and sent under <c>options</c>.</param>
/// <param name="ct">Caller token; it is linked with a timeout of at least 30 seconds.</param>
/// <returns>The response's <c>message.content</c>, or <c>"{}"</c> when that value is JSON null.</returns>
/// <exception cref="InvalidOperationException">The endpoint answered with a non-success status code.</exception>
private async Task<string> CreateOllamaChatCompletionAsync(string systemPrompt, string userPrompt, decimal temperature, CancellationToken ct)
{
    var ollama = _settings.Ollama;
    var endpoint = $"{ollama.BaseUrl.TrimEnd('/')}/api/chat";

    using var timeoutSource = CancellationTokenSource.CreateLinkedTokenSource(ct);
    timeoutSource.CancelAfter(TimeSpan.FromSeconds(Math.Max(30, ollama.TimeoutSeconds)));

    var payload = new
    {
        model = ollama.ChatModel,
        stream = false,
        format = "json",
        messages = new[]
        {
            new { role = "system", content = systemPrompt },
            new { role = "user", content = userPrompt }
        },
        options = new { temperature = (float)temperature }
    };

    using var response = await _http.PostAsync(endpoint, ToJson(payload), timeoutSource.Token);

    var body = await response.Content.ReadAsStringAsync(timeoutSource.Token);
    if (!response.IsSuccessStatusCode)
    {
        var status = (int)response.StatusCode;
        throw new InvalidOperationException($"Ollama chat failed: {status} {body}");
    }

    using var parsed = JsonDocument.Parse(body);
    var content = parsed.RootElement.GetProperty("message").GetProperty("content");
    return content.GetString() ?? "{}";
}
|
||||||
|
|
||||||
|
/// <summary>
/// Serializes <paramref name="payload"/> with the shared <c>JsonOptions</c> and wraps the
/// result in UTF-8 <c>application/json</c> request content.
/// </summary>
private static StringContent ToJson<T>(T payload)
{
    var serialized = JsonSerializer.Serialize(payload, JsonOptions);
    return new StringContent(serialized, Encoding.UTF8, "application/json");
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,238 @@
|
|||||||
|
using Microsoft.Data.SqlClient;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using Api.Services.Contracts.Models;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
public sealed class SqlRagRepository : IRagRepository
|
||||||
|
{
|
||||||
|
private readonly string _connectionString;
|
||||||
|
|
||||||
|
/// <summary>
/// Creates the repository using the <c>RagDb</c> connection string from configuration.
/// </summary>
/// <param name="configuration">Application configuration to read the connection string from.</param>
/// <exception cref="InvalidOperationException">No <c>RagDb</c> connection string is configured.</exception>
public SqlRagRepository(IConfiguration configuration)
{
    var ragDb = configuration.GetConnectionString("RagDb");
    if (ragDb is null)
    {
        throw new InvalidOperationException("Connection string 'RagDb' is missing.");
    }

    _connectionString = ragDb;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Ensures the target database exists, then runs <c>Database/schema.sql</c> (resolved
/// relative to <see cref="AppContext.BaseDirectory"/>) batch by batch.
/// </summary>
/// <remarks>
/// Batches are separated by lines that contain only <c>GO</c> (case-insensitive, surrounding
/// whitespace ignored). Splitting on the bare substring "GO" would also cut statements in
/// the middle of identifiers, literals or comments that happen to contain those letters.
/// </remarks>
/// <param name="ct">Token used for the file read and every database call.</param>
public async Task InitializeAsync(CancellationToken ct)
{
    await EnsureDatabaseExistsAsync(ct);

    var schemaPath = Path.Combine(AppContext.BaseDirectory, "Database", "schema.sql");
    var script = await File.ReadAllTextAsync(schemaPath, ct);

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);

    foreach (var batch in SplitBatches(script))
    {
        await using var command = new SqlCommand(batch, connection);
        await command.ExecuteNonQueryAsync(ct);
    }

    // Yields the non-empty, trimmed batches of a script whose batches are delimited by GO lines.
    static IEnumerable<string> SplitBatches(string text)
    {
        var pending = new List<string>();
        foreach (var rawLine in text.Split('\n'))
        {
            if (string.Equals(rawLine.Trim(), "GO", StringComparison.OrdinalIgnoreCase))
            {
                var finished = string.Join("\n", pending).Trim();
                pending.Clear();
                if (finished.Length > 0)
                {
                    yield return finished;
                }

                continue;
            }

            pending.Add(rawLine.TrimEnd('\r'));
        }

        // Whatever follows the last separator (or the whole script when there is none).
        var tail = string.Join("\n", pending).Trim();
        if (tail.Length > 0)
        {
            yield return tail;
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns the most recently created document with the given text hash, optionally
/// restricted to a specific source URL.
/// </summary>
/// <param name="textHash">Value compared against the <c>TextHash</c> column.</param>
/// <param name="sourceUrl">When null the source URL is not filtered; otherwise it must match exactly.</param>
/// <param name="ct">Token for the database calls.</param>
/// <returns>The matching document, or null when no row matches.</returns>
public async Task<RagDocumentRecord?> GetDocumentByTextHashAsync(string textHash, string? sourceUrl, CancellationToken ct)
{
    const string query = """
        SELECT TOP 1 Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt
        FROM RagDocuments
        WHERE TextHash = @TextHash AND (@SourceUrl IS NULL OR SourceUrl = @SourceUrl)
        ORDER BY CreatedAt DESC
        """;

    // A null URL is sent as SQL NULL so the "@SourceUrl IS NULL" branch disables the filter.
    object sourceUrlValue = sourceUrl is null ? DBNull.Value : sourceUrl;

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(query, connection);
    command.Parameters.AddWithValue("@TextHash", textHash);
    command.Parameters.AddWithValue("@SourceUrl", sourceUrlValue);

    await using var reader = await command.ExecuteReaderAsync(ct);
    if (!await reader.ReadAsync(ct))
    {
        return null;
    }

    return ReadDocument(reader);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Loads a single document by its identifier.
/// </summary>
/// <param name="id">Value compared against the <c>Id</c> column.</param>
/// <param name="ct">Token for the database calls.</param>
/// <returns>The document, or null when no row has that identifier.</returns>
public async Task<RagDocumentRecord?> GetDocumentByIdAsync(string id, CancellationToken ct)
{
    const string query = """
        SELECT Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt
        FROM RagDocuments
        WHERE Id = @Id
        """;

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(query, connection);
    command.Parameters.AddWithValue("@Id", id);

    await using var reader = await command.ExecuteReaderAsync(ct);
    var found = await reader.ReadAsync(ct);
    if (!found)
    {
        return null;
    }

    return ReadDocument(reader);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Inserts a document and all of its chunks inside one transaction, so either everything
/// is stored or nothing is.
/// </summary>
/// <param name="document">Document row to insert into <c>RagDocuments</c>.</param>
/// <param name="chunks">Chunk rows to insert into <c>RagChunks</c>; each is linked to <paramref name="document"/> by its Id.</param>
/// <param name="ct">Token for the inserts and the commit.</param>
/// <remarks>
/// On failure the transaction is rolled back and the original exception is rethrown. The
/// rollback deliberately ignores <paramref name="ct"/>: the failure is frequently the
/// cancellation itself, and a rollback attempted with an already-cancelled token would be
/// skipped and would replace the original exception with a cancellation error.
/// </remarks>
public async Task SaveDocumentAsync(RagDocumentRecord document, IReadOnlyList<RagChunkRecord> chunks, CancellationToken ct)
{
    const string insertDoc = """
        INSERT INTO RagDocuments (Id, DocumentType, Title, SourceUrl, RawText, TextHash, TypeConfidence, MetadataJson, CreatedAt)
        VALUES (@Id, @DocumentType, @Title, @SourceUrl, @RawText, @TextHash, @TypeConfidence, @MetadataJson, @CreatedAt)
        """;

    const string insertChunk = """
        INSERT INTO RagChunks (Id, DocumentId, ChunkIndex, Text, Embedding)
        VALUES (@Id, @DocumentId, @ChunkIndex, @Text, @Embedding)
        """;

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);
    await using var tx = (SqlTransaction)await connection.BeginTransactionAsync(ct);

    try
    {
        await using (var command = new SqlCommand(insertDoc, connection, tx))
        {
            command.Parameters.AddWithValue("@Id", document.Id);
            command.Parameters.AddWithValue("@DocumentType", document.DocumentType);
            command.Parameters.AddWithValue("@Title", document.Title);
            command.Parameters.AddWithValue("@SourceUrl", (object?)document.SourceUrl ?? DBNull.Value);
            command.Parameters.AddWithValue("@RawText", document.Text);
            command.Parameters.AddWithValue("@TextHash", document.TextHash);
            command.Parameters.AddWithValue("@TypeConfidence", document.TypeConfidence);
            command.Parameters.AddWithValue("@MetadataJson", document.MetadataJson);
            command.Parameters.AddWithValue("@CreatedAt", document.CreatedAt.UtcDateTime);
            await command.ExecuteNonQueryAsync(ct);
        }

        foreach (var chunk in chunks)
        {
            await using var command = new SqlCommand(insertChunk, connection, tx);
            command.Parameters.AddWithValue("@Id", chunk.Id);
            // The chunk is always attached to the document being saved, whatever chunk.DocumentId holds.
            command.Parameters.AddWithValue("@DocumentId", document.Id);
            command.Parameters.AddWithValue("@ChunkIndex", chunk.ChunkIndex);
            command.Parameters.AddWithValue("@Text", chunk.Text);
            command.Parameters.AddWithValue("@Embedding", VectorSerializer.ToBytes(chunk.Embedding));
            await command.ExecuteNonQueryAsync(ct);
        }

        await tx.CommitAsync(ct);
    }
    catch
    {
        try
        {
            await tx.RollbackAsync(CancellationToken.None);
        }
        catch (Exception)
        {
            // The rollback itself can fail (for example on a broken connection or an already
            // completed transaction). The original failure is the useful one to report, and
            // disposing the transaction and connection releases whatever is left.
        }

        throw;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Scores stored chunks against <paramref name="queryEmbedding"/> by cosine similarity and
/// returns the best candidates.
/// </summary>
/// <remarks>
/// Every chunk that passes the optional document-type filter is read from the database and
/// scored in memory; there is no server-side vector search here.
/// </remarks>
/// <param name="queryEmbedding">Embedding to compare each chunk's embedding with.</param>
/// <param name="targetTypes">
/// Optional document types. Blank entries are dropped; the rest are trimmed, lower-cased and
/// de-duplicated, then matched against the lower-cased <c>DocumentType</c> column. Null or an
/// effectively empty list disables the filter.
/// </param>
/// <param name="topK">Requested result size; up to <c>Math.Max(topK * 4, topK)</c> candidates are returned.</param>
/// <param name="ct">Token for the database calls.</param>
/// <returns>Candidates ordered by descending score.</returns>
public async Task<IReadOnlyList<SearchCandidateChunk>> SearchChunksAsync(float[] queryEmbedding, IReadOnlyList<string>? targetTypes, int topK, CancellationToken ct)
{
    string[] normalizedTypes = targetTypes is null
        ? Array.Empty<string>()
        : targetTypes
            .Where(type => !string.IsNullOrWhiteSpace(type))
            .Select(type => type.Trim().ToLowerInvariant())
            .Distinct()
            .ToArray();

    const string baseQuery = """
        SELECT d.Id, d.DocumentType, d.Title, d.SourceUrl, d.RawText, d.TextHash, d.TypeConfidence, d.MetadataJson, d.CreatedAt,
        c.Id, c.DocumentId, c.ChunkIndex, c.Text, c.Embedding
        FROM RagChunks c
        INNER JOIN RagDocuments d ON d.Id = c.DocumentId
        """;

    var query = baseQuery;
    if (normalizedTypes.Length > 0)
    {
        // One named parameter per type: @Type0,@Type1,...
        var placeholders = Enumerable.Range(0, normalizedTypes.Length).Select(i => $"@Type{i}");
        query = baseQuery + " WHERE LOWER(d.DocumentType) IN (" + string.Join(',', placeholders) + ")";
    }

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(query, connection);
    var typeIndex = 0;
    foreach (var type in normalizedTypes)
    {
        command.Parameters.AddWithValue($"@Type{typeIndex}", type);
        typeIndex++;
    }

    await using var reader = await command.ExecuteReaderAsync(ct);

    var scored = new List<SearchCandidateChunk>();
    while (await reader.ReadAsync(ct))
    {
        // Columns 0-8 hold the document, columns 9-13 the chunk.
        var document = ReadDocument(reader, 0);
        var chunk = new RagChunkRecord
        {
            Id = reader.GetString(9),
            DocumentId = reader.GetString(10),
            ChunkIndex = reader.GetInt32(11),
            Text = reader.GetString(12),
            Embedding = VectorSerializer.FromBytes((byte[])reader.GetValue(13))
        };

        var candidate = new SearchCandidateChunk
        {
            Document = document,
            Chunk = chunk,
            Score = VectorSerializer.CosineSimilarity(queryEmbedding, chunk.Embedding)
        };
        scored.Add(candidate);
    }

    var limit = Math.Max(topK * 4, topK);
    return scored
        .OrderByDescending(candidate => candidate.Score)
        .Take(limit)
        .ToList();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads a cached embedding vector from <c>RagEmbeddingCache</c>.
/// </summary>
/// <param name="cacheKey">Value compared against the <c>CacheKey</c> column.</param>
/// <param name="ct">Token for the database calls.</param>
/// <returns>The decoded vector, or null when nothing is cached (or the stored value is not binary).</returns>
public async Task<float[]?> GetEmbeddingAsync(string cacheKey, CancellationToken ct)
{
    const string query = "SELECT Vector FROM RagEmbeddingCache WHERE CacheKey = @CacheKey";

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(query, connection);
    command.Parameters.AddWithValue("@CacheKey", cacheKey);

    var stored = await command.ExecuteScalarAsync(ct);
    if (stored is not byte[] raw)
    {
        return null;
    }

    return VectorSerializer.FromBytes(raw);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Stores an embedding vector in <c>RagEmbeddingCache</c> unless the cache key is already present.
/// </summary>
/// <param name="cacheKey">Key of the cache entry.</param>
/// <param name="model">Model name stored alongside the vector.</param>
/// <param name="textHash">Hash of the embedded text, stored alongside the vector.</param>
/// <param name="vector">Embedding to store; serialized with <c>VectorSerializer.ToBytes</c>.</param>
/// <param name="ct">Token for the database calls.</param>
/// <remarks>
/// The existence check and the insert are not atomic, so two concurrent callers can both
/// pass the check. A resulting duplicate-key error is treated as "already cached" instead of
/// failing the caller.
/// </remarks>
public async Task SaveEmbeddingAsync(string cacheKey, string model, string textHash, float[] vector, CancellationToken ct)
{
    const string sql = """
        IF NOT EXISTS (SELECT 1 FROM RagEmbeddingCache WHERE CacheKey = @CacheKey)
        INSERT INTO RagEmbeddingCache (CacheKey, Model, TextHash, Vector, CreatedAt)
        VALUES (@CacheKey, @Model, @TextHash, @Vector, SYSUTCDATETIME())
        """;

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(sql, connection);
    command.Parameters.AddWithValue("@CacheKey", cacheKey);
    command.Parameters.AddWithValue("@Model", model);
    command.Parameters.AddWithValue("@TextHash", textHash);
    command.Parameters.AddWithValue("@Vector", VectorSerializer.ToBytes(vector));

    try
    {
        await command.ExecuteNonQueryAsync(ct);
    }
    catch (SqlException ex) when (ex.Number is 2627 or 2601)
    {
        // 2627 = primary key / unique constraint violation, 2601 = duplicate key in a unique index.
        // Another writer inserted the same key first; the entry exists, which is all this method promises.
        // NOTE(review): assumes CacheKey is covered by a key or unique index; the schema is not visible here.
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads a cached chat completion from <c>RagChatCompletionCache</c>.
/// </summary>
/// <param name="cacheKey">Value compared against the <c>CacheKey</c> column.</param>
/// <param name="ct">Token for the database calls.</param>
/// <returns>The cached response text, or null when no string value is stored for the key.</returns>
public async Task<string?> GetChatCompletionAsync(string cacheKey, CancellationToken ct)
{
    const string query = "SELECT ResponseText FROM RagChatCompletionCache WHERE CacheKey = @CacheKey";

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(query, connection);
    command.Parameters.AddWithValue("@CacheKey", cacheKey);

    var stored = await command.ExecuteScalarAsync(ct);
    if (stored is string responseText)
    {
        return responseText;
    }

    return null;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Stores a chat completion in <c>RagChatCompletionCache</c> unless the cache key is already present.
/// </summary>
/// <param name="cacheKey">Key of the cache entry.</param>
/// <param name="model">Model name stored alongside the response.</param>
/// <param name="temperature">Temperature stored alongside the response.</param>
/// <param name="responseText">Response text to cache.</param>
/// <param name="ct">Token for the database calls.</param>
/// <remarks>
/// The existence check and the insert are not atomic, so two concurrent callers can both
/// pass the check. A resulting duplicate-key error is treated as "already cached" instead of
/// failing the caller.
/// </remarks>
public async Task SaveChatCompletionAsync(string cacheKey, string model, decimal temperature, string responseText, CancellationToken ct)
{
    const string sql = """
        IF NOT EXISTS (SELECT 1 FROM RagChatCompletionCache WHERE CacheKey = @CacheKey)
        INSERT INTO RagChatCompletionCache (CacheKey, Model, Temperature, ResponseText, CreatedAt)
        VALUES (@CacheKey, @Model, @Temperature, @ResponseText, SYSUTCDATETIME())
        """;

    await using var connection = new SqlConnection(_connectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(sql, connection);
    command.Parameters.AddWithValue("@CacheKey", cacheKey);
    command.Parameters.AddWithValue("@Model", model);
    command.Parameters.AddWithValue("@Temperature", temperature);
    command.Parameters.AddWithValue("@ResponseText", responseText);

    try
    {
        await command.ExecuteNonQueryAsync(ct);
    }
    catch (SqlException ex) when (ex.Number is 2627 or 2601)
    {
        // 2627 = primary key / unique constraint violation, 2601 = duplicate key in a unique index.
        // Another writer inserted the same key first; the entry exists, which is all this method promises.
        // NOTE(review): assumes CacheKey is covered by a key or unique index; the schema is not visible here.
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Materializes a document from nine consecutive columns of the reader's current row.
/// </summary>
/// <param name="reader">Reader positioned on the row to read.</param>
/// <param name="offset">Ordinal of the first document column (the <c>Id</c>).</param>
/// <returns>
/// The document built from Id, DocumentType, Title, SourceUrl, RawText, TextHash,
/// TypeConfidence, MetadataJson and CreatedAt, in that column order.
/// </returns>
private static RagDocumentRecord ReadDocument(SqlDataReader reader, int offset = 0)
{
    // Translates a position within the document column list into a reader ordinal.
    int At(int column) => offset + column;

    return new RagDocumentRecord
    {
        Id = reader.GetString(At(0)),
        DocumentType = reader.GetString(At(1)),
        Title = reader.GetString(At(2)),
        // SourceUrl is the only column read as nullable.
        SourceUrl = reader.IsDBNull(At(3)) ? null : reader.GetString(At(3)),
        Text = reader.GetString(At(4)),
        TextHash = reader.GetString(At(5)),
        TypeConfidence = Convert.ToDouble(reader.GetValue(At(6))),
        MetadataJson = reader.GetString(At(7)),
        // The stored timestamp is given a zero offset, i.e. interpreted as UTC.
        CreatedAt = new DateTimeOffset(reader.GetDateTime(At(8)), TimeSpan.Zero)
    };
}
|
||||||
|
/// <summary>
/// Creates the database named in the connection string's initial catalog when it does not
/// exist yet. Does nothing when the connection string names no database.
/// </summary>
/// <remarks>
/// The check and the CREATE run against <c>master</c>. The database name travels only as a
/// parameter and is quoted on the server with <c>QUOTENAME</c>, so names containing <c>]</c>
/// or an apostrophe cannot break the dynamic statement.
/// </remarks>
/// <param name="ct">Token for the database calls.</param>
private async Task EnsureDatabaseExistsAsync(CancellationToken ct)
{
    var builder = new SqlConnectionStringBuilder(_connectionString);
    var databaseName = builder.InitialCatalog;
    if (string.IsNullOrWhiteSpace(databaseName))
    {
        return;
    }

    // The target database may not exist yet, so connect to master instead.
    builder.InitialCatalog = "master";

    const string sql = """
        IF DB_ID(@DatabaseName) IS NULL
        BEGIN
            DECLARE @CreateSql nvarchar(max) = N'CREATE DATABASE ' + QUOTENAME(@DatabaseName);
            EXEC (@CreateSql);
        END
        """;

    await using var connection = new SqlConnection(builder.ConnectionString);
    await connection.OpenAsync(ct);

    await using var command = new SqlCommand(sql, connection);
    command.Parameters.AddWithValue("@DatabaseName", databaseName);
    await command.ExecuteNonQueryAsync(ct);
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
using Api.Services.Contracts.Rag;
|
using Api.Services.Contracts;
|
||||||
|
|
||||||
namespace Api.Services.Rag;
|
namespace Api.Services;
|
||||||
|
|
||||||
public sealed class TextChunker : ITextChunker
|
public sealed class TextChunker : ITextChunker
|
||||||
{
|
{
|
||||||
@@ -15,10 +15,10 @@ public sealed class TextChunker : ITextChunker
|
|||||||
while (start < text.Length)
|
while (start < text.Length)
|
||||||
{
|
{
|
||||||
var length = Math.Min(chunkSize, text.Length - start);
|
var length = Math.Min(chunkSize, text.Length - start);
|
||||||
chunks.Add(text.Substring(start, length).Trim());
|
var chunk = text.Substring(start, length).Trim();
|
||||||
|
if (!string.IsNullOrWhiteSpace(chunk)) chunks.Add(chunk);
|
||||||
start += chunkSize - overlap;
|
start += chunkSize - overlap;
|
||||||
}
|
}
|
||||||
|
return chunks;
|
||||||
return chunks.Where(x => !string.IsNullOrWhiteSpace(x)).ToList();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
using System.Text;
|
||||||
|
using Api.Services.Contracts;
|
||||||
|
using UglyToad.PdfPig;
|
||||||
|
|
||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Extracts text from PDF streams and collapses whitespace in text.
/// </summary>
public sealed class TextExtractor : ITextExtractor
{
    /// <summary>
    /// Reads the text of every page of a PDF and returns it whitespace-normalized.
    /// </summary>
    /// <param name="stream">Stream containing the PDF document.</param>
    /// <param name="ct">Checked before each page is read.</param>
    /// <returns>A completed task holding the normalized text; the extraction itself runs synchronously.</returns>
    public Task<string> ExtractPdfAsync(Stream stream, CancellationToken ct)
    {
        using var pdf = PdfDocument.Open(stream);

        var rawText = new StringBuilder();
        foreach (var page in pdf.GetPages())
        {
            ct.ThrowIfCancellationRequested();

            // A blank line separates pages; Normalize collapses it to a single space later.
            rawText.AppendLine(page.Text).AppendLine();
        }

        var normalized = Normalize(rawText.ToString());
        return Task.FromResult(normalized);
    }

    /// <summary>
    /// Collapses every run of whitespace into a single space and removes leading and trailing whitespace.
    /// </summary>
    /// <param name="value">Text to normalize.</param>
    /// <returns>The normalized text, or an empty string for null, empty or whitespace-only input.</returns>
    public string Normalize(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        // A null separator array makes Split break on any whitespace character.
        var words = value.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        var joined = string.Join(' ', words);
        return joined.Trim();
    }
}
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
namespace Api.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Converts float vectors to and from raw byte arrays and compares vectors by cosine similarity.
/// </summary>
public static class VectorSerializer
{
    /// <summary>
    /// Copies the in-memory bytes of <paramref name="vector"/> into a new array of four bytes per element.
    /// </summary>
    /// <param name="vector">Vector to serialize.</param>
    /// <returns>A byte array of length <c>vector.Length * sizeof(float)</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="vector"/> is null.</exception>
    public static byte[] ToBytes(float[] vector)
    {
        ArgumentNullException.ThrowIfNull(vector);

        var bytes = new byte[vector.Length * sizeof(float)];
        Buffer.BlockCopy(vector, 0, bytes, 0, bytes.Length);
        return bytes;
    }

    /// <summary>
    /// Rebuilds a float vector from bytes produced by <see cref="ToBytes"/>.
    /// </summary>
    /// <param name="bytes">Serialized vector; its length must be a multiple of four.</param>
    /// <returns>A vector of <c>bytes.Length / sizeof(float)</c> elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentException">The length of <paramref name="bytes"/> is not a multiple of four.</exception>
    public static float[] FromBytes(byte[] bytes)
    {
        ArgumentNullException.ThrowIfNull(bytes);

        // Reject truncated or corrupted payloads with a message that names the actual problem,
        // rather than letting the block copy fail on an out-of-bounds length.
        if (bytes.Length % sizeof(float) != 0)
        {
            throw new ArgumentException(
                $"Serialized vector length {bytes.Length} is not a multiple of {sizeof(float)} bytes.",
                nameof(bytes));
        }

        var vector = new float[bytes.Length / sizeof(float)];
        Buffer.BlockCopy(bytes, 0, vector, 0, bytes.Length);
        return vector;
    }

    /// <summary>
    /// Computes the cosine similarity of two vectors.
    /// </summary>
    /// <param name="a">First vector.</param>
    /// <param name="b">Second vector.</param>
    /// <returns>
    /// The cosine of the angle between the vectors, or 0 when the vectors are empty, differ in
    /// length, or either has zero magnitude.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either vector is null.</exception>
    public static double CosineSimilarity(float[] a, float[] b)
    {
        ArgumentNullException.ThrowIfNull(a);
        ArgumentNullException.ThrowIfNull(b);

        if (a.Length == 0 || a.Length != b.Length)
        {
            return 0;
        }

        double dot = 0, magA = 0, magB = 0;
        for (var i = 0; i < a.Length; i++)
        {
            // Widen before multiplying: a float * float product is rounded to float and can
            // overflow to infinity, which would turn the final ratio into NaN.
            double x = a[i];
            double y = b[i];
            dot += x * y;
            magA += x * x;
            magB += y * y;
        }

        if (magA == 0 || magB == 0)
        {
            return 0;
        }

        return dot / (Math.Sqrt(magA) * Math.Sqrt(magB));
    }
}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
namespace Api.Settings;
|
||||||
|
|
||||||
|
/// <summary>
/// Settings for the AI backends: the provider name plus one settings object per provider.
/// </summary>
public sealed class AiSettings
{
    /// <summary>Name of the provider to use. Defaults to <c>"OpenAI"</c>.</summary>
    public string Provider { get; set; } = "OpenAI";

    /// <summary>OpenAI-specific settings; initialized with that type's defaults.</summary>
    public OpenAiProviderSettings OpenAI { get; set; } = new OpenAiProviderSettings();

    /// <summary>Ollama-specific settings; initialized with that type's defaults.</summary>
    public OllamaProviderSettings Ollama { get; set; } = new OllamaProviderSettings();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Connection and model settings for the OpenAI provider.
/// </summary>
public sealed class OpenAiProviderSettings
{
    /// <summary>API key; empty by default.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>Chat model name. Defaults to <c>"gpt-4o-mini"</c>.</summary>
    public string ChatModel { get; set; } = "gpt-4o-mini";

    /// <summary>Embedding model name. Defaults to <c>"text-embedding-3-small"</c>.</summary>
    public string EmbeddingModel { get; set; } = "text-embedding-3-small";

    /// <summary>Request timeout in seconds. Defaults to 90.</summary>
    public int TimeoutSeconds { get; set; } = 90;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Connection and model settings for the Ollama provider.
/// </summary>
public sealed class OllamaProviderSettings
{
    /// <summary>Base URL of the Ollama server. Defaults to <c>"http://localhost:11434"</c>.</summary>
    public string BaseUrl { get; set; } = "http://localhost:11434";

    /// <summary>Chat model name. Defaults to <c>"llama3.1:8b"</c>.</summary>
    public string ChatModel { get; set; } = "llama3.1:8b";

    /// <summary>Embedding model name. Defaults to <c>"nomic-embed-text"</c>.</summary>
    public string EmbeddingModel { get; set; } = "nomic-embed-text";

    /// <summary>Request timeout in seconds. Defaults to 180.</summary>
    public int TimeoutSeconds { get; set; } = 180;
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
namespace Api.Settings;
|
||||||
|
|
||||||
|
/// <summary>
/// Settings for the internal API key.
/// </summary>
public sealed class InternalApiSettings
{
    /// <summary>The internal API key; empty by default.</summary>
    public string ApiKey { get; set; } = "";

    /// <summary>
    /// Off by default. NOTE(review): presumably toggles enforcement of <see cref="ApiKey"/> on
    /// incoming requests; confirm against the code that consumes this setting.
    /// </summary>
    public bool RequireApiKey { get; set; }
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
namespace Api.Settings;
|
||||||
|
|
||||||
|
/// <summary>
/// Limits and defaults for the RAG pipeline. The property names and default values match the
/// keys of the <c>Rag</c> section in the service's appsettings file.
/// </summary>
public sealed class RagSettings
{
    /// <summary>Maximum file size in megabytes. Defaults to 8.</summary>
    public int MaxFileSizeMb { get; set; } = 8;

    /// <summary>Chunk size. Defaults to 900. NOTE(review): presumably measured in characters; confirm against the chunker.</summary>
    public int ChunkSize { get; set; } = 900;

    /// <summary>Overlap between consecutive chunks, in the same unit as <see cref="ChunkSize"/>. Defaults to 150.</summary>
    public int ChunkOverlap { get; set; } = 150;

    /// <summary>Maximum number of text characters. Defaults to 60000.</summary>
    public int MaxTextChars { get; set; } = 60000;

    /// <summary>Default number of results requested from a search. Defaults to 20.</summary>
    public int DefaultTopK { get; set; } = 20;

    /// <summary>Upper bound for the number of results requested from a search. Defaults to 50.</summary>
    public int MaxTopK { get; set; } = 50;

    /// <summary>
    /// Off by default. NOTE(review): presumably enables AI-based document classification;
    /// confirm against the code that consumes this setting.
    /// </summary>
    public bool ClassifyWithAi { get; set; }
}
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
{
|
||||||
|
"AllowedHosts": "*",
|
||||||
|
"Serilog": {
|
||||||
|
"MinimumLevel": {
|
||||||
|
"Default": "Information",
|
||||||
|
"Override": {
|
||||||
|
"Microsoft.AspNetCore": "Warning",
|
||||||
|
"System.Net.Http.HttpClient": "Warning"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"WriteTo": [
|
||||||
|
{ "Name": "Console" }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"ConnectionStrings": {
|
||||||
|
"RagDb": "Server=localhost,1433;Database=MyAiRag;User Id=sa;Password=Your_strong_password123;TrustServerCertificate=True"
|
||||||
|
},
|
||||||
|
"InternalApi": {
|
||||||
|
"ApiKey": "",
|
||||||
|
"RequireApiKey": false
|
||||||
|
},
|
||||||
|
"Rag": {
|
||||||
|
"MaxFileSizeMb": 8,
|
||||||
|
"ChunkSize": 900,
|
||||||
|
"ChunkOverlap": 150,
|
||||||
|
"MaxTextChars": 60000,
|
||||||
|
"DefaultTopK": 20,
|
||||||
|
"MaxTopK": 50,
|
||||||
|
"ClassifyWithAi": false
|
||||||
|
},
|
||||||
|
"Ai": {
|
||||||
|
"Provider": "OpenAI",
|
||||||
|
"OpenAI": {
|
||||||
|
"ApiKey": "",
|
||||||
|
"ChatModel": "gpt-4o-mini",
|
||||||
|
"EmbeddingModel": "text-embedding-3-small",
|
||||||
|
"TimeoutSeconds": 90
|
||||||
|
},
|
||||||
|
"Ollama": {
|
||||||
|
"BaseUrl": "http://localhost:11434",
|
||||||
|
"ChatModel": "llama3.1:8b",
|
||||||
|
"EmbeddingModel": "nomic-embed-text",
|
||||||
|
"TimeoutSeconds": 180
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk.Web">
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net10.0</TargetFramework>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
|
||||||
|
<RootNamespace>Api</RootNamespace>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Azure.Extensions.AspNetCore.Configuration.Secrets" Version="1.5.1" />
|
||||||
|
<PackageReference Include="Azure.Identity" Version="1.21.0" />
|
||||||
|
<PackageReference Include="DotNetEnv" Version="3.2.0" />
|
||||||
|
<PackageReference Include="Microsoft.Data.SqlClient" Version="6.1.3" />
|
||||||
|
<PackageReference Include="PdfPig" Version="0.1.14" />
|
||||||
|
<PackageReference Include="Serilog.AspNetCore" Version="10.0.0" />
|
||||||
|
<PackageReference Include="Serilog.Enrichers.Environment" Version="3.0.1" />
|
||||||
|
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
|
||||||
|
<PackageReference Include="Swashbuckle.AspNetCore" Version="10.1.7" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<None Update="Database/schema.sql">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</None>
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
||||||
@@ -89,7 +89,7 @@
|
|||||||
"cv.noConsent": "GDPR consent is required.",
|
"cv.noConsent": "GDPR consent is required.",
|
||||||
"cv.processing": "Processing...",
|
"cv.processing": "Processing...",
|
||||||
"cv.extracting": "Extracting CV and matching job...",
|
"cv.extracting": "Extracting CV and matching job...",
|
||||||
"cv.processingLong": "Processing CV PDF and job input.",
|
"cv.processingLong": "Processing CV PDF and job input. Backend endpoints must be available.",
|
||||||
"cv.cvFailed": "CV extraction failed",
|
"cv.cvFailed": "CV extraction failed",
|
||||||
"cv.matchFailed": "Job matching failed",
|
"cv.matchFailed": "Job matching failed",
|
||||||
"cv.completed": "Match completed.",
|
"cv.completed": "Match completed.",
|
||||||
@@ -182,7 +182,7 @@
|
|||||||
"cv.noConsent": "Consimțământul GDPR este obligatoriu.",
|
"cv.noConsent": "Consimțământul GDPR este obligatoriu.",
|
||||||
"cv.processing": "Se procesează...",
|
"cv.processing": "Se procesează...",
|
||||||
"cv.extracting": "Se extrage CV-ul și se compară jobul...",
|
"cv.extracting": "Se extrage CV-ul și se compară jobul...",
|
||||||
"cv.processingLong": "Se procesează PDF-ul și informațiile despre job.",
|
"cv.processingLong": "Se procesează PDF-ul și informațiile despre job. Endpoint-urile backend trebuie să fie disponibile.",
|
||||||
"cv.cvFailed": "Extragerea CV-ului a eșuat",
|
"cv.cvFailed": "Extragerea CV-ului a eșuat",
|
||||||
"cv.matchFailed": "Matching-ul jobului a eșuat",
|
"cv.matchFailed": "Matching-ul jobului a eșuat",
|
||||||
"cv.completed": "Matching finalizat.",
|
"cv.completed": "Matching finalizat.",
|
||||||
|
|||||||
Reference in New Issue
Block a user