diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml
index 77e6f41..850a436 100644
--- a/.gitea/workflows/build.yml
+++ b/.gitea/workflows/build.yml
@@ -15,6 +15,7 @@ env:
WEB_IMAGE: apps/myai-web
CV_CLEANUP_JOB_IMAGE: apps/myai-cv-cleanup-job
CV_SEARCH_JOB_IMAGE: apps/myai-cv-search-job
+ PAGE_FETCHER_API_IMAGE: apps/myai-page-fetcher-api
IMAGE_TAG: staging
jobs:
@@ -62,6 +63,10 @@ jobs:
run: |
docker build -f Jobs/cv-search-job/Dockerfile -t "${REGISTRY_HOST}/${CV_SEARCH_JOB_IMAGE}:${IMAGE_TAG}" .
+ - name: Build Page Fetcher API image
+ run: |
+ docker build -f Apis/page-fetcher-api/Dockerfile -t "${REGISTRY_HOST}/${PAGE_FETCHER_API_IMAGE}:${IMAGE_TAG}" .
+
- name: Push API image
run: |
docker push "${REGISTRY_HOST}/${API_IMAGE}:${IMAGE_TAG}"
@@ -88,4 +93,8 @@ jobs:
- name: Push CV search job image
run: |
- docker push "${REGISTRY_HOST}/${CV_SEARCH_JOB_IMAGE}:${IMAGE_TAG}"
\ No newline at end of file
+ docker push "${REGISTRY_HOST}/${CV_SEARCH_JOB_IMAGE}:${IMAGE_TAG}"
+
+ - name: Push Page Fetcher API image
+ run: |
+ docker push "${REGISTRY_HOST}/${PAGE_FETCHER_API_IMAGE}:${IMAGE_TAG}"
\ No newline at end of file
diff --git a/Apis/api/Program.cs b/Apis/api/Program.cs
index 76f6c5c..286612d 100644
--- a/Apis/api/Program.cs
+++ b/Apis/api/Program.cs
@@ -5,8 +5,8 @@ using Email.Data;
using Email.Data.Repositories;
using Email.Data.Repositories.Contracts;
using Email.Data.Services;
-using EmailApi.Models.Clients;
-using EmailApi.Models.Settings;
+using Email.Models.Clients;
+using Email.Models.Settings;
using Microsoft.EntityFrameworkCore;
using Models.Settings;
using MyAi.Data;
diff --git a/Apis/api/Services/EmailApiEmailSender.cs b/Apis/api/Services/EmailApiEmailSender.cs
index 384515d..86a416f 100644
--- a/Apis/api/Services/EmailApiEmailSender.cs
+++ b/Apis/api/Services/EmailApiEmailSender.cs
@@ -1,8 +1,8 @@
using Api.Services.Contracts;
using CvMatcher.Models.Responses;
using Email.Data.Services;
-using EmailApi.Models.Clients;
-using EmailApi.Models.Requests;
+using Email.Models.Clients;
+using Email.Models.Requests;
using Microsoft.Extensions.Options;
using Models.Requests;
using Models.Settings;
diff --git a/Apis/common/Settings/PageFetcherApiSettings.cs b/Apis/common/Settings/PageFetcherApiSettings.cs
new file mode 100644
index 0000000..c6367ac
--- /dev/null
+++ b/Apis/common/Settings/PageFetcherApiSettings.cs
@@ -0,0 +1,11 @@
+namespace Common.Settings;
+
+/// <summary>
+/// Connection settings for the internal page-fetcher-api service.
+/// Bound from the PageFetcherApi configuration section.
+/// </summary>
+public sealed class PageFetcherApiSettings
+{
+ public string BaseUrl { get; set; } = string.Empty;
+ public string InternalApiKey { get; set; } = string.Empty;
+}
diff --git a/Apis/cv-matcher-api/Dockerfile b/Apis/cv-matcher-api/Dockerfile
index 819002c..426ecb6 100644
--- a/Apis/cv-matcher-api/Dockerfile
+++ b/Apis/cv-matcher-api/Dockerfile
@@ -8,6 +8,7 @@ COPY Apis/cv-search-data/cv-search-data.csproj Apis/cv-search-data/
COPY Apis/cv-matcher-data/cv-matcher-data.csproj Apis/cv-matcher-data/
COPY Apis/common/common.csproj Apis/common/
COPY Apis/cv-matcher-api-models/cv-matcher-api-models.csproj Apis/cv-matcher-api-models/
+COPY Apis/page-fetcher-api-models/page-fetcher-api-models.csproj Apis/page-fetcher-api-models/
COPY Apis/myai-data/myai-data.csproj Apis/myai-data/
COPY Apis/shared-data/shared-data.csproj Apis/shared-data/
COPY Helpers/common-helpers/common-helpers.csproj Helpers/common-helpers/
@@ -20,6 +21,7 @@ COPY Apis/cv-search-data/ Apis/cv-search-data/
COPY Apis/cv-matcher-data/ Apis/cv-matcher-data/
COPY Apis/common/ Apis/common/
COPY Apis/cv-matcher-api-models/ Apis/cv-matcher-api-models/
+COPY Apis/page-fetcher-api-models/ Apis/page-fetcher-api-models/
COPY Apis/myai-data/ Apis/myai-data/
COPY Apis/shared-data/ Apis/shared-data/
COPY Helpers/common-helpers/ Helpers/common-helpers/
diff --git a/Apis/cv-matcher-api/Program.cs b/Apis/cv-matcher-api/Program.cs
index f247251..0fd642c 100644
--- a/Apis/cv-matcher-api/Program.cs
+++ b/Apis/cv-matcher-api/Program.cs
@@ -13,6 +13,7 @@ using Microsoft.EntityFrameworkCore;
using Refit;
using Serilog;
using Common.Settings;
+using PageFetcher.Models;
using StartupHelpers;
using System.Reflection;
@@ -36,6 +37,16 @@ try
builder.Services.Configure(builder.Configuration.GetSection("Ai"));
builder.Services.Configure(builder.Configuration.GetSection("Matcher"));
builder.Services.Configure(builder.Configuration.GetSection("JobSearch"));
+ builder.Services.Configure(builder.Configuration.GetSection("PageFetcherApi"));
+
+ builder.Services.AddRefitClient()
+ .ConfigureHttpClient((sp, c) =>
+ {
+ var settings = sp.GetRequiredService>().Value;
+ c.BaseAddress = new Uri(settings.BaseUrl.TrimEnd('/') + "/");
+ if (!string.IsNullOrWhiteSpace(settings.InternalApiKey))
+ c.DefaultRequestHeaders.Add("X-Internal-Api-Key", settings.InternalApiKey);
+ });
builder.Services.AddRefitClient()
.ConfigureHttpClient((sp, c) =>
@@ -50,7 +61,7 @@ try
builder.Services.AddScoped();
builder.Services.AddHttpClient();
- builder.Services.AddHttpClient();
+ builder.Services.AddScoped();
builder.Services.AddDbContext(options =>
{
diff --git a/Apis/cv-matcher-api/Services/JobTextExtractor.cs b/Apis/cv-matcher-api/Services/JobTextExtractor.cs
index f8e806b..c16b201 100644
--- a/Apis/cv-matcher-api/Services/JobTextExtractor.cs
+++ b/Apis/cv-matcher-api/Services/JobTextExtractor.cs
@@ -1,26 +1,23 @@
-using System.Net;
-using System.Text.RegularExpressions;
using CvMatcher.Models.Settings;
using Api.Services.Contracts;
using Microsoft.Extensions.Options;
+using PageFetcher.Models;
namespace Api.Services;
/// <summary>
/// Extracts normalised plain text from a job posting, either from a pasted description or by
-/// fetching and stripping the HTML of the job page URL.
+/// fetching the job page text via page-fetcher-api (headless Chromium rendering).
/// </summary>
public sealed class JobTextExtractor : IJobTextExtractor
{
- private readonly HttpClient _http;
+ private readonly IPageFetcherApiClient _pageFetcher;
private readonly MatcherSettings _settings;
- public JobTextExtractor(HttpClient http, IOptions options)
+ public JobTextExtractor(IPageFetcherApiClient pageFetcher, IOptions options)
{
- _http = http;
+ _pageFetcher = pageFetcher;
_settings = options.Value;
- _http.Timeout = TimeSpan.FromSeconds(25);
- _http.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
}
///
@@ -31,15 +28,18 @@ public sealed class JobTextExtractor : IJobTextExtractor
if (string.IsNullOrWhiteSpace(jobUrl)) return string.Empty;
if (!Uri.TryCreate(jobUrl, UriKind.Absolute, out var uri) || uri.Scheme is not ("http" or "https"))
- {
throw new InvalidOperationException("Invalid job URL.");
- }
- var html = await _http.GetStringAsync(uri, ct);
- html = Regex.Replace(html, "", " ", RegexOptions.IgnoreCase);
+ text = Regex.Replace(text, "