feat: add page-fetcher-api — centralised Playwright page fetcher

Introduces page-fetcher-api, a new internal ASP.NET Core service that
centralises all web-page fetching through a single Playwright (headless
Chromium) browser instance. All fetches are persisted to the pageFetcher
SQL schema for auditing.

New projects:
- Apis/page-fetcher-api-models: FetchPageRequest, FetchPageResponse, IPageFetcherApiClient
- Apis/page-fetcher-data: PageFetchDbContext, PageFetchEntity, InitialSchema migration (schema: pageFetcher)
- Apis/page-fetcher-api: PlaywrightBrowserService (singleton), PageFetcherService, PageController

Changes to existing services:
- cv-matcher-api: JobTextExtractor now calls IPageFetcherApiClient instead of HttpClient
- cv-search-job: HtmlJobSearcher uses IPageFetcherApiClient (removes inline Playwright);
  CvSearchJobTask fetches individual job pages and applies keyword pre-filter before
  LLM call; passes pre-fetched JobDescription to cv-matcher-api to skip re-fetch
- common: add PageFetcherApiSettings
- docker-compose.yml, build.yml: add new service + env vars for callers

Closes #43

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 17:43:56 +03:00
parent 1222a86eb7
commit 898dd09d50
31 changed files with 1121 additions and 110 deletions
@@ -0,0 +1,82 @@
// <auto-generated />
using System;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
using PageFetcher.Data;
#nullable disable
namespace PageFetcher.Data.Migrations
{
[DbContext(typeof(PageFetchDbContext))]
[Migration("20260608143523_InitialSchema")]
partial class InitialSchema
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasDefaultSchema("pageFetcher")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
modelBuilder.Entity("PageFetcher.Data.Entities.PageFetchEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("CallerService")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<long>("DurationMs")
.HasColumnType("bigint");
b.Property<string>("ErrorMessage")
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)");
b.Property<string>("Html")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int?>("HttpStatusCode")
.HasColumnType("int");
b.Property<bool>("Success")
.HasColumnType("bit");
b.Property<string>("Text")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<string>("Url")
.IsRequired()
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)");
b.HasKey("Id");
b.HasIndex("CreatedAt");
b.HasIndex("Url");
b.ToTable("PageFetches", "pageFetcher");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,59 @@
using System;
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace PageFetcher.Data.Migrations
{
/// <inheritdoc />
public partial class InitialSchema : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.EnsureSchema(
name: MigrationConstants.SchemaName);
migrationBuilder.CreateTable(
name: "PageFetches",
schema: MigrationConstants.SchemaName,
columns: table => new
{
Id = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
Url = table.Column<string>(type: "nvarchar(2000)", maxLength: 2000, nullable: false),
CallerService = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
HttpStatusCode = table.Column<int>(type: "int", nullable: true),
Html = table.Column<string>(type: "nvarchar(max)", nullable: false),
Text = table.Column<string>(type: "nvarchar(max)", nullable: false),
DurationMs = table.Column<long>(type: "bigint", nullable: false),
Success = table.Column<bool>(type: "bit", nullable: false),
ErrorMessage = table.Column<string>(type: "nvarchar(2000)", maxLength: 2000, nullable: true),
CreatedAt = table.Column<DateTime>(type: "datetime2", nullable: false, defaultValueSql: "SYSUTCDATETIME()")
},
constraints: table =>
{
table.PrimaryKey("PK_PageFetches", x => x.Id);
});
migrationBuilder.CreateIndex(
name: "IX_PageFetches_CreatedAt",
schema: MigrationConstants.SchemaName,
table: "PageFetches",
column: "CreatedAt");
migrationBuilder.CreateIndex(
name: "IX_PageFetches_Url",
schema: MigrationConstants.SchemaName,
table: "PageFetches",
column: "Url");
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropTable(
name: "PageFetches",
schema: MigrationConstants.SchemaName);
}
}
}
@@ -0,0 +1,79 @@
// <auto-generated />
using System;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
using PageFetcher.Data;
#nullable disable
namespace PageFetcher.Data.Migrations
{
[DbContext(typeof(PageFetchDbContext))]
partial class PageFetchDbContextModelSnapshot : ModelSnapshot
{
protected override void BuildModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasDefaultSchema("pageFetcher")
.HasAnnotation("ProductVersion", "10.0.7")
.HasAnnotation("Relational:MaxIdentifierLength", 128);
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
modelBuilder.Entity("PageFetcher.Data.Entities.PageFetchEntity", b =>
{
b.Property<string>("Id")
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<string>("CallerService")
.IsRequired()
.HasMaxLength(64)
.HasColumnType("nvarchar(64)");
b.Property<DateTime>("CreatedAt")
.ValueGeneratedOnAdd()
.HasColumnType("datetime2")
.HasDefaultValueSql("SYSUTCDATETIME()");
b.Property<long>("DurationMs")
.HasColumnType("bigint");
b.Property<string>("ErrorMessage")
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)");
b.Property<string>("Html")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<int?>("HttpStatusCode")
.HasColumnType("int");
b.Property<bool>("Success")
.HasColumnType("bit");
b.Property<string>("Text")
.IsRequired()
.HasColumnType("nvarchar(max)");
b.Property<string>("Url")
.IsRequired()
.HasMaxLength(2000)
.HasColumnType("nvarchar(2000)");
b.HasKey("Id");
b.HasIndex("CreatedAt");
b.HasIndex("Url");
b.ToTable("PageFetches", "pageFetcher");
});
#pragma warning restore 612, 618
}
}
}