Files
myAi/Apis/cv-search-data/Migrations/20260529170000_AddHeadlessBrowserToProviders.cs
claude e38f40732f
Build and Push Docker Images Staging / build (push) Successful in 5m20s
feat(providers): add headless browser scraping via Playwright for SPA job sites
ejobs.ro migrated to a Nuxt SPA - plain HTTP GET returns only the JS
bundle. This change equips cv-search-job with a headless Chromium
(Playwright 1.60) so it can fully render SPA pages before extracting
job links.

- Add UseHeadlessBrowser flag to JobProviderEntity, JobProviderConfig,
  and CvSearchDbContext; map it in JobTokenService.ToConfig so the flag
  is included in the session provider-config snapshot
- Migration: add UseHeadlessBrowser column; fix ejobs.ro search URL
  (remove /user/ prefix that caused 404) and set UseHeadlessBrowser=true
- HtmlJobSearcher: detect flag and dispatch to FetchWithPlaywrightAsync;
  plain-HTTP path is unchanged; NetworkIdle timeout falls back to partial
  content rather than failing outright
- Dockerfile: download Playwright Chromium in the SDK build stage via
  npx; copy browser binaries to the final image; install Chromium system
  libs (Ubuntu noble t64 variants)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 13:42:52 +03:00

51 lines
2.0 KiB
C#

using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace CvSearch.Data.Migrations
{
    /// <summary>
    /// Adds the <c>UseHeadlessBrowser</c> column to the <c>JobProviders</c> table and
    /// rewrites the provider row with Id = 1 (ejobs.ro) so that it uses a new search
    /// URL template, a new job-link filter, and headless rendering.
    /// </summary>
    public partial class AddHeadlessBrowserToProviders : Migration
    {
        // Identifiers shared by the schema and data operations of this migration.
        private const string ProvidersTable = "JobProviders";
        private const string HeadlessColumn = "UseHeadlessBrowser";
        private const int EjobsProviderId = 1;

        /// <summary>
        /// Creates the non-nullable <c>bit</c> column (default <c>false</c>) and then
        /// points the ejobs.ro row at the new search URL with the flag switched on.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the migration operations.</param>
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.AddColumn<bool>(
                name: HeadlessColumn,
                schema: MigrationConstants.SchemaName,
                table: ProvidersTable,
                type: "bit",
                nullable: false,
                defaultValue: false);

            // ejobs.ro is a Nuxt SPA: the previous /user/ URL returns 404 and a plain
            // HTTP GET yields only the JS bundle instead of job listings, so the row
            // gets the corrected search URL and is rendered through headless Chromium.
            RewriteEjobsProvider(
                migrationBuilder,
                searchUrlTemplate: "https://www.ejobs.ro/locuri-de-munca?q={keywords}",
                jobLinkContains: "/locuri-de-munca/",
                useHeadlessBrowser: true);
        }

        /// <summary>
        /// Restores the previous ejobs.ro row values and then removes the
        /// <c>UseHeadlessBrowser</c> column.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the migration operations.</param>
        protected override void Down(MigrationBuilder migrationBuilder)
        {
            // The row update is queued before the drop because it still writes to the column.
            RewriteEjobsProvider(
                migrationBuilder,
                searchUrlTemplate: "https://www.ejobs.ro/user/locuri-de-munca/?utm_source=myai&q={keywords}",
                jobLinkContains: "/user/locuri-de-munca/",
                useHeadlessBrowser: false);

            migrationBuilder.DropColumn(
                name: HeadlessColumn,
                schema: MigrationConstants.SchemaName,
                table: ProvidersTable);
        }

        /// <summary>
        /// Queues a single <c>UpdateData</c> operation that sets the search URL template,
        /// the job-link filter, and the headless flag on the row whose <c>Id</c> is 1.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the migration operations.</param>
        /// <param name="searchUrlTemplate">Value written to <c>SearchUrlTemplate</c>.</param>
        /// <param name="jobLinkContains">Value written to <c>JobLinkContains</c>.</param>
        /// <param name="useHeadlessBrowser">Value written to <c>UseHeadlessBrowser</c>.</param>
        private static void RewriteEjobsProvider(
            MigrationBuilder migrationBuilder,
            string searchUrlTemplate,
            string jobLinkContains,
            bool useHeadlessBrowser)
        {
            migrationBuilder.UpdateData(
                schema: MigrationConstants.SchemaName,
                table: ProvidersTable,
                keyColumn: "Id",
                keyValue: EjobsProviderId,
                columns: new[] { "SearchUrlTemplate", "JobLinkContains", HeadlessColumn },
                values: new object[] { searchUrlTemplate, jobLinkContains, useHeadlessBrowser });
        }
    }
}