e38f40732f
Build and Push Docker Images Staging / build (push) Successful in 5m20s
ejobs.ro migrated to a Nuxt SPA, so a plain HTTP GET returns only the JS bundle. This change equips cv-search-job with a headless Chromium (Playwright 1.60) so it can fully render SPA pages before extracting job links.
- Add UseHeadlessBrowser flag to JobProviderEntity, JobProviderConfig, and CvSearchDbContext; map it in JobTokenService.ToConfig so the flag is included in the session provider-config snapshot.
- Migration: add UseHeadlessBrowser column; fix ejobs.ro search URL (remove /user/ prefix that caused 404) and set UseHeadlessBrowser=true.
- HtmlJobSearcher: detect flag and dispatch to FetchWithPlaywrightAsync; plain-HTTP path is unchanged; NetworkIdle timeout falls back to partial content rather than failing outright.
- Dockerfile: download Playwright Chromium in the SDK build stage via npx; copy browser binaries to the final image; install Chromium system libs (Ubuntu noble t64 variants).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
37 lines
1.6 KiB
C#
37 lines
1.6 KiB
C#
namespace CvSearch.Data.Entities;

/// <summary>
/// Persisted job-board provider configuration. Stored in <c>cvSearch.JobProviders</c>.
/// </summary>
/// <remarks>
/// Providers are loaded from here at session-creation time and snapshotted into
/// <c>JobSearchSessionEntity.ProviderConfigJson</c>, so runtime configuration changes
/// do not affect sessions that are already queued.
/// </remarks>
public sealed class JobProviderEntity
{
    /// <summary>Numeric identifier of this provider record.</summary>
    public int Id { get; set; }

    /// <summary>Display name (e.g. "ejobs.ro"). Defaults to an empty string.</summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// When <see langword="false"/>, the provider is skipped at session creation and the
    /// job-search link is hidden.
    /// </summary>
    public bool Enabled { get; set; }

    /// <summary>
    /// URL template containing a <c>{keywords}</c> placeholder; URL-encoded keywords are
    /// substituted at runtime. Defaults to an empty string.
    /// </summary>
    public string SearchUrlTemplate { get; set; } = string.Empty;

    /// <summary>
    /// Substring that must appear in an anchor <c>href</c> for the link to pass the
    /// stage-1 link filter. Defaults to an empty string.
    /// </summary>
    public string JobLinkContains { get; set; } = string.Empty;

    /// <summary>
    /// JSON array of baseline keywords that are merged with the CV keywords before the
    /// search URL is built. Defaults to an empty array (<c>[]</c>).
    /// </summary>
    public string InitialKeywordsJson { get; set; } = "[]";

    /// <summary>
    /// Maximum number of job URLs to collect from this provider per session. Defaults to 20.
    /// </summary>
    public int MaxResults { get; set; } = 20;

    /// <summary>Controls display ordering in future admin UIs.</summary>
    public int DisplayOrder { get; set; }

    /// <summary>
    /// When <see langword="true"/>, the scraper renders the page with headless Chromium
    /// instead of issuing a plain HTTP GET. Intended for providers whose pages are
    /// client-side rendered, where a plain GET returns only the script bundle.
    /// </summary>
    public bool UseHeadlessBrowser { get; set; }
}