feat: add page-fetcher-api — centralised Playwright page fetcher
Introduces page-fetcher-api, a new internal ASP.NET Core service that centralises all web-page fetching through a single Playwright (headless Chromium) browser instance. All fetches are persisted to the pageFetcher SQL schema for auditing. New projects: - Apis/page-fetcher-api-models: FetchPageRequest, FetchPageResponse, IPageFetcherApiClient - Apis/page-fetcher-data: PageFetchDbContext, PageFetchEntity, InitialSchema migration (schema: pageFetcher) - Apis/page-fetcher-api: PlaywrightBrowserService (singleton), PageFetcherService, PageController Changes to existing services: - cv-matcher-api: JobTextExtractor now calls IPageFetcherApiClient instead of HttpClient - cv-search-job: HtmlJobSearcher uses IPageFetcherApiClient (removes inline Playwright); CvSearchJobTask fetches individual job pages and applies keyword pre-filter before LLM call; passes pre-fetched JobDescription to cv-matcher-api to skip re-fetch - common: add PageFetcherApiSettings - docker-compose.yml, build.yml: add new service + env vars for callers Closes #43 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,7 @@ env:
|
||||
WEB_IMAGE: apps/myai-web
|
||||
CV_CLEANUP_JOB_IMAGE: apps/myai-cv-cleanup-job
|
||||
CV_SEARCH_JOB_IMAGE: apps/myai-cv-search-job
|
||||
PAGE_FETCHER_API_IMAGE: apps/myai-page-fetcher-api
|
||||
IMAGE_TAG: staging
|
||||
|
||||
jobs:
|
||||
@@ -62,6 +63,10 @@ jobs:
|
||||
run: |
|
||||
docker build -f Jobs/cv-search-job/Dockerfile -t "${REGISTRY_HOST}/${CV_SEARCH_JOB_IMAGE}:${IMAGE_TAG}" .
|
||||
|
||||
- name: Build Page Fetcher API image
|
||||
run: |
|
||||
docker build -f Apis/page-fetcher-api/Dockerfile -t "${REGISTRY_HOST}/${PAGE_FETCHER_API_IMAGE}:${IMAGE_TAG}" .
|
||||
|
||||
- name: Push API image
|
||||
run: |
|
||||
docker push "${REGISTRY_HOST}/${API_IMAGE}:${IMAGE_TAG}"
|
||||
@@ -88,4 +93,8 @@ jobs:
|
||||
|
||||
- name: Push CV search job image
|
||||
run: |
|
||||
docker push "${REGISTRY_HOST}/${CV_SEARCH_JOB_IMAGE}:${IMAGE_TAG}"
|
||||
docker push "${REGISTRY_HOST}/${CV_SEARCH_JOB_IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
- name: Push Page Fetcher API image
|
||||
run: |
|
||||
docker push "${REGISTRY_HOST}/${PAGE_FETCHER_API_IMAGE}:${IMAGE_TAG}"
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace Common.Settings;
|
||||
|
||||
/// <summary>
/// Options describing how a caller reaches the internal page-fetcher-api service.
/// Values are bound from the <c>PageFetcherApi</c> configuration section.
/// </summary>
public sealed class PageFetcherApiSettings
{
    /// <summary>Root URL of the page-fetcher-api service. Empty until configured.</summary>
    public string BaseUrl { get; set; } = "";

    /// <summary>Shared secret identifying internal callers. Empty until configured.</summary>
    public string InternalApiKey { get; set; } = "";
}
|
||||
@@ -13,6 +13,7 @@ using Microsoft.EntityFrameworkCore;
|
||||
using Refit;
|
||||
using Serilog;
|
||||
using Common.Settings;
|
||||
using PageFetcher.Models;
|
||||
using StartupHelpers;
|
||||
using System.Reflection;
|
||||
|
||||
@@ -36,6 +37,16 @@ try
|
||||
builder.Services.Configure<CvMatcher.Models.Settings.AiSettings>(builder.Configuration.GetSection("Ai"));
|
||||
builder.Services.Configure<MatcherSettings>(builder.Configuration.GetSection("Matcher"));
|
||||
builder.Services.Configure<JobSearchSettings>(builder.Configuration.GetSection("JobSearch"));
|
||||
builder.Services.Configure<PageFetcherApiSettings>(builder.Configuration.GetSection("PageFetcherApi"));
|
||||
|
||||
builder.Services.AddRefitClient<IPageFetcherApiClient>()
|
||||
.ConfigureHttpClient((sp, c) =>
|
||||
{
|
||||
var settings = sp.GetRequiredService<Microsoft.Extensions.Options.IOptions<PageFetcherApiSettings>>().Value;
|
||||
c.BaseAddress = new Uri(settings.BaseUrl.TrimEnd('/') + "/");
|
||||
if (!string.IsNullOrWhiteSpace(settings.InternalApiKey))
|
||||
c.DefaultRequestHeaders.Add("X-Internal-Api-Key", settings.InternalApiKey);
|
||||
});
|
||||
|
||||
builder.Services.AddRefitClient<IRefitRagApi>()
|
||||
.ConfigureHttpClient((sp, c) =>
|
||||
@@ -50,7 +61,7 @@ try
|
||||
|
||||
builder.Services.AddScoped<IRagApiClient, RagApiClient>();
|
||||
builder.Services.AddHttpClient<IMatcherAiClient, MatcherAiClient>();
|
||||
builder.Services.AddHttpClient<IJobTextExtractor, JobTextExtractor>();
|
||||
builder.Services.AddScoped<IJobTextExtractor, JobTextExtractor>();
|
||||
|
||||
builder.Services.AddDbContext<CvMatcherDbContext>(options =>
|
||||
{
|
||||
|
||||
@@ -1,26 +1,23 @@
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using CvMatcher.Models.Settings;
|
||||
using Api.Services.Contracts;
|
||||
using Microsoft.Extensions.Options;
|
||||
using PageFetcher.Models;
|
||||
|
||||
namespace Api.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Extracts normalised plain text from a job posting, either from a pasted description or by
|
||||
/// fetching and stripping the HTML of the job page URL.
|
||||
/// fetching the job page text via <c>page-fetcher-api</c> (headless Chromium rendering).
|
||||
/// </summary>
|
||||
public sealed class JobTextExtractor : IJobTextExtractor
|
||||
{
|
||||
private readonly HttpClient _http;
|
||||
private readonly IPageFetcherApiClient _pageFetcher;
|
||||
private readonly MatcherSettings _settings;
|
||||
|
||||
public JobTextExtractor(HttpClient http, IOptions<MatcherSettings> options)
|
||||
public JobTextExtractor(IPageFetcherApiClient pageFetcher, IOptions<MatcherSettings> options)
|
||||
{
|
||||
_http = http;
|
||||
_pageFetcher = pageFetcher;
|
||||
_settings = options.Value;
|
||||
_http.Timeout = TimeSpan.FromSeconds(25);
|
||||
_http.DefaultRequestHeaders.UserAgent.ParseAdd("MyAi.ro CV Matcher/1.0");
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -31,15 +28,18 @@ public sealed class JobTextExtractor : IJobTextExtractor
|
||||
|
||||
if (string.IsNullOrWhiteSpace(jobUrl)) return string.Empty;
|
||||
if (!Uri.TryCreate(jobUrl, UriKind.Absolute, out var uri) || uri.Scheme is not ("http" or "https"))
|
||||
{
|
||||
throw new InvalidOperationException("Invalid job URL.");
|
||||
}
|
||||
|
||||
var html = await _http.GetStringAsync(uri, ct);
|
||||
html = Regex.Replace(html, "<script[\\s\\S]*?</script>", " ", RegexOptions.IgnoreCase);
|
||||
html = Regex.Replace(html, "<style[\\s\\S]*?</style>", " ", RegexOptions.IgnoreCase);
|
||||
html = Regex.Replace(html, "<[^>]+>", " ");
|
||||
return Limit(Normalize(WebUtility.HtmlDecode(html)));
|
||||
var response = await _pageFetcher.FetchAsync(new FetchPageRequest
|
||||
{
|
||||
Url = jobUrl,
|
||||
CallerService = "cv-matcher-api"
|
||||
}, ct);
|
||||
|
||||
if (!response.Success)
|
||||
throw new InvalidOperationException($"Failed to fetch job page: {response.Error}");
|
||||
|
||||
return Limit(Normalize(response.Text));
|
||||
}
|
||||
|
||||
/// <summary>Truncates text to the configured maximum character count.</summary>
|
||||
|
||||
@@ -82,6 +82,7 @@
|
||||
<ProjectReference Include="..\cv-search-data\cv-search-data.csproj" />
|
||||
<ProjectReference Include="..\cv-matcher-data\cv-matcher-data.csproj" />
|
||||
<ProjectReference Include="..\common\common.csproj" />
|
||||
<ProjectReference Include="..\page-fetcher-api-models\page-fetcher-api-models.csproj" />
|
||||
<ProjectReference Include="..\..\Helpers\startup-helpers\startup-helpers.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
namespace PageFetcher.Models;
|
||||
|
||||
/// <summary>
/// Payload sent to the page-fetcher-api to ask for a single web page to be fetched.
/// </summary>
public sealed class FetchPageRequest
{
    /// <summary>The page to fetch: an absolute HTTP or HTTPS URL.</summary>
    public string Url { get; set; } = "";

    /// <summary>
    /// Playwright wait condition for the navigation. Accepted values are <c>networkidle</c>
    /// (the default), <c>domcontentloaded</c> and <c>load</c>.
    /// </summary>
    public string WaitFor { get; set; } = "networkidle";

    /// <summary>
    /// Name of the service issuing the request, recorded for auditing
    /// (e.g. <c>cv-matcher-api</c>, <c>cv-search-job</c>).
    /// </summary>
    public string CallerService { get; set; } = "";
}
|
||||
@@ -0,0 +1,25 @@
|
||||
namespace PageFetcher.Models;
|
||||
|
||||
/// <summary>
/// Outcome of one page-fetch operation, returned to the calling service.
/// </summary>
public sealed class FetchPageResponse
{
    /// <summary>URL the browser ended up on once redirects were followed.</summary>
    public string Url { get; set; } = "";

    /// <summary>HTTP status code of the page; <c>0</c> when a network failure prevented a response.</summary>
    public int StatusCode { get; set; }

    /// <summary>Complete rendered HTML as produced by Playwright.</summary>
    public string Html { get; set; } = "";

    /// <summary>Plain text derived from the HTML: script/style removed and whitespace normalised.</summary>
    public string Text { get; set; } = "";

    /// <summary><c>true</c> when the fetch worked; <c>false</c> on timeout or network error.</summary>
    public bool Success { get; set; }

    /// <summary>Exception message describing the failure when <see cref="Success"/> is <c>false</c>.</summary>
    public string? Error { get; set; }
}
|
||||
@@ -0,0 +1,16 @@
|
||||
using Refit;
|
||||
|
||||
namespace PageFetcher.Models;
|
||||
|
||||
/// <summary>
/// Refit contract for calling the internal page-fetcher-api service.
/// Every call must carry the <c>X-Internal-Api-Key</c> header, which is set up when the client is registered.
/// </summary>
public interface IPageFetcherApiClient
{
    /// <summary>
    /// Asks the service to load a page in headless Chromium and hands back the rendered HTML
    /// together with the plain text extracted from it.
    /// </summary>
    /// <param name="request">URL, wait condition and caller name for the fetch.</param>
    /// <param name="ct">Token used to cancel the HTTP call.</param>
    /// <returns>The fetch outcome reported by the service.</returns>
    [Post("/api/page/fetch")]
    Task<FetchPageResponse> FetchAsync([Body] FetchPageRequest request, CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,12 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<AssemblyName>page-fetcher-api-models</AssemblyName>
|
||||
<RootNamespace>PageFetcher.Models</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Refit.HttpClientFactory" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,47 @@
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using PageFetcher.Models;
|
||||
using PageFetcherApi.Services;
|
||||
using Swashbuckle.AspNetCore.Annotations;
|
||||
|
||||
namespace PageFetcherApi.Controllers;
|
||||
|
||||
/// <summary>
/// HTTP entry point for page fetching: validates the requested URL, then delegates to
/// <see cref="PageFetcherService"/> to load it through Playwright.
/// </summary>
[ApiController]
[Route("api/page")]
public sealed class PageController(PageFetcherService service, ILogger<PageController> logger) : ControllerBase
{
    /// <summary>
    /// Loads a web page in headless Chromium and responds with the rendered HTML and its plain text.
    /// </summary>
    /// <param name="request">The URL to fetch plus wait condition and caller name.</param>
    /// <param name="ct">Request-abort token, forwarded to the fetch service.</param>
    /// <returns>
    /// 200 with the <see cref="FetchPageResponse"/>, or 400 when the URL is missing or is not an
    /// absolute HTTP/HTTPS URL.
    /// </returns>
    [HttpPost("fetch")]
    [SwaggerOperation(Summary = "Fetch a web page", Description = "Navigates to the given URL using Playwright, returns rendered HTML and stripped plain text.")]
    [SwaggerResponse(StatusCodes.Status200OK, "Page fetched successfully", typeof(FetchPageResponse))]
    [SwaggerResponse(StatusCodes.Status400BadRequest, "Invalid or non-HTTP(S) URL")]
    public async Task<ActionResult<FetchPageResponse>> Fetch([FromBody] FetchPageRequest request, CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(request.Url))
        {
            return BadRequest(new { Error = "Url is required." });
        }

        // Only absolute http:// and https:// URLs are accepted; anything else (file:, relative, ...) is rejected.
        var isHttpUrl = Uri.TryCreate(request.Url, UriKind.Absolute, out var parsed)
            && (parsed.Scheme == Uri.UriSchemeHttp || parsed.Scheme == Uri.UriSchemeHttps);
        if (!isHttpUrl)
        {
            return BadRequest(new { Error = "Url must be an absolute HTTP or HTTPS URL." });
        }

        logger.LogInformation("Fetch request: {Url} | caller={Caller} | waitFor={WaitFor}",
            request.Url, request.CallerService, request.WaitFor);

        var fetched = await service.FetchAsync(request, ct);
        return Ok(fetched);
    }
}
|
||||
@@ -0,0 +1,50 @@
|
||||
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
|
||||
ARG BUILD_CONFIGURATION=Release
|
||||
WORKDIR /src
|
||||
COPY Directory.Packages.props ./
|
||||
|
||||
COPY Apis/page-fetcher-api/page-fetcher-api.csproj Apis/page-fetcher-api/
|
||||
COPY Apis/page-fetcher-data/page-fetcher-data.csproj Apis/page-fetcher-data/
|
||||
COPY Apis/page-fetcher-api-models/page-fetcher-api-models.csproj Apis/page-fetcher-api-models/
|
||||
COPY Apis/common/common.csproj Apis/common/
|
||||
COPY Apis/shared-data/shared-data.csproj Apis/shared-data/
|
||||
COPY Helpers/startup-helpers/startup-helpers.csproj Helpers/startup-helpers/
|
||||
COPY Helpers/common-helpers/common-helpers.csproj Helpers/common-helpers/
|
||||
|
||||
RUN dotnet restore Apis/page-fetcher-api/page-fetcher-api.csproj
|
||||
|
||||
COPY Apis/page-fetcher-api/ Apis/page-fetcher-api/
|
||||
COPY Apis/page-fetcher-data/ Apis/page-fetcher-data/
|
||||
COPY Apis/page-fetcher-api-models/ Apis/page-fetcher-api-models/
|
||||
COPY Apis/common/ Apis/common/
|
||||
COPY Apis/shared-data/ Apis/shared-data/
|
||||
COPY Helpers/startup-helpers/ Helpers/startup-helpers/
|
||||
COPY Helpers/common-helpers/ Helpers/common-helpers/
|
||||
|
||||
RUN dotnet publish Apis/page-fetcher-api/page-fetcher-api.csproj -c $BUILD_CONFIGURATION -o /app/publish /p:UseAppHost=false
|
||||
|
||||
# Download Playwright Chromium browser in the build stage.
|
||||
# Node.js is only needed here to run npx — it is not copied to the final image.
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends nodejs npm \
|
||||
&& npx --yes playwright@1.60.0 install chromium \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS final
|
||||
WORKDIR /app
|
||||
|
||||
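# System libraries required by Chromium. Package names must match the distro of the aspnet base image.
# NOTE(review): libasound2t64 is the Ubuntu 24.04 / Debian trixie package name (Debian bookworm ships libasound2) — confirm the base image distro.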
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \
|
||||
libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 \
|
||||
libgbm1 libasound2t64 libpango-1.0-0 libcairo2 libatspi2.0-0 \
|
||||
libwayland-client0 libx11-xcb1 libx11-6 libxcb1 libxext6 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy the Playwright Chromium browser from the build stage
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
COPY --from=build /ms-playwright /ms-playwright
|
||||
|
||||
COPY --from=build /app/publish .
|
||||
|
||||
ENTRYPOINT ["dotnet", "page-fetcher-api.dll"]
|
||||
@@ -0,0 +1,74 @@
|
||||
using System.Reflection;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using PageFetcher.Data;
|
||||
using PageFetcherApi.Services;
|
||||
using Serilog;
|
||||
using StartupHelpers;
|
||||
|
||||
// Entry point of the page-fetcher-api service: loads environment/config, wires up DI,
// builds the HTTP pipeline, applies pending EF Core migrations and then runs the web host.
StartupExtensions.LoadDotEnvFile();

const string ServiceName = "page-fetcher-api";
var appVersion = StartupExtensions.GetApplicationVersion(Assembly.GetExecutingAssembly());

try
{
    var builder = WebApplication.CreateBuilder(args);

    builder.ConfigureJsonSerilog(ServiceName, appVersion);
    Log.Information("Starting {Service} version {AppVersion}", ServiceName, appVersion);

    builder.AddAzureKeyVaultIfConfigured();

    builder.Services.Configure<PageFetcherSettings>(builder.Configuration.GetSection("PageFetcher"));

    builder.Services.AddDbContext<PageFetchDbContext>(options =>
    {
        var connectionString = builder.Services.GetConfiguredDbConnectionString(builder.Configuration);
        options.UseSqlServer(connectionString, sql =>
        {
            // Keep this context's migration history in its own schema-scoped table.
            sql.MigrationsHistoryTable(PageFetchDbContext.MigrationTableName, PageFetchDbContext.SchemaName);
            sql.MigrationsAssembly("page-fetcher-data");
        });
    });

    // Playwright browser: one singleton instance that is also registered as the hosted service,
    // so the instance the host starts is the same one injected into request-scoped services.
    builder.Services.AddSingleton<PlaywrightBrowserService>();
    builder.Services.AddHostedService(sp => sp.GetRequiredService<PlaywrightBrowserService>());

    builder.Services.AddScoped<PageFetcherService>();

    builder.Services.AddControllers();
    builder.Services.AddSwaggerWithXmlComments(Assembly.GetExecutingAssembly(), "Page Fetcher API");

    var app = builder.Build();

    app.LogStartupDiagnostics(ServiceName);

    // Middleware order is significant; it is kept exactly as originally written.
    app.UseDefaultSerilogRequestLogging();
    app.UseJsonExceptionHandler(ServiceName);
    app.UseInternalApiKeyProtection();
    app.UseSwaggerInDevelopment("Page Fetcher API", "PageFetcherAPI");

    app.UseRouting();
    app.UseAuthorization();
    app.MapControllers();

    Log.Information("Running EF Core migrations if any");
    using (var scope = app.Services.CreateScope())
    {
        var db = scope.ServiceProvider.GetRequiredService<PageFetchDbContext>();
        db.Database.Migrate();
    }

    Log.Information("{Service} startup complete. Listening for requests...", ServiceName);
    app.Run();
}
// HostAbortedException is thrown on purpose by design-time tooling (e.g. `dotnet ef`) to stop the
// host once the service provider has been built; it is not a crash, so it is allowed to propagate
// instead of being logged as fatal.
catch (Exception ex) when (ex is not Microsoft.Extensions.Hosting.HostAbortedException)
{
    Log.Fatal(ex, "{Service} terminated unexpectedly", ServiceName);

    // Report the failure to the process supervisor; without this the process would exit with
    // code 0 even though startup (or the host) failed.
    Environment.ExitCode = 1;
}
finally
{
    Log.Information("Shutting down {Service}", ServiceName);
    Log.CloseAndFlush();
}
|
||||
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"profiles": {
|
||||
"page-fetcher-api": {
|
||||
"commandName": "Project",
|
||||
"launchBrowser": true,
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
},
|
||||
"applicationUrl": "https://localhost:50268;http://localhost:50269"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
using System.Diagnostics;
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Microsoft.Playwright;
|
||||
using PageFetcher.Data;
|
||||
using PageFetcher.Data.Entities;
|
||||
using PageFetcher.Models;
|
||||
|
||||
namespace PageFetcherApi.Services;
|
||||
|
||||
/// <summary>
/// Fetches a web page via Playwright, extracts plain text, persists an audit record of the attempt
/// to the database, and returns a <see cref="FetchPageResponse"/>.
/// </summary>
public sealed class PageFetcherService
{
    // Column limits of the pageFetcher audit table as declared by the InitialSchema migration.
    // Longer values would make SaveChangesAsync fail, so they are truncated before persisting.
    private const int MaxUrlLength = 2_000;
    private const int MaxCallerServiceLength = 64;
    private const int MaxErrorMessageLength = 2_000;

    // Extracted text is never truncated below this many characters, whatever MaxTextChars is set to.
    private const int MinTextChars = 4_000;

    private readonly PlaywrightBrowserService _browserService;
    private readonly PageFetchDbContext _db;
    private readonly PageFetcherSettings _settings;
    private readonly ILogger<PageFetcherService> _logger;

    public PageFetcherService(
        PlaywrightBrowserService browserService,
        PageFetchDbContext db,
        IOptions<PageFetcherSettings> settings,
        ILogger<PageFetcherService> logger)
    {
        _browserService = browserService;
        _db = db;
        _settings = settings.Value;
        _logger = logger;
    }

    /// <summary>
    /// Fetches the page named by <paramref name="request"/> using Playwright, saves the fetch record,
    /// and returns the HTML and extracted text.
    /// Returns a failed response (with <see cref="FetchPageResponse.Success"/> = false) rather than throwing
    /// on network or navigation errors. A navigation timeout is not treated as a failure: whatever
    /// content the page has at that point is returned.
    /// </summary>
    /// <param name="request">URL, wait condition and caller name for the fetch.</param>
    /// <param name="ct">Token applied to the database save.</param>
    /// <returns>The fetch outcome; <c>StatusCode</c> is <c>0</c> when no HTTP response was obtained.</returns>
    public async Task<FetchPageResponse> FetchAsync(FetchPageRequest request, CancellationToken ct)
    {
        var sw = Stopwatch.StartNew();
        string html = string.Empty;
        string text = string.Empty;
        int? statusCode = null;
        bool success = false;
        string? errorMessage = null;
        string finalUrl = request.Url;

        try
        {
            var page = await _browserService.Browser.NewPageAsync();
            // Dispose the page when this try block is left, on success and on failure alike.
            await using var pageScope = page.ConfigureAwait(false);

            var waitUntil = ResolveWaitUntil(request.WaitFor);

            IResponse? response;
            try
            {
                response = await page.GotoAsync(request.Url, new PageGotoOptions
                {
                    WaitUntil = waitUntil,
                    Timeout = _settings.TimeoutSeconds * 1_000 // Playwright expects milliseconds
                });
            }
            catch (TimeoutException)
            {
                // The wait condition was not reached in time; continue with whatever has loaded so far.
                _logger.LogWarning("Playwright navigation timeout ({WaitUntil}) for {Url}, using partial content",
                    waitUntil, request.Url);
                response = null;
            }

            statusCode = response?.Status;
            finalUrl = page.Url;
            html = await page.ContentAsync();
            text = ExtractText(html);
            success = true;

            _logger.LogInformation("Fetched {Url} → HTTP {Status} | HTML {HtmlLen} chars | text {TextLen} chars | {DurationMs} ms",
                request.Url, statusCode?.ToString() ?? "timeout", html.Length, text.Length, sw.ElapsedMilliseconds);
        }
        catch (Exception ex)
        {
            errorMessage = ex.Message;
            _logger.LogError(ex, "Failed to fetch {Url}", request.Url);
        }
        finally
        {
            sw.Stop();
        }

        // Persist the fetch record. Length-limited columns receive truncated values so that an
        // over-long URL, caller name or exception message cannot make the save itself fail.
        var entity = new PageFetchEntity
        {
            Id = Guid.NewGuid().ToString("N"),
            Url = Truncate(request.Url ?? string.Empty, MaxUrlLength),
            CallerService = Truncate(request.CallerService ?? string.Empty, MaxCallerServiceLength),
            HttpStatusCode = statusCode,
            Html = html,
            Text = text,
            DurationMs = sw.ElapsedMilliseconds,
            Success = success,
            ErrorMessage = errorMessage is null ? null : Truncate(errorMessage, MaxErrorMessageLength)
        };

        _db.PageFetches.Add(entity);
        await _db.SaveChangesAsync(ct);

        // The caller still receives the full, untruncated error message.
        return new FetchPageResponse
        {
            Url = finalUrl,
            StatusCode = statusCode ?? 0,
            Html = html,
            Text = text,
            Success = success,
            Error = errorMessage
        };
    }

    /// <summary>
    /// Maps the requested wait condition onto a Playwright <see cref="WaitUntilState"/>.
    /// A missing or blank value falls back to <see cref="PageFetcherSettings.DefaultWaitFor"/>;
    /// any unrecognised value means <see cref="WaitUntilState.NetworkIdle"/>.
    /// </summary>
    private WaitUntilState ResolveWaitUntil(string? requested)
    {
        var name = string.IsNullOrWhiteSpace(requested) ? _settings.DefaultWaitFor : requested;

        return name?.ToLowerInvariant() switch
        {
            "load" => WaitUntilState.Load,
            "domcontentloaded" => WaitUntilState.DOMContentLoaded,
            _ => WaitUntilState.NetworkIdle
        };
    }

    /// <summary>Returns <paramref name="value"/> cut down to at most <paramref name="maxLength"/> characters.</summary>
    private static string Truncate(string value, int maxLength) =>
        value.Length <= maxLength ? value : value[..maxLength];

    /// <summary>
    /// Strips script/style blocks and all HTML tags from raw HTML, decodes HTML entities,
    /// normalises whitespace, and truncates to <see cref="PageFetcherSettings.MaxTextChars"/>
    /// (but never to fewer than 4,000 characters).
    /// </summary>
    private string ExtractText(string html)
    {
        if (string.IsNullOrWhiteSpace(html)) return string.Empty;

        var text = html;
        text = Regex.Replace(text, "<script[\\s\\S]*?</script>", " ", RegexOptions.IgnoreCase);
        text = Regex.Replace(text, "<style[\\s\\S]*?</style>", " ", RegexOptions.IgnoreCase);
        text = Regex.Replace(text, "<[^>]+>", " ");
        text = WebUtility.HtmlDecode(text);
        // Splitting on a null separator splits on any whitespace; re-joining collapses every run to one space.
        text = string.Join(' ', text.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries)).Trim();

        var max = Math.Max(MinTextChars, _settings.MaxTextChars);
        return text.Length <= max ? text : text[..max];
    }
}
|
||||
@@ -0,0 +1,17 @@
|
||||
namespace PageFetcherApi.Services;
|
||||
|
||||
/// <summary>
/// Tunable runtime options of the page-fetcher service, bound from the
/// <c>PageFetcher</c> configuration section.
/// </summary>
public sealed class PageFetcherSettings
{
    /// <summary>
    /// Wait condition Playwright uses by default: <c>networkidle</c>, <c>load</c> or <c>domcontentloaded</c>.
    /// </summary>
    public string DefaultWaitFor { get; set; } = "networkidle";

    /// <summary>How long, in seconds, a page navigation may take before it times out.</summary>
    public int TimeoutSeconds { get; set; } = 30;

    /// <summary>Upper bound on the number of characters kept in the extracted text that is stored/returned.</summary>
    public int MaxTextChars { get; set; } = 60_000;
}
|
||||
@@ -0,0 +1,49 @@
|
||||
using Microsoft.Playwright;
|
||||
|
||||
namespace PageFetcherApi.Services;
|
||||
|
||||
/// <summary>
/// Hosted service that holds the single Playwright Chromium browser used by the application.
/// The browser is launched in <see cref="StartAsync"/>, closed in <see cref="StopAsync"/>, and
/// made available to <see cref="PageFetcherService"/> through <see cref="Browser"/>.
/// </summary>
public sealed class PlaywrightBrowserService(ILogger<PlaywrightBrowserService> logger) : IHostedService, IAsyncDisposable
{
    private IPlaywright? _playwright;
    private IBrowser? _browser;

    /// <summary>
    /// The Chromium browser instance launched by <see cref="StartAsync"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">Accessed before the browser has been launched.</exception>
    public IBrowser Browser
    {
        get
        {
            if (_browser is null)
            {
                throw new InvalidOperationException("Browser has not been started yet.");
            }

            return _browser;
        }
    }

    /// <inheritdoc />
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Launching Playwright Chromium browser...");

        _playwright = await Playwright.CreateAsync();

        // Headless Chromium with the sandbox disabled and /dev/shm usage avoided —
        // presumably chosen for running inside a container; confirm before changing.
        var launchOptions = new BrowserTypeLaunchOptions
        {
            Headless = true,
            Args = new[] { "--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage" }
        };
        _browser = await _playwright.Chromium.LaunchAsync(launchOptions);

        logger.LogInformation("Playwright Chromium browser launched successfully.");
    }

    /// <inheritdoc />
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        logger.LogInformation("Closing Playwright Chromium browser...");

        if (_browser is { } browser)
        {
            await browser.CloseAsync();
        }
    }

    /// <inheritdoc />
    public async ValueTask DisposeAsync()
    {
        if (_browser is { } browser)
        {
            await browser.DisposeAsync();
        }

        _playwright?.Dispose();
    }
}
|
||||
@@ -0,0 +1,73 @@
|
||||
{
|
||||
"Serilog": {
|
||||
"Using": [
|
||||
"Serilog.Sinks.Console",
|
||||
"Serilog.Sinks.File"
|
||||
],
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"Microsoft.AspNetCore.Hosting": "Information",
|
||||
"Microsoft.AspNetCore.Routing": "Warning",
|
||||
"System.Net.Http.HttpClient": "Warning",
|
||||
"PageFetcherApi": "Information"
|
||||
}
|
||||
},
|
||||
"WriteTo": [
|
||||
{
|
||||
"Name": "Console",
|
||||
"Args": {
|
||||
"outputTemplate": "[{Timestamp:HH:mm:ss} {Level:u3}] {SourceContext}: {Message:lj}{NewLine}{Exception}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"Name": "File",
|
||||
"Args": {
|
||||
"path": "logs/page-fetcher-api-.log",
|
||||
"rollingInterval": "Day",
|
||||
"retainedFileCountLimit": 30,
|
||||
"outputTemplate": "{Timestamp:yyyy-MM-dd HH:mm:ss.fff zzz} [{Level:u3}] {SourceContext}: {Message:lj}{NewLine}{Exception}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"Enrich": [
|
||||
"FromLogContext",
|
||||
"WithMachineName",
|
||||
"WithEnvironmentName"
|
||||
]
|
||||
},
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"Microsoft.AspNetCore.Hosting": "Information",
|
||||
"Microsoft.AspNetCore.Routing": "Warning",
|
||||
"System.Net.Http.HttpClient": "Warning",
|
||||
"PageFetcherApi": "Information"
|
||||
}
|
||||
},
|
||||
"LogEnvironmentOnStartup": true,
|
||||
"AllowedHosts": "*",
|
||||
"KeyVault": {
|
||||
"VaultUri": "",
|
||||
"Enabled": false
|
||||
},
|
||||
"Database": {
|
||||
"Host": "localhost",
|
||||
"Port": 1433,
|
||||
"Name": "MyAiDb",
|
||||
"User": "sa",
|
||||
"Password": "",
|
||||
"TrustServerCertificate": true
|
||||
},
|
||||
"InternalApi": {
|
||||
"ApiKey": "",
|
||||
"RequireApiKey": true
|
||||
},
|
||||
"PageFetcher": {
|
||||
"DefaultWaitFor": "networkidle",
|
||||
"TimeoutSeconds": 30,
|
||||
"MaxTextChars": 60000
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk.Web">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
|
||||
<RootNamespace>PageFetcherApi</RootNamespace>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
<NoWarn>$(NoWarn);1591</NoWarn>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Playwright" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Design">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="Serilog.AspNetCore" />
|
||||
<PackageReference Include="Serilog.Enrichers.Environment" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" />
|
||||
<PackageReference Include="Serilog.Sinks.File" />
|
||||
<PackageReference Include="Serilog.Sinks.Email" />
|
||||
<PackageReference Include="Swashbuckle.AspNetCore" />
|
||||
<PackageReference Include="Swashbuckle.AspNetCore.Annotations" />
|
||||
<PackageReference Include="DotNetEnv" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\page-fetcher-data\page-fetcher-data.csproj" />
|
||||
<ProjectReference Include="..\page-fetcher-api-models\page-fetcher-api-models.csproj" />
|
||||
<ProjectReference Include="..\common\common.csproj" />
|
||||
<ProjectReference Include="..\..\Helpers\startup-helpers\startup-helpers.csproj" />
|
||||
<ProjectReference Include="..\..\Helpers\common-helpers\common-helpers.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,34 @@
|
||||
using Shared.Data.Entities;
|
||||
|
||||
namespace PageFetcher.Data.Entities;
|
||||
|
||||
/// <summary>
/// One row of the page-fetch audit trail: describes a single fetch carried out by the page-fetcher-api,
/// including the complete rendered HTML and the plain text extracted from it.
/// </summary>
public sealed class PageFetchEntity : BaseEntity
{
    /// <summary>URL the caller asked to fetch.</summary>
    public string Url { get; set; } = "";

    /// <summary>Service that asked for the fetch (e.g. <c>cv-matcher-api</c>, <c>cv-search-job</c>).</summary>
    public string CallerService { get; set; } = "";

    /// <summary>HTTP status code sent by the remote server; <c>null</c> on network failure.</summary>
    public int? HttpStatusCode { get; set; }

    /// <summary>Complete rendered HTML as produced by Playwright.</summary>
    public string Html { get; set; } = "";

    /// <summary>Plain text derived from the HTML: script/style removed and whitespace normalised.</summary>
    public string Text { get; set; } = "";

    /// <summary>Duration of the Playwright round trip, in milliseconds.</summary>
    public long DurationMs { get; set; }

    /// <summary><c>true</c> if the page was fetched successfully, <c>false</c> on timeout or network error.</summary>
    public bool Success { get; set; }

    /// <summary>Exception message recorded when <see cref="Success"/> is <c>false</c>.</summary>
    public string? ErrorMessage { get; set; }
}
|
||||
@@ -0,0 +1,8 @@
|
||||
namespace PageFetcher.Data;
|
||||
|
||||
/// <summary>
/// Names shared by the pageFetcher EF Core schema: the SQL schema itself and the table that
/// holds its migration history.
/// </summary>
public static class MigrationConstants
{
    /// <summary>SQL schema that contains the page-fetcher tables.</summary>
    public const string SchemaName = "pageFetcher";

    /// <summary>Table in which EF Core records applied migrations.</summary>
    public const string MigrationTableName = "_Migrations";
}
|
||||
@@ -0,0 +1,82 @@
|
||||
// <auto-generated />
|
||||
using System;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.EntityFrameworkCore.Infrastructure;
|
||||
using Microsoft.EntityFrameworkCore.Metadata;
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
|
||||
using PageFetcher.Data;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace PageFetcher.Data.Migrations
|
||||
{
|
||||
[DbContext(typeof(PageFetchDbContext))]
|
||||
[Migration("20260608143523_InitialSchema")]
|
||||
partial class InitialSchema
|
||||
{
|
||||
/// <inheritdoc />
|
||||
protected override void BuildTargetModel(ModelBuilder modelBuilder)
|
||||
{
|
||||
#pragma warning disable 612, 618
|
||||
modelBuilder
|
||||
.HasDefaultSchema("pageFetcher")
|
||||
.HasAnnotation("ProductVersion", "10.0.7")
|
||||
.HasAnnotation("Relational:MaxIdentifierLength", 128);
|
||||
|
||||
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
|
||||
|
||||
modelBuilder.Entity("PageFetcher.Data.Entities.PageFetchEntity", b =>
|
||||
{
|
||||
b.Property<string>("Id")
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<string>("CallerService")
|
||||
.IsRequired()
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<DateTime>("CreatedAt")
|
||||
.ValueGeneratedOnAdd()
|
||||
.HasColumnType("datetime2")
|
||||
.HasDefaultValueSql("SYSUTCDATETIME()");
|
||||
|
||||
b.Property<long>("DurationMs")
|
||||
.HasColumnType("bigint");
|
||||
|
||||
b.Property<string>("ErrorMessage")
|
||||
.HasMaxLength(2000)
|
||||
.HasColumnType("nvarchar(2000)");
|
||||
|
||||
b.Property<string>("Html")
|
||||
.IsRequired()
|
||||
.HasColumnType("nvarchar(max)");
|
||||
|
||||
b.Property<int?>("HttpStatusCode")
|
||||
.HasColumnType("int");
|
||||
|
||||
b.Property<bool>("Success")
|
||||
.HasColumnType("bit");
|
||||
|
||||
b.Property<string>("Text")
|
||||
.IsRequired()
|
||||
.HasColumnType("nvarchar(max)");
|
||||
|
||||
b.Property<string>("Url")
|
||||
.IsRequired()
|
||||
.HasMaxLength(2000)
|
||||
.HasColumnType("nvarchar(2000)");
|
||||
|
||||
b.HasKey("Id");
|
||||
|
||||
b.HasIndex("CreatedAt");
|
||||
|
||||
b.HasIndex("Url");
|
||||
|
||||
b.ToTable("PageFetches", "pageFetcher");
|
||||
});
|
||||
#pragma warning restore 612, 618
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
using System;
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace PageFetcher.Data.Migrations
|
||||
{
|
||||
/// <inheritdoc />
public partial class InitialSchema : Migration
{
    // Single table created (and dropped) by this migration.
    private const string TableName = "PageFetches";

    /// <summary>
    /// Ensures the page-fetcher schema exists, creates the <c>PageFetches</c> table with its
    /// primary key, and adds one index on <c>CreatedAt</c> and one on <c>Url</c>.
    /// </summary>
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.EnsureSchema(name: MigrationConstants.SchemaName);

        migrationBuilder.CreateTable(
            name: TableName,
            schema: MigrationConstants.SchemaName,
            columns: t => new
            {
                Id = t.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
                Url = t.Column<string>(type: "nvarchar(2000)", maxLength: 2000, nullable: false),
                CallerService = t.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
                HttpStatusCode = t.Column<int>(type: "int", nullable: true),
                Html = t.Column<string>(type: "nvarchar(max)", nullable: false),
                Text = t.Column<string>(type: "nvarchar(max)", nullable: false),
                DurationMs = t.Column<long>(type: "bigint", nullable: false),
                Success = t.Column<bool>(type: "bit", nullable: false),
                ErrorMessage = t.Column<string>(type: "nvarchar(2000)", maxLength: 2000, nullable: true),
                // The database stamps the row when no value is supplied.
                CreatedAt = t.Column<DateTime>(type: "datetime2", nullable: false, defaultValueSql: "SYSUTCDATETIME()")
            },
            constraints: t =>
            {
                t.PrimaryKey("PK_PageFetches", row => row.Id);
            });

        migrationBuilder.CreateIndex(
            name: "IX_PageFetches_CreatedAt",
            schema: MigrationConstants.SchemaName,
            table: TableName,
            column: "CreatedAt");

        migrationBuilder.CreateIndex(
            name: "IX_PageFetches_Url",
            schema: MigrationConstants.SchemaName,
            table: TableName,
            column: "Url");
    }

    /// <summary>
    /// Reverts <see cref="Up"/> by dropping the <c>PageFetches</c> table. The schema itself is left in place.
    /// </summary>
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.DropTable(
            name: TableName,
            schema: MigrationConstants.SchemaName);
    }
}
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
// <auto-generated />
|
||||
using System;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.EntityFrameworkCore.Infrastructure;
|
||||
using Microsoft.EntityFrameworkCore.Metadata;
|
||||
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
|
||||
using PageFetcher.Data;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace PageFetcher.Data.Migrations
|
||||
{
|
||||
[DbContext(typeof(PageFetchDbContext))]
|
||||
partial class PageFetchDbContextModelSnapshot : ModelSnapshot
|
||||
{
|
||||
protected override void BuildModel(ModelBuilder modelBuilder)
|
||||
{
|
||||
#pragma warning disable 612, 618
|
||||
modelBuilder
|
||||
.HasDefaultSchema("pageFetcher")
|
||||
.HasAnnotation("ProductVersion", "10.0.7")
|
||||
.HasAnnotation("Relational:MaxIdentifierLength", 128);
|
||||
|
||||
SqlServerModelBuilderExtensions.UseIdentityColumns(modelBuilder);
|
||||
|
||||
modelBuilder.Entity("PageFetcher.Data.Entities.PageFetchEntity", b =>
|
||||
{
|
||||
b.Property<string>("Id")
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<string>("CallerService")
|
||||
.IsRequired()
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<DateTime>("CreatedAt")
|
||||
.ValueGeneratedOnAdd()
|
||||
.HasColumnType("datetime2")
|
||||
.HasDefaultValueSql("SYSUTCDATETIME()");
|
||||
|
||||
b.Property<long>("DurationMs")
|
||||
.HasColumnType("bigint");
|
||||
|
||||
b.Property<string>("ErrorMessage")
|
||||
.HasMaxLength(2000)
|
||||
.HasColumnType("nvarchar(2000)");
|
||||
|
||||
b.Property<string>("Html")
|
||||
.IsRequired()
|
||||
.HasColumnType("nvarchar(max)");
|
||||
|
||||
b.Property<int?>("HttpStatusCode")
|
||||
.HasColumnType("int");
|
||||
|
||||
b.Property<bool>("Success")
|
||||
.HasColumnType("bit");
|
||||
|
||||
b.Property<string>("Text")
|
||||
.IsRequired()
|
||||
.HasColumnType("nvarchar(max)");
|
||||
|
||||
b.Property<string>("Url")
|
||||
.IsRequired()
|
||||
.HasMaxLength(2000)
|
||||
.HasColumnType("nvarchar(2000)");
|
||||
|
||||
b.HasKey("Id");
|
||||
|
||||
b.HasIndex("CreatedAt");
|
||||
|
||||
b.HasIndex("Url");
|
||||
|
||||
b.ToTable("PageFetches", "pageFetcher");
|
||||
});
|
||||
#pragma warning restore 612, 618
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using PageFetcher.Data.Entities;
|
||||
|
||||
namespace PageFetcher.Data;
|
||||
|
||||
/// <summary>
/// Entity Framework Core context bound to the <c>pageFetcher</c> schema.
/// Exposes the <c>PageFetches</c> audit table through <see cref="PageFetches"/>.
/// </summary>
public sealed class PageFetchDbContext : DbContext
{
    /// <summary>Default schema for every table mapped by this context.</summary>
    public const string SchemaName = MigrationConstants.SchemaName;

    /// <summary>Table in which applied migrations are tracked for this context.</summary>
    public const string MigrationTableName = MigrationConstants.MigrationTableName;

    /// <summary>Creates the context from externally supplied options.</summary>
    public PageFetchDbContext(DbContextOptions<PageFetchDbContext> options)
        : base(options)
    {
    }

    /// <summary>Set of persisted page-fetch records.</summary>
    public DbSet<PageFetchEntity> PageFetches => Set<PageFetchEntity>();

    /// <summary>
    /// Points the SQL Server provider's migrations-history table at
    /// <see cref="MigrationTableName"/> inside <see cref="SchemaName"/>.
    /// </summary>
    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
    {
        base.OnConfiguring(optionsBuilder);

        optionsBuilder.UseSqlServer(
            sqlServer => sqlServer.MigrationsHistoryTable(MigrationTableName, SchemaName));
    }

    /// <summary>
    /// Maps <see cref="PageFetchEntity"/> to the <c>PageFetches</c> table: key, column lengths,
    /// required columns, the database-side default for <c>CreatedAt</c>, and two indexes.
    /// </summary>
    protected override void OnModelCreating(ModelBuilder modelBuilder)
    {
        modelBuilder.HasDefaultSchema(SchemaName);

        var pageFetch = modelBuilder.Entity<PageFetchEntity>();

        pageFetch.ToTable("PageFetches");
        pageFetch.HasKey(f => f.Id);

        pageFetch.Property(f => f.Id).HasMaxLength(64);
        pageFetch.Property(f => f.Url).HasMaxLength(2000).IsRequired();
        pageFetch.Property(f => f.CallerService).HasMaxLength(64).IsRequired();
        pageFetch.Property(f => f.Html).IsRequired();
        pageFetch.Property(f => f.Text).IsRequired();
        pageFetch.Property(f => f.ErrorMessage).HasMaxLength(2000);

        // The database fills in the creation timestamp when none is provided.
        pageFetch.Property(f => f.CreatedAt).HasDefaultValueSql("SYSUTCDATETIME()");

        // NOTE(review): Url allows 2000 characters; on SQL Server that may exceed the
        // nonclustered index key size limit, so very long URLs could fail to insert.
        // Confirm, and change model + migration + snapshot together if so.
        pageFetch.HasIndex(f => f.Url);
        pageFetch.HasIndex(f => f.CreatedAt);
    }
}
|
||||
@@ -0,0 +1,19 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<AssemblyName>page-fetcher-data</AssemblyName>
|
||||
<RootNamespace>PageFetcher.Data</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Design">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\shared-data\shared-data.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -14,6 +14,7 @@ using JobScheduler.Tasks;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using PageFetcher.Models;
|
||||
using Refit;
|
||||
using Serilog;
|
||||
using Common.Settings;
|
||||
@@ -81,7 +82,19 @@ try
|
||||
client.DefaultRequestHeaders.Add("X-Internal-Api-Key", key);
|
||||
});
|
||||
|
||||
builder.Services.AddHttpClient<HtmlJobSearcher>();
|
||||
builder.Services.AddRefitClient<IPageFetcherApiClient>()
|
||||
.ConfigureHttpClient((sp, client) =>
|
||||
{
|
||||
var config = sp.GetRequiredService<Microsoft.Extensions.Configuration.IConfiguration>();
|
||||
var baseUrl = config["PageFetcherApi:BaseUrl"] ?? string.Empty;
|
||||
if (!string.IsNullOrWhiteSpace(baseUrl))
|
||||
client.BaseAddress = new Uri(baseUrl.TrimEnd('/') + "/");
|
||||
var key = config["PageFetcherApi:InternalApiKey"];
|
||||
if (!string.IsNullOrWhiteSpace(key))
|
||||
client.DefaultRequestHeaders.Add("X-Internal-Api-Key", key);
|
||||
});
|
||||
|
||||
builder.Services.AddSingleton<HtmlJobSearcher>();
|
||||
builder.Services.AddSingleton<CvSearchEmailSender>();
|
||||
|
||||
builder.Services.AddSingleton<CvSearchJobTask>();
|
||||
|
||||
@@ -1,36 +1,39 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Web;
|
||||
using CvMatcher.Models.Settings;
|
||||
using Microsoft.Playwright;
|
||||
using PageFetcher.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace CvSearchJob.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Config-driven HTML scraper that fetches a provider's job listing page and extracts matching job URLs.
|
||||
/// Uses a two-stage anchor filter: href must contain the provider's link pattern, and anchor text must
|
||||
/// contain at least one CV keyword.
|
||||
/// Supports both plain HTTP GET (default) and headless Chromium rendering for JS-heavy SPAs.
|
||||
/// A URL and its anchor text as scraped from a job listing search-results page.
|
||||
/// </summary>
|
||||
public sealed record JobCandidate(string Url, string Title);
|
||||
|
||||
/// <summary>
|
||||
/// Config-driven HTML scraper that fetches a provider's job listing page via <c>page-fetcher-api</c>
|
||||
/// and extracts matching job URL candidates.
|
||||
/// Uses a two-stage anchor filter: href must contain the provider's link pattern, and (optionally)
|
||||
/// anchor text must contain at least one CV keyword.
|
||||
/// </summary>
|
||||
public sealed class HtmlJobSearcher
|
||||
{
|
||||
private readonly HttpClient _http;
|
||||
private readonly IPageFetcherApiClient _pageFetcher;
|
||||
private readonly ILogger<HtmlJobSearcher> _logger;
|
||||
|
||||
public HtmlJobSearcher(HttpClient http, ILogger<HtmlJobSearcher> logger)
|
||||
public HtmlJobSearcher(IPageFetcherApiClient pageFetcher, ILogger<HtmlJobSearcher> logger)
|
||||
{
|
||||
_http = http;
|
||||
_pageFetcher = pageFetcher;
|
||||
_logger = logger;
|
||||
_http.Timeout = TimeSpan.FromSeconds(20);
|
||||
_http.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; MyAi.ro CV-Search/1.0)");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Fetches the provider's search result page for the combined initial + CV keywords, parses all anchor
|
||||
/// tags, applies the two-stage filter, and returns up to <see cref="JobProviderConfig.MaxResults"/> absolute URLs.
|
||||
/// Returns an empty list when the HTTP request fails rather than throwing.
|
||||
/// Fetches the provider's search result page, parses all anchor tags, applies the two-stage filter,
|
||||
/// and returns up to <see cref="JobProviderConfig.MaxResults"/> candidates (URL + title).
|
||||
/// Returns an empty list when the page fetch fails rather than throwing.
|
||||
/// </summary>
|
||||
public async Task<IReadOnlyList<string>> SearchJobUrlsAsync(
|
||||
public async Task<IReadOnlyList<JobCandidate>> SearchJobUrlsAsync(
|
||||
JobProviderConfig provider,
|
||||
IReadOnlyList<string> cvKeywords,
|
||||
string? location,
|
||||
@@ -61,24 +64,29 @@ public sealed class HtmlJobSearcher
|
||||
.Replace("{location-slug}", locationSlug);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Provider {Provider}: fetching {Url} [{Mode}] | CV keywords: [{Keywords}] | Location: {Location}",
|
||||
"Provider {Provider}: fetching {Url} | CV keywords: [{Keywords}] | Location: {Location}",
|
||||
provider.Name, searchUrl,
|
||||
provider.UseHeadlessBrowser ? "headless" : "http",
|
||||
string.Join(", ", cvKeywords),
|
||||
location ?? "(none)");
|
||||
|
||||
string? html;
|
||||
if (provider.UseHeadlessBrowser)
|
||||
html = await FetchWithPlaywrightAsync(provider.Name, searchUrl, ct);
|
||||
else
|
||||
html = await FetchWithHttpAsync(provider.Name, searchUrl, ct);
|
||||
var fetchResponse = await _pageFetcher.FetchAsync(new FetchPageRequest
|
||||
{
|
||||
Url = searchUrl,
|
||||
WaitFor = provider.UseHeadlessBrowser ? "networkidle" : "domcontentloaded",
|
||||
CallerService = "cv-search-job"
|
||||
}, ct);
|
||||
|
||||
if (html is null) return [];
|
||||
if (!fetchResponse.Success || string.IsNullOrWhiteSpace(fetchResponse.Html))
|
||||
{
|
||||
_logger.LogWarning("Provider {Provider}: page fetch failed — {Error}", provider.Name, fetchResponse.Error);
|
||||
return [];
|
||||
}
|
||||
|
||||
var html = fetchResponse.Html;
|
||||
_logger.LogInformation("Provider {Provider}: received {Length} chars of HTML", provider.Name, html.Length);
|
||||
|
||||
var baseUri = new Uri(searchUrl);
|
||||
var results = new List<string>();
|
||||
var results = new List<JobCandidate>();
|
||||
var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
var anchorPattern = new Regex(@"<a[^>]+href=[""']([^""']+)[""'][^>]*>(.*?)</a>",
|
||||
@@ -123,7 +131,7 @@ public sealed class HtmlJobSearcher
|
||||
|
||||
var url = absoluteUri.GetLeftPart(UriPartial.Path);
|
||||
if (seen.Add(url))
|
||||
results.Add(url);
|
||||
results.Add(new JobCandidate(url, anchorText));
|
||||
}
|
||||
|
||||
_logger.LogInformation(
|
||||
@@ -132,61 +140,4 @@ public sealed class HtmlJobSearcher
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private async Task<string?> FetchWithHttpAsync(string providerName, string url, CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
return await _http.GetStringAsync(url, ct);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Provider {Provider}: HTTP fetch failed for {Url}", providerName, url);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<string?> FetchWithPlaywrightAsync(string providerName, string url, CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
using var playwright = await Playwright.CreateAsync();
|
||||
await using var browser = await playwright.Chromium.LaunchAsync(new BrowserTypeLaunchOptions
|
||||
{
|
||||
Headless = true,
|
||||
Args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"]
|
||||
});
|
||||
|
||||
var page = await browser.NewPageAsync();
|
||||
|
||||
IResponse? response;
|
||||
try
|
||||
{
|
||||
response = await page.GotoAsync(url, new PageGotoOptions
|
||||
{
|
||||
WaitUntil = WaitUntilState.NetworkIdle,
|
||||
Timeout = 30_000
|
||||
});
|
||||
}
|
||||
catch (TimeoutException)
|
||||
{
|
||||
// NetworkIdle timed out — use whatever content rendered so far
|
||||
_logger.LogWarning("Provider {Provider}: Playwright NetworkIdle timeout for {Url}, using partial content", providerName, url);
|
||||
return await page.ContentAsync();
|
||||
}
|
||||
|
||||
if (response is null || response.Status >= 400)
|
||||
{
|
||||
_logger.LogWarning("Provider {Provider}: Playwright got HTTP {Status} for {Url}", providerName, response?.Status, url);
|
||||
return null;
|
||||
}
|
||||
|
||||
return await page.ContentAsync();
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Provider {Provider}: Playwright fetch failed for {Url}", providerName, url);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using PageFetcher.Models;
|
||||
|
||||
namespace CvSearchJob.Tasks;
|
||||
|
||||
@@ -24,6 +25,7 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
private readonly JobSearchSettings _settings;
|
||||
private readonly HtmlJobSearcher _searcher;
|
||||
private readonly ICvMatcherInternalApi _matcherApi;
|
||||
private readonly IPageFetcherApiClient _pageFetcher;
|
||||
private readonly CvSearchEmailSender _emailSender;
|
||||
private readonly ILogger<CvSearchJobTask> _logger;
|
||||
|
||||
@@ -34,6 +36,7 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
IOptions<JobSearchSettings> settings,
|
||||
HtmlJobSearcher searcher,
|
||||
ICvMatcherInternalApi matcherApi,
|
||||
IPageFetcherApiClient pageFetcher,
|
||||
CvSearchEmailSender emailSender,
|
||||
ILogger<CvSearchJobTask> logger)
|
||||
{
|
||||
@@ -41,6 +44,7 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
_settings = settings.Value;
|
||||
_searcher = searcher;
|
||||
_matcherApi = matcherApi;
|
||||
_pageFetcher = pageFetcher;
|
||||
_emailSender = emailSender;
|
||||
_logger = logger;
|
||||
}
|
||||
@@ -126,7 +130,8 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
|
||||
/// <summary>
|
||||
/// Runs the full search pipeline for a session: scrapes all providers, deduplicates URLs,
|
||||
/// scores each candidate via the matcher API, and persists results that meet the minimum score threshold.
|
||||
/// fetches each individual job page via page-fetcher-api, applies a keyword pre-filter,
|
||||
/// scores passing candidates via the matcher API, and persists results that meet the minimum score threshold.
|
||||
/// </summary>
|
||||
private async Task<List<JobSearchResultEntity>> RunSearchAsync(
|
||||
JobSearchSessionEntity session,
|
||||
@@ -138,30 +143,59 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
if (cvKeywords.Count == 0)
|
||||
_logger.LogWarning("Session {SessionId}: keyword list is empty — scraper will rely on provider InitialKeywords only", session.Id);
|
||||
|
||||
var jobUrls = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
var jobCandidates = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase); // url → title
|
||||
|
||||
foreach (var provider in providers)
|
||||
{
|
||||
var urls = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, session.Location, ct);
|
||||
_logger.LogInformation("Session {SessionId}: provider {Provider} returned {Count} URLs", session.Id, provider.Name, urls.Count);
|
||||
foreach (var url in urls) jobUrls.Add(url);
|
||||
var candidates = await _searcher.SearchJobUrlsAsync(provider, cvKeywords, session.Location, ct);
|
||||
_logger.LogInformation("Session {SessionId}: provider {Provider} returned {Count} candidates", session.Id, provider.Name, candidates.Count);
|
||||
foreach (var c in candidates)
|
||||
jobCandidates.TryAdd(c.Url, c.Title);
|
||||
}
|
||||
|
||||
var candidates = jobUrls.Take(_settings.MaxJobsToMatch).ToList();
|
||||
var deduped = jobCandidates.Take(_settings.MaxJobsToMatch).ToList();
|
||||
_logger.LogInformation(
|
||||
"Session {SessionId}: {Total} unique URLs across all providers, scoring {Scoring} (cap={Cap})",
|
||||
session.Id, jobUrls.Count, candidates.Count, _settings.MaxJobsToMatch);
|
||||
"Session {SessionId}: {Total} unique URLs across all providers, processing up to {Cap}",
|
||||
session.Id, jobCandidates.Count, deduped.Count);
|
||||
|
||||
var results = new List<JobSearchResultEntity>();
|
||||
|
||||
foreach (var url in candidates)
|
||||
foreach (var (url, title) in deduped)
|
||||
{
|
||||
try
|
||||
{
|
||||
// Fetch individual job page text via page-fetcher-api
|
||||
var fetchResponse = await _pageFetcher.FetchAsync(new FetchPageRequest
|
||||
{
|
||||
Url = url,
|
||||
WaitFor = "domcontentloaded",
|
||||
CallerService = "cv-search-job"
|
||||
}, ct);
|
||||
|
||||
if (!fetchResponse.Success || string.IsNullOrWhiteSpace(fetchResponse.Text))
|
||||
{
|
||||
_logger.LogWarning("Session {SessionId}: fetch failed for {Url} — {Error}", session.Id, url, fetchResponse.Error);
|
||||
continue;
|
||||
}
|
||||
|
||||
var jobText = fetchResponse.Text;
|
||||
|
||||
// Keyword pre-filter: skip LLM call if no CV keyword appears in the job page text
|
||||
if (cvKeywords.Count > 0 &&
|
||||
!cvKeywords.Any(k => jobText.Contains(k, StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"Session {SessionId}: pre-filter skip | {Url} | no CV keyword found in job text",
|
||||
session.Id, url);
|
||||
continue;
|
||||
}
|
||||
|
||||
var matchRequest = new MatchJobRequest
|
||||
{
|
||||
CvDocumentId = session.CvDocumentId,
|
||||
JobUrl = url,
|
||||
// Pre-fetched text passed directly so cv-matcher-api skips re-fetching the page
|
||||
JobDescription = jobText,
|
||||
// User already gave GDPR consent when they clicked the one-time job search link
|
||||
GdprConsent = true
|
||||
};
|
||||
@@ -182,7 +216,7 @@ public sealed class CvSearchJobTask : IJobTask
|
||||
SessionId = session.Id,
|
||||
ProviderName = GuessProvider(url, providers),
|
||||
JobUrl = url,
|
||||
JobTitle = matchResult.Summary.Split('.').FirstOrDefault()?.Trim() ?? "Job",
|
||||
JobTitle = matchResult.Summary.Split('.').FirstOrDefault()?.Trim() ?? title,
|
||||
JobText = string.Empty,
|
||||
Score = matchResult.Score,
|
||||
ResultJson = JsonSerializer.Serialize(matchResult, new JsonSerializerOptions(JsonSerializerDefaults.Web)),
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" />
|
||||
<PackageReference Include="Refit.HttpClientFactory" />
|
||||
<PackageReference Include="Microsoft.Playwright" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
@@ -26,6 +25,7 @@
|
||||
<ProjectReference Include="..\..\Apis\cv-search-data\cv-search-data.csproj" />
|
||||
<ProjectReference Include="..\..\Apis\common\common.csproj" />
|
||||
<ProjectReference Include="..\..\Apis\email-data\email-data.csproj" />
|
||||
<ProjectReference Include="..\..\Apis\page-fetcher-api-models\page-fetcher-api-models.csproj" />
|
||||
<ProjectReference Include="..\..\Helpers\startup-helpers\startup-helpers.csproj" />
|
||||
<ProjectReference Include="..\job-scheduler\job-scheduler.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
@@ -72,6 +72,9 @@ services:
|
||||
- RagApi__BaseUrl=${RagApi__BaseUrl:-http://rag-api:8080}
|
||||
- RagApi__InternalApiKey=${RagApi__InternalApiKey:-}
|
||||
|
||||
- PageFetcherApi__BaseUrl=${PageFetcherApi__BaseUrl:-http://myai-page-fetcher-api:8080}
|
||||
- PageFetcherApi__InternalApiKey=${PageFetcherApi__InternalApiKey:-}
|
||||
|
||||
- Ai__Provider=${Ai__Provider:-OpenAI}
|
||||
- Ai__OpenAI__ApiKey=${Ai__OpenAI__ApiKey:-}
|
||||
- Ai__OpenAI__ChatModel=${Ai__OpenAI__ChatModel:-gpt-4o-mini}
|
||||
@@ -266,6 +269,9 @@ services:
|
||||
- EmailApi__BaseUrl=${EmailApi__BaseUrl:-http://email-api:8080}
|
||||
- EmailApi__InternalApiKey=${EmailApi__InternalApiKey:-}
|
||||
|
||||
- PageFetcherApi__BaseUrl=${PageFetcherApi__BaseUrl:-http://myai-page-fetcher-api:8080}
|
||||
- PageFetcherApi__InternalApiKey=${PageFetcherApi__InternalApiKey:-}
|
||||
|
||||
- FileStorage__Path=${FileStorage__Path:-Files}
|
||||
|
||||
- JobSearch__Enabled=${JobSearch__Enabled:-true}
|
||||
@@ -293,6 +299,38 @@ services:
|
||||
labels:
|
||||
- "com.centurylinklabs.watchtower.enable=true"
|
||||
|
||||
page-fetcher-api:
|
||||
image: registry.easysoft.ro/apps/myai-page-fetcher-api:${IMAGE_TAG:-staging}
|
||||
container_name: myai-page-fetcher-api
|
||||
environment:
|
||||
- ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Staging}
|
||||
- ASPNETCORE_URLS=${ASPNETCORE_URLS:-http://+:8080}
|
||||
- APP_ENVIRONMENT_NAME=${APP_ENVIRONMENT_NAME:-myai.staging}
|
||||
|
||||
- Database__Host=${Database__Host:-sqlserver}
|
||||
- Database__Port=${Database__Port:-1433}
|
||||
- Database__Name=${Database__Name:-MyAiDb}
|
||||
- Database__User=${Database__User:-sa}
|
||||
- Database__Password=${Database__Password:-}
|
||||
- Database__TrustServerCertificate=${Database__TrustServerCertificate:-true}
|
||||
|
||||
- InternalApi__ApiKey=${PageFetcherApi__InternalApiKey:-}
|
||||
- InternalApi__RequireApiKey=true
|
||||
|
||||
- SerilogEmail__From=${SerilogEmail__From:-}
|
||||
- SerilogEmail__To=${SerilogEmail__To:-}
|
||||
- SerilogEmail__Host=${SerilogEmail__Host:-}
|
||||
- SerilogEmail__Port=${SerilogEmail__Port:-587}
|
||||
- SerilogEmail__UserName=${SerilogEmail__UserName:-}
|
||||
- SerilogEmail__Password=${SerilogEmail__Password:-}
|
||||
volumes:
|
||||
- ${LOGS_PATH:-/opt/myai/logs}/page-fetcher-api:/app/logs
|
||||
networks:
|
||||
- myai-network
|
||||
restart: unless-stopped
|
||||
labels:
|
||||
- "com.centurylinklabs.watchtower.enable=true"
|
||||
|
||||
web:
|
||||
image: registry.easysoft.ro/apps/myai-web:${IMAGE_TAG:-staging}
|
||||
container_name: myai-web
|
||||
|
||||
@@ -63,6 +63,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "email-api", "Apis\email-api
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "email-data", "Apis\email-data\email-data.csproj", "{C1D2E3F4-A5B6-4789-CDEF-012345678ABC}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "page-fetcher-api-models", "Apis\page-fetcher-api-models\page-fetcher-api-models.csproj", "{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "page-fetcher-data", "Apis\page-fetcher-data\page-fetcher-data.csproj", "{06F803CD-329D-40C2-B62D-0F14E137D3C7}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "page-fetcher-api", "Apis\page-fetcher-api\page-fetcher-api.csproj", "{FC5A722A-7B12-459E-AB9F-0A724797783E}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -357,6 +363,42 @@ Global
|
||||
{C1D2E3F4-A5B6-4789-CDEF-012345678ABC}.Release|x64.Build.0 = Release|Any CPU
|
||||
{C1D2E3F4-A5B6-4789-CDEF-012345678ABC}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{C1D2E3F4-A5B6-4789-CDEF-012345678ABC}.Release|x86.Build.0 = Release|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Release|x64.Build.0 = Release|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B}.Release|x86.Build.0 = Release|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Release|x64.Build.0 = Release|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7}.Release|x86.Build.0 = Release|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Release|x64.Build.0 = Release|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E}.Release|x86.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
@@ -385,6 +427,9 @@ Global
|
||||
{BE44B4EB-9AB9-4D81-A9BF-5CF2832BEEE5} = {A9B8C7D6-E5F4-4321-ABCD-FEDCBA987654}
|
||||
{434119EA-2FFC-4433-9B8E-1E6D94006413} = {0FE6558F-2157-47F2-A835-558416CE0E2B}
|
||||
{C1D2E3F4-A5B6-4789-CDEF-012345678ABC} = {D4E5F6A7-B8C9-4012-3456-789ABCDEF012}
|
||||
{4F1A669E-C8AF-428F-87E7-3E0A213DD20B} = {0FE6558F-2157-47F2-A835-558416CE0E2B}
|
||||
{06F803CD-329D-40C2-B62D-0F14E137D3C7} = {0FE6558F-2157-47F2-A835-558416CE0E2B}
|
||||
{FC5A722A-7B12-459E-AB9F-0A724797783E} = {0FE6558F-2157-47F2-A835-558416CE0E2B}
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {6246A67B-299E-4E64-8DBE-1A66771E7C67}
|
||||
|
||||
Reference in New Issue
Block a user