feat: add page-fetcher-api — centralised Playwright page fetcher
Introduces page-fetcher-api, a new internal ASP.NET Core service that centralises all web-page fetching through a single Playwright (headless Chromium) browser instance. All fetches are persisted to the pageFetcher SQL schema for auditing.

New projects:
- Apis/page-fetcher-api-models: FetchPageRequest, FetchPageResponse, IPageFetcherApiClient
- Apis/page-fetcher-data: PageFetchDbContext, PageFetchEntity, InitialSchema migration (schema: pageFetcher)
- Apis/page-fetcher-api: PlaywrightBrowserService (singleton), PageFetcherService, PageController

Changes to existing services:
- cv-matcher-api: JobTextExtractor now calls IPageFetcherApiClient instead of HttpClient
- cv-search-job: HtmlJobSearcher uses IPageFetcherApiClient (removes inline Playwright); CvSearchJobTask fetches individual job pages and applies a keyword pre-filter before the LLM call; passes the pre-fetched JobDescription to cv-matcher-api to skip the re-fetch
- common: add PageFetcherApiSettings
- docker-compose.yml, build.yml: add new service + env vars for callers

Closes #43

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -72,6 +72,9 @@ services:
|
||||
- RagApi__BaseUrl=${RagApi__BaseUrl:-http://rag-api:8080}
|
||||
- RagApi__InternalApiKey=${RagApi__InternalApiKey:-}
|
||||
|
||||
- PageFetcherApi__BaseUrl=${PageFetcherApi__BaseUrl:-http://myai-page-fetcher-api:8080}
|
||||
- PageFetcherApi__InternalApiKey=${PageFetcherApi__InternalApiKey:-}
|
||||
|
||||
- Ai__Provider=${Ai__Provider:-OpenAI}
|
||||
- Ai__OpenAI__ApiKey=${Ai__OpenAI__ApiKey:-}
|
||||
- Ai__OpenAI__ChatModel=${Ai__OpenAI__ChatModel:-gpt-4o-mini}
|
||||
@@ -266,6 +269,9 @@ services:
|
||||
- EmailApi__BaseUrl=${EmailApi__BaseUrl:-http://email-api:8080}
|
||||
- EmailApi__InternalApiKey=${EmailApi__InternalApiKey:-}
|
||||
|
||||
- PageFetcherApi__BaseUrl=${PageFetcherApi__BaseUrl:-http://myai-page-fetcher-api:8080}
|
||||
- PageFetcherApi__InternalApiKey=${PageFetcherApi__InternalApiKey:-}
|
||||
|
||||
- FileStorage__Path=${FileStorage__Path:-Files}
|
||||
|
||||
- JobSearch__Enabled=${JobSearch__Enabled:-true}
|
||||
@@ -293,6 +299,38 @@ services:
|
||||
labels:
|
||||
- "com.centurylinklabs.watchtower.enable=true"
|
||||
|
||||
page-fetcher-api:
  # Internal page-fetching service. Per the commit description it drives a
  # headless Chromium instance through Playwright and records fetches in SQL.
  image: registry.easysoft.ro/apps/myai-page-fetcher-api:${IMAGE_TAG:-staging}
  # Callers default PageFetcherApi__BaseUrl to this container name, so keep
  # the two in sync when renaming.
  container_name: myai-page-fetcher-api
  # Run an init process as PID 1 so orphaned browser child processes are
  # reaped instead of accumulating as zombies (Playwright Docker guidance).
  init: true
  # Docker's default /dev/shm is 64 MB; Chromium can run out of shared memory
  # and crash on heavier pages without a larger allocation.
  shm_size: "1gb"
  environment:
    - ASPNETCORE_ENVIRONMENT=${ASPNETCORE_ENVIRONMENT:-Staging}
    - ASPNETCORE_URLS=${ASPNETCORE_URLS:-http://+:8080}
    - APP_ENVIRONMENT_NAME=${APP_ENVIRONMENT_NAME:-myai.staging}

    # SQL Server connection settings.
    - Database__Host=${Database__Host:-sqlserver}
    - Database__Port=${Database__Port:-1433}
    - Database__Name=${Database__Name:-MyAiDb}
    - Database__User=${Database__User:-sa}
    - Database__Password=${Database__Password:-}
    - Database__TrustServerCertificate=${Database__TrustServerCertificate:-true}

    # The key is read from PageFetcherApi__InternalApiKey, the same variable
    # that is injected into the calling services, so both sides share one value.
    # NOTE(review): the key defaults to empty while RequireApiKey is true;
    # confirm how the service behaves when the variable is unset.
    - InternalApi__ApiKey=${PageFetcherApi__InternalApiKey:-}
    - InternalApi__RequireApiKey=true

    # Serilog e-mail sink settings.
    - SerilogEmail__From=${SerilogEmail__From:-}
    - SerilogEmail__To=${SerilogEmail__To:-}
    - SerilogEmail__Host=${SerilogEmail__Host:-}
    - SerilogEmail__Port=${SerilogEmail__Port:-587}
    - SerilogEmail__UserName=${SerilogEmail__UserName:-}
    - SerilogEmail__Password=${SerilogEmail__Password:-}
  volumes:
    # Mounts a per-service host log directory at /app/logs.
    - ${LOGS_PATH:-/opt/myai/logs}/page-fetcher-api:/app/logs
  networks:
    - myai-network
  restart: unless-stopped
  labels:
    - "com.centurylinklabs.watchtower.enable=true"
|
||||
|
||||
web:
|
||||
image: registry.easysoft.ro/apps/myai-web:${IMAGE_TAG:-staging}
|
||||
container_name: myai-web
|
||||
|
||||
Reference in New Issue
Block a user