From 1222a86eb733ebc3f47d33ee930b30e114571182 Mon Sep 17 00:00:00 2001 From: claude Date: Mon, 8 Jun 2026 16:57:52 +0300 Subject: [PATCH] =?UTF-8?q?Fix=20file://=20URL=20bug=20in=20HtmlJobSearche?= =?UTF-8?q?r=20=E2=80=94=20skip=20non-HTTP(S)=20URLs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After resolving relative hrefs against the base search URL, some ejobs.ro links were producing file:/// URIs (e.g. file:///user/locuri-de-munca/...). These were sent to cv-matcher-api and rejected with HTTP 400, causing 0 matches. Added a scheme guard after URI resolution to skip any URL that is not http:// or https://, preventing malformed URLs from reaching the matcher. Co-Authored-By: Claude Sonnet 4.6 --- Jobs/cv-search-job/Services/HtmlJobSearcher.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs index 6c4cb78..a4e40f6 100644 --- a/Jobs/cv-search-job/Services/HtmlJobSearcher.cs +++ b/Jobs/cv-search-job/Services/HtmlJobSearcher.cs @@ -117,6 +117,10 @@ public sealed class HtmlJobSearcher continue; } + // Skip non-HTTP(S) URLs (e.g. file:// or javascript: that can appear in scraped HTML) + if (absoluteUri.Scheme != Uri.UriSchemeHttp && absoluteUri.Scheme != Uri.UriSchemeHttps) + continue; + var url = absoluteUri.GetLeftPart(UriPartial.Path); if (seen.Add(url)) results.Add(url);