Staging to Production #51

Merged
claude merged 165 commits from main into production 2026-06-08 18:28:46 +00:00
Showing only changes of commit a467fac35d - Show all commits
+10 -12
View File
@@ -138,23 +138,21 @@ public sealed class JobTokenService : IJobTokenService
/// <summary> /// <summary>
/// Extracts up to 10 meaningful keywords from the CV text using simple heuristics (no LLM). /// Extracts up to 10 meaningful keywords from the CV text using simple heuristics (no LLM).
/// Takes the first 5 usable lines, splits them into words, strips punctuation, and deduplicates. /// Samples the first 2000 characters (where title/role/skills usually appear), splits by
/// whitespace and common delimiters, strips punctuation, and deduplicates.
/// Works regardless of whether the PDF extractor preserves newlines.
/// </summary> /// </summary>
private static string ExtractKeywords(string cvText) private static string ExtractKeywords(string cvText)
{ {
var lines = cvText // Focus on the header area where name/title/skills typically appear
.Split(['\n', '\r'], StringSplitOptions.RemoveEmptyEntries) var sample = cvText.Length > 2000 ? cvText[..2000] : cvText;
.Select(l => l.Trim())
.Where(l => l.Length > 5 && l.Length < 200)
// Skip lines that are purely digits, spaces, and phone/contact punctuation (phone numbers, emails, etc.)
.Where(l => !Regex.IsMatch(l, @"^[\d\s\+\-\(\)\@\.]+$"))
.Take(5)
.ToList();
var words = lines var words = sample
.SelectMany(l => l.Split(' ', StringSplitOptions.RemoveEmptyEntries)) .Split([' ', '\n', '\r', '\t', '|', '/', ',', ';', '(', ')'], StringSplitOptions.RemoveEmptyEntries)
.Select(w => Regex.Replace(w, @"[^\w\-]", "")) .Select(w => Regex.Replace(w, @"[^\w\-]", "").Trim('-'))
.Where(w => w.Length > 2) .Where(w => w.Length > 2)
.Where(w => !Regex.IsMatch(w, @"^[\d\-]+$")) // skip phone fragments and pure numbers
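.Where(w => !w.Contains('@') && !w.Contains('.')) // NOTE(review): meant to skip emails and URLs, but '@' and '.' were already stripped by the Regex.Replace in the preceding Select, so this filter never excludes anything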
.Distinct(StringComparer.OrdinalIgnoreCase) .Distinct(StringComparer.OrdinalIgnoreCase)
.Take(10) .Take(10)
.ToList(); .ToList();