Improve comments and Swagger annotations across services (#26)
- EmailController: add class summary, full SwaggerResponse/ProducesResponseType for 400 and 500, and Description on SwaggerOperation
- ContactController: fix terse "Failed." error message to "Could not process subscription."
- FileDownloadController: remove redundant XML <response code> tags from the public action doc block; convert private-method /// <summary> to // (project convention: no XML doc on internal code)
- CvMatcherService: remove two dead commented-out blocks (old email send and BuildEmailBody helper)
- JobTokenService: comment the phone/contact-line regex filter in ExtractKeywords
- DocumentClassifier: comment the keyword-frequency scoring approach and the confidence formula
- TextChunker: comment the sliding-window step (chunkSize - overlap)
- CvSearchJobTask: comment the GdprConsent = true rationale and the BuildCvFileName sanitisation logic
- HtmlJobSearcher: comment GetLeftPart(UriPartial.Path) query-strip dedup

Closes #26

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -24,6 +24,8 @@ public sealed class DocumentClassifier : IDocumentClassifier
|
||||
});
|
||||
}
|
||||
|
||||
// Keyword-frequency heuristic: count how many characteristic terms each document
|
||||
// type contributes to the text, then pick the type with the highest hit count.
|
||||
var lower = text.ToLowerInvariant();
|
||||
var scores = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
@@ -37,6 +39,8 @@ public sealed class DocumentClassifier : IDocumentClassifier
|
||||
|
||||
var best = scores.OrderByDescending(x => x.Value).First();
|
||||
var type = best.Value <= 0 ? "unknown" : best.Key;
|
||||
// Confidence baseline 0.45 + 0.08 per matched keyword term, capped at 0.95.
|
||||
// Zero hits → 0.25 (effectively unknown).
|
||||
var confidence = best.Value <= 0 ? 0.25 : Math.Min(0.95, 0.45 + best.Value * 0.08);
|
||||
|
||||
return Task.FromResult(new DocumentClassification
|
||||
|
||||
Reference in New Issue
Block a user