feat(insights): anomaly outcome validator — hit-rate calibration page

Adds a calibration dashboard that joins persisted SuspensionFlip anomalies
with EventResult rows and reports whether the post-flip favourite actually
won — the single metric that says whether the detector is doing its job.

Domain:
- AnomalyEvidenceData + AnomalyEvidenceParser to read the JSON written by
  AnomalyDetector without re-implementing the schema.
- AnomalyOutcomeEvaluator: pure function returning Hit / Miss / Unresolved.
  Tennis-style two-way markets with a Draw winner are downgraded to
  Unresolved rather than silently counted as Miss.
- AnomalySeverityThresholds: shared Low/Medium/High constants so the UI
  badge and the report buckets cannot drift.

Application:
- EvaluateAnomalyOutcomesUseCase orchestrates the join + aggregation.
- AnomalyOutcomeReport carries totals, hit rate, three breakdowns
  (severity / sport / score bins) and a per-event title lookup so the UI
  needs no second pass over IEventRepository.
- Score bins extend below 0.30 automatically when the operator lowers the
  detector threshold so the histogram total always equals ResolvedCount.

UI:
- Insights page at /anomalies/insights — hero header, 4-card KPI strip
  (hit rate tinted by tone), three breakdown grids with bar visualisation,
  drill-down tables for resolved and unresolved anomalies. Honors
  prefers-reduced-motion. RU + EN localisation.
- Nav entry under Analysis section + chip button on the Anomaly Feed.

Tests: +42 across Domain + Application (evaluator boundary cases including
tennis two-way and Draw guard, score-bin edges, dynamic floor when
threshold is lowered, event-title pass-through). All 324 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 13:53:31 +03:00
parent 004dbeae8b
commit 292223174c
21 changed files with 2398 additions and 4 deletions
@@ -0,0 +1,258 @@
using System.Globalization;
using Marathon.Application.Abstractions;
using Marathon.Application.Reporting;
using Marathon.Domain.AnomalyDetection;
using Marathon.Domain.Entities;
using Microsoft.Extensions.Logging;
using DomainEventId = Marathon.Domain.ValueObjects.EventId;
namespace Marathon.Application.UseCases;
/// <summary>
/// Builds an <see cref="AnomalyOutcomeReport"/> by joining every persisted
/// <see cref="Anomaly"/> with the originating event and its
/// <see cref="EventResult"/>, then running the pure
/// <see cref="AnomalyOutcomeEvaluator"/> over each pair.
/// </summary>
/// <remarks>
/// <para>
/// This is the answer to "does the SuspensionFlip detector actually predict the
/// right side?" The report is the validator for the entire anomaly-detection
/// premise of the product — without it, the algorithm's confidence score is
/// just a number with no calibration.
/// </para>
/// <para>
/// The use case loads all anomalies with a single repository call, then fetches
/// the event and the result for each <em>distinct</em> event id individually and
/// performs the join in memory. Anomaly volumes are small (one per suspension
/// interval per event) so this is well within budget. If volumes grow
/// significantly the repository layer can later add batched reads or a SQL-side
/// join — the public shape of the report does not change.
/// </para>
/// </remarks>
public sealed class EvaluateAnomalyOutcomesUseCase
{
    /// <summary>
    /// Default lower edge of the score histogram. It mirrors the detector's
    /// default threshold (0.30) but is repeated here so the bucketer is
    /// independent of any specific configuration value. When resolved anomalies
    /// score below this value, the histogram is extended downwards in
    /// <see cref="BinWidth"/> steps so those rows are still counted.
    /// </summary>
    public const decimal MinScore = 0.30m;

    /// <summary>
    /// Bin width for the score histogram. With the default floor this yields
    /// 7 buckets:
    /// [0.30, 0.40), [0.40, 0.50), [0.50, 0.60), [0.60, 0.70), [0.70, 0.80),
    /// [0.80, 0.90), [0.90, 1.00]. The last bin is closed on the right.
    /// </summary>
    public const decimal BinWidth = 0.10m;

    private readonly IAnomalyRepository _anomalies;
    private readonly IEventRepository _events;
    private readonly IResultRepository _results;
    private readonly ILogger<EvaluateAnomalyOutcomesUseCase> _logger;

    /// <summary>
    /// Creates the use case over the three repositories it joins.
    /// </summary>
    /// <param name="anomalies">Source of persisted anomalies.</param>
    /// <param name="events">Source of the events the anomalies refer to.</param>
    /// <param name="results">Source of the final results of those events.</param>
    /// <param name="logger">Logger for start / finish diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public EvaluateAnomalyOutcomesUseCase(
        IAnomalyRepository anomalies,
        IEventRepository events,
        IResultRepository results,
        ILogger<EvaluateAnomalyOutcomesUseCase> logger)
    {
        _anomalies = anomalies ?? throw new ArgumentNullException(nameof(anomalies));
        _events = events ?? throw new ArgumentNullException(nameof(events));
        _results = results ?? throw new ArgumentNullException(nameof(results));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Loads every anomaly, evaluates each one against its event's result and
    /// aggregates the outcomes into a report.
    /// </summary>
    /// <param name="ct">Token checked before each repository round-trip and each evaluation.</param>
    /// <returns>
    /// The aggregated report. When there are no anomalies at all, an empty
    /// report (no buckets, <c>null</c> hit rate) is returned. The
    /// <c>Resolved</c> and <c>Unresolved</c> lists are ordered newest first by
    /// <c>DetectedAt</c>.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<AnomalyOutcomeReport> ExecuteAsync(CancellationToken ct)
    {
        _logger.LogInformation("EvaluateAnomalyOutcomesUseCase: report build started");

        var anomalies = await _anomalies.ListAsync(ct).ConfigureAwait(false);
        if (anomalies.Count == 0)
        {
            _logger.LogInformation(
                "EvaluateAnomalyOutcomesUseCase: no anomalies — empty report");
            return EmptyReport();
        }

        // Build event + result lookups — distinct keys only, so an event with
        // several anomalies is fetched once.
        // TODO (perf, future): batch via IEventRepository.GetManyAsync / IResultRepository.GetManyAsync
        // once the repositories expose them. Today the per-event GetAsync round-trip is acceptable
        // because anomaly volumes are bounded (1 row per suspension interval per event).
        var distinctEventIds = anomalies.Select(a => a.EventId).Distinct().ToList();
        var eventLookup = new Dictionary<DomainEventId, Event>(distinctEventIds.Count);
        var resultLookup = new Dictionary<DomainEventId, EventResult>(distinctEventIds.Count);
        var eventTitles = new Dictionary<DomainEventId, string>(distinctEventIds.Count);

        foreach (var id in distinctEventIds)
        {
            ct.ThrowIfCancellationRequested();

            var ev = await _events.GetAsync(id, ct).ConfigureAwait(false);
            if (ev is not null)
            {
                eventLookup[id] = ev;
                eventTitles[id] = string.Concat(ev.Side1Name, " vs ", ev.Side2Name);
            }

            var res = await _results.GetAsync(id, ct).ConfigureAwait(false);
            if (res is not null)
            {
                resultLookup[id] = res;
            }
        }

        // Evaluate every anomaly through the pure domain function. A missing
        // event or result is passed on as null; the evaluator decides the outcome.
        var resolved = new List<ResolvedAnomaly>();
        var unresolved = new List<ResolvedAnomaly>();

        foreach (var anomaly in anomalies)
        {
            ct.ThrowIfCancellationRequested();

            eventLookup.TryGetValue(anomaly.EventId, out var ev);
            resultLookup.TryGetValue(anomaly.EventId, out var result);

            var evaluated = AnomalyOutcomeEvaluator.Evaluate(anomaly, ev?.Sport, result);
            if (evaluated.Outcome == AnomalyOutcomeKind.Unresolved)
            {
                unresolved.Add(evaluated);
            }
            else
            {
                resolved.Add(evaluated);
            }
        }

        var resolvedOrdered = resolved
            .OrderByDescending(r => r.DetectedAt)
            .ToList();
        var unresolvedOrdered = unresolved
            .OrderByDescending(r => r.DetectedAt)
            .ToList();

        // Everything in the resolved list that is not a Hit is counted as a Miss.
        var hitCount = resolvedOrdered.Count(r => r.Outcome == AnomalyOutcomeKind.Hit);
        var missCount = resolvedOrdered.Count - hitCount;

        var report = new AnomalyOutcomeReport(
            TotalAnomalies: anomalies.Count,
            ResolvedCount: resolvedOrdered.Count,
            UnresolvedCount: unresolvedOrdered.Count,
            HitCount: hitCount,
            MissCount: missCount,
            HitRate: ComputeRate(hitCount, resolvedOrdered.Count),
            BySeverity: BuildSeverityBuckets(resolvedOrdered),
            BySport: BuildSportBuckets(resolvedOrdered),
            ByScoreBin: BuildScoreBins(resolvedOrdered),
            Resolved: resolvedOrdered,
            Unresolved: unresolvedOrdered,
            EventTitles: eventTitles);

        _logger.LogInformation(
            "EvaluateAnomalyOutcomesUseCase: report ready — total={Total}, resolved={Resolved}, hits={Hits}",
            report.TotalAnomalies, report.ResolvedCount, report.HitCount);

        return report;
    }

    // ── Bucketers ────────────────────────────────────────────────────────────

    /// <summary>
    /// Splits the resolved anomalies into Low / Medium / High buckets, always in
    /// that order and always all three (empty buckets included).
    /// </summary>
    private static IReadOnlyList<OutcomeBucket> BuildSeverityBuckets(
        IReadOnlyCollection<ResolvedAnomaly> resolved)
    {
        // Thresholds sourced from the Domain so the UI's severity badge and
        // this report cannot drift out of sync — single source of truth.
        return new[]
        {
            BuildBucket(OutcomeBucketKeys.SeverityLow,
                resolved.Where(r => r.Score < AnomalySeverityThresholds.Medium)),
            BuildBucket(OutcomeBucketKeys.SeverityMedium,
                resolved.Where(r => r.Score >= AnomalySeverityThresholds.Medium
                                    && r.Score < AnomalySeverityThresholds.High)),
            BuildBucket(OutcomeBucketKeys.SeverityHigh,
                resolved.Where(r => r.Score >= AnomalySeverityThresholds.High)),
        };
    }

    /// <summary>
    /// Groups the resolved anomalies by sport, one bucket per sport present,
    /// ordered by the sport key. Rows without a sport are left out, so the
    /// bucket totals can sum to less than the resolved count.
    /// </summary>
    private static IReadOnlyList<OutcomeBucket> BuildSportBuckets(
        IReadOnlyCollection<ResolvedAnomaly> resolved)
    {
        return resolved
            .Where(r => r.Sport is not null)
            .GroupBy(r => r.Sport!.Value)
            .OrderBy(g => g.Key)
            .Select(g => BuildBucket(
                key: string.Format(
                    CultureInfo.InvariantCulture,
                    "{0}{1}",
                    OutcomeBucketKeys.SportPrefix,
                    g.Key),
                items: g))
            .ToList();
    }

    /// <summary>
    /// Builds the score histogram in <see cref="BinWidth"/>-wide bins, ascending.
    /// Every resolved row lands in exactly one bin, so the bin totals always sum
    /// to the number of resolved rows.
    /// </summary>
    private static IReadOnlyList<OutcomeBucket> BuildScoreBins(
        IReadOnlyCollection<ResolvedAnomaly> resolved)
    {
        // Default range is the canonical [0.30, 1.00] with seven 0.10-wide bins.
        // If the operator has lowered the detector's flip threshold and we have
        // resolved anomalies below 0.30, prepend additional bins so every row in
        // the report shows up in exactly one bucket — the histogram total must
        // equal ResolvedCount no matter how the detector is tuned.
        var floor = MinScore;
        if (resolved.Count > 0)
        {
            var lowest = resolved.Min(r => r.Score);
            if (lowest < MinScore)
            {
                var binsBelow = Math.Ceiling((MinScore - lowest) / BinWidth);
                floor = MinScore - binsBelow * BinWidth;
                if (floor < 0m)
                {
                    floor = 0m;
                }
            }
        }

        var bins = new List<OutcomeBucket>();
        var isFirst = true;
        for (var start = floor; start < 1.0m; start += BinWidth)
        {
            var binStart = start;
            var binEnd = start + BinWidth;

            // The edge bins are open-ended so out-of-range scores are not
            // silently dropped: the first bin also takes anything below its
            // start (only possible when the floor was clamped to 0), and the
            // last bin takes 1.00 and anything above it. For scores within
            // [floor, 1.00] this is the usual half-open binning with the last
            // bin closed on the right.
            var openBelow = isFirst;
            var openAbove = binEnd >= 1.0m;

            var inBin = resolved.Where(r =>
                (openBelow || r.Score >= binStart) &&
                (openAbove || r.Score < binEnd));

            var key = string.Format(
                CultureInfo.InvariantCulture,
                "{0}{1:0.00}-{2:0.00}",
                OutcomeBucketKeys.BinPrefix,
                binStart,
                Math.Min(binEnd, 1.0m));

            // BuildBucket materialises the lazy query straight away, while the
            // captured per-iteration locals still hold this bin's values.
            bins.Add(BuildBucket(key, inBin));
            isFirst = false;
        }

        return bins;
    }

    /// <summary>
    /// Summarises a set of resolved anomalies as a bucket: total rows, hits and
    /// hit rate (<c>null</c> when the set is empty).
    /// </summary>
    private static OutcomeBucket BuildBucket(string key, IEnumerable<ResolvedAnomaly> items)
    {
        // Materialise lazy sequences once so they are not enumerated twice.
        var list = items as IReadOnlyCollection<ResolvedAnomaly> ?? items.ToList();
        var total = list.Count;
        var hits = list.Count(r => r.Outcome == AnomalyOutcomeKind.Hit);
        return new OutcomeBucket(key, total, hits, ComputeRate(hits, total));
    }

    /// <summary>
    /// Returns <paramref name="numerator"/> / <paramref name="denominator"/>
    /// rounded to four decimal places, or <c>null</c> when the denominator is zero.
    /// </summary>
    private static decimal? ComputeRate(int numerator, int denominator) =>
        denominator == 0
            ? null
            : Math.Round(numerator / (decimal)denominator, 4);

    /// <summary>
    /// Report returned when there are no anomalies at all: zero counts, no hit
    /// rate and empty collections.
    /// </summary>
    private static AnomalyOutcomeReport EmptyReport() =>
        new(
            TotalAnomalies: 0,
            ResolvedCount: 0,
            UnresolvedCount: 0,
            HitCount: 0,
            MissCount: 0,
            HitRate: null,
            BySeverity: Array.Empty<OutcomeBucket>(),
            BySport: Array.Empty<OutcomeBucket>(),
            ByScoreBin: Array.Empty<OutcomeBucket>(),
            Resolved: Array.Empty<ResolvedAnomaly>(),
            Unresolved: Array.Empty<ResolvedAnomaly>(),
            EventTitles: new Dictionary<DomainEventId, string>());
}