chore(med): mapping culture-safe parse, dead-code, scope comparer, UA rotator, parser cache

Six MEDIUM-tier review items:

* Mapping.cs — DateTimeOffset.Parse now passes CultureInfo.InvariantCulture
  + DateTimeStyles.RoundtripKind so a non-en-US thread culture cannot
  corrupt round-tripped ScheduledAt / CapturedAt / DetectedAt / CompletedAt.
  Also replaces the magic 0/1 BetScope discriminator with named constants.

* Delete dead Placeholder.cs files in Marathon.Application and
  Marathon.Infrastructure — they were stubs from Phase 1 to satisfy
  "non-empty project" and have been dead since Phase 2/3.

* EventBrowsingService — drop the bespoke ScopeEqualityComparer; BetScope
  is a record hierarchy, .GroupBy uses value equality natively.

* UserAgentRotatorHandler — counter promoted to private static int with
  Interlocked.Increment so rotation is round-robin across the process.
  HttpClientFactory builds the handler Transient, so the previous instance
  field reset to zero on every new client and broke rotation.

* EventOddsParser — added a parallel "selection-key → IElement" index
  alongside the existing price index. Handicap extraction (6 call sites
  per event detail page) used to do a fresh document.QuerySelector("span[
  data-selection-key='...']") for every key — full-document CSS traversal.
  Now it's a dictionary lookup, with the pair-emit logic factored into a
  shared TryEmitHandicapPair helper.
This commit is contained in:
2026-05-09 15:45:18 +03:00
parent fed3a09695
commit c2934b2c8d
6 changed files with 98 additions and 118 deletions
-5
View File
@@ -1,5 +0,0 @@
// Phase 2/3/4 will populate this project.
// This file exists only to prevent the compiler from treating the project as empty.
namespace Marathon.Application;
/// <summary>
/// Empty marker type with no members. Per the file header comment it exists
/// only so the compiler does not treat this project as empty.
/// </summary>
internal static class Placeholder { }
@@ -1,3 +1,4 @@
using System.Globalization;
using Marathon.Domain.Entities;
using Marathon.Domain.Enums;
using Marathon.Domain.ValueObjects;
@@ -11,6 +12,15 @@ namespace Marathon.Infrastructure.Persistence;
/// </summary>
internal static class Mapping
{
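// ScheduledAt / CapturedAt / DetectedAt / CompletedAt are written via
// DateTimeOffset.ToString("O") — round-trip ISO 8601. Parse with the
// invariant culture so a non-en-US thread culture (or a future locale
// change) cannot corrupt the round-trip. NOTE: DateTimeStyles.RoundtripKind
// is documented as having no effect on DateTimeOffset.Parse (the type has
// no Kind property), so the culture argument is what provides the protection.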
private const DateTimeStyles RoundtripStyles = DateTimeStyles.RoundtripKind;
// ─── Bet scope discriminator constants ────────────────────────────────────
private const int ScopeMatch = 0;
private const int ScopePeriod = 1;
// ─── Event ───────────────────────────────────────────────────────────────
public static EventEntity ToEntity(Event domain) =>
@@ -34,7 +44,7 @@ internal static class Mapping
CountryCode: entity.CountryCode,
LeagueId: entity.LeagueId,
Category: entity.Category,
ScheduledAt: DateTimeOffset.Parse(entity.ScheduledAt),
ScheduledAt: DateTimeOffset.Parse(entity.ScheduledAt, CultureInfo.InvariantCulture, RoundtripStyles),
Side1Name: entity.Side1Name,
Side2Name: entity.Side2Name)
{
@@ -55,7 +65,7 @@ internal static class Mapping
public static OddsSnapshot ToDomain(SnapshotEntity entity) =>
new(
eventId: new EventId(entity.EventCode),
capturedAt: DateTimeOffset.Parse(entity.CapturedAt),
capturedAt: DateTimeOffset.Parse(entity.CapturedAt, CultureInfo.InvariantCulture, RoundtripStyles),
source: (OddsSource)entity.Source,
bets: entity.Bets.Select(ToDomain).ToList().AsReadOnly());
@@ -64,7 +74,7 @@ internal static class Mapping
public static BetEntity ToEntity(Bet domain) =>
new()
{
Scope = domain.Scope is MatchScope ? 0 : 1,
Scope = domain.Scope is MatchScope ? ScopeMatch : ScopePeriod,
PeriodNumber = domain.Scope is PeriodScope ps ? ps.Number : null,
Type = (int)domain.Type,
Side = (int)domain.Side,
@@ -76,8 +86,8 @@ internal static class Mapping
{
var scope = entity.Scope switch
{
0 => (BetScope)MatchScope.Instance,
1 => new PeriodScope(entity.PeriodNumber!.Value),
ScopeMatch => (BetScope)MatchScope.Instance,
ScopePeriod => new PeriodScope(entity.PeriodNumber!.Value),
_ => throw new InvalidOperationException(
$"Unknown BetScope discriminator: {entity.Scope}"),
};
@@ -108,7 +118,7 @@ internal static class Mapping
Side1Score: entity.Side1Score,
Side2Score: entity.Side2Score,
WinnerSide: (Side)entity.WinnerSide,
CompletedAt: DateTimeOffset.Parse(entity.CompletedAt));
CompletedAt: DateTimeOffset.Parse(entity.CompletedAt, CultureInfo.InvariantCulture, RoundtripStyles));
// ─── Anomaly ──────────────────────────────────────────────────────────────
@@ -127,7 +137,7 @@ internal static class Mapping
new(
Id: Guid.Parse(entity.Id),
EventId: new EventId(entity.EventCode),
DetectedAt: DateTimeOffset.Parse(entity.DetectedAt),
DetectedAt: DateTimeOffset.Parse(entity.DetectedAt, CultureInfo.InvariantCulture, RoundtripStyles),
Kind: (AnomalyKind)entity.Kind,
Score: entity.Score,
EvidenceJson: entity.EvidenceJson);
@@ -1,5 +0,0 @@
// Phase 2/3 will populate this project.
// This file exists only to prevent the compiler from treating the project as empty.
namespace Marathon.Infrastructure;
/// <summary>
/// Empty marker type with no members. Per the file header comment it exists
/// only so the compiler does not treat this project as empty.
/// </summary>
internal static class Placeholder { }
@@ -104,15 +104,21 @@ public sealed partial class EventOddsParser : IEventOddsParser
return null;
}
// Index selections by key for O(1) lookup
var selectionIndex = BuildSelectionIndex(selections);
// Index selections by key for O(1) lookup. Two parallel maps:
// priceIndex — selection-key → price (for fast rate lookups)
// elementIndex — selection-key → DOM element (so handicap extraction
// no longer rescans the document with QuerySelector
// for every key — each rescan was a full-document
// traversal, issued from 6 call sites across the
// match and period handicap extractors).
var priceIndex = BuildSelectionPriceIndex(selections);
var elementIndex = BuildSelectionElementIndex(selections);
var bets = new List<Bet>();
// ── Match scope bets ───────────────────────────────────────────────
ExtractMatchWin(selectionIndex, eventIdRaw, bets);
ExtractMatchHandicap(selectionIndex, document, eventIdRaw, bets);
ExtractMatchTotal(selectionIndex, document, eventIdRaw, bets);
ExtractMatchWin(priceIndex, eventIdRaw, bets);
ExtractMatchHandicap(priceIndex, elementIndex, eventIdRaw, bets);
ExtractMatchTotal(priceIndex, document, eventIdRaw, bets);
// ── Period scope bets ──────────────────────────────────────────────
if (sportCode is not null)
@@ -120,9 +126,9 @@ public sealed partial class EventOddsParser : IEventOddsParser
var maxPeriods = _periodMapper.MaxPeriods(sportCode);
for (var n = 1; n <= maxPeriods; n++)
{
ExtractPeriodWin(selectionIndex, document, sportCode, eventIdRaw, n, bets);
ExtractPeriodHandicap(selectionIndex, document, sportCode, eventIdRaw, n, bets);
ExtractPeriodTotal(selectionIndex, document, sportCode, eventIdRaw, n, bets);
ExtractPeriodWin(priceIndex, document, sportCode, eventIdRaw, n, bets);
ExtractPeriodHandicap(priceIndex, elementIndex, sportCode, eventIdRaw, n, bets);
ExtractPeriodTotal(priceIndex, document, sportCode, eventIdRaw, n, bets);
}
}
@@ -140,7 +146,7 @@ public sealed partial class EventOddsParser : IEventOddsParser
// ── Selection indexing ─────────────────────────────────────────────────
private static Dictionary<string, decimal> BuildSelectionIndex(List<IElement> selections)
private static Dictionary<string, decimal> BuildSelectionPriceIndex(List<IElement> selections)
{
var index = new Dictionary<string, decimal>(StringComparer.OrdinalIgnoreCase);
foreach (var sel in selections)
@@ -159,6 +165,18 @@ public sealed partial class EventOddsParser : IEventOddsParser
return index;
}
/// <summary>
/// Builds a case-insensitive map from each element's
/// <c>data-selection-key</c> attribute to the element itself.
/// </summary>
/// <param name="selections">Elements to index.</param>
/// <returns>
/// A dictionary keyed by selection key. Elements whose key is missing or
/// whitespace are left out; when a key repeats, the first element seen is kept.
/// </returns>
private static Dictionary<string, IElement> BuildSelectionElementIndex(List<IElement> selections)
{
    var elementsByKey = new Dictionary<string, IElement>(StringComparer.OrdinalIgnoreCase);

    foreach (var element in selections)
    {
        if (element.GetAttribute("data-selection-key") is not { } selectionKey
            || string.IsNullOrWhiteSpace(selectionKey))
        {
            continue;
        }

        // TryAdd never overwrites, so the earliest element for a key wins.
        elementsByKey.TryAdd(selectionKey, element);
    }

    return elementsByKey;
}
// ── Match Win / Draw ───────────────────────────────────────────────────
private void ExtractMatchWin(
@@ -207,7 +225,7 @@ public sealed partial class EventOddsParser : IEventOddsParser
private void ExtractMatchHandicap(
Dictionary<string, decimal> idx,
IDocument document,
Dictionary<string, IElement> elements,
string eventId,
List<Bet> bets)
{
@@ -216,58 +234,43 @@ public sealed partial class EventOddsParser : IEventOddsParser
var hbhKey = $"{eventId}@{market}.HB_H";
var hbaKey = $"{eventId}@{market}.HB_A";
if (idx.TryGetValue(hbhKey, out var rateH) &&
idx.TryGetValue(hbaKey, out var rateA))
{
// Extract handicap value from the <td> containing the HB_H selection
var hbhSpan = document
.QuerySelector($"span[data-selection-key='{hbhKey}']");
var hbhTd = hbhSpan?.Closest("td");
var valueH = ExtractHandicapFromTd(hbhTd);
var hbaSpan = document
.QuerySelector($"span[data-selection-key='{hbaKey}']");
var hbaTd = hbaSpan?.Closest("td");
var valueA = ExtractHandicapFromTd(hbaTd);
if (valueH.HasValue)
TryAddBet(bets, MatchScope.Instance, BetType.WinFora, Side.Side1,
valueH.Value, rateH);
if (valueA.HasValue)
TryAddBet(bets, MatchScope.Instance, BetType.WinFora, Side.Side2,
valueA.Value, rateA);
if (TryEmitHandicapPair(idx, elements, hbhKey, hbaKey, MatchScope.Instance, bets))
break;
}
// Also try no-suffix and suffix-0 fallback
// Also try suffix-0 fallback (e.g. "Match_Handicap0.HB_H")
var alt0HKey = $"{eventId}@{market}0.HB_H";
var alt0AKey = $"{eventId}@{market}0.HB_A";
if (idx.TryGetValue(alt0HKey, out rateH) &&
idx.TryGetValue(alt0AKey, out rateA))
{
var hbhSpan = document
.QuerySelector($"span[data-selection-key='{alt0HKey}']");
var hbhTd = hbhSpan?.Closest("td");
var valueH = ExtractHandicapFromTd(hbhTd);
var hbaSpan = document
.QuerySelector($"span[data-selection-key='{alt0AKey}']");
var hbaTd = hbaSpan?.Closest("td");
var valueA = ExtractHandicapFromTd(hbaTd);
if (valueH.HasValue)
TryAddBet(bets, MatchScope.Instance, BetType.WinFora, Side.Side1,
valueH.Value, rateH);
if (valueA.HasValue)
TryAddBet(bets, MatchScope.Instance, BetType.WinFora, Side.Side2,
valueA.Value, rateA);
if (TryEmitHandicapPair(idx, elements, alt0HKey, alt0AKey, MatchScope.Instance, bets))
break;
}
}
}
/// <summary>
/// Emits the two <see cref="BetType.WinFora"/> bets for one handicap market,
/// provided both of its selection keys have a price.
/// </summary>
/// <param name="idx">Selection-key → price map.</param>
/// <param name="elements">Selection-key → element map used to find each selection's enclosing cell.</param>
/// <param name="hbhKey">Selection key whose bet is emitted for <see cref="Side.Side1"/>.</param>
/// <param name="hbaKey">Selection key whose bet is emitted for <see cref="Side.Side2"/>.</param>
/// <param name="scope">Scope attached to the emitted bets.</param>
/// <param name="bets">Accumulator passed through to <c>TryAddBet</c>.</param>
/// <returns>
/// <c>true</c> when both keys are priced (even if no handicap value could be
/// extracted for either side); <c>false</c> when either price is missing, in
/// which case nothing is emitted.
/// </returns>
private bool TryEmitHandicapPair(
    Dictionary<string, decimal> idx,
    Dictionary<string, IElement> elements,
    string hbhKey,
    string hbaKey,
    BetScope scope,
    List<Bet> bets)
{
    if (idx.TryGetValue(hbhKey, out var side1Rate) &&
        idx.TryGetValue(hbaKey, out var side2Rate))
    {
        // Resolve both handicap values before emitting anything, so the
        // extraction order stays independent of the emission order.
        var side1Handicap = ExtractHandicapFromTd(LookupTd(elements, hbhKey));
        var side2Handicap = ExtractHandicapFromTd(LookupTd(elements, hbaKey));

        if (side1Handicap is { } side1Value)
        {
            TryAddBet(bets, scope, BetType.WinFora, Side.Side1, side1Value, side1Rate);
        }

        if (side2Handicap is { } side2Value)
        {
            TryAddBet(bets, scope, BetType.WinFora, Side.Side2, side2Value, side2Rate);
        }

        return true;
    }

    return false;
}
/// <summary>
/// Finds the element indexed under <paramref name="key"/> and returns the
/// result of <c>Closest("td")</c> on it.
/// </summary>
/// <param name="elements">Selection-key → element map.</param>
/// <param name="key">Selection key to look up.</param>
/// <returns>
/// The result of <c>Closest("td")</c> for the indexed element, or <c>null</c>
/// when the key is not in the map.
/// </returns>
private static IElement? LookupTd(Dictionary<string, IElement> elements, string key)
{
    if (!elements.TryGetValue(key, out var selection))
    {
        return null;
    }

    return selection.Closest("td");
}
// ── Match Total ────────────────────────────────────────────────────────
private void ExtractMatchTotal(
@@ -329,7 +332,7 @@ public sealed partial class EventOddsParser : IEventOddsParser
private void ExtractPeriodHandicap(
Dictionary<string, decimal> idx,
IDocument document,
Dictionary<string, IElement> elements,
SportCode sport,
string eventId,
int n,
@@ -340,30 +343,17 @@ public sealed partial class EventOddsParser : IEventOddsParser
var scope = new PeriodScope(n);
var hbhKey = $"{eventId}@{marketToken}.HB_H";
var hbaKey = $"{eventId}@{marketToken}.HB_A";
if (TryEmitHandicapPair(idx, elements,
$"{eventId}@{marketToken}.HB_H",
$"{eventId}@{marketToken}.HB_A",
scope, bets))
return;
if (!idx.TryGetValue(hbhKey, out var rateH) ||
!idx.TryGetValue(hbaKey, out var rateA))
{
// Try suffix-0 variant
hbhKey = $"{eventId}@{marketToken}0.HB_H";
hbaKey = $"{eventId}@{marketToken}0.HB_A";
if (!idx.TryGetValue(hbhKey, out rateH) ||
!idx.TryGetValue(hbaKey, out rateA))
return;
}
var hbhSpan = document.QuerySelector($"span[data-selection-key='{hbhKey}']");
var valueH = ExtractHandicapFromTd(hbhSpan?.Closest("td"));
var hbaSpan = document.QuerySelector($"span[data-selection-key='{hbaKey}']");
var valueA = ExtractHandicapFromTd(hbaSpan?.Closest("td"));
if (valueH.HasValue)
TryAddBet(bets, scope, BetType.WinFora, Side.Side1, valueH.Value, rateH);
if (valueA.HasValue)
TryAddBet(bets, scope, BetType.WinFora, Side.Side2, valueA.Value, rateA);
// Suffix-0 fallback
TryEmitHandicapPair(idx, elements,
$"{eventId}@{marketToken}0.HB_H",
$"{eventId}@{marketToken}0.HB_A",
scope, bets);
}
// ── Period Total ───────────────────────────────────────────────────────
@@ -14,8 +14,14 @@ namespace Marathon.Infrastructure.Scraping;
/// </remarks>
public sealed class UserAgentRotatorHandler : DelegatingHandler
{
// The handler is registered Transient (per HttpClientFactory convention),
// so a per-instance counter would reset every time HttpClientFactory built
// a new client and rotation would no longer be round-robin across the
// process. Hoisting to a static counter ensures genuine round-robin across
// every outbound request the process makes.
private static int s_counter;
private readonly string[] _userAgents;
private int _counter;
public UserAgentRotatorHandler(IOptions<ScrapingOptions> options)
{
@@ -29,9 +35,10 @@ public sealed class UserAgentRotatorHandler : DelegatingHandler
{
if (_userAgents.Length > 0)
{
// Thread-safe round-robin without modulo bias risk at reasonable scale
// Thread-safe round-robin. Math.Abs guards against the negative-overflow
// case once Interlocked.Increment wraps past int.MaxValue.
var index = Math.Abs(
Interlocked.Increment(ref _counter) % _userAgents.Length);
Interlocked.Increment(ref s_counter) % _userAgents.Length);
request.Headers.TryAddWithoutValidation("User-Agent", _userAgents[index]);
}
@@ -204,8 +204,10 @@ public sealed class EventBrowsingService : IEventBrowsingService
private static IReadOnlyList<EventScopeBoard> BuildBoards(OddsSnapshot snapshot)
{
// Group by scope, preserve Match-first order then ascending Period numbers.
// BetScope is a record hierarchy so .GroupBy uses value equality natively —
// no custom comparer needed.
var groups = snapshot.Bets
.GroupBy(static b => b.Scope, ScopeEqualityComparer.Instance)
.GroupBy(static b => b.Scope)
.OrderBy(static g => OrderKey(g.Key));
var boards = new List<EventScopeBoard>();
@@ -241,23 +243,4 @@ public sealed class EventBrowsingService : IEventBrowsingService
PeriodScope p => p.Number,
_ => int.MaxValue,
};
/// <summary>
/// Compares <see cref="BetScope"/> values by kind: any two
/// <see cref="MatchScope"/> values are equal, and two
/// <see cref="PeriodScope"/> values are equal when their numbers match.
/// </summary>
private sealed class ScopeEqualityComparer : IEqualityComparer<BetScope>
{
    /// <summary>Shared instance of the comparer.</summary>
    public static readonly ScopeEqualityComparer Instance = new();

    /// <summary>
    /// Returns <c>true</c> for two nulls, two match scopes, or two period
    /// scopes with the same number; <c>false</c> for every other pairing.
    /// </summary>
    public bool Equals(BetScope? x, BetScope? y)
    {
        if (x is null && y is null)
        {
            return true;
        }

        if (x is MatchScope && y is MatchScope)
        {
            return true;
        }

        if (x is PeriodScope left && y is PeriodScope right)
        {
            return left.Number == right.Number;
        }

        return false;
    }

    /// <summary>
    /// Hashes a match scope to 0, a period scope to its number, and anything
    /// else to -1.
    /// </summary>
    public int GetHashCode(BetScope obj)
    {
        if (obj is MatchScope)
        {
            return 0;
        }

        if (obj is PeriodScope period)
        {
            return period.Number;
        }

        return -1;
    }
}
}