chore(med): mapping culture-safe parse, dead-code, scope comparer, UA rotator, parser cache

Six MEDIUM-tier review items:

* Mapping.cs — DateTimeOffset.Parse now passes CultureInfo.InvariantCulture
  + DateTimeStyles.RoundtripKind so a non-en-US thread culture cannot
  corrupt round-tripped ScheduledAt / CapturedAt / DetectedAt / CompletedAt.
  Also replaces the magic 0/1 BetScope discriminator with named constants.

* Delete dead Placeholder.cs files in Marathon.Application and
  Marathon.Infrastructure — they were stubs from Phase 1 to satisfy
  "non-empty project" and have been dead since Phase 2/3.

* EventBrowsingService — drop the bespoke ScopeEqualityComparer; BetScope
  is a record hierarchy, .GroupBy uses value equality natively.

* UserAgentRotatorHandler — counter promoted to private static int with
  Interlocked.Increment so rotation is round-robin across the process.
  The handler is registered Transient and HttpClientFactory resolves a
  fresh instance each time it builds a handler chain, so the previous
  instance field restarted from zero with every new chain and broke
  rotation.

* EventOddsParser — added a parallel "selection-key → IElement" index
  alongside the existing price index. Handicap extraction (6 call sites
  per event detail page) used to run a fresh
  document.QuerySelector("span[data-selection-key='...']") for every
  key — a full-document CSS traversal each time. Now it is a dictionary
  lookup, with the pair-emit logic factored into a shared
  TryEmitHandicapPair helper.
This commit is contained in:
2026-05-09 15:45:18 +03:00
parent fed3a09695
commit c2934b2c8d
6 changed files with 98 additions and 118 deletions
-5
View File
@@ -1,5 +0,0 @@
// Phase 2/3/4 will populate this project.
// This file exists only to prevent the compiler from treating the project as empty.
namespace Marathon.Application;
internal static class Placeholder { }
@@ -1,3 +1,4 @@
using System.Globalization;
using Marathon.Domain.Entities; using Marathon.Domain.Entities;
using Marathon.Domain.Enums; using Marathon.Domain.Enums;
using Marathon.Domain.ValueObjects; using Marathon.Domain.ValueObjects;
@@ -11,6 +12,15 @@ namespace Marathon.Infrastructure.Persistence;
/// </summary> /// </summary>
internal static class Mapping internal static class Mapping
{ {
// ScheduledAt / CapturedAt / DetectedAt / CompletedAt are written via
// DateTimeOffset.ToString("O") — round-trip ISO 8601. Parse with the
// invariant culture and RoundtripKind so a non-en-US thread culture
// (or a future locale change) cannot corrupt the round-trip.
private const DateTimeStyles RoundtripStyles = DateTimeStyles.RoundtripKind;
// ─── Bet scope discriminator constants ────────────────────────────────────
private const int ScopeMatch = 0;
private const int ScopePeriod = 1;
// ─── Event ─────────────────────────────────────────────────────────────── // ─── Event ───────────────────────────────────────────────────────────────
public static EventEntity ToEntity(Event domain) => public static EventEntity ToEntity(Event domain) =>
@@ -34,7 +44,7 @@ internal static class Mapping
CountryCode: entity.CountryCode, CountryCode: entity.CountryCode,
LeagueId: entity.LeagueId, LeagueId: entity.LeagueId,
Category: entity.Category, Category: entity.Category,
ScheduledAt: DateTimeOffset.Parse(entity.ScheduledAt), ScheduledAt: DateTimeOffset.Parse(entity.ScheduledAt, CultureInfo.InvariantCulture, RoundtripStyles),
Side1Name: entity.Side1Name, Side1Name: entity.Side1Name,
Side2Name: entity.Side2Name) Side2Name: entity.Side2Name)
{ {
@@ -55,7 +65,7 @@ internal static class Mapping
public static OddsSnapshot ToDomain(SnapshotEntity entity) => public static OddsSnapshot ToDomain(SnapshotEntity entity) =>
new( new(
eventId: new EventId(entity.EventCode), eventId: new EventId(entity.EventCode),
capturedAt: DateTimeOffset.Parse(entity.CapturedAt), capturedAt: DateTimeOffset.Parse(entity.CapturedAt, CultureInfo.InvariantCulture, RoundtripStyles),
source: (OddsSource)entity.Source, source: (OddsSource)entity.Source,
bets: entity.Bets.Select(ToDomain).ToList().AsReadOnly()); bets: entity.Bets.Select(ToDomain).ToList().AsReadOnly());
@@ -64,7 +74,7 @@ internal static class Mapping
public static BetEntity ToEntity(Bet domain) => public static BetEntity ToEntity(Bet domain) =>
new() new()
{ {
Scope = domain.Scope is MatchScope ? 0 : 1, Scope = domain.Scope is MatchScope ? ScopeMatch : ScopePeriod,
PeriodNumber = domain.Scope is PeriodScope ps ? ps.Number : null, PeriodNumber = domain.Scope is PeriodScope ps ? ps.Number : null,
Type = (int)domain.Type, Type = (int)domain.Type,
Side = (int)domain.Side, Side = (int)domain.Side,
@@ -76,8 +86,8 @@ internal static class Mapping
{ {
var scope = entity.Scope switch var scope = entity.Scope switch
{ {
0 => (BetScope)MatchScope.Instance, ScopeMatch => (BetScope)MatchScope.Instance,
1 => new PeriodScope(entity.PeriodNumber!.Value), ScopePeriod => new PeriodScope(entity.PeriodNumber!.Value),
_ => throw new InvalidOperationException( _ => throw new InvalidOperationException(
$"Unknown BetScope discriminator: {entity.Scope}"), $"Unknown BetScope discriminator: {entity.Scope}"),
}; };
@@ -108,7 +118,7 @@ internal static class Mapping
Side1Score: entity.Side1Score, Side1Score: entity.Side1Score,
Side2Score: entity.Side2Score, Side2Score: entity.Side2Score,
WinnerSide: (Side)entity.WinnerSide, WinnerSide: (Side)entity.WinnerSide,
CompletedAt: DateTimeOffset.Parse(entity.CompletedAt)); CompletedAt: DateTimeOffset.Parse(entity.CompletedAt, CultureInfo.InvariantCulture, RoundtripStyles));
// ─── Anomaly ────────────────────────────────────────────────────────────── // ─── Anomaly ──────────────────────────────────────────────────────────────
@@ -127,7 +137,7 @@ internal static class Mapping
new( new(
Id: Guid.Parse(entity.Id), Id: Guid.Parse(entity.Id),
EventId: new EventId(entity.EventCode), EventId: new EventId(entity.EventCode),
DetectedAt: DateTimeOffset.Parse(entity.DetectedAt), DetectedAt: DateTimeOffset.Parse(entity.DetectedAt, CultureInfo.InvariantCulture, RoundtripStyles),
Kind: (AnomalyKind)entity.Kind, Kind: (AnomalyKind)entity.Kind,
Score: entity.Score, Score: entity.Score,
EvidenceJson: entity.EvidenceJson); EvidenceJson: entity.EvidenceJson);
@@ -1,5 +0,0 @@
// Phase 2/3 will populate this project.
// This file exists only to prevent the compiler from treating the project as empty.
namespace Marathon.Infrastructure;
internal static class Placeholder { }
@@ -104,15 +104,21 @@ public sealed partial class EventOddsParser : IEventOddsParser
return null; return null;
} }
// Index selections by key for O(1) lookup // Index selections by key for O(1) lookup. Two parallel maps:
var selectionIndex = BuildSelectionIndex(selections); // priceIndex — selection-key → price (for fast rate lookups)
// elementIndex — selection-key → DOM element (so handicap-extraction
// no longer rescans the document with QuerySelector
// for every key — that was an O(N) cost paid 6× per
// period).
var priceIndex = BuildSelectionPriceIndex(selections);
var elementIndex = BuildSelectionElementIndex(selections);
var bets = new List<Bet>(); var bets = new List<Bet>();
// ── Match scope bets ─────────────────────────────────────────────── // ── Match scope bets ───────────────────────────────────────────────
ExtractMatchWin(selectionIndex, eventIdRaw, bets); ExtractMatchWin(priceIndex, eventIdRaw, bets);
ExtractMatchHandicap(selectionIndex, document, eventIdRaw, bets); ExtractMatchHandicap(priceIndex, elementIndex, eventIdRaw, bets);
ExtractMatchTotal(selectionIndex, document, eventIdRaw, bets); ExtractMatchTotal(priceIndex, document, eventIdRaw, bets);
// ── Period scope bets ────────────────────────────────────────────── // ── Period scope bets ──────────────────────────────────────────────
if (sportCode is not null) if (sportCode is not null)
@@ -120,9 +126,9 @@ public sealed partial class EventOddsParser : IEventOddsParser
var maxPeriods = _periodMapper.MaxPeriods(sportCode); var maxPeriods = _periodMapper.MaxPeriods(sportCode);
for (var n = 1; n <= maxPeriods; n++) for (var n = 1; n <= maxPeriods; n++)
{ {
ExtractPeriodWin(selectionIndex, document, sportCode, eventIdRaw, n, bets); ExtractPeriodWin(priceIndex, document, sportCode, eventIdRaw, n, bets);
ExtractPeriodHandicap(selectionIndex, document, sportCode, eventIdRaw, n, bets); ExtractPeriodHandicap(priceIndex, elementIndex, sportCode, eventIdRaw, n, bets);
ExtractPeriodTotal(selectionIndex, document, sportCode, eventIdRaw, n, bets); ExtractPeriodTotal(priceIndex, document, sportCode, eventIdRaw, n, bets);
} }
} }
@@ -140,7 +146,7 @@ public sealed partial class EventOddsParser : IEventOddsParser
// ── Selection indexing ───────────────────────────────────────────────── // ── Selection indexing ─────────────────────────────────────────────────
private static Dictionary<string, decimal> BuildSelectionIndex(List<IElement> selections) private static Dictionary<string, decimal> BuildSelectionPriceIndex(List<IElement> selections)
{ {
var index = new Dictionary<string, decimal>(StringComparer.OrdinalIgnoreCase); var index = new Dictionary<string, decimal>(StringComparer.OrdinalIgnoreCase);
foreach (var sel in selections) foreach (var sel in selections)
@@ -159,6 +165,18 @@ public sealed partial class EventOddsParser : IEventOddsParser
return index; return index;
} }
/// <summary>
/// Builds a case-insensitive map from each selection's
/// <c>data-selection-key</c> attribute to the element that carries it.
/// </summary>
/// <param name="selections">Selection elements to index.</param>
/// <returns>
/// A dictionary keyed by selection key. Elements whose key is missing or
/// whitespace are skipped; when a key repeats, the first element is kept.
/// </returns>
private static Dictionary<string, IElement> BuildSelectionElementIndex(List<IElement> selections)
{
    var elementsByKey = new Dictionary<string, IElement>(StringComparer.OrdinalIgnoreCase);

    foreach (var element in selections)
    {
        var selectionKey = element.GetAttribute("data-selection-key");
        if (string.IsNullOrWhiteSpace(selectionKey))
        {
            continue;
        }

        // TryAdd leaves an existing entry untouched, so the first element
        // seen for a given key wins.
        elementsByKey.TryAdd(selectionKey, element);
    }

    return elementsByKey;
}
// ── Match Win / Draw ─────────────────────────────────────────────────── // ── Match Win / Draw ───────────────────────────────────────────────────
private void ExtractMatchWin( private void ExtractMatchWin(
@@ -207,7 +225,7 @@ public sealed partial class EventOddsParser : IEventOddsParser
private void ExtractMatchHandicap( private void ExtractMatchHandicap(
Dictionary<string, decimal> idx, Dictionary<string, decimal> idx,
IDocument document, Dictionary<string, IElement> elements,
string eventId, string eventId,
List<Bet> bets) List<Bet> bets)
{ {
@@ -216,58 +234,43 @@ public sealed partial class EventOddsParser : IEventOddsParser
var hbhKey = $"{eventId}@{market}.HB_H"; var hbhKey = $"{eventId}@{market}.HB_H";
var hbaKey = $"{eventId}@{market}.HB_A"; var hbaKey = $"{eventId}@{market}.HB_A";
if (idx.TryGetValue(hbhKey, out var rateH) && if (TryEmitHandicapPair(idx, elements, hbhKey, hbaKey, MatchScope.Instance, bets))
idx.TryGetValue(hbaKey, out var rateA))
{
// Extract handicap value from the <td> containing the HB_H selection
var hbhSpan = document
.QuerySelector($"span[data-selection-key='{hbhKey}']");
var hbhTd = hbhSpan?.Closest("td");
var valueH = ExtractHandicapFromTd(hbhTd);
var hbaSpan = document
.QuerySelector($"span[data-selection-key='{hbaKey}']");
var hbaTd = hbaSpan?.Closest("td");
var valueA = ExtractHandicapFromTd(hbaTd);
if (valueH.HasValue)
TryAddBet(bets, MatchScope.Instance, BetType.WinFora, Side.Side1,
valueH.Value, rateH);
if (valueA.HasValue)
TryAddBet(bets, MatchScope.Instance, BetType.WinFora, Side.Side2,
valueA.Value, rateA);
break; break;
}
// Also try no-suffix and suffix-0 fallback // Also try suffix-0 fallback (e.g. "Match_Handicap0.HB_H")
var alt0HKey = $"{eventId}@{market}0.HB_H"; var alt0HKey = $"{eventId}@{market}0.HB_H";
var alt0AKey = $"{eventId}@{market}0.HB_A"; var alt0AKey = $"{eventId}@{market}0.HB_A";
if (idx.TryGetValue(alt0HKey, out rateH) && if (TryEmitHandicapPair(idx, elements, alt0HKey, alt0AKey, MatchScope.Instance, bets))
idx.TryGetValue(alt0AKey, out rateA))
{
var hbhSpan = document
.QuerySelector($"span[data-selection-key='{alt0HKey}']");
var hbhTd = hbhSpan?.Closest("td");
var valueH = ExtractHandicapFromTd(hbhTd);
var hbaSpan = document
.QuerySelector($"span[data-selection-key='{alt0AKey}']");
var hbaTd = hbaSpan?.Closest("td");
var valueA = ExtractHandicapFromTd(hbaTd);
if (valueH.HasValue)
TryAddBet(bets, MatchScope.Instance, BetType.WinFora, Side.Side1,
valueH.Value, rateH);
if (valueA.HasValue)
TryAddBet(bets, MatchScope.Instance, BetType.WinFora, Side.Side2,
valueA.Value, rateA);
break; break;
}
} }
} }
/// <summary>
/// Emits the Side1/Side2 <see cref="BetType.WinFora"/> bets for one pair of
/// handicap selection keys, provided both keys have a price in
/// <paramref name="idx"/>.
/// </summary>
/// <param name="idx">Selection-key → price index.</param>
/// <param name="elements">Selection-key → element index used to find each selection's cell.</param>
/// <param name="hbhKey">Selection key of the first-side handicap (<c>HB_H</c>).</param>
/// <param name="hbaKey">Selection key of the second-side handicap (<c>HB_A</c>).</param>
/// <param name="scope">Scope the emitted bets are attached to.</param>
/// <param name="bets">Accumulator that receives the emitted bets.</param>
/// <returns>
/// <c>true</c> when both keys were priced (even if no handicap value could
/// be extracted for either side); <c>false</c> when at least one key is missing.
/// </returns>
private bool TryEmitHandicapPair(
    Dictionary<string, decimal> idx,
    Dictionary<string, IElement> elements,
    string hbhKey,
    string hbaKey,
    BetScope scope,
    List<Bet> bets)
{
    // Both halves of the pair must be priced, otherwise nothing is emitted.
    if (!idx.TryGetValue(hbhKey, out var side1Rate))
    {
        return false;
    }

    if (!idx.TryGetValue(hbaKey, out var side2Rate))
    {
        return false;
    }

    // Resolve both handicap values up front, before any bet is added.
    var side1Handicap = ExtractHandicapFromTd(LookupTd(elements, hbhKey));
    var side2Handicap = ExtractHandicapFromTd(LookupTd(elements, hbaKey));

    if (side1Handicap is { } side1Value)
    {
        TryAddBet(bets, scope, BetType.WinFora, Side.Side1, side1Value, side1Rate);
    }

    if (side2Handicap is { } side2Value)
    {
        TryAddBet(bets, scope, BetType.WinFora, Side.Side2, side2Value, side2Rate);
    }

    return true;
}
/// <summary>
/// Looks up the element indexed under <paramref name="key"/> and returns
/// the result of <c>Closest("td")</c> on it.
/// </summary>
/// <param name="elements">Selection-key → element index.</param>
/// <param name="key">Selection key to resolve.</param>
/// <returns>
/// The matching <c>td</c> element, or <c>null</c> when the key is not indexed
/// or no such element is found.
/// </returns>
private static IElement? LookupTd(Dictionary<string, IElement> elements, string key)
{
    if (!elements.TryGetValue(key, out var selection))
    {
        return null;
    }

    return selection.Closest("td");
}
// ── Match Total ──────────────────────────────────────────────────────── // ── Match Total ────────────────────────────────────────────────────────
private void ExtractMatchTotal( private void ExtractMatchTotal(
@@ -329,7 +332,7 @@ public sealed partial class EventOddsParser : IEventOddsParser
private void ExtractPeriodHandicap( private void ExtractPeriodHandicap(
Dictionary<string, decimal> idx, Dictionary<string, decimal> idx,
IDocument document, Dictionary<string, IElement> elements,
SportCode sport, SportCode sport,
string eventId, string eventId,
int n, int n,
@@ -340,30 +343,17 @@ public sealed partial class EventOddsParser : IEventOddsParser
var scope = new PeriodScope(n); var scope = new PeriodScope(n);
var hbhKey = $"{eventId}@{marketToken}.HB_H"; if (TryEmitHandicapPair(idx, elements,
var hbaKey = $"{eventId}@{marketToken}.HB_A"; $"{eventId}@{marketToken}.HB_H",
$"{eventId}@{marketToken}.HB_A",
scope, bets))
return;
if (!idx.TryGetValue(hbhKey, out var rateH) || // Suffix-0 fallback
!idx.TryGetValue(hbaKey, out var rateA)) TryEmitHandicapPair(idx, elements,
{ $"{eventId}@{marketToken}0.HB_H",
// Try suffix-0 variant $"{eventId}@{marketToken}0.HB_A",
hbhKey = $"{eventId}@{marketToken}0.HB_H"; scope, bets);
hbaKey = $"{eventId}@{marketToken}0.HB_A";
if (!idx.TryGetValue(hbhKey, out rateH) ||
!idx.TryGetValue(hbaKey, out rateA))
return;
}
var hbhSpan = document.QuerySelector($"span[data-selection-key='{hbhKey}']");
var valueH = ExtractHandicapFromTd(hbhSpan?.Closest("td"));
var hbaSpan = document.QuerySelector($"span[data-selection-key='{hbaKey}']");
var valueA = ExtractHandicapFromTd(hbaSpan?.Closest("td"));
if (valueH.HasValue)
TryAddBet(bets, scope, BetType.WinFora, Side.Side1, valueH.Value, rateH);
if (valueA.HasValue)
TryAddBet(bets, scope, BetType.WinFora, Side.Side2, valueA.Value, rateA);
} }
// ── Period Total ─────────────────────────────────────────────────────── // ── Period Total ───────────────────────────────────────────────────────
@@ -14,8 +14,14 @@ namespace Marathon.Infrastructure.Scraping;
/// </remarks> /// </remarks>
public sealed class UserAgentRotatorHandler : DelegatingHandler public sealed class UserAgentRotatorHandler : DelegatingHandler
{ {
// The handler is registered Transient (per HttpClientFactory convention),
// so a per-instance counter would reset every time HttpClientFactory built
// a new client and rotation would no longer be round-robin across the
// process. Hoisting to a static counter ensures genuine round-robin across
// every outbound request the process makes.
private static int s_counter;
private readonly string[] _userAgents; private readonly string[] _userAgents;
private int _counter;
public UserAgentRotatorHandler(IOptions<ScrapingOptions> options) public UserAgentRotatorHandler(IOptions<ScrapingOptions> options)
{ {
@@ -29,9 +35,10 @@ public sealed class UserAgentRotatorHandler : DelegatingHandler
{ {
if (_userAgents.Length > 0) if (_userAgents.Length > 0)
{ {
// Thread-safe round-robin without modulo bias risk at reasonable scale // Thread-safe round-robin. Math.Abs guards against the negative-overflow
// case once Interlocked.Increment wraps past int.MaxValue.
var index = Math.Abs( var index = Math.Abs(
Interlocked.Increment(ref _counter) % _userAgents.Length); Interlocked.Increment(ref s_counter) % _userAgents.Length);
request.Headers.TryAddWithoutValidation("User-Agent", _userAgents[index]); request.Headers.TryAddWithoutValidation("User-Agent", _userAgents[index]);
} }
@@ -204,8 +204,10 @@ public sealed class EventBrowsingService : IEventBrowsingService
private static IReadOnlyList<EventScopeBoard> BuildBoards(OddsSnapshot snapshot) private static IReadOnlyList<EventScopeBoard> BuildBoards(OddsSnapshot snapshot)
{ {
// Group by scope, preserve Match-first order then ascending Period numbers. // Group by scope, preserve Match-first order then ascending Period numbers.
// BetScope is a record hierarchy so .GroupBy uses value equality natively —
// no custom comparer needed.
var groups = snapshot.Bets var groups = snapshot.Bets
.GroupBy(static b => b.Scope, ScopeEqualityComparer.Instance) .GroupBy(static b => b.Scope)
.OrderBy(static g => OrderKey(g.Key)); .OrderBy(static g => OrderKey(g.Key));
var boards = new List<EventScopeBoard>(); var boards = new List<EventScopeBoard>();
@@ -241,23 +243,4 @@ public sealed class EventBrowsingService : IEventBrowsingService
PeriodScope p => p.Number, PeriodScope p => p.Number,
_ => int.MaxValue, _ => int.MaxValue,
}; };
private sealed class ScopeEqualityComparer : IEqualityComparer<BetScope>
{
public static readonly ScopeEqualityComparer Instance = new();
public bool Equals(BetScope? x, BetScope? y) => (x, y) switch
{
(null, null) => true,
(MatchScope, MatchScope) => true,
(PeriodScope a, PeriodScope b) => a.Number == b.Number,
_ => false,
};
public int GetHashCode(BetScope obj) => obj switch
{
MatchScope => 0,
PeriodScope p => p.Number,
_ => -1,
};
}
} }