Files
maraphon-app/tests/Marathon.Application.Tests/UseCases/PullUpcomingEventsUseCaseTests.cs
alexei.dolgolyov 286b55986b perf(scraping): parallel HTTP fan-out, sequential DB persist (HIGH)
The Pull*UseCase implementations issued one HTTP request at a time despite
Scraping:MaxConcurrentRequests=4. With 30–80 live events and ~1s per
fetch, a 5–10s live cadence target was unreachable; cycles overflowed
the configured interval.

* New Marathon.Application.Configuration.ScrapingThrottle bound from the
  shared Scraping:* section. Exposes only MaxConcurrentRequests so the
  Application layer doesn't pull in the Infrastructure-side ScrapingOptions.
* PullLiveOddsUseCase + PullUpcomingEventsUseCase split into two phases:
  - Phase 1 — Parallel.ForEachAsync over the event list with
    MaxDegreeOfParallelism = throttle.MaxConcurrentRequests. The scraper's
    Polly rate limiter still throttles to RequestsPerSecond underneath
    this fan-out, so spikes are smoothed before they hit the bookmaker.
  - Phase 2 — sequential foreach over the (Event, Snapshot) tuples
    captured in Phase 1, doing event upsert + snapshot insert. EF Core
    DbContext is not thread-safe so all DB writes stay on a single thread.
* InfrastructureModule binds ScrapingThrottle alongside AnomalyOptions.
* Failed snapshot scrapes in Phase 1 mean the event row is also NOT
  persisted in Phase 2 — previously we'd persist the row even when the
  snapshot scrape failed, leaving an orphan event with no odds. Updated
  the regression test accordingly.
* Test fixture exposes TestFixtures.Throttle(maxConcurrentRequests=1) for
  deterministic sequential test runs.
* One existing NSubstitute setup that chained Arg.Is<>() across two
  configurations was rewritten to use a single Arg.Any<>() with inline
  branching — chained matchers were leaking and returning wrong results.
2026-05-09 15:27:06 +03:00

134 lines
5.6 KiB
C#

using FluentAssertions;
using Marathon.Application.Abstractions;
using Marathon.Application.UseCases;
using Marathon.Domain.Entities;
using Marathon.Domain.Enums;
using Marathon.Domain.ValueObjects;
using Microsoft.Extensions.Logging.Abstractions;
using NSubstitute;
using NSubstitute.ExceptionExtensions;
namespace Marathon.Application.Tests.UseCases;
/// <summary>
/// Unit tests for <see cref="PullUpcomingEventsUseCase"/>. The scraper and both
/// repositories are NSubstitute mocks, so every test verifies the counters returned by
/// <c>ExecuteAsync</c> and, where relevant, the calls made to the repositories.
/// </summary>
public sealed class PullUpcomingEventsUseCaseTests
{
    private readonly IOddsScraper _scraper = Substitute.For<IOddsScraper>();
    private readonly IEventRepository _eventRepo = Substitute.For<IEventRepository>();
    private readonly ISnapshotRepository _snapshotRepo = Substitute.For<ISnapshotRepository>();

    /// <summary>
    /// Builds the system under test from the shared mocks, the throttle fixture with its
    /// default arguments, and a no-op logger.
    /// </summary>
    private PullUpcomingEventsUseCase CreateSut() =>
        new(_scraper, _eventRepo, _snapshotRepo,
            TestFixtures.Throttle(),
            NullLogger<PullUpcomingEventsUseCase>.Instance);

    /// <summary>
    /// Two unknown events with successful odds scrapes: both events and both snapshots
    /// are added to their repositories.
    /// </summary>
    [Fact]
    public async Task Should_PersistNewEventsAndCaptureSnapshots_When_ScraperReturnsEvents()
    {
        // Arrange: scraper returns 2 events, neither exists in DB
        var ev1 = TestFixtures.MakeEvent("11111111");
        var ev2 = TestFixtures.MakeEvent("22222222");
        var events = new List<Event> { ev1, ev2 }.AsReadOnly();

        _scraper.ScrapeUpcomingAsync(null, Arg.Any<CancellationToken>()).Returns(events);
        _eventRepo.GetAsync(Arg.Any<EventId>(), Arg.Any<CancellationToken>()).Returns((Event?)null);
        _scraper.ScrapeEventOddsAsync(Arg.Any<Event>(), OddsSource.PreMatch, Arg.Any<CancellationToken>())
            .Returns(ci => TestFixtures.MakeSnapshot(ci.Arg<Event>().Id));

        var sut = CreateSut();

        // Act
        var (processed, newEvents, snapshots) = await sut.ExecuteAsync(CancellationToken.None);

        // Assert
        processed.Should().Be(2);
        newEvents.Should().Be(2);
        snapshots.Should().Be(2);
        await _eventRepo.Received(2).AddAsync(Arg.Any<Event>(), Arg.Any<CancellationToken>());
        await _snapshotRepo.Received(2).AddAsync(Arg.Any<OddsSnapshot>(), Arg.Any<CancellationToken>());
    }

    /// <summary>
    /// One of three scraped events is already stored: only the two unknown events are
    /// added, while a snapshot is still captured for all three.
    /// </summary>
    [Fact]
    public async Task Should_SkipExistingEvents_When_EventAlreadyInDatabase()
    {
        // Arrange: 3 events from scraper — 1 already in DB, 2 new
        var ev1 = TestFixtures.MakeEvent("11111111"); // already in DB
        var ev2 = TestFixtures.MakeEvent("22222222"); // new
        var ev3 = TestFixtures.MakeEvent("33333333"); // new
        var events = new List<Event> { ev1, ev2, ev3 }.AsReadOnly();

        _scraper.ScrapeUpcomingAsync(null, Arg.Any<CancellationToken>()).Returns(events);

        // ev1 exists, ev2/ev3 do not
        _eventRepo.GetAsync(ev1.Id, Arg.Any<CancellationToken>()).Returns(ev1);
        _eventRepo.GetAsync(ev2.Id, Arg.Any<CancellationToken>()).Returns((Event?)null);
        _eventRepo.GetAsync(ev3.Id, Arg.Any<CancellationToken>()).Returns((Event?)null);
        _scraper.ScrapeEventOddsAsync(Arg.Any<Event>(), OddsSource.PreMatch, Arg.Any<CancellationToken>())
            .Returns(ci => TestFixtures.MakeSnapshot(ci.Arg<Event>().Id));

        var sut = CreateSut();

        // Act
        var (processed, newEvents, snapshots) = await sut.ExecuteAsync(CancellationToken.None);

        // Assert
        processed.Should().Be(3);
        newEvents.Should().Be(2, "ev1 was already in the database");
        snapshots.Should().Be(3, "snapshots are captured for all events regardless of duplicate status");
        await _eventRepo.Received(2).AddAsync(Arg.Any<Event>(), Arg.Any<CancellationToken>());
        await _eventRepo.DidNotReceive().AddAsync(ev1, Arg.Any<CancellationToken>());
        // Back the snapshot counter with the actual repository writes.
        await _snapshotRepo.Received(3).AddAsync(Arg.Any<OddsSnapshot>(), Arg.Any<CancellationToken>());
    }

    /// <summary>
    /// The odds scrape throws for the first of two events: the run completes, and only
    /// the second event and its snapshot are persisted.
    /// </summary>
    [Fact]
    public async Task Should_ContinueProcessing_When_SnapshotCaptureFailsForOneEvent()
    {
        // Arrange: 2 events — snapshot for first throws, second succeeds
        var ev1 = TestFixtures.MakeEvent("11111111");
        var ev2 = TestFixtures.MakeEvent("22222222");
        var events = new List<Event> { ev1, ev2 }.AsReadOnly();

        _scraper.ScrapeUpcomingAsync(Arg.Any<SportCode?>(), Arg.Any<CancellationToken>()).Returns(events);
        _eventRepo.GetAsync(Arg.Any<EventId>(), Arg.Any<CancellationToken>()).Returns((Event?)null);

        // Inline routing of the throwing-vs-passing branch on event id, since chained
        // Arg.Is<>() setups can leak matchers across NSubstitute invocations.
        _scraper.ScrapeEventOddsAsync(
                Arg.Any<Event>(), OddsSource.PreMatch, Arg.Any<CancellationToken>())
            .Returns<OddsSnapshot>(ci =>
            {
                var evArg = ci.Arg<Event>();
                if (evArg.Id == ev1.Id)
                {
                    throw new HttpRequestException("site down");
                }

                return TestFixtures.MakeSnapshot(evArg.Id);
            });

        var sut = CreateSut();

        // Act — should not throw
        var (processed, newEvents, snapshots) = await sut.ExecuteAsync(CancellationToken.None);

        // Assert: ev1's snapshot scrape failed in Phase 1, so it's not even
        // attempted in Phase 2 — no orphan event row gets persisted.
        processed.Should().Be(2);
        newEvents.Should().Be(1, "ev1's snapshot failed so it was not persisted");
        snapshots.Should().Be(1, "only ev2 snapshot succeeded");

        // Counters alone would not catch an orphan row that was written but not counted,
        // so verify the repository interactions directly.
        await _eventRepo.DidNotReceive().AddAsync(ev1, Arg.Any<CancellationToken>());
        await _eventRepo.Received(1).AddAsync(Arg.Any<Event>(), Arg.Any<CancellationToken>());
        await _snapshotRepo.Received(1).AddAsync(Arg.Any<OddsSnapshot>(), Arg.Any<CancellationToken>());
    }

    /// <summary>
    /// An empty scrape result yields zero for every counter and triggers no odds scrape
    /// and no repository writes.
    /// </summary>
    [Fact]
    public async Task Should_ReturnZeros_When_ScraperReturnsNoEvents()
    {
        // Arrange
        _scraper.ScrapeUpcomingAsync(null, Arg.Any<CancellationToken>())
            .Returns(Array.Empty<Event>());

        var sut = CreateSut();

        // Act
        var (processed, newEvents, snapshots) = await sut.ExecuteAsync(CancellationToken.None);

        // Assert
        processed.Should().Be(0);
        newEvents.Should().Be(0);
        snapshots.Should().Be(0);
        await _scraper.DidNotReceive()
            .ScrapeEventOddsAsync(Arg.Any<Event>(), Arg.Any<OddsSource>(), Arg.Any<CancellationToken>());
        await _eventRepo.DidNotReceive().AddAsync(Arg.Any<Event>(), Arg.Any<CancellationToken>());
        await _snapshotRepo.DidNotReceive().AddAsync(Arg.Any<OddsSnapshot>(), Arg.Any<CancellationToken>());
    }
}