From 537b78ab83051128e87ba39ea7501ecf92f1e072 Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Sat, 9 May 2026 15:50:52 +0300 Subject: [PATCH] fix(security): validate scraped paths, BaseUrl, settings paths + atomic write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five MEDIUM-tier security findings from the review. None were exploitable in the single-user desktop threat model, but each is hardening for future hosts (server / multi-user) and for the case of an upstream compromise. * EventListingParserBase: scraped data-event-path values are now run through IsSafeRelativePath before being concatenated into request URLs. Rejects scheme://host/* patterns, leading slash/backslash (network-path traversal), ".." traversal, control characters (CRLF log forging), and path lengths greater than 512. * ScrapingModule.ResolveBaseAddress: configured Scraping:BaseUrl is now validated through an https-only allow-list (marathonbet.by + subdomains). Falls back to the default base URL when the value is missing, malformed, off-host, or carries userinfo / query / fragment. Settings UI may write arbitrary strings to this key — a typo or worse could otherwise re-point every subsequent request at an unrelated host. * JsonSettingsWriter.WriteRootAsync: temp-file write now FlushAsync + Flush(flushToDisk: true) before the rename so a crash mid-write doesn't leave a 0-byte appsettings.Local.json. Promote the rename from File.Move (overwrite=true, not atomic on NTFS for cross-volume cases) to File.Replace (atomic on NTFS, keeps a .bak copy of the previous file). Falls back to File.Move when the destination doesn't exist yet. * StoragePathValidator + Settings.razor wiring: DatabasePath and ExportDirectory paths from the Settings page are validated before persist. Rejects empty values, ".." traversal, control characters, and any path that resolves outside AppContext.BaseDirectory. Adds 4 new RU+EN keys for the validation messages. * Logged scraper paths: covered transitively by the EventPath validation above (the upstream of every logged {Path} value), so no separate sanitization needed. --- .../Parsers/EventListingParserBase.cs | 38 +++++++++ .../Scraping/ScrapingModule.cs | 42 ++++++++- src/Marathon.UI/Pages/Settings.razor | 12 +++ .../Resources/SharedResource.en.resx | 4 + .../Resources/SharedResource.ru.resx | 4 + .../Services/JsonSettingsWriter.cs | 29 ++++++- .../Services/StoragePathValidator.cs | 85 +++++++++++++++++++ 7 files changed, 210 insertions(+), 4 deletions(-) create mode 100644 src/Marathon.UI/Services/StoragePathValidator.cs diff --git a/src/Marathon.Infrastructure/Scraping/Parsers/EventListingParserBase.cs b/src/Marathon.Infrastructure/Scraping/Parsers/EventListingParserBase.cs index 86e5116..8d8740f 100644 --- a/src/Marathon.Infrastructure/Scraping/Parsers/EventListingParserBase.cs +++ b/src/Marathon.Infrastructure/Scraping/Parsers/EventListingParserBase.cs @@ -81,6 +81,17 @@ public abstract class EventListingParserBase var eventPath = row.GetAttribute("data-event-path"); if (string.IsNullOrWhiteSpace(eventPath)) return null; + if (!IsSafeRelativePath(eventPath)) + { + // Defense in depth: data-event-path is concatenated into a + // request URL by MarathonbetScraper. Reject anything that could + // redirect the scraper to a different host, escape the base + // directory, or carry control characters into a log line. + Logger.LogWarning( + "Rejecting event row with unsafe data-event-path value (eventId={EventId}).", + eventIdRaw); + return null; + } var eventName = row.GetAttribute("data-event-name") ?? string.Empty; @@ -138,6 +149,33 @@ public abstract class EventListingParserBase return null; } + /// + /// Validates that a scraped data-event-path value is safe to + /// concatenate into the bookmaker URL. Rejects values that could: + /// (a) redirect the scraper to a different host (network-path "//host/x" + /// or absolute "scheme://host/..."); (b) traverse out of the base + /// directory (".."); (c) inject CRLF / control characters into log + /// lines or HTTP headers. + /// + private static bool IsSafeRelativePath(string path) + { + if (path.Length is 0 or > 512) + return false; + if (path[0] is '/' or '\\') + return false; + if (path.Contains("://", StringComparison.Ordinal)) + return false; + if (path.Contains("..")) + return false; + + foreach (var ch in path) + { + if (ch < 0x20 || ch == 0x7F) + return false; + } + return true; + } + private static (string country, string league, string category) ParseEventPath(string path) { // Path example: diff --git a/src/Marathon.Infrastructure/Scraping/ScrapingModule.cs b/src/Marathon.Infrastructure/Scraping/ScrapingModule.cs index 54c0d7b..d2ec65d 100644 --- a/src/Marathon.Infrastructure/Scraping/ScrapingModule.cs +++ b/src/Marathon.Infrastructure/Scraping/ScrapingModule.cs @@ -51,7 +51,7 @@ public static class ScrapingModule .AddHttpClient("marathonbet", (sp, client) => { var opts = sp.GetRequiredService>().Value; - client.BaseAddress = new Uri(opts.BaseUrl); + client.BaseAddress = ResolveBaseAddress(opts.BaseUrl); client.Timeout = Timeout.InfiniteTimeSpan; // Polly timeout manages per-attempt }) .AddHttpMessageHandler() @@ -125,4 +125,44 @@ public static class ScrapingModule return services; } + + /// + /// Validates a configured Scraping:BaseUrl and returns the parsed + /// . Falls back to the default + /// https://www.marathonbet.by when the configured value is missing + /// or fails the safety checks (https only, host on the marathonbet.by + /// allow-list, no userinfo / fragment / query) — settings UI may write + /// arbitrary strings here, and we don't want a typo (or worse) to point + /// the scraper at an unrelated host. + /// + private static Uri ResolveBaseAddress(string? configured) + { + const string defaultBase = "https://www.marathonbet.by"; + + if (string.IsNullOrWhiteSpace(configured)) + return new Uri(defaultBase); + + if (!Uri.TryCreate(configured, UriKind.Absolute, out var parsed)) + return new Uri(defaultBase); + + if (!string.Equals(parsed.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase)) + return new Uri(defaultBase); + + // Allow-list — only marathonbet.by and its subdomains. Future bookmaker + // support should be opt-in via additional ScrapingOptions, not by + // letting any URL through this gate. + var host = parsed.Host; + var isAllowed = + host.Equals("marathonbet.by", StringComparison.OrdinalIgnoreCase) || + host.EndsWith(".marathonbet.by", StringComparison.OrdinalIgnoreCase); + if (!isAllowed) + return new Uri(defaultBase); + + if (!string.IsNullOrEmpty(parsed.UserInfo) || + !string.IsNullOrEmpty(parsed.Fragment) || + !string.IsNullOrEmpty(parsed.Query)) + return new Uri(defaultBase); + + return parsed; + } } diff --git a/src/Marathon.UI/Pages/Settings.razor b/src/Marathon.UI/Pages/Settings.razor index 5c60e49..ec2773a 100644 --- a/src/Marathon.UI/Pages/Settings.razor +++ b/src/Marathon.UI/Pages/Settings.razor @@ -230,6 +230,18 @@ private async Task SaveSectionAsync(string section, T payload) where T : class { + // Section-level validation — fails fast before disk write so the + // user sees the snackbar immediately and the JSON file isn't touched. + if (payload is StorageOptions storage) + { + var errorKey = StoragePathValidator.Validate(storage); + if (errorKey is not null) + { + Snackbar.Add(L[errorKey], Severity.Error); + return; + } + } + var confirmed = await ConfirmAsync(); if (!confirmed) { diff --git a/src/Marathon.UI/Resources/SharedResource.en.resx b/src/Marathon.UI/Resources/SharedResource.en.resx index aaa0390..b9780c3 100644 --- a/src/Marathon.UI/Resources/SharedResource.en.resx +++ b/src/Marathon.UI/Resources/SharedResource.en.resx @@ -125,6 +125,10 @@ SQLite path Export directory Snapshot retention (days) + SQLite path is required. + Export directory is required. + SQLite path must stay inside the application directory and may not contain "..". + Export directory must stay inside the application directory and may not contain "..". Suspension window (sec) Flip threshold (Δ probability) diff --git a/src/Marathon.UI/Resources/SharedResource.ru.resx b/src/Marathon.UI/Resources/SharedResource.ru.resx index 0c886ce..8facca7 100644 --- a/src/Marathon.UI/Resources/SharedResource.ru.resx +++ b/src/Marathon.UI/Resources/SharedResource.ru.resx @@ -132,6 +132,10 @@ Путь к SQLite Каталог экспорта Хранить снимки (дней) + Путь к SQLite обязателен. + Каталог экспорта обязателен. + Путь к SQLite должен быть внутри папки приложения и не содержать «..». + Каталог экспорта должен быть внутри папки приложения и не содержать «..». Окно «заморозки» (сек) diff --git a/src/Marathon.UI/Services/JsonSettingsWriter.cs b/src/Marathon.UI/Services/JsonSettingsWriter.cs index 2c63532..55018f3 100644 --- a/src/Marathon.UI/Services/JsonSettingsWriter.cs +++ b/src/Marathon.UI/Services/JsonSettingsWriter.cs @@ -117,13 +117,36 @@ public sealed class JsonSettingsWriter : ISettingsWriter } var tempPath = _filePath + ".tmp"; - await using (var stream = File.Create(tempPath)) + + // Write the temp file and force the buffer to disk BEFORE renaming. + // Without Flush(true) a crash between the write and the rename leaves + // a 0-byte appsettings.Local.json on the next boot. + await using (var stream = new FileStream( + tempPath, + FileMode.Create, + FileAccess.Write, + FileShare.None, + bufferSize: 4096, + useAsync: true)) { await JsonSerializer.SerializeAsync(stream, root, WriteOptions, cancellationToken).ConfigureAwait(false); + await stream.FlushAsync(cancellationToken).ConfigureAwait(false); + stream.Flush(flushToDisk: true); } - // Atomic rename — survives crashes mid-write. - File.Move(tempPath, _filePath, overwrite: true); + // File.Replace is atomic on NTFS and keeps a .bak copy of the prior + // contents in case the next boot fails to start with the new settings. + // Falls back to File.Move when the destination doesn't exist yet + // (Replace requires the destination present; Move handles first-write). + if (File.Exists(_filePath)) + { + var backupPath = _filePath + ".bak"; + File.Replace(tempPath, _filePath, backupPath, ignoreMetadataErrors: true); + } + else + { + File.Move(tempPath, _filePath, overwrite: false); + } } /// For tests: reads the persisted JSON object back. diff --git a/src/Marathon.UI/Services/StoragePathValidator.cs b/src/Marathon.UI/Services/StoragePathValidator.cs new file mode 100644 index 0000000..695f60c --- /dev/null +++ b/src/Marathon.UI/Services/StoragePathValidator.cs @@ -0,0 +1,85 @@ +using Marathon.Application.Storage; + +namespace Marathon.UI.Services; + +/// +/// Validates user-supplied values from the +/// Settings page before they're persisted to appsettings.Local.json. +/// +/// +/// On a single-user desktop build this is "user shoots own foot" hardening. +/// On a future ASP.NET host with the same RCL it becomes a real boundary — +/// neither the SQLite provider nor the Excel exporter validate paths +/// themselves. +/// +public static class StoragePathValidator +{ + /// + /// Returns a localizable error key when is + /// invalid, or null when it's safe to persist. + /// Validation rules: + /// + /// Both fields must be non-empty. + /// No control characters or characters Windows rejects in paths. + /// No ".." traversal segments. + /// Paths must resolve under — + /// either as relative paths (anchored on the working directory) or + /// as absolute paths whose + /// starts with the base directory's full path. + /// + /// + public static string? Validate(StorageOptions options) + { + ArgumentNullException.ThrowIfNull(options); + + if (string.IsNullOrWhiteSpace(options.DatabasePath)) + return "Settings.Storage.Validation.DatabasePathRequired"; + if (string.IsNullOrWhiteSpace(options.ExportDirectory)) + return "Settings.Storage.Validation.ExportDirectoryRequired"; + + if (!IsAcceptablePath(options.DatabasePath)) + return "Settings.Storage.Validation.DatabasePathInvalid"; + if (!IsAcceptablePath(options.ExportDirectory)) + return "Settings.Storage.Validation.ExportDirectoryInvalid"; + + return null; + } + + private static bool IsAcceptablePath(string raw) + { + if (raw.Contains("..")) + return false; + if (raw.IndexOfAny(Path.GetInvalidPathChars()) >= 0) + return false; + + try + { + var baseDir = Path.GetFullPath(AppContext.BaseDirectory); + var resolved = Path.GetFullPath( + Path.IsPathRooted(raw) ? raw : Path.Combine(AppContext.BaseDirectory, raw)); + + // Allow same path or any descendant. Use OrdinalIgnoreCase on + // Windows; case-sensitive elsewhere. + var comparison = OperatingSystem.IsWindows() + ? StringComparison.OrdinalIgnoreCase + : StringComparison.Ordinal; + + return resolved.StartsWith( + baseDir.TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar, + comparison) + || string.Equals(resolved, baseDir, comparison); + } + catch (ArgumentException) + { + return false; + } + catch (PathTooLongException) + { + return false; + } + catch (NotSupportedException) + { + return false; + } + } +}