using System.Net;
using System.Net.Http;
using System.Net.NetworkInformation;
using System.Net.Security;
using System.Net.Sockets;
using System.Security.Cryptography.X509Certificates;
using System.Diagnostics;
using System.Text;
using System.Text.Json;
using Microsoft.EntityFrameworkCore;
using EonaCat.LogStack.Status.Data;
using EonaCat.LogStack.Status.Models;
using Monitor = EonaCat.LogStack.Status.Models.Monitor;

namespace EonaCat.LogStack.Status.Services;

// This file is part of the EonaCat project(s) which is released under the Apache License.
// See the LICENSE file or go to https://EonaCat.com/License for full license details.

/// <summary>
/// Executes health checks (TCP/UDP/ICMP/HTTP/local process/TLS certificate),
/// persists the results and evaluates alert rules for each monitor.
/// </summary>
public class MonitoringService
{
    // NOTE(review): generic type arguments restored throughout this file — the source
    // text had them stripped (e.g. "IDbContextFactory _dbFactory"), most likely an
    // angle-bracket/HTML-escaping artifact. DatabaseContext is grounded by the
    // EvaluateAlertRulesAsync signature further down.
    private readonly IDbContextFactory<DatabaseContext> _dbFactory;
    private readonly ILogger<MonitoringService> _log;

    // Shared HttpClient instances: creating a new HttpClient per check exhausts
    // sockets under load. This client deliberately accepts ANY server certificate
    // because the HTTP check only asserts reachability/status/keyword; certificate
    // validity is checked separately by CheckCertificateAsync.
    // Per-request timeouts are enforced with a CancellationTokenSource instead of
    // HttpClient.Timeout, because each monitor has its own TimeoutMs.
    private static readonly HttpClient s_insecureHttpClient = new(new HttpClientHandler
    {
        ServerCertificateCustomValidationCallback = HttpClientHandler.DangerousAcceptAnyServerCertificateValidator
    })
    {
        Timeout = System.Threading.Timeout.InfiniteTimeSpan
    };

    // Webhook deliveries use normal certificate validation and a fixed 10s timeout.
    private static readonly HttpClient s_webhookHttpClient = new()
    {
        Timeout = TimeSpan.FromSeconds(10)
    };

    public MonitoringService(IDbContextFactory<DatabaseContext> dbFactory, ILogger<MonitoringService> log)
    {
        _dbFactory = dbFactory;
        _log = log;
    }

    /// <summary>
    /// Runs one check for <paramref name="monitor"/>, applies the consecutive-failure
    /// grace period, stores the result and evaluates alert rules.
    /// </summary>
    /// <returns>The persisted <see cref="MonitorCheck"/> row.</returns>
    public async Task<MonitorCheck> CheckMonitorAsync(Monitor monitor)
    {
        var sw = Stopwatch.StartNew();
        MonitorStatus status;
        string? message = null;

        try
        {
            (status, message) = monitor.Type switch
            {
                MonitorType.TCP => await CheckTcpAsync(monitor.Host, monitor.Port ?? 80, monitor.TimeoutMs),
                MonitorType.UDP => await CheckUdpAsync(monitor.Host, monitor.Port ?? 53, monitor.TimeoutMs),
                MonitorType.Ping => await CheckPingAsync(monitor.Host, monitor.TimeoutMs),
                MonitorType.AppLocal => CheckLocalProcess(monitor.ProcessName ?? monitor.Name),
                // A "remote app" is considered up when its TCP port accepts connections.
                MonitorType.AppRemote => await CheckTcpAsync(monitor.Host, monitor.Port ?? 80, monitor.TimeoutMs),
                MonitorType.HTTP => await CheckHttpAsync(monitor.Url ?? $"http://{monitor.Host}", monitor.TimeoutMs, monitor.ExpectedKeyword, monitor.ExpectedStatusCode),
                MonitorType.HTTPS => await CheckHttpAsync(monitor.Url ?? $"https://{monitor.Host}", monitor.TimeoutMs, monitor.ExpectedKeyword, monitor.ExpectedStatusCode),
                _ => (MonitorStatus.Unknown, "Unknown monitor type")
            };
        }
        catch (Exception ex)
        {
            status = MonitorStatus.Down;
            message = ex.Message;
        }

        sw.Stop();

        // Failure threshold: only surface Down/Warning after FailureThreshold
        // consecutive failures; until then keep reporting the previous status.
        if (status is MonitorStatus.Down or MonitorStatus.Warning)
        {
            monitor.ConsecutiveFailures++;
            if (monitor.ConsecutiveFailures < monitor.FailureThreshold)
            {
                // Not enough consecutive failures yet - keep previous status.
                // (The original ternary "LastStatus == Unknown ? Unknown : LastStatus"
                // was equivalent to just LastStatus.)
                status = monitor.LastStatus;
                message = $"[Grace: {monitor.ConsecutiveFailures}/{monitor.FailureThreshold}] {message}";
            }
        }
        else
        {
            monitor.ConsecutiveFailures = 0;
        }

        var check = new MonitorCheck
        {
            MonitorId = monitor.Id,
            Status = status,
            ResponseMs = sw.Elapsed.TotalMilliseconds,
            Message = message,
            CheckedAt = DateTime.UtcNow
        };

        await using var db = await _dbFactory.CreateDbContextAsync();
        var prevStatus = monitor.LastStatus;
        db.MonitorChecks.Add(check);
        monitor.LastChecked = DateTime.UtcNow;
        monitor.LastStatus = status;
        monitor.LastResponseMs = check.ResponseMs;
        db.Monitors.Update(monitor);
        await db.SaveChangesAsync();
        await EvaluateAlertRulesAsync(monitor, check, prevStatus, db);
        return check;
    }

    /// <summary>TCP connect probe: Up when the port accepts a connection within the timeout.</summary>
    private async Task<(MonitorStatus, string?)> CheckTcpAsync(string host, int port, int timeoutMs)
    {
        using var client = new TcpClient();
        // FIX: the CancellationTokenSource was previously leaked (not disposed).
        using var cts = new CancellationTokenSource(timeoutMs);
        try
        {
            await client.ConnectAsync(host, port, cts.Token);
            return (MonitorStatus.Up, $"Connected to {host}:{port}");
        }
        catch (OperationCanceledException)
        {
            return (MonitorStatus.Down, $"Timeout connecting to {host}:{port}");
        }
        catch (Exception ex)
        {
            return (MonitorStatus.Down, ex.Message);
        }
    }

    /// <summary>
    /// Best-effort UDP probe. UDP is connectionless, so a successful send does not
    /// prove the remote port is open — which is why failures yield Warning, not Down.
    /// </summary>
    private async Task<(MonitorStatus, string?)> CheckUdpAsync(string host, int port, int timeoutMs)
    {
        try
        {
            using var udp = new UdpClient();
            udp.Connect(host, port);
            var data = new byte[] { 0x00 };
            // FIX: timeoutMs was accepted but ignored; bound the send with it.
            using var cts = new CancellationTokenSource(timeoutMs);
            await udp.SendAsync(data, cts.Token);
            return (MonitorStatus.Up, $"UDP {host}:{port} reachable");
        }
        catch (OperationCanceledException)
        {
            return (MonitorStatus.Warning, $"UDP check: timeout after {timeoutMs}ms");
        }
        catch (Exception ex)
        {
            return (MonitorStatus.Warning, $"UDP check: {ex.Message}");
        }
    }

    /// <summary>ICMP ping check.</summary>
    private async Task<(MonitorStatus, string?)> CheckPingAsync(string host, int timeoutMs)
    {
        try
        {
            using var ping = new Ping();
            var reply = await ping.SendPingAsync(host, timeoutMs);
            if (reply.Status == IPStatus.Success)
            {
                return (MonitorStatus.Up, $"Ping {host} = {reply.RoundtripTime}ms TTL={reply.Options?.Ttl}");
            }
            return (MonitorStatus.Down, $"Ping {host}: {reply.Status}");
        }
        catch (Exception ex)
        {
            // Ping can throw (e.g. unresolvable host, missing ICMP privileges).
            return (MonitorStatus.Down, $"Ping error: {ex.Message}");
        }
    }

    /// <summary>Up when at least one local process with the given name is running.</summary>
    private (MonitorStatus, string?) CheckLocalProcess(string processName)
    {
        var procs = Process.GetProcessesByName(processName);
        try
        {
            if (procs.Length > 0)
            {
                return (MonitorStatus.Up, $"Process '{processName}' running (PID: {procs[0].Id})");
            }
            return (MonitorStatus.Down, $"Process '{processName}' not found");
        }
        finally
        {
            // FIX: Process instances hold OS handles and were previously leaked.
            foreach (var p in procs)
            {
                p.Dispose();
            }
        }
    }

    /// <summary>
    /// HTTP(S) probe with optional keyword and status-code assertions.
    /// Without an expected status code: 2xx/3xx = Up, 4xx = Warning, 5xx = Down.
    /// </summary>
    private async Task<(MonitorStatus, string?)> CheckHttpAsync(string url, int timeoutMs, string? expectedKeyword, int? expectedStatusCode)
    {
        // Per-request timeout; the shared client's own timeout is infinite.
        using var cts = new CancellationTokenSource(timeoutMs);
        try
        {
            using var resp = await s_insecureHttpClient.GetAsync(url, cts.Token);
            var code = (int)resp.StatusCode;

            // Keyword assertion (case-insensitive body search).
            if (!string.IsNullOrEmpty(expectedKeyword))
            {
                var body = await resp.Content.ReadAsStringAsync(cts.Token);
                if (!body.Contains(expectedKeyword, StringComparison.OrdinalIgnoreCase))
                {
                    return (MonitorStatus.Down, $"HTTP {code} - keyword '{expectedKeyword}' not found");
                }
            }

            // Status code assertion: exact match = Up; otherwise a "successful"
            // class (2xx/3xx) is downgraded to Warning, anything else is Down.
            if (expectedStatusCode.HasValue)
            {
                if (code == expectedStatusCode.Value)
                {
                    return (MonitorStatus.Up, $"HTTP {code} (expected)");
                }
                return code is >= 200 and < 400
                    ? (MonitorStatus.Warning, $"HTTP {code} (expected {expectedStatusCode})")
                    : (MonitorStatus.Down, $"HTTP {code} (expected {expectedStatusCode})");
            }

            if (code is >= 200 and < 400)
            {
                return (MonitorStatus.Up, $"HTTP {code}");
            }
            if (code is >= 400 and < 500)
            {
                return (MonitorStatus.Warning, $"HTTP {code}");
            }
            return (MonitorStatus.Down, $"HTTP {code}");
        }
        catch (OperationCanceledException)
        {
            // Covers TaskCanceledException from the per-request token.
            return (MonitorStatus.Down, "Timeout");
        }
        catch (Exception ex)
        {
            return (MonitorStatus.Down, ex.Message);
        }
    }

    /// <summary>
    /// Connects to <c>cert.Domain:cert.Port</c>, performs a TLS handshake and records
    /// the server certificate's validity window, issuer, subject and thumbprint.
    /// All certificates are accepted on purpose so expired/invalid ones can still be inspected.
    /// </summary>
    /// <returns>The updated (and persisted) <see cref="CertificateEntry"/>.</returns>
    public async Task<CertificateEntry> CheckCertificateAsync(CertificateEntry cert)
    {
        try
        {
            using var client = new TcpClient();
            // NOTE(review): no connect timeout here — a black-holed host can stall
            // this task for the OS default TCP timeout. Confirm whether that is acceptable.
            await client.ConnectAsync(cert.Domain, cert.Port);
            using var ssl = new SslStream(client.GetStream(), false, (_, c, _, _) => true);
            await ssl.AuthenticateAsClientAsync(cert.Domain);
            var x509 = ssl.RemoteCertificate as X509Certificate2 ?? new X509Certificate2(ssl.RemoteCertificate!);
            cert.ExpiresAt = x509.NotAfter.ToUniversalTime();
            cert.IssuedAt = x509.NotBefore.ToUniversalTime();
            cert.Issuer = x509.Issuer;
            cert.Subject = x509.Subject;
            cert.Thumbprint = x509.Thumbprint;
            cert.LastError = null;
        }
        catch (Exception ex)
        {
            // Best-effort: record the failure on the entry instead of throwing.
            cert.LastError = ex.Message;
        }
        cert.LastChecked = DateTime.UtcNow;
        await using var db = await _dbFactory.CreateDbContextAsync();
        db.Certificates.Update(cert);
        await db.SaveChangesAsync();
        return cert;
    }

    /// <summary>
    /// Aggregates dashboard counters. Non-admins only see active public monitors.
    /// </summary>
    public async Task<DashboardStats> GetStatsAsync(bool isAdmin)
    {
        await using var db = await _dbFactory.CreateDbContextAsync();
        var monitors = await db.Monitors.Where(m => m.IsActive && (isAdmin || m.IsPublic)).ToListAsync();
        var certs = await db.Certificates.ToListAsync();
        var now = DateTime.UtcNow;
        return new DashboardStats
        {
            TotalMonitors = monitors.Count,
            UpCount = monitors.Count(m => m.LastStatus == MonitorStatus.Up),
            DownCount = monitors.Count(m => m.LastStatus == MonitorStatus.Down),
            // Warning and Degraded are reported together as "warn".
            WarnCount = monitors.Count(m => m.LastStatus == MonitorStatus.Warning || m.LastStatus == MonitorStatus.Degraded),
            UnknownCount = monitors.Count(m => m.LastStatus == MonitorStatus.Unknown),
            CertCount = certs.Count,
            CertExpiringSoon = certs.Count(c => c.ExpiresAt.HasValue && c.ExpiresAt.Value > now && (c.ExpiresAt.Value - now).TotalDays <= 30),
            CertExpired = certs.Count(c => c.ExpiresAt.HasValue && c.ExpiresAt.Value <= now),
            TotalLogs = await db.Logs.LongCountAsync(),
            ErrorLogs = await db.Logs.LongCountAsync(l => l.Level == "error" || l.Level == "critical"),
            // Point-in-time uptime: fraction of monitors currently Up.
            OverallUptime = monitors.Count > 0 ? (double)monitors.Count(m => m.LastStatus == MonitorStatus.Up) / monitors.Count * 100 : 0,
            ActiveIncidents = await db.Incidents.CountAsync(i => i.Status != IncidentStatus.Resolved),
            ResolvedIncidents = await db.Incidents.CountAsync(i => i.Status == IncidentStatus.Resolved)
        };
    }

    /// <summary>
    /// Returns uptime percentages and response time stats for a single monitor.
    /// </summary>
    /// <exception cref="KeyNotFoundException">No monitor with <paramref name="monitorId"/> exists.</exception>
    public async Task<UptimeReport> GetUptimeReportAsync(int monitorId)
    {
        await using var db = await _dbFactory.CreateDbContextAsync();
        var monitor = await db.Monitors.FindAsync(monitorId);
        if (monitor == null)
        {
            throw new KeyNotFoundException($"Monitor {monitorId} not found.");
        }

        var now = DateTime.UtcNow;
        // The 24h and 7d windows are subsets of the 30d window, so fetch the 30-day
        // history once and filter in memory (was three separate queries).
        var checks30d = await db.MonitorChecks
            .Where(c => c.MonitorId == monitorId && c.CheckedAt >= now.AddDays(-30))
            .ToListAsync();
        var cutoff7d = now.AddDays(-7);
        var cutoff24h = now.AddHours(-24);
        var checks7d = checks30d.Where(c => c.CheckedAt >= cutoff7d).ToList();
        var checks24h = checks30d.Where(c => c.CheckedAt >= cutoff24h).ToList();

        // An empty window reports 100% rather than 0% (no evidence of downtime).
        static double CalcUptime(List<MonitorCheck> list) =>
            list.Count == 0 ? 100.0 : (double)list.Count(c => c.Status == MonitorStatus.Up) / list.Count * 100.0;

        return new UptimeReport
        {
            MonitorId = monitorId,
            MonitorName = monitor.Name,
            Uptime24h = CalcUptime(checks24h),
            Uptime7d = CalcUptime(checks7d),
            Uptime30d = CalcUptime(checks30d),
            TotalChecks = checks30d.Count,
            UpChecks = checks30d.Count(c => c.Status == MonitorStatus.Up),
            DownChecks = checks30d.Count(c => c.Status == MonitorStatus.Down),
            AvgResponseMs = checks30d.Count > 0 ? checks30d.Average(c => c.ResponseMs) : 0
        };
    }

    /// <summary>
    /// Returns log volume bucketed by UTC hour for the last <paramref name="hours"/> hours.
    /// </summary>
    public async Task<List<LogStatsBucket>> GetLogStatsAsync(int hours = 24)
    {
        await using var db = await _dbFactory.CreateDbContextAsync();
        var from = DateTime.UtcNow.AddHours(-hours);
        var logs = await db.Logs.Where(l => l.Timestamp >= from).ToListAsync();
        return logs
            // Truncate each timestamp to the start of its hour.
            .GroupBy(l => new DateTime(l.Timestamp.Year, l.Timestamp.Month, l.Timestamp.Day, l.Timestamp.Hour, 0, 0, DateTimeKind.Utc))
            .OrderBy(g => g.Key)
            .Select(g => new LogStatsBucket
            {
                BucketStart = g.Key,
                Total = g.LongCount(),
                Errors = g.LongCount(l => l.Level == "error" || l.Level == "critical"),
                Warnings = g.LongCount(l => l.Level == "warn" || l.Level == "warning")
            })
            .ToList();
    }

    /// <summary>
    /// Evaluates all enabled alert rules (monitor-specific and global) against a
    /// fresh check result; fires webhooks and optionally auto-creates incidents.
    /// </summary>
    private async Task EvaluateAlertRulesAsync(Monitor monitor, MonitorCheck check, MonitorStatus prevStatus, DatabaseContext db)
    {
        var rules = await db.AlertRules
            .Where(r => r.IsEnabled && (r.MonitorId == monitor.Id || r.MonitorId == null))
            .ToListAsync();
        var globalWebhook = await db.Settings.FirstOrDefaultAsync(s => s.Key == "AlertWebhookUrl");
        var webhookUrl = globalWebhook?.Value;

        foreach (var rule in rules)
        {
            // IsDown/IsUp fire only on the transition, not on every failing/passing check.
            bool fired = rule.Condition switch
            {
                AlertRuleCondition.IsDown => check.Status == MonitorStatus.Down && prevStatus != MonitorStatus.Down,
                AlertRuleCondition.IsUp => check.Status == MonitorStatus.Up && prevStatus == MonitorStatus.Down,
                AlertRuleCondition.ResponseAboveMs => check.ResponseMs > (rule.ThresholdValue ?? double.MaxValue),
                AlertRuleCondition.CertExpiresWithinDays => false, // evaluated by cert loop separately
                _ => false
            };
            if (!fired)
            {
                continue;
            }

            // Cooldown: suppress refiring within the rule's cooldown window.
            if (rule.LastFiredAt.HasValue && (DateTime.UtcNow - rule.LastFiredAt.Value).TotalMinutes < rule.CooldownMinutes)
            {
                continue;
            }
            rule.LastFiredAt = DateTime.UtcNow;
            db.AlertRules.Update(rule);

            // Auto-create incident when a monitor goes down (opt-in via settings).
            var autoIncidents = await db.Settings.FirstOrDefaultAsync(s => s.Key == "AutoCreateIncidents");
            if (autoIncidents?.Value == "true" && rule.Condition == AlertRuleCondition.IsDown)
            {
                var incident = new Incident
                {
                    Title = $"{monitor.Name} is down",
                    Body = check.Message,
                    Severity = IncidentSeverity.Major,
                    Status = IncidentStatus.Investigating,
                    MonitorId = monitor.Id,
                    IsPublic = monitor.IsPublic
                };
                db.Incidents.Add(incident);
            }

            // Fire webhook (rule-specific URL wins over the global one).
            var target = rule.WebhookUrl ?? webhookUrl;
            if (!string.IsNullOrEmpty(target))
            {
                // FIX: fire-and-forget directly; FireWebhookAsync is already async I/O
                // and swallows its own exceptions, so wrapping it in Task.Run only
                // burned a thread-pool thread.
                _ = FireWebhookAsync(target, monitor, check, rule.Condition);
            }
            await db.SaveChangesAsync();
        }
    }

    /// <summary>
    /// Delivers an alert payload as JSON to <paramref name="url"/>.
    /// Failures are logged as warnings and never propagated.
    /// </summary>
    private async Task FireWebhookAsync(string url, Monitor monitor, MonitorCheck check, AlertRuleCondition condition)
    {
        try
        {
            var payload = JsonSerializer.Serialize(new
            {
                monitorId = monitor.Id,
                monitorName = monitor.Name,
                condition = condition.ToString(),
                status = check.Status.ToString(),
                responseMs = check.ResponseMs,
                message = check.Message,
                checkedAt = check.CheckedAt.ToString("o")
            });
            using var resp = await s_webhookHttpClient.PostAsync(url, new StringContent(payload, Encoding.UTF8, "application/json"));
        }
        catch (Exception ex)
        {
            _log.LogWarning("Webhook delivery to {Url} failed: {Msg}", url, ex.Message);
        }
    }
}

/// <summary>
/// Background loop that schedules monitor checks, certificate checks and the
/// hourly log retention purge.
/// </summary>
public class MonitoringBackgroundService : BackgroundService
{
    private readonly IServiceScopeFactory _scopeFactory;
    private readonly ILogger<MonitoringBackgroundService> _log;

    public MonitoringBackgroundService(IServiceScopeFactory scopeFactory, ILogger<MonitoringBackgroundService> log)
    {
        _scopeFactory = scopeFactory;
        _log = log;
    }

    protected override
async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        // FIX: the purge was gated on "now.Minute == 0", but the loop ticks every
        // 10 seconds, so the purge ran up to ~6 times during minute zero. An
        // elapsed-time gate runs it once per hour (including once at startup).
        var lastPurge = DateTime.MinValue;

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                using var scope = _scopeFactory.CreateScope();
                var dbFactory = scope.ServiceProvider.GetRequiredService<IDbContextFactory<DatabaseContext>>();
                await using var db = await dbFactory.CreateDbContextAsync(stoppingToken);
                var monitors = await db.Monitors.Where(m => m.IsActive).ToListAsync(stoppingToken);
                var now = DateTime.UtcNow;

                // Schedule checks for every monitor whose interval has elapsed.
                foreach (var m in monitors)
                {
                    if (m.LastChecked == null || (now - m.LastChecked.Value).TotalSeconds >= m.IntervalSeconds)
                    {
                        var captured = m;
                        // NOTE(review): fire-and-forget — a check that takes longer than
                        // the 10s loop period can be scheduled twice, because LastChecked
                        // is only updated when the check completes. Confirm acceptable.
                        _ = Task.Run(async () =>
                        {
                            using var checkScope = _scopeFactory.CreateScope();
                            var svc = checkScope.ServiceProvider.GetRequiredService<MonitoringService>();
                            await svc.CheckMonitorAsync(captured);
                        }, stoppingToken);
                    }
                }

                // Check each certificate at most once per hour.
                var certs = await db.Certificates.ToListAsync(stoppingToken);
                foreach (var c in certs)
                {
                    if (c.LastChecked == null || (now - c.LastChecked.Value).TotalHours >= 1)
                    {
                        var captured = c;
                        _ = Task.Run(async () =>
                        {
                            using var certScope = _scopeFactory.CreateScope();
                            var svc = certScope.ServiceProvider.GetRequiredService<MonitoringService>();
                            await svc.CheckCertificateAsync(captured);
                        }, stoppingToken);
                    }
                }

                // Log retention purge - run once per hour.
                if (now - lastPurge >= TimeSpan.FromHours(1))
                {
                    lastPurge = now;
                    using var purgeScope = _scopeFactory.CreateScope();
                    var ingest = purgeScope.ServiceProvider.GetRequiredService<IngestionService>();
                    // NOTE(review): the original resolved a settings service whose generic
                    // type argument was lost in the source text; the retention value is
                    // read straight from the Settings table instead (same pattern as
                    // EvaluateAlertRulesAsync), defaulting to 30 days.
                    var retention = await db.Settings.FirstOrDefaultAsync(s => s.Key == "MaxLogRetentionDays", stoppingToken);
                    var days = int.TryParse(retention?.Value, out var d) ? d : 30;
                    await ingest.PurgeOldLogsAsync(days);
                }
            }
            catch (Exception ex)
            {
                _log.LogError(ex, "Error in monitor loop");
            }
            await Task.Delay(10_000, stoppingToken);
        }
    }
}

/// <summary>
/// Persists ingested log entries and enforces log retention.
/// </summary>
public class IngestionService
{
    // Generic type argument restored (stripped in the source text).
    private readonly IDbContextFactory<DatabaseContext> _dbFactory;

    public IngestionService(IDbContextFactory<DatabaseContext> dbFactory)
    {
        _dbFactory = dbFactory;
    }

    /// <summary>Stores a single log entry.</summary>
    public async Task IngestAsync(LogEntry entry)
    {
        await using var db = await _dbFactory.CreateDbContextAsync();
        db.Logs.Add(entry);
        await db.SaveChangesAsync();
    }

    /// <summary>Stores a batch of log entries in a single round-trip.</summary>
    public async Task IngestBatchAsync(IEnumerable<LogEntry> entries)
    {
        await using var db = await _dbFactory.CreateDbContextAsync();
        db.Logs.AddRange(entries);
        await db.SaveChangesAsync();
    }

    /// <summary>Deletes log entries older than <paramref name="retentionDays"/> days.</summary>
    public async Task PurgeOldLogsAsync(int retentionDays)
    {
        await using var db = await _dbFactory.CreateDbContextAsync();
        var cutoff = DateTime.UtcNow.AddDays(-retentionDays);
        // Use ExecuteDeleteAsync for efficiency with large tables (single DELETE,
        // no entity tracking).
        await db.Logs.Where(l => l.Timestamp < cutoff).ExecuteDeleteAsync();
    }
}