|
| 1 | +using DBADashAI.Models; |
| 2 | + |
| 3 | +namespace DBADashAI.Services.Tools; |
| 4 | + |
/// <summary>
/// AI tool that queries recent alert, wait, blocking, deadlock and drive-status rows and
/// folds them into a per-instance summary with a signal count and a weighted risk score.
/// </summary>
public sealed class CrossSignalCorrelationSummaryTool(SqlToolExecutor sql) : IAiTool
{
    /// <summary>Stable identifier of this tool.</summary>
    public const string ToolName = "cross-signal-correlation-summary";

    /// <summary>Column shared by every query below; used as the correlation key.</summary>
    private const string InstanceColumn = "InstanceDisplayName";

    /// <summary>Maximum number of instances returned in the correlated summary.</summary>
    private const int MaxCorrelatedInstances = 25;

    // Values passed to Math.Max together with request.MaxRows, so the value handed to
    // QueryAsync is never smaller than these. Presumably QueryAsync treats that argument
    // as a row limit - TODO confirm against SqlToolExecutor.
    private const int MinAlertRows = 150;
    private const int MinSignalRows = 100;

    // Alerts updated in the last 7 days on active instances, newest first.
    private const string AlertsSql = """
        SELECT TOP 500
            i.InstanceDisplayName,
            aa.AlertKey,
            aa.Priority,
            aa.IsResolved,
            aa.UpdatedDate
        FROM Alert.ActiveAlerts aa
        INNER JOIN dbo.Instances i ON i.InstanceID = aa.InstanceID
        WHERE i.IsActive = 1
          AND aa.UpdatedDate >= DATEADD(DAY,-7,SYSUTCDATETIME())
        ORDER BY aa.UpdatedDate DESC;
        """;

    // Total non-excluded wait time (seconds) per active instance over the last 24 hours.
    private const string WaitsSql = """
        SELECT TOP 500
            i.InstanceDisplayName,
            SUM(w.wait_time_ms)/1000.0 AS TotalWaitSec
        FROM dbo.Waits_60MIN w
        INNER JOIN dbo.WaitType wt ON wt.WaitTypeID = w.WaitTypeID
        INNER JOIN dbo.Instances i ON i.InstanceID = w.InstanceID
        WHERE w.SnapshotDate >= DATEADD(HOUR,-24,SYSUTCDATETIME())
          AND wt.IsExcluded = 0
          AND i.IsActive = 1
        GROUP BY i.InstanceDisplayName
        ORDER BY TotalWaitSec DESC;
        """;

    // Summed BlockedWaitTime per active instance over the last 24 hours.
    // The column is aliased as milliseconds and divided by 1000 when correlated.
    private const string BlockingSql = """
        SELECT TOP 500
            i.InstanceDisplayName,
            SUM(bs.BlockedWaitTime) AS BlockedWaitMs
        FROM dbo.BlockingSnapshotSummary bs
        INNER JOIN dbo.Instances i ON i.InstanceID = bs.InstanceID
        WHERE bs.SnapshotDateUTC >= DATEADD(HOUR,-24,SYSUTCDATETIME())
          AND i.IsActive = 1
        GROUP BY i.InstanceDisplayName
        ORDER BY BlockedWaitMs DESC;
        """;

    // Estimated deadlock count per active instance over the last 24 hours.
    // NOTE(review): the estimate is (Value_Total / SampleCount) * 60 per hourly row;
    // confirm the factor matches how the per-second counter is sampled.
    private const string DeadlockSql = """
        SELECT TOP 500
            i.InstanceDisplayName,
            SUM(CAST(ROUND(((pc.Value_Total / NULLIF(pc.SampleCount,0))*60.0),0) AS BIGINT)) AS DeadlockCountEstimate
        FROM dbo.PerformanceCounters_60MIN pc
        INNER JOIN dbo.Counters c ON c.CounterID = pc.CounterID
        INNER JOIN dbo.Instances i ON i.InstanceID = pc.InstanceID
        WHERE pc.SnapshotDate >= DATEADD(HOUR,-24,SYSUTCDATETIME())
          AND c.counter_name = 'Number of Deadlocks/sec'
          AND c.object_name = 'Locks'
          AND c.instance_name = '_Total'
          AND i.IsActive = 1
        GROUP BY i.InstanceDisplayName
        ORDER BY DeadlockCountEstimate DESC;
        """;

    // Drives whose Status is 1 or 2, lowest status and least free space first.
    private const string DriveSql = """
        SELECT TOP 500
            ds.InstanceDisplayName,
            ds.Status,
            ds.PctFreeSpace
        FROM dbo.DriveStatus ds
        WHERE ds.Status IN (1,2)
        ORDER BY ds.Status ASC, ds.PctFreeSpace ASC;
        """;

    /// <summary>Name of the tool; always <see cref="ToolName"/>.</summary>
    public string Name => ToolName;

    /// <summary>Human-readable description of what the tool does.</summary>
    public string Description => "Correlate multiple risk signals across alerts, workload pressure, reliability, and capacity by instance.";

    /// <summary>Hint describing the kind of question this tool is suited to.</summary>
    public string InputHint => "Use for systemic risk, multi-signal correlation, and root-cause cluster questions.";

    /// <summary>Keywords associated with this tool.</summary>
    public string[] Keywords => ["correlation", "cross-signal", "systemic", "cluster", "root cause", "multi signal", "overall risk", "top risks"];

    /// <summary>
    /// Runs the five signal queries one after another, correlates their rows by instance
    /// display name (case-insensitively) and returns the top instances together with the raw rows.
    /// </summary>
    /// <param name="request">The ask request; only <c>MaxRows</c> is read here.</param>
    /// <param name="cancellationToken">Token forwarded to every query.</param>
    /// <returns>
    /// A result whose <c>Data</c> holds the correlated summary (at most 25 instances, ordered by
    /// signal count, then risk score, then instance name) plus the raw rows of each query, and
    /// whose <c>RowCount</c> is the total number of raw rows.
    /// </returns>
    public async Task<AiToolResult> RunAsync(AiAskRequest request, CancellationToken cancellationToken)
    {
        var alertRows = await sql.QueryAsync(AlertsSql, Math.Max(request.MaxRows, MinAlertRows), cancellationToken);
        var waitsRows = await sql.QueryAsync(WaitsSql, Math.Max(request.MaxRows, MinSignalRows), cancellationToken);
        var blockingRows = await sql.QueryAsync(BlockingSql, Math.Max(request.MaxRows, MinSignalRows), cancellationToken);
        var deadlockRows = await sql.QueryAsync(DeadlockSql, Math.Max(request.MaxRows, MinSignalRows), cancellationToken);
        var driveRows = await sql.QueryAsync(DriveSql, Math.Max(request.MaxRows, MinSignalRows), cancellationToken);

        // Group every result set once so each instance is resolved by lookup rather than
        // by rescanning all rows for every instance.
        var alertsByInstance = GroupByInstance(alertRows);
        var waitsByInstance = GroupByInstance(waitsRows);
        var blockingByInstance = GroupByInstance(blockingRows);
        var deadlocksByInstance = GroupByInstance(deadlockRows);
        var drivesByInstance = GroupByInstance(driveRows);

        ILookup<string, Dictionary<string, object?>>[] lookups =
        [
            alertsByInstance, waitsByInstance, blockingByInstance, deadlocksByInstance, drivesByInstance
        ];

        // Every distinct, non-blank instance name seen in any signal.
        var instances = lookups
            .SelectMany(lookup => lookup.Select(group => group.Key))
            .Where(name => !string.IsNullOrWhiteSpace(name))
            .ToHashSet(StringComparer.OrdinalIgnoreCase);

        var correlated = instances
            .Select(instance =>
            {
                var alerts = alertsByInstance[instance];
                var unresolvedAlerts = alerts.Count(IsUnresolved);
                // NOTE(review): priority-1 alerts are counted whether or not they are resolved.
                var p1Alerts = alerts.Count(row => GetString(row, "Priority") == "1");
                var waits = waitsByInstance[instance].Sum(row => GetDecimal(row, "TotalWaitSec"));
                var blocking = blockingByInstance[instance].Sum(row => GetDecimal(row, "BlockedWaitMs")) / 1000m;
                var deadlocks = deadlocksByInstance[instance].Sum(row => GetDecimal(row, "DeadlockCountEstimate"));
                var driveRisk = drivesByInstance[instance].Count();

                // One point per signal family that shows any activity.
                var signalCount = 0;
                if (unresolvedAlerts > 0 || p1Alerts > 0)
                {
                    signalCount++;
                }

                if (waits > 0 || blocking > 0)
                {
                    signalCount++;
                }

                if (deadlocks > 0)
                {
                    signalCount++;
                }

                if (driveRisk > 0)
                {
                    signalCount++;
                }

                var riskScore = (p1Alerts * 5m) + (unresolvedAlerts * 2m) + waits + blocking + (deadlocks * 5m) + (driveRisk * 3m);

                return new
                {
                    Instance = instance,
                    SignalCount = signalCount,
                    UnresolvedAlerts = unresolvedAlerts,
                    Priority1Alerts = p1Alerts,
                    TotalWaitSec = Math.Round(waits, 2),
                    BlockedWaitSec = Math.Round(blocking, 2),
                    DeadlockCount = deadlocks,
                    DriveRiskCount = driveRisk,
                    RiskScore = Math.Round(riskScore, 2)
                };
            })
            .OrderByDescending(x => x.SignalCount)
            .ThenByDescending(x => x.RiskScore)
            // Final tie-breaker so equal scores do not depend on set enumeration order.
            .ThenBy(x => x.Instance, StringComparer.OrdinalIgnoreCase)
            .Take(MaxCorrelatedInstances)
            .ToList();

        return new AiToolResult
        {
            RowCount = alertRows.Count + waitsRows.Count + blockingRows.Count + deadlockRows.Count + driveRows.Count,
            Data = new
            {
                generatedUtc = DateTime.UtcNow,
                correlated,
                alertRows,
                waitsRows,
                blockingRows,
                deadlockRows,
                driveRows
            },
            Evidence =
            [
                new AiEvidenceItem
                {
                    Source = "ActiveAlerts + Waits + Blocking + Deadlocks + DriveStatus",
                    Detail = "Cross-signal correlation to identify systemic risk clusters by instance"
                }
            ]
        };
    }

    /// <summary>Groups rows by their instance display name, ignoring case.</summary>
    private static ILookup<string, Dictionary<string, object?>> GroupByInstance(IEnumerable<Dictionary<string, object?>> rows)
    {
        return rows.ToLookup(row => GetString(row, InstanceColumn), StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Returns true when the row's <c>IsResolved</c> cell reads as "0" or "False".
    /// A missing or null cell is not treated as unresolved.
    /// </summary>
    private static bool IsUnresolved(Dictionary<string, object?> row)
    {
        var resolved = GetString(row, "IsResolved");
        return resolved == "0" || string.Equals(resolved, "False", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Reads a numeric cell as a decimal. Decimal values are returned unchanged; anything else is
    /// formatted and parsed with the invariant culture so the result does not depend on the
    /// thread's current culture. Missing, null or unparsable cells yield zero.
    /// </summary>
    private static decimal GetDecimal(Dictionary<string, object?> row, string key)
    {
        if (!row.TryGetValue(key, out var value) || value is null)
        {
            return 0m;
        }

        if (value is decimal exact)
        {
            return exact;
        }

        // Float allows exponent notation (e.g. from double values); thousands separators are
        // additionally accepted in case a cell arrives as pre-formatted text.
        const System.Globalization.NumberStyles styles =
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        return decimal.TryParse(Convert.ToString(value, invariant), styles, invariant, out var parsed)
            ? parsed
            : 0m;
    }

    /// <summary>
    /// Reads a cell as text using the invariant culture; returns an empty string when the key is
    /// missing or the value is null.
    /// </summary>
    private static string GetString(Dictionary<string, object?> row, string key)
    {
        return row.TryGetValue(key, out var value) && value is not null
            ? Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture) ?? string.Empty
            : string.Empty;
    }
}
0 commit comments