-
Notifications
You must be signed in to change notification settings - Fork 28
Release v1.7.4 — external-wait formula + CPU:Elapsed adjustment #260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -176,6 +176,14 @@ public class QueryTimeResult | |
|
|
||
| [JsonPropertyName("elapsed_time_ms")] | ||
| public long ElapsedTimeMs { get; set; } | ||
|
|
||
| /// <summary> | ||
| /// Sum of external/preemptive wait time (MEMORY_ALLOCATION_*, PREEMPTIVE_*) — | ||
| /// these waits are CPU-busy in kernel and inflate CpuTimeMs vs real query CPU. | ||
| /// Subtract from CpuTimeMs for a truer CPU:Elapsed ratio. | ||
| /// </summary> | ||
| [JsonPropertyName("external_wait_ms")] | ||
| public long ExternalWaitMs { get; set; } | ||
|
Comment on lines
+179
to
+186
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. New JSON field Generated by Claude Code |
||
| } | ||
|
|
||
| public class ParameterResult | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| using PlanViewer.Core.Models; | ||
| using PlanViewer.Core.Services; | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Output layer now depends on Generated by Claude Code |
||
|
|
||
| namespace PlanViewer.Core.Output; | ||
|
|
||
|
|
@@ -111,10 +112,18 @@ private static StatementResult MapStatement(PlanStatement stmt) | |
| // Query time (actual plans) | ||
| if (stmt.QueryTimeStats != null) | ||
| { | ||
| long externalWaitMs = 0; | ||
| foreach (var w in stmt.WaitStats) | ||
| { | ||
| if (BenefitScorer.IsExternalWait(w.WaitType)) | ||
| externalWaitMs += w.WaitTimeMs; | ||
| } | ||
|
|
||
| result.QueryTime = new QueryTimeResult | ||
| { | ||
| CpuTimeMs = stmt.QueryTimeStats.CpuTimeMs, | ||
| ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs | ||
| ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs, | ||
| ExternalWaitMs = externalWaitMs | ||
| }; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -494,12 +494,11 @@ private static void ScoreWaitStats(PlanStatement stmt) | |
| var isParallel = stmt.DegreeOfParallelism > 1 && stmt.RootNode != null; | ||
|
|
||
| // Collect all operators with per-thread stats for parallel benefit calculation | ||
| List<OperatorWaitProfile>? operatorProfiles = null; | ||
| if (isParallel) | ||
| { | ||
| operatorProfiles = new List<OperatorWaitProfile>(); | ||
| CollectOperatorWaitProfiles(stmt.RootNode!, operatorProfiles); | ||
| } | ||
| // Collect operator profiles even for serial plans — the external-wait formula | ||
| // uses sum-of-max-thread-cpu across operators and works for both. | ||
| var operatorProfiles = new List<OperatorWaitProfile>(); | ||
| if (stmt.RootNode != null) | ||
| CollectOperatorWaitProfiles(stmt.RootNode, operatorProfiles); | ||
|
|
||
| foreach (var wait in stmt.WaitStats) | ||
| { | ||
|
|
@@ -508,7 +507,15 @@ private static void ScoreWaitStats(PlanStatement stmt) | |
| var category = ClassifyWaitType(wait.WaitType); | ||
| double benefitPct; | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gate Generated by Claude Code |
||
|
|
||
| if (category == "Parallelism" && isParallel) | ||
| if (IsExternalWait(wait.WaitType) && operatorProfiles.Count > 0) | ||
| { | ||
| // External / preemptive waits (MEMORY_ALLOCATION_*, PREEMPTIVE_*): the worker | ||
| // is CPU-busy in kernel, so operator elapsed ≈ operator cpu and the wait | ||
| // barely shows in the per-thread (elapsed - cpu) calculation. Joe's formula: | ||
| // benefit = (wait_ms / total_cpu_ms) * Σ max_thread_cpu_per_operator / elapsed | ||
| benefitPct = CalculateExternalWaitBenefit(wait, operatorProfiles, stmt.QueryTimeStats!.CpuTimeMs, elapsedMs); | ||
| } | ||
| else if (category == "Parallelism" && isParallel) | ||
| { | ||
| // CXPACKET/CXCONSUMER/CXSYNC: benefit is the parallelism efficiency gap, | ||
| // not the raw wait time. Threads waiting for other threads is a symptom | ||
|
|
@@ -525,7 +532,7 @@ private static void ScoreWaitStats(PlanStatement stmt) | |
| benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100; | ||
| } | ||
| } | ||
| else if (!isParallel || operatorProfiles == null || operatorProfiles.Count == 0) | ||
| else if (!isParallel || operatorProfiles.Count == 0) | ||
| { | ||
| // Serial plan or no operator data: simple ratio | ||
| benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100; | ||
|
|
@@ -585,6 +592,48 @@ private static double CalculateParallelWaitBenefit( | |
| return benefitMs / stmtElapsedMs * 100; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Joe's formula for external/preemptive waits where the worker is CPU-busy in kernel | ||
| /// (MEMORY_ALLOCATION_*, PREEMPTIVE_*). The standard (elapsed-cpu) per-thread | ||
| /// wait accounting misses these because elapsed ≈ cpu for those threads. Use the | ||
| /// wait's share of total CPU, scaled by the plan's critical-path CPU. | ||
| /// wait_cpu_share = wait_ms / total_cpu_ms | ||
| /// sum_max_cpu = Σ max_thread_cpu across operators | ||
| /// benefit_ms = wait_cpu_share * sum_max_cpu | ||
| /// Then convert to % of statement elapsed. | ||
| /// </summary> | ||
| private static double CalculateExternalWaitBenefit( | ||
| WaitStatInfo wait, List<OperatorWaitProfile> profiles, | ||
| long stmtCpuMs, long stmtElapsedMs) | ||
| { | ||
| if (stmtCpuMs <= 0 || stmtElapsedMs <= 0) | ||
| return (double)wait.WaitTimeMs / Math.Max(1, stmtElapsedMs) * 100; | ||
|
Comment on lines
+609
to
+610
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Early-return path when Generated by Claude Code |
||
|
|
||
| long sumMaxCpu = 0; | ||
| foreach (var p in profiles) | ||
| sumMaxCpu += p.MaxThreadCpuMs; | ||
|
|
||
| if (sumMaxCpu <= 0) | ||
| return (double)wait.WaitTimeMs / stmtElapsedMs * 100; | ||
|
|
||
| var waitCpuShare = (double)wait.WaitTimeMs / stmtCpuMs; | ||
| var benefitMs = waitCpuShare * sumMaxCpu; | ||
| return benefitMs / stmtElapsedMs * 100; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// External / preemptive waits where the worker is CPU-busy in kernel rather than | ||
| /// descheduled. Their wait time counts toward the query's CPU time, so the usual | ||
| /// (elapsed - cpu) per-thread wait math misses them entirely. | ||
| /// </summary> | ||
| public static bool IsExternalWait(string waitType) | ||
| { | ||
| if (string.IsNullOrEmpty(waitType)) return false; | ||
| var wt = waitType.ToUpperInvariant(); | ||
| return wt.Contains("MEMORY_ALLOCATION") | ||
| || wt.StartsWith("PREEMPTIVE_"); | ||
|
Comment on lines
+633
to
+634
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Also note this diverges from Generated by Claude Code |
||
| } | ||
|
|
||
| /// <summary> | ||
| /// Determines if an operator is relevant for a given wait category. | ||
| /// </summary> | ||
|
|
@@ -624,13 +673,18 @@ private static void CollectOperatorWaitProfiles(PlanNode node, List<OperatorWait | |
| maxThreadWait = threadWait; | ||
| } | ||
|
|
||
| if (totalWait > 0 || maxThreadWait > 0) | ||
| // Max per-thread SELF CPU (non-cumulative) — critical-path CPU contribution | ||
| // from this operator. Used by the external-wait formula. | ||
| var maxThreadCpu = PlanAnalyzer.GetOperatorMaxThreadOwnCpuMs(node); | ||
|
|
||
| if (totalWait > 0 || maxThreadWait > 0 || maxThreadCpu > 0) | ||
| { | ||
| profiles.Add(new OperatorWaitProfile | ||
| { | ||
| Node = node, | ||
| MaxThreadWaitMs = maxThreadWait, | ||
| TotalWaitMs = totalWait, | ||
| MaxThreadCpuMs = maxThreadCpu, | ||
| HasPhysicalReads = node.ActualPhysicalReads > 0, | ||
| HasCpuWork = node.ActualCPUMs > 0, | ||
| IsExchange = node.PhysicalOp == "Parallelism", | ||
|
|
@@ -681,6 +735,8 @@ private sealed class OperatorWaitProfile | |
| public PlanNode Node { get; init; } = null!; | ||
| public long MaxThreadWaitMs { get; init; } | ||
| public long TotalWaitMs { get; init; } | ||
| /// <summary>Max CPU time among this operator's threads (critical-path CPU for external-wait formula).</summary> | ||
| public long MaxThreadCpuMs { get; init; } | ||
| public bool HasPhysicalReads { get; init; } | ||
| public bool HasCpuWork { get; init; } | ||
| public bool IsExchange { get; init; } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -353,7 +353,7 @@ | |
|
|
||
| if (unsnifffedParams.Count > 0) | ||
| { | ||
| var hasRecompile = stmt.StatementText.Contains("RECOMPILE", StringComparison.OrdinalIgnoreCase); | ||
|
Check warning on line 356 in src/PlanViewer.Core/Services/PlanAnalyzer.cs
|
||
| if (!hasRecompile) | ||
| { | ||
| var names = string.Join(", ", unsnifffedParams.Select(p => p.Name)); | ||
|
|
@@ -1204,7 +1204,7 @@ | |
| // Rule 28: Row Count Spool — NOT IN with nullable column | ||
| // Pattern: Row Count Spool with high rewinds, child scan has IS NULL predicate, | ||
| // and statement text contains NOT IN | ||
| if (!cfg.IsRuleDisabled(28) && node.PhysicalOp.Contains("Row Count Spool")) | ||
|
Check warning on line 1207 in src/PlanViewer.Core/Services/PlanAnalyzer.cs
|
||
| { | ||
| var rewinds = node.HasActualStats ? (double)node.ActualRewinds : node.EstimateRewinds; | ||
| if (rewinds > 10000 && HasNotInPattern(node, stmt)) | ||
|
|
@@ -1592,6 +1592,66 @@ | |
| return maxSelf; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Max per-thread self-CPU for this operator. | ||
| /// Parallel: for each thread, self_cpu = thread_cpu - Σ same-thread child cpu; take max. | ||
| /// Serial / single-thread: operator_cpu - Σ effective child cpu. | ||
| /// Needed for external-wait benefit scoring (Joe's formula). | ||
| /// </summary> | ||
| internal static long GetOperatorMaxThreadOwnCpuMs(PlanNode node) | ||
| { | ||
| if (!node.HasActualStats || node.ActualCPUMs <= 0) return 0; | ||
|
|
||
| if (node.PerThreadStats.Count > 1) | ||
| { | ||
| var parentByThread = new Dictionary<int, long>(); | ||
| foreach (var ts in node.PerThreadStats) | ||
| parentByThread[ts.ThreadId] = ts.ActualCPUMs; | ||
|
Comment on lines
+1607
to
+1609
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Generated by Claude Code |
||
|
|
||
| var childSumByThread = new Dictionary<int, long>(); | ||
| foreach (var child in node.Children) | ||
| { | ||
| var childNode = child; | ||
| if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0) | ||
| childNode = child.Children.OrderByDescending(c => c.ActualCPUMs).First(); | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Two questions worth confirming: (1) Is picking the max-CPU child actually the right semantic here, versus summing per-thread CPU across all exchange children keyed by ThreadId? (2) Does this mirror the existing elapsed-path helper Generated by Claude Code |
||
| foreach (var ts in childNode.PerThreadStats) | ||
| { | ||
| childSumByThread.TryGetValue(ts.ThreadId, out var existing); | ||
| childSumByThread[ts.ThreadId] = existing + ts.ActualCPUMs; | ||
| } | ||
| } | ||
|
|
||
| var maxSelf = 0L; | ||
| foreach (var (threadId, parentCpu) in parentByThread) | ||
| { | ||
| childSumByThread.TryGetValue(threadId, out var childCpu); | ||
| var self = Math.Max(0, parentCpu - childCpu); | ||
| if (self > maxSelf) maxSelf = self; | ||
| } | ||
| return maxSelf; | ||
| } | ||
|
|
||
| // Serial: operator_cpu - Σ effective child cpu | ||
| var totalChildCpu = 0L; | ||
| foreach (var child in node.Children) | ||
| totalChildCpu += GetEffectiveChildCpuMs(child); | ||
| return Math.Max(0, node.ActualCPUMs - totalChildCpu); | ||
| } | ||
|
|
||
| private static long GetEffectiveChildCpuMs(PlanNode child) | ||
| { | ||
| if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0) | ||
| return child.Children.Max(GetEffectiveChildCpuMs); | ||
| if (child.ActualCPUMs > 0) | ||
| return child.ActualCPUMs; | ||
| if (child.Children.Count == 0) | ||
| return 0; | ||
| var sum = 0L; | ||
| foreach (var grandchild in child.Children) | ||
| sum += GetEffectiveChildCpuMs(grandchild); | ||
| return sum; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Serial row mode self-time: subtract all direct children's effective elapsed. | ||
| /// Pass-through operators (Compute Scalar, etc.) don't carry runtime stats — | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Third copy of the external-wait summation (also in
ResultMapper.cs:114-119and implicit inBenefitScorer.CalculateExternalWaitBenefit). Three call sites iterateWaitStats, callIsExternalWait, sumWaitTimeMs. Worth pulling into a single helper onPlanStatement(orBenefitScorer.SumExternalWaitMs(stmt)) so the definition of "external wait" has one home — especially if the rule ever expands beyondMEMORY_ALLOCATION/PREEMPTIVE_*.Generated by Claude Code