Skip to content

Commit 48870b0

Browse files
Merge pull request #260 from erikdarlingdata/dev
Release v1.7.4 — external-wait formula + CPU:Elapsed adjustment
2 parents f18fe57 + 6b61d79 commit 48870b0

8 files changed

Lines changed: 157 additions & 15 deletions

File tree

src/PlanViewer.App/Controls/PlanViewerControl.axaml.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2839,8 +2839,15 @@ static string EfficiencyColor(double pct) => pct >= 80 ? "#E4E6EB"
28392839
statement.QueryTimeStats.CpuTimeMs > 0 &&
28402840
statement.DegreeOfParallelism > 1)
28412841
{
2842-
// Speedup ratio: CPU/elapsed = 1.0 means serial, = DOP means perfect parallelism
2843-
var speedup = (double)statement.QueryTimeStats.CpuTimeMs / statement.QueryTimeStats.ElapsedTimeMs;
2842+
// Speedup ratio: CPU/elapsed = 1.0 means serial, = DOP means perfect parallelism.
2843+
// Subtract external/preemptive wait time from CPU — those waits are CPU-busy
2844+
// in kernel and inflate the ratio without representing real query work.
2845+
long externalWaitMs = 0;
2846+
foreach (var w in statement.WaitStats)
2847+
if (BenefitScorer.IsExternalWait(w.WaitType))
2848+
externalWaitMs += w.WaitTimeMs;
2849+
var effectiveCpu = Math.Max(0, statement.QueryTimeStats.CpuTimeMs - externalWaitMs);
2850+
var speedup = (double)effectiveCpu / statement.QueryTimeStats.ElapsedTimeMs;
28442851
var efficiency = Math.Min(100.0, (speedup - 1.0) / (statement.DegreeOfParallelism - 1.0) * 100.0);
28452852
efficiency = Math.Max(0.0, efficiency);
28462853
dopText += $" ({efficiency:N0}% efficient)";

src/PlanViewer.App/PlanViewer.App.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<ApplicationManifest>app.manifest</ApplicationManifest>
77
<ApplicationIcon>EDD.ico</ApplicationIcon>
88
<AvaloniaUseCompiledBindingsByDefault>true</AvaloniaUseCompiledBindingsByDefault>
9-
<Version>1.7.3</Version>
9+
<Version>1.7.4</Version>
1010
<Authors>Erik Darling</Authors>
1111
<Company>Darling Data LLC</Company>
1212
<Product>Performance Studio</Product>

src/PlanViewer.Core/Output/AnalysisResult.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,14 @@ public class QueryTimeResult
176176

177177
[JsonPropertyName("elapsed_time_ms")]
178178
public long ElapsedTimeMs { get; set; }
179+
180+
/// <summary>
181+
/// Sum of external/preemptive wait time (MEMORY_ALLOCATION_*, PREEMPTIVE_*) —
182+
/// these waits are CPU-busy in kernel and inflate CpuTimeMs vs real query CPU.
183+
/// Subtract from CpuTimeMs for a truer CPU:Elapsed ratio.
184+
/// </summary>
185+
[JsonPropertyName("external_wait_ms")]
186+
public long ExternalWaitMs { get; set; }
179187
}
180188

181189
public class ParameterResult

src/PlanViewer.Core/Output/HtmlExporter.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,8 @@ private static void WriteRuntimeCard(StringBuilder sb, StatementResult stmt)
303303
WriteRow(sb, "CPU", $"{stmt.QueryTime.CpuTimeMs:N0} ms");
304304
if (stmt.QueryTime.ElapsedTimeMs > 0)
305305
{
306-
var ratio = (double)stmt.QueryTime.CpuTimeMs / stmt.QueryTime.ElapsedTimeMs;
306+
var effectiveCpu = Math.Max(0, stmt.QueryTime.CpuTimeMs - stmt.QueryTime.ExternalWaitMs);
307+
var ratio = (double)effectiveCpu / stmt.QueryTime.ElapsedTimeMs;
307308
WriteRow(sb, "CPU:Elapsed", ratio.ToString("N2"));
308309
}
309310
}

src/PlanViewer.Core/Output/ResultMapper.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using PlanViewer.Core.Models;
2+
using PlanViewer.Core.Services;
23

34
namespace PlanViewer.Core.Output;
45

@@ -111,10 +112,18 @@ private static StatementResult MapStatement(PlanStatement stmt)
111112
// Query time (actual plans)
112113
if (stmt.QueryTimeStats != null)
113114
{
115+
long externalWaitMs = 0;
116+
foreach (var w in stmt.WaitStats)
117+
{
118+
if (BenefitScorer.IsExternalWait(w.WaitType))
119+
externalWaitMs += w.WaitTimeMs;
120+
}
121+
114122
result.QueryTime = new QueryTimeResult
115123
{
116124
CpuTimeMs = stmt.QueryTimeStats.CpuTimeMs,
117-
ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs
125+
ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs,
126+
ExternalWaitMs = externalWaitMs
118127
};
119128
}
120129

src/PlanViewer.Core/Services/BenefitScorer.cs

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -494,12 +494,11 @@ private static void ScoreWaitStats(PlanStatement stmt)
494494
var isParallel = stmt.DegreeOfParallelism > 1 && stmt.RootNode != null;
495495

496496
// Collect all operators with per-thread stats for parallel benefit calculation
497-
List<OperatorWaitProfile>? operatorProfiles = null;
498-
if (isParallel)
499-
{
500-
operatorProfiles = new List<OperatorWaitProfile>();
501-
CollectOperatorWaitProfiles(stmt.RootNode!, operatorProfiles);
502-
}
497+
// Collect operator profiles even for serial plans — the external-wait formula
498+
// uses sum-of-max-thread-cpu across operators and works for both.
499+
var operatorProfiles = new List<OperatorWaitProfile>();
500+
if (stmt.RootNode != null)
501+
CollectOperatorWaitProfiles(stmt.RootNode, operatorProfiles);
503502

504503
foreach (var wait in stmt.WaitStats)
505504
{
@@ -508,7 +507,15 @@ private static void ScoreWaitStats(PlanStatement stmt)
508507
var category = ClassifyWaitType(wait.WaitType);
509508
double benefitPct;
510509

511-
if (category == "Parallelism" && isParallel)
510+
if (IsExternalWait(wait.WaitType) && operatorProfiles.Count > 0)
511+
{
512+
// External / preemptive waits (MEMORY_ALLOCATION_*, PREEMPTIVE_*): the worker
513+
// is CPU-busy in kernel, so operator elapsed ≈ operator cpu and the wait
514+
// barely shows in the per-thread (elapsed - cpu) calculation. Joe's formula:
515+
// benefit = (wait_ms / total_cpu_ms) * Σ max_thread_cpu_per_operator / elapsed
516+
benefitPct = CalculateExternalWaitBenefit(wait, operatorProfiles, stmt.QueryTimeStats!.CpuTimeMs, elapsedMs);
517+
}
518+
else if (category == "Parallelism" && isParallel)
512519
{
513520
// CXPACKET/CXCONSUMER/CXSYNC: benefit is the parallelism efficiency gap,
514521
// not the raw wait time. Threads waiting for other threads is a symptom
@@ -525,7 +532,7 @@ private static void ScoreWaitStats(PlanStatement stmt)
525532
benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100;
526533
}
527534
}
528-
else if (!isParallel || operatorProfiles == null || operatorProfiles.Count == 0)
535+
else if (!isParallel || operatorProfiles.Count == 0)
529536
{
530537
// Serial plan or no operator data: simple ratio
531538
benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100;
@@ -585,6 +592,48 @@ private static double CalculateParallelWaitBenefit(
585592
return benefitMs / stmtElapsedMs * 100;
586593
}
587594

595+
/// <summary>
596+
/// Joe's formula for external/preemptive waits where the worker is CPU-busy in kernel
597+
/// (MEMORY_ALLOCATION_*, PREEMPTIVE_*). The standard (elapsed-cpu) per-thread
598+
/// wait accounting misses these because elapsed ≈ cpu for those threads. Use the
599+
/// wait's share of total CPU, scaled by the plan's critical-path CPU.
600+
/// wait_cpu_share = wait_ms / total_cpu_ms
601+
/// sum_max_cpu = Σ max_thread_cpu across operators
602+
/// benefit_ms = wait_cpu_share * sum_max_cpu
603+
/// Then convert to % of statement elapsed.
604+
/// </summary>
605+
private static double CalculateExternalWaitBenefit(
606+
WaitStatInfo wait, List<OperatorWaitProfile> profiles,
607+
long stmtCpuMs, long stmtElapsedMs)
608+
{
609+
if (stmtCpuMs <= 0 || stmtElapsedMs <= 0)
610+
return (double)wait.WaitTimeMs / Math.Max(1, stmtElapsedMs) * 100;
611+
612+
long sumMaxCpu = 0;
613+
foreach (var p in profiles)
614+
sumMaxCpu += p.MaxThreadCpuMs;
615+
616+
if (sumMaxCpu <= 0)
617+
return (double)wait.WaitTimeMs / stmtElapsedMs * 100;
618+
619+
var waitCpuShare = (double)wait.WaitTimeMs / stmtCpuMs;
620+
var benefitMs = waitCpuShare * sumMaxCpu;
621+
return benefitMs / stmtElapsedMs * 100;
622+
}
623+
624+
/// <summary>
625+
/// External / preemptive waits where the worker is CPU-busy in kernel rather than
626+
/// descheduled. Their wait time counts toward the query's CPU time, so the usual
627+
/// (elapsed - cpu) per-thread wait math misses them entirely.
628+
/// </summary>
629+
public static bool IsExternalWait(string waitType)
630+
{
631+
if (string.IsNullOrEmpty(waitType)) return false;
632+
var wt = waitType.ToUpperInvariant();
633+
return wt.Contains("MEMORY_ALLOCATION")
634+
|| wt.StartsWith("PREEMPTIVE_");
635+
}
636+
588637
/// <summary>
589638
/// Determines if an operator is relevant for a given wait category.
590639
/// </summary>
@@ -624,13 +673,18 @@ private static void CollectOperatorWaitProfiles(PlanNode node, List<OperatorWait
624673
maxThreadWait = threadWait;
625674
}
626675

627-
if (totalWait > 0 || maxThreadWait > 0)
676+
// Max per-thread SELF CPU (non-cumulative) — critical-path CPU contribution
677+
// from this operator. Used by the external-wait formula.
678+
var maxThreadCpu = PlanAnalyzer.GetOperatorMaxThreadOwnCpuMs(node);
679+
680+
if (totalWait > 0 || maxThreadWait > 0 || maxThreadCpu > 0)
628681
{
629682
profiles.Add(new OperatorWaitProfile
630683
{
631684
Node = node,
632685
MaxThreadWaitMs = maxThreadWait,
633686
TotalWaitMs = totalWait,
687+
MaxThreadCpuMs = maxThreadCpu,
634688
HasPhysicalReads = node.ActualPhysicalReads > 0,
635689
HasCpuWork = node.ActualCPUMs > 0,
636690
IsExchange = node.PhysicalOp == "Parallelism",
@@ -681,6 +735,8 @@ private sealed class OperatorWaitProfile
681735
public PlanNode Node { get; init; } = null!;
682736
public long MaxThreadWaitMs { get; init; }
683737
public long TotalWaitMs { get; init; }
738+
/// <summary>Max CPU time among this operator's threads (critical-path CPU for external-wait formula).</summary>
739+
public long MaxThreadCpuMs { get; init; }
684740
public bool HasPhysicalReads { get; init; }
685741
public bool HasCpuWork { get; init; }
686742
public bool IsExchange { get; init; }

src/PlanViewer.Core/Services/PlanAnalyzer.cs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1592,6 +1592,66 @@ private static long GetPerThreadOwnElapsed(PlanNode node)
15921592
return maxSelf;
15931593
}
15941594

1595+
/// <summary>
1596+
/// Max per-thread self-CPU for this operator.
1597+
/// Parallel: for each thread, self_cpu = thread_cpu - Σ same-thread child cpu; take max.
1598+
/// Serial / single-thread: operator_cpu - Σ effective child cpu.
1599+
/// Needed for external-wait benefit scoring (Joe's formula).
1600+
/// </summary>
1601+
internal static long GetOperatorMaxThreadOwnCpuMs(PlanNode node)
1602+
{
1603+
if (!node.HasActualStats || node.ActualCPUMs <= 0) return 0;
1604+
1605+
if (node.PerThreadStats.Count > 1)
1606+
{
1607+
var parentByThread = new Dictionary<int, long>();
1608+
foreach (var ts in node.PerThreadStats)
1609+
parentByThread[ts.ThreadId] = ts.ActualCPUMs;
1610+
1611+
var childSumByThread = new Dictionary<int, long>();
1612+
foreach (var child in node.Children)
1613+
{
1614+
var childNode = child;
1615+
if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0)
1616+
childNode = child.Children.OrderByDescending(c => c.ActualCPUMs).First();
1617+
foreach (var ts in childNode.PerThreadStats)
1618+
{
1619+
childSumByThread.TryGetValue(ts.ThreadId, out var existing);
1620+
childSumByThread[ts.ThreadId] = existing + ts.ActualCPUMs;
1621+
}
1622+
}
1623+
1624+
var maxSelf = 0L;
1625+
foreach (var (threadId, parentCpu) in parentByThread)
1626+
{
1627+
childSumByThread.TryGetValue(threadId, out var childCpu);
1628+
var self = Math.Max(0, parentCpu - childCpu);
1629+
if (self > maxSelf) maxSelf = self;
1630+
}
1631+
return maxSelf;
1632+
}
1633+
1634+
// Serial: operator_cpu - Σ effective child cpu
1635+
var totalChildCpu = 0L;
1636+
foreach (var child in node.Children)
1637+
totalChildCpu += GetEffectiveChildCpuMs(child);
1638+
return Math.Max(0, node.ActualCPUMs - totalChildCpu);
1639+
}
1640+
1641+
private static long GetEffectiveChildCpuMs(PlanNode child)
1642+
{
1643+
if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0)
1644+
return child.Children.Max(GetEffectiveChildCpuMs);
1645+
if (child.ActualCPUMs > 0)
1646+
return child.ActualCPUMs;
1647+
if (child.Children.Count == 0)
1648+
return 0;
1649+
var sum = 0L;
1650+
foreach (var grandchild in child.Children)
1651+
sum += GetEffectiveChildCpuMs(grandchild);
1652+
return sum;
1653+
}
1654+
15951655
/// <summary>
15961656
/// Serial row mode self-time: subtract all direct children's effective elapsed.
15971657
/// Pass-through operators (Compute Scalar, etc.) don't carry runtime stats —

src/PlanViewer.Web/Pages/Index.razor

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,8 @@ else
163163
</div>
164164
@if (ActiveStmt!.QueryTime.ElapsedTimeMs > 0)
165165
{
166-
var ratio = (double)ActiveStmt!.QueryTime.CpuTimeMs / ActiveStmt!.QueryTime.ElapsedTimeMs;
166+
var effectiveCpu = Math.Max(0L, ActiveStmt!.QueryTime.CpuTimeMs - ActiveStmt!.QueryTime.ExternalWaitMs);
167+
var ratio = (double)effectiveCpu / ActiveStmt!.QueryTime.ElapsedTimeMs;
167168
<div class="insight-row">
168169
<span class="insight-label">CPU:Elapsed</span>
169170
<span class="insight-value">@ratio.ToString("N2")</span>

0 commit comments

Comments
 (0)