Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/PlanViewer.App/Controls/PlanViewerControl.axaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2839,8 +2839,15 @@ static string EfficiencyColor(double pct) => pct >= 80 ? "#E4E6EB"
statement.QueryTimeStats.CpuTimeMs > 0 &&
statement.DegreeOfParallelism > 1)
{
// Speedup ratio: CPU/elapsed = 1.0 means serial, = DOP means perfect parallelism
var speedup = (double)statement.QueryTimeStats.CpuTimeMs / statement.QueryTimeStats.ElapsedTimeMs;
// Speedup ratio: CPU/elapsed = 1.0 means serial, = DOP means perfect parallelism.
// Subtract external/preemptive wait time from CPU — those waits are CPU-busy
// in kernel and inflate the ratio without representing real query work.
long externalWaitMs = 0;
foreach (var w in statement.WaitStats)
if (BenefitScorer.IsExternalWait(w.WaitType))
externalWaitMs += w.WaitTimeMs;
var effectiveCpu = Math.Max(0, statement.QueryTimeStats.CpuTimeMs - externalWaitMs);
Comment on lines +2845 to +2849
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Third copy of the external-wait summation (also in ResultMapper.cs:114-119 and implicit in BenefitScorer.CalculateExternalWaitBenefit). Three call sites iterate WaitStats, call IsExternalWait, sum WaitTimeMs. Worth pulling into a single helper on PlanStatement (or BenefitScorer.SumExternalWaitMs(stmt)) so the definition of "external wait" has one home — especially if the rule ever expands beyond MEMORY_ALLOCATION / PREEMPTIVE_*.


Generated by Claude Code

var speedup = (double)effectiveCpu / statement.QueryTimeStats.ElapsedTimeMs;
var efficiency = Math.Min(100.0, (speedup - 1.0) / (statement.DegreeOfParallelism - 1.0) * 100.0);
efficiency = Math.Max(0.0, efficiency);
dopText += $" ({efficiency:N0}% efficient)";
Expand Down
2 changes: 1 addition & 1 deletion src/PlanViewer.App/PlanViewer.App.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<ApplicationManifest>app.manifest</ApplicationManifest>
<ApplicationIcon>EDD.ico</ApplicationIcon>
<AvaloniaUseCompiledBindingsByDefault>true</AvaloniaUseCompiledBindingsByDefault>
<Version>1.7.3</Version>
<Version>1.7.4</Version>
<Authors>Erik Darling</Authors>
<Company>Darling Data LLC</Company>
<Product>Performance Studio</Product>
Expand Down
8 changes: 8 additions & 0 deletions src/PlanViewer.Core/Output/AnalysisResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,14 @@ public class QueryTimeResult

[JsonPropertyName("elapsed_time_ms")]
public long ElapsedTimeMs { get; set; }

/// <summary>
/// Sum of external/preemptive wait time (MEMORY_ALLOCATION_*, PREEMPTIVE_*) —
/// these waits are CPU-busy in kernel and inflate CpuTimeMs vs real query CPU.
/// Subtract from CpuTimeMs for a truer CPU:Elapsed ratio.
/// </summary>
[JsonPropertyName("external_wait_ms")]
public long ExternalWaitMs { get; set; }
Comment on lines +179 to +186
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New JSON field external_wait_ms is a public output contract — MCP clients, downstream consumers, and the Dashboard/Lite copies of PerformanceMonitor will start seeing this. Worth mentioning in release notes and confirming any consumer that computes its own CPU:Elapsed ratio (e.g., dashboards) gets the same treatment.


Generated by Claude Code

}

public class ParameterResult
Expand Down
3 changes: 2 additions & 1 deletion src/PlanViewer.Core/Output/HtmlExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ private static void WriteRuntimeCard(StringBuilder sb, StatementResult stmt)
WriteRow(sb, "CPU", $"{stmt.QueryTime.CpuTimeMs:N0} ms");
if (stmt.QueryTime.ElapsedTimeMs > 0)
{
var ratio = (double)stmt.QueryTime.CpuTimeMs / stmt.QueryTime.ElapsedTimeMs;
var effectiveCpu = Math.Max(0, stmt.QueryTime.CpuTimeMs - stmt.QueryTime.ExternalWaitMs);
var ratio = (double)effectiveCpu / stmt.QueryTime.ElapsedTimeMs;
WriteRow(sb, "CPU:Elapsed", ratio.ToString("N2"));
}
}
Expand Down
11 changes: 10 additions & 1 deletion src/PlanViewer.Core/Output/ResultMapper.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using PlanViewer.Core.Models;
using PlanViewer.Core.Services;
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Output layer now depends on PlanViewer.Core.Services to call BenefitScorer.IsExternalWait. Fine as long as you're OK with Output → Services, but the classification is really a model concern — a PlanStatement.ExternalWaitMs property (or a WaitStatInfo.IsExternal computed once during parse) would let both Output and the Avalonia control consume the value without reaching into Services.


Generated by Claude Code


namespace PlanViewer.Core.Output;

Expand Down Expand Up @@ -111,10 +112,18 @@ private static StatementResult MapStatement(PlanStatement stmt)
// Query time (actual plans)
if (stmt.QueryTimeStats != null)
{
long externalWaitMs = 0;
foreach (var w in stmt.WaitStats)
{
if (BenefitScorer.IsExternalWait(w.WaitType))
externalWaitMs += w.WaitTimeMs;
}

result.QueryTime = new QueryTimeResult
{
CpuTimeMs = stmt.QueryTimeStats.CpuTimeMs,
ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs
ElapsedTimeMs = stmt.QueryTimeStats.ElapsedTimeMs,
ExternalWaitMs = externalWaitMs
};
}

Expand Down
74 changes: 65 additions & 9 deletions src/PlanViewer.Core/Services/BenefitScorer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -494,12 +494,11 @@ private static void ScoreWaitStats(PlanStatement stmt)
var isParallel = stmt.DegreeOfParallelism > 1 && stmt.RootNode != null;

// Collect all operators with per-thread stats for parallel benefit calculation
List<OperatorWaitProfile>? operatorProfiles = null;
if (isParallel)
{
operatorProfiles = new List<OperatorWaitProfile>();
CollectOperatorWaitProfiles(stmt.RootNode!, operatorProfiles);
}
// Collect operator profiles even for serial plans — the external-wait formula
// uses sum-of-max-thread-cpu across operators and works for both.
var operatorProfiles = new List<OperatorWaitProfile>();
if (stmt.RootNode != null)
CollectOperatorWaitProfiles(stmt.RootNode, operatorProfiles);

foreach (var wait in stmt.WaitStats)
{
Expand All @@ -508,7 +507,15 @@ private static void ScoreWaitStats(PlanStatement stmt)
var category = ClassifyWaitType(wait.WaitType);
double benefitPct;
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gate IsExternalWait(...) && operatorProfiles.Count > 0: for an external wait on a plan without a RootNode (estimated-only plan that somehow has wait stats, or parse failures), this falls through to the Parallelism branch test and then to !isParallel || operatorProfiles.Count == 0 → simple ratio. That's the pre-fix behavior, which the doc-comments say is wrong for these waits. Likely fine in practice, but the fallback chain for external waits in degraded data is worth a single explicit branch.


Generated by Claude Code


if (category == "Parallelism" && isParallel)
if (IsExternalWait(wait.WaitType) && operatorProfiles.Count > 0)
{
// External / preemptive waits (MEMORY_ALLOCATION_*, PREEMPTIVE_*): the worker
// is CPU-busy in kernel, so operator elapsed ≈ operator cpu and the wait
// barely shows in the per-thread (elapsed - cpu) calculation. Joe's formula:
// benefit = (wait_ms / total_cpu_ms) * Σ max_thread_cpu_per_operator / elapsed
benefitPct = CalculateExternalWaitBenefit(wait, operatorProfiles, stmt.QueryTimeStats!.CpuTimeMs, elapsedMs);
}
else if (category == "Parallelism" && isParallel)
{
// CXPACKET/CXCONSUMER/CXSYNC: benefit is the parallelism efficiency gap,
// not the raw wait time. Threads waiting for other threads is a symptom
Expand All @@ -525,7 +532,7 @@ private static void ScoreWaitStats(PlanStatement stmt)
benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100;
}
}
else if (!isParallel || operatorProfiles == null || operatorProfiles.Count == 0)
else if (!isParallel || operatorProfiles.Count == 0)
{
// Serial plan or no operator data: simple ratio
benefitPct = (double)wait.WaitTimeMs / elapsedMs * 100;
Expand Down Expand Up @@ -585,6 +592,48 @@ private static double CalculateParallelWaitBenefit(
return benefitMs / stmtElapsedMs * 100;
}

/// <summary>
/// Joe's formula for external/preemptive waits where the worker is CPU-busy in kernel
/// (MEMORY_ALLOCATION_*, PREEMPTIVE_*). The standard (elapsed-cpu) per-thread
/// wait accounting misses these because elapsed ≈ cpu for those threads. Use the
/// wait's share of total CPU, scaled by the plan's critical-path CPU.
/// wait_cpu_share = wait_ms / total_cpu_ms
/// sum_max_cpu = Σ max_thread_cpu across operators
/// benefit_ms = wait_cpu_share * sum_max_cpu
/// Then convert to % of statement elapsed.
/// </summary>
private static double CalculateExternalWaitBenefit(
WaitStatInfo wait, List<OperatorWaitProfile> profiles,
long stmtCpuMs, long stmtElapsedMs)
{
if (stmtCpuMs <= 0 || stmtElapsedMs <= 0)
return (double)wait.WaitTimeMs / Math.Max(1, stmtElapsedMs) * 100;
Comment on lines +609 to +610
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Early-return path when stmtCpuMs <= 0 falls back to wait_ms / elapsed * 100 — the same math that was already failing to surface these waits (per the method's own doc-comment). If the data is missing, returning 0 would be more honest than a known-wrong number.


Generated by Claude Code


long sumMaxCpu = 0;
foreach (var p in profiles)
sumMaxCpu += p.MaxThreadCpuMs;

if (sumMaxCpu <= 0)
return (double)wait.WaitTimeMs / stmtElapsedMs * 100;

var waitCpuShare = (double)wait.WaitTimeMs / stmtCpuMs;
var benefitMs = waitCpuShare * sumMaxCpu;
return benefitMs / stmtElapsedMs * 100;
}

/// <summary>
/// External / preemptive waits where the worker is CPU-busy in kernel rather than
/// descheduled. Their wait time counts toward the query's CPU time, so the usual
/// (elapsed - cpu) per-thread wait math misses them entirely.
/// </summary>
public static bool IsExternalWait(string waitType)
{
if (string.IsNullOrEmpty(waitType)) return false;
var wt = waitType.ToUpperInvariant();
return wt.Contains("MEMORY_ALLOCATION")
|| wt.StartsWith("PREEMPTIVE_");
Comment on lines +633 to +634
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Contains("MEMORY_ALLOCATION") is broader than the doc-comment's stated target (MEMORY_ALLOCATION_EXT, RESERVED_MEMORY_ALLOCATION_EXT). Any future wait type with that substring will silently be classified as external and get the CPU-share formula even if the worker isn't CPU-busy in kernel. Tightening to StartsWith("MEMORY_ALLOCATION") || StartsWith("RESERVED_MEMORY_ALLOCATION") — or an explicit allowlist — matches the documented intent.

Also note this diverges from ClassifyWaitType at line 550, which still puts MEMORY_ALLOCATION in the "Memory" category. Two classifiers of the same wait that disagree is a future foot-gun.


Generated by Claude Code

}

/// <summary>
/// Determines if an operator is relevant for a given wait category.
/// </summary>
Expand Down Expand Up @@ -624,13 +673,18 @@ private static void CollectOperatorWaitProfiles(PlanNode node, List<OperatorWait
maxThreadWait = threadWait;
}

if (totalWait > 0 || maxThreadWait > 0)
// Max per-thread SELF CPU (non-cumulative) — critical-path CPU contribution
// from this operator. Used by the external-wait formula.
var maxThreadCpu = PlanAnalyzer.GetOperatorMaxThreadOwnCpuMs(node);

if (totalWait > 0 || maxThreadWait > 0 || maxThreadCpu > 0)
{
profiles.Add(new OperatorWaitProfile
{
Node = node,
MaxThreadWaitMs = maxThreadWait,
TotalWaitMs = totalWait,
MaxThreadCpuMs = maxThreadCpu,
HasPhysicalReads = node.ActualPhysicalReads > 0,
HasCpuWork = node.ActualCPUMs > 0,
IsExchange = node.PhysicalOp == "Parallelism",
Expand Down Expand Up @@ -681,6 +735,8 @@ private sealed class OperatorWaitProfile
public PlanNode Node { get; init; } = null!;
public long MaxThreadWaitMs { get; init; }
public long TotalWaitMs { get; init; }
/// <summary>Max CPU time among this operator's threads (critical-path CPU for external-wait formula).</summary>
public long MaxThreadCpuMs { get; init; }
public bool HasPhysicalReads { get; init; }
public bool HasCpuWork { get; init; }
public bool IsExchange { get; init; }
Expand Down
60 changes: 60 additions & 0 deletions src/PlanViewer.Core/Services/PlanAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@

if (unsnifffedParams.Count > 0)
{
var hasRecompile = stmt.StatementText.Contains("RECOMPILE", StringComparison.OrdinalIgnoreCase);

Check warning on line 356 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

Dereference of a possibly null reference.

Check warning on line 356 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

Dereference of a possibly null reference.

Check warning on line 356 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / release

Dereference of a possibly null reference.

Check warning on line 356 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / release

Dereference of a possibly null reference.
if (!hasRecompile)
{
var names = string.Join(", ", unsnifffedParams.Select(p => p.Name));
Expand Down Expand Up @@ -1204,7 +1204,7 @@
// Rule 28: Row Count Spool — NOT IN with nullable column
// Pattern: Row Count Spool with high rewinds, child scan has IS NULL predicate,
// and statement text contains NOT IN
if (!cfg.IsRuleDisabled(28) && node.PhysicalOp.Contains("Row Count Spool"))

Check warning on line 1207 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

Dereference of a possibly null reference.

Check warning on line 1207 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

Dereference of a possibly null reference.

Check warning on line 1207 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / release

Dereference of a possibly null reference.

Check warning on line 1207 in src/PlanViewer.Core/Services/PlanAnalyzer.cs

View workflow job for this annotation

GitHub Actions / release

Dereference of a possibly null reference.
{
var rewinds = node.HasActualStats ? (double)node.ActualRewinds : node.EstimateRewinds;
if (rewinds > 10000 && HasNotInPattern(node, stmt))
Expand Down Expand Up @@ -1592,6 +1592,66 @@
return maxSelf;
}

/// <summary>
/// Max per-thread self-CPU for this operator.
/// Parallel: for each thread, self_cpu = thread_cpu - Σ same-thread child cpu; take max.
/// Serial / single-thread: operator_cpu - Σ effective child cpu.
/// Needed for external-wait benefit scoring (Joe's formula).
/// </summary>
internal static long GetOperatorMaxThreadOwnCpuMs(PlanNode node)
{
if (!node.HasActualStats || node.ActualCPUMs <= 0) return 0;

if (node.PerThreadStats.Count > 1)
{
var parentByThread = new Dictionary<int, long>();
foreach (var ts in node.PerThreadStats)
parentByThread[ts.ThreadId] = ts.ActualCPUMs;
Comment on lines +1607 to +1609
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parentByThread[ts.ThreadId] = ts.ActualCPUMs — if PerThreadStats ever contains two entries for the same ThreadId (e.g., thread reuse across iterations, or parser quirks on some plan shapes), the second silently overwrites the first. Worth a TryGetValue + sum like childSumByThread uses a few lines below, for symmetry and safety.


Generated by Claude Code


var childSumByThread = new Dictionary<int, long>();
foreach (var child in node.Children)
{
var childNode = child;
if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0)
childNode = child.Children.OrderByDescending(c => c.ActualCPUMs).First();
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OrderByDescending(c => c.ActualCPUMs).First() picks a single Parallelism-exchange child and discards the other side(s), so any CPU those threads contributed is dropped from the per-thread subtraction. On ties, .First() is effectively arbitrary (enumeration order) which makes maxSelf non-deterministic for identical plan inputs.

Two questions worth confirming: (1) Is picking the max-CPU child actually the right semantic here, versus summing per-thread CPU across all exchange children keyed by ThreadId? (2) Does this mirror the existing elapsed-path helper GetPerThreadOwnElapsed — if not, the two "critical-path" calculations may diverge.


Generated by Claude Code

foreach (var ts in childNode.PerThreadStats)
{
childSumByThread.TryGetValue(ts.ThreadId, out var existing);
childSumByThread[ts.ThreadId] = existing + ts.ActualCPUMs;
}
}

var maxSelf = 0L;
foreach (var (threadId, parentCpu) in parentByThread)
{
childSumByThread.TryGetValue(threadId, out var childCpu);
var self = Math.Max(0, parentCpu - childCpu);
if (self > maxSelf) maxSelf = self;
}
return maxSelf;
}

// Serial: operator_cpu - Σ effective child cpu
var totalChildCpu = 0L;
foreach (var child in node.Children)
totalChildCpu += GetEffectiveChildCpuMs(child);
return Math.Max(0, node.ActualCPUMs - totalChildCpu);
}

private static long GetEffectiveChildCpuMs(PlanNode child)
{
if (child.PhysicalOp == "Parallelism" && child.Children.Count > 0)
return child.Children.Max(GetEffectiveChildCpuMs);
if (child.ActualCPUMs > 0)
return child.ActualCPUMs;
if (child.Children.Count == 0)
return 0;
var sum = 0L;
foreach (var grandchild in child.Children)
sum += GetEffectiveChildCpuMs(grandchild);
return sum;
}

/// <summary>
/// Serial row mode self-time: subtract all direct children's effective elapsed.
/// Pass-through operators (Compute Scalar, etc.) don't carry runtime stats —
Expand Down
3 changes: 2 additions & 1 deletion src/PlanViewer.Web/Pages/Index.razor
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ else
</div>
@if (ActiveStmt!.QueryTime.ElapsedTimeMs > 0)
{
var ratio = (double)ActiveStmt!.QueryTime.CpuTimeMs / ActiveStmt!.QueryTime.ElapsedTimeMs;
var effectiveCpu = Math.Max(0L, ActiveStmt!.QueryTime.CpuTimeMs - ActiveStmt!.QueryTime.ExternalWaitMs);
var ratio = (double)effectiveCpu / ActiveStmt!.QueryTime.ElapsedTimeMs;
<div class="insight-row">
<span class="insight-label">CPU:Elapsed</span>
<span class="insight-value">@ratio.ToString("N2")</span>
Expand Down
Loading