Skip to content

Commit 3f5411c

Browse files
ryan-williamsclaude
andcommitted
`dvx status`: group by status + `-s`/`-x` filters
Group output under per-status headers (Stale → Missing → Transitive → Error → Fresh), each with a count and the status's color. Flatten with `-G`/`--no-group`. Add `-s`/`--status` (include) and `-x`/`--omit` (exclude), both comma-separated and prefix-matched (`s` → stale, `m` → missing, etc.). JSON/YAML now respect filters too; the summary line keeps reporting full unfiltered counts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 172cad0 commit 3f5411c

File tree

2 files changed

+223
-42
lines changed

2 files changed

+223
-42
lines changed

src/dvx/cli/status.py

Lines changed: 104 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -209,19 +209,51 @@ def _mark_transitive_staleness(results: list[dict], target_list: list) -> None:
209209
queue.append(dependent)
210210

211211

212+
STATUS_NAMES = ["fresh", "stale", "missing", "error", "transitive"]
213+
GROUP_ORDER = ["stale", "missing", "transitive", "error", "fresh"]
214+
215+
216+
def _resolve_status_list(value: str | None) -> set[str] | None:
217+
"""Resolve a comma-separated list of status names (with prefix matching) to a set.
218+
219+
Returns None if value is None/empty. Raises click.BadParameter on ambiguous or unknown prefixes.
220+
"""
221+
if not value:
222+
return None
223+
result = set()
224+
for raw in value.split(","):
225+
token = raw.strip().lower()
226+
if not token:
227+
continue
228+
matches = [s for s in STATUS_NAMES if s.startswith(token)]
229+
if not matches:
230+
raise click.BadParameter(f"unknown status {token!r} (expected one of {STATUS_NAMES})")
231+
if len(matches) > 1:
232+
raise click.BadParameter(f"ambiguous status prefix {token!r}: matches {matches}")
233+
result.add(matches[0])
234+
return result
235+
236+
212237
@click.command()
213238
@click.argument("targets", nargs=-1)
214239
@click.option("-d", "--with-deps", is_flag=True, default=True, help="Check upstream dependencies.")
240+
@click.option("-G", "--no-group", is_flag=True, help="Don't group output by status.")
215241
@click.option("-j", "--jobs", type=int, default=None, help="Number of parallel workers.")
242+
@click.option("-N", "--no-transitive", is_flag=True, help="Hide transitively stale stages.")
243+
@click.option("-s", "--status", "status_filter", default=None, help="Show only these statuses (comma-sep, prefix-matched, e.g. 's,m').")
216244
@click.option("-v", "--verbose", is_flag=True, help="Show all files including fresh.")
245+
@click.option("-x", "--omit", default=None, help="Exclude these statuses (comma-sep, prefix-matched, e.g. 'm').")
217246
@click.option("--json", "as_json", is_flag=True, help="Output results as JSON.")
218-
@click.option("-N", "--no-transitive", is_flag=True, help="Hide transitively stale stages.")
219247
@click.option("-y", "--yaml", "as_yaml", is_flag=True, help="Output detailed results as YAML (includes before/after hashes).")
220-
def status(targets, with_deps, jobs, verbose, as_json, no_transitive, as_yaml):
248+
def status(targets, with_deps, no_group, jobs, no_transitive, status_filter, verbose, omit, as_json, as_yaml):
221249
"""Check freshness status of artifacts.
222250
223-
By default, only shows stale/missing files (like git status).
224-
Use -v/--verbose to show all files including fresh ones.
251+
By default, only shows stale/missing files (like git status), grouped by status.
252+
Use -v/--verbose to also include fresh files.
253+
Use -s/--status to include only specific statuses (e.g. -s stale,missing).
254+
Use -x/--omit to exclude specific statuses (e.g. -x missing).
255+
Status names support prefix matching: 's' → stale, 'm' → missing, etc.
256+
Use -G/--no-group to flatten output (paths sorted, no per-status sections).
225257
Use -y/--yaml for detailed output with before/after hashes for changed deps.
226258
227259
Examples:
@@ -231,11 +263,16 @@ def status(targets, with_deps, jobs, verbose, as_json, no_transitive, as_yaml):
231263
dvx status -j 4 # Use 4 parallel workers
232264
dvx status --json # Output as JSON
233265
dvx status -y # Detailed YAML with hashes
266+
dvx status -x m # Hide missing files
267+
dvx status -s s,t # Show only stale and transitive
234268
"""
235269
import json as json_module
236270
from concurrent.futures import ThreadPoolExecutor, as_completed
237271
from functools import partial
238272

273+
include = _resolve_status_list(status_filter)
274+
exclude = _resolve_status_list(omit) or set()
275+
239276
# Find targets - expand directories to .dvc files
240277
if targets:
241278
target_list = _expand_targets(targets)
@@ -270,50 +307,78 @@ def status(targets, with_deps, jobs, verbose, as_json, no_transitive, as_yaml):
270307
_mark_transitive_staleness(results, target_list)
271308

272309
results.sort(key=lambda r: r["path"])
273-
transitive_count = sum(1 for r in results if r["status"] == "transitive")
274-
stale_count = sum(1 for r in results if r["status"] == "stale")
275-
missing_count = sum(1 for r in results if r["status"] == "missing")
276-
fresh_count = sum(1 for r in results if r["status"] == "fresh")
277-
error_count = sum(1 for r in results if r["status"] == "error")
310+
311+
# Counts from the full, unfiltered set (for the summary line)
312+
counts = {s: sum(1 for r in results if r["status"] == s) for s in STATUS_NAMES}
313+
314+
# Compute the visible set. Precedence: -s overrides default; -v adds fresh to default;
315+
# -x always subtracts.
316+
if include is not None:
317+
visible = set(include)
318+
else:
319+
visible = set(STATUS_NAMES) if verbose else {"stale", "missing", "error", "transitive"}
320+
visible -= exclude
321+
322+
filtered = [r for r in results if r["status"] in visible]
278323

279324
if as_yaml:
280325
import yaml
281-
# Filter to non-fresh unless verbose
282-
if not verbose:
283-
results = [r for r in results if r["status"] != "fresh"]
284-
# Convert to dict keyed by path for nicer YAML
285326
yaml_data = {}
286-
for r in results:
327+
for r in filtered:
287328
path = r.pop("path")
288-
# Remove None values for cleaner output
289329
yaml_data[path] = {k: v for k, v in r.items() if v is not None}
290330
click.echo(yaml.dump(yaml_data, default_flow_style=False, sort_keys=False))
291-
elif as_json:
292-
click.echo(json_module.dumps(results, indent=2))
331+
return
332+
333+
if as_json:
334+
click.echo(json_module.dumps(filtered, indent=2))
335+
return
336+
337+
status_style = {
338+
"fresh": ("✓", "green"),
339+
"stale": ("✗", "red"),
340+
"missing": ("?", "magenta"),
341+
"error": ("!", "red"),
342+
"transitive": ("⚠", "yellow"),
343+
}
344+
345+
def _render(r):
346+
icon, color = status_style.get(r["status"], ("?", "red"))
347+
styled_icon = click.style(icon, fg=color)
348+
line = f"{styled_icon} {r['path']}"
349+
if r.get("reason"):
350+
line += click.style(f" ({r['reason']})", fg="bright_black")
351+
return line
352+
353+
if no_group:
354+
for r in filtered:
355+
click.echo(_render(r))
293356
else:
294-
# By default, only show non-fresh files (like git status)
295-
status_style = {
296-
"fresh": ("✓", "green"),
297-
"stale": ("✗", "red"),
298-
"missing": ("?", "magenta"),
299-
"error": ("!", "red"),
300-
"transitive": ("⚠", "yellow"),
301-
}
302-
for r in results:
303-
if r["status"] == "fresh" and not verbose:
357+
first = True
358+
for s in GROUP_ORDER:
359+
if s not in visible:
360+
continue
361+
group = [r for r in filtered if r["status"] == s]
362+
if not group:
304363
continue
305-
icon, color = status_style.get(r["status"], ("?", "red"))
306-
styled_icon = click.style(icon, fg=color)
307-
line = f"{styled_icon} {r['path']}"
308-
if r.get("reason"):
309-
line += click.style(f" ({r['reason']})", fg="bright_black")
310-
click.echo(line)
311-
312-
# Summary line
313-
parts = [f"Fresh: {fresh_count}", f"Stale: {stale_count}"]
314-
if transitive_count:
315-
parts.append(f"Transitively stale: {transitive_count}")
316-
click.echo(f"\n{', '.join(parts)}")
364+
if not first:
365+
click.echo()
366+
first = False
367+
_, color = status_style[s]
368+
header = click.style(f"{s.capitalize()} ({len(group)}):", fg=color, bold=True)
369+
click.echo(header)
370+
for r in group:
371+
click.echo(f" {_render(r)}")
372+
373+
# Summary line (always reflects the full set, not filtered)
374+
parts = [f"Fresh: {counts['fresh']}", f"Stale: {counts['stale']}"]
375+
if counts["missing"]:
376+
parts.append(f"Missing: {counts['missing']}")
377+
if counts["transitive"]:
378+
parts.append(f"Transitively stale: {counts['transitive']}")
379+
if counts["error"]:
380+
parts.append(f"Error: {counts['error']}")
381+
click.echo(f"\n{', '.join(parts)}")
317382

318383

319384
# Export the command

tests/test_cli.py

Lines changed: 119 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def test_status_shows_fresh_and_stale(runner, temp_dvc_repo):
340340
lines = result.output.strip().split("\n")
341341

342342
# Find the stale.txt line - should have ✗ and show data changed
343-
stale_lines = [l for l in lines if "stale.txt" in l]
343+
stale_lines = [l.lstrip() for l in lines if "stale.txt" in l]
344344
assert len(stale_lines) == 1
345345
stale_line = stale_lines[0]
346346
assert stale_line.startswith("✗")
@@ -365,7 +365,7 @@ def test_status_json_output(runner, temp_dvc_repo):
365365
"outs": [{"md5": file_hash, "size": 10, "hash": "md5", "path": "data.txt"}]
366366
}, f)
367367

368-
result = runner.invoke(cli, ["status", "--json"])
368+
result = runner.invoke(cli, ["status", "--json", "-v"])
369369
assert result.exit_code == 0
370370

371371
assert json.loads(result.output) == [
@@ -415,7 +415,7 @@ def test_status_dep_changed(runner, temp_dvc_repo):
415415
lines = result.output.strip().split("\n")
416416

417417
# Find output.txt line - should show dep changed, not data changed
418-
output_lines = [l for l in lines if "output.txt" in l]
418+
output_lines = [l.lstrip() for l in lines if "output.txt" in l]
419419
assert len(output_lines) == 1
420420
output_line = output_lines[0]
421421
assert output_line.startswith("✗")
@@ -511,3 +511,119 @@ def test_status_transitive_staleness(runner, tmp_path):
511511
assert "⚠" in result.output
512512
assert "step_b" in result.output
513513
assert "upstream stale" in result.output
514+
515+
516+
@pytest.fixture
def mixed_status_repo(tmp_path, monkeypatch):
    """Repo with one stale, one missing, one fresh .dvc file.

    Layout created under ``tmp_path`` (which also becomes the working directory
    for the duration of the test):

    - ``fresh.txt`` + ``fresh.txt.dvc``: recorded md5/size taken from the file itself.
    - ``stale.txt`` + ``stale.txt.dvc``: recorded md5 is all zeros, so it cannot
      match the file on disk.
    - ``missing.txt.dvc``: refers to ``missing.txt``, which is never created.

    Returns:
        The ``tmp_path`` directory containing the repo.
    """
    # monkeypatch.chdir restores the previous working directory at teardown;
    # a bare os.chdir would leak the temp dir as cwd into later tests.
    monkeypatch.chdir(tmp_path)
    (tmp_path / ".dvc").mkdir()

    from dvx.run.hash import compute_md5

    # Fresh
    f = tmp_path / "fresh.txt"
    f.write_text("fresh\n")
    with open(tmp_path / "fresh.txt.dvc", "w") as fp:
        yaml.dump({"outs": [{"md5": compute_md5(f), "size": f.stat().st_size, "path": "fresh.txt"}]}, fp)

    # Stale
    s = tmp_path / "stale.txt"
    s.write_text("stale\n")
    with open(tmp_path / "stale.txt.dvc", "w") as fp:
        yaml.dump({"outs": [{"md5": "0" * 32, "size": 5, "path": "stale.txt"}]}, fp)

    # Missing
    with open(tmp_path / "missing.txt.dvc", "w") as fp:
        yaml.dump({"outs": [{"md5": "1" * 32, "size": 10, "path": "missing.txt"}]}, fp)

    return tmp_path
541+
542+
543+
def test_status_grouped_by_default(runner, mixed_status_repo):
    """Plain `status` prints per-status sections, with stale ahead of missing."""
    result = runner.invoke(cli, ["status"])
    assert result.exit_code == 0

    output = result.output
    # str.index raises when a header is absent, so this also requires both headers.
    stale_pos = output.index("Stale (1)")
    missing_pos = output.index("Missing (1)")
    assert stale_pos < missing_pos  # section order follows GROUP_ORDER

    for shown in ("stale.txt.dvc", "missing.txt.dvc"):
        assert shown in output

    # Fresh entries and their header are hidden by default.
    for hidden in ("Fresh (", "fresh.txt.dvc"):
        assert hidden not in output
558+
559+
560+
def test_status_no_group(runner, mixed_status_repo):
    """With -G the section headers disappear but the paths are still listed."""
    result = runner.invoke(cli, ["status", "-G"])
    assert result.exit_code == 0

    output = result.output
    for header in ("Stale (", "Missing ("):
        assert header not in output
    for path in ("stale.txt.dvc", "missing.txt.dvc"):
        assert path in output
568+
569+
570+
def test_status_omit_missing(runner, mixed_status_repo):
    """`-x missing` drops missing entries and their section header."""
    result = runner.invoke(cli, ["status", "-x", "missing"])
    assert result.exit_code == 0

    output = result.output
    assert "stale.txt.dvc" in output
    for hidden in ("missing.txt.dvc", "Missing ("):
        assert hidden not in output
577+
578+
579+
def test_status_omit_prefix(runner, mixed_status_repo):
    """The prefix form `-x m` hides missing entries just like the full name."""
    outcome = runner.invoke(cli, ["status", "-x", "m"])
    assert outcome.exit_code == 0
    assert "missing.txt.dvc" not in outcome.output
584+
585+
586+
def test_status_include_only(runner, mixed_status_repo):
    """`-s stale` lists stale entries only; missing is hidden without any -x."""
    result = runner.invoke(cli, ["status", "-s", "stale"])
    assert result.exit_code == 0

    output = result.output
    assert "stale.txt.dvc" in output
    for hidden in ("missing.txt.dvc", "fresh.txt.dvc"):
        assert hidden not in output
593+
594+
595+
def test_status_include_prefix_comma_sep(runner, mixed_status_repo):
    """`-s s,m` takes several comma-separated prefixes at once."""
    result = runner.invoke(cli, ["status", "-s", "s,m"])
    assert result.exit_code == 0

    output = result.output
    for shown in ("stale.txt.dvc", "missing.txt.dvc"):
        assert shown in output
    assert "fresh.txt.dvc" not in output
602+
603+
604+
def test_status_unknown_status(runner, mixed_status_repo):
    """A status name matching nothing makes the command fail with a clear message."""
    outcome = runner.invoke(cli, ["status", "-s", "bogus"])
    assert outcome.exit_code != 0
    assert "unknown status" in outcome.output
609+
610+
611+
def test_status_json_respects_filter(runner, mixed_status_repo):
    """With `-s stale --json`, the emitted JSON contains stale entries only."""
    import json

    result = runner.invoke(cli, ["status", "-s", "stale", "--json"])
    assert result.exit_code == 0

    entries = json.loads(result.output)
    assert {entry["status"] for entry in entries} == {"stale"}
619+
620+
621+
def test_status_summary_includes_all_counts(runner, mixed_status_repo):
    """The trailing summary line counts every status, even when -s narrows the listing."""
    result = runner.invoke(cli, ["status", "-s", "stale"])
    assert result.exit_code == 0

    # Last line of the trimmed output is the summary.
    summary = result.output.strip().rsplit("\n", 1)[-1]
    # The fixture repo holds exactly one fresh, one stale and one missing entry.
    for expected in ("Fresh: 1", "Stale: 1", "Missing: 1"):
        assert expected in summary

0 commit comments

Comments
 (0)