Skip to content

Commit 53da838

Browse files
ryan-williams and claude committed
Add tests for materialize, from_dvc side-effect, dir manifest
- `Artifact.from_dvc()` for side-effect stages (no outs)
- `materialize()`: single run, skip-fresh, error-raises
- `find_parent_dvc_dir()`: basic lookup, nested paths, not-found
- `read_dir_manifest()`: JSON parsing, .dir suffix, missing manifest

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9f93d2a commit 53da838

File tree

2 files changed

+179
-1
lines changed

2 files changed

+179
-1
lines changed

tests/test_run_artifact.py

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""Tests for dvx.run.artifact module."""
22

3+
import os
34
from pathlib import Path
45

56
import pytest
67
import yaml
78

8-
from dvx.run.artifact import Artifact, Computation, delayed, write_all_dvc
9+
from dvx.run.artifact import Artifact, Computation, delayed, materialize, write_all_dvc
910

1011

1112
def test_artifact_basic(tmp_path):
@@ -297,3 +298,85 @@ def test_artifact_from_dvc_with_git_deps(tmp_path):
297298
# Check blob SHAs are stored as md5 on the Artifact objects
298299
git_dep_map = {d.path: d.md5 for d in artifact.computation.git_deps}
299300
assert git_dep_map == {"script.py": "aabbccdd", "lib.py": "eeff0011"}
301+
302+
303+
def test_artifact_from_dvc_side_effect(tmp_path):
    """Artifact.from_dvc() loads a side-effect stage (a .dvc file with no outs).

    The .dvc file written here carries only ``meta.computation`` (a command
    and a single dep hash), so the loaded artifact is expected to have no md5
    while still exposing the recorded computation.
    """
    computation_meta = {
        "cmd": "wrangler pages deploy dist",
        "deps": {"dist/index.html": "aaa111"},
    }
    stage_file = tmp_path / "deploy.dvc"
    stage_file.write_text(yaml.dump({"meta": {"computation": computation_meta}}))

    # from_dvc is given the stage path without the ".dvc" suffix.
    loaded = Artifact.from_dvc(tmp_path / "deploy")

    assert loaded is not None
    assert loaded.path == "deploy"
    assert loaded.md5 is None

    recorded = loaded.computation
    assert recorded is not None
    assert recorded.cmd == "wrangler pages deploy dist"
    assert len(recorded.deps) == 1
325+
326+
327+
def test_materialize_single(tmp_path, monkeypatch):
    """materialize() runs a computation and updates the artifact.

    A single artifact whose command writes ``hello`` into ``result.txt`` is
    materialized with ``update_dvc=False``. The test checks that exactly one
    artifact is returned, that the output file was produced with the expected
    content, and that the returned artifact carries an md5.
    """
    # monkeypatch.chdir restores the original working directory at teardown;
    # a bare os.chdir would leak tmp_path as the cwd into every later test.
    monkeypatch.chdir(tmp_path)

    output = tmp_path / "result.txt"
    artifact = Artifact(
        path=str(output),
        computation=Computation(cmd=f"echo hello > {output}"),
    )

    computed = materialize([artifact], update_dvc=False)

    assert len(computed) == 1
    assert output.exists()
    assert output.read_text().strip() == "hello"
    assert computed[0].md5 is not None
343+
344+
345+
def test_materialize_skips_fresh(tmp_path, monkeypatch):
    """materialize() skips already-fresh artifacts (doesn't re-run cmd).

    The output file and a matching .dvc file are written up front, then an
    artifact with the same md5 is materialized. The artifact's command would
    overwrite the output if it were executed, so unchanged file content shows
    that the command was skipped.
    """
    # monkeypatch.chdir restores the original working directory at teardown;
    # a bare os.chdir would leak tmp_path as the cwd into every later test.
    monkeypatch.chdir(tmp_path)

    output = tmp_path / "result.txt"
    output.write_text("existing\n")

    from dvx.run.hash import compute_md5
    from dvx.run.dvc_files import write_dvc_file
    md5 = compute_md5(output)

    # Write .dvc so it's "fresh"
    write_dvc_file(output_path=output, md5=md5, size=output.stat().st_size)

    artifact = Artifact(
        path=str(output),
        md5=md5,
        # Redirect into the output file: a command that only prints to stdout
        # would leave the file untouched even if it ran, making the assertion
        # below unable to detect an unwanted re-run.
        computation=Computation(cmd=f"echo should-not-run > {output}"),
    )

    materialize([artifact], update_dvc=False)

    # File content should be unchanged (cmd was not executed)
    assert output.read_text() == "existing\n"
369+
370+
371+
def test_materialize_error_raises(tmp_path, monkeypatch):
    """materialize() raises on command failure.

    The artifact's command is ``false``, which always exits non-zero; the
    call is expected to raise ``RuntimeError`` with a message matching
    "Computation failed".
    """
    # monkeypatch.chdir restores the original working directory at teardown;
    # a bare os.chdir would leak tmp_path as the cwd into every later test.
    monkeypatch.chdir(tmp_path)

    output = tmp_path / "result.txt"
    artifact = Artifact(
        path=str(output),
        computation=Computation(cmd="false"),  # always fails
    )

    with pytest.raises(RuntimeError, match="Computation failed"):
        materialize([artifact], update_dvc=False)

tests/test_run_dvc_files.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88

99
from dvx.run.dvc_files import (
1010
DVCFileInfo,
11+
find_parent_dvc_dir,
1112
get_dvc_file_path,
1213
get_freshness_details,
1314
is_output_fresh,
15+
read_dir_manifest,
1416
read_dvc_file,
1517
write_dvc_file,
1618
)
@@ -897,3 +899,96 @@ def test_is_fetch_due_naive_last_run():
897899
last = "2026-04-07T12:00:00" # No timezone
898900
now = datetime(2026, 4, 8, 13, 0, 0, tzinfo=timezone.utc)
899901
assert is_fetch_due("daily", last, now=now) is True
902+
903+
904+
# =============================================================================
905+
# find_parent_dvc_dir / read_dir_manifest tests
906+
# =============================================================================
907+
908+
909+
def test_find_parent_dvc_dir_basic(tmp_path):
    """find_parent_dvc_dir resolves a file to its .dvc-tracked parent directory.

    Covers both a direct child of the tracked directory and a file nested one
    level deeper; in each case the result is a (tracked dir, relative path)
    pair.
    """
    # Lay out data/ with one top-level file and one nested file.
    tracked = tmp_path / "data"
    nested = tracked / "sub"
    nested.mkdir(parents=True)
    (tracked / "file1.txt").write_text("content1\n")
    (nested / "file2.txt").write_text("content2\n")

    # data.dvc describes the whole directory as a single ".dir" output.
    dir_out = {"md5": "abc123.dir", "size": 1000, "nfiles": 2, "path": "data"}
    (tmp_path / "data.dvc").write_text(yaml.dump({"outs": [dir_out]}))

    # Direct child of the tracked directory.
    found = find_parent_dvc_dir(tracked / "file1.txt")
    assert found is not None
    root, rel = found
    assert root == tmp_path / "data"
    assert rel == "file1.txt"

    # Nested file: same tracked root, relative path includes the subdirectory.
    found_nested = find_parent_dvc_dir(nested / "file2.txt")
    assert found_nested is not None
    nested_root, nested_rel = found_nested
    assert nested_root == tmp_path / "data"
    assert nested_rel == "sub/file2.txt"
939+
940+
941+
def test_find_parent_dvc_dir_not_found(tmp_path):
    """find_parent_dvc_dir yields None for a file with no .dvc-tracked parent."""
    orphan = tmp_path / "untracked.txt"
    orphan.write_text("hello\n")

    assert find_parent_dvc_dir(orphan) is None
946+
947+
948+
def test_read_dir_manifest(tmp_path):
    """read_dir_manifest parses a .dir JSON manifest into {relpath: md5}."""
    import json

    entries = [
        {"md5": "111222333", "relpath": "file1.txt"},
        {"md5": "444555666", "relpath": "sub/file2.txt"},
    ]

    # Hash "abc123def456" is stored as <cache>/ab/c123def456.dir:
    # the first two characters name the subdirectory, the rest names the file.
    cache_root = tmp_path / "cache"
    shard = cache_root / "ab"
    shard.mkdir(parents=True)
    (shard / "c123def456.dir").write_text(json.dumps(entries))

    parsed = read_dir_manifest("abc123def456", cache_root)

    assert parsed == {"file1.txt": "111222333", "sub/file2.txt": "444555666"}
969+
970+
971+
def test_read_dir_manifest_with_dir_suffix(tmp_path):
    """read_dir_manifest accepts a hash that already ends in ".dir"."""
    import json

    cache_root = tmp_path / "cache"
    shard = cache_root / "ab"
    shard.mkdir(parents=True)

    manifest_path = shard / "c123.dir"
    manifest_path.write_text(json.dumps([{"md5": "aaa", "relpath": "data.csv"}]))

    # The hash is passed with its ".dir" suffix still attached.
    parsed = read_dir_manifest("abc123.dir", cache_root)

    assert parsed == {"data.csv": "aaa"}
986+
987+
988+
def test_read_dir_manifest_missing(tmp_path):
    """read_dir_manifest gives back an empty dict when no manifest file exists."""
    empty_cache = tmp_path / "cache"
    empty_cache.mkdir()

    assert read_dir_manifest("nonexistent", empty_cache) == {}

0 commit comments

Comments
 (0)