Skip to content

Commit 8ce9c24

Browse files
Refactor CLI commands and update documentation for improved clarity and functionality
1 parent fb2863c commit 8ce9c24

File tree

16 files changed

+221
-54
lines changed

16 files changed

+221
-54
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ sqlcompare table analytics.fact_sales analytics.fact_sales_new id
5353
That command prints a **diff_id**. Use it for follow-up analysis:
5454

5555
```bash
56-
sqlcompare analyze-diff <diff_id> --stats
57-
sqlcompare analyze-diff <diff_id> --column revenue --limit 100
58-
sqlcompare analyze-diff <diff_id> --missing-current
56+
sqlcompare inspect <diff_id> --stats
57+
sqlcompare inspect <diff_id> --column revenue --limit 100
58+
sqlcompare inspect <diff_id> --missing-current
5959
```
6060

6161
---

examples/row_compare.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,6 @@ Rows only in previous dataset: 1
4545
| 1 | name | alpha | alfa |
4646
| 1 | amount | 10 | 11 |
4747
+----+--------+--------+---------+
48-
🔎 To review the diff, run: sqlcompare analyze-diff compare_sqlcompare_dataset_dataset_998aa249_previous_sqlcompare_dataset_dataset_998aa249_new_20260104_025829_b28946b5
48+
🔎 To review the diff, run: sqlcompare inspect compare_sqlcompare_dataset_dataset_998aa249_previous_sqlcompare_dataset_dataset_998aa249_new_20260104_025829_b28946b5
4949
💡 Tips: --stats for per-column counts, --missing-current/--missing-previous for row-only, --column <name> to filter, --list-columns to inspect available fields.
5050
```

examples/stats_compare.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Compare statistics between two CSV files
55
## Command
66

77
```bash
8-
sqlcompare table tests/datasets/stats_compare/previous.csv tests/datasets/stats_compare/current.csv --stats
8+
sqlcompare stats tests/datasets/stats_compare/previous.csv tests/datasets/stats_compare/current.csv
99
```
1010

1111
## Output

sqlcompare/cli.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,19 @@
22

33
import typer
44

5-
from sqlcompare.analyze_diff import analyze_diff_cmd
65
from sqlcompare.dataset import dataset_cmd
6+
from sqlcompare.inspect import inspect_cmd
77
from sqlcompare.list_diffs import list_diffs_cmd
88
from sqlcompare.query import query_cmd
9+
from sqlcompare.stats import stats_cmd
910
from sqlcompare.table import table_cmd
1011

1112
app = typer.Typer(help="Compare database tables and inspect diffs.")
1213

1314
# Register all commands
1415
app.command("table")(table_cmd)
15-
app.command("analyze-diff")(analyze_diff_cmd)
16+
app.command("inspect")(inspect_cmd)
17+
app.command("stats")(stats_cmd)
1618
app.command("list-diffs")(list_diffs_cmd)
1719
app.command("query")(query_cmd)
1820
app.command("dataset")(dataset_cmd)

sqlcompare/compare/comparator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,6 @@ def compare(
372372
save_test_runs(runs)
373373

374374
log.debug(f"\U0001f4c1 Analysis data saved with ID: {diff_id}")
375-
log.debug(f"Use 'sqlcompare analyze-diff {diff_id}' to review differences")
375+
log.debug(f"Use 'sqlcompare inspect {diff_id}' to review differences")
376376

377377
return diff_id

sqlcompare/dataset.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,5 +133,50 @@ def dataset_cmd(
133133
None, "--schema", help="Schema for dataset tables"
134134
),
135135
) -> None:
136-
"""Compare datasets defined in YAML configuration file."""
136+
"""Compare datasets defined in YAML configuration.
137+
138+
Supports comparing:
139+
- SQL queries (select_sql)
140+
- CSV/XLSX files (file_name)
141+
- Mix of both
142+
143+
YAML Format:
144+
previous:
145+
select_sql: "SELECT * FROM users WHERE created < '2024-01-01'"
146+
index:
147+
- user_id
148+
149+
new:
150+
select_sql: "SELECT * FROM users WHERE created >= '2024-01-01'"
151+
index:
152+
- user_id
153+
154+
# Or with files
155+
previous:
156+
file_name: "{{here}}/previous.csv"
157+
index:
158+
- customer_id
159+
- order_id
160+
161+
new:
162+
file_name: "{{here}}/current.xlsx"
163+
index:
164+
- customer_id
165+
- order_id
166+
167+
Examples:
168+
# Compare using dataset config
169+
sqlcompare dataset configs/migration_check.yaml
170+
171+
# Override connection from YAML
172+
sqlcompare dataset configs/validation.yaml -c snowflake_prod
173+
174+
# Use {{here}} in YAML for relative file paths
175+
# {{here}} expands to the directory containing the YAML file
176+
177+
Notes:
178+
- index columns must match between previous and new
179+
- Both sides must use the same connection (or files)
180+
- Use 'conn' or 'connection' key in YAML to specify database
181+
"""
137182
compare_dataset(path, connection, schema)

sqlcompare/analyze_diff.py → sqlcompare/inspect.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from sqlcompare.log import log
1616

1717

18-
def analyze_diff(
18+
def inspect_diff(
1919
diff_id: str,
2020
column: str | None = None,
2121
limit: int = 25,
@@ -26,7 +26,7 @@ def analyze_diff(
2626
missing_previous: bool = False,
2727
) -> None:
2828
"""
29-
Analyze diff results from a previous comparison run.
29+
Inspect diff results from a previous comparison run.
3030
3131
Supports database-backed diffs and legacy pickle files.
3232
"""
@@ -201,7 +201,7 @@ def analyze_diff(
201201
log.error("❌ Pickle-based diff files are not supported without pandas.")
202202

203203

204-
def analyze_diff_cmd(
204+
def inspect_cmd(
205205
diff_id: str = typer.Argument(..., help="Diff run ID"),
206206
column: str | None = typer.Option(
207207
None, "--column", "-c", help="Filter by specific column name"
@@ -223,8 +223,35 @@ def analyze_diff_cmd(
223223
False, "--missing-previous", help="Show rows only in previous dataset"
224224
),
225225
) -> None:
226-
"""Analyze diff results from a previous comparison run."""
227-
analyze_diff(
226+
"""Inspect results from a previous comparison.
227+
228+
After running a 'table' or 'dataset' comparison, use this command to analyze
229+
the saved diff results. You can view statistics, filter by column, examine
230+
missing rows, or export results.
231+
232+
Examples:
233+
# View diff summary with statistics
234+
sqlcompare inspect <diff_id> --stats
235+
236+
# Filter differences for specific column
237+
sqlcompare inspect <diff_id> --column revenue
238+
239+
# Show rows only in current dataset
240+
sqlcompare inspect <diff_id> --missing-current
241+
242+
# Show rows only in previous dataset
243+
sqlcompare inspect <diff_id> --missing-previous
244+
245+
# List all available columns
246+
sqlcompare inspect <diff_id> --list-columns
247+
248+
# Show more results
249+
sqlcompare inspect <diff_id> --limit 100
250+
251+
# Save filtered results to CSV
252+
sqlcompare inspect <diff_id> --column price --save
253+
"""
254+
inspect_diff(
228255
diff_id,
229256
column=column,
230257
limit=limit,

sqlcompare/list_diffs.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,21 @@ def list_diffs_cmd(
6262
pattern: str | None = typer.Argument(None, help="Match diff IDs"),
6363
test: str | None = typer.Option(None, "--test", help="Filter by test name"),
6464
) -> None:
65-
"""List all available diff data files."""
65+
"""List all saved comparison results.
66+
67+
Shows all diff IDs from previous table/dataset comparisons, sorted by date.
68+
69+
Examples:
70+
# List all diffs
71+
sqlcompare list-diffs
72+
73+
# Filter by pattern
74+
sqlcompare list-diffs users
75+
76+
# Filter by test name
77+
sqlcompare list-diffs --test migration_check
78+
79+
Output:
80+
Displays: diff_id, test name, file size, creation date
81+
"""
6682
list_diffs(pattern, test)

sqlcompare/query.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def query(q: str, connection: str | None, output: str) -> None:
5757
def query_cmd(
5858
q: str = typer.Argument(..., help="SQL query to run"),
5959
connection: str | None = typer.Option(
60-
None, "--connection", "-c", "--conn", help="Connector name"
60+
None, "--connection", "-c", help="Database connector name"
6161
),
6262
output: str = typer.Option(
6363
"terminal",
@@ -66,5 +66,28 @@ def query_cmd(
6666
help="Output format or file path. Use 'terminal' or provide a .csv filename",
6767
),
6868
) -> None:
69-
"""Execute SQL query and display or save results."""
69+
"""Execute SQL query and display or save results.
70+
71+
Examples:
72+
# Display in terminal
73+
sqlcompare query "SELECT * FROM users LIMIT 10"
74+
75+
# Save to CSV
76+
sqlcompare query "SELECT id, name FROM customers" --output results.csv
77+
78+
# Use specific connection
79+
sqlcompare query "SELECT COUNT(*) FROM orders" -c snowflake_prod
80+
81+
# Multi-line query
82+
sqlcompare query "
83+
SELECT u.id, u.name, COUNT(o.id) as order_count
84+
FROM users u
85+
LEFT JOIN orders o ON u.id = o.user_id
86+
GROUP BY u.id, u.name
87+
" -c postgres_dev --output user_orders.csv
88+
89+
Output Options:
90+
--output terminal: Display in terminal (default)
91+
--output file.csv: Save to CSV file
92+
"""
7093
query(q, connection, output)

sqlcompare/stats.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from __future__ import annotations
2+
3+
import typer
4+
5+
from sqlcompare.utils.test_types.stats import compare_table_stats
6+
7+
8+
def stats_cmd(
9+
table1: str = typer.Argument(..., help="Previous table name or CSV/XLSX file path"),
10+
table2: str = typer.Argument(..., help="Current table name or CSV/XLSX file path"),
11+
connection: str | None = typer.Option(
12+
None, "--connection", "-c", help="Database connector name"
13+
),
14+
) -> None:
15+
"""Compare table statistics without row-by-row comparison.
16+
17+
Displays column-level statistics including:
18+
- Row counts
19+
- Null counts
20+
- Distinct value counts
21+
- Differences between previous and current
22+
23+
This is useful for:
24+
- Quick data quality checks
25+
- Schema validation
26+
- High-level change detection
27+
- Large tables where row comparison would be slow
28+
29+
Examples:
30+
# Compare table statistics
31+
sqlcompare stats analytics.users analytics.users_new
32+
33+
# Compare CSV file statistics
34+
sqlcompare stats exports/jan.csv exports/feb.csv
35+
36+
# Compare with specific connection
37+
sqlcompare stats orders_v1 orders_v2 -c postgres_prod
38+
39+
Output Columns:
40+
PREV_ROWS, NEW_ROWS: Total row counts
41+
PREV_NULLS, NEW_NULLS: Null counts per column
42+
PREV_DISTINCT, NEW_DISTINCT: Unique value counts
43+
*_DIFF: Calculated differences
44+
"""
45+
compare_table_stats(table1, table2, connection)

0 commit comments

Comments
 (0)