Skip to content

Commit 6099a03

Browse files
Add run command for SQL/file comparisons
1 parent 8c1fc54 commit 6099a03

File tree

5 files changed

+497
-7
lines changed

5 files changed

+497
-7
lines changed

README.md

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ Compare two tables on a key:
4949

5050
```bash
5151
export SQLCOMPARE_CONN_DEFAULT="postgresql://<user>:<pass>@<host>/<db>"
52-
sqlcompare table analytics.fact_sales analytics.fact_sales_new id
52+
sqlcompare run analytics.fact_sales analytics.fact_sales_new id
5353
```
5454

5555
That command prints a **diff_id**. Use it for follow-up analysis:
@@ -121,7 +121,7 @@ SQLCompare does two things:
121121
Best for production validation and regression checks across supported connectors.
122122

123123
```bash
124-
sqlcompare table analytics.users analytics.users_new user_id,tenant_id
124+
sqlcompare run analytics.users analytics.users_new user_id,tenant_id
125125
```
126126

127127
Why it’s useful:
@@ -135,7 +135,23 @@ Why it’s useful:
135135

136136
Use this when tables aren’t materialized yet or you want a filtered slice.
137137

138-
Create a dataset config:
138+
Inline SQL:
139+
140+
```bash
141+
sqlcompare run \
142+
"SELECT * FROM analytics.orders WHERE order_date < '2024-01-01'" \
143+
"SELECT * FROM analytics.orders WHERE order_date >= '2024-01-01'" \
144+
order_id \
145+
-c snowflake_prod
146+
```
147+
148+
SQL files:
149+
150+
```bash
151+
sqlcompare run queries/previous.sql queries/current.sql order_id -c snowflake_prod
152+
```
153+
154+
Or create a dataset config:
139155

140156
```yaml
141157
previous:
@@ -184,8 +200,7 @@ new:
184200
Set a local default connector and run:
185201
186202
```bash
187-
export SQLCOMPARE_CONN_DEFAULT="duckdb:///:memory:"
188-
sqlcompare dataset path/to/dataset.yaml
203+
sqlcompare run path/to/previous.csv path/to/current.xlsx id
189204
```
190205

191206
Why it’s useful:
@@ -201,7 +216,7 @@ Use this when your data lives in a local `.duckdb` file.
201216

202217
```bash
203218
export SQLCOMPARE_CONN_LOCAL="duckdb:////absolute/path/to/warehouse.duckdb"
204-
sqlcompare table raw.customers staged.customers id -c local
219+
sqlcompare run raw.customers staged.customers id -c local
205220
```
206221

207222
Why it’s useful:

sqlcompare/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import typer
44

55
from sqlcompare.dataset import dataset_cmd
6+
from sqlcompare.run_cmd import run_cmd
67
from sqlcompare.inspect import inspect_cmd
78
from sqlcompare.list_diffs import list_diffs_cmd
89
from sqlcompare.query import query_cmd
@@ -13,6 +14,7 @@
1314

1415
# Register all commands
1516
app.command("table")(table_cmd)
17+
app.command("run")(run_cmd)
1618
app.command("inspect")(inspect_cmd)
1719
app.command("stats")(stats_cmd)
1820
app.command("list-diffs")(list_diffs_cmd)

sqlcompare/helpers.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
from __future__ import annotations
44

55
import re
6+
from dataclasses import dataclass
67
from pathlib import Path
7-
from typing import Any
8+
from typing import Any, Literal
89

910
import typer
1011
import yaml
@@ -15,6 +16,35 @@
1516

1617
# Dataset helpers
1718

19+
@dataclass(frozen=True)
class InputSpec:
    """Immutable description of how one command-line input was classified."""

    # Category the input was recognised as: a table name, a data file, or SQL.
    kind: Literal["table", "file", "sql"]
    # Payload for that category (table name, file path, or SQL text).
    value: str
    # Origin of the payload; "none" is the default when no origin is given.
    source: Literal["inline", "file", "none"] = "none"
24+
25+
26+
def _looks_like_sql(value: str) -> bool:
27+
return bool(re.match(r"(?is)^\s*(select|with)\b", value))
28+
29+
30+
def detect_input(value: str) -> InputSpec:
    """Classify a CLI input as a data file, SQL text, or a table name.

    Detection order:
      1. An existing regular file: ``.sql`` is read as SQL text, ``.csv`` and
         ``.xlsx`` are returned as file inputs, anything else is rejected.
      2. Text starting with SELECT/WITH is treated as inline SQL.
      3. Everything else is treated as a table name.

    Args:
        value: Raw argument as typed on the command line.

    Returns:
        An ``InputSpec`` describing the detected kind, its payload and origin.

    Raises:
        typer.BadParameter: If an existing ``.sql`` file is empty, or an
            existing file has an unsupported extension.
    """
    path = Path(value)
    try:
        # is_file() rather than exists(): a directory that happens to share a
        # table's name (e.g. "exports") must not be rejected as an
        # "unsupported file type" with an empty suffix.
        is_existing_file = path.is_file()
    except OSError:
        # Long inline SQL is not a valid path; the filesystem probe can fail
        # with errors such as ENAMETOOLONG instead of returning False.
        is_existing_file = False

    if is_existing_file:
        suffix = path.suffix.lower()
        if suffix == ".sql":
            sql_text = path.read_text(encoding="utf-8").strip()
            if not sql_text:
                raise typer.BadParameter(f"SQL file is empty: {path}")
            return InputSpec(kind="sql", value=sql_text, source="file")
        if suffix in (".csv", ".xlsx"):
            return InputSpec(kind="file", value=str(path), source="file")
        raise typer.BadParameter(f"Unsupported file type: {suffix}")

    if _looks_like_sql(value):
        return InputSpec(kind="sql", value=value.strip(), source="inline")

    return InputSpec(kind="table", value=value.strip(), source="none")
47+
1848
def expand_dataset_value(value: Any, base_dir: Path) -> Any:
1949
"""
2050
Recursively expand template variables in dataset configuration values.

sqlcompare/run_cmd.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import uuid
5+
6+
import typer
7+
8+
from sqlcompare.config import get_default_schema
9+
from sqlcompare.db import DBConnection
10+
from sqlcompare.helpers import create_table_from_select, detect_input, ensure_schema
11+
from sqlcompare.table import compare_table
12+
13+
14+
def _resolve_connection(connection: str | None) -> str:
15+
if connection:
16+
return connection
17+
default_conn = os.getenv("SQLCOMPARE_CONN_DEFAULT") or os.getenv("DTK_CONN_DEFAULT")
18+
if not default_conn:
19+
raise typer.BadParameter(
20+
"No connection specified. Use --connection or set SQLCOMPARE_CONN_DEFAULT."
21+
)
22+
return default_conn
23+
24+
25+
def run_cmd(
    table1: str = typer.Argument(
        ..., help="Previous table name, CSV/XLSX file path, or SQL"
    ),
    table2: str = typer.Argument(
        ..., help="Current table name, CSV/XLSX file path, or SQL"
    ),
    index: str = typer.Argument(
        ..., help="Comma-separated key column(s), e.g. 'id' or 'user_id,tenant_id'"
    ),
    connection: str | None = typer.Option(
        None, "--connection", "-c", help="Database connector name"
    ),
    schema: str | None = typer.Option(None, "--schema", help="Schema for test tables"),
    columns: str | None = typer.Option(
        None,
        "--columns",
        help="Comma-separated non-index columns to compare (default: all common columns)",
    ),
    ignore_columns: str | None = typer.Option(
        None,
        "--ignore-columns",
        help="Comma-separated non-index columns to skip from comparison",
    ),
) -> None:
    """Run a comparison from tables, files, or SQL text.

    This command auto-detects inputs:
    - CSV/XLSX paths are treated as files
    - .sql paths are read as SQL text
    - Inline SQL starting with SELECT/WITH is treated as SQL
    - Otherwise, inputs are treated as table names

    Examples:
        # Tables
        sqlcompare run analytics.users analytics.users_new id

        # SQL inline
        sqlcompare run "SELECT * FROM previous" "SELECT * FROM current" id -c duckdb_test

        # SQL files
        sqlcompare run queries/previous.sql queries/current.sql id -c snowflake_prod

        # Files (uses DuckDB)
        sqlcompare run exports/prev.csv exports/current.csv customer_id
    """
    # NOTE: the docstring above is kept verbatim because Typer shows it as the
    # command's --help text; maintainer notes live in comments instead.
    schema = schema or get_default_schema()

    prev_spec = detect_input(table1)
    new_spec = detect_input(table2)
    kinds = {prev_spec.kind, new_spec.kind}

    # Table names and file paths go to compare_table as detected (whitespace
    # stripped for table names); only SQL inputs are swapped for generated
    # table names below.
    previous_table = prev_spec.value
    new_table = new_spec.value

    if "file" in kinds:
        # File inputs cannot be mixed with tables or SQL.
        if kinds != {"file"}:
            raise typer.BadParameter(
                "Both table arguments must be file paths when using CSV/XLSX inputs."
            )
    elif "sql" in kinds:
        # SQL must be materialised into tables, which needs a concrete connector.
        connection = _resolve_connection(connection)
        schema_prefix = f"{schema}." if schema else ""
        # One random suffix shared by both generated table names for this run.
        suffix = uuid.uuid4().hex[:8]
        if prev_spec.kind == "sql":
            previous_table = f"{schema_prefix}sqlcompare_sql_{suffix}_previous"
        if new_spec.kind == "sql":
            new_table = f"{schema_prefix}sqlcompare_sql_{suffix}_new"

        with DBConnection(connection) as db:
            ensure_schema(db, schema)
            if prev_spec.kind == "sql":
                create_table_from_select(db, previous_table, prev_spec.value)
            if new_spec.kind == "sql":
                create_table_from_select(db, new_table, new_spec.value)
        # NOTE(review): compare_table is invoked after the materialisation
        # connection is closed, as reconstructed from the flattened diff —
        # confirm this matches the original nesting.

    compare_table(
        previous_table,
        new_table,
        index,
        connection,
        schema,
        include_columns=columns,
        ignore_columns=ignore_columns,
    )

0 commit comments

Comments
 (0)