Add run command for SQL/file comparisons

luisggoncalves · luisggoncalves · commit 6099a03f208b · 2026-03-30T12:14:30.000-03:00
diff --git a/README.md b/README.md
@@ -49,7 +49,7 @@ Compare two tables on a key:
 
 ```bash
 export SQLCOMPARE_CONN_DEFAULT="postgresql://<user>:<pass>@<host>/<db>"
-sqlcompare table analytics.fact_sales analytics.fact_sales_new id
+sqlcompare run analytics.fact_sales analytics.fact_sales_new id
 ```
 
 That command prints a **diff_id**. Use it for follow-up analysis:
@@ -121,7 +121,7 @@ SQLCompare does two things:
 Best for production validation and regression checks across supported connectors.
 
 ```bash
-sqlcompare table analytics.users analytics.users_new user_id,tenant_id
+sqlcompare run analytics.users analytics.users_new user_id,tenant_id
 ```
 
 Why it’s useful:
@@ -135,7 +135,23 @@ Why it’s useful:
 
 Use this when tables aren’t materialized yet or you want a filtered slice.
 
-Create a dataset config:
+Inline SQL:
+
+```bash
+sqlcompare run \
+  "SELECT * FROM analytics.orders WHERE order_date < '2024-01-01'" \
+  "SELECT * FROM analytics.orders WHERE order_date >= '2024-01-01'" \
+  order_id \
+  -c snowflake_prod
+```
+
+SQL files:
+
+```bash
+sqlcompare run queries/previous.sql queries/current.sql order_id -c snowflake_prod
+```
+
+Or create a dataset config:
 
 ```yaml
 previous:
@@ -184,8 +200,7 @@ new:
 Set a local default connector and run:
 
 ```bash
-export SQLCOMPARE_CONN_DEFAULT="duckdb:///:memory:"
-sqlcompare dataset path/to/dataset.yaml
+sqlcompare run path/to/previous.csv path/to/current.xlsx id
 ```
 
 Why it’s useful:
@@ -201,7 +216,7 @@ Use this when your data lives in a local `.duckdb` file.
 
 ```bash
 export SQLCOMPARE_CONN_LOCAL="duckdb:////absolute/path/to/warehouse.duckdb"
-sqlcompare table raw.customers staged.customers id -c local
+sqlcompare run raw.customers staged.customers id -c local
 ```
 
 Why it’s useful:
diff --git a/sqlcompare/cli.py b/sqlcompare/cli.py
@@ -3,6 +3,7 @@
 import typer
 
 from sqlcompare.dataset import dataset_cmd
+from sqlcompare.run_cmd import run_cmd
 from sqlcompare.inspect import inspect_cmd
 from sqlcompare.list_diffs import list_diffs_cmd
 from sqlcompare.query import query_cmd
@@ -13,6 +14,7 @@
 
 # Register all commands
 app.command("table")(table_cmd)
+app.command("run")(run_cmd)
 app.command("inspect")(inspect_cmd)
 app.command("stats")(stats_cmd)
 app.command("list-diffs")(list_diffs_cmd)
diff --git a/sqlcompare/helpers.py b/sqlcompare/helpers.py
@@ -3,8 +3,9 @@
 from __future__ import annotations
 
 import re
+from dataclasses import dataclass
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 import typer
 import yaml
@@ -15,6 +16,66 @@
 
 # Dataset helpers
 
+@dataclass(frozen=True)
+class InputSpec:
+    """How a single CLI argument was classified by ``detect_input``.
+
+    Attributes:
+        kind: ``"table"``, ``"file"`` (CSV/XLSX path) or ``"sql"``.
+        value: The table name, the file path, or the SQL text, depending on ``kind``.
+        source: ``"file"`` when the argument named an existing file, ``"inline"``
+            for SQL passed directly as the argument, ``"none"`` for table names.
+    """
+
+    kind: Literal["table", "file", "sql"]
+    value: str
+    source: Literal["inline", "file", "none"] = "none"
+
+
+def _looks_like_sql(value: str) -> bool:
+    """Return True if ``value`` starts with SELECT or WITH (any case, leading whitespace allowed)."""
+    return bool(re.match(r"(?is)^\s*(select|with)\b", value))
+
+
+def detect_input(value: str) -> InputSpec:
+    """Classify a CLI argument as a file, SQL text, or a table name.
+
+    An existing regular file takes precedence: ``.sql`` files are read and
+    returned as SQL, ``.csv``/``.xlsx`` files are returned as file inputs.
+    Otherwise text starting with SELECT/WITH is inline SQL, and anything else
+    is treated as a table name.
+
+    Raises:
+        typer.BadParameter: If the file is an empty ``.sql`` file or has an
+            unsupported extension.
+    """
+    path = Path(value)
+    try:
+        is_file = path.is_file()
+    except (OSError, ValueError):
+        # Inline SQL is usually not a valid path: stat() can fail with e.g.
+        # ENAMETOOLONG for long queries. Such a value cannot name a file.
+        is_file = False
+
+    # is_file() rather than exists(): a directory that happens to share a
+    # table's name must not shadow the table.
+    if is_file:
+        suffix = path.suffix.lower()
+        if suffix == ".sql":
+            sql_text = path.read_text(encoding="utf-8").strip()
+            if not sql_text:
+                raise typer.BadParameter(f"SQL file is empty: {path}")
+            return InputSpec(kind="sql", value=sql_text, source="file")
+        if suffix in (".csv", ".xlsx"):
+            return InputSpec(kind="file", value=str(path), source="file")
+        raise typer.BadParameter(f"Unsupported file type: {suffix}")
+
+    if _looks_like_sql(value):
+        return InputSpec(kind="sql", value=value.strip(), source="inline")
+
+    return InputSpec(kind="table", value=value.strip(), source="none")
+
+
 def expand_dataset_value(value: Any, base_dir: Path) -> Any:
     """
     Recursively expand template variables in dataset configuration values.
diff --git a/sqlcompare/run_cmd.py b/sqlcompare/run_cmd.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import os
+import uuid
+
+import typer
+
+from sqlcompare.config import get_default_schema
+from sqlcompare.db import DBConnection
+from sqlcompare.helpers import create_table_from_select, detect_input, ensure_schema
+from sqlcompare.table import compare_table
+
+
+def _resolve_connection(connection: str | None) -> str:
+    """Return the connection to use when SQL inputs have to be materialised.
+
+    An explicit ``connection`` is returned unchanged. Otherwise the value of the
+    ``SQLCOMPARE_CONN_DEFAULT`` environment variable is used, falling back to
+    ``DTK_CONN_DEFAULT``.
+
+    Raises:
+        typer.BadParameter: If no connection was given and neither variable is set.
+    """
+    if connection:
+        return connection
+    default_conn = os.getenv("SQLCOMPARE_CONN_DEFAULT") or os.getenv("DTK_CONN_DEFAULT")
+    if not default_conn:
+        raise typer.BadParameter(
+            "No connection specified. Use --connection or set SQLCOMPARE_CONN_DEFAULT."
+        )
+    return default_conn
+
+
+def run_cmd(
+    table1: str = typer.Argument(
+        ..., help="Previous table name, CSV/XLSX file path, or SQL"
+    ),
+    table2: str = typer.Argument(
+        ..., help="Current table name, CSV/XLSX file path, or SQL"
+    ),
+    index: str = typer.Argument(
+        ..., help="Comma-separated key column(s), e.g. 'id' or 'user_id,tenant_id'"
+    ),
+    connection: str | None = typer.Option(
+        None, "--connection", "-c", help="Database connector name"
+    ),
+    schema: str | None = typer.Option(None, "--schema", help="Schema for test tables"),
+    columns: str | None = typer.Option(
+        None,
+        "--columns",
+        help="Comma-separated non-index columns to compare (default: all common columns)",
+    ),
+    ignore_columns: str | None = typer.Option(
+        None,
+        "--ignore-columns",
+        help="Comma-separated non-index columns to skip from comparison",
+    ),
+) -> None:
+    """Run a comparison from tables, files, or SQL text.
+
+    This command auto-detects inputs:
+      - CSV/XLSX paths are treated as files
+      - .sql paths are read as SQL text
+      - Inline SQL starting with SELECT/WITH is treated as SQL
+      - Otherwise, inputs are treated as table names
+
+    Examples:
+        # Tables
+        sqlcompare run analytics.users analytics.users_new id
+
+        # SQL inline
+        sqlcompare run "SELECT * FROM previous" "SELECT * FROM current" id -c duckdb_test
+
+        # SQL files
+        sqlcompare run queries/previous.sql queries/current.sql id -c snowflake_prod
+
+        # Files (uses DuckDB)
+        sqlcompare run exports/prev.csv exports/current.csv customer_id
+    """
+    schema = schema or get_default_schema()
+
+    prev_spec = detect_input(table1)
+    new_spec = detect_input(table2)
+    kinds = {prev_spec.kind, new_spec.kind}
+
+    # detect_input() already normalised table names and file paths, so the spec
+    # values are compared as-is unless a SQL input is materialised below.
+    previous_table = prev_spec.value
+    new_table = new_spec.value
+
+    if "file" in kinds:
+        # CSV/XLSX inputs cannot be mixed with tables or SQL.
+        if kinds != {"file"}:
+            raise typer.BadParameter(
+                "Both table arguments must be file paths when using CSV/XLSX inputs."
+            )
+    elif "sql" in kinds:
+        # Each SQL input is handed to create_table_from_select under a randomly
+        # suffixed name; an input that is already a table keeps its own name.
+        connection = _resolve_connection(connection)
+        schema_prefix = f"{schema}." if schema else ""
+        suffix = uuid.uuid4().hex[:8]
+        if prev_spec.kind == "sql":
+            previous_table = f"{schema_prefix}sqlcompare_sql_{suffix}_previous"
+        if new_spec.kind == "sql":
+            new_table = f"{schema_prefix}sqlcompare_sql_{suffix}_new"
+
+        with DBConnection(connection) as db:
+            ensure_schema(db, schema)
+            if prev_spec.kind == "sql":
+                create_table_from_select(db, previous_table, prev_spec.value)
+            if new_spec.kind == "sql":
+                create_table_from_select(db, new_table, new_spec.value)
+
+    compare_table(
+        previous_table,
+        new_table,
+        index,
+        connection,
+        schema,
+        include_columns=columns,
+        ignore_columns=ignore_columns,
+    )
diff --git a/tests/test_cli_run.py b/tests/test_cli_run.py