Skip to content

Commit 8c1fc54

Browse files
Use pandas formatting for tables
1 parent d63aaf8 commit 8c1fc54

File tree

7 files changed

+68
-85
lines changed

7 files changed

+68
-85
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dependencies = [
1212
"rich>=14.2.0",
1313
"SQLAlchemy>=2.0.0",
1414
"numpy!=2.4.0",
15-
"tabulate>=0.9.0",
15+
"pandas>=2.0.0",
1616
"typer>=0.21.0",
1717
"openpyxl>=3.1.0",
1818
"duckdb>=1.0.0",

sqlcompare/analysis/utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,15 @@ def _display(
7777
if not filtered_rows:
7878
log.info(f"❌ No statistics found for column containing '{column}'")
7979
return
80-
log.info(format_table(columns, filtered_rows[:limit]))
80+
display_rows = filtered_rows[:limit]
81+
log.info(
82+
format_table(
83+
columns,
84+
display_rows,
85+
max_rows=len(display_rows),
86+
max_cols=len(columns),
87+
)
88+
)
8189
return
8290

8391
effective_total = total_differences if total_differences is not None else len(rows)

sqlcompare/query.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from sqlcompare.db import DBConnection
88
from sqlcompare.helpers import resolve_output_filename
99
from sqlcompare.log import log
10+
from sqlcompare.utils.format import format_table
1011

1112

1213
def query(q: str, connection: str | None, output: str) -> None:
@@ -35,9 +36,7 @@ def query(q: str, connection: str | None, output: str) -> None:
3536

3637
if output == "terminal":
3738
if rows:
38-
from tabulate import tabulate
39-
40-
print(tabulate(rows, headers=columns, tablefmt="pretty"))
39+
print(format_table(columns, rows))
4140
print(f"\nTotal rows: {len(rows)}")
4241
else:
4342
print("Query executed successfully. No rows returned.")

sqlcompare/utils/format.py

Lines changed: 17 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,35 @@
11
from __future__ import annotations
22

3-
from typing import Iterable, Sequence, Any
4-
from tabulate import tabulate
3+
from typing import Any
54

6-
7-
def _trim_cell(x: Any, max_width: int | None) -> Any:
8-
"""Truncate long cell strings with … (does not wrap)."""
9-
if max_width is None:
10-
return x
11-
s = "" if x is None else str(x)
12-
if len(s) <= max_width:
13-
return s
14-
if max_width <= 1:
15-
return "…"
16-
return s[: max_width - 1] + "…"
5+
import pandas as pd
176

187

198
def format_table(
209
columns: list[str],
2110
rows: list[tuple],
2211
*,
23-
tablefmt: str = "pretty",
2412
max_rows: int = 20,
2513
max_cols: int = 12,
2614
max_cell_width: int | None = 60,
27-
**tabulate_kwargs,
2815
) -> str:
29-
"""
30-
pandas-like summary output for tabulate:
31-
- If too many columns: show left + '…' + right (inserts an ellipsis column)
32-
- If too many rows: show head + ellipsis row + tail
33-
"""
34-
n_cols = len(columns)
35-
if n_cols == 0:
36-
return tabulate([], headers=[], tablefmt="pretty", **tabulate_kwargs)
37-
38-
# ---- column trimming ----
39-
use_col_ellipsis = n_cols > max_cols and max_cols >= 2
40-
if use_col_ellipsis:
41-
left = max_cols // 2
42-
right = max_cols - left
43-
left_idx = list(range(left))
44-
right_idx = list(range(n_cols - right, n_cols))
45-
keep_idx = left_idx + right_idx
46-
47-
out_columns = columns[:left] + ["…"] + columns[-right:]
16+
"""Render a dataframe-like table using pandas output formatting."""
17+
if not columns:
18+
return ""
4819

49-
out_rows = []
50-
for r in rows:
51-
r_list = list(r)
52-
kept = (
53-
[r_list[i] for i in keep_idx[:left]]
54-
+ ["…"]
55-
+ [r_list[i] for i in keep_idx[left:]]
56-
)
57-
out_rows.append(tuple(kept))
58-
else:
59-
out_columns = (
60-
columns[:max_cols] if (n_cols > max_cols and max_cols >= 1) else columns
61-
)
62-
keep_idx = list(range(len(out_columns)))
63-
out_rows = [tuple(r[i] for i in keep_idx) for r in rows]
20+
df = pd.DataFrame(list(rows), columns=columns)
6421

65-
# ---- row trimming ----
66-
n_rows = len(out_rows)
67-
use_row_ellipsis = n_rows > max_rows and max_rows >= 2
68-
if use_row_ellipsis:
69-
top = max_rows // 2
70-
bottom = max_rows - top
71-
head = out_rows[:top]
72-
tail = out_rows[-bottom:]
73-
ellipsis_row = tuple("…" for _ in out_columns)
74-
out_rows = head + [ellipsis_row] + tail
75-
else:
76-
out_rows = out_rows[:max_rows]
22+
display_max_rows = None if max_rows is None or max_rows <= 0 else max_rows
23+
display_max_cols = None if max_cols is None or max_cols <= 0 else max_cols
7724

78-
# ---- cell trimming ----
25+
option_kwargs: dict[str, Any] = {
26+
"display.max_rows": display_max_rows,
27+
"display.max_columns": display_max_cols,
28+
"display.width": 0,
29+
"display.expand_frame_repr": False,
30+
}
7931
if max_cell_width is not None:
80-
out_rows = [tuple(_trim_cell(v, max_cell_width) for v in r) for r in out_rows]
32+
option_kwargs["display.max_colwidth"] = max_cell_width
8133

82-
return tabulate(out_rows, headers=out_columns, tablefmt=tablefmt, **tabulate_kwargs)
34+
with pd.option_context(*sum(option_kwargs.items(), ())):
35+
return df.to_string(index=False)

sqlcompare/utils/test_types/stats.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,20 @@ def compare_table_stats(table1: str, table2: str, connection: str | None) -> Non
9797
"ROWS_DIFF",
9898
"NULLS_DIFF",
9999
"DISTINCT_DIFF",
100+
"STATUS_PCT",
100101
]
101102
rows = []
103+
row_base = max(prev_count, new_count, 1)
102104
for key in common_keys:
103105
prev_nulls = prev_stats.get(key, {}).get("null_count", 0)
104106
new_nulls = new_stats.get(key, {}).get("null_count", 0)
105107
prev_distinct = prev_stats.get(key, {}).get("distinct_count", 0)
106108
new_distinct = new_stats.get(key, {}).get("distinct_count", 0)
109+
distinct_base = max(prev_distinct, new_distinct, 1)
110+
row_sim = 1 - (abs(new_count - prev_count) / row_base)
111+
null_sim = 1 - (abs(new_nulls - prev_nulls) / row_base)
112+
distinct_sim = 1 - (abs(new_distinct - prev_distinct) / distinct_base)
113+
status_pct = max(0.0, min(100.0, (row_sim + null_sim + distinct_sim) / 3 * 100))
107114
rows.append(
108115
(
109116
prev_map[key],
@@ -116,8 +123,18 @@ def compare_table_stats(table1: str, table2: str, connection: str | None) -> Non
116123
new_count - prev_count,
117124
new_nulls - prev_nulls,
118125
new_distinct - prev_distinct,
126+
round(status_pct, 2),
119127
)
120128
)
121129

130+
rows.sort(key=lambda row: row[-1])
131+
122132
log.info("📊 Table statistics comparison:")
123-
log.info(format_table(output_columns, rows))
133+
log.info(
134+
format_table(
135+
output_columns,
136+
rows,
137+
max_rows=len(rows),
138+
max_cols=len(output_columns),
139+
)
140+
)

tests/test_stats_comparison.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,16 @@ def test_table_command_with_stats(tmp_path, monkeypatch) -> None:
3535
assert result.exit_code == 0, result.output
3636
assert "Table statistics comparison" in result.output
3737

38-
lines = [line for line in result.output.splitlines() if line.startswith("|")]
38+
lines = [line for line in result.output.splitlines() if line.strip()]
3939
assert lines, result.output
4040

4141
header = []
42-
rows = {}
42+
rows: dict[str, list[str]] = {}
4343
for line in lines:
44-
cells = [cell.strip() for cell in line.strip().strip("|").split("|")]
44+
stripped = line.strip()
45+
if not stripped:
46+
continue
47+
cells = stripped.split()
4548
if not cells:
4649
continue
4750
if cells[0] == "COLUMN_NAME":
@@ -60,12 +63,24 @@ def test_table_command_with_stats(tmp_path, monkeypatch) -> None:
6063
"ROWS_DIFF",
6164
"NULLS_DIFF",
6265
"DISTINCT_DIFF",
66+
"STATUS_PCT",
6367
]
6468
assert header == expected_header
6569

6670
for col_name in ("id", "name", "value"):
6771
assert col_name in rows, result.output
68-
assert rows[col_name][1:] == ["2", "2", "0", "0", "2", "2", "0", "0", "0"]
72+
assert rows[col_name][1:] == [
73+
"2",
74+
"2",
75+
"0",
76+
"0",
77+
"2",
78+
"2",
79+
"0",
80+
"0",
81+
"0",
82+
"100.0",
83+
]
6984

7085

7186
def test_table_command_with_stats_from_files() -> None:

uv.lock

Lines changed: 2 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)