From d19eb93c57ba36c31a7fd435c02f7e0ed0d55e8d Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 5 Dec 2025 15:36:41 +0000 Subject: [PATCH 001/100] feat: Prototype Db2 z/OS support --- data_validation/cli_tools.py | 1 + docs/connections.md | 9 +++++++++ third_party/ibis/ibis_db2/__init__.py | 20 +++++++++++++------- third_party/ibis/ibis_db2/api.py | 8 +++++--- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index daa0e8f8e..f3fa31138 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -194,6 +194,7 @@ ["database", "Database in DB2 to connect to"], ["url", "URL link in DB2 to connect to"], ["driver", "Driver link in DB2 to connect to (default ibm_db_sa)"], + ["connect_args", "(Optional) Additional connection argument mapping"], ], } diff --git a/docs/connections.md b/docs/connections.md index a500b215a..eca90d57d 100644 --- a/docs/connections.md +++ b/docs/connections.md @@ -426,6 +426,15 @@ data-validation connections add --database DATABASE DB2 database [--url URL] SQLAlchemy connection URL [--driver DRIVER] DB2 driver, defaults to "ibm_db_sa" + [--connect-args CONNECT_ARGS] Additional connection args, JSON String dict, default {} +``` + +Example TLS connection: +``` +data-validation connections add -c db2conn DB2 \ + --host=sdb2-host --database=somedb \ + --user=dvt --password=DVTS3cret \ + --connect-args='{"Security": "SSL", "SSLClientKeystoredb": "/path/to/file.kdb", "SSLClientKeystash": "/path/to/file.sth"}' ``` ## Snowflake diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 771168123..be8c511e6 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -11,11 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import sqlalchemy as sa import re +from typing import Iterable, Optional, Tuple, Dict, Any + +import sqlalchemy as sa import ibis.expr.datatypes as dt -from typing import Iterable, Tuple from ibis.backends.base.sql.alchemy import BaseAlchemyBackend from third_party.ibis.ibis_db2.compiler import Db2Compiler from third_party.ibis.ibis_db2.datatypes import _get_type @@ -28,12 +29,13 @@ class Backend(BaseAlchemyBackend): def do_connect( self, host: str = "localhost", - user: str = None, - password: str = None, + user: Optional[str] = None, + password: Optional[str] = None, port: int = 50000, - database: str = None, - url: str = None, + database: Optional[str] = None, + url: Optional[str] = None, driver: str = "ibm_db_sa", + connect_args: Dict[str, Any] = None, ) -> None: if url is None: if driver != "ibm_db_sa": @@ -56,6 +58,7 @@ def do_connect( poolclass=sa.pool.StaticPool, # Pessimistic disconnect handling pool_pre_ping=True, + connect_args=connect_args or {}, ) self.database_name = database self.url = sa_url @@ -63,7 +66,10 @@ def do_connect( @sa.event.listens_for(engine, "connect") def connect(dbapi_connection, connection_record): with dbapi_connection.cursor() as cur: - cur.execute("SET TIMEZONE = UTC") + # On Db2 z/OS we can set the time zone as below. + # We cannot find an equivalent command for LUW and previously had + # SET TIMEZONE = 'UTC' which silently failed anyway. + cur.execute("SET SESSION TIME ZONE = '00:00'") super().do_connect(engine) diff --git a/third_party/ibis/ibis_db2/api.py b/third_party/ibis/ibis_db2/api.py index ee6ce692b..27286ca75 100644 --- a/third_party/ibis/ibis_db2/api.py +++ b/third_party/ibis/ibis_db2/api.py @@ -12,12 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# from ibis.backends.base_sqlalchemy.alchemy import to_sqlalchemy -# from third_party.ibis.ibis_db2.compiler import dialect, rewrites # noqa: F401 - from third_party.ibis.ibis_db2 import Backend as DB2Backend import ibm_db_sa # NOQA fail early if driver is missing +from data_validation.util import dvt_config_string_to_dict + def db2_connect( host: str = "localhost", @@ -27,7 +26,9 @@ def db2_connect( database: str = None, url: str = None, driver: str = "ibm_db_sa", + connect_args: str = None, ): + connect_args_dict = dvt_config_string_to_dict(connect_args) if connect_args else {} backend = DB2Backend() backend.do_connect( host=host, @@ -37,5 +38,6 @@ def db2_connect( database=database, url=url, driver=driver, + connect_args=connect_args_dict, ) return backend From 89a69ee53007a7ecd650530f123c255817c59e94 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 17 Dec 2025 15:33:10 +0000 Subject: [PATCH 002/100] feat: Prototype Db2 z/OS support --- README.md | 5 +- data_validation/cli_tools.py | 20 ++++++-- data_validation/clients.py | 15 ++++-- data_validation/consts.py | 1 + .../query_builder/random_row_builder.py | 1 + docs/connections.md | 48 +++++++++++++++---- third_party/ibis/ibis_db2/__init__.py | 12 ++--- 7 files changed, 73 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index af30e1673..3ff9fd63d 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,8 @@ DVT supports the following connection types: * [AlloyDB](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/connections.md#alloydb) * [BigQuery](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/connections.md#google-bigquery) -* [DB2](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/connections.md#db2) +* [Db2 LUW](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/connections.md#db2_luw) +* [Db2 
z/OS](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/connections.md#db2_zos) * [FileSystem](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/connections.md#filesystem) * [Hive](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/connections.md#hive) * [Impala](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/connections.md#impala) @@ -69,7 +70,7 @@ The CLI is the main interface to use this tool and it has several different commands which can be used to create and run validations. DVT is designed to run in an environment connected to GCP services, specifically, BigQuery, GCS and Secret manager. If DVT is being run on-premises or in an environment with restricted access to GCP services, see -[running DVT at on-prem](#running-dvt-at-on-prem). Below are the command syntax and options for running validations. +[running DVT at on-prem](#running-dvt-at-on-prem). Below are the command syntax and options for running validations. Alternatives to running DVT in the CLI include deploying DVT to Cloud Run, Cloud Functions, or Airflow ([Examples Here](https://github.com/GoogleCloudPlatform/professional-services-data-validator/tree/develop/samples)). 
See the [Validation Logic](https://github.com/GoogleCloudPlatform/professional-services-data-validator#validation-logic) section diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index f3fa31138..1ab5726e1 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -187,13 +187,23 @@ ["http_path", "URL path of HTTP proxy"], ], consts.SOURCE_TYPE_DB2: [ - ["host", "Desired DB2 host"], - ["port", "Desired DB2 port (50000 if not provided)"], + ["host", "Desired Db2 host"], + ["port", "Desired Db2 port (50000 if not provided)"], ["user", "Username to connect to"], ["password", "Password for authentication of user"], - ["database", "Database in DB2 to connect to"], - ["url", "URL link in DB2 to connect to"], - ["driver", "Driver link in DB2 to connect to (default ibm_db_sa)"], + ["database", "Database in Db2 to connect to"], + ["url", "URL link in Db2 to connect to"], + ["driver", "Driver link in Db2 to connect to (default ibm_db_sa)"], + ["connect_args", "(Optional) Additional connection argument mapping"], + ], + consts.SOURCE_TYPE_DB2_ZOS: [ + ["host", "Db2 host"], + ["port", "Db2 port (50000 if not provided)"], + ["user", "Username to connect to"], + ["password", "Password for authentication of user"], + ["database", "Database in Db2 to connect to"], + ["url", "URL link in Db2 to connect to"], + ["driver", "Driver link in Db2 to connect to (default ibm_db_sa)"], ["connect_args", "(Optional) Additional connection argument mapping"], ], } diff --git a/data_validation/clients.py b/data_validation/clients.py index d3b55c984..a2dd4d335 100644 --- a/data_validation/clients.py +++ b/data_validation/clients.py @@ -53,6 +53,7 @@ "oracle", "postgres", "db2", + "db2_zos", "mssql", "redshift", "snowflake", @@ -90,8 +91,10 @@ def get_client_call(*args, **kwargs): # DB2 requires ibm_db_sa try: from third_party.ibis.ibis_db2.api import db2_connect + from third_party.ibis.ibis_db2_zos.api import db2_zos_connect except ImportError: 
db2_connect = _raise_missing_client_error("pip install ibm_db_sa") + db2_zos_connect = _raise_missing_client_error("pip install ibm_db_sa") def get_google_bigquery_client( @@ -218,6 +221,7 @@ def get_ibis_table(client, schema_name, table_name, database_name=None): "oracle", "postgres", "db2", + "db2_zos", "mssql", "redshift", ]: @@ -279,7 +283,7 @@ def list_tables(client, schema_name, tables_only=True): if tables_only and client.name != "pandas" else client.list_tables ) - if client.name in ["db2", "redshift", "snowflake", "pandas"]: + if client.name in ["db2", "db2_zos", "redshift", "snowflake", "pandas"]: return fn() return fn(database=schema_name) @@ -332,10 +336,10 @@ def get_data_client(connection_config): consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH ) if key_path: - decrypted_connection_config[ - "credentials" - ] = google.oauth2.service_account.Credentials.from_service_account_file( - key_path + decrypted_connection_config["credentials"] = ( + google.oauth2.service_account.Credentials.from_service_account_file( + key_path + ) ) if source_type not in CLIENT_LOOKUP: @@ -419,4 +423,5 @@ def get_max_in_list_size(client, in_list_over_expressions=False): consts.SOURCE_TYPE_SNOWFLAKE: snowflake_connect, consts.SOURCE_TYPE_SPANNER: spanner_connect, consts.SOURCE_TYPE_DB2: db2_connect, + consts.SOURCE_TYPE_DB2_ZOS: db2_zos_connect, } diff --git a/data_validation/consts.py b/data_validation/consts.py index 9af662f73..08ee0e2b1 100644 --- a/data_validation/consts.py +++ b/data_validation/consts.py @@ -130,6 +130,7 @@ # Connection key constants. 
SOURCE_TYPE_BIGQUERY = "BigQuery" SOURCE_TYPE_DB2 = "DB2" +SOURCE_TYPE_DB2_ZOS = "DB2_zOS" SOURCE_TYPE_FILESYSTEM = "FileSystem" SOURCE_TYPE_IMPALA = "Impala" SOURCE_TYPE_MSSQL = "MSSQL" diff --git a/data_validation/query_builder/random_row_builder.py b/data_validation/query_builder/random_row_builder.py index fac70a82e..b1af4fae3 100644 --- a/data_validation/query_builder/random_row_builder.py +++ b/data_validation/query_builder/random_row_builder.py @@ -29,6 +29,7 @@ "postgres", "mssql", "db2", + "db2_zos", "mysql", "spanner", "redshift", diff --git a/docs/connections.md b/docs/connections.md index eca90d57d..f048abd80 100644 --- a/docs/connections.md +++ b/docs/connections.md @@ -89,7 +89,8 @@ The data validation tool supports the following connection types. * [FileSystem](#filesystem-csv-orc-parquet-or-json-only) * [Impala](#impala) * [Hive](#hive) -* [DB2](#db2) +* [Db2 LUW](#db2) +* [Db2 z/OS](#db2_zos) * [AlloyDB](#alloydb) * [Snowflake](#snowflake) @@ -410,22 +411,22 @@ data-validation connections add ``` -## DB2 +## Db2 LUW -DB2 requires the `ibm_db_sa` package. We currently support only IBM DB2 LUW - Universal Database for Linux/Unix/Windows versions 9.7 onwards. +Db2 requires the `ibm_db_sa` package. We currently support only IBM Db2 LUW - Universal Database for Linux/Unix/Windows versions 9.7 onwards. 
``` data-validation connections add [--secret-manager-type ] Secret Manager type (None, GCP) [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID - --connection-name CONN_NAME DB2 Connection name - --host HOST DB2 host - --port PORT DB2 port, defaults to 50000 - --user USER DB2 user - --password PASSWORD DB2 password - --database DATABASE DB2 database + --connection-name CONN_NAME Db2 Connection name + --host HOST Db2 host + --port PORT Db2 port, defaults to 50000 + --user USER Db2 user + --password PASSWORD Db2 password + --database DATABASE Db2 database [--url URL] SQLAlchemy connection URL - [--driver DRIVER] DB2 driver, defaults to "ibm_db_sa" + [--driver DRIVER] Db2 driver, defaults to "ibm_db_sa" [--connect-args CONNECT_ARGS] Additional connection args, JSON String dict, default {} ``` @@ -437,6 +438,33 @@ data-validation connections add -c db2conn DB2 \ --connect-args='{"Security": "SSL", "SSLClientKeystoredb": "/path/to/file.kdb", "SSLClientKeystash": "/path/to/file.sth"}' ``` +## Db2 z/OS + +Db2 requires the `ibm_db_sa` package. 
+ +``` +data-validation connections add + [--secret-manager-type ] Secret Manager type (None, GCP) + [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID + --connection-name CONN_NAME DB2_zOS Connection name + --host HOST Db2 host + --port PORT Db2 port, defaults to 50000 + --user USER Db2 user + --password PASSWORD Db2 password + --database DATABASE Db2 database + [--url URL] SQLAlchemy connection URL + [--driver DRIVER] Db2 driver, defaults to "ibm_db_sa" + [--connect-args CONNECT_ARGS] Additional connection args, JSON String dict, default {} +``` + +Example TLS connection: +``` +data-validation connections add -c db2conn DB2_zOS \ + --host=sdb2-host --database=somedb \ + --user=dvt --password=DVTS3cret \ + --connect-args='{"Security": "SSL", "SSLClientKeystoredb": "/path/to/file.kdb", "SSLClientKeystash": "/path/to/file.sth"}' +``` + ## Snowflake Snowflake requires the `snowflake-sqlalchemy` and `snowflake-connector-python` packages. diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index be8c511e6..574475c39 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -63,13 +63,11 @@ def do_connect( self.database_name = database self.url = sa_url - @sa.event.listens_for(engine, "connect") - def connect(dbapi_connection, connection_record): - with dbapi_connection.cursor() as cur: - # On Db2 z/OS we can set the time zone as below. - # We cannot find an equivalent command for LUW and previously had - # SET TIMEZONE = 'UTC' which silently failed anyway. - cur.execute("SET SESSION TIME ZONE = '00:00'") + # We cannot find a SET TIMEZONE command for LUW therefore commented this section out. 
+ # @sa.event.listens_for(engine, "connect") + # def connect(dbapi_connection, connection_record): + # with dbapi_connection.cursor() as cur: + # cur.execute("SET TIMEZONE = 'UTC'") super().do_connect(engine) From 36906d77625f99960e1d4750c6867880cfbea104 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 17 Dec 2025 15:42:56 +0000 Subject: [PATCH 003/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/__init__.py | 67 +++++++++++++++++++++++ third_party/ibis/ibis_db2_zos/api.py | 45 +++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 third_party/ibis/ibis_db2_zos/__init__.py create mode 100644 third_party/ibis/ibis_db2_zos/api.py diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py new file mode 100644 index 000000000..4d39a6448 --- /dev/null +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -0,0 +1,67 @@ +# Copyright 2025 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Optional, Dict, Any + +import sqlalchemy as sa + +from third_party.ibis.ibis_db2 import Backend as Db2LUWBackend + + +class Backend(Db2LUWBackend): + name = "db2_zos" + + def do_connect( + self, + host: str = "localhost", + user: Optional[str] = None, + password: Optional[str] = None, + port: int = 50000, + database: Optional[str] = None, + url: Optional[str] = None, + driver: str = "ibm_db_sa", + connect_args: Dict[str, Any] = None, + ) -> None: + if url is None: + if driver != "ibm_db_sa": + raise NotImplementedError( + "ibm_db_sa is currently the only supported driver" + ) + sa_url = sa.engine.url.URL.create( + "ibm_db_sa", + host=host, + port=port, + username=user, + password=password, + database=database, + ) + else: + sa_url = sa.engine.url.make_url(url) + + engine = sa.create_engine( + sa_url, + poolclass=sa.pool.StaticPool, + # Pessimistic disconnect handling + pool_pre_ping=True, + connect_args=connect_args or {}, + ) + self.database_name = database + self.url = sa_url + + @sa.event.listens_for(engine, "connect") + def connect(dbapi_connection, connection_record): + with dbapi_connection.cursor() as cur: + cur.execute("SET SESSION TIME ZONE = '+0:00'") + + super(Db2LUWBackend).do_connect(engine) diff --git a/third_party/ibis/ibis_db2_zos/api.py b/third_party/ibis/ibis_db2_zos/api.py new file mode 100644 index 000000000..654257386 --- /dev/null +++ b/third_party/ibis/ibis_db2_zos/api.py @@ -0,0 +1,45 @@ +# Copyright 2025 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +from third_party.ibis.ibis_db2_zos import Backend +import ibm_db_sa # NOQA fail early if driver is missing + +from data_validation.util import dvt_config_string_to_dict + + +def db2_connect( + host: str = "localhost", + user: Optional[str] = None, + password: Optional[str] = None, + port: int = 50000, + database: Optional[str] = None, + url: Optional[str] = None, + driver: str = "ibm_db_sa", + connect_args: Optional[str] = None, +): + connect_args_dict = dvt_config_string_to_dict(connect_args) if connect_args else {} + backend = Backend() + backend.do_connect( + host=host, + user=user, + password=password, + port=port, + database=database, + url=url, + driver=driver, + connect_args=connect_args_dict, + ) + return backend From ec75d2e53673fa2d6f5a580ec180ef2359edb6e0 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 17 Dec 2025 15:44:27 +0000 Subject: [PATCH 004/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2_zos/api.py b/third_party/ibis/ibis_db2_zos/api.py index 654257386..53a7030e7 100644 --- a/third_party/ibis/ibis_db2_zos/api.py +++ b/third_party/ibis/ibis_db2_zos/api.py @@ -20,7 +20,7 @@ from data_validation.util import dvt_config_string_to_dict -def db2_connect( +def db2_zos_connect( host: str = "localhost", user: Optional[str] = None, password: Optional[str] = None, From f31ca4c64570dda6bf3d84ff7071956e191e6634 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 17 Dec 2025 16:31:18 +0000 Subject: [PATCH 005/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_addon/operations.py | 40 +++++++---------------- third_party/ibis/ibis_db2/registry.py | 9 +++++ third_party/ibis/ibis_db2_zos/compiler.py | 27 +++++++++++++++ third_party/ibis/ibis_db2_zos/registry.py | 33 +++++++++++++++++++ 4 files 
changed, 81 insertions(+), 28 deletions(-) create mode 100644 third_party/ibis/ibis_db2_zos/compiler.py create mode 100644 third_party/ibis/ibis_db2_zos/registry.py diff --git a/third_party/ibis/ibis_addon/operations.py b/third_party/ibis/ibis_addon/operations.py index 04980986c..bed80c131 100644 --- a/third_party/ibis/ibis_addon/operations.py +++ b/third_party/ibis/ibis_addon/operations.py @@ -187,12 +187,6 @@ def strftime_impala(t, op): return f"from_unixtime(unix_timestamp({targ}, {format_str!r}), {format_str!r})" -def strftime_db2(translator, op): - """Date, Datetime, Timestamp formatting specific to DB2.""" - # TODO(issue-1296): third_party/ibis/ibis_db2/registry.py:298 - AttributeError: 'Strftime' object has no attribute 'value' - pass - - def format_hashbytes_hive(translator, op): arg = translator.translate(op.arg) if op.how == "sha256": @@ -246,14 +240,6 @@ def sa_format_hashbytes_mysql(translator, op): return hash_func -def sa_format_hashbytes_db2(translator, op): - compiled_arg = translator.translate(op.arg) - hash_func = sa.func.hash(compiled_arg, sa.sql.literal_column("2")) - # OBS: SYSIBM.HEX function accepts a max length of 16336 bytes (https://www.ibm.com/docs/en/db2/11.5?topic=functions-hex) - hex_func = sa.func.hex(hash_func) - return sa.func.lower(hex_func) - - def sa_format_hashbytes_redshift(translator, op): arg = translator.translate(op.arg) return sa.sql.literal_column(f"sha2({arg}, 256)") @@ -516,19 +502,19 @@ def _sa_whitespace_rstrip(t, op): ops.StringLength ] -PostgreSQLExprTranslator._registry[ - ops.HashBytes -] = postgres_registry.sa_format_hashbytes +PostgreSQLExprTranslator._registry[ops.HashBytes] = ( + postgres_registry.sa_format_hashbytes +) PostgreSQLExprTranslator._registry[RawSQL] = sa_format_raw_sql PostgreSQLExprTranslator._registry[ToChar] = sa_format_to_char PostgreSQLExprTranslator._registry[ops.Cast] = postgres_registry.sa_cast_postgres PostgreSQLExprTranslator._registry[BinaryLength] = sa_format_binary_length 
-PostgreSQLExprTranslator._registry[ - ops.ExtractEpochSeconds -] = postgres_registry.sa_epoch_seconds -PostgreSQLExprTranslator._registry[ - PaddedCharLength -] = postgres_registry.sa_format_postgres_padded_char_length +PostgreSQLExprTranslator._registry[ops.ExtractEpochSeconds] = ( + postgres_registry.sa_epoch_seconds +) +PostgreSQLExprTranslator._registry[PaddedCharLength] = ( + postgres_registry.sa_format_postgres_padded_char_length +) MsSqlExprTranslator._registry[ops.HashBytes] = mssql_registry.sa_format_hashbytes @@ -561,10 +547,8 @@ def _sa_whitespace_rstrip(t, op): ] if Db2ExprTranslator: - Db2ExprTranslator._registry[ops.HashBytes] = sa_format_hashbytes_db2 Db2ExprTranslator._registry[RawSQL] = sa_format_raw_sql Db2ExprTranslator._registry[BinaryLength] = sa_format_binary_length - Db2ExprTranslator._registry[ops.Strftime] = strftime_db2 Db2ExprTranslator._registry[ops.RStrip] = _sa_whitespace_rstrip Db2ExprTranslator._registry[PaddedCharLength] = Db2ExprTranslator._registry[ ops.StringLength @@ -578,9 +562,9 @@ def _sa_whitespace_rstrip(t, op): TeradataExprTranslator._registry[RawSQL] = format_raw_sql TeradataExprTranslator._registry[ops.HashBytes] = format_hashbytes_teradata TeradataExprTranslator._registry[BinaryLength] = sa_format_binary_length - TeradataExprTranslator._registry[ - PaddedCharLength - ] = TeradataExprTranslator._registry[ops.StringLength] + TeradataExprTranslator._registry[PaddedCharLength] = ( + TeradataExprTranslator._registry[ops.StringLength] + ) if SnowflakeExprTranslator: SnowflakeExprTranslator._registry[ops.Cast] = sa_cast_snowflake diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 04fd03ff2..08b20b60a 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -462,6 +462,14 @@ def _day_of_week_name(t, op): return sa.func.dayname(sa_arg) +def sa_format_hashbytes_db2(translator, op): + compiled_arg = translator.translate(op.arg) + hash_func = 
sa.func.hash(compiled_arg, sa.sql.literal_column("2")) + # OBS: SYSIBM.HEX function accepts a max length of 16336 bytes (https://www.ibm.com/docs/en/db2/11.5?topic=functions-hex) + hex_func = sa.func.hex(hash_func) + return sa.func.lower(hex_func) + + operation_registry.update( { ops.Literal: _literal, @@ -488,6 +496,7 @@ def _day_of_week_name(t, op): ops.Translate: fixed_arity("translate", 3), ops.RegexExtract: _regex_extract, ops.StringJoin: _string_join, + ops.HashBytes: sa_format_hashbytes_db2, # math ops.Log: _log, ops.Log2: unary(lambda x: sa.func.log(2, x)), diff --git a/third_party/ibis/ibis_db2_zos/compiler.py b/third_party/ibis/ibis_db2_zos/compiler.py new file mode 100644 index 000000000..e76c7288d --- /dev/null +++ b/third_party/ibis/ibis_db2_zos/compiler.py @@ -0,0 +1,27 @@ +# Copyright 2023 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from ibis.backends.base.sql.alchemy import AlchemyExprTranslator + +from third_party.ibis.ibis_db2_zos.compiler import Db2Compiler, Db2ExprTranslator +from third_party.ibis.ibis_db2_zos.registry import operation_registry + + +class Db2zOSExprTranslator(Db2ExprTranslator): + _registry = operation_registry.copy() + _rewrites = AlchemyExprTranslator._rewrites.copy() + _dialect_name = "db2" + + +class Db2zOSCompiler(Db2Compiler): + translator_class = Db2zOSExprTranslator diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py new file mode 100644 index 000000000..bb624c6bb --- /dev/null +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -0,0 +1,33 @@ +# Copyright 2025 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ibis.expr.operations as ops +import sqlalchemy as sa + +from third_party.ibis.ibis_db2.registry import ( + operation_registry as db2_luw_operation_registry, +) + +operation_registry = db2_luw_operation_registry.copy() + + +def _sa_whitespace_rstrip(t, op): + # The full string.whitespace second parameter was throwing an error on Db2 z/OS: + # the data type, length or value of the argument for the parameter in position "2" of routine "RTRIM" is incorrect + # Therefore we've removed it below and let Db2 exclude default space character. 
+ sa_arg = t.translate(op.arg) + return sa.func.rtrim(sa_arg) + + +operation_registry[ops.RStrip] = _sa_whitespace_rstrip From 11f44be9f665ddd98b857945421ee7d144688d5a Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 17 Dec 2025 17:23:01 +0000 Subject: [PATCH 006/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index 4d39a6448..ec7aa3e1d 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -64,4 +64,4 @@ def connect(dbapi_connection, connection_record): with dbapi_connection.cursor() as cur: cur.execute("SET SESSION TIME ZONE = '+0:00'") - super(Db2LUWBackend).do_connect(engine) + super(Db2LUWBackend, self).do_connect(engine) From 7712040e9e33084f3c4556a154275c005fc785af Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 17 Dec 2025 18:17:09 +0000 Subject: [PATCH 007/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/__init__.py | 2 ++ third_party/ibis/ibis_db2_zos/compiler.py | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index ec7aa3e1d..62d42d652 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -17,10 +17,12 @@ import sqlalchemy as sa from third_party.ibis.ibis_db2 import Backend as Db2LUWBackend +from third_party.ibis.ibis_db2_zos.compiler import Db2zOSCompiler class Backend(Db2LUWBackend): name = "db2_zos" + compiler = Db2zOSCompiler def do_connect( self, diff --git a/third_party/ibis/ibis_db2_zos/compiler.py b/third_party/ibis/ibis_db2_zos/compiler.py index e76c7288d..2c495e3cb 100644 --- a/third_party/ibis/ibis_db2_zos/compiler.py +++ b/third_party/ibis/ibis_db2_zos/compiler.py @@ -11,17 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF 
ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from ibis.backends.base.sql.alchemy import AlchemyExprTranslator +from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from third_party.ibis.ibis_db2_zos.compiler import Db2Compiler, Db2ExprTranslator from third_party.ibis.ibis_db2_zos.registry import operation_registry -class Db2zOSExprTranslator(Db2ExprTranslator): +class Db2zOSExprTranslator(AlchemyExprTranslator): _registry = operation_registry.copy() _rewrites = AlchemyExprTranslator._rewrites.copy() _dialect_name = "db2" -class Db2zOSCompiler(Db2Compiler): +class Db2zOSCompiler(AlchemyCompiler): translator_class = Db2zOSExprTranslator From 6783d6f127c50f2aead6dd5e965d5488fafdd782 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 17 Dec 2025 18:22:59 +0000 Subject: [PATCH 008/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_addon/operations.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/third_party/ibis/ibis_addon/operations.py b/third_party/ibis/ibis_addon/operations.py index bed80c131..0494444f4 100644 --- a/third_party/ibis/ibis_addon/operations.py +++ b/third_party/ibis/ibis_addon/operations.py @@ -71,8 +71,10 @@ # DB2 requires ibm_db_dbi try: from third_party.ibis.ibis_db2.compiler import Db2ExprTranslator + from third_party.ibis.ibis_db2_zos.compiler import Db2zOSExprTranslator except Exception: Db2ExprTranslator = None + Db2zOSExprTranslator = None # Oracle requires oracledb try: @@ -554,6 +556,14 @@ def _sa_whitespace_rstrip(t, op): ops.StringLength ] +if Db2zOSExprTranslator: + Db2zOSExprTranslator._registry[RawSQL] = sa_format_raw_sql + Db2zOSExprTranslator._registry[BinaryLength] = sa_format_binary_length + Db2zOSExprTranslator._registry[ops.RStrip] = _sa_whitespace_rstrip + Db2zOSExprTranslator._registry[PaddedCharLength] = Db2zOSExprTranslator._registry[ + ops.StringLength + ] + 
SpannerExprTranslator._registry[RawSQL] = format_raw_sql SpannerExprTranslator._registry[ops.HashBytes] = bigquery_registry.format_hashbytes SpannerExprTranslator._registry[BinaryLength] = sa_format_binary_length From 8a1f9f55b535ade7721672d06ac8295325cd1123 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 17 Dec 2025 18:28:24 +0000 Subject: [PATCH 009/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_addon/operations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/third_party/ibis/ibis_addon/operations.py b/third_party/ibis/ibis_addon/operations.py index 0494444f4..574c32fe7 100644 --- a/third_party/ibis/ibis_addon/operations.py +++ b/third_party/ibis/ibis_addon/operations.py @@ -559,7 +559,6 @@ def _sa_whitespace_rstrip(t, op): if Db2zOSExprTranslator: Db2zOSExprTranslator._registry[RawSQL] = sa_format_raw_sql Db2zOSExprTranslator._registry[BinaryLength] = sa_format_binary_length - Db2zOSExprTranslator._registry[ops.RStrip] = _sa_whitespace_rstrip Db2zOSExprTranslator._registry[PaddedCharLength] = Db2zOSExprTranslator._registry[ ops.StringLength ] From a77772a58dd17f2cb9408538cc13591e82ad64c5 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 18 Dec 2025 16:08:10 +0000 Subject: [PATCH 010/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index bb624c6bb..3402f58bc 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -27,7 +27,7 @@ def _sa_whitespace_rstrip(t, op): # the data type, length or value of the argument for the parameter in position "2" of routine "RTRIM" is incorrect # Therefore we've removed it below and let Db2 exclude default space character. 
sa_arg = t.translate(op.arg) - return sa.func.rtrim(sa_arg) + return sa.func.rtrim(sa_arg, " \t\r") operation_registry[ops.RStrip] = _sa_whitespace_rstrip From 7e7f2b1df841f66630f8a265d1e9bde9907e23eb Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 12:54:39 +0000 Subject: [PATCH 011/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/registry.py | 9 ++++++++- third_party/ibis/ibis_impala/api.py | 8 +++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index 3402f58bc..c03d4fb55 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -22,12 +22,19 @@ operation_registry = db2_luw_operation_registry.copy() +def _sa_ifnull(t, op): + sa_arg = t.translate(op.arg) + return sa.func.coalesce(sa_arg, sa.literal_column(f"'{op.ifnull_expr.value}'")) + + def _sa_whitespace_rstrip(t, op): # The full string.whitespace second parameter was throwing an error on Db2 z/OS: # the data type, length or value of the argument for the parameter in position "2" of routine "RTRIM" is incorrect # Therefore we've removed it below and let Db2 exclude default space character. sa_arg = t.translate(op.arg) - return sa.func.rtrim(sa_arg, " \t\r") + return sa.func.rtrim(sa_arg, sa.literal_column("' \t\r'")) + # return sa.func.rtrim(sa_arg, " \t\r") +operation_registry[ops.IfNull] = _sa_ifnull operation_registry[ops.RStrip] = _sa_whitespace_rstrip diff --git a/third_party/ibis/ibis_impala/api.py b/third_party/ibis/ibis_impala/api.py index 26c5dcfaa..b232cbf2e 100644 --- a/third_party/ibis/ibis_impala/api.py +++ b/third_party/ibis/ibis_impala/api.py @@ -196,9 +196,11 @@ def fill(target, chunks, na_rep): return target -@rewrites(ops.IfNull) -def _if_null(op): - return ops.Coalesce((op.arg, op.ifnull_expr)) +# TODO this needs changing to not be a rewrite across all engines. 
+# @rewrites(ops.IfNull) +# def _if_null(op): +# breakpoint() +# return ops.Coalesce((op.arg, op.ifnull_expr)) def update_query_with_limit(query): From 5f690ff3ead8ff505e5265e0f67bea668dfcd0bb Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 13:09:31 +0000 Subject: [PATCH 012/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index c03d4fb55..176bbe90f 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -36,5 +36,5 @@ def _sa_whitespace_rstrip(t, op): # return sa.func.rtrim(sa_arg, " \t\r") -operation_registry[ops.IfNull] = _sa_ifnull +# operation_registry[ops.IfNull] = _sa_ifnull operation_registry[ops.RStrip] = _sa_whitespace_rstrip From ee2545734d91688a563c42828e13a96925bfca4b Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 14:21:51 +0000 Subject: [PATCH 013/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/registry.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index 176bbe90f..4769db955 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -27,14 +27,26 @@ def _sa_ifnull(t, op): return sa.func.coalesce(sa_arg, sa.literal_column(f"'{op.ifnull_expr.value}'")) +def _sa_format_hashbytes(translator, op): + # Use of query parameters is throwing an error on Db2 z/OS. + # Code below uses sa.sql.literal_column to prevent parameterization. + # I invested several hours into trying to find a way to utilize literal_binds=True but was unsuccessful. 
+ compiled_arg = translator.translate(op.arg) + hash_func = sa.func.hash( + sa.func.unicode_str(compiled_arg), sa.sql.literal_column("2") + ) + hex_func = sa.func.hex(hash_func) + return sa.func.lower(hex_func) + + def _sa_whitespace_rstrip(t, op): # The full string.whitespace second parameter was throwing an error on Db2 z/OS: # the data type, length or value of the argument for the parameter in position "2" of routine "RTRIM" is incorrect # Therefore we've removed it below and let Db2 exclude default space character. sa_arg = t.translate(op.arg) - return sa.func.rtrim(sa_arg, sa.literal_column("' \t\r'")) - # return sa.func.rtrim(sa_arg, " \t\r") + return sa.func.rtrim(sa_arg) -# operation_registry[ops.IfNull] = _sa_ifnull +operation_registry[ops.HashBytes] = _sa_format_hashbytes +operation_registry[ops.IfNull] = _sa_ifnull operation_registry[ops.RStrip] = _sa_whitespace_rstrip From 680d90bde3c3cd46d4410efd20688190aeaf2c9b Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 14:22:17 +0000 Subject: [PATCH 014/100] feat: Prototype Db2 z/OS support --- data_validation/consts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_validation/consts.py b/data_validation/consts.py index 08ee0e2b1..1ed776ca6 100644 --- a/data_validation/consts.py +++ b/data_validation/consts.py @@ -130,7 +130,7 @@ # Connection key constants. 
SOURCE_TYPE_BIGQUERY = "BigQuery" SOURCE_TYPE_DB2 = "DB2" -SOURCE_TYPE_DB2_ZOS = "DB2_zOS" +SOURCE_TYPE_DB2_ZOS = "DB2_ZOS" SOURCE_TYPE_FILESYSTEM = "FileSystem" SOURCE_TYPE_IMPALA = "Impala" SOURCE_TYPE_MSSQL = "MSSQL" From 5324407b11c2b839fbed43de9d24a5d0ae67fe82 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 14:26:27 +0000 Subject: [PATCH 015/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/registry.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index 4769db955..fe46b1a09 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -32,9 +32,7 @@ def _sa_format_hashbytes(translator, op): # Code below uses sa.sql.literal_column to prevent parameterization. # I invested several hours into trying to find a way to utilize literal_binds=True but was unsuccessful. compiled_arg = translator.translate(op.arg) - hash_func = sa.func.hash( - sa.func.unicode_str(compiled_arg), sa.sql.literal_column("2") - ) + hash_func = sa.func.hash_sha256(sa.func.unicode_str(compiled_arg)) hex_func = sa.func.hex(hash_func) return sa.func.lower(hex_func) From 8e9340a00f31a92cd370598d9710e2b1f7bfac0c Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 15:07:15 +0000 Subject: [PATCH 016/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_addon/operations.py | 1 + third_party/ibis/ibis_impala/api.py | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/third_party/ibis/ibis_addon/operations.py b/third_party/ibis/ibis_addon/operations.py index 574c32fe7..0abc8b8bf 100644 --- a/third_party/ibis/ibis_addon/operations.py +++ b/third_party/ibis/ibis_addon/operations.py @@ -488,6 +488,7 @@ def _sa_whitespace_rstrip(t, op): ExprTranslator._registry[PaddedCharLength] = ExprTranslator._registry[ops.StringLength] ImpalaExprTranslator._registry[ops.Cast] = sa_cast_hive 
+ImpalaExprTranslator._registry[ops.IfNull] = sa_fixed_arity(sa.func.coalesce, 2) ImpalaExprTranslator._registry[RawSQL] = format_raw_sql ImpalaExprTranslator._registry[ops.HashBytes] = format_hashbytes_hive ImpalaExprTranslator._registry[ops.RandomScalar] = fixed_arity("RAND", 0) diff --git a/third_party/ibis/ibis_impala/api.py b/third_party/ibis/ibis_impala/api.py index b232cbf2e..8c14e790b 100644 --- a/third_party/ibis/ibis_impala/api.py +++ b/third_party/ibis/ibis_impala/api.py @@ -196,13 +196,6 @@ def fill(target, chunks, na_rep): return target -# TODO this needs changing to not be a rewrite across all engines. -# @rewrites(ops.IfNull) -# def _if_null(op): -# breakpoint() -# return ops.Coalesce((op.arg, op.ifnull_expr)) - - def update_query_with_limit(query): limit_pattern = re.compile(r"LIMIT\s+\d+(\s+OFFSET\s+\d+)?\s*;?\s*$", re.IGNORECASE) last_limit_match = limit_pattern.search(query) From 20cf58c0cf6f27d8206c190d713d58d33400c0ea Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 15:23:58 +0000 Subject: [PATCH 017/100] feat: Move Impala/Hive functions to dedicated registry file --- third_party/ibis/ibis_addon/operations.py | 60 ++----------------- third_party/ibis/ibis_impala/registry.py | 70 +++++++++++++++++++++++ 2 files changed, 76 insertions(+), 54 deletions(-) create mode 100644 third_party/ibis/ibis_impala/registry.py diff --git a/third_party/ibis/ibis_addon/operations.py b/third_party/ibis/ibis_addon/operations.py index 0abc8b8bf..373637e18 100644 --- a/third_party/ibis/ibis_addon/operations.py +++ b/third_party/ibis/ibis_addon/operations.py @@ -39,10 +39,7 @@ from ibis.backends.base.sql.alchemy.registry import fixed_arity as sa_fixed_arity from ibis.backends.base.sql.alchemy.translator import AlchemyExprTranslator from ibis.backends.base.sql.compiler.translator import ExprTranslator -from ibis.backends.base.sql.registry import ( - fixed_arity, - type_to_sql_string as base_type_to_sql_string, -) +from ibis.backends.base.sql.registry 
import fixed_arity from ibis.backends.bigquery.client import ( _DTYPE_TO_IBIS_TYPE as _BQ_DTYPE_TO_IBIS_TYPE, _LEGACY_TO_STANDARD as _BQ_LEGACY_TO_STANDARD, @@ -60,6 +57,7 @@ # Do not remove these lines, they trigger patching of Ibis code. import third_party.ibis.ibis_bigquery.api # noqa from third_party.ibis.ibis_bigquery import registry as bigquery_registry +from third_party.ibis.ibis_impala import registry as impala_registry import third_party.ibis.ibis_mysql.compiler # noqa from third_party.ibis.ibis_mssql import registry as mssql_registry from third_party.ibis.ibis_postgres import registry as postgres_registry @@ -177,28 +175,6 @@ def strftime_mysql(translator, op): return sa.func.date_format(arg_formatted, fmt_string) -def strftime_impala(t, op): - import sqlglot as sg - - hive_dialect = sg.dialects.hive.Hive - if (time_mapping := getattr(hive_dialect, "TIME_MAPPING", None)) is None: - time_mapping = hive_dialect.time_mapping - reverse_hive_mapping = {v: k for k, v in time_mapping.items()} - format_str = sg.time.format_time(op.format_str.value, reverse_hive_mapping) - targ = t.translate(ops.Cast(op.arg, to=dt.string)) - return f"from_unixtime(unix_timestamp({targ}, {format_str!r}), {format_str!r})" - - -def format_hashbytes_hive(translator, op): - arg = translator.translate(op.arg) - if op.how == "sha256": - return f"sha2({arg}, 256)" - elif op.how == "md5": - return f"md5({arg})" - else: - raise ValueError(f"unexpected value for 'how': {op.how}") - - def format_hashbytes_alchemy(translator, op): arg = translator.translate(op.arg) if op.how == "sha256": @@ -273,30 +249,6 @@ def sa_format_binary_length_oracle(translator, op): return sa.func.dbms_lob.getlength(arg) -def sa_cast_hive(t, op): - arg = op.arg - typ = op.to - arg_dtype = arg.output_dtype - - arg_formatted = t.translate(arg) - - if arg_dtype.is_binary() and typ.is_string(): - # Binary to string cast is a "to hex" conversion for DVT. 
- return f"lower(hex({arg_formatted}))" - elif arg_dtype.is_string() and typ.is_binary(): - # Binary from string cast is a "from hex" conversion for DVT. - return f"unhex({arg_formatted})" - - # Cannot use sa_fixed_cast() because of ImpalaExprTranslator ancestry. - sql_type = base_type_to_sql_string(typ) - cast_expr = "CAST({} AS {})".format(arg_formatted, sql_type) - - if arg_dtype.is_boolean() and typ.is_string(): - return f"LOWER({cast_expr})" - else: - return cast_expr - - def sa_cast_mysql(t, op): # Add cast from numeric to string arg = op.arg @@ -487,12 +439,12 @@ def _sa_whitespace_rstrip(t, op): # Base length of padded string is the same as for a standard string. ExprTranslator._registry[PaddedCharLength] = ExprTranslator._registry[ops.StringLength] -ImpalaExprTranslator._registry[ops.Cast] = sa_cast_hive -ImpalaExprTranslator._registry[ops.IfNull] = sa_fixed_arity(sa.func.coalesce, 2) +ImpalaExprTranslator._registry[ops.Cast] = impala_registry.sa_cast +ImpalaExprTranslator._registry[ops.IfNull] = impala_registry.sa_ifnull ImpalaExprTranslator._registry[RawSQL] = format_raw_sql -ImpalaExprTranslator._registry[ops.HashBytes] = format_hashbytes_hive +ImpalaExprTranslator._registry[ops.HashBytes] = impala_registry.sa_format_hashbytes ImpalaExprTranslator._registry[ops.RandomScalar] = fixed_arity("RAND", 0) -ImpalaExprTranslator._registry[ops.Strftime] = strftime_impala +ImpalaExprTranslator._registry[ops.Strftime] = impala_registry.sa_strftime ImpalaExprTranslator._registry[BinaryLength] = sa_format_binary_length if OracleExprTranslator: diff --git a/third_party/ibis/ibis_impala/registry.py b/third_party/ibis/ibis_impala/registry.py new file mode 100644 index 000000000..7fc2ea33e --- /dev/null +++ b/third_party/ibis/ibis_impala/registry.py @@ -0,0 +1,70 @@ +# Copyright 2025 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sqlalchemy as sa +import ibis.expr.operations as ops +from ibis.backends.base.sql.registry import ( + type_to_sql_string as base_type_to_sql_string, +) + + +def sa_cast(t, op): + arg = op.arg + typ = op.to + arg_dtype = arg.output_dtype + + arg_formatted = t.translate(arg) + + if arg_dtype.is_binary() and typ.is_string(): + # Binary to string cast is a "to hex" conversion for DVT. + return f"lower(hex({arg_formatted}))" + elif arg_dtype.is_string() and typ.is_binary(): + # Binary from string cast is a "from hex" conversion for DVT. + return f"unhex({arg_formatted})" + + # Cannot use sa_fixed_cast() because of ImpalaExprTranslator ancestry. 
+ sql_type = base_type_to_sql_string(typ) + cast_expr = "CAST({} AS {})".format(arg_formatted, sql_type) + + if arg_dtype.is_boolean() and typ.is_string(): + return f"LOWER({cast_expr})" + else: + return cast_expr + + +def sa_ifnull(t, op): + sa_arg = t.translate(op.arg) + return sa.func.coalesce(sa_arg, op.ifnull_expr.value) + + +def sa_format_hashbytes(translator, op): + arg = translator.translate(op.arg) + if op.how == "sha256": + return f"sha2({arg}, 256)" + elif op.how == "md5": + return f"md5({arg})" + else: + raise ValueError(f"unexpected value for 'how': {op.how}") + + +def sa_strftime(t, op): + import sqlglot as sg + + hive_dialect = sg.dialects.hive.Hive + if (time_mapping := getattr(hive_dialect, "TIME_MAPPING", None)) is None: + time_mapping = hive_dialect.time_mapping + reverse_hive_mapping = {v: k for k, v in time_mapping.items()} + format_str = sg.time.format_time(op.format_str.value, reverse_hive_mapping) + targ = t.translate(ops.Cast(op.arg, to=dt.string)) + return f"from_unixtime(unix_timestamp({targ}, {format_str!r}), {format_str!r})" From da0b07656eeec0248c45d70db558cd01b8d69e21 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 15:26:13 +0000 Subject: [PATCH 018/100] feat: Move Impala/Hive functions to dedicated registry file --- third_party/ibis/ibis_impala/registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/ibis/ibis_impala/registry.py b/third_party/ibis/ibis_impala/registry.py index 7fc2ea33e..f55acfd89 100644 --- a/third_party/ibis/ibis_impala/registry.py +++ b/third_party/ibis/ibis_impala/registry.py @@ -13,6 +13,7 @@ # limitations under the License. 
import sqlalchemy as sa +import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.base.sql.registry import ( type_to_sql_string as base_type_to_sql_string, From d43e8a6406841efe8bc4f8f2aa33f813af4cc987 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 15:32:30 +0000 Subject: [PATCH 019/100] feat: Move Impala/Hive functions to dedicated registry file --- third_party/ibis/ibis_impala/registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/ibis/ibis_impala/registry.py b/third_party/ibis/ibis_impala/registry.py index f55acfd89..6935bf4c7 100644 --- a/third_party/ibis/ibis_impala/registry.py +++ b/third_party/ibis/ibis_impala/registry.py @@ -45,8 +45,8 @@ def sa_cast(t, op): def sa_ifnull(t, op): - sa_arg = t.translate(op.arg) - return sa.func.coalesce(sa_arg, op.ifnull_expr.value) + arg_formatted = t.translate(op.arg) + return f"coalesce({arg_formatted},'{op.ifnull_expr.value}')" def sa_format_hashbytes(translator, op): From 7d9fe34ac342577a7d5f883b6503c1892886bf8b Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 19 Dec 2025 16:37:11 +0000 Subject: [PATCH 020/100] feat: Move Impala/Hive functions to dedicated registry file --- third_party/ibis/ibis_impala/registry.py | 1 - 1 file changed, 1 deletion(-) diff --git a/third_party/ibis/ibis_impala/registry.py b/third_party/ibis/ibis_impala/registry.py index 6935bf4c7..7a44a901f 100644 --- a/third_party/ibis/ibis_impala/registry.py +++ b/third_party/ibis/ibis_impala/registry.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sqlalchemy as sa import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.backends.base.sql.registry import ( From d9c1602bb27b68feaf9931c3e049b114c093e7c7 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 22 Dec 2025 14:35:54 +0000 Subject: [PATCH 021/100] fix: Db2 GROUP BY support in column validation --- tests/system/data_sources/test_db2.py | 3 +-- third_party/ibis/ibis_db2/compiler.py | 1 + third_party/ibis/ibis_db2_zos/compiler.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 7e83234db..101041d65 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -115,8 +115,7 @@ def test_column_validation_core_types(): min_cols=cols, max_cols=cols, filters="id>0 AND col_int8>0", - # TODO When issue-1295 is complete uncomment --grouped_columns parameter below. - # grouped_columns="col_varchar_30", + grouped_columns="col_varchar_30", ) diff --git a/third_party/ibis/ibis_db2/compiler.py b/third_party/ibis/ibis_db2/compiler.py index 786814a1e..9c688365d 100644 --- a/third_party/ibis/ibis_db2/compiler.py +++ b/third_party/ibis/ibis_db2/compiler.py @@ -29,3 +29,4 @@ class Db2ExprTranslator(AlchemyExprTranslator): class Db2Compiler(AlchemyCompiler): translator_class = Db2ExprTranslator + supports_indexed_grouping_keys = False diff --git a/third_party/ibis/ibis_db2_zos/compiler.py b/third_party/ibis/ibis_db2_zos/compiler.py index 2c495e3cb..a32b7cdee 100644 --- a/third_party/ibis/ibis_db2_zos/compiler.py +++ b/third_party/ibis/ibis_db2_zos/compiler.py @@ -24,3 +24,4 @@ class Db2zOSExprTranslator(AlchemyExprTranslator): class Db2zOSCompiler(AlchemyCompiler): translator_class = Db2zOSExprTranslator + supports_indexed_grouping_keys = False From f678fd9fe82570ffc4c03213c89265cebb2a57bb Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 22 Dec 2025 15:05:16 +0000 Subject: [PATCH 022/100] feat: Prototype Db2 z/OS 
support --- tests/system/data_sources/test_db2.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 101041d65..9cf813abf 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -17,6 +17,7 @@ from data_validation import cli_tools, consts from tests.system.data_sources.common_functions import ( + DVT_CORE_TYPES_COLUMNS, schema_validation_test, column_validation_test, run_test_from_cli_args, @@ -25,6 +26,7 @@ custom_query_validation_test, raw_query_test, ) + from tests.system.data_sources.test_bigquery import BQ_CONN @@ -126,7 +128,7 @@ def test_column_validation_core_types(): def test_column_validation_core_types_to_bigquery(): """DB2 to BigQuery dvt_core_types column validation""" # Excluded col_float32 because BigQuery does not have an exact same type and float32/64 are lossy and cannot be compared. - # Excluded col_tstz since it is not possible to set time zone at this column on DB2 + # Excluded col_tstz since it is not possible to set time zone at this column on Db2 cols = "col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_datetime" column_validation_test( tc="bq-conn", @@ -145,11 +147,19 @@ def test_column_validation_core_types_to_bigquery(): ) def test_row_validation_core_types(): """DB2 to DB2 dvt_core_types row validation""" + # Excluded col_datetime,col_tstz due to strftime issue-1296. 
+ cols = ",".join( + [ + _ + for _ in DVT_CORE_TYPES_COLUMNS + if _ not in ("id", "col_datetime", "col_tstz") + ] + ) row_validation_test( tables="db2inst1.dvt_core_types", tc="mock-conn", # OBS: Only passing this column because SYSIBM.HEX function accepts a max length of 16336 bytes (https://www.ibm.com/docs/en/db2/11.5?topic=functions-hex) - hash="col_string", + hash=cols, filters="id>0 AND col_int8>0", ) From 5b3c46559e0b268e48bc12972c5732636bb3ccef Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 22 Dec 2025 15:31:55 +0000 Subject: [PATCH 023/100] feat: Prototype Db2 z/OS support --- tests/system/data_sources/test_db2.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 9cf813abf..e86f0b398 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -147,12 +147,13 @@ def test_column_validation_core_types_to_bigquery(): ) def test_row_validation_core_types(): """DB2 to DB2 dvt_core_types row validation""" - # Excluded col_datetime,col_tstz due to strftime issue-1296. + # TODO: When issue-1296 is complete remove col_date,col_datetime,col_tstz from exclusion list below. + # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. cols = ",".join( [ _ for _ in DVT_CORE_TYPES_COLUMNS - if _ not in ("id", "col_datetime", "col_tstz") + if _ not in ("id", "col_char_2", "col_date", "col_datetime", "col_tstz") ] ) row_validation_test( @@ -184,12 +185,20 @@ def test_row_validation_core_types_auto_pks(): ) def test_row_validation_core_types_to_bigquery(): """DB2 to BigQuery dvt_core_types row validation""" + # TODO: When issue-1296 is complete remove col_date,col_datetime,col_tstz from exclusion list below. + # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. 
+ cols = ",".join( + [ + _ + for _ in DVT_CORE_TYPES_COLUMNS + if _ not in ("id", "col_char_2", "col_date", "col_datetime", "col_tstz") + ] + ) row_validation_test( tables="db2inst1.dvt_core_types=pso_data_validator.dvt_core_types", tc="bq-conn", # OBS: Only passing this column because SYSIBM.HEX function accepts a max length of 16336 bytes (https://www.ibm.com/docs/en/db2/11.5?topic=functions-hex) - # TODO: When issue-1296 is complete change to col_date,col_datetime,col_tstz instead - hash="col_string", + hash=cols, ) From bc6b1d57adf96dfb3617cb63d31770d4b57eb9d2 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 22 Dec 2025 15:50:28 +0000 Subject: [PATCH 024/100] feat: Prototype Db2 z/OS support --- tests/system/data_sources/test_db2.py | 38 +++++++++++++++++++++++--- third_party/ibis/ibis_db2/datatypes.py | 2 +- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index e86f0b398..3eb844656 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -149,17 +149,31 @@ def test_row_validation_core_types(): """DB2 to DB2 dvt_core_types row validation""" # TODO: When issue-1296 is complete remove col_date,col_datetime,col_tstz from exclusion list below. # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. + # TODO: When issue-1634 is complete remove columns tagged with issue-1634 from exclusion list below. 
cols = ",".join( [ _ for _ in DVT_CORE_TYPES_COLUMNS - if _ not in ("id", "col_char_2", "col_date", "col_datetime", "col_tstz") + if _ + not in ( + "id", + "col_int8", # issue-1634 + "col_int16", # issue-1634 + "col_dec_20", # issue-1634 + "col_dec_38", # issue-1634 + "col_dec_10_2", # issue-1634 + "col_float32", + "col_float64", # issue-1634 + "col_char_2", + "col_date", + "col_datetime", + "col_tstz", + ) ] ) row_validation_test( tables="db2inst1.dvt_core_types", tc="mock-conn", - # OBS: Only passing this column because SYSIBM.HEX function accepts a max length of 16336 bytes (https://www.ibm.com/docs/en/db2/11.5?topic=functions-hex) hash=cols, filters="id>0 AND col_int8>0", ) @@ -185,19 +199,35 @@ def test_row_validation_core_types_auto_pks(): ) def test_row_validation_core_types_to_bigquery(): """DB2 to BigQuery dvt_core_types row validation""" + # Excluded col_float32 because BigQuery does not have an exact same type and + # float32/64 are lossy and cannot be compared. # TODO: When issue-1296 is complete remove col_date,col_datetime,col_tstz from exclusion list below. # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. + # TODO: When issue-1634 is complete remove columns tagged with issue-1634 from exclusion list below. 
cols = ",".join( [ _ for _ in DVT_CORE_TYPES_COLUMNS - if _ not in ("id", "col_char_2", "col_date", "col_datetime", "col_tstz") + if _ + not in ( + "id", + "col_int8", # issue-1634 + "col_int16", # issue-1634 + "col_dec_20", # issue-1634 + "col_dec_38", # issue-1634 + "col_dec_10_2", # issue-1634 + "col_float32", + "col_float64", # issue-1634 + "col_char_2", + "col_date", + "col_datetime", + "col_tstz", + ) ] ) row_validation_test( tables="db2inst1.dvt_core_types=pso_data_validator.dvt_core_types", tc="bq-conn", - # OBS: Only passing this column because SYSIBM.HEX function accepts a max length of 16336 bytes (https://www.ibm.com/docs/en/db2/11.5?topic=functions-hex) hash=cols, ) diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index 3548af7d1..22ec23ecb 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -33,7 +33,7 @@ ibm_db_dbi.ROWID: dt.String, } -ibis_type_to_sqla[dt.String] = sa.sql.sqltypes.String(length=3000) +ibis_type_to_sqla[dt.String] = sa.sql.sqltypes.String(length=300) def _get_type(typename) -> dt.DataType: From 7390f28b54a5374b5a9b4898cde2907cee60f8d1 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 22 Dec 2025 15:52:49 +0000 Subject: [PATCH 025/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2/datatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index 22ec23ecb..3548af7d1 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -33,7 +33,7 @@ ibm_db_dbi.ROWID: dt.String, } -ibis_type_to_sqla[dt.String] = sa.sql.sqltypes.String(length=300) +ibis_type_to_sqla[dt.String] = sa.sql.sqltypes.String(length=3000) def _get_type(typename) -> dt.DataType: From 13ffff5b51aab81537d5e3ca609f825066169fac Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 22 Dec 2025 16:01:01 +0000 Subject: [PATCH 026/100] feat: 
Prototype Db2 z/OS support --- tests/system/data_sources/test_db2.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 3eb844656..95f781ad7 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -147,6 +147,7 @@ def test_column_validation_core_types_to_bigquery(): ) def test_row_validation_core_types(): """DB2 to DB2 dvt_core_types row validation""" + # Exclude col_string because it is unbound and causes overflow error for HEX function. # TODO: When issue-1296 is complete remove col_date,col_datetime,col_tstz from exclusion list below. # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. # TODO: When issue-1634 is complete remove columns tagged with issue-1634 from exclusion list below. @@ -159,12 +160,14 @@ def test_row_validation_core_types(): "id", "col_int8", # issue-1634 "col_int16", # issue-1634 + "col_int32", # issue-1634 "col_dec_20", # issue-1634 "col_dec_38", # issue-1634 "col_dec_10_2", # issue-1634 - "col_float32", + "col_float32", # issue-1634 "col_float64", # issue-1634 "col_char_2", + "col_string", "col_date", "col_datetime", "col_tstz", @@ -201,6 +204,7 @@ def test_row_validation_core_types_to_bigquery(): """DB2 to BigQuery dvt_core_types row validation""" # Excluded col_float32 because BigQuery does not have an exact same type and # float32/64 are lossy and cannot be compared. + # Exclude col_string because it is unbound and causes overflow error for HEX function. # TODO: When issue-1296 is complete remove col_date,col_datetime,col_tstz from exclusion list below. # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. # TODO: When issue-1634 is complete remove columns tagged with issue-1634 from exclusion list below. 
@@ -213,12 +217,14 @@ def test_row_validation_core_types_to_bigquery(): "id", "col_int8", # issue-1634 "col_int16", # issue-1634 + "col_int32", # issue-1634 "col_dec_20", # issue-1634 "col_dec_38", # issue-1634 "col_dec_10_2", # issue-1634 "col_float32", "col_float64", # issue-1634 "col_char_2", + "col_string", "col_date", "col_datetime", "col_tstz", From 1bef8b63d99b671f3d2c3227b0aa5db0942a3eda Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 8 Jan 2026 11:13:08 +0000 Subject: [PATCH 027/100] chore: Pandas deprecation warning --- third_party/ibis/ibis_addon/operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_addon/operations.py b/third_party/ibis/ibis_addon/operations.py index 373637e18..f78513dd6 100644 --- a/third_party/ibis/ibis_addon/operations.py +++ b/third_party/ibis/ibis_addon/operations.py @@ -387,7 +387,7 @@ def string_to_epoch(ts: str) -> int: @execute_node.register(ops.ExtractEpochSeconds, (datetime.datetime, pd.Series)) def execute_epoch_seconds_new(op, data, **kwargs): - convert = getattr(data, "view", data.astype) + convert = data.astype try: series = convert(np.int64) # We need int64 below because NaT overflows int32. 
From 1c9e64301338c44d9f33e6b71f41cc5e6dadb000 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 8 Jan 2026 15:11:15 +0000 Subject: [PATCH 028/100] chore: Resolve Pandas FutureWarning --- data_validation/combiner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_validation/combiner.py b/data_validation/combiner.py index 1d4125218..433247120 100644 --- a/data_validation/combiner.py +++ b/data_validation/combiner.py @@ -194,7 +194,7 @@ def _generate_report_slice( logging.debug(documented.compile()) result_df = client.execute(documented) - result_df.validation_status.fillna(consts.VALIDATION_STATUS_FAIL, inplace=True) + result_df["validation_status"].fillna(consts.VALIDATION_STATUS_FAIL, inplace=True) return result_df From 4cb50afd811e55f30de7e1c1e449f7182962aa0c Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 8 Jan 2026 17:13:52 +0000 Subject: [PATCH 029/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2/datatypes.py | 8 +++++++- third_party/ibis/ibis_db2_zos/__init__.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index 3548af7d1..c12547588 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -11,11 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import sqlalchemy as sa + import ibm_db_dbi import ibis.expr.datatypes as dt from ibis.backends.base.sql.alchemy.datatypes import ibis_type_to_sqla +from ibm_db_sa.ibm_db import DB2Dialect_ibm_db +import sqlalchemy as sa +import sqlalchemy.types as sat + # Types from https://github.com/ibmdb/python-ibmdb/blob/master/IBM_DB/ibm_db/ibm_db_dbi.py _type_mapping = { @@ -35,6 +39,8 @@ ibis_type_to_sqla[dt.String] = sa.sql.sqltypes.String(length=3000) +DB2Dialect_ibm_db.ischema_names["DECFLOAT"] = sat.DOUBLE + def _get_type(typename) -> dt.DataType: typ = _type_mapping.get(typename) diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index 62d42d652..e7d3eca0d 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional, Dict, Any +from typing import Iterable, Optional, Dict, Any import sqlalchemy as sa +from third_party.ibis.ibis_addon.api import dvt_handle_failed_column_type_inference from third_party.ibis.ibis_db2 import Backend as Db2LUWBackend from third_party.ibis.ibis_db2_zos.compiler import Db2zOSCompiler @@ -67,3 +68,8 @@ def connect(dbapi_connection, connection_record): cur.execute("SET SESSION TIME ZONE = '+0:00'") super(Db2LUWBackend, self).do_connect(engine) + + def _handle_failed_column_type_inference( + self, table: sa.Table, nulltype_cols: Iterable[str] + ) -> sa.Table: + return dvt_handle_failed_column_type_inference(self, table, nulltype_cols) From 9eb756df48667134577c5892fdd1f505c799b467 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 9 Jan 2026 14:56:07 +0000 Subject: [PATCH 030/100] feat: Prototype Db2 z/OS support --- tests/resources/db2_test_tables.sql | 31 +++++++++++++++++++++++++++ tests/system/data_sources/test_db2.py | 16 ++++++++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git 
a/tests/resources/db2_test_tables.sql b/tests/resources/db2_test_tables.sql index 5bc9939fb..8ce5643da 100644 --- a/tests/resources/db2_test_tables.sql +++ b/tests/resources/db2_test_tables.sql @@ -56,6 +56,37 @@ INSERT INTO db2inst1.dvt_core_types VALUES ,TIMESTAMP'1970-01-02 21:23:03'); COMMIT; +-- Db2 data types test table (data types not covered by core types). +DROP TABLE db2inst1.dvt_db2_types; +CREATE TABLE db2inst1.dvt_db2_types +( id INTEGER NOT NULL PRIMARY KEY +, col_smallint SMALLINT +, col_int INTEGER +, col_bigint BIGINT +, col_decfloat_16 DECFLOAT(16) +, col_decfloat_32 DECFLOAT(34) +, col_clob CLOB +, col_nvarchar_30 NVARCHAR(30) +, col_nchar_2 NCHAR(2) +, col_nclob NCLOB +, col_blob BLOB +, col_char_bit CHAR(5) FOR BIT DATA +, col_varchar_bit VARCHAR(5) FOR BIT DATA +, col_graphic GRAPHIC(3) +, col_vargraphic VARGRAPHIC(3) +, col_time TIME +, col_xml XML +); +COMMENT ON TABLE db2inst1.dvt_core_types IS 'Db2 data types integration test table'; + +INSERT INTO db2inst1.dvt_db2_types VALUES +(1,123,12345,1123456789,123.456,123456.789 +,'Hello CLOB','Hello NVARCHAR','A ','Hello NCLOB' +,CAST('Hello BLOB' AS BLOB),'ABC','DEF','GHI','JKL' +,TIME'00:00:01' +,''); +COMMIT; + DROP TABLE db2inst1.dvt_null_not_null; CREATE TABLE db2inst1.dvt_null_not_null ( col_nn TIMESTAMP(0) NOT NULL diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 95f781ad7..2d8c1f639 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -55,7 +55,7 @@ def mock_get_connection_config(*args): new=mock_get_connection_config, ) def test_schema_validation_core_types(): - """DB2 to DB2 dvt_core_types schema validation""" + """Db2 to Db2 dvt_core_types schema validation""" schema_validation_test( tables="db2inst1.dvt_core_types", tc="mock-conn", @@ -67,7 +67,7 @@ def test_schema_validation_core_types(): new=mock_get_connection_config, ) def test_schema_validation_core_types_to_bigquery(): - """DB2 to 
BigQuery dvt_core_types schema validation""" + """Db2 to BigQuery dvt_core_types schema validation""" schema_validation_test( tables="db2inst1.dvt_core_types=pso_data_validator.dvt_core_types", tc="bq-conn", @@ -82,6 +82,18 @@ def test_schema_validation_core_types_to_bigquery(): ) +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_schema_validation_db2_types(): + """Db2 to Db2 dvt_db2_types schema validation""" + schema_validation_test( + tables="db2inst1.dvt_db2_types", + tc="mock-conn", + ) + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, From 7fc697681dc6bc9e4b599d5fec8d75c90b43f7b8 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 16 Jan 2026 17:43:31 +0000 Subject: [PATCH 031/100] feat: Prototype Db2 z/OS support --- data_validation/clients.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data_validation/clients.py b/data_validation/clients.py index 6edb529e5..853e0f11c 100644 --- a/data_validation/clients.py +++ b/data_validation/clients.py @@ -293,7 +293,7 @@ def list_tables(client, schema_name, tables_only=True): if tables_only and client.name != "pandas" else client.list_tables ) - if client.name in ["db2", "db2_zos", "redshift", "snowflake", "pandas"]: + if client.name in ["redshift", "snowflake", "pandas"]: return fn() return fn(database=schema_name) @@ -346,10 +346,10 @@ def get_data_client(connection_config): consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH ) if key_path: - decrypted_connection_config["credentials"] = ( - google.oauth2.service_account.Credentials.from_service_account_file( - key_path - ) + decrypted_connection_config[ + "credentials" + ] = google.oauth2.service_account.Credentials.from_service_account_file( + key_path ) if source_type not in CLIENT_LOOKUP: From 00bd60146058b40879a0e7aeeb622c7c5c4ef217 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 23 Jan 2026 13:46:02 
+0000 Subject: [PATCH 032/100] fix: Fix strftime for Db2 LUW --- tests/system/data_sources/test_db2.py | 4 +-- third_party/ibis/ibis_db2/registry.py | 51 +++++++-------------------- 2 files changed, 14 insertions(+), 41 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index cf2d92da7..1f62d3707 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -169,7 +169,7 @@ def test_column_validation_core_types_to_bigquery(): def test_row_validation_core_types(): """Db2 to Db2 dvt_core_types row validation""" # Exclude col_string because it is unbound and causes overflow error for HEX function. - # TODO: When issue-1296 is complete remove col_date,col_datetime,col_tstz from exclusion list below. + # TODO: When issue-1296 is complete remove col_tstz from exclusion list below. # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. # TODO: When issue-1634 is complete remove columns tagged with issue-1634 from exclusion list below. 
cols = ",".join( @@ -189,8 +189,6 @@ def test_row_validation_core_types(): "col_float64", # issue-1634 "col_char_2", "col_string", - "col_date", - "col_datetime", "col_tstz", ) ] diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 08b20b60a..50d7ebc51 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -153,48 +153,24 @@ def _string_agg(t, op): _strftime_to_db2_rules = { - "%a": "TMDy", # TM does it in a locale dependent way - "%A": "TMDay", + "%a": "Dy", # TM does it in a locale dependent way + "%A": "Day", "%w": "D", # 1-based day of week, see below for how we make this 0-based "%d": "DD", # day of month - "%-d": "FMDD", - "%b": "TMMon", # Sep - "%B": "TMMonth", # September + "%b": "Mon", # Sep + "%B": "Month", # September "%m": "MM", # 01 - "%-m": "FMMM", # 1 "%y": "YY", # 15 "%Y": "YYYY", # 2015 "%H": "HH24", # 09 - "%-H": "FMHH24", # 9 "%I": "HH12", # 09 - "%-I": "FMHH12", # 9 "%p": "AM", # AM or PM "%M": "MI", # zero padded minute - "%-M": "FMMI", # Minute "%S": "SS", # zero padded second - "%-S": "FMSS", # Second - "%f": "US", # zero padded microsecond - "%z": "OF", # utf offset - "%Z": "TZ", # uppercase timezone name + "%f": "FF6", # zero padded microsecond "%j": "DDD", # zero padded day of year - "%-j": "FMDDD", # day of year - "%U": "WW", # 1-based week of year } -try: - _strftime_to_db2_rules.update( - { - "%c": locale.nl_langinfo(locale.D_T_FMT), # locale date and time - "%x": locale.nl_langinfo(locale.D_FMT), # locale date - "%X": locale.nl_langinfo(locale.T_FMT), # locale time - } - ) -except AttributeError: - warnings.warn( - "locale specific date formats (%%c, %%x, %%X) are not yet implemented " - "for %s" % platform.system() - ) - _scanner = re.Scanner( # double quotes need to be escaped @@ -227,7 +203,7 @@ def _string_agg(t, op): _lexicon_values = frozenset(_strftime_to_db2_rules.values()) -_strftime_blacklist = frozenset(["%w", "%U", "%c", "%x", "%X", 
"%e"]) +_strftime_excludelist = frozenset(["%w", "%U", "%c", "%x", "%X", "%e"]) def _reduce_tokens(tokens, arg): @@ -237,7 +213,7 @@ def _reduce_tokens(tokens, arg): # reduced list of tokens that accounts for blacklisted values reduced = [] - non_special_tokens = frozenset(_strftime_to_db2_rules) - _strftime_blacklist + non_special_tokens = frozenset(_strftime_to_db2_rules) - _strftime_excludelist # TODO: how much of a hack is this? for token in tokens: @@ -251,7 +227,7 @@ def _reduce_tokens(tokens, arg): curtokens.append('"{}"'.format(token)) # we have a token that needs special treatment - elif token in _strftime_blacklist: + elif token in _strftime_excludelist: if token == "%w": value = sa.extract("dow", arg) # 0 based day of week elif token == "%U": @@ -294,12 +270,11 @@ def _reduce_tokens(tokens, arg): return reduced -def _strftime(arg, pattern): - # TODO(issue-1296): third_party/ibis/ibis_db2/registry.py:298 - AttributeError: 'Strftime' object has no attribute 'value' - tokens, _ = _scanner.scan(pattern.value) - reduced = _reduce_tokens(tokens, arg) - result = functools.reduce(sa.sql.ColumnElement.concat, reduced) - return result +def _strftime(t, op): + tokens, _ = _scanner.scan(op.format_str.value) + reduced = _reduce_tokens(tokens, t.translate(op.arg)) + breakpoint() + return functools.reduce(sa.sql.ColumnElement.concat, reduced) def _regex_replace(t, op): From dfb8e13aa521446d85cc27fb9fe561884c98507e Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 23 Jan 2026 13:48:39 +0000 Subject: [PATCH 033/100] fix: Fix strftime for Db2 LUW --- third_party/ibis/ibis_db2/registry.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 50d7ebc51..f36c80277 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -11,20 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + import functools import itertools -import locale +import operator import platform import re import string -import warnings -import operator import sqlalchemy as sa +import ibis import ibis.common.exceptions as com import ibis.expr.operations as ops - from ibis.backends.base.sql.alchemy import ( fixed_arity, sqlalchemy_operation_registry, @@ -273,7 +272,6 @@ def _reduce_tokens(tokens, arg): def _strftime(t, op): tokens, _ = _scanner.scan(op.format_str.value) reduced = _reduce_tokens(tokens, t.translate(op.arg)) - breakpoint() return functools.reduce(sa.sql.ColumnElement.concat, reduced) From 3b844cd045cf7916cb9bdcc8942d552b71f54cf7 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 23 Jan 2026 13:49:44 +0000 Subject: [PATCH 034/100] fix: Fix strftime for Db2 LUW --- tests/system/data_sources/test_db2.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 1f62d3707..3e5276bbd 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -169,7 +169,6 @@ def test_column_validation_core_types_to_bigquery(): def test_row_validation_core_types(): """Db2 to Db2 dvt_core_types row validation""" # Exclude col_string because it is unbound and causes overflow error for HEX function. - # TODO: When issue-1296 is complete remove col_tstz from exclusion list below. # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. # TODO: When issue-1634 is complete remove columns tagged with issue-1634 from exclusion list below. 
cols = ",".join( @@ -189,7 +188,6 @@ def test_row_validation_core_types(): "col_float64", # issue-1634 "col_char_2", "col_string", - "col_tstz", ) ] ) From f0473fe28857039cb75e7c0360aa704feb8c2224 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 23 Jan 2026 16:20:15 +0000 Subject: [PATCH 035/100] test: Change dvt_db2_types testing to compare with BigQuery table --- tests/resources/bigquery_test_tables.sql | 29 ++++++++++++++++++ tests/system/data_sources/test_db2.py | 39 ++++++++++++++++++++++-- third_party/ibis/ibis_db2/datatypes.py | 3 +- 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/tests/resources/bigquery_test_tables.sql b/tests/resources/bigquery_test_tables.sql index b5f15e1c5..210a8f092 100644 --- a/tests/resources/bigquery_test_tables.sql +++ b/tests/resources/bigquery_test_tables.sql @@ -164,6 +164,7 @@ INSERT INTO `pso_data_validator`.`test_generate_partitions_v2` VALUES ("St. Paul''s", 5678, '2023-08-27 15:00:00', '2023-08-23', True, 2.1), ("St. Paul''s", 5678, '2023-08-27 15:00:00', '2023-08-23', False, 3.5); +-- Comparison table for SQL Server specific test table. CREATE OR REPLACE TABLE pso_data_validator.dvt_sql_server_types ( id INT64 NOT NULL , col_int1 INT64 @@ -210,6 +211,34 @@ INSERT INTO pso_data_validator.dvt_sql_server_types VALUES ,CAST('B' AS BYTES),CAST('B' AS BYTES),CAST('B' AS BYTES),FALSE,CAST('A' AS BYTES) ); +-- Comparison table for Db2 specific test table. 
+CREATE OR REPLACE TABLE pso_data_validator.dvt_db2_types +( id INT64 NOT NULL +, col_smallint INT64 +, col_int INT64 +, col_bigint INT64 +, col_decfloat_16 NUMERIC +, col_decfloat_32 NUMERIC +, col_clob STRING +, col_nvarchar_30 STRING +, col_nchar_2 STRING +, col_nclob STRING +, col_blob BYTES +, col_char_bit STRING +, col_varchar_bit STRING +, col_graphic STRING +, col_vargraphic STRING +, col_time TIME +, col_xml STRING +) OPTIONS (description='Db2 data types integration test table (BigQuery target)'); + +INSERT INTO pso_data_validator.dvt_db2_types VALUES +(1,123,12345,1123456789,123.456,123456.789 +,'Hello CLOB','Hello NVARCHAR','A ','Hello NCLOB' +,CAST('Hello BLOB' AS BYTES),'ABC','DEF','GHI','JKL' +,TIME'00:00:01' +,''); + CREATE OR REPLACE TABLE `pso_data_validator`.`dvt_binary` ( binary_id BYTES(16) NOT NULL , int_id INT64 NOT NULL diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index fe78c1b63..ed133207f 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -97,10 +97,11 @@ def test_schema_validation_core_types_to_bigquery(): new=mock_get_connection_config, ) def test_schema_validation_db2_types(): - """Db2 to Db2 dvt_db2_types schema validation""" + """Db2 to BigQuery dvt_db2_types schema validation""" schema_validation_test( tables="pso_data_validator.dvt_db2_types", - tc="mock-conn", + tc="bq-conn", + allow_list=("int16:int64,int32:int64," "decimal:decimal(38,9)"), ) @@ -163,6 +164,25 @@ def test_column_validation_core_types_to_bigquery(): ) +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_column_validation_db2_types_to_bigquery(): + """DB2 to BigQuery dvt_db2_types column validation""" + cols = "*" + column_validation_test( + tc="bq-conn", + tables="pso_data_validator.dvt_db2_types", + sum_cols=cols, + min_cols=cols, + max_cols=cols, + avg_cols=cols, + std_cols=cols, + 
wildcard_include_timestamp=True, + ) + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, @@ -256,6 +276,21 @@ def test_row_validation_core_types_to_bigquery(): ) +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_row_validation_db2_types_to_bigquery(): + """Db2 to BigQuery dvt_db2_types row validation""" + # TODO: When issue-1296 is complete change col to "*" below. + cols = "col_decfloat_16,col_decfloat_32" + row_validation_test( + tables="pso_data_validator.dvt_db2_types", + tc="bq-conn", + hash=cols, + ) + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index c12547588..db39a948e 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -37,7 +37,8 @@ ibm_db_dbi.ROWID: dt.String, } -ibis_type_to_sqla[dt.String] = sa.sql.sqltypes.String(length=3000) +# TODO Temporarily changed to 300 until issue-1296 is complete. 
+ibis_type_to_sqla[dt.String] = sa.sql.sqltypes.String(length=300) DB2Dialect_ibm_db.ischema_names["DECFLOAT"] = sat.DOUBLE From 2d3b21dab940b0a2c4ee940a4c5eab48ba0c5475 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 23 Jan 2026 16:52:41 +0000 Subject: [PATCH 036/100] test: Change dvt_db2_types testing to compare with BigQuery table --- data_validation/consts.py | 2 ++ data_validation/query_builder/query_builder.py | 5 ++++- tests/system/data_sources/test_db2.py | 6 +++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/data_validation/consts.py b/data_validation/consts.py index bc6b7083e..2e78964b4 100644 --- a/data_validation/consts.py +++ b/data_validation/consts.py @@ -261,6 +261,8 @@ CALC_FIELD_EPOCH_SECONDS = "epoch_seconds" CALC_FIELD_HASH = "hash" CALC_FIELD_IFNULL = "ifnull" +CALC_FIELD_IFNULL_DEFAULT = "default_null_string" +CALC_FIELD_IFNULL_DEFAULT_STRING = "XX" CALC_FIELD_LENGTH = "length" CALC_FIELD_PADDED_CHAR_LENGTH = "padded_char_length" CALC_FIELD_RSTRIP = "rstrip" diff --git a/data_validation/query_builder/query_builder.py b/data_validation/query_builder/query_builder.py index 31d9da021..97aa27255 100644 --- a/data_validation/query_builder/query_builder.py +++ b/data_validation/query_builder/query_builder.py @@ -341,7 +341,10 @@ def to_char(config, fields): @staticmethod def ifnull(config, fields): default_null_string = ibis.literal( - config.get("default_null_string", "DEFAULT_REPLACEMENT_STRING") + config.get( + consts.CALC_FIELD_IFNULL_DEFAULT, + consts.CALC_FIELD_IFNULL_DEFAULT_STRING, + ) ) fields = [fields[0], default_null_string] return CalculatedField( diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index ed133207f..377c679f2 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -282,8 +282,12 @@ def test_row_validation_core_types_to_bigquery(): ) def test_row_validation_db2_types_to_bigquery(): """Db2 to BigQuery dvt_db2_types row 
validation""" + # Excluded col_clob,col_nclob,col_xml because they are incompatible with hex() function (due to potential length). # TODO: When issue-1296 is complete change col to "*" below. - cols = "col_decfloat_16,col_decfloat_32" + # TODO Add col_blob to list below once issue-1354 is complete. + # TODO Add col_char_2 to list below once issue-1354 is complete. + # TODO Add col_char_bit,col_varchar_bit to list below once issue-1655 is complete. + cols = "col_decfloat_16,col_decfloat_32,col_nvarchar_30,col_graphic,col_vargraphic,col_time" row_validation_test( tables="pso_data_validator.dvt_db2_types", tc="bq-conn", From 48011bdf5322dae887a23a7861152afa2f2c1e78 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 23 Jan 2026 17:34:13 +0000 Subject: [PATCH 037/100] fix: Use strftime to format TIME data types --- data_validation/config_manager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index aabf5493f..5831a90d7 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -1174,11 +1174,13 @@ def _get_comparison_max_col_length(self) -> int: return self._comparison_max_col_length def _strftime_format( - self, column_type: Union[dt.Date, dt.Timestamp], client + self, column_type: Union[dt.Date, dt.Timestamp, dt.Time], client ) -> str: if column_type.is_timestamp(): return "%Y-%m-%d %H:%M:%S" - if clients.is_oracle_client(client): + elif column_type.is_time(): + return "%H:%M:%S" + elif clients.is_oracle_client(client): # Oracle DATE is a DateTime return "%Y-%m-%d %H:%M:%S" return "%Y-%m-%d" From 85c0a46260445f3852c0f4ce2453aef1ee658172 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Tue, 27 Jan 2026 13:18:11 +0000 Subject: [PATCH 038/100] fix: Fix Db2 TIME formatting --- third_party/ibis/ibis_db2/registry.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 
f36c80277..cff9d0464 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -125,6 +125,10 @@ def _cast(t, op): if arg_dtype.is_integer() and typ.is_timestamp(): return t.integer_to_timestamp(sa_arg, tz=typ.timezone) + if arg_dtype.is_time() and typ.is_string(): + # Force colons as time separator which CHAR(column,JIS) expression. + return sa.func.char(sa_arg, sa.literal_column("JIS")) + if arg_dtype.is_binary() and typ.is_string(): # Binary to string cast is a "to hex" conversion for DVT. return sa.func.lower(sa.func.hex(sa_arg)) From 1ffb7a742b844dc9c082cd7fc4b0922e3719a46d Mon Sep 17 00:00:00 2001 From: nj1973 Date: Tue, 27 Jan 2026 13:43:02 +0000 Subject: [PATCH 039/100] feat: Prototype Db2 z/OS support --- data_validation/config_manager.py | 6 ++---- data_validation/consts.py | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index 5831a90d7..aabf5493f 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -1174,13 +1174,11 @@ def _get_comparison_max_col_length(self) -> int: return self._comparison_max_col_length def _strftime_format( - self, column_type: Union[dt.Date, dt.Timestamp, dt.Time], client + self, column_type: Union[dt.Date, dt.Timestamp], client ) -> str: if column_type.is_timestamp(): return "%Y-%m-%d %H:%M:%S" - elif column_type.is_time(): - return "%H:%M:%S" - elif clients.is_oracle_client(client): + if clients.is_oracle_client(client): # Oracle DATE is a DateTime return "%Y-%m-%d %H:%M:%S" return "%Y-%m-%d" diff --git a/data_validation/consts.py b/data_validation/consts.py index 2e78964b4..ec24668c5 100644 --- a/data_validation/consts.py +++ b/data_validation/consts.py @@ -262,6 +262,7 @@ CALC_FIELD_HASH = "hash" CALC_FIELD_IFNULL = "ifnull" CALC_FIELD_IFNULL_DEFAULT = "default_null_string" +# TODO Below is a temporary fix on this branch until issue-1470 is resolved. 
CALC_FIELD_IFNULL_DEFAULT_STRING = "XX" CALC_FIELD_LENGTH = "length" CALC_FIELD_PADDED_CHAR_LENGTH = "padded_char_length" From dedf97eb50636fc692038389b35385afb5ac78c5 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 30 Jan 2026 16:50:35 +0000 Subject: [PATCH 040/100] test: Add missing DBCLOB column to pso_data_validator.dvt_db2_types --- tests/resources/bigquery_test_tables.sql | 3 ++- tests/resources/db2_test_tables.sql | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/resources/bigquery_test_tables.sql b/tests/resources/bigquery_test_tables.sql index 210a8f092..1b7d36b30 100644 --- a/tests/resources/bigquery_test_tables.sql +++ b/tests/resources/bigquery_test_tables.sql @@ -223,6 +223,7 @@ CREATE OR REPLACE TABLE pso_data_validator.dvt_db2_types , col_nvarchar_30 STRING , col_nchar_2 STRING , col_nclob STRING +, col_dbclob STRING , col_blob BYTES , col_char_bit STRING , col_varchar_bit STRING @@ -234,7 +235,7 @@ CREATE OR REPLACE TABLE pso_data_validator.dvt_db2_types INSERT INTO pso_data_validator.dvt_db2_types VALUES (1,123,12345,1123456789,123.456,123456.789 -,'Hello CLOB','Hello NVARCHAR','A ','Hello NCLOB' +,'Hello CLOB','Hello NVARCHAR','A ','Hello NCLOB','Hello DBCLOB' ,CAST('Hello BLOB' AS BYTES),'ABC','DEF','GHI','JKL' ,TIME'00:00:01' ,''); diff --git a/tests/resources/db2_test_tables.sql b/tests/resources/db2_test_tables.sql index 4751ec8a6..4860fb653 100644 --- a/tests/resources/db2_test_tables.sql +++ b/tests/resources/db2_test_tables.sql @@ -69,6 +69,7 @@ CREATE TABLE IF NOT EXISTS pso_data_validator.dvt_db2_types , col_nvarchar_30 NVARCHAR(30) , col_nchar_2 NCHAR(2) , col_nclob NCLOB +, col_dbclob DBCLOB , col_blob BLOB , col_char_bit CHAR(5) FOR BIT DATA , col_varchar_bit VARCHAR(5) FOR BIT DATA @@ -81,7 +82,7 @@ COMMENT ON TABLE pso_data_validator.dvt_core_types IS 'Db2 data types integratio INSERT INTO pso_data_validator.dvt_db2_types VALUES (1,123,12345,1123456789,123.456,123456.789 -,'Hello CLOB','Hello NVARCHAR','A 
','Hello NCLOB' +,'Hello CLOB','Hello NVARCHAR','A ','Hello NCLOB','Hello DBCLOB' ,CAST('Hello BLOB' AS BLOB),'ABC','DEF','GHI','JKL' ,TIME'00:00:01' ,''); From 75746def44a2b039b543745c6149a8668e686ec7 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 30 Jan 2026 16:58:34 +0000 Subject: [PATCH 041/100] test: Fix pso_data_validator.dvt_core_types data --- tests/resources/db2_test_tables.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/resources/db2_test_tables.sql b/tests/resources/db2_test_tables.sql index 4860fb653..c92433f06 100644 --- a/tests/resources/db2_test_tables.sql +++ b/tests/resources/db2_test_tables.sql @@ -42,19 +42,19 @@ INSERT INTO pso_data_validator.dvt_core_types VALUES ,12345678901234567890,1234567890123456789012345,123.11,123456.1,12345678.1 ,'Hello DVT','A ','Hello DVT' ,DATE'1970-01-01',TIMESTAMP'1970-01-01 00:00:01' -,TIMESTAMP'1969-12-31 23:23:01'); +,TIMESTAMP'1970-01-01 01:00:01'); INSERT INTO pso_data_validator.dvt_core_types VALUES (2,2,2,2,2 ,12345678901234567890,1234567890123456789012345,123.22,123456.2,12345678.2 ,'Hello DVT','B','Hello DVT' ,DATE'1970-01-02',TIMESTAMP'1970-01-02 00:00:02' -,TIMESTAMP'1970-01-01 22:23:02'); +,TIMESTAMP'1970-01-02 02:00:02'); INSERT INTO pso_data_validator.dvt_core_types VALUES (3,3,3,3,3 ,12345678901234567890,1234567890123456789012345,123.3,123456.3,12345678.3 ,'Hello DVT','C ','Hello DVT' ,DATE'1970-01-03',TIMESTAMP'1970-01-03 00:00:03' -,TIMESTAMP'1970-01-02 21:23:03'); +,TIMESTAMP'1970-01-03 03:00:03'); COMMIT; -- Db2 data types test table (data types not covered by core types). 
From 336d3d36f034781501309e5db17800a19ba18d8e Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 30 Jan 2026 17:22:44 +0000 Subject: [PATCH 042/100] test: Add more Db2 tests --- tests/resources/db2_test_tables.sql | 57 +++++++++++++ tests/system/data_sources/test_db2.py | 117 ++++++++++++++++++++++++-- 2 files changed, 169 insertions(+), 5 deletions(-) diff --git a/tests/resources/db2_test_tables.sql b/tests/resources/db2_test_tables.sql index c92433f06..3d3550ef2 100644 --- a/tests/resources/db2_test_tables.sql +++ b/tests/resources/db2_test_tables.sql @@ -152,3 +152,60 @@ INSERT INTO pso_data_validator.dvt_binary VALUES (CAST('DVT-key-4' AS VARBINARY( INSERT INTO pso_data_validator.dvt_binary VALUES (CAST('DVT-key-5' AS VARBINARY(16)), 5, 'Row 5'); COMMIT; +CREATE TABLE IF NOT EXISTS pso_data_validator.dvt_varchar_id +( id VARCHAR(15) NOT NULL PRIMARY KEY +, other_data VARCHAR(100) +); +COMMENT ON TABLE pso_data_validator.dvt_varchar_id IS 'Integration test table used to test varchar pk matching. 
Trailing blanks are significant'; +INSERT INTO pso_data_validator.dvt_varchar_id VALUES ('DVT-key-1', 'Row 1'); +INSERT INTO pso_data_validator.dvt_varchar_id VALUES ('DVT-key-2', 'Row 2'); +INSERT INTO pso_data_validator.dvt_varchar_id VALUES ('DVT-key-3', 'Row 3'); +INSERT INTO pso_data_validator.dvt_varchar_id VALUES ('DVT-key-4 ', 'Row 4'); +INSERT INTO pso_data_validator.dvt_varchar_id VALUES ('DVT-key-5', 'Row 5'); +COMMIT; + +CREATE TABLE IF NOT EXISTS pso_data_validator.dvt_datetime_id +( id TIMESTAMP(0) NOT NULL PRIMARY KEY +, other_data VARCHAR(100) +); +COMMENT ON TABLE pso_data_validator.dvt_datetime_id IS 'Integration test table used to test datetime pk matching.'; +INSERT INTO pso_data_validator.dvt_datetime_id VALUES (TIMESTAMP'2020-01-01 12:00:00', 'Row 1'); +INSERT INTO pso_data_validator.dvt_datetime_id VALUES (TIMESTAMP'2020-02-01 12:00:00', 'Row 2'); +INSERT INTO pso_data_validator.dvt_datetime_id VALUES (TIMESTAMP'2020-03-01 12:00:00', 'Row 3'); +INSERT INTO pso_data_validator.dvt_datetime_id VALUES (TIMESTAMP'2020-04-01 12:00:00', 'Row 4'); +INSERT INTO pso_data_validator.dvt_datetime_id VALUES (TIMESTAMP'2020-05-01 12:00:00', 'Row 5'); +COMMIT; + +CREATE TABLE IF NOT EXISTS pso_data_validator.dvt_group_by_timestamp +( id INTEGER NOT NULL PRIMARY KEY +, group_id INTEGER +, col_date DATE +, col_datetime TIMESTAMP(0) +); +COMMENT ON TABLE pso_data_validator.dvt_group_by_timestamp IS 'Integration test table used to test Timestamp grouping.'; +INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES (1,1,DATE'2021-01-01',TIMESTAMP'2021-01-01 12:00:00'); +INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES (2,1,DATE'2021-01-01',TIMESTAMP'2021-01-01 13:00:00'); +INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES (3,1,DATE'2021-01-01',TIMESTAMP'2021-01-01 14:00:00'); +INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES (4,2,DATE'2022-02-02',TIMESTAMP'2022-02-02 12:00:00'); +INSERT INTO 
pso_data_validator.dvt_group_by_timestamp VALUES (5,2,DATE'2022-02-02',TIMESTAMP'2022-02-02 13:00:00'); +INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES (6,3,DATE'2023-03-03',TIMESTAMP'2023-03-03 12:00:00'); +COMMIT; + +CREATE TABLE IF NOT EXISTS pso_data_validator.dvt_tricky_dates ( + id INTEGER NOT NULL PRIMARY KEY +, col_dt_low DATE +, col_dt_epoch DATE +, col_dt_high DATE +, col_dt_4712 DATE +, col_ts_low TIMESTAMP(0) +, col_ts_epoch TIMESTAMP(0) +, col_ts_high TIMESTAMP(0) +, col_ts_4712 TIMESTAMP(0) +); +COMMENT ON TABLE pso_data_validator.dvt_tricky_dates IS 'Integration test table used to test potentially difficult Timestamps.'; +INSERT INTO pso_data_validator.dvt_tricky_dates VALUES +(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31',DATE'4712-12-31' +,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59',TIMESTAMP'4712-12-31 23:23:59'); +-- NULL in all columns. +INSERT INTO pso_data_validator.dvt_tricky_dates (id) VALUES (2); +COMMIT; \ No newline at end of file diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index f43653b6c..e1981513d 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -20,14 +20,18 @@ from data_validation import cli_tools, consts from tests.system.data_sources.common_functions import ( DVT_CORE_TYPES_COLUMNS, - find_tables_test, - schema_validation_test, + DVT_TRICKY_DATES_COLUMNS, column_validation_test, - run_test_from_cli_args, - null_not_null_assertions, - row_validation_test, + column_validation_test_args, custom_query_validation_test, + find_tables_test, + id_column_row_validation_test, + id_column_query_row_validation_test, + null_not_null_assertions, raw_query_test, + row_validation_test, + run_test_from_cli_args, + schema_validation_test, ) from tests.system.data_sources.test_bigquery import BQ_CONN @@ -180,6 +184,7 @@ def test_column_validation_db2_types_to_bigquery(): avg_cols=cols, 
std_cols=cols, wildcard_include_timestamp=True, + ) @mock.patch( @@ -226,6 +231,48 @@ def test_column_validation_large_decimals_to_bigquery_mismatch(): assert "sum__col_dec_18_1_fail" in df[consts.VALIDATION_NAME].values +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_column_validation_group_by_timestamp(): + """Test that --grouped-columns on Timestamps works correctly. + + DVT casts TIMESTAMP grouped columns to DATE, Oracle DATE includes a time element + which should be removed in SQL otherwise groups will not match Pandas. + """ + args = column_validation_test_args( + tables="pso_data_validator.dvt_group_by_timestamp", + grouped_columns="col_datetime", + filter_status=None, + ) + df = run_test_from_cli_args(args) + # We expect 3 groups in the data set even though there are 6 records, due to Timestamp to Date cast. + assert len(df) == 3 + # All groups should be a successful validation. + assert all( + _ == "success" for _ in df[consts.VALIDATION_STATUS] + ), "Not all records are marked as success" + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_column_validation_tricky_dates_to_bigquery(): + """Test with date values that are at the extremes, e.g. 
9999-12-31.""" + cols = ",".join(DVT_TRICKY_DATES_COLUMNS) + column_validation_test( + tc="bq-conn", + tables="pso_data_validator.dvt_tricky_dates", + min_cols=cols, + max_cols=cols, + sum_cols=cols, + grouped_columns="id", + wildcard_include_timestamp=True, + ) + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, @@ -335,6 +382,29 @@ def test_row_validation_db2_types_to_bigquery(): tables="pso_data_validator.dvt_db2_types", tc="bq-conn", hash=cols, + ) + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_row_validation_datetime_pk_to_bigquery(): + """Test datetime primary key join columns""" + # TODO Remove use_randow_row option below when issue-1445 is actioned. + id_column_row_validation_test( + "pso_data_validator.dvt_datetime_id", + use_randow_row=False, + ) + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_varchar_pk_row_validation_to_bigquery(): + """Test varchar primary keys""" + id_column_row_validation_test("pso_data_validator.dvt_varchar_id") @mock.patch( @@ -357,6 +427,34 @@ def test_row_validation_large_decimals_to_bigquery(): ) +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_row_validation_tricky_dates_to_bigquery(): + """Test with date values that are at the extremes, e.g. 9999-12-31.""" + cols = ",".join(DVT_TRICKY_DATES_COLUMNS) + row_validation_test( + tables="pso_data_validator.dvt_tricky_dates", + tc="bq-conn", + hash=cols, + ) + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_row_validation_comp_fields_tricky_dates_to_bigquery(): + """Test with date values that are at the extremes, e.g. 
9999-12-31.""" + cols = ",".join(DVT_TRICKY_DATES_COLUMNS) + row_validation_test( + tables="pso_data_validator.dvt_tricky_dates", + tc="bq-conn", + comp_fields=cols, + ) + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, @@ -382,6 +480,15 @@ def test_custom_query_row_validation_core_types_to_bigquery(): ) +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_varchar_pk_query_row_validation_to_bigquery(): + """Test varchar primary keys on custom query""" + id_column_query_row_validation_test("pso_data_validator.dvt_varchar_id") + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, From f9f24a843f6b7f21206140546f63881cff658322 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 5 Feb 2026 10:46:33 +0000 Subject: [PATCH 043/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/__init__.py | 1 + third_party/ibis/ibis_db2_zos/datatypes.py | 27 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 third_party/ibis/ibis_db2_zos/datatypes.py diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index e7d3eca0d..7f9e087c3 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -19,6 +19,7 @@ from third_party.ibis.ibis_addon.api import dvt_handle_failed_column_type_inference from third_party.ibis.ibis_db2 import Backend as Db2LUWBackend from third_party.ibis.ibis_db2_zos.compiler import Db2zOSCompiler +from third_party.ibis.ibis_db2_zos.datatypes import _get_type class Backend(Db2LUWBackend): diff --git a/third_party/ibis/ibis_db2_zos/datatypes.py b/third_party/ibis/ibis_db2_zos/datatypes.py new file mode 100644 index 000000000..dd4399691 --- /dev/null +++ b/third_party/ibis/ibis_db2_zos/datatypes.py @@ -0,0 +1,27 @@ +# Copyright 2026 Google 
Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ibm_db_sa.ibm_db import DB2Dialect_ibm_db +from ibm_db_sa.base import GRAPHIC +import sqlalchemy.types as sat + +# This import is only here to trigger the import of this module which patches ischema_names. +from third_party.ibis.ibis_db2.datatypes import _get_type # noqa: F401 + + +# TODO Temporary additions for Db2 z/OS testing. +# These lines are also polluting the standard Db2 ischema_names but that is low risk. +if "VARGRAPHIC" not in DB2Dialect_ibm_db.ischema_names: + DB2Dialect_ibm_db.ischema_names["VARGRAPHIC"] = GRAPHIC +DB2Dialect_ibm_db.ischema_names["ROWID"] = sat.VARCHAR From 595f8656b778224945d23c5abc93709ac2134a27 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 5 Feb 2026 10:50:24 +0000 Subject: [PATCH 044/100] Revert "feat: Prototype Db2 z/OS support" This reverts commit f9f24a843f6b7f21206140546f63881cff658322. 
--- third_party/ibis/ibis_db2_zos/__init__.py | 1 - third_party/ibis/ibis_db2_zos/datatypes.py | 27 ---------------------- 2 files changed, 28 deletions(-) delete mode 100644 third_party/ibis/ibis_db2_zos/datatypes.py diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index 7f9e087c3..e7d3eca0d 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -19,7 +19,6 @@ from third_party.ibis.ibis_addon.api import dvt_handle_failed_column_type_inference from third_party.ibis.ibis_db2 import Backend as Db2LUWBackend from third_party.ibis.ibis_db2_zos.compiler import Db2zOSCompiler -from third_party.ibis.ibis_db2_zos.datatypes import _get_type class Backend(Db2LUWBackend): diff --git a/third_party/ibis/ibis_db2_zos/datatypes.py b/third_party/ibis/ibis_db2_zos/datatypes.py deleted file mode 100644 index dd4399691..000000000 --- a/third_party/ibis/ibis_db2_zos/datatypes.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2026 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ibm_db_sa.ibm_db import DB2Dialect_ibm_db -from ibm_db_sa.base import GRAPHIC -import sqlalchemy.types as sat - -# This import is only here to trigger the import of this module which patches ischema_names. -from third_party.ibis.ibis_db2.datatypes import _get_type # noqa: F401 - - -# TODO Temporary additions for Db2 z/OS testing. 
-# These lines are also polluting the standard Db2 ischema_names but that is low risk. -if "VARGRAPHIC" not in DB2Dialect_ibm_db.ischema_names: - DB2Dialect_ibm_db.ischema_names["VARGRAPHIC"] = GRAPHIC -DB2Dialect_ibm_db.ischema_names["ROWID"] = sat.VARCHAR From 6cb7eb66f55d2806d76baf027adf89b1bec823f3 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 6 Feb 2026 09:16:57 +0000 Subject: [PATCH 045/100] feat: Prototype Db2 z/OS support --- data_validation/config_manager.py | 5 +++-- third_party/ibis/ibis_db2/registry.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index 0efae10c5..8fe31d2fc 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -1010,9 +1010,10 @@ def require_pre_agg_calc_field( # Oracle BLOB is invalid for use with SQL COUNT function. # The expression below returns True if client is Oracle which # has the effect of triggering use of byte_length transformation. + # Same for Db2 z/OS. return bool( - self.source_client.name == "oracle" - or self.target_client.name == "oracle" + self.source_client.name in ("oracle", "db2_zos") + or self.target_client.name in ("oracle", "db2_zos") ) else: # Convert to length for any min/max/sum on binary columns. 
diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index cff9d0464..91969b524 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -515,6 +515,6 @@ def sa_format_hashbytes_db2(translator, op): ops.CumulativeAny: unary(sa.func.bool_or), ops.IdenticalTo: _identical_to, # aggregate methods - ops.Count: _reduction_count(sa.func.count), + ops.Count: _reduction_count(sa.func.count_big), } ) From 1b41c808530dcd4d26b1189bcc6f6a13d0e2b1d7 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 6 Feb 2026 13:22:52 +0000 Subject: [PATCH 046/100] fix: Attempt to prevent exceptions due to ROWID types --- third_party/ibis/ibis_db2_zos/__init__.py | 1 + third_party/ibis/ibis_db2_zos/datatypes.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 third_party/ibis/ibis_db2_zos/datatypes.py diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index e7d3eca0d..461c918a4 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -19,6 +19,7 @@ from third_party.ibis.ibis_addon.api import dvt_handle_failed_column_type_inference from third_party.ibis.ibis_db2 import Backend as Db2LUWBackend from third_party.ibis.ibis_db2_zos.compiler import Db2zOSCompiler +import third_party.ibis.ibis_db2.datatypes # noqa: F401 class Backend(Db2LUWBackend): diff --git a/third_party/ibis/ibis_db2_zos/datatypes.py b/third_party/ibis/ibis_db2_zos/datatypes.py new file mode 100644 index 000000000..e1178df61 --- /dev/null +++ b/third_party/ibis/ibis_db2_zos/datatypes.py @@ -0,0 +1,19 @@ +# Copyright 2026 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ibm_db_sa.ibm_db import DB2Dialect_ibm_db +import sqlalchemy.types as sat + + +DB2Dialect_ibm_db.ischema_names["ROWID"] = sat.VARCHAR From e6fe66441e2d1b1996bc8e9c84e0149e821ed9e5 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 6 Feb 2026 13:25:52 +0000 Subject: [PATCH 047/100] Revert "fix: Attempt to prevent exceptions due to ROWID types" This reverts commit 1b41c808530dcd4d26b1189bcc6f6a13d0e2b1d7. --- third_party/ibis/ibis_db2_zos/__init__.py | 1 - third_party/ibis/ibis_db2_zos/datatypes.py | 19 ------------------- 2 files changed, 20 deletions(-) delete mode 100644 third_party/ibis/ibis_db2_zos/datatypes.py diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index 461c918a4..e7d3eca0d 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -19,7 +19,6 @@ from third_party.ibis.ibis_addon.api import dvt_handle_failed_column_type_inference from third_party.ibis.ibis_db2 import Backend as Db2LUWBackend from third_party.ibis.ibis_db2_zos.compiler import Db2zOSCompiler -import third_party.ibis.ibis_db2.datatypes # noqa: F401 class Backend(Db2LUWBackend): diff --git a/third_party/ibis/ibis_db2_zos/datatypes.py b/third_party/ibis/ibis_db2_zos/datatypes.py deleted file mode 100644 index e1178df61..000000000 --- a/third_party/ibis/ibis_db2_zos/datatypes.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2026 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ibm_db_sa.ibm_db import DB2Dialect_ibm_db -import sqlalchemy.types as sat - - -DB2Dialect_ibm_db.ischema_names["ROWID"] = sat.VARCHAR From 6828f1ab0cfd22d0eec653788d42e23f7c815928 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 6 Feb 2026 14:55:00 +0000 Subject: [PATCH 048/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2/datatypes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index db39a948e..4b4af73b0 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -17,7 +17,6 @@ import ibis.expr.datatypes as dt from ibis.backends.base.sql.alchemy.datatypes import ibis_type_to_sqla from ibm_db_sa.ibm_db import DB2Dialect_ibm_db -import sqlalchemy as sa import sqlalchemy.types as sat @@ -38,7 +37,7 @@ } # TODO Temporarily changed to 300 until issue-1296 is complete. 
-ibis_type_to_sqla[dt.String] = sa.sql.sqltypes.String(length=300) +ibis_type_to_sqla[dt.String] = sat.String(length=300) DB2Dialect_ibm_db.ischema_names["DECFLOAT"] = sat.DOUBLE From d669a24ae7f8565365ab50c5e1ec167c45036153 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 6 Feb 2026 15:24:24 +0000 Subject: [PATCH 049/100] feat: Add Db2 z/OS VARG variant of VARGRAPHIC --- third_party/ibis/ibis_db2/datatypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index 4b4af73b0..dc81763de 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -40,6 +40,7 @@ ibis_type_to_sqla[dt.String] = sat.String(length=300) DB2Dialect_ibm_db.ischema_names["DECFLOAT"] = sat.DOUBLE +DB2Dialect_ibm_db.ischema_names["VARG"] = DB2Dialect_ibm_db.ischema_names["VARGRAPHIC"] def _get_type(typename) -> dt.DataType: From 6a55378ba33c1abdae0d18505008dc4aef4ea5c2 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 6 Feb 2026 18:32:32 +0000 Subject: [PATCH 050/100] feat: Prototype Db2 z/OS support --- tests/system/data_sources/test_db2.py | 12 ++++++ third_party/ibis/ibis_db2/__init__.py | 61 +++++++++++++++++++++++---- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 877dff137..7ad3f978a 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -486,6 +486,18 @@ def test_varchar_pk_query_row_validation_to_bigquery(): id_column_query_row_validation_test("pso_data_validator.dvt_varchar_id") +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_fixed_char_pk_row_validation_to_bigquery(): + """Test fixed char primary keys""" + id_column_row_validation_test( + "db2inst1.dvt_fixed_char_id=pso_data_validator.dvt_fixed_char_id", + use_random_row=False, + ) + + 
@mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 574475c39..0b33e1d78 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -106,24 +106,69 @@ def list_primary_key_columns(self, database: str, table: str) -> list: ) return [_[0] for _ in result.cursor.fetchall()] - def raw_column_metadata_not_implemented( + def raw_column_metadata( self, database: str = None, table: str = None, query: str = None ) -> list: """Define this method to allow DVT to test if backend specific transformations may be needed for comparison. - Partner method to _metadata that retains raw data type information instead of converting - to Ibis types. This works in the same way as _metadata by running a query over the DVT - source, either schema.table or a custom query, and fetching the metadata using sp_describe_first_result_set. - - THIS METHOD IS NOT IMPLEMENTED YET. + Partner method to _metadata that retains raw data type information instead of converting to Ibis types. + This works in the same way as _metadata by running a query over the DVT source, either schema.table or a + custom query, and fetching the first row. From the cursor we can detect data types of the row's columns. + NOTE: This only works for table look-ups. For custom queries the raw data types are not available to us + due to the IBM Db2 driver hiding the real data types. Returns: list: A list of tuples containing the standard 7 DB API fields: https://peps.python.org/pep-0249/#description """ - return [] + assert (database and table) or query, "We should never receive all args=None" + if database and table: + # For table-based validation, query the system catalog to get the true data type. + get_column_metadata_sql = """ + SELECT COLNAME, TYPENAME, LENGTH, SCALE, NULLS + FROM SYSCAT.COLUMNS + WHERE TABSCHEMA = ? 
AND TABNAME = ? + ORDER BY COLNO + """ + with self.begin() as con: + result = con.exec_driver_sql( + get_column_metadata_sql, + parameters=(database.upper(), table.upper()), + ) + # Yield a 7-tuple mimicking cursor.description, with the true typename. + for ( + colname, + typename, + col_length, + col_scale, + nullable, + ) in result.cursor.fetchall(): + yield ( + colname, + typename, + col_length, + col_length, + col_length, + col_scale, + nullable, + ) + elif query: + # For custom queries, the system catalog cannot be used. Fall back to + # cursor.description, which may not distinguish padded char types. + source = f"({query})" + with self.begin() as con: + result = con.exec_driver_sql(f"SELECT * FROM {source} t0 LIMIT 0") + cursor = result.cursor + yield from (column for column in cursor.description) def is_char_type_padded(self, char_type: Tuple) -> bool: """Define this method if the backend supports character/string types that are padded and returns padded values, which DVT may want to trim""" - return char_type[0] == "CHARACTER" + type_code = char_type[0] + if isinstance(type_code, str): + return type_code.upper() == "CHARACTER" + else: + # From cursor.description for custom queries, this is a DBAPITypeObject. + # It's not possible to distinguish padded char types in this case, + # so we default to False to be safe and avoid trimming incorrectly. 
+ return False From e5b060f39ce840fc7d5deff74598697aa3877bcc Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 9 Feb 2026 14:51:21 +0000 Subject: [PATCH 051/100] fix: Skip Db2 internal generated columns --- tests/resources/db2_test_tables.sql | 18 +++++++++++++- tests/system/data_sources/test_db2.py | 34 +++++++++++++++++++++++++++ third_party/ibis/ibis_db2/__init__.py | 20 +++++++++++++++- 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/tests/resources/db2_test_tables.sql b/tests/resources/db2_test_tables.sql index 3d3550ef2..69deb1ad7 100644 --- a/tests/resources/db2_test_tables.sql +++ b/tests/resources/db2_test_tables.sql @@ -208,4 +208,20 @@ INSERT INTO pso_data_validator.dvt_tricky_dates VALUES ,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59',TIMESTAMP'4712-12-31 23:23:59'); -- NULL in all columns. INSERT INTO pso_data_validator.dvt_tricky_dates (id) VALUES (2); -COMMIT; \ No newline at end of file +COMMIT; + +-- pso_data_validator.dvt_db2_generated_cols1 has fake generated columns that should be ignored. +CREATE TABLE IF NOT EXISTS pso_data_validator.dvt_db2_generated_cols1 +( id INTEGER NOT NULL PRIMARY KEY +, col_int INTEGER +, db2_generated_docid_for_xml INTEGER +, db2_generated_rowid_for_lob INTEGER); +COMMENT ON TABLE pso_data_validator.dvt_db2_generated_cols1 IS 'Test table to prove generated columns are ignored.'; +INSERT INTO pso_data_validator.dvt_db2_generated_cols1 VALUES (1,1,1,1); +COMMIT; +-- pso_data_validator.dvt_db2_generated_cols2 no fake generated columns, tests in tandem with dvt_db2_generated_cols1. 
+CREATE TABLE IF NOT EXISTS pso_data_validator.dvt_db2_generated_cols2 +( id INTEGER NOT NULL PRIMARY KEY +, col_int INTEGER); +COMMENT ON TABLE pso_data_validator.dvt_db2_generated_cols2 IS 'Test table to prove generated columns are ignored.'; +INSERT INTO pso_data_validator.dvt_db2_generated_cols2 VALUES (1,1); diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 7ad3f978a..5dc02646a 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -129,6 +129,17 @@ def test_schema_validation_not_null_vs_nullable(): null_not_null_assertions(df) +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_schema_validation_db2_generated_cols(): + """Test schema validation for tables with Db2 internal generated columns""" + schema_validation_test( + tables="pso_data_validator.dvt_db2_generated_cols1=pso_data_validator.dvt_db2_generated_cols2", + ) + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, @@ -273,6 +284,17 @@ def test_column_validation_tricky_dates_to_bigquery(): ) +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_column_validation_db2_generated_cols(): + """Test column validation for tables with Db2 internal generated columns""" + column_validation_test( + tables="pso_data_validator.dvt_db2_generated_cols1=pso_data_validator.dvt_db2_generated_cols2", + ) + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, @@ -498,6 +520,18 @@ def test_fixed_char_pk_row_validation_to_bigquery(): ) +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_row_validation_db2_generated_cols(): + """Test column validation for tables with Db2 internal 
generated columns""" + row_validation_test( + tables="pso_data_validator.dvt_db2_generated_cols1=pso_data_validator.dvt_db2_generated_cols2", + primary_keys="id", + ) + + @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 0b33e1d78..989bb01fd 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import re -from typing import Iterable, Optional, Tuple, Dict, Any +from typing import Iterable, Optional, Tuple, Dict, Any, TYPE_CHECKING import sqlalchemy as sa @@ -21,6 +21,12 @@ from third_party.ibis.ibis_db2.compiler import Db2Compiler from third_party.ibis.ibis_db2.datatypes import _get_type +if TYPE_CHECKING: + import ibis.expr.types as ir + + +DB2_HIDDEN_COLUMNS = ["db2_generated_docid_for_xml", "db2_generated_rowid_for_lob"] + class Backend(BaseAlchemyBackend): name = "db2" @@ -172,3 +178,15 @@ def is_char_type_padded(self, char_type: Tuple) -> bool: # It's not possible to distinguish padded char types in this case, # so we default to False to be safe and avoid trimming incorrectly. 
return False + + def table( + self, + name: str, + database: str | None = None, + schema: str | None = None, + ) -> ir.Table: + return_table = super().table(name, database, schema) + for c in DB2_HIDDEN_COLUMNS: + if c in return_table.columns: + return_table = return_table.drop(c) + return return_table From 6984bbeb53ad36424fb484e9296af31163bd7197 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 9 Feb 2026 14:52:09 +0000 Subject: [PATCH 052/100] fix: Skip Db2 internal generated columns --- third_party/ibis/ibis_db2/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 989bb01fd..279dd0f98 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -184,7 +184,7 @@ def table( name: str, database: str | None = None, schema: str | None = None, - ) -> ir.Table: + ) -> "ir.Table": return_table = super().table(name, database, schema) for c in DB2_HIDDEN_COLUMNS: if c in return_table.columns: From 69a6cd00ce09de704e861e28cea38a2bf3647fcc Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 9 Feb 2026 15:53:45 +0000 Subject: [PATCH 053/100] fix: Skip Db2 internal generated columns --- tests/system/data_sources/test_db2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 5dc02646a..0eda9fe85 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -136,6 +136,7 @@ def test_schema_validation_not_null_vs_nullable(): def test_schema_validation_db2_generated_cols(): """Test schema validation for tables with Db2 internal generated columns""" schema_validation_test( + tc="mock-conn", tables="pso_data_validator.dvt_db2_generated_cols1=pso_data_validator.dvt_db2_generated_cols2", ) @@ -291,6 +292,7 @@ def test_column_validation_tricky_dates_to_bigquery(): def test_column_validation_db2_generated_cols(): """Test 
column validation for tables with Db2 internal generated columns""" column_validation_test( + tc="mock-conn", tables="pso_data_validator.dvt_db2_generated_cols1=pso_data_validator.dvt_db2_generated_cols2", ) @@ -527,6 +529,7 @@ def test_fixed_char_pk_row_validation_to_bigquery(): def test_row_validation_db2_generated_cols(): """Test column validation for tables with Db2 internal generated columns""" row_validation_test( + tc="mock-conn", tables="pso_data_validator.dvt_db2_generated_cols1=pso_data_validator.dvt_db2_generated_cols2", primary_keys="id", ) From 912b67ec179b39f061dcfc268ad495972202d11a Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 9 Feb 2026 16:09:57 +0000 Subject: [PATCH 054/100] fix: Skip Db2 internal generated columns --- third_party/ibis/ibis_db2/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 279dd0f98..36b23367e 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -186,7 +186,7 @@ def table( schema: str | None = None, ) -> "ir.Table": return_table = super().table(name, database, schema) - for c in DB2_HIDDEN_COLUMNS: - if c in return_table.columns: - return_table = return_table.drop(c) + columns_to_drop = [_ for _ in return_table.columns if _ in DB2_HIDDEN_COLUMNS] + if columns_to_drop: + return_table = return_table.drop(*columns_to_drop) return return_table From 3d81f7cd9fc77ec8b3c0a40c01935c9510bb1168 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 9 Feb 2026 16:37:58 +0000 Subject: [PATCH 055/100] fix: Skip Db2 internal generated columns --- third_party/ibis/ibis_db2/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 36b23367e..dfe8020ad 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -185,7 +185,10 @@ def table( database: str | None 
= None, schema: str | None = None, ) -> "ir.Table": + # TODO add docstring return_table = super().table(name, database, schema) + # Temporary early return because this code is breaking row validation. + return return_table columns_to_drop = [_ for _ in return_table.columns if _ in DB2_HIDDEN_COLUMNS] if columns_to_drop: return_table = return_table.drop(*columns_to_drop) From 6e19966ccd5ebd6aa8b567bb36175eedea72ffc4 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Tue, 10 Feb 2026 13:18:37 +0000 Subject: [PATCH 056/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index dfe8020ad..ac431b68f 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -130,10 +130,11 @@ def raw_column_metadata( assert (database and table) or query, "We should never receive all args=None" if database and table: # For table-based validation, query the system catalog to get the true data type. + # SYSIBM.SYSCOLUMNS works on both LUW and z/OS. SYSCAT.COLUMNS is only valid on LUW. get_column_metadata_sql = """ - SELECT COLNAME, TYPENAME, LENGTH, SCALE, NULLS - FROM SYSCAT.COLUMNS - WHERE TABSCHEMA = ? AND TABNAME = ? + SELECT NAME, TYPENAME, LENGTH, SCALE, NULLS + FROM SYSIBM.SYSCOLUMNS + WHERE TBCREATOR = ? AND TBNAME = ? 
ORDER BY COLNO """ with self.begin() as con: From 626c6c69141982897391ddad0ec10c9f2e58a945 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 11 Feb 2026 13:39:49 +0000 Subject: [PATCH 057/100] feat: Prototype Db2 z/OS support --- tests/system/data_sources/test_db2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 372ce738f..4253ecab4 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -504,7 +504,6 @@ def test_fixed_char_pk_row_validation_to_bigquery(): """Test fixed char primary keys""" id_column_row_validation_test( "db2inst1.dvt_fixed_char_id=pso_data_validator.dvt_fixed_char_id", - use_random_row=False, ) From da47aad3767f77ab06dae6e2db2abb5a398b9e0f Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 11 Feb 2026 14:13:01 +0000 Subject: [PATCH 058/100] chore: Fix Pandas 3 FutureWarning --- data_validation/combiner.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/data_validation/combiner.py b/data_validation/combiner.py index 433247120..534bf03ef 100644 --- a/data_validation/combiner.py +++ b/data_validation/combiner.py @@ -113,11 +113,11 @@ def generate_report( # Get the first validation metadata object to fill source and/or target empty table names. 
first = run_metadata.validations[next(iter(run_metadata.validations))] if first.validation_type != consts.CUSTOM_QUERY: - result_df.source_table_name.fillna( - first.get_table_name(consts.RESULT_TYPE_SOURCE), inplace=True + result_df["source_table_name"] = result_df["source_table_name"].fillna( + first.get_table_name(consts.RESULT_TYPE_SOURCE) ) - result_df.target_table_name.fillna( - first.get_table_name(consts.RESULT_TYPE_TARGET), inplace=True + result_df["target_table_name"] = result_df["target_table_name"].fillna( + first.get_table_name(consts.RESULT_TYPE_TARGET) ) _get_summary(run_metadata, result_df, source_df, target_df) @@ -194,7 +194,9 @@ def _generate_report_slice( logging.debug(documented.compile()) result_df = client.execute(documented) - result_df["validation_status"].fillna(consts.VALIDATION_STATUS_FAIL, inplace=True) + result_df["validation_status"] = result_df["validation_status"].fillna( + consts.VALIDATION_STATUS_FAIL + ) return result_df From 9d5b6631979c5987b912d8eedb411ddc12823c6b Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 11 Feb 2026 15:49:29 +0000 Subject: [PATCH 059/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index e7d3eca0d..3f06b1e5b 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -73,3 +73,15 @@ def _handle_failed_column_type_inference( self, table: sa.Table, nulltype_cols: Iterable[str] ) -> sa.Table: return dvt_handle_failed_column_type_inference(self, table, nulltype_cols) + + def is_char_type_padded(self, char_type: Tuple) -> bool: + """Define this method if the backend supports character/string types that are padded and returns + padded values, which DVT may want to trim""" + type_code = char_type[0] + if isinstance(type_code, str): + return type_code.upper() == "CHAR" + else: + # 
From cursor.description for custom queries, this is a DBAPITypeObject. + # It's not possible to distinguish padded char types in this case, + # so we default to False to be safe and avoid trimming incorrectly. + return False From 284ff31ef60d7185ebd6b9ccdeafd2e8a06b5334 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 11 Feb 2026 15:50:32 +0000 Subject: [PATCH 060/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index 3f06b1e5b..687932edc 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, Optional, Dict, Any +from typing import Iterable, Optional, Dict, Any, Tuple import sqlalchemy as sa From c029f974b0ca9590cf113c694825368efdd9c187 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 11 Feb 2026 18:17:09 +0000 Subject: [PATCH 061/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2/__init__.py | 4 ++- third_party/ibis/ibis_db2/registry.py | 36 +++++++++++++++++------ third_party/ibis/ibis_db2_zos/__init__.py | 16 ++-------- third_party/ibis/ibis_db2_zos/registry.py | 8 +++++ 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index ac431b68f..239c8fbf4 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -32,6 +32,8 @@ class Backend(BaseAlchemyBackend): name = "db2" compiler = Db2Compiler + char_datatype = "CHARACTER" + def do_connect( self, host: str = "localhost", @@ -173,7 +175,7 @@ def is_char_type_padded(self, char_type: Tuple) -> bool: padded values, which DVT may want to trim""" type_code = char_type[0] if isinstance(type_code, 
str): - return type_code.upper() == "CHARACTER" + return type_code.upper() == self.char_datatype else: # From cursor.description for custom queries, this is a DBAPITypeObject. # It's not possible to distinguish padded char types in this case, diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 65708a960..7063d3e12 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -221,7 +221,13 @@ def _string_agg(t, op): _strftime_excludelist = frozenset(["%w", "%U", "%c", "%x", "%X", "%e"]) -def _reduce_tokens(tokens, arg): +def _reduce_tokens(tokens, arg, allow_query_params=True): + def literal_arg(s: str): + if allow_query_params: + return s + else: + return sa.sql.literal_column(s) + # current list of tokens curtokens = [] @@ -246,7 +252,9 @@ def _reduce_tokens(tokens, arg): if token == "%w": value = sa.extract("dow", arg) # 0 based day of week elif token == "%U": - value = sa.cast(sa.func.to_char(arg, "WW"), sa.SMALLINT) - 1 + value = ( + sa.cast(sa.func.to_char(arg, literal_arg("WW")), sa.SMALLINT) - 1 + ) elif token == "%c" or token == "%x" or token == "%X": # re scan and tokenize this pattern try: @@ -260,14 +268,18 @@ def _reduce_tokens(tokens, arg): new_tokens, _ = _scanner.scan(new_pattern) value = functools.reduce( sa.sql.ColumnElement.concat, - _reduce_tokens(new_tokens, arg), + _reduce_tokens( + new_tokens, arg, allow_query_params=allow_query_params + ), ) elif token == "%e": # pad with spaces instead of zeros - value = sa.func.replace(sa.func.to_char(arg, "DD"), "0", " ") + value = sa.func.replace( + sa.func.to_char(arg, literal_arg("DD")), "0", " " + ) reduced += [ - sa.func.to_char(arg, "".join(curtokens)), + sa.func.to_char(arg, literal_arg("".join(curtokens))), sa.cast(value, sa.TEXT), ] @@ -281,16 +293,22 @@ def _reduce_tokens(tokens, arg): # append result to r if we had more tokens or if we have no # blacklisted tokens if curtokens: - reduced.append(sa.func.to_char(arg, 
"".join(curtokens))) + reduced.append(sa.func.to_char(arg, literal_arg("".join(curtokens)))) return reduced -def _strftime(t, op): +def db2_luw_strftime(t, op, allow_query_params=True): tokens, _ = _scanner.scan(op.format_str.value) - reduced = _reduce_tokens(tokens, t.translate(op.arg)) + reduced = _reduce_tokens( + tokens, t.translate(op.arg), allow_query_params=allow_query_params + ) return functools.reduce(sa.sql.ColumnElement.concat, reduced) +def _sa_strftime(t, op): + return db2_luw_strftime(t, op) + + def _regex_replace(t, op): return sa.func.regexp_replace( t.translate(op.string), t.translate(op.pattern), t.translate(op.replacement) @@ -503,7 +521,7 @@ def sa_format_hashbytes_db2(translator, op): ops.TimestampAdd: fixed_arity(operator.add, 2), ops.TimestampSub: fixed_arity(operator.sub, 2), ops.TimestampDiff: fixed_arity(operator.sub, 2), - ops.Strftime: _strftime, + ops.Strftime: _sa_strftime, ops.ExtractYear: _extract("year"), ops.ExtractMonth: _extract("month"), ops.ExtractDay: _extract("day"), diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index 687932edc..0151532c6 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Iterable, Optional, Dict, Any, Tuple +from typing import Iterable, Optional, Dict, Any import sqlalchemy as sa @@ -25,6 +25,8 @@ class Backend(Db2LUWBackend): name = "db2_zos" compiler = Db2zOSCompiler + char_datatype = "CHAR" + def do_connect( self, host: str = "localhost", @@ -73,15 +75,3 @@ def _handle_failed_column_type_inference( self, table: sa.Table, nulltype_cols: Iterable[str] ) -> sa.Table: return dvt_handle_failed_column_type_inference(self, table, nulltype_cols) - - def is_char_type_padded(self, char_type: Tuple) -> bool: - """Define this method if the backend supports character/string types that are padded and returns - padded values, which DVT may want to trim""" - type_code = char_type[0] - if isinstance(type_code, str): - return type_code.upper() == "CHAR" - else: - # From cursor.description for custom queries, this is a DBAPITypeObject. - # It's not possible to distinguish padded char types in this case, - # so we default to False to be safe and avoid trimming incorrectly. - return False diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index fe46b1a09..0a7a39e66 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import functools + import ibis.expr.operations as ops import sqlalchemy as sa from third_party.ibis.ibis_db2.registry import ( operation_registry as db2_luw_operation_registry, + db2_luw_strftime, ) operation_registry = db2_luw_operation_registry.copy() @@ -45,6 +48,11 @@ def _sa_whitespace_rstrip(t, op): return sa.func.rtrim(sa_arg) +def _sa_strftime(t, op): + return db2_luw_strftime(t, op) + + operation_registry[ops.HashBytes] = _sa_format_hashbytes operation_registry[ops.IfNull] = _sa_ifnull operation_registry[ops.RStrip] = _sa_whitespace_rstrip +operation_registry[ops.Strftime] = _sa_strftime From 3c8779cf086f247f382c8dbaf4e220f09792251c Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 11 Feb 2026 18:20:40 +0000 Subject: [PATCH 062/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 7063d3e12..2699cb413 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -226,7 +226,7 @@ def literal_arg(s: str): if allow_query_params: return s else: - return sa.sql.literal_column(s) + return sa.sql.literal_column(f"'{s}'") # current list of tokens curtokens = [] From 287a7066a5965488ca0549658c5c7138004ab353 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 11 Feb 2026 18:22:12 +0000 Subject: [PATCH 063/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2_zos/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index 0a7a39e66..f20ed486c 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -49,7 +49,7 @@ def _sa_whitespace_rstrip(t, op): def _sa_strftime(t, op): - return db2_luw_strftime(t, op) + return db2_luw_strftime(t, op, allow_query_params=False) 
operation_registry[ops.HashBytes] = _sa_format_hashbytes From 1cf3e80f81b8f9ad4ac215d1988871649d2b8f14 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 12 Feb 2026 09:30:16 +0000 Subject: [PATCH 064/100] feat: Prototype Db2 z/OS support --- third_party/ibis/ibis_db2/__init__.py | 1 - third_party/ibis/ibis_db2_zos/registry.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 77daad73f..239c8fbf4 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -196,4 +196,3 @@ def table( if columns_to_drop: return_table = return_table.drop(*columns_to_drop) return return_table - diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index f20ed486c..5116af817 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import functools - import ibis.expr.operations as ops import sqlalchemy as sa From c8e2742be65c27de30dffc44c24edf51120bbd47 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 23 Feb 2026 09:46:34 +0000 Subject: [PATCH 065/100] feat: Exclude XML from Db2 z/OS column validations --- data_validation/config_manager.py | 17 +++++++++++++++-- docs/limitations.md | 8 ++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index d24e45ede..fc0786045 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -673,12 +673,20 @@ def _is_db2_xml(self, source_column_name: str, target_column_name: str) -> bool: "db2", source_column_name, target_column_name, ["XML"] ) - def _is_db2_zos_blob(self, source_column_name: str, target_column_name: str) -> bool: - """Returns True when either source or target column is Db2 BLOB data type.""" + def _is_db2_zos_blob( + self, source_column_name: str, target_column_name: str + ) -> bool: + """Returns True when either source or target column is Db2 z/OS BLOB data type.""" return self._is_raw_data_type( "db2_zos", source_column_name, target_column_name, ["BLOB"] ) + def _is_db2_zos_xml(self, source_column_name: str, target_column_name: str) -> bool: + """Returns True when either source or target column is Db2 z/OS XML data type.""" + return self._is_raw_data_type( + "db2_zos", source_column_name, target_column_name, ["XML"] + ) + def _is_oracle_lob(self, source_column_name: str, target_column_name: str) -> bool: """Returns True when either source or target column is Oracle LOB data type. 
@@ -1153,6 +1161,11 @@ def require_pre_agg_calc_field( f"Skipping {agg_type} on {column} due to SQL Server image data type" ) continue + elif self._is_db2_zos_xml(column, column): + logging.info( + f"Skipping {agg_type} on {column} due to Db2 z/OS XML data type" + ) + continue elif agg_type != "count" and self._is_db2_xml(column, column): logging.info( f"Skipping {agg_type} on {column} due to Db2 XML data type" diff --git a/docs/limitations.md b/docs/limitations.md index 5d5d8f25c..bee9037f7 100644 --- a/docs/limitations.md +++ b/docs/limitations.md @@ -4,6 +4,14 @@ - BigQuery does not have a 32 bit float data type. Validations of systems containing 32 bit floats (e.g. Oracle BINARY_FLOAT) will likely be problematic when compared to FLOAT64. +## Db2 LUW + +- Db2 LUW XML data type is not compatible with the LENGTH function and is excluded from any column validations other than `--count`. + +## Db2 z/OS + +- Db2 z/OS XML data type is not compatible with aggregation functions or the LENGTH function and is excluded from all column validations. + ## Oracle - Requires the `oracledb` package to be installed as an extra dependency.
From 20875409ecee4bf79ba098d5c205999034d21a83 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 23 Feb 2026 11:15:22 +0000 Subject: [PATCH 066/100] chore: Fix merge conflict mistake --- tests/system/data_sources/test_db2.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 548e36b77..4531a62da 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -220,25 +220,6 @@ def test_column_validation_db2_types_to_bigquery(): ) -@mock.patch( - "data_validation.state_manager.StateManager.get_connection_config", - new=mock_get_connection_config, -) -def test_column_validation_db2_types_to_bigquery(): - """DB2 to BigQuery dvt_db2_types column validation""" - cols = "*" - column_validation_test( - tc="bq-conn", - tables="pso_data_validator.dvt_db2_types", - sum_cols=cols, - min_cols=cols, - max_cols=cols, - avg_cols=cols, - std_cols=cols, - wildcard_include_timestamp=True, - ) - - @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, From b2d4c8c3b63f9a5a55257927887d4d5fbdc26692 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 23 Feb 2026 11:23:15 +0000 Subject: [PATCH 067/100] chore: Fix merge conflict mistake --- tests/system/data_sources/test_db2.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 4531a62da..8bfe69fbb 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -132,9 +132,6 @@ def test_schema_validation_not_null_vs_nullable(): null_not_null_assertions(df) -########################## -# SCHEMA VALIDATION TESTS -########################## @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, @@ -147,6 +144,9 @@ def 
test_schema_validation_db2_generated_cols(): ) +########################### +# COLUMN VALIDATION TESTS +########################### @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, @@ -264,9 +264,6 @@ def test_column_validation_large_decimals_to_bigquery_mismatch(): assert "sum__col_dec_18_1_fail" in df[consts.VALIDATION_NAME].values -########################### -# COLUMN VALIDATION TESTS -########################### @mock.patch( "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, From 634b8b5b371bd1aaf2933f33bfa0662e79f60561 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Tue, 24 Feb 2026 13:31:57 +0000 Subject: [PATCH 068/100] fix: Fix formatting of decimal with scale>0 for Db2 row validation --- third_party/ibis/ibis_db2/registry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 7e9c7d7c7..b6c0c77e5 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -143,9 +143,10 @@ def _cast(t, op): + "0." + ("9" * arg_dtype.scale) ) + # Using sa.literal_column below because z/OS does not support parameterized queries. 
return sa.func.ltrim( sa.func.regexp_replace( - sa.func.to_char(sa_arg, fmt), + sa.func.to_char(sa_arg, sa.literal_column(fmt)), sa.literal_column("'\\.?0+$'"), sa.literal_column("''"), ) From d88ee2d185d47758bd16856342b80b84e874da8d Mon Sep 17 00:00:00 2001 From: nj1973 Date: Tue, 24 Feb 2026 17:34:00 +0000 Subject: [PATCH 069/100] feat: Add VARBIN for Db2 z/OS --- third_party/ibis/ibis_db2/datatypes.py | 9 +++++++++ third_party/ibis/ibis_db2/registry.py | 3 +++ 2 files changed, 12 insertions(+) diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index dc81763de..3fd71608e 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -39,10 +39,19 @@ # TODO Temporarily changed to 300 until issue-1296 is complete. ibis_type_to_sqla[dt.String] = sat.String(length=300) +DB2Dialect_ibm_db.ischema_names["BINARY"] = sat.BINARY DB2Dialect_ibm_db.ischema_names["DECFLOAT"] = sat.DOUBLE +DB2Dialect_ibm_db.ischema_names["VARBINARY"] = sat.BINARY +# Db2 z/OS variants. +DB2Dialect_ibm_db.ischema_names["VARBIN"] = DB2Dialect_ibm_db.ischema_names["VARBINARY"] DB2Dialect_ibm_db.ischema_names["VARG"] = DB2Dialect_ibm_db.ischema_names["VARGRAPHIC"] +@dt.dtype.register(DB2Dialect_ibm_db, sat.BINARY) +def sa_sf_binary(_, satype, nullable=True): + return dt.Binary(nullable=nullable) + + def _get_type(typename) -> dt.DataType: typ = _type_mapping.get(typename) if typ is None: diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index b6c0c77e5..3ad678d8c 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -164,6 +164,9 @@ def _cast(t, op): if arg_dtype.is_binary() and typ.is_string(): # Binary to string cast is a "to hex" conversion for DVT. return sa.func.lower(sa.func.hex(sa_arg)) + elif arg_dtype.is_string() and typ.is_binary(): + # Binary from string cast is a "from hex" conversion for DVT. 
+ return sa.func.hextoraw(sa_arg) if typ.is_binary(): # decode yields a column of memoryview which is annoying to deal with From 56a15eb90fcb71bca32cfd653de8871df455abd0 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 25 Feb 2026 09:23:12 +0000 Subject: [PATCH 070/100] feat: Add decimal cast override for Db2 z/OS --- third_party/ibis/ibis_db2/registry.py | 4 +-- third_party/ibis/ibis_db2_zos/registry.py | 38 +++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 3ad678d8c..91755be75 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -117,7 +117,7 @@ def _is_inf(t, op): return sa.or_(sa_arg == inf, sa_arg == -inf) -def _cast(t, op): +def db2_luw_cast(t, op): arg = op.arg typ = op.to arg_dtype = arg.output_dtype @@ -505,7 +505,7 @@ def sa_format_hashbytes_db2(translator, op): ops.Literal: _literal, ops.TableColumn: _table_column, # types - ops.Cast: _cast, + ops.Cast: db2_luw_cast, # Floating ops.IsNan: _is_nan, ops.IsInf: _is_inf, diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index 5116af817..25541a712 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -17,6 +17,7 @@ from third_party.ibis.ibis_db2.registry import ( operation_registry as db2_luw_operation_registry, + db2_luw_cast, db2_luw_strftime, ) @@ -50,6 +51,43 @@ def _sa_strftime(t, op): return db2_luw_strftime(t, op, allow_query_params=False) +def db2_zos_cast(t, op): + arg = op.arg + typ = op.to + arg_dtype = arg.output_dtype + + sa_arg = t.translate(arg) + + if ( + arg_dtype.is_decimal() + and typ.is_string() + and arg_dtype.scale is not None + and arg_dtype.scale > 0 + ): + # The Db2 LUW regexp_replace technique is not valid for z/OS because regexp_replace is not always available, from the docs: + # "Passthrough-only expression: This 
function is passthrough-only and cannot run on Db2 for z/OS® without acceleration." + # + # We use an alternative RTRIM based technique in this z/OS specialization. + + # Db2 always pads fractional part of the number out to length of scale. + # We need to remove those insignificant digits. + precision = arg_dtype.precision or 31 + fmt = ("9" * (precision - arg_dtype.scale - 1)) + "0." + ("9" * arg_dtype.scale) + # Using sa.literal_column below because z/OS does not support parameterized queries. + return sa.func.ltrim( + sa.func.rtrim( + sa.func.rtrim( + sa.func.to_char(sa_arg, sa.literal_column(fmt)), + sa.literal_column("'0'"), + ), + sa.literal_column("'.'"), + ) + ) + + return db2_luw_cast(t, op) + + +operation_registry[ops.Cast] = db2_zos_cast operation_registry[ops.HashBytes] = _sa_format_hashbytes operation_registry[ops.IfNull] = _sa_ifnull operation_registry[ops.RStrip] = _sa_whitespace_rstrip From 15598770420882c039405ce6cd86570bed47b3bf Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 26 Feb 2026 13:27:19 +0000 Subject: [PATCH 071/100] fix: Force Db2 FOR BIT DATA columns to Ibis Binary --- third_party/ibis/ibis_db2/__init__.py | 68 +++++++++++++++++++-------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 5a98ff240..c3c20af0e 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -18,6 +18,7 @@ import sqlalchemy as sa import ibis.expr.datatypes as dt +import ibis.expr.schema as sch from ibis.backends.base.sql.alchemy import BaseAlchemyBackend from third_party.ibis.ibis_db2.compiler import Db2Compiler from third_party.ibis.ibis_db2.datatypes import _get_type @@ -27,6 +28,11 @@ DB2_HIDDEN_COLUMNS = ["db2_generated_docid_for_xml", "db2_generated_rowid_for_lob"] +FOR_BIT_DATA_MAP = { + "CHARACTER": "CHARACTER_FOR_BIT_DATA", + "CHAR": "CHAR_FOR_BIT_DATA", + "VARCHAR": "VARCHAR_FOR_BIT_DATA", +} class 
Backend(BaseAlchemyBackend): @@ -117,7 +123,7 @@ def list_primary_key_columns(self, database: str, table: str) -> list: def raw_column_metadata( self, database: str = None, table: str = None, query: str = None - ) -> list: + ) -> Iterable[Tuple]: """Define this method to allow DVT to test if backend specific transformations may be needed for comparison. Partner method to _metadata that retains raw data type information instead of converting to Ibis types. This works in the same way as _metadata by running a query over the DVT source, either schema.table or a @@ -134,8 +140,10 @@ def raw_column_metadata( if database and table: # For table-based validation, query the system catalog to get the true data type. # SYSIBM.SYSCOLUMNS works on both LUW and z/OS. SYSCAT.COLUMNS is only valid on LUW. + # FOR BIT DATA is not revealed in the TYPENAME column, we need to check CODEPAGE and + # inject our own custom TYPENAME. get_column_metadata_sql = """ - SELECT NAME, TYPENAME, LENGTH, SCALE, NULLS + SELECT NAME, TYPENAME, LENGTH, SCALE, NULLS, CODEPAGE FROM SYSIBM.SYSCOLUMNS WHERE TBCREATOR = ? AND TBNAME = ? ORDER BY COLNO @@ -145,23 +153,23 @@ def raw_column_metadata( get_column_metadata_sql, parameters=(database.upper(), table.upper()), ) - # Yield a 7-tuple mimicking cursor.description, with the true typename. - for ( + rows = result.cursor.fetchall() + + for row in rows: + colname, typename, col_length, col_scale, nullable, codepage = row + if codepage == 0 and typename.upper() in FOR_BIT_DATA_MAP: + # Db2 does not expose FOR BIT DATA types so we customize the type name here. + typename = FOR_BIT_DATA_MAP[typename.upper()] + + yield ( colname, typename, col_length, + col_length, + col_length, col_scale, nullable, - ) in result.cursor.fetchall(): - yield ( - colname, - typename, - col_length, - col_length, - col_length, - col_scale, - nullable, - ) + ) elif query: # For custom queries, the system catalog cannot be used. 
Fall back to # cursor.description, which may not distinguish padded char types. @@ -189,11 +197,31 @@ def table( database: str | None = None, schema: str | None = None, ) -> "ir.Table": - # TODO add docstring + """Intercept Ibis table() call and inject Db2 customizations before returning the table object.""" return_table = super().table(name, database, schema) - # Temporary early return because this code is breaking row validation. - return return_table - columns_to_drop = [_ for _ in return_table.columns if _ in DB2_HIDDEN_COLUMNS] - if columns_to_drop: - return_table = return_table.drop(*columns_to_drop) + + # Query raw metadata to find columns that are actually binary (FOR BIT DATA) + # but reflected as strings by SQLAlchemy. + raw_types = self.raw_column_metadata(schema or database, name) or [] + for_bit_data_cols = set() + for col_name, type_name, *_ in raw_types: + if type_name in FOR_BIT_DATA_MAP.values(): + for_bit_data_cols.add(col_name.lower()) + + if for_bit_data_cols: + # Create a new table object with FOR BIT DATA columns as binary. + old_schema = return_table.schema() + new_fields = { + name: (dt.binary if name.lower() in for_bit_data_cols else dtype) + for name, dtype in old_schema.items() + } + new_schema = sch.Schema(new_fields) + op = return_table.op() + new_op = op.copy(schema=new_schema) + return_table = new_op.to_expr() + return return_table + # Failed attempt at dealing with hidden columns below for reference. 
+ # columns_to_drop = [_ for _ in return_table.columns if _ in DB2_HIDDEN_COLUMNS] + # if columns_to_drop: + # return_table = return_table.drop(*columns_to_drop) From fb47abd690a15a12de6048cb1363bce7ec8d83a3 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 26 Feb 2026 13:49:59 +0000 Subject: [PATCH 072/100] fix: Add ifnull token length for Db2 binary data types --- third_party/ibis/ibis_addon/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/third_party/ibis/ibis_addon/api.py b/third_party/ibis/ibis_addon/api.py index fee7ea781..ccb9bb561 100644 --- a/third_party/ibis/ibis_addon/api.py +++ b/third_party/ibis/ibis_addon/api.py @@ -174,6 +174,9 @@ def db2_type_string_length( return 19 elif ibis_column.is_integer(): return ibis_integer_string_length(ibis_column) + elif ibis_column.is_binary() and raw_data_type: + # Position 1 in raw_types is the data length. + return raw_data_type[1] else: return None From 2ae6ffe093bfe024d30b35997b467c44f7152249 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 26 Feb 2026 15:41:39 +0000 Subject: [PATCH 073/100] fix: FOR BIT DATA tweak for Db2 z/OS --- third_party/ibis/ibis_db2/__init__.py | 7 ++++++- third_party/ibis/ibis_db2_zos/__init__.py | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index c3c20af0e..0176b3387 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -41,6 +41,8 @@ class Backend(BaseAlchemyBackend): char_datatype = "CHARACTER" + for_bit_data_codepage = 0 + def do_connect( self, host: str = "localhost", @@ -157,7 +159,10 @@ def raw_column_metadata( for row in rows: colname, typename, col_length, col_scale, nullable, codepage = row - if codepage == 0 and typename.upper() in FOR_BIT_DATA_MAP: + if ( + codepage == self.for_bit_data_codepage + and typename.upper() in FOR_BIT_DATA_MAP + ): # Db2 does not expose FOR BIT DATA types so we customize the type name here. 
typename = FOR_BIT_DATA_MAP[typename.upper()] diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index 0151532c6..e0a389906 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -27,6 +27,8 @@ class Backend(Db2LUWBackend): char_datatype = "CHAR" + for_bit_data_codepage = 65535 + def do_connect( self, host: str = "localhost", From d1fe5420cab57fc9760cd100398fd172a8016229 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 2 Mar 2026 13:36:35 +0000 Subject: [PATCH 074/100] chore: Fix merge --- tests/system/data_sources/test_db2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index d2d7e8612..4a55de5e4 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -436,10 +436,11 @@ def test_row_validation_db2_types_to_bigquery(): ) def test_row_validation_datetime_pk_to_bigquery(): """Test datetime primary key join columns""" - # TODO Remove use_randow_row option below when issue-1445 is actioned. + # TODO Remove use_random_row option below when issue-1445 is actioned. 
id_column_row_validation_test( "pso_data_validator.dvt_datetime_id", - use_randow_row=False, + use_random_row=False, + ) @mock.patch( From 80a64fb237ad625efd0e062886b705a5dbc5f5a5 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Tue, 3 Mar 2026 09:13:38 +0000 Subject: [PATCH 075/100] fix: FOR BIT DATA tweak for Db2 z/OS --- third_party/ibis/ibis_db2/__init__.py | 16 ++++++---------- third_party/ibis/ibis_db2_zos/__init__.py | 5 +++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 0176b3387..4eb195231 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -41,6 +41,11 @@ class Backend(BaseAlchemyBackend): char_datatype = "CHARACTER" + raw_column_metadata_sql = """ + SELECT NAME, TYPENAME, LENGTH, SCALE, NULLS, CODEPAGE + FROM SYSIBM.SYSCOLUMNS + WHERE TBCREATOR = ? AND TBNAME = ? + ORDER BY COLNO""" for_bit_data_codepage = 0 def do_connect( @@ -141,18 +146,9 @@ def raw_column_metadata( assert (database and table) or query, "We should never receive all args=None" if database and table: # For table-based validation, query the system catalog to get the true data type. - # SYSIBM.SYSCOLUMNS works on both LUW and z/OS. SYSCAT.COLUMNS is only valid on LUW. - # FOR BIT DATA is not revealed in the TYPENAME column, we need to check CODEPAGE and - # inject our own custom TYPENAME. - get_column_metadata_sql = """ - SELECT NAME, TYPENAME, LENGTH, SCALE, NULLS, CODEPAGE - FROM SYSIBM.SYSCOLUMNS - WHERE TBCREATOR = ? AND TBNAME = ? 
- ORDER BY COLNO - """ with self.begin() as con: result = con.exec_driver_sql( - get_column_metadata_sql, + self.raw_column_metadata_sql, parameters=(database.upper(), table.upper()), ) rows = result.cursor.fetchall() diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index e0a389906..59d0fa0ba 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -27,6 +27,11 @@ class Backend(Db2LUWBackend): char_datatype = "CHAR" + raw_column_metadata_sql = """ + SELECT NAME, TYPENAME, LENGTH, SCALE, NULLS, CCSID + FROM SYSIBM.SYSCOLUMNS + WHERE TBCREATOR = ? AND TBNAME = ? + ORDER BY COLNO""" for_bit_data_codepage = 65535 def do_connect( From dc0a1b4b8485b86b5c097c3bfe43c5683d3c3972 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 4 Mar 2026 15:09:25 +0000 Subject: [PATCH 076/100] feat: Auto exclude Db2 internal hidden columns from validations --- tests/system/data_sources/test_db2.py | 2 +- third_party/ibis/ibis_db2/__init__.py | 27 ++++++++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 4a55de5e4..0e956db79 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -138,7 +138,7 @@ def test_schema_validation_not_null_vs_nullable(): new=mock_get_connection_config, ) def test_schema_validation_db2_generated_cols(): - """Test schema validation for tables with Db2 internal generated columns""" + """Test schema validation for tables with Db2 internal generated columns.""" schema_validation_test( tc="mock-conn", tables="pso_data_validator.dvt_db2_generated_cols1=pso_data_validator.dvt_db2_generated_cols2", diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 4eb195231..e0ef6d0b0 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -204,17 
+204,26 @@ def table( # Query raw metadata to find columns that are actually binary (FOR BIT DATA) # but reflected as strings by SQLAlchemy. raw_types = self.raw_column_metadata(schema or database, name) or [] - for_bit_data_cols = set() - for col_name, type_name, *_ in raw_types: - if type_name in FOR_BIT_DATA_MAP.values(): - for_bit_data_cols.add(col_name.lower()) - - if for_bit_data_cols: - # Create a new table object with FOR BIT DATA columns as binary. + for_bit_data_cols = { + col_name.lower() + for col_name, type_name, *_ in raw_types + if type_name in FOR_BIT_DATA_MAP.values() + } + # The IBM Db2 driver exposes hidden columns that are not visible in the table definition. + # We drop these columns from the table object. + columns_to_drop = { + _.lower() + for _ in return_table.columns + if _.lower() in DB2_HIDDEN_COLUMNS + } + + if for_bit_data_cols or columns_to_drop: + # Create a new table object modifying schema for dropped/binary columns old_schema = return_table.schema() new_fields = { name: (dt.binary if name.lower() in for_bit_data_cols else dtype) for name, dtype in old_schema.items() + if name.lower() not in columns_to_drop } new_schema = sch.Schema(new_fields) op = return_table.op() @@ -222,7 +231,3 @@ def table( return_table = new_op.to_expr() return return_table - # Failed attempt at dealing with hidden columns below for reference. 
- # columns_to_drop = [_ for _ in return_table.columns if _ in DB2_HIDDEN_COLUMNS] - # if columns_to_drop: - # return_table = return_table.drop(*columns_to_drop) From 56ce9fe34158ca0a747683c9b4397df72927bcd9 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 4 Mar 2026 15:32:42 +0000 Subject: [PATCH 077/100] feat: Auto exclude Db2 internal hidden columns from validations --- tests/system/data_sources/test_db2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index 0e956db79..0e37beffa 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -599,6 +599,7 @@ def test_row_validation_db2_generated_cols(): row_validation_test( tc="mock-conn", tables="pso_data_validator.dvt_db2_generated_cols1=pso_data_validator.dvt_db2_generated_cols2", + hash="*", primary_keys="id", use_random_row=False, ) From db593c5ac44f1695b1ca59899f237da76a931a68 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 4 Mar 2026 17:40:56 +0000 Subject: [PATCH 078/100] fix: Cap Db2 precision at 31 when casting decimals to string --- third_party/ibis/ibis_db2/registry.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 768581153..8809c9e51 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -138,6 +138,9 @@ def db2_luw_cast(t, op): # Db2 always pads fractional part of the number out to length of scale. # We need to remove those insignificant digits. precision = arg_dtype.precision or 31 + # 31 is the max decimal precision on Db2. + if precision > 31: + precision = 31 fmt = ( ("9" * (precision - arg_dtype.scale - 1)) + "0." @@ -146,16 +149,16 @@ def db2_luw_cast(t, op): # Using sa.literal_column below because z/OS does not support parameterized queries. 
return sa.func.ltrim( sa.func.regexp_replace( - sa.func.to_char(sa_arg, sa.literal_column(fmt)), + sa.func.to_char(sa_arg, sa.literal_column(f"'{fmt}'")), sa.literal_column("'\\.?0+$'"), sa.literal_column("''"), ) ) - # Max expected precision 38 plus 2 for minus sign and decimal place. - return sa.cast(sa_arg, sa.String(40)) + # Max expected precision 31 plus 2 for minus sign and decimal place. + return sa.cast(sa_arg, sa.String(33)) elif arg_dtype.is_floating() and typ.is_string(): - # Max expected precision 38 plus 2 for minus sign and decimal place. - return sa.cast(sa_arg, sa.String(40)) + # Max expected precision 31 plus 2 for minus sign and decimal place. + return sa.cast(sa_arg, sa.String(33)) if arg_dtype.is_time() and typ.is_string(): # Force colons as time separator with CHAR(column,JIS) expression. From 161d7b27fa24037af1925950087e1504b2ec33fe Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 4 Mar 2026 18:00:46 +0000 Subject: [PATCH 079/100] test: Fix schema name in Db2 test --- tests/resources/db2_test_tables.sql | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/resources/db2_test_tables.sql b/tests/resources/db2_test_tables.sql index 6f1707674..fb29b4ed8 100644 --- a/tests/resources/db2_test_tables.sql +++ b/tests/resources/db2_test_tables.sql @@ -182,17 +182,17 @@ INSERT INTO pso_data_validator.dvt_varchar_id VALUES ('DVT-key-4 ', 'Row 4'); INSERT INTO pso_data_validator.dvt_varchar_id VALUES ('DVT-key-5', 'Row 5'); COMMIT; -DROP TABLE db2inst1.dvt_fixed_char_id; -CREATE TABLE db2inst1.dvt_fixed_char_id +DROP TABLE pso_data_validator.dvt_fixed_char_id; +CREATE TABLE pso_data_validator.dvt_fixed_char_id ( id CHAR(6) NOT NULL PRIMARY KEY , other_data CHAR(100) ); -COMMENT ON TABLE db2inst1.dvt_fixed_char_id IS 'Integration test table used to test fixed char pk matching. 
Trailing blanks are not significant'; -INSERT INTO db2inst1.dvt_fixed_char_id VALUES ('DVT1', 'Row 1 '); -INSERT INTO db2inst1.dvt_fixed_char_id VALUES ('DVT2', 'Row 2 '); -INSERT INTO db2inst1.dvt_fixed_char_id VALUES ('DVT3', 'Row 3 '); -INSERT INTO db2inst1.dvt_fixed_char_id VALUES ('DVT4', 'Row 4 '); -INSERT INTO db2inst1.dvt_fixed_char_id VALUES ('DVT5', 'Row 5'); +COMMENT ON TABLE pso_data_validator.dvt_fixed_char_id IS 'Integration test table used to test fixed char pk matching. Trailing blanks are not significant'; +INSERT INTO pso_data_validator.dvt_fixed_char_id VALUES ('DVT1', 'Row 1 '); +INSERT INTO pso_data_validator.dvt_fixed_char_id VALUES ('DVT2', 'Row 2 '); +INSERT INTO pso_data_validator.dvt_fixed_char_id VALUES ('DVT3', 'Row 3 '); +INSERT INTO pso_data_validator.dvt_fixed_char_id VALUES ('DVT4', 'Row 4 '); +INSERT INTO pso_data_validator.dvt_fixed_char_id VALUES ('DVT5', 'Row 5'); COMMIT; CREATE TABLE IF NOT EXISTS pso_data_validator.dvt_datetime_id From 302a8e05694f7469f941b3ad60041b94b252af16 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 4 Mar 2026 18:07:26 +0000 Subject: [PATCH 080/100] fix: Cap Db2 z/OS precision at 31 when casting decimals to string --- third_party/ibis/ibis_db2/registry.py | 5 +---- third_party/ibis/ibis_db2_zos/registry.py | 5 ++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/third_party/ibis/ibis_db2/registry.py b/third_party/ibis/ibis_db2/registry.py index 8809c9e51..922e4337c 100644 --- a/third_party/ibis/ibis_db2/registry.py +++ b/third_party/ibis/ibis_db2/registry.py @@ -138,9 +138,6 @@ def db2_luw_cast(t, op): # Db2 always pads fractional part of the number out to length of scale. # We need to remove those insignificant digits. precision = arg_dtype.precision or 31 - # 31 is the max decimal precision on Db2. - if precision > 31: - precision = 31 fmt = ( ("9" * (precision - arg_dtype.scale - 1)) + "0." 
@@ -149,7 +146,7 @@ def db2_luw_cast(t, op): # Using sa.literal_column below because z/OS does not support parameterized queries. return sa.func.ltrim( sa.func.regexp_replace( - sa.func.to_char(sa_arg, sa.literal_column(f"'{fmt}'")), + sa.func.to_char(sa_arg, fmt), sa.literal_column("'\\.?0+$'"), sa.literal_column("''"), ) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index 25541a712..af54f29a1 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -72,12 +72,15 @@ def db2_zos_cast(t, op): # Db2 always pads fractional part of the number out to length of scale. # We need to remove those insignificant digits. precision = arg_dtype.precision or 31 + # 31 is the max decimal precision on Db2. + if precision > 31: + precision = 31 fmt = ("9" * (precision - arg_dtype.scale - 1)) + "0." + ("9" * arg_dtype.scale) # Using sa.literal_column below because z/OS does not support parameterized queries. 
return sa.func.ltrim( sa.func.rtrim( sa.func.rtrim( - sa.func.to_char(sa_arg, sa.literal_column(fmt)), + sa.func.to_char(sa_arg, sa.literal_column(f"'{fmt}'")), sa.literal_column("'0'"), ), sa.literal_column("'.'"), From a23ec12d5b4b2ff9abebb1376207a1ea03eb09e3 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 12 Mar 2026 14:37:40 +0000 Subject: [PATCH 081/100] feat: Add random row support for Db2 z/OS --- .../query_builder/random_row_builder.py | 19 +++- .../query_builder/test_random_row_builder.py | 103 +++++++++++++++++- 2 files changed, 115 insertions(+), 7 deletions(-) diff --git a/data_validation/query_builder/random_row_builder.py b/data_validation/query_builder/random_row_builder.py index 362b54574..df8c4356b 100644 --- a/data_validation/query_builder/random_row_builder.py +++ b/data_validation/query_builder/random_row_builder.py @@ -19,9 +19,8 @@ from data_validation import clients from data_validation.query_builder.query_builder import QueryBuilder - # Adding new data sources should be done by adding the Backend name here -RANDOM_SORT_SUPPORTS = [ +SUPPORTS_SORT_BY_RANDOM = [ "bigquery", "teradata", "impala", @@ -29,7 +28,6 @@ "postgres", "mssql", "db2", - "db2_zos", "mysql", "redshift", "snowflake", @@ -37,6 +35,10 @@ "sybase", ] +SUPPORTS_SORT_BY_ALIASED_RANDOM = [ + "db2_zos", +] + class RandomRowBuilder(object): def __init__(self, primary_keys: List[str], batch_size: int): @@ -89,7 +91,16 @@ def maybe_add_random_sort( self, data_client: ibis.backends.base.BaseBackend, table: ibis.Expr ) -> ibis.Expr: """Return a randomly sorted query if it is supported for the client.""" - if data_client.name in RANDOM_SORT_SUPPORTS: + if data_client.name in SUPPORTS_SORT_BY_ALIASED_RANDOM: + random_sort_col = "random_sort" + return ( + table[self.primary_keys] + .mutate(**{random_sort_col: ibis.random()}) + .order_by(random_sort_col) + .limit(self.batch_size)[self.primary_keys] + ) + + if data_client.name in SUPPORTS_SORT_BY_RANDOM: # Teradata 'SAMPLE' and Spanner 
'TABLESAMPLE' is random by nature # and does not require a sort by if data_client.name == "teradata" or data_client.name == "spanner": diff --git a/tests/unit/query_builder/test_random_row_builder.py b/tests/unit/query_builder/test_random_row_builder.py index d941aebde..e0c75a7d4 100644 --- a/tests/unit/query_builder/test_random_row_builder.py +++ b/tests/unit/query_builder/test_random_row_builder.py @@ -12,14 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest +from unittest import mock import pytest +import ibis + +import data_validation.query_builder.random_row_builder as random_row_builder @pytest.fixture def module_under_test(): - import data_validation.query_builder.random_row_builder - - return data_validation.query_builder.random_row_builder + return random_row_builder def test_import(module_under_test): @@ -32,3 +35,97 @@ def test_init(module_under_test): assert builder.primary_keys == primary_keys assert builder.batch_size == 100 + + +def test_maybe_add_random_sort_unsupported(): + mock_client = mock.MagicMock() + mock_client.name = "unsupported_db" + + mock_table = mock.MagicMock() + + builder = random_row_builder.RandomRowBuilder(["id"], 10) + + result = builder.maybe_add_random_sort(mock_client, mock_table) + assert result == mock_table + + +def test_maybe_add_random_sort_teradata_spanner(): + mock_client = mock.MagicMock() + mock_client.name = "teradata" + + mock_table = mock.MagicMock() + mock_table_sliced = mock.MagicMock() + mock_table.__getitem__.return_value = mock_table_sliced + mock_table_limited = mock.MagicMock() + mock_table_sliced.limit.return_value = mock_table_limited + + builder = random_row_builder.RandomRowBuilder(["id"], 10) + + result = builder.maybe_add_random_sort(mock_client, mock_table) + + mock_table.__getitem__.assert_called_once_with(["id"]) + mock_table_sliced.limit.assert_called_once_with(10) + assert result == mock_table_limited + + 
+@mock.patch("ibis.random") +def test_maybe_add_random_sort_supported(mock_random): + mock_client = mock.MagicMock() + mock_client.name = "bigquery" + + mock_random_expr = mock.MagicMock() + mock_random.return_value = mock_random_expr + + mock_table = mock.MagicMock() + mock_table_sliced = mock.MagicMock() + mock_table_ordered = mock.MagicMock() + mock_table_limited = mock.MagicMock() + + mock_table.__getitem__.return_value = mock_table_sliced + mock_table_sliced.order_by.return_value = mock_table_ordered + mock_table_ordered.limit.return_value = mock_table_limited + + builder = random_row_builder.RandomRowBuilder(["id"], 10) + + result = builder.maybe_add_random_sort(mock_client, mock_table) + + mock_table.__getitem__.assert_called_once_with(["id"]) + mock_table_sliced.order_by.assert_called_once_with(mock_random_expr) + mock_table_ordered.limit.assert_called_once_with(10) + assert result == mock_table_limited + + +@mock.patch("ibis.random") +def test_maybe_add_random_sort_aliased(mock_random): + mock_client = mock.MagicMock() + mock_client.name = "db2_zos" + + mock_random_expr = mock.MagicMock() + mock_random.return_value = mock_random_expr + + mock_table = mock.MagicMock() + mock_table_sliced = mock.MagicMock() + mock_table_mutated = mock.MagicMock() + mock_table_ordered = mock.MagicMock() + mock_table_limited = mock.MagicMock() + mock_table_final = mock.MagicMock() + + mock_table.__getitem__.return_value = mock_table_sliced + mock_table_sliced.mutate.return_value = mock_table_mutated + mock_table_mutated.order_by.return_value = mock_table_ordered + mock_table_ordered.limit.return_value = mock_table_limited + mock_table_limited.__getitem__.return_value = mock_table_final + + builder = random_row_builder.RandomRowBuilder(["id"], 10) + + result = builder.maybe_add_random_sort(mock_client, mock_table) + + mock_table.__getitem__.assert_called_once_with(["id"]) + mock_table_sliced.mutate.assert_called_once_with( + **{"random_sort": mock_random_expr} + ) + 
mock_table_mutated.order_by.assert_called_once_with("random_sort") + mock_table_ordered.limit.assert_called_once_with(10) + mock_table_limited.__getitem__.assert_called_once_with(["id"]) + + assert result == mock_table_final From 14d8a86163e04521b7cc5edf7d99af147f11f619 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 12 Mar 2026 15:22:03 +0000 Subject: [PATCH 082/100] feat: Add random row support for Db2 z/OS --- third_party/ibis/ibis_db2_zos/compiler.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/third_party/ibis/ibis_db2_zos/compiler.py b/third_party/ibis/ibis_db2_zos/compiler.py index a32b7cdee..3e0765802 100644 --- a/third_party/ibis/ibis_db2_zos/compiler.py +++ b/third_party/ibis/ibis_db2_zos/compiler.py @@ -11,11 +11,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import sqlalchemy as sa + from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator +from ibis.backends.base.sql.alchemy.query_builder import AlchemySelect from third_party.ibis.ibis_db2_zos.registry import operation_registry +class Db2zOSSelect(AlchemySelect): + def _add_limit(self, fragment): + """Add LIMIT and OFFSET clauses to the query. + + Db2 z/OS backend does not support parameterization for LIMIT/OFFSET + (FETCH FIRST n ROWS ONLY) clauses. We convert the integer limits + into literal SQLAlchemy columns so they render as hardcoded values + in the final compiled SQL string instead of generating bound parameters. 
+ """ + if self.limit is None: + return fragment + + # Use sa.literal_column instead of sa.text for SQLAlchemy .limit() compatibility + fragment = fragment.limit(sa.literal_column(str(self.limit.n))) + if offset := self.limit.offset: + fragment = fragment.offset(sa.literal_column(str(offset))) + return fragment + + class Db2zOSExprTranslator(AlchemyExprTranslator): _registry = operation_registry.copy() _rewrites = AlchemyExprTranslator._rewrites.copy() @@ -24,4 +46,5 @@ class Db2zOSExprTranslator(AlchemyExprTranslator): class Db2zOSCompiler(AlchemyCompiler): translator_class = Db2zOSExprTranslator + select_class = Db2zOSSelect supports_indexed_grouping_keys = False From 2737806cb917ec6d96b1a9b78d2b0480d67357b5 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 13 Mar 2026 10:56:49 +0000 Subject: [PATCH 083/100] chore: Fix broken merge --- third_party/ibis/ibis_db2/datatypes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index 77bac6c6c..8a6310a24 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import ibm_db_dbi - +from ibis.backends.base.sql.alchemy import to_sqla_type import ibis.expr.datatypes as dt +import ibm_db_dbi from ibm_db_sa.ibm_db import DB2Dialect_ibm_db +import sqlalchemy as sa import sqlalchemy.types as sat -from ibis.backends.base.sql.alchemy import to_sqla_type # Types from https://github.com/ibmdb/python-ibmdb/blob/master/IBM_DB/ibm_db/ibm_db_dbi.py _type_mapping = { From dca1f712d8f9c96d0678d3edf3c620df6435b402 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 13 Mar 2026 11:07:27 +0000 Subject: [PATCH 084/100] feat: Add Db2 z/OS support --- third_party/ibis/ibis_db2/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 16a6a1c84..aee6d3fe3 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -85,12 +85,6 @@ def do_connect( self.database_name = database self.url = sa_url - # We cannot find a SET TIMEZONE command for LUW therefore commented this section out. - # @sa.event.listens_for(engine, "connect") - # def connect(dbapi_connection, connection_record): - # with dbapi_connection.cursor() as cur: - # cur.execute("SET TIMEZONE = 'UTC'") - super().do_connect(engine) def find_db(self): From 9b1dfcacb98671c90f14c3b546a8ebb20fdb03d8 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Fri, 13 Mar 2026 14:56:18 +0000 Subject: [PATCH 085/100] feat: Add random row support for Db2 z/OS --- third_party/ibis/ibis_db2_zos/compiler.py | 25 +---------------------- 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/third_party/ibis/ibis_db2_zos/compiler.py b/third_party/ibis/ibis_db2_zos/compiler.py index 3e0765802..aa4f90aa8 100644 --- a/third_party/ibis/ibis_db2_zos/compiler.py +++ b/third_party/ibis/ibis_db2_zos/compiler.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google Inc. +# Copyright 2025 Google Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,33 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import sqlalchemy as sa from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.base.sql.alchemy.query_builder import AlchemySelect - from third_party.ibis.ibis_db2_zos.registry import operation_registry -class Db2zOSSelect(AlchemySelect): - def _add_limit(self, fragment): - """Add LIMIT and OFFSET clauses to the query. - - Db2 z/OS backend does not support parameterization for LIMIT/OFFSET - (FETCH FIRST n ROWS ONLY) clauses. We convert the integer limits - into literal SQLAlchemy columns so they render as hardcoded values - in the final compiled SQL string instead of generating bound parameters. - """ - if self.limit is None: - return fragment - - # Use sa.literal_column instead of sa.text for SQLAlchemy .limit() compatibility - fragment = fragment.limit(sa.literal_column(str(self.limit.n))) - if offset := self.limit.offset: - fragment = fragment.offset(sa.literal_column(str(offset))) - return fragment - - class Db2zOSExprTranslator(AlchemyExprTranslator): _registry = operation_registry.copy() _rewrites = AlchemyExprTranslator._rewrites.copy() @@ -46,5 +24,4 @@ class Db2zOSExprTranslator(AlchemyExprTranslator): class Db2zOSCompiler(AlchemyCompiler): translator_class = Db2zOSExprTranslator - select_class = Db2zOSSelect supports_indexed_grouping_keys = False From 5f3246d6227394543b48bfb726d71a6ec73a9627 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 19 Mar 2026 12:50:04 +0000 Subject: [PATCH 086/100] fix: Add raw datatype support for Db2 custom queries --- data_validation/util.py | 13 +++ third_party/ibis/ibis_db2/__init__.py | 109 ++++++++++++++++++-------- 2 files changed, 90 
insertions(+), 32 deletions(-) diff --git a/data_validation/util.py b/data_validation/util.py index 2f42ce403..563bfd3c9 100644 --- a/data_validation/util.py +++ b/data_validation/util.py @@ -16,6 +16,7 @@ import logging import re import time +import uuid from data_validation import clients, exceptions @@ -87,3 +88,15 @@ def ibis_table_to_sql(ibis_table: "IbisTable", alchemy_client: "BaseBackend") -> else: sql_string = str(ibis_table.compile()) return sql_string + + +def dvt_temp_object_name(prefix: str = "dvt_temp") -> str: + """Generate a random name for when DVT needs to create a temporary object. + + Args: + prefix: The prefix to use for the temporary object name. + + Returns: + A lower case random name for when DVT needs to create a temporary object. + """ + return f"{prefix}_{uuid.uuid4().hex[:8].lower()}" diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index aee6d3fe3..d8e90869d 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import re from typing import Iterable, Optional, Tuple, Dict, Any, TYPE_CHECKING @@ -23,6 +24,8 @@ from third_party.ibis.ibis_db2.compiler import Db2Compiler from third_party.ibis.ibis_db2.datatypes import _get_type +from data_validation import util + if TYPE_CHECKING: import ibis.expr.types as ir @@ -138,42 +141,84 @@ def raw_column_metadata( https://peps.python.org/pep-0249/#description """ assert (database and table) or query, "We should never receive all args=None" - if database and table: - # For table-based validation, query the system catalog to get the true data type. - # SYSIBM.SYSCOLUMNS works on both LUW and z/OS. SYSCAT.COLUMNS is only valid on LUW. 
+ target_schema = None + target_table = None + temp_view_name = None + + if query: + source = f"({query})" + temp_view_name = util.dvt_temp_object_name().upper() + + # Step 1: Create a view representing the query schema. + try: + with self.begin() as con: + con.exec_driver_sql( + f"CREATE VIEW {temp_view_name} AS SELECT * FROM {source} t0" + ) + except Exception as e: + logging.getLogger(__name__).warning( + "Could not create temp view for custom query %s. Falling back to cursor.description. %s", + temp_view_name, + str(e), + ) + # Step 2: Fallback to cursor.description if user lacks permissions. + with self.begin() as con: + result = con.exec_driver_sql(f"SELECT * FROM {source} t0 LIMIT 0") + cursor = result.cursor + yield from (column for column in cursor.description) + return + + # Step 3: Get current schema and set variables to the view. + with self.begin() as con: + schema_result = con.exec_driver_sql( + "SELECT CURRENT SCHEMA FROM SYSIBM.SYSDUMMY1" + ) + target_schema = schema_result.cursor.fetchone()[0].strip() + target_table = temp_view_name + + elif database and table: + # Step 4: Set the variables to the actual database/table names. + target_schema = database.upper() + target_table = table.upper() + + # Step 5: Run the metadata SQL using names from 3 or 4. + try: with self.begin() as con: result = con.exec_driver_sql( self.raw_column_metadata_sql, - parameters=(database.upper(), table.upper()), + parameters=(target_schema, target_table), ) rows = result.cursor.fetchall() + finally: + if temp_view_name: + try: + with self.begin() as con: + con.exec_driver_sql(f"DROP VIEW {temp_view_name}") + except Exception as e: + import logging - for row in rows: - colname, typename, col_length, col_scale, nullable, codepage = row - if ( - codepage == self.for_bit_data_codepage - and typename.upper() in FOR_BIT_DATA_MAP - ): - # Db2 does not expose FOR BIT DATA types so we customize the type name here. 
- typename = FOR_BIT_DATA_MAP[typename.upper()] - - yield ( - colname, - typename, - col_length, - col_length, - col_length, - col_scale, - nullable, - ) - elif query: - # For custom queries, the system catalog cannot be used. Fall back to - # cursor.description, which may not distinguish padded char types. - source = f"({query})" - with self.begin() as con: - result = con.exec_driver_sql(f"SELECT * FROM {source} t0 LIMIT 0") - cursor = result.cursor - yield from (column for column in cursor.description) + logging.getLogger(__name__).warning( + "Could not drop temp view %s", temp_view_name, exc_info=e + ) + + for row in rows: + colname, typename, col_length, col_scale, nullable, codepage = row + if ( + codepage == self.for_bit_data_codepage + and typename.upper() in FOR_BIT_DATA_MAP + ): + # Db2 does not expose FOR BIT DATA types so we customize the type name here. + typename = FOR_BIT_DATA_MAP[typename.upper()] + + yield ( + colname, + typename, + col_length, + col_length, + col_length, + col_scale, + nullable, + ) def is_char_type_padded(self, char_type: Tuple) -> bool: """Define this method if the backend supports character/string types that are padded and returns @@ -190,8 +235,8 @@ def is_char_type_padded(self, char_type: Tuple) -> bool: def table( self, name: str, - database: str | None = None, - schema: str | None = None, + database: Optional[str] = None, + schema: Optional[str] = None, ) -> "ir.Table": """Intercept Ibis table() call and inject Db2 customizations before returning the table object.""" return_table = super().table(name, database, schema) From 38800d7889be469bf2e1201c02791c8d77175203 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Tue, 31 Mar 2026 15:16:29 +0000 Subject: [PATCH 087/100] test: Unit tests for Db2 connections add --- tests/unit/test_cli_tools.py | 68 ++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_cli_tools.py b/tests/unit/test_cli_tools.py index 2c0f65d09..ff85d6b0e 
100644 --- a/tests/unit/test_cli_tools.py +++ b/tests/unit/test_cli_tools.py @@ -105,7 +105,7 @@ f"--connect-args={SNOWFLAKE_CONNECTION_ARGS_DICT_STR}", ] -TERADATA_CONNECTION_ARGS_DICT_STR = '{"a": "1", "b": 2}' +CONNECTION_ARGS_DICT_STR = '{"a": "1", "b": 2}' CLI_ADD_TERADATA_CONNECTION_ARGS = [ "connections", "add", @@ -116,7 +116,7 @@ "--port=123", "--user-name=dvt_user", "--password=dvt_pass", - f"--json-params={TERADATA_CONNECTION_ARGS_DICT_STR}", + f"--json-params={CONNECTION_ARGS_DICT_STR}", ] CLI_ADD_ORACLE_STD_CONNECTION_ARGS = [ @@ -158,6 +158,24 @@ "--thick-mode", ] +CLI_ADD_DB2_CONNECTION_ARGS = [ + "connections", + "add", + "--connection-name", + "db2_conn", + "DB2", + "--host=host_name", + "--port=123", + "--user=dvt_user", + "--password=dvt_pass", + "--database=db", + f"--connect-args={CONNECTION_ARGS_DICT_STR}", +] + +CLI_ADD_DB2_ZOS_CONNECTION_ARGS = [ + "DB2_ZOS" if _ == "DB2" else _ for _ in CLI_ADD_DB2_CONNECTION_ARGS +] + TEST_VALIDATION_CONFIG = { "source": "example", "target": "example", @@ -374,7 +392,51 @@ def test_create_teradata_connection(caplog, fs): assert conn["password"] == args.password conn_from_file = cli_tools.get_connection(args.connection_name) - assert conn_from_file["json_params"] == TERADATA_CONNECTION_ARGS_DICT_STR + assert conn_from_file["json_params"] == CONNECTION_ARGS_DICT_STR + + +def test_create_db2_connection(caplog, fs): + caplog.set_level(logging.INFO) + # Create Connection + parser = cli_tools.configure_arg_parser() + args = parser.parse_args(CLI_ADD_DB2_CONNECTION_ARGS) + conn = cli_tools.get_connection_config_from_args(args) + cli_tools.store_connection(args.connection_name, conn) + + assert gcs_helper.WRITE_SUCCESS_STRING in caplog.records[0].msg + + conn = cli_tools.get_connection(args.connection_name) + assert conn[consts.SOURCE_TYPE] == consts.SOURCE_TYPE_DB2 + assert conn["host"] == args.host + assert conn["port"] == args.port + assert conn["user"] == args.user + assert conn["password"] == args.password + 
assert conn["database"] == args.database + + conn_from_file = cli_tools.get_connection(args.connection_name) + assert conn_from_file["connect_args"] == CONNECTION_ARGS_DICT_STR + + +def test_create_db2_zos_connection(caplog, fs): + caplog.set_level(logging.INFO) + # Create Connection + parser = cli_tools.configure_arg_parser() + args = parser.parse_args(CLI_ADD_DB2_ZOS_CONNECTION_ARGS) + conn = cli_tools.get_connection_config_from_args(args) + cli_tools.store_connection(args.connection_name, conn) + + assert gcs_helper.WRITE_SUCCESS_STRING in caplog.records[0].msg + + conn = cli_tools.get_connection(args.connection_name) + assert conn[consts.SOURCE_TYPE] == consts.SOURCE_TYPE_DB2_ZOS + assert conn["host"] == args.host + assert conn["port"] == args.port + assert conn["user"] == args.user + assert conn["password"] == args.password + assert conn["database"] == args.database + + conn_from_file = cli_tools.get_connection(args.connection_name) + assert conn_from_file["connect_args"] == CONNECTION_ARGS_DICT_STR def test_configure_arg_parser_list_and_run_validation_configs(): From 92c189bc0824258544f0075d6301d010d1e69b66 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 1 Apr 2026 10:18:57 +0000 Subject: [PATCH 088/100] feat: Add Db2 z/OS support --- tests/system/data_sources/common_functions.py | 40 +++++++++++++++++++ tests/system/data_sources/test_db2.py | 30 +++++++++++++- third_party/ibis/ibis_db2/__init__.py | 2 +- third_party/ibis/ibis_db2_zos/__init__.py | 2 +- 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/tests/system/data_sources/common_functions.py b/tests/system/data_sources/common_functions.py index a8084dc0b..542ee0507 100644 --- a/tests/system/data_sources/common_functions.py +++ b/tests/system/data_sources/common_functions.py @@ -13,19 +13,24 @@ # limitations under the License. 
import json +import logging import os import random import string from typing import TYPE_CHECKING, Optional, Tuple import pathlib +import pytest + from data_validation import __main__ as main from data_validation import ( cli_tools, consts, data_validation, find_tables, + gcs_helper, raw_query, + state_manager, ) from data_validation.partition_builder import PartitionBuilder @@ -816,3 +821,38 @@ def raw_query_test( raw_query.print_raw_query_output(rows) captured = capsys.readouterr() assert "characters truncated" not in captured.out + + +def connections_add_test( + caplog: pytest.LogCaptureFixture, + conn_type: str, + conn_args: list, + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +): + """Generic 'connections add' test.""" + # Set PSO_DV_CONN_HOME to tmp_path to bypass GCS and use a local temp directory. + # This avoids pyfakefs module loading issues while keeping file creation isolated. + monkeypatch.setenv(consts.ENV_DIRECTORY_VAR, str(tmp_path)) + with caplog.at_level(logging.INFO): + parser = cli_tools.configure_arg_parser() + cli_arg_list = [ + "connections", + "add", + "-c=new-conn", + conn_type, + ] + conn_args + cli_arg_list = [_ for _ in cli_arg_list if _] + args = parser.parse_args(cli_arg_list) + + main.run_connections(args) + + # Check success message in logging + assert any( + gcs_helper.WRITE_SUCCESS_STRING in record.msg for record in caplog.records + ) + + # Check if file exists in the mocked filesystem + mgr = state_manager.StateManager() + connection_path = mgr._get_connection_path("new-conn") + assert os.path.exists(connection_path) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index ea527323e..f4ca2c479 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -24,6 +24,7 @@ binary_key_assertions, column_validation_test, column_validation_test_args, + connections_add_test, custom_query_validation_test, find_tables_test, 
id_column_row_validation_test, @@ -48,14 +49,14 @@ DB2_USER = os.getenv("DB2_USER", "db2inst1") DB2_PASSWORD = os.getenv("DB2_PASSWORD") DB2_DATABASE = os.getenv("DB2_DATABASE", "testdb") -DB2_PORT = os.getenv("DB2_PORT", 50000) +DB2_PORT = os.getenv("DB2_PORT", "50000") CONN = { consts.SOURCE_TYPE: consts.SOURCE_TYPE_DB2, "host": DB2_HOST, "user": DB2_USER, "password": DB2_PASSWORD, - "port": DB2_PORT, + "port": int(DB2_PORT), "database": DB2_DATABASE, } @@ -643,3 +644,28 @@ def test_find_tables(): def test_raw_query_dvt_row_types(capsys): """Test data-validation query command.""" raw_query_test(capsys, table="pso_data_validator.dvt_core_types") + + +#################### +# CONNECTIONS TESTS +#################### +def test_connections_add(caplog, tmp_path, monkeypatch): + """Test data-validation connections add command.""" + conn_args = [ + "--host", + DB2_HOST, + "--user", + DB2_USER, + "--password", + DB2_PASSWORD, + "--port", + str(DB2_PORT), + "--database", + DB2_DATABASE, + # QueryTimeout is a harmless setting we can use to exercise --connect-args. 
+ "--connect-args", + '{ "QueryTimeout": "0" }', + ] + connections_add_test( + caplog, consts.SOURCE_TYPE_DB2, conn_args, tmp_path, monkeypatch + ) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index d8e90869d..96c076937 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -74,6 +74,7 @@ def do_connect( username=user, password=password, database=database, + query=connect_args or {}, ) else: sa_url = sa.engine.url.make_url(url) @@ -83,7 +84,6 @@ def do_connect( poolclass=sa.pool.StaticPool, # Pessimistic disconnect handling pool_pre_ping=True, - connect_args=connect_args or {}, ) self.database_name = database self.url = sa_url diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index 59d0fa0ba..c4ddc93a4 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -57,6 +57,7 @@ def do_connect( username=user, password=password, database=database, + query=connect_args or {}, ) else: sa_url = sa.engine.url.make_url(url) @@ -66,7 +67,6 @@ def do_connect( poolclass=sa.pool.StaticPool, # Pessimistic disconnect handling pool_pre_ping=True, - connect_args=connect_args or {}, ) self.database_name = database self.url = sa_url From 38ac21c7384e4ad9ec12303ceab8c80fc588b936 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 12:39:52 +0000 Subject: [PATCH 089/100] chore: Reformat --- data_validation/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/data_validation/util.py b/data_validation/util.py index 979beab43..563bfd3c9 100644 --- a/data_validation/util.py +++ b/data_validation/util.py @@ -100,4 +100,3 @@ def dvt_temp_object_name(prefix: str = "dvt_temp") -> str: A lower case random name for when DVT needs to create a temporary object. 
""" return f"{prefix}_{uuid.uuid4().hex[:8].lower()}" - From 571922b6fe82905f011f65c67d986dbe0c47cdcf Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 14:05:56 +0000 Subject: [PATCH 090/100] feat: Add Db2 z/OS support --- docs/connections.md | 5 ++++- docs/limitations.md | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/connections.md b/docs/connections.md index ef9c535ed..afb773425 100644 --- a/docs/connections.md +++ b/docs/connections.md @@ -458,6 +458,9 @@ data-validation connections add [--connect-args CONNECT_ARGS] Additional connection args, JSON String dict, default {} ``` +Db2 permissions required to run DVT custom-query validations: +- CREATE VIEW. See [Db2 z/OS limitations](limitations.md#db2-zos). + Example TLS connection: ``` data-validation connections add -c db2conn DB2_zOS \ @@ -517,4 +520,4 @@ data-validation connections add -c sybase Sybase \ --connect-args='{"driver": "FreeTDS", "autocommit": "True", "TDS_Version": "5.0"}' ``` -See also [limitations](https://github.com/GoogleCloudPlatform/professional-services-data-validator2/blob/develop/docs/limitations.md#sybase) \ No newline at end of file +See also [limitations](limitations.md#sybase-ase) \ No newline at end of file diff --git a/docs/limitations.md b/docs/limitations.md index 499503ceb..a40dd2f84 100644 --- a/docs/limitations.md +++ b/docs/limitations.md @@ -12,6 +12,9 @@ ## Db2 z/OS - Db2 z/OS XML data type is not compatible with aggregation functions or LENGTH function and is excluded from all column validations. +- Db2 z/OS custom query validations require the creation of temporary views in the database to resolve additional data type information. + This requires CREATE VIEW privileges for the connected user. Without it some data types will not be resolved correctly, for + example CHARACTER will be interpreted as VARCHAR. 
## Oracle From c964c687c39d81b0594fc671759910db10208462 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 14:29:31 +0000 Subject: [PATCH 091/100] feat: Add Db2 z/OS support --- docs/limitations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/limitations.md b/docs/limitations.md index a40dd2f84..a0e8536c4 100644 --- a/docs/limitations.md +++ b/docs/limitations.md @@ -15,6 +15,7 @@ - Db2 z/OS custom query validations require the creation of temporary views in the database to resolve additional data type information. This requires CREATE VIEW privileges for the connected user. Without it some data types will not be resolved correctly, for example CHARACTER will be interpreted as VARCHAR. +- Db2 Z/OS ROWID data type is not supported. ## Oracle From 576fcb94a84e3f779870ef4fa0b1f55d152ee9b1 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 14:59:19 +0000 Subject: [PATCH 092/100] feat: Add Db2 z/OS support --- tests/system/data_sources/test_db2.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index f4ca2c479..d82a2e937 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -347,7 +347,6 @@ def test_column_validation_db2_generated_cols(): def test_row_validation_core_types(): """Db2 to Db2 dvt_core_types row validation""" # Exclude col_string because it is unbound and causes overflow error for HEX function. - # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. cols = ",".join( [ _ @@ -355,7 +354,6 @@ def test_row_validation_core_types(): if _ not in ( "id", - "col_char_2", "col_string", ) ] @@ -391,7 +389,6 @@ def test_row_validation_core_types_to_bigquery(): # Excluded col_float32 because BigQuery does not have an exact same type and # float32/64 are lossy and cannot be compared. # Exclude col_string because it is unbound and causes overflow error for HEX function. 
- # TODO: When issue-1638 is complete remove col_char_2 from exclusion list below. cols = ",".join( [ _ @@ -401,7 +398,6 @@ def test_row_validation_core_types_to_bigquery(): "id", "col_float32", "col_float64", - "col_char_2", "col_string", "col_tstz", ) @@ -556,20 +552,6 @@ def test_row_validation_comp_fields_tricky_dates_to_bigquery(): ) -@mock.patch( - "data_validation.state_manager.StateManager.get_connection_config", - new=mock_get_connection_config, -) -def test_row_validation_comp_fields_tricky_dates_to_bigquery(): - """Test with date values that are at the extremes, e.g. 9999-12-31.""" - cols = ",".join(DVT_TRICKY_DATES_COLUMNS) - row_validation_test( - tables="pso_data_validator.dvt_tricky_dates", - tc="bq-conn", - comp_fields=cols, - ) - - ################################ # CUSTOM-QUERY VALIDATION TESTS ################################ From 8c1ba2348ad811c003c56fc2aa37a3e7bc16e2d6 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 15:23:55 +0000 Subject: [PATCH 093/100] feat: Add Db2 z/OS support --- third_party/ibis/ibis_db2/datatypes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/third_party/ibis/ibis_db2/datatypes.py b/third_party/ibis/ibis_db2/datatypes.py index 8a6310a24..a0ca1cff6 100644 --- a/third_party/ibis/ibis_db2/datatypes.py +++ b/third_party/ibis/ibis_db2/datatypes.py @@ -40,8 +40,7 @@ @to_sqla_type.register(DB2Dialect_ibm_db, dt.String) def _string(_, itype): """Include a limit for casts to String due to line size limits supported by Db2 concat().""" - # TODO Temporarily changed to 300 until issue-1296 is complete. 
- return sa.sql.sqltypes.String(length=300) + return sa.sql.sqltypes.String(length=3000) DB2Dialect_ibm_db.ischema_names["BINARY"] = sat.BINARY From 95656468f7e6210e91de4eb3f358cb3b7a2c9c2f Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 15:54:05 +0000 Subject: [PATCH 094/100] feat: Add Db2 z/OS support --- tests/system/data_sources/test_db2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/data_sources/test_db2.py b/tests/system/data_sources/test_db2.py index d82a2e937..79bf5550c 100644 --- a/tests/system/data_sources/test_db2.py +++ b/tests/system/data_sources/test_db2.py @@ -112,7 +112,7 @@ def test_schema_validation_db2_types_to_bigquery(): schema_validation_test( tables="pso_data_validator.dvt_db2_types", tc="bq-conn", - allow_list=("int16:int64,int32:int64,decimal:decimal(38,9)"), + allow_list=("int16:int64,int32:int64,decimal:decimal(38,9),float32:float64"), ) From fcc35b809263025d786a67f5f7c303140bfab24c Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 16:25:27 +0000 Subject: [PATCH 095/100] feat: Add Db2 z/OS support --- data_validation/util.py | 2 +- tests/unit/test_util.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data_validation/util.py b/data_validation/util.py index 563bfd3c9..7e3edcb61 100644 --- a/data_validation/util.py +++ b/data_validation/util.py @@ -97,6 +97,6 @@ def dvt_temp_object_name(prefix: str = "dvt_temp") -> str: prefix: The prefix to use for the temporary object name. Returns: - A lower case random name for when DVT needs to create a temporary object. + A random name for when DVT needs to create a temporary object. 
""" return f"{prefix}_{uuid.uuid4().hex[:8].lower()}" diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py index 96d7ae633..c28596b28 100644 --- a/tests/unit/test_util.py +++ b/tests/unit/test_util.py @@ -172,7 +172,6 @@ def test_dvt_temp_object_name(module_under_test): # Test custom prefix with uppercase name = module_under_test.dvt_temp_object_name("CUSTOM_Prefix") - assert name.startswith("custom_prefix_") - assert name.islower() + assert name.startswith("CUSTOM_Prefix_") assert len(name) == 22 # 'custom_prefix_' (14) + 8 hex chars - assert re.match(r"^custom_prefix_[a-f0-9]{8}$", name) + assert re.match(r"^CUSTOM_Prefix_[a-f0-9]{8}$", name) From 486e4a6f5b6734d6b3e236d686b9a6e8b97474dd Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 16:45:59 +0000 Subject: [PATCH 096/100] feat: Add Db2 z/OS support --- third_party/ibis/ibis_db2_zos/registry.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index af54f29a1..411978aa5 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -25,10 +25,18 @@ def _sa_ifnull(t, op): + """Db2 z/OS does not support query parameters in this context. + + This override uses sa.literal_column to prevent parameterization. + """ + assert all(c.isalnum() or c == '_' for c in str(op.ifnull_expr.value)), \ + f"Value '{op.ifnull_expr.value}' contains non-alphanumeric or non-underscore characters." + sa_arg = t.translate(op.arg) return sa.func.coalesce(sa_arg, sa.literal_column(f"'{op.ifnull_expr.value}'")) + def _sa_format_hashbytes(translator, op): # Use of query parameters is throwing an error on Db2 z/OS. # Code below uses sa.sql.literal_column to prevent parameterization. 
From a15b12be91067819a060e7bd1b3669c7ed5b9fcc Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 2 Apr 2026 16:46:19 +0000 Subject: [PATCH 097/100] feat: Add Db2 z/OS support --- third_party/ibis/ibis_db2_zos/registry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/ibis/ibis_db2_zos/registry.py b/third_party/ibis/ibis_db2_zos/registry.py index 411978aa5..1c792c7e8 100644 --- a/third_party/ibis/ibis_db2_zos/registry.py +++ b/third_party/ibis/ibis_db2_zos/registry.py @@ -29,14 +29,14 @@ def _sa_ifnull(t, op): This override uses sa.literal_column to prevent parameterization. """ - assert all(c.isalnum() or c == '_' for c in str(op.ifnull_expr.value)), \ - f"Value '{op.ifnull_expr.value}' contains non-alphanumeric or non-underscore characters." + assert all( + c.isalnum() or c == "_" for c in str(op.ifnull_expr.value) + ), f"Value '{op.ifnull_expr.value}' contains non-alphanumeric or non-underscore characters." sa_arg = t.translate(op.arg) return sa.func.coalesce(sa_arg, sa.literal_column(f"'{op.ifnull_expr.value}'")) - def _sa_format_hashbytes(translator, op): # Use of query parameters is throwing an error on Db2 z/OS. # Code below uses sa.sql.literal_column to prevent parameterization. From 5b47c4540bbd53698128f490fc03cef8c202e554 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 8 Apr 2026 17:11:14 +0000 Subject: [PATCH 098/100] chore: Remove unused imports --- tests/unit/query_builder/test_random_row_builder.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/query_builder/test_random_row_builder.py b/tests/unit/query_builder/test_random_row_builder.py index e0c75a7d4..21ad6ff49 100644 --- a/tests/unit/query_builder/test_random_row_builder.py +++ b/tests/unit/query_builder/test_random_row_builder.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest from unittest import mock import pytest -import ibis import data_validation.query_builder.random_row_builder as random_row_builder From a7a2363e76438aaa4d52d438225ce1a031d46e75 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Thu, 9 Apr 2026 08:54:53 +0000 Subject: [PATCH 099/100] feat: Add Db2 z/OS support --- third_party/ibis/ibis_impala/api.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/third_party/ibis/ibis_impala/api.py b/third_party/ibis/ibis_impala/api.py index 9c22b1ca6..32749a992 100644 --- a/third_party/ibis/ibis_impala/api.py +++ b/third_party/ibis/ibis_impala/api.py @@ -195,6 +195,14 @@ def fill(target, chunks, na_rep): return target +# TODO The rewrite below is rewriting across all engines, not just Impala. +# It should be moved to operations.py or removed and implement system +# specific overrides. See issue-1728. +@rewrites(ops.IfNull) +def _if_null(op): + return ops.Coalesce((op.arg, op.ifnull_expr)) + + def update_query_with_limit(query): limit_pattern = re.compile(r"LIMIT\s+\d+(\s+OFFSET\s+\d+)?\s*;?\s*$", re.IGNORECASE) last_limit_match = limit_pattern.search(query) From 03a529e483969c6c253a09cb0afb0775e3b7c530 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Mon, 13 Apr 2026 09:54:43 +0000 Subject: [PATCH 100/100] chore: PR comment --- third_party/ibis/ibis_db2/__init__.py | 2 +- third_party/ibis/ibis_db2/api.py | 14 ++++++++------ third_party/ibis/ibis_db2_zos/__init__.py | 2 +- third_party/ibis/ibis_db2_zos/api.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/third_party/ibis/ibis_db2/__init__.py b/third_party/ibis/ibis_db2/__init__.py index 603a053cb..fdf4809c1 100644 --- a/third_party/ibis/ibis_db2/__init__.py +++ b/third_party/ibis/ibis_db2/__init__.py @@ -57,7 +57,7 @@ def do_connect( host: str = "localhost", user: Optional[str] = None, password: Optional[str] = None, - port: int = 50000, + port: Optional[int] = None, database: Optional[str] = None, url: Optional[str] = None, driver: str = 
"ibm_db_sa", diff --git a/third_party/ibis/ibis_db2/api.py b/third_party/ibis/ibis_db2/api.py index 27286ca75..7b1a78bd1 100644 --- a/third_party/ibis/ibis_db2/api.py +++ b/third_party/ibis/ibis_db2/api.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional + from third_party.ibis.ibis_db2 import Backend as DB2Backend import ibm_db_sa # NOQA fail early if driver is missing @@ -20,13 +22,13 @@ def db2_connect( host: str = "localhost", - user: str = None, - password: str = None, - port: int = 50000, - database: str = None, - url: str = None, + user: Optional[str] = None, + password: Optional[str] = None, + port: Optional[int] = None, + database: Optional[str] = None, + url: Optional[str] = None, driver: str = "ibm_db_sa", - connect_args: str = None, + connect_args: Optional[str] = None, ): connect_args_dict = dvt_config_string_to_dict(connect_args) if connect_args else {} backend = DB2Backend() diff --git a/third_party/ibis/ibis_db2_zos/__init__.py b/third_party/ibis/ibis_db2_zos/__init__.py index c4ddc93a4..50cb6ac8d 100644 --- a/third_party/ibis/ibis_db2_zos/__init__.py +++ b/third_party/ibis/ibis_db2_zos/__init__.py @@ -39,7 +39,7 @@ def do_connect( host: str = "localhost", user: Optional[str] = None, password: Optional[str] = None, - port: int = 50000, + port: Optional[int] = None, database: Optional[str] = None, url: Optional[str] = None, driver: str = "ibm_db_sa", diff --git a/third_party/ibis/ibis_db2_zos/api.py b/third_party/ibis/ibis_db2_zos/api.py index 53a7030e7..741aaed47 100644 --- a/third_party/ibis/ibis_db2_zos/api.py +++ b/third_party/ibis/ibis_db2_zos/api.py @@ -24,7 +24,7 @@ def db2_zos_connect( host: str = "localhost", user: Optional[str] = None, password: Optional[str] = None, - port: int = 50000, + port: Optional[int] = None, database: Optional[str] = None, url: Optional[str] = None, driver: str = "ibm_db_sa",