diff --git a/data_diff/databases/mysql.py b/data_diff/databases/mysql.py index 1ee04460..72665dee 100644 --- a/data_diff/databases/mysql.py +++ b/data_diff/databases/mysql.py @@ -8,6 +8,7 @@ Float, Decimal, Integer, + JSON, Text, TemporalType, FractionalType, @@ -68,6 +69,8 @@ class Dialect(BaseDialect): "tinytext": Text, # Boolean "boolean": Boolean, + # JSON + "json": JSON, } def quote(self, s: str) -> str: diff --git a/data_diff/utils.py b/data_diff/utils.py index 1d1405fd..1ad9fbac 100644 --- a/data_diff/utils.py +++ b/data_diff/utils.py @@ -512,10 +512,20 @@ def diff_int_dynamic_color_template(diff_value: int) -> str: return "0" -def _jsons_equiv(a: str, b: str): +def _jsons_equiv(a: Optional[str], b: Optional[str]): + # Treat Python None (DB null) as the JSON null literal so that a NULL on + # the MySQL side matches a 'null' string produced by TO_JSON_STRING(NULL) + # on the BigQuery side (or any other DB that serializes NULL as 'null'). + if a is None: + a = "null" + if b is None: + b = "null" + # Fast-path: identical strings don't need JSON parsing. + if a == b: + return True try: return json.loads(a) == json.loads(b) - except (ValueError, TypeError, json.decoder.JSONDecodeError): # not valid jsons + except (ValueError, TypeError): # covers json.JSONDecodeError (subclass of ValueError) return False diff --git a/pyproject.toml b/pyproject.toml index dd2b0523..cbc3aa75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ [project.optional-dependencies] preql = ["preql>=0.2.19"] -mysql = ["mysql-connector-python==8.0.29"] +mysql = ["mysql-connector-python>=8.0.29"] postgresql = ["psycopg2"] redshift = ["psycopg2"] snowflake = ["snowflake-connector-python>=3.0.2,<4.0.0", "cryptography"] @@ -54,7 +54,7 @@ duckdb = ["duckdb"] bigquery = ["google-cloud-bigquery"] all-dbs = [ "preql>=0.2.19", - "mysql-connector-python==8.0.29", + "mysql-connector-python>=8.0.29", "psycopg2", "snowflake-connector-python>=3.0.2,<4.0.0", "cryptography", diff --git a/tests/test_utils.py b/tests/test_utils.py index 712f3467..613c0b03 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -11,6 +11,7 @@ columns_removed_template, columns_added_template, columns_type_changed_template, + _jsons_equiv, ) from data_diff.__main__ import _remove_passwords_in_dict @@ -211,3 +212,43 @@ def test_columns_type_changed_template(self): output = columns_type_changed_template({"column1", "column2"}) self.assertIn("Type changed [2]: [green]", output) self.assertEqual(self.extract_columns_set(output), {"column1", "column2"}) + + +class TestJsonsEquiv(unittest.TestCase): + # --- None / null equivalence --- + def test_both_none(self): + """Two DB NULLs are equivalent.""" + self.assertTrue(_jsons_equiv(None, None)) + + def test_none_vs_json_null_string(self): + """DB NULL on one side, JSON 'null' string on the other, are equivalent.""" + self.assertTrue(_jsons_equiv(None, "null")) + self.assertTrue(_jsons_equiv("null", None)) + + def test_none_vs_json_string_null(self): + """DB NULL must NOT equal the JSON string literal \"null\".""" + self.assertFalse(_jsons_equiv(None, '"null"')) + self.assertFalse(_jsons_equiv('"null"', None)) + + # --- Identical strings fast-path --- + def test_identical_strings(self): + self.assertTrue(_jsons_equiv('{"a": 1}', '{"a": 1}')) + + # --- Semantic JSON equivalence --- + def test_equivalent_objects_different_whitespace(self): + self.assertTrue(_jsons_equiv('{"a":1,"b":2}', '{"b": 2, "a": 1}')) + + def test_equivalent_arrays(self): + self.assertTrue(_jsons_equiv("[1, 2, 3]", "[1,2,3]")) + + def test_different_values(self): + self.assertFalse(_jsons_equiv('{"a": 1}', '{"a": 2}')) + + def test_different_types(self): + self.assertFalse(_jsons_equiv("1", '"1"')) + + # --- Invalid JSON --- + def test_invalid_json_returns_false(self): + # Different invalid-JSON strings → False (can't parse either side) + self.assertFalse(_jsons_equiv("not-json", "also-not-json")) + self.assertFalse(_jsons_equiv('{"a": 1}', "not-json")) diff --git a/uv.lock b/uv.lock index eff8aea1..f367694d 100644 --- a/uv.lock +++ b/uv.lock @@ -949,8 +949,8 @@ requires-dist = [ { name = "google-cloud-bigquery", marker = "extra == 'bigquery'" }, { name = "keyring" }, { name = "mashumaro", extras = ["msgpack"], specifier = ">=2.9,<3.11.0" }, - { name = "mysql-connector-python", marker = "extra == 'all-dbs'", specifier = "==8.0.29" }, - { name = "mysql-connector-python", marker = "extra == 'mysql'", specifier = "==8.0.29" }, + { name = "mysql-connector-python", marker = "extra == 'all-dbs'", specifier = ">=8.0.29" }, + { name = "mysql-connector-python", marker = "extra == 'mysql'", specifier = ">=8.0.29" }, { name = "oracledb", marker = "extra == 'all-dbs'" }, { name = "oracledb", marker = "extra == 'oracle'" }, { name = "preql", marker = "extra == 'all-dbs'", specifier = ">=0.2.19" },