Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions data_diff/databases/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Float,
Decimal,
Integer,
JSON,
Text,
TemporalType,
FractionalType,
Expand Down Expand Up @@ -68,6 +69,8 @@ class Dialect(BaseDialect):
"tinytext": Text,
# Boolean
"boolean": Boolean,
# JSON
"json": JSON,
}

def quote(self, s: str) -> str:
Expand Down
14 changes: 12 additions & 2 deletions data_diff/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,20 @@ def diff_int_dynamic_color_template(diff_value: int) -> str:
return "0"


def _jsons_equiv(a: str, b: str):
def _jsons_equiv(a: Optional[str], b: Optional[str]):
# Treat Python None (DB null) as the JSON null literal so that a NULL on
# the MySQL side matches a 'null' string produced by TO_JSON_STRING(NULL)
# on the BigQuery side (or any other DB that serializes NULL as 'null').
if a is None:
a = "null"
if b is None:
b = "null"
Comment thread
kantselovich marked this conversation as resolved.
# Fast-path: identical strings don't need JSON parsing.
if a == b:
return True
try:
return json.loads(a) == json.loads(b)
except (ValueError, TypeError, json.decoder.JSONDecodeError): # not valid jsons
except (ValueError, TypeError): # covers json.JSONDecodeError (subclass of ValueError)
return False


Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ dependencies = [

[project.optional-dependencies]
preql = ["preql>=0.2.19"]
mysql = ["mysql-connector-python==8.0.29"]
mysql = ["mysql-connector-python>=8.0.29"]
postgresql = ["psycopg2"]
redshift = ["psycopg2"]
snowflake = ["snowflake-connector-python>=3.0.2,<4.0.0", "cryptography"]
Expand All @@ -54,7 +54,7 @@ duckdb = ["duckdb"]
bigquery = ["google-cloud-bigquery"]
all-dbs = [
"preql>=0.2.19",
"mysql-connector-python==8.0.29",
"mysql-connector-python>=8.0.29",
"psycopg2",
"snowflake-connector-python>=3.0.2,<4.0.0",
"cryptography",
Expand Down
41 changes: 41 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
columns_removed_template,
columns_added_template,
columns_type_changed_template,
_jsons_equiv,
)

from data_diff.__main__ import _remove_passwords_in_dict
Expand Down Expand Up @@ -211,3 +212,43 @@ def test_columns_type_changed_template(self):
output = columns_type_changed_template({"column1", "column2"})
self.assertIn("Type changed [2]: [green]", output)
self.assertEqual(self.extract_columns_set(output), {"column1", "column2"})


class TestJsonsEquiv(unittest.TestCase):
    """Checks for ``_jsons_equiv``: NULL handling, JSON equality, bad input."""

    # --- None / null equivalence ---
    def test_both_none(self):
        """Two DB NULLs are equivalent."""
        verdict = _jsons_equiv(None, None)
        self.assertTrue(verdict)

    def test_none_vs_json_null_string(self):
        """DB NULL on one side, JSON 'null' string on the other, are equivalent."""
        # Exercise both argument orders.
        for left, right in ((None, "null"), ("null", None)):
            self.assertTrue(_jsons_equiv(left, right))

    def test_none_vs_json_string_null(self):
        """DB NULL must NOT equal the JSON string literal \"null\"."""
        # '"null"' is a JSON *string* whose content is the word null.
        quoted_null = '"null"'
        for left, right in ((None, quoted_null), (quoted_null, None)):
            self.assertFalse(_jsons_equiv(left, right))

    # --- Identical strings fast-path ---
    def test_identical_strings(self):
        document = '{"a": 1}'
        self.assertTrue(_jsons_equiv(document, '{"a": 1}'))

    # --- Semantic JSON equivalence ---
    def test_equivalent_objects_different_whitespace(self):
        # The two documents differ in both spacing and key order.
        compact = '{"a":1,"b":2}'
        spaced = '{"b": 2, "a": 1}'
        self.assertTrue(_jsons_equiv(compact, spaced))

    def test_equivalent_arrays(self):
        spaced = "[1, 2, 3]"
        compact = "[1,2,3]"
        self.assertTrue(_jsons_equiv(spaced, compact))

    def test_different_values(self):
        # Same key, different value.
        self.assertFalse(_jsons_equiv('{"a": 1}', '{"a": 2}'))

    def test_different_types(self):
        # JSON number 1 versus JSON string "1".
        number_text = "1"
        string_text = '"1"'
        self.assertFalse(_jsons_equiv(number_text, string_text))

    # --- Invalid JSON ---
    def test_invalid_json_returns_false(self):
        # Unparsable input on either or both sides yields False, provided the
        # two strings are not identical.
        cases = (
            ("not-json", "also-not-json"),
            ('{"a": 1}', "not-json"),
        )
        for left, right in cases:
            self.assertFalse(_jsons_equiv(left, right))
4 changes: 2 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading