diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 8444080..f136ec3 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -159,10 +159,20 @@ def make_attrstring(attr: dict[str, Any]) -> str: if key == "type": return f' type="{val}"' return f' {key}="{escape_xml(val)}"' - attrstring = " ".join([f'{k}="{escape_xml(v)}"' for k, v in attr.items()]) + attrstring = " ".join(f'{k}="{escape_xml(v)}"' for k, v in attr.items()) return f" {attrstring}" +def make_typed_attrstring(attr: dict[str, Any], xml_type: str) -> str: + """Create XML attributes with a type value without mutating caller attrs.""" + if not attr: + return f' type="{xml_type}"' + + typed_attr = dict(attr) + typed_attr["type"] = xml_type + return make_attrstring(typed_attr) + + def _is_fast_valid_xml_name(key: str) -> bool: """Return True for ASCII XML names known to be accepted by the legacy parser.""" if not key or not key.isascii() or ":" in key: @@ -560,26 +570,59 @@ def _append_dict2xml_str( if attr_type: attr["type"] = get_xml_type(item) - val_attr = dict(item["@attrs"]) if "@attrs" in item else dict(attr) - if "@val" in item: - rawitem = item["@val"] - elif "@attrs" in item: - rawitem = {key: value for key, value in item.items() if key != "@attrs"} + has_custom_attrs = "@attrs" in item + if has_custom_attrs: + raw_attrs = item["@attrs"] + val_attr = raw_attrs if isinstance(raw_attrs, dict) else dict(raw_attrs) + rawitem = item["@val"] if "@val" in item else { + key: value for key, value in item.items() if key != "@attrs" + } else: - rawitem = item + val_attr = attr + rawitem = item.get("@val", item) if parentIsList and list_headers: if len(val_attr) > 0 and not item_wrap: output.write(f"<{parent}{make_attrstring(val_attr)}>") else: output.write(f"<{parent}>") - _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + _append_rawitem( + output, + rawitem, + ids, + attr_type, + item_func, + cdata, + item_wrap, + item_name, + list_headers, + ) output.write(f"") elif item.get("@flat", False) or (parentIsList and not item_wrap): - _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + _append_rawitem( + output, + rawitem, + ids, + attr_type, + item_func, + cdata, + item_wrap, + item_name, + list_headers, + ) else: output.write(f"<{item_name}{make_attrstring(val_attr)}>") - _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + _append_rawitem( + output, + rawitem, + ids, + attr_type, + item_func, + cdata, + item_wrap, + item_name, + list_headers, + ) output.write(f"") @@ -597,7 +640,7 @@ def _append_rawitem( if rawitem is None: return if isinstance(rawitem, bool): - output.write(str(rawitem).lower()) + output.write("true" if rawitem else "false") elif isinstance(rawitem, (str, numbers.Number)): output.write(escape_xml(str(rawitem))) else: @@ -752,13 +795,17 @@ def _append_convert_list( this_id = get_unique_id(parent) if ids else None for i, item in enumerate(items): - attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} + base_attr: dict[str, Any] | None = None + if ids: + base_attr = {"id": f"{this_id}_{i + 1}"} if isinstance(item, bool): + attr = dict(base_attr) if base_attr else {} if item_name_attr: attr.update(item_name_attr) output.write(convert_bool_valid_name(item_name, item, attr_type, attr)) elif isinstance(item, (numbers.Number, str)): + attr = dict(base_attr) if base_attr else {} if scalar_key_attr: attr.update(scalar_key_attr) output.write( @@ -771,6 +818,7 @@ def _append_convert_list( ) ) elif hasattr(item, "isoformat"): + attr = dict(base_attr) if base_attr else {} if item_name_attr: attr.update(item_name_attr) output.write( @@ -783,6 +831,7 @@ def _append_convert_list( ) ) elif isinstance(item, dict): + attr = dict(base_attr) if base_attr else {} _append_dict2xml_str( output, attr_type=attr_type, @@ -797,6 +846,7 @@ def _append_convert_list( list_headers=list_headers, ) elif isinstance(item, Sequence): + attr = dict(base_attr) if base_attr else {} _append_list2xml_str( output, attr_type=attr_type, @@ -809,6 +859,7 @@ def _append_convert_list( list_headers=list_headers, ) elif item is None: + attr = dict(base_attr) if base_attr else {} if item_name_attr: attr.update(item_name_attr) output.write(convert_none_valid_name(item_name, attr_type, attr)) @@ -849,10 +900,7 @@ def convert_kv_valid_name( if hasattr(val, "isoformat") and isinstance(val, (datetime.datetime, datetime.date)): val = val.isoformat() - attr = dict(attr) - if attr_type: - attr["type"] = get_xml_type(val) - attr_string = make_attrstring(attr) + attr_string = make_typed_attrstring(attr, get_xml_type(val)) if attr_type else make_attrstring(attr) return f"<{key}{attr_string}>{wrap_cdata(val) if cdata else escape_xml(val)}" @@ -877,11 +925,8 @@ def convert_bool_valid_name( attr: dict[str, Any], ) -> str: """Converts a boolean when the caller already validated the key.""" - attr = dict(attr) - if attr_type: - attr["type"] = "bool" - attr_string = make_attrstring(attr) - return f"<{key}{attr_string}>{str(val).lower()}" + attr_string = make_typed_attrstring(attr, "bool") if attr_type else make_attrstring(attr) + return f"<{key}{attr_string}>{'true' if val else 'false'}" def convert_none( @@ -902,10 +947,7 @@ def convert_none_valid_name( key: str, attr_type: bool, attr: dict[str, Any] ) -> str: """Converts a null value when the caller already validated the key.""" - attr = dict(attr) - if attr_type: - attr["type"] = "null" - attr_string = make_attrstring(attr) + attr_string = make_typed_attrstring(attr, "null") if attr_type else make_attrstring(attr) return f"<{key}{attr_string}>" @@ -1073,7 +1115,7 @@ def dicttoxml( output.write("") return output.to_bytes() - namespace_str = "" + namespace_parts: list[str] = [] if xml_namespaces is None: xml_namespaces = {} for prefix in xml_namespaces: @@ -1081,19 +1123,20 @@ def dicttoxml( for schema_att in xml_namespaces[prefix]: if schema_att == 'schemaInstance': ns = xml_namespaces[prefix]['schemaInstance'] - namespace_str += f' xmlns:{prefix}="{ns}"' + namespace_parts.append(f' xmlns:{prefix}="{ns}"') elif schema_att == 'schemaLocation': ns = xml_namespaces[prefix][schema_att] - namespace_str += f' xsi:{schema_att}="{ns}"' + namespace_parts.append(f' xsi:{schema_att}="{ns}"') elif prefix == 'xmlns': # xmns needs no prefix ns = xml_namespaces[prefix] - namespace_str += f' xmlns="{ns}"' + namespace_parts.append(f' xmlns="{ns}"') else: ns = xml_namespaces[prefix] - namespace_str += f' xmlns:{prefix}="{ns}"' + namespace_parts.append(f' xmlns:{prefix}="{ns}"') + namespace_str = "".join(namespace_parts) if root: custom_root, root_attr = make_valid_xml_name(custom_root, {}) output = _XMLWriter() diff --git a/json2xml/dicttoxml_fast.py b/json2xml/dicttoxml_fast.py index 44205d9..1ad5b36 100644 --- a/json2xml/dicttoxml_fast.py +++ b/json2xml/dicttoxml_fast.py @@ -121,7 +121,7 @@ def dicttoxml( item_wrap=item_wrap, item_func=item_func or _py_dicttoxml.default_item_func, cdata=cdata, - xml_namespaces=xml_namespaces or {}, + xml_namespaces=xml_namespaces, list_headers=list_headers, xpath_format=xpath_format, ) diff --git a/lat.md/architecture.md b/lat.md/architecture.md index 05f9f07..f27c254 100644 --- a/lat.md/architecture.md +++ b/lat.md/architecture.md @@ -14,7 +14,7 @@ The pure Python serializer recursively maps Python values to XML elements, attri [[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys; common ASCII names use cached fast validation, while parser validation remains available for non-ASCII or unusual names. Dict and list scalar paths reuse validated element names and specialize generated type attributes so common payloads avoid repeated normalization and escaping work. Special `@attrs`/`@val` handling avoids mutating caller data. -The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. +The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. Attribute formatting stays centralized through `make_attrstring()`, and `@attrs`/`@val` normalization stays local to dict element handling so caller-owned metadata is never mutated. ## Backend selection diff --git a/lat.md/tests.md b/lat.md/tests.md index 78f1201..4a4ae88 100644 --- a/lat.md/tests.md +++ b/lat.md/tests.md @@ -74,6 +74,10 @@ Falsy JSON values such as empty objects, empty arrays, zero, false, and empty st Converting dictionaries that use `@attrs` and `@val` should preserve the caller's original data so objects can be reused safely. +### Special attributes accept coercible pairs + +Attribute metadata that can be coerced with `dict()` should keep working so memory optimizations do not narrow legacy caller input. + ### Invalid XML names normalize without double escaping Invalid element names should fall back to `` with the original name escaped exactly once in the emitted attribute. @@ -138,6 +142,10 @@ Helpers that receive prevalidated XML names should add type metadata only to the Dict and list element helpers should add container type metadata only to emitted XML and must not mutate caller-owned attribute dictionaries. +### Typed attributes preserve caller attrs + +Valid-name scalar helpers should overwrite emitted type metadata while preserving caller attribute order and never mutating the input dictionary. + ### XML name validity fast and cached paths XML name validation should agree across the ASCII fast path, parser-backed path, and repeated cached calls so optimization does not change accepted names. diff --git a/tests/test_dict2xml.py b/tests/test_dict2xml.py index f54422e..b02d5c3 100644 --- a/tests/test_dict2xml.py +++ b/tests/test_dict2xml.py @@ -782,6 +782,17 @@ def test_dicttoxml_does_not_mutate_special_attribute_input(self) -> None: ) assert data == original + # @lat: [[tests#Conversion behavior#Special attributes accept coercible pairs]] + def test_dicttoxml_accepts_coercible_attribute_pairs(self) -> None: + """Test @attrs inputs accepted by dict() keep their legacy behavior.""" + result = dicttoxml.dicttoxml( + {"product": {"@attrs": [("id", "7"), ("kind", "bike")], "@val": "Road"}}, + root=False, + attr_type=False, + ) + + assert result == b'Road' + # @lat: [[tests#Conversion behavior#Invalid XML names normalize without double escaping]] def test_invalid_xml_name_fallback_escapes_name_attribute_once(self) -> None: """Test fallback name attributes are escaped once at emission time.""" @@ -1126,7 +1137,7 @@ def test_convert_list_invalid_item_name_metadata_for_scalar_paths(self) -> None: """Invalid generated list item names should preserve the original name attribute.""" item_name_result = dicttoxml.convert_list( items=[True, datetime.date(2026, 5, 27), None], - ids=[], + ids=["with_ids"], parent="items", attr_type=True, item_func=lambda _parent: "bad&key", @@ -1135,7 +1146,7 @@ def test_convert_list_invalid_item_name_metadata_for_scalar_paths(self) -> None: ) parent_name_result = dicttoxml.convert_list( items=[7], - ids=[], + ids=["with_ids"], parent="bad&parent", attr_type=True, item_func=lambda _parent: "item", @@ -1143,10 +1154,13 @@ def test_convert_list_invalid_item_name_metadata_for_scalar_paths(self) -> None: item_wrap=False, ) - assert 'true' in item_name_result - assert '2026-05-27' in item_name_result - assert '' in item_name_result - assert parent_name_result == '7' + assert 'id="items_' in item_name_result + assert 'true' in item_name_result + assert 'name="bad&key" type="str">2026-05-27' in item_name_result + assert 'name="bad&key" type="null">' in item_name_result + assert 'id="bad&parent_' in parent_name_result + assert 'name="bad&parent" type="int">7' in parent_name_result # @lat: [[tests#Conversion behavior#Valid-name scalar helper formats dates]] def test_convert_kv_valid_name_formats_date_values(self) -> None: diff --git a/tests/test_dicttoxml_unit.py b/tests/test_dicttoxml_unit.py index e047e8b..e49a35c 100644 --- a/tests/test_dicttoxml_unit.py +++ b/tests/test_dicttoxml_unit.py @@ -113,6 +113,39 @@ def test_valid_name_helpers_keep_existing_attrs_without_attr_type() -> None: assert base_attrs == {"name": "invalid key"} +# @lat: [[tests#XML helper behavior#Typed attributes preserve caller attrs]] +def test_valid_name_helpers_replace_type_attr_without_mutating_caller_attrs() -> None: + base_attrs = {"type": "caller", "id": "shared"} + + assert ( + dicttoxml.convert_kv_valid_name("name", "Bike", True, base_attrs) + == 'Bike' + ) + assert ( + dicttoxml.convert_bool_valid_name("active", True, True, base_attrs) + == 'true' + ) + assert ( + dicttoxml.convert_none_valid_name("empty", True, base_attrs) + == '' + ) + assert base_attrs == {"type": "caller", "id": "shared"} + + only_type = {"type": "caller"} + assert ( + dicttoxml.convert_bool_valid_name("active", False, True, only_type) + == 'false' + ) + assert only_type == {"type": "caller"} + + metadata_attrs = {"id": "shared", "name": "invalid key"} + assert ( + dicttoxml.convert_none_valid_name("empty", True, metadata_attrs) + == '' + ) + assert metadata_attrs == {"id": "shared", "name": "invalid key"} + + # @lat: [[tests#XML helper behavior#Container helpers preserve caller attrs]] def test_container_helpers_set_type_without_mutating_caller_attrs() -> None: dict_attrs = {"id": "shared"}