From 0b4ae9eed7a3884a16eb48269938341fec89df54 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Wed, 10 Jun 2026 00:01:30 +0530 Subject: [PATCH 1/2] perf: reduce serializer hot-path allocations Avoid per-call set intersections in XML escaping, skip attribute dictionary copies on typed scalar and @attrs paths, avoid scalar list item attr dict allocation where metadata is already reusable, and pass namespace defaults through the fast wrapper without materializing an empty mapping.

Benchmarked against 9463457 with 5 warm samples per case:
- attrs_nested: 552.518ms -> 359.204ms mean, peak 16,590,571 -> 16,580,643 bytes
- plain_strings: 800.088ms -> 530.016ms mean, peak 9,538,330 -> 9,528,594 bytes --- json2xml/dicttoxml.py | 164 +++++++++++++++++++++++++++-------- json2xml/dicttoxml_fast.py | 2 +- lat.md/architecture.md | 2 +- lat.md/tests.md | 8 ++ tests/test_dict2xml.py | 26 ++++-- tests/test_dicttoxml_unit.py | 33 +++++++ 6 files changed, 189 insertions(+), 46 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 8444080..33a05ff 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -18,7 +18,6 @@ # Set up logging LOG = logging.getLogger("dicttoxml") -_XML_ESCAPE_CHARS = frozenset("&\"'<>") class _XMLWriter: @@ -131,7 +130,7 @@ def escape_xml(s: str | int | float | numbers.Number | None) -> str: str: The escaped string.
""" if isinstance(s, str): - if not _XML_ESCAPE_CHARS.intersection(s): + if "&" not in s and '"' not in s and "'" not in s and "<" not in s and ">" not in s: return s s = s.replace("&", "&") s = s.replace('"', """) @@ -159,10 +158,35 @@ def make_attrstring(attr: dict[str, Any]) -> str: if key == "type": return f' type="{val}"' return f' {key}="{escape_xml(val)}"' - attrstring = " ".join([f'{k}="{escape_xml(v)}"' for k, v in attr.items()]) + attrstring = " ".join(f'{k}="{escape_xml(v)}"' for k, v in attr.items()) return f" {attrstring}" +def make_typed_attrstring(attr: dict[str, Any], xml_type: str) -> str: + """Create XML attributes with a type value without copying caller attrs.""" + if not attr: + return f' type="{xml_type}"' + + validate_xml_attr_names(attr) + if len(attr) == 1: + key, val = next(iter(attr.items())) + if key == "type": + return f' type="{xml_type}"' + return f' {key}="{escape_xml(val)}" type="{xml_type}"' + + attr_parts: list[str] = [] + type_written = False + for key, val in attr.items(): + if key == "type": + attr_parts.append(f'type="{xml_type}"') + type_written = True + else: + attr_parts.append(f'{key}="{escape_xml(val)}"') + if not type_written: + attr_parts.append(f'type="{xml_type}"') + return f" {' '.join(attr_parts)}" + + def _is_fast_valid_xml_name(key: str) -> bool: """Return True for ASCII XML names known to be accepted by the legacy parser.""" if not key or not key.isascii() or ":" in key: @@ -560,11 +584,16 @@ def _append_dict2xml_str( if attr_type: attr["type"] = get_xml_type(item) - val_attr = dict(item["@attrs"]) if "@attrs" in item else dict(attr) + has_custom_attrs = "@attrs" in item + if has_custom_attrs: + raw_attrs = item["@attrs"] + val_attr = raw_attrs if isinstance(raw_attrs, dict) else dict(raw_attrs) + else: + val_attr = attr if "@val" in item: rawitem = item["@val"] - elif "@attrs" in item: - rawitem = {key: value for key, value in item.items() if key != "@attrs"} + elif has_custom_attrs: + rawitem = item else: 
rawitem = item @@ -573,13 +602,46 @@ def _append_dict2xml_str( output.write(f"<{parent}{make_attrstring(val_attr)}>") else: output.write(f"<{parent}>") - _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + _append_rawitem( + output, + rawitem, + ids, + attr_type, + item_func, + cdata, + item_wrap, + item_name, + list_headers, + skip_attrs=has_custom_attrs and "@val" not in item, + ) output.write(f"") elif item.get("@flat", False) or (parentIsList and not item_wrap): - _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + _append_rawitem( + output, + rawitem, + ids, + attr_type, + item_func, + cdata, + item_wrap, + item_name, + list_headers, + skip_attrs=has_custom_attrs and "@val" not in item, + ) else: output.write(f"<{item_name}{make_attrstring(val_attr)}>") - _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + _append_rawitem( + output, + rawitem, + ids, + attr_type, + item_func, + cdata, + item_wrap, + item_name, + list_headers, + skip_attrs=has_custom_attrs and "@val" not in item, + ) output.write(f"") @@ -593,14 +655,29 @@ def _append_rawitem( item_wrap: bool, item_name: str, list_headers: bool, + skip_attrs: bool = False, ) -> None: if rawitem is None: return if isinstance(rawitem, bool): - output.write(str(rawitem).lower()) + output.write("true" if rawitem else "false") elif isinstance(rawitem, (str, numbers.Number)): output.write(escape_xml(str(rawitem))) else: + if skip_attrs and isinstance(rawitem, dict): + _append_convert_dict( + output, + rawitem, + ids, + item_name, + attr_type, + item_func, + cdata, + item_wrap, + list_headers=list_headers, + skip_key="@attrs", + ) + return _append_convert( output, rawitem, @@ -673,9 +750,12 @@ def _append_convert_dict( cdata: bool, item_wrap: bool, list_headers: bool = False, + skip_key: str | None = None, ) -> None: """Append a dict as XML without allocating a 
joined child subtree.""" for key, val in obj.items(): + if key == skip_key: + continue attr = {} if not ids else {"id": f"{get_unique_id(parent)}"} key_is_flat = isinstance(key, str) and key.endswith("@flat") xml_key = key[:-5] if key_is_flat else key @@ -752,15 +832,21 @@ def _append_convert_list( this_id = get_unique_id(parent) if ids else None for i, item in enumerate(items): - attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} - if isinstance(item, bool): - if item_name_attr: - attr.update(item_name_attr) + if ids: + attr = {"id": f"{this_id}_{i + 1}"} + if item_name_attr: + attr.update(item_name_attr) + else: + attr = item_name_attr output.write(convert_bool_valid_name(item_name, item, attr_type, attr)) elif isinstance(item, (numbers.Number, str)): - if scalar_key_attr: - attr.update(scalar_key_attr) + if ids: + attr = {"id": f"{this_id}_{i + 1}"} + if scalar_key_attr: + attr.update(scalar_key_attr) + else: + attr = scalar_key_attr output.write( convert_kv_valid_name( key=scalar_key, @@ -771,8 +857,12 @@ def _append_convert_list( ) ) elif hasattr(item, "isoformat"): - if item_name_attr: - attr.update(item_name_attr) + if ids: + attr = {"id": f"{this_id}_{i + 1}"} + if item_name_attr: + attr.update(item_name_attr) + else: + attr = item_name_attr output.write( convert_kv_valid_name( key=item_name, @@ -783,6 +873,7 @@ def _append_convert_list( ) ) elif isinstance(item, dict): + attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} _append_dict2xml_str( output, attr_type=attr_type, @@ -797,6 +888,7 @@ def _append_convert_list( list_headers=list_headers, ) elif isinstance(item, Sequence): + attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} _append_list2xml_str( output, attr_type=attr_type, @@ -809,8 +901,12 @@ def _append_convert_list( list_headers=list_headers, ) elif item is None: - if item_name_attr: - attr.update(item_name_attr) + if ids: + attr = {"id": f"{this_id}_{i + 1}"} + if item_name_attr: + attr.update(item_name_attr) + else: + attr = 
item_name_attr output.write(convert_none_valid_name(item_name, attr_type, attr)) else: raise TypeError(f"Unsupported data type: {item} ({type(item).__name__})") @@ -849,10 +945,7 @@ def convert_kv_valid_name( if hasattr(val, "isoformat") and isinstance(val, (datetime.datetime, datetime.date)): val = val.isoformat() - attr = dict(attr) - if attr_type: - attr["type"] = get_xml_type(val) - attr_string = make_attrstring(attr) + attr_string = make_typed_attrstring(attr, get_xml_type(val)) if attr_type else make_attrstring(attr) return f"<{key}{attr_string}>{wrap_cdata(val) if cdata else escape_xml(val)}" @@ -877,11 +970,8 @@ def convert_bool_valid_name( attr: dict[str, Any], ) -> str: """Converts a boolean when the caller already validated the key.""" - attr = dict(attr) - if attr_type: - attr["type"] = "bool" - attr_string = make_attrstring(attr) - return f"<{key}{attr_string}>{str(val).lower()}" + attr_string = make_typed_attrstring(attr, "bool") if attr_type else make_attrstring(attr) + return f"<{key}{attr_string}>{'true' if val else 'false'}" def convert_none( @@ -902,10 +992,7 @@ def convert_none_valid_name( key: str, attr_type: bool, attr: dict[str, Any] ) -> str: """Converts a null value when the caller already validated the key.""" - attr = dict(attr) - if attr_type: - attr["type"] = "null" - attr_string = make_attrstring(attr) + attr_string = make_typed_attrstring(attr, "null") if attr_type else make_attrstring(attr) return f"<{key}{attr_string}>" @@ -1073,7 +1160,7 @@ def dicttoxml( output.write("") return output.to_bytes() - namespace_str = "" + namespace_parts: list[str] = [] if xml_namespaces is None: xml_namespaces = {} for prefix in xml_namespaces: @@ -1081,19 +1168,20 @@ def dicttoxml( for schema_att in xml_namespaces[prefix]: if schema_att == 'schemaInstance': ns = xml_namespaces[prefix]['schemaInstance'] - namespace_str += f' xmlns:{prefix}="{ns}"' + namespace_parts.append(f' xmlns:{prefix}="{ns}"') elif schema_att == 'schemaLocation': ns = 
xml_namespaces[prefix][schema_att] - namespace_str += f' xsi:{schema_att}="{ns}"' + namespace_parts.append(f' xsi:{schema_att}="{ns}"') elif prefix == 'xmlns': # xmns needs no prefix ns = xml_namespaces[prefix] - namespace_str += f' xmlns="{ns}"' + namespace_parts.append(f' xmlns="{ns}"') else: ns = xml_namespaces[prefix] - namespace_str += f' xmlns:{prefix}="{ns}"' + namespace_parts.append(f' xmlns:{prefix}="{ns}"') + namespace_str = "".join(namespace_parts) if root: custom_root, root_attr = make_valid_xml_name(custom_root, {}) output = _XMLWriter() diff --git a/json2xml/dicttoxml_fast.py b/json2xml/dicttoxml_fast.py index 44205d9..1ad5b36 100644 --- a/json2xml/dicttoxml_fast.py +++ b/json2xml/dicttoxml_fast.py @@ -121,7 +121,7 @@ def dicttoxml( item_wrap=item_wrap, item_func=item_func or _py_dicttoxml.default_item_func, cdata=cdata, - xml_namespaces=xml_namespaces or {}, + xml_namespaces=xml_namespaces, list_headers=list_headers, xpath_format=xpath_format, ) diff --git a/lat.md/architecture.md b/lat.md/architecture.md index 05f9f07..74acd54 100644 --- a/lat.md/architecture.md +++ b/lat.md/architecture.md @@ -14,7 +14,7 @@ The pure Python serializer recursively maps Python values to XML elements, attri [[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. 
Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys; common ASCII names use cached fast validation, while parser validation remains available for non-ASCII or unusual names. Dict and list scalar paths reuse validated element names and specialize generated type attributes so common payloads avoid repeated normalization and escaping work. Special `@attrs`/`@val` handling avoids mutating caller data. -The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. +The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. Hot scalar and `@attrs` paths avoid copying attribute dictionaries when formatting already has read-only access to the needed metadata. ## Backend selection diff --git a/lat.md/tests.md b/lat.md/tests.md index 78f1201..4a4ae88 100644 --- a/lat.md/tests.md +++ b/lat.md/tests.md @@ -74,6 +74,10 @@ Falsy JSON values such as empty objects, empty arrays, zero, false, and empty st Converting dictionaries that use `@attrs` and `@val` should preserve the caller's original data so objects can be reused safely. 
+### Special attributes accept coercible pairs + +Attribute metadata that can be coerced with `dict()` should keep working so memory optimizations do not narrow legacy caller input. + ### Invalid XML names normalize without double escaping Invalid element names should fall back to `` with the original name escaped exactly once in the emitted attribute. @@ -138,6 +142,10 @@ Helpers that receive prevalidated XML names should add type metadata only to the Dict and list element helpers should add container type metadata only to emitted XML and must not mutate caller-owned attribute dictionaries. +### Typed attributes preserve caller attrs + +Valid-name scalar helpers should overwrite emitted type metadata while preserving caller attribute order and never mutating the input dictionary. + ### XML name validity fast and cached paths XML name validation should agree across the ASCII fast path, parser-backed path, and repeated cached calls so optimization does not change accepted names. diff --git a/tests/test_dict2xml.py b/tests/test_dict2xml.py index f54422e..b02d5c3 100644 --- a/tests/test_dict2xml.py +++ b/tests/test_dict2xml.py @@ -782,6 +782,17 @@ def test_dicttoxml_does_not_mutate_special_attribute_input(self) -> None: ) assert data == original + # @lat: [[tests#Conversion behavior#Special attributes accept coercible pairs]] + def test_dicttoxml_accepts_coercible_attribute_pairs(self) -> None: + """Test @attrs inputs accepted by dict() keep their legacy behavior.""" + result = dicttoxml.dicttoxml( + {"product": {"@attrs": [("id", "7"), ("kind", "bike")], "@val": "Road"}}, + root=False, + attr_type=False, + ) + + assert result == b'Road' + # @lat: [[tests#Conversion behavior#Invalid XML names normalize without double escaping]] def test_invalid_xml_name_fallback_escapes_name_attribute_once(self) -> None: """Test fallback name attributes are escaped once at emission time.""" @@ -1126,7 +1137,7 @@ def test_convert_list_invalid_item_name_metadata_for_scalar_paths(self) 
-> None: """Invalid generated list item names should preserve the original name attribute.""" item_name_result = dicttoxml.convert_list( items=[True, datetime.date(2026, 5, 27), None], - ids=[], + ids=["with_ids"], parent="items", attr_type=True, item_func=lambda _parent: "bad&key", @@ -1135,7 +1146,7 @@ def test_convert_list_invalid_item_name_metadata_for_scalar_paths(self) -> None: ) parent_name_result = dicttoxml.convert_list( items=[7], - ids=[], + ids=["with_ids"], parent="bad&parent", attr_type=True, item_func=lambda _parent: "item", @@ -1143,10 +1154,13 @@ def test_convert_list_invalid_item_name_metadata_for_scalar_paths(self) -> None: item_wrap=False, ) - assert 'true' in item_name_result - assert '2026-05-27' in item_name_result - assert '' in item_name_result - assert parent_name_result == '7' + assert 'id="items_' in item_name_result + assert 'true' in item_name_result + assert 'name="bad&key" type="str">2026-05-27' in item_name_result + assert 'name="bad&key" type="null">' in item_name_result + assert 'id="bad&parent_' in parent_name_result + assert 'name="bad&parent" type="int">7' in parent_name_result # @lat: [[tests#Conversion behavior#Valid-name scalar helper formats dates]] def test_convert_kv_valid_name_formats_date_values(self) -> None: diff --git a/tests/test_dicttoxml_unit.py b/tests/test_dicttoxml_unit.py index e047e8b..e49a35c 100644 --- a/tests/test_dicttoxml_unit.py +++ b/tests/test_dicttoxml_unit.py @@ -113,6 +113,39 @@ def test_valid_name_helpers_keep_existing_attrs_without_attr_type() -> None: assert base_attrs == {"name": "invalid key"} +# @lat: [[tests#XML helper behavior#Typed attributes preserve caller attrs]] +def test_valid_name_helpers_replace_type_attr_without_mutating_caller_attrs() -> None: + base_attrs = {"type": "caller", "id": "shared"} + + assert ( + dicttoxml.convert_kv_valid_name("name", "Bike", True, base_attrs) + == 'Bike' + ) + assert ( + dicttoxml.convert_bool_valid_name("active", True, True, base_attrs) + == 'true' + 
) + assert ( + dicttoxml.convert_none_valid_name("empty", True, base_attrs) + == '' + ) + assert base_attrs == {"type": "caller", "id": "shared"} + + only_type = {"type": "caller"} + assert ( + dicttoxml.convert_bool_valid_name("active", False, True, only_type) + == 'false' + ) + assert only_type == {"type": "caller"} + + metadata_attrs = {"id": "shared", "name": "invalid key"} + assert ( + dicttoxml.convert_none_valid_name("empty", True, metadata_attrs) + == '' + ) + assert metadata_attrs == {"id": "shared", "name": "invalid key"} + + # @lat: [[tests#XML helper behavior#Container helpers preserve caller attrs]] def test_container_helpers_set_type_without_mutating_caller_attrs() -> None: dict_attrs = {"id": "shared"} From 00c05406cc630ebc1b282c3ebe9e3cd2860a7165 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Wed, 10 Jun 2026 00:14:29 +0530 Subject: [PATCH 2/2] refactor: simplify serializer allocation cleanup --- json2xml/dicttoxml.py | 101 ++++++++++++----------------------------- lat.md/architecture.md | 2 +- 2 files changed, 29 insertions(+), 74 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 33a05ff..f136ec3 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -18,6 +18,7 @@ # Set up logging LOG = logging.getLogger("dicttoxml") +_XML_ESCAPE_CHARS = frozenset("&\"'<>") class _XMLWriter: @@ -130,7 +131,7 @@ def escape_xml(s: str | int | float | numbers.Number | None) -> str: str: The escaped string. 
""" if isinstance(s, str): - if "&" not in s and '"' not in s and "'" not in s and "<" not in s and ">" not in s: + if not _XML_ESCAPE_CHARS.intersection(s): return s s = s.replace("&", "&") s = s.replace('"', """) @@ -163,28 +164,13 @@ def make_attrstring(attr: dict[str, Any]) -> str: def make_typed_attrstring(attr: dict[str, Any], xml_type: str) -> str: - """Create XML attributes with a type value without copying caller attrs.""" + """Create XML attributes with a type value without mutating caller attrs.""" if not attr: return f' type="{xml_type}"' - validate_xml_attr_names(attr) - if len(attr) == 1: - key, val = next(iter(attr.items())) - if key == "type": - return f' type="{xml_type}"' - return f' {key}="{escape_xml(val)}" type="{xml_type}"' - - attr_parts: list[str] = [] - type_written = False - for key, val in attr.items(): - if key == "type": - attr_parts.append(f'type="{xml_type}"') - type_written = True - else: - attr_parts.append(f'{key}="{escape_xml(val)}"') - if not type_written: - attr_parts.append(f'type="{xml_type}"') - return f" {' '.join(attr_parts)}" + typed_attr = dict(attr) + typed_attr["type"] = xml_type + return make_attrstring(typed_attr) def _is_fast_valid_xml_name(key: str) -> bool: @@ -588,14 +574,12 @@ def _append_dict2xml_str( if has_custom_attrs: raw_attrs = item["@attrs"] val_attr = raw_attrs if isinstance(raw_attrs, dict) else dict(raw_attrs) + rawitem = item["@val"] if "@val" in item else { + key: value for key, value in item.items() if key != "@attrs" + } else: val_attr = attr - if "@val" in item: - rawitem = item["@val"] - elif has_custom_attrs: - rawitem = item - else: - rawitem = item + rawitem = item.get("@val", item) if parentIsList and list_headers: if len(val_attr) > 0 and not item_wrap: @@ -612,7 +596,6 @@ def _append_dict2xml_str( item_wrap, item_name, list_headers, - skip_attrs=has_custom_attrs and "@val" not in item, ) output.write(f"") elif item.get("@flat", False) or (parentIsList and not item_wrap): @@ -626,7 +609,6 @@ 
def _append_dict2xml_str( item_wrap, item_name, list_headers, - skip_attrs=has_custom_attrs and "@val" not in item, ) else: output.write(f"<{item_name}{make_attrstring(val_attr)}>") @@ -640,7 +622,6 @@ def _append_dict2xml_str( item_wrap, item_name, list_headers, - skip_attrs=has_custom_attrs and "@val" not in item, ) output.write(f"") @@ -655,7 +636,6 @@ def _append_rawitem( item_wrap: bool, item_name: str, list_headers: bool, - skip_attrs: bool = False, ) -> None: if rawitem is None: return @@ -664,20 +644,6 @@ def _append_rawitem( elif isinstance(rawitem, (str, numbers.Number)): output.write(escape_xml(str(rawitem))) else: - if skip_attrs and isinstance(rawitem, dict): - _append_convert_dict( - output, - rawitem, - ids, - item_name, - attr_type, - item_func, - cdata, - item_wrap, - list_headers=list_headers, - skip_key="@attrs", - ) - return _append_convert( output, rawitem, @@ -750,12 +716,9 @@ def _append_convert_dict( cdata: bool, item_wrap: bool, list_headers: bool = False, - skip_key: str | None = None, ) -> None: """Append a dict as XML without allocating a joined child subtree.""" for key, val in obj.items(): - if key == skip_key: - continue attr = {} if not ids else {"id": f"{get_unique_id(parent)}"} key_is_flat = isinstance(key, str) and key.endswith("@flat") xml_key = key[:-5] if key_is_flat else key @@ -832,21 +795,19 @@ def _append_convert_list( this_id = get_unique_id(parent) if ids else None for i, item in enumerate(items): + base_attr: dict[str, Any] | None = None + if ids: + base_attr = {"id": f"{this_id}_{i + 1}"} + if isinstance(item, bool): - if ids: - attr = {"id": f"{this_id}_{i + 1}"} - if item_name_attr: - attr.update(item_name_attr) - else: - attr = item_name_attr + attr = dict(base_attr) if base_attr else {} + if item_name_attr: + attr.update(item_name_attr) output.write(convert_bool_valid_name(item_name, item, attr_type, attr)) elif isinstance(item, (numbers.Number, str)): - if ids: - attr = {"id": f"{this_id}_{i + 1}"} - if 
scalar_key_attr: - attr.update(scalar_key_attr) - else: - attr = scalar_key_attr + attr = dict(base_attr) if base_attr else {} + if scalar_key_attr: + attr.update(scalar_key_attr) output.write( convert_kv_valid_name( key=scalar_key, @@ -857,12 +818,9 @@ def _append_convert_list( ) ) elif hasattr(item, "isoformat"): - if ids: - attr = {"id": f"{this_id}_{i + 1}"} - if item_name_attr: - attr.update(item_name_attr) - else: - attr = item_name_attr + attr = dict(base_attr) if base_attr else {} + if item_name_attr: + attr.update(item_name_attr) output.write( convert_kv_valid_name( key=item_name, @@ -873,7 +831,7 @@ def _append_convert_list( ) ) elif isinstance(item, dict): - attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} + attr = dict(base_attr) if base_attr else {} _append_dict2xml_str( output, attr_type=attr_type, @@ -888,7 +846,7 @@ def _append_convert_list( list_headers=list_headers, ) elif isinstance(item, Sequence): - attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} + attr = dict(base_attr) if base_attr else {} _append_list2xml_str( output, attr_type=attr_type, @@ -901,12 +859,9 @@ def _append_convert_list( list_headers=list_headers, ) elif item is None: - if ids: - attr = {"id": f"{this_id}_{i + 1}"} - if item_name_attr: - attr.update(item_name_attr) - else: - attr = item_name_attr + attr = dict(base_attr) if base_attr else {} + if item_name_attr: + attr.update(item_name_attr) output.write(convert_none_valid_name(item_name, attr_type, attr)) else: raise TypeError(f"Unsupported data type: {item} ({type(item).__name__})") diff --git a/lat.md/architecture.md b/lat.md/architecture.md index 74acd54..f27c254 100644 --- a/lat.md/architecture.md +++ b/lat.md/architecture.md @@ -14,7 +14,7 @@ The pure Python serializer recursively maps Python values to XML elements, attri [[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. 
It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys; common ASCII names use cached fast validation, while parser validation remains available for non-ASCII or unusual names. Dict and list scalar paths reuse validated element names and specialize generated type attributes so common payloads avoid repeated normalization and escaping work. Special `@attrs`/`@val` handling avoids mutating caller data. -The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. Hot scalar and `@attrs` paths avoid copying attribute dictionaries when formatting already has read-only access to the needed metadata. +The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. 
Attribute formatting stays centralized through `make_attrstring()`, and `@attrs`/`@val` normalization stays local to dict element handling so caller-owned metadata is never mutated. ## Backend selection