Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 73 additions & 30 deletions json2xml/dicttoxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,20 @@ def make_attrstring(attr: dict[str, Any]) -> str:
if key == "type":
return f' type="{val}"'
return f' {key}="{escape_xml(val)}"'
attrstring = " ".join([f'{k}="{escape_xml(v)}"' for k, v in attr.items()])
attrstring = " ".join(f'{k}="{escape_xml(v)}"' for k, v in attr.items())
return f" {attrstring}"


def make_typed_attrstring(attr: dict[str, Any], xml_type: str) -> str:
    """Build an XML attribute string that carries a ``type`` attribute.

    The caller's ``attr`` mapping is never modified: when it is non-empty a
    shallow copy receives the ``type`` entry and is formatted instead.

    Args:
        attr: Attributes to emit on the element; may be empty.
        xml_type: Value for the ``type`` attribute. It is written verbatim on
            the empty-``attr`` path, so it is presumably a fixed type label
            (e.g. ``"str"``, ``"bool"``, ``"null"``) -- confirm against callers.

    Returns:
        The attribute string, including its leading space.
    """
    # Fast path: with no other attributes there is nothing to copy or join.
    if not attr:
        return f' type="{xml_type}"'

    # Copy before assigning so an existing "type" key is overridden only in
    # the emitted XML; assigning to an existing key keeps its original position.
    typed_attr = dict(attr)
    typed_attr["type"] = xml_type
    return make_attrstring(typed_attr)


def _is_fast_valid_xml_name(key: str) -> bool:
"""Return True for ASCII XML names known to be accepted by the legacy parser."""
if not key or not key.isascii() or ":" in key:
Expand Down Expand Up @@ -560,26 +570,59 @@ def _append_dict2xml_str(

if attr_type:
attr["type"] = get_xml_type(item)
val_attr = dict(item["@attrs"]) if "@attrs" in item else dict(attr)
if "@val" in item:
rawitem = item["@val"]
elif "@attrs" in item:
rawitem = {key: value for key, value in item.items() if key != "@attrs"}
has_custom_attrs = "@attrs" in item
if has_custom_attrs:
raw_attrs = item["@attrs"]
val_attr = raw_attrs if isinstance(raw_attrs, dict) else dict(raw_attrs)
rawitem = item["@val"] if "@val" in item else {
key: value for key, value in item.items() if key != "@attrs"
}
else:
rawitem = item
val_attr = attr
rawitem = item.get("@val", item)

if parentIsList and list_headers:
if len(val_attr) > 0 and not item_wrap:
output.write(f"<{parent}{make_attrstring(val_attr)}>")
else:
output.write(f"<{parent}>")
_append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers)
_append_rawitem(
output,
rawitem,
ids,
attr_type,
item_func,
cdata,
item_wrap,
item_name,
list_headers,
)
output.write(f"</{parent}>")
elif item.get("@flat", False) or (parentIsList and not item_wrap):
_append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers)
_append_rawitem(
output,
rawitem,
ids,
attr_type,
item_func,
cdata,
item_wrap,
item_name,
list_headers,
)
else:
output.write(f"<{item_name}{make_attrstring(val_attr)}>")
_append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers)
_append_rawitem(
output,
rawitem,
ids,
attr_type,
item_func,
cdata,
item_wrap,
item_name,
list_headers,
)
output.write(f"</{item_name}>")


Expand All @@ -597,7 +640,7 @@ def _append_rawitem(
if rawitem is None:
return
if isinstance(rawitem, bool):
output.write(str(rawitem).lower())
output.write("true" if rawitem else "false")
elif isinstance(rawitem, (str, numbers.Number)):
output.write(escape_xml(str(rawitem)))
else:
Expand Down Expand Up @@ -752,13 +795,17 @@ def _append_convert_list(
this_id = get_unique_id(parent) if ids else None

for i, item in enumerate(items):
attr = {} if not ids else {"id": f"{this_id}_{i + 1}"}
base_attr: dict[str, Any] | None = None
if ids:
base_attr = {"id": f"{this_id}_{i + 1}"}

if isinstance(item, bool):
attr = dict(base_attr) if base_attr else {}
if item_name_attr:
attr.update(item_name_attr)
output.write(convert_bool_valid_name(item_name, item, attr_type, attr))
elif isinstance(item, (numbers.Number, str)):
attr = dict(base_attr) if base_attr else {}
if scalar_key_attr:
attr.update(scalar_key_attr)
output.write(
Expand All @@ -771,6 +818,7 @@ def _append_convert_list(
)
)
elif hasattr(item, "isoformat"):
attr = dict(base_attr) if base_attr else {}
if item_name_attr:
attr.update(item_name_attr)
output.write(
Expand All @@ -783,6 +831,7 @@ def _append_convert_list(
)
)
elif isinstance(item, dict):
attr = dict(base_attr) if base_attr else {}
_append_dict2xml_str(
output,
attr_type=attr_type,
Expand All @@ -797,6 +846,7 @@ def _append_convert_list(
list_headers=list_headers,
)
elif isinstance(item, Sequence):
attr = dict(base_attr) if base_attr else {}
_append_list2xml_str(
output,
attr_type=attr_type,
Expand All @@ -809,6 +859,7 @@ def _append_convert_list(
list_headers=list_headers,
)
elif item is None:
attr = dict(base_attr) if base_attr else {}
if item_name_attr:
attr.update(item_name_attr)
output.write(convert_none_valid_name(item_name, attr_type, attr))
Expand Down Expand Up @@ -849,10 +900,7 @@ def convert_kv_valid_name(
if hasattr(val, "isoformat") and isinstance(val, (datetime.datetime, datetime.date)):
val = val.isoformat()

attr = dict(attr)
if attr_type:
attr["type"] = get_xml_type(val)
attr_string = make_attrstring(attr)
attr_string = make_typed_attrstring(attr, get_xml_type(val)) if attr_type else make_attrstring(attr)
return f"<{key}{attr_string}>{wrap_cdata(val) if cdata else escape_xml(val)}</{key}>"


Expand All @@ -877,11 +925,8 @@ def convert_bool_valid_name(
attr: dict[str, Any],
) -> str:
"""Converts a boolean when the caller already validated the key."""
attr = dict(attr)
if attr_type:
attr["type"] = "bool"
attr_string = make_attrstring(attr)
return f"<{key}{attr_string}>{str(val).lower()}</{key}>"
attr_string = make_typed_attrstring(attr, "bool") if attr_type else make_attrstring(attr)
return f"<{key}{attr_string}>{'true' if val else 'false'}</{key}>"


def convert_none(
Expand All @@ -902,10 +947,7 @@ def convert_none_valid_name(
key: str, attr_type: bool, attr: dict[str, Any]
) -> str:
"""Converts a null value when the caller already validated the key."""
attr = dict(attr)
if attr_type:
attr["type"] = "null"
attr_string = make_attrstring(attr)
attr_string = make_typed_attrstring(attr, "null") if attr_type else make_attrstring(attr)
return f"<{key}{attr_string}></{key}>"


Expand Down Expand Up @@ -1073,27 +1115,28 @@ def dicttoxml(
output.write("</map>")
return output.to_bytes()

namespace_str = ""
namespace_parts: list[str] = []
if xml_namespaces is None:
xml_namespaces = {}
for prefix in xml_namespaces:
if prefix == 'xsi':
for schema_att in xml_namespaces[prefix]:
if schema_att == 'schemaInstance':
ns = xml_namespaces[prefix]['schemaInstance']
namespace_str += f' xmlns:{prefix}="{ns}"'
namespace_parts.append(f' xmlns:{prefix}="{ns}"')
elif schema_att == 'schemaLocation':
ns = xml_namespaces[prefix][schema_att]
namespace_str += f' xsi:{schema_att}="{ns}"'
namespace_parts.append(f' xsi:{schema_att}="{ns}"')

elif prefix == 'xmlns':
# xmlns needs no prefix
ns = xml_namespaces[prefix]
namespace_str += f' xmlns="{ns}"'
namespace_parts.append(f' xmlns="{ns}"')

else:
ns = xml_namespaces[prefix]
namespace_str += f' xmlns:{prefix}="{ns}"'
namespace_parts.append(f' xmlns:{prefix}="{ns}"')
namespace_str = "".join(namespace_parts)
if root:
custom_root, root_attr = make_valid_xml_name(custom_root, {})
output = _XMLWriter()
Expand Down
2 changes: 1 addition & 1 deletion json2xml/dicttoxml_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def dicttoxml(
item_wrap=item_wrap,
item_func=item_func or _py_dicttoxml.default_item_func,
cdata=cdata,
xml_namespaces=xml_namespaces or {},
xml_namespaces=xml_namespaces,
list_headers=list_headers,
xpath_format=xpath_format,
)
Expand Down
2 changes: 1 addition & 1 deletion lat.md/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ The pure Python serializer recursively maps Python values to XML elements, attri

[[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys; common ASCII names use cached fast validation, while parser validation remains available for non-ASCII or unusual names. Dict and list scalar paths reuse validated element names and specialize generated type attributes so common payloads avoid repeated normalization and escaping work. Special `@attrs`/`@val` handling avoids mutating caller data.

The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object.
The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. Attribute formatting stays centralized through `make_attrstring()`, and `@attrs`/`@val` normalization stays local to dict element handling so caller-owned metadata is never mutated.

## Backend selection

Expand Down
8 changes: 8 additions & 0 deletions lat.md/tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ Falsy JSON values such as empty objects, empty arrays, zero, false, and empty st

Converting dictionaries that use `@attrs` and `@val` should preserve the caller's original data so objects can be reused safely.

### Special attributes accept coercible pairs

Attribute metadata that can be coerced with `dict()` should keep working so memory optimizations do not narrow legacy caller input.

### Invalid XML names normalize without double escaping

Invalid element names should fall back to `<key name="...">` with the original name escaped exactly once in the emitted attribute.
Expand Down Expand Up @@ -138,6 +142,10 @@ Helpers that receive prevalidated XML names should add type metadata only to the

Dict and list element helpers should add container type metadata only to emitted XML and must not mutate caller-owned attribute dictionaries.

### Typed attributes preserve caller attrs

Valid-name scalar helpers should overwrite emitted type metadata while preserving caller attribute order and never mutating the input dictionary.

### XML name validity fast and cached paths

XML name validation should agree across the ASCII fast path, parser-backed path, and repeated cached calls so optimization does not change accepted names.
Expand Down
26 changes: 20 additions & 6 deletions tests/test_dict2xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,17 @@ def test_dicttoxml_does_not_mutate_special_attribute_input(self) -> None:
)
assert data == original

# @lat: [[tests#Conversion behavior#Special attributes accept coercible pairs]]
def test_dicttoxml_accepts_coercible_attribute_pairs(self) -> None:
    """Check that ``@attrs`` supplied as key/value pairs still serializes."""
    # A list of 2-tuples is not a dict, but it is something dict() accepts.
    attr_pairs = [("id", "7"), ("kind", "bike")]
    payload = {"product": {"@attrs": attr_pairs, "@val": "Road"}}

    xml_bytes = dicttoxml.dicttoxml(payload, root=False, attr_type=False)

    assert xml_bytes == b'<product id="7" kind="bike">Road</product>'

# @lat: [[tests#Conversion behavior#Invalid XML names normalize without double escaping]]
def test_invalid_xml_name_fallback_escapes_name_attribute_once(self) -> None:
"""Test fallback name attributes are escaped once at emission time."""
Expand Down Expand Up @@ -1126,7 +1137,7 @@ def test_convert_list_invalid_item_name_metadata_for_scalar_paths(self) -> None:
"""Invalid generated list item names should preserve the original name attribute."""
item_name_result = dicttoxml.convert_list(
items=[True, datetime.date(2026, 5, 27), None],
ids=[],
ids=["with_ids"],
parent="items",
attr_type=True,
item_func=lambda _parent: "bad&key",
Expand All @@ -1135,18 +1146,21 @@ def test_convert_list_invalid_item_name_metadata_for_scalar_paths(self) -> None:
)
parent_name_result = dicttoxml.convert_list(
items=[7],
ids=[],
ids=["with_ids"],
parent="bad&parent",
attr_type=True,
item_func=lambda _parent: "item",
cdata=False,
item_wrap=False,
)

assert '<key name="bad&amp;key" type="bool">true</key>' in item_name_result
assert '<key name="bad&amp;key" type="str">2026-05-27</key>' in item_name_result
assert '<key name="bad&amp;key" type="null"></key>' in item_name_result
assert parent_name_result == '<key name="bad&amp;parent" type="int">7</key>'
assert 'id="items_' in item_name_result
assert '<key id="' in item_name_result
assert 'name="bad&amp;key" type="bool">true</key>' in item_name_result
assert 'name="bad&amp;key" type="str">2026-05-27</key>' in item_name_result
assert 'name="bad&amp;key" type="null"></key>' in item_name_result
assert 'id="bad&amp;parent_' in parent_name_result
assert 'name="bad&amp;parent" type="int">7</key>' in parent_name_result

# @lat: [[tests#Conversion behavior#Valid-name scalar helper formats dates]]
def test_convert_kv_valid_name_formats_date_values(self) -> None:
Expand Down
33 changes: 33 additions & 0 deletions tests/test_dicttoxml_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,39 @@ def test_valid_name_helpers_keep_existing_attrs_without_attr_type() -> None:
assert base_attrs == {"name": "invalid key"}


# @lat: [[tests#XML helper behavior#Typed attributes preserve caller attrs]]
def test_valid_name_helpers_replace_type_attr_without_mutating_caller_attrs() -> None:
    """Check that typed helpers override ``type`` in output but not in the input dict."""
    # Caller attrs already carry a "type" entry; it must be replaced in the
    # emitted XML (keeping its leading position) and left alone in the dict.
    base_attrs = {"type": "caller", "id": "shared"}

    kv_xml = dicttoxml.convert_kv_valid_name("name", "Bike", True, base_attrs)
    assert kv_xml == '<name type="str" id="shared">Bike</name>'

    bool_xml = dicttoxml.convert_bool_valid_name("active", True, True, base_attrs)
    assert bool_xml == '<active type="bool" id="shared">true</active>'

    none_xml = dicttoxml.convert_none_valid_name("empty", True, base_attrs)
    assert none_xml == '<empty type="null" id="shared"></empty>'

    assert base_attrs == {"type": "caller", "id": "shared"}

    # A dict holding nothing but "type" should still be overridden, not merged.
    only_type = {"type": "caller"}
    false_xml = dicttoxml.convert_bool_valid_name("active", False, True, only_type)
    assert false_xml == '<active type="bool">false</active>'
    assert only_type == {"type": "caller"}

    # Without a pre-existing "type" key the generated one is appended last.
    metadata_attrs = {"id": "shared", "name": "invalid key"}
    appended_xml = dicttoxml.convert_none_valid_name("empty", True, metadata_attrs)
    assert appended_xml == '<empty id="shared" name="invalid key" type="null"></empty>'
    assert metadata_attrs == {"id": "shared", "name": "invalid key"}


# @lat: [[tests#XML helper behavior#Container helpers preserve caller attrs]]
def test_container_helpers_set_type_without_mutating_caller_attrs() -> None:
dict_attrs = {"id": "shared"}
Expand Down
Loading