diff --git a/examples/booleanjson.json b/examples/booleanjson.json index a784c7b7..a00ed5e8 100644 --- a/examples/booleanjson.json +++ b/examples/booleanjson.json @@ -4,5 +4,6 @@ {"boolean_dict": {"boolean": true}}, {"boolean_dict": {"boolean": false}} ], - "boolean_list": [true, false] + "boolean_list": [true, false], + "null_atr": null } diff --git a/examples/booleanjson2.json b/examples/booleanjson2.json index 95a35a8e..2cab73b9 100644 --- a/examples/booleanjson2.json +++ b/examples/booleanjson2.json @@ -1,5 +1,7 @@ { "boolean_list": [true, false], "number_array": [1, 2, 3], - "string_array": ["a", "b", "c"] -} \ No newline at end of file + "string_array": ["a", "b", "c"], + "null_str": null, + "time": "2010-04-20T20:08:21.634121" +} diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 8c57a573..896c1ef2 100755 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -1,3 +1,5 @@ +from __future__ import annotations + # coding: utf-8 """ @@ -10,26 +12,27 @@ This module works with Python 3.7+ """ -import collections +import datetime import logging import numbers +from collections.abc import Callable, Sequence from random import randint -from typing import Any, Dict +from typing import Any, Dict, List, Optional, Tuple, Union from defusedxml.minidom import parseString LOG = logging.getLogger("dicttoxml") -ids = [] # initialize list of unique ids +ids: List[str] = [] # initialize list of unique ids -def make_id(element, start=100000, end=999999): +def make_id(element: str, start: int = 100000, end: int = 999999) -> str: """Returns a random integer""" return f"{element}_{randint(start, end)}" -def get_unique_id(element): +def get_unique_id(element: str) -> str: """Returns a unique id for a given element""" this_id = make_id(element) dup = True @@ -42,28 +45,43 @@ def get_unique_id(element): return ids[-1] -def get_xml_type(val): +ELEMENT = Union[ + str, + int, + float, + bool, + numbers.Number, + Sequence, + datetime.datetime, + datetime.date, + None, + Dict[str, Any], +] + + +def get_xml_type(val: ELEMENT) -> str: """Returns the data type for the xml type attribute""" - if type(val).__name__ in ("str", "unicode"): - return "str" - if type(val).__name__ in ("int", "long"): - return "int" - if type(val).__name__ == "float": - return "float" - if type(val).__name__ == "bool": - return "bool" - if isinstance(val, numbers.Number): - return "number" - if type(val).__name__ == "NoneType": + if val is not None: + if type(val).__name__ in ("str", "unicode"): + return "str" + if type(val).__name__ in ("int", "long"): + return "int" + if type(val).__name__ == "float": + return "float" + if type(val).__name__ == "bool": + return "bool" + if isinstance(val, numbers.Number): + return "number" + if isinstance(val, dict): + return "dict" + if isinstance(val, Sequence): + return "list" + else: return "null" - if isinstance(val, dict): - return "dict" - if isinstance(val, collections.abc.Iterable): - return "list" return type(val).__name__ -def escape_xml(s: str) -> str: +def escape_xml(s: Union[str, numbers.Number]) -> str: if isinstance(s, str): s = str(s) # avoid UnicodeDecodeError s = s.replace("&", "&") @@ -71,16 +89,16 @@ def escape_xml(s: str) -> str: s = s.replace("'", "'") s = s.replace("<", "<") s = s.replace(">", ">") - return s + return str(s) -def make_attrstring(attr): +def make_attrstring(attr: dict[str, Any]) -> str: """Returns an attribute string in the form key="val" """ attrstring = " ".join([f'{k}="{v}"' for k, v in attr.items()]) return f'{" " if attrstring != "" else ""}{attrstring}' -def key_is_valid_xml(key): +def key_is_valid_xml(key: str) -> bool: """Checks that a key is a valid XML name""" LOG.info(f'Inside key_is_valid_xml(). Testing "{str(key)}"') test_xml = f'<{key}>foo' @@ -91,7 +109,7 @@ def key_is_valid_xml(key): return False -def make_valid_xml_name(key, attr: Dict[str, Any]): +def make_valid_xml_name(key: str, attr: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]: """Tests an XML name and fixes it if invalid""" LOG.info( f'Inside make_valid_xml_name(). Testing key "{str(key)}" with attr "{str(attr)}"' @@ -123,17 +141,25 @@ def make_valid_xml_name(key, attr: Dict[str, Any]): return key, attr -def wrap_cdata(s: str) -> str: +def wrap_cdata(s: Union[str, numbers.Number]) -> str: """Wraps a string into CDATA sections""" s = str(s).replace("]]>", "]]]]>") return "" -def default_item_func(parent): +def default_item_func(parent: str) -> str: return "item" -def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): +def convert( + obj: ELEMENT, + ids: Any, + attr_type: bool, + item_func: Callable[[str], str], + cdata: bool, + item_wrap: bool, + parent: str = "root", +) -> str: """Routes the elements of an object to the right function to convert them based on their data type""" LOG.info(f'Inside convert(). type(obj)="{type(obj).__name__}"') @@ -147,14 +173,21 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): # here, we just change order and check for bool first, because no other # type other than bool can be true for bool check if isinstance(obj, bool): - return convert_bool(item_name, obj, attr_type, cdata) + return convert_bool(key=item_name, val=obj, attr_type=attr_type, cdata=cdata) + + if isinstance(obj, numbers.Number): + return convert_kv( + key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata + ) - if isinstance(obj, (numbers.Number, str)): + if isinstance(obj, str): return convert_kv( key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata ) - if hasattr(obj, "isoformat"): + if hasattr(obj, "isoformat") and isinstance( + obj, (datetime.datetime, datetime.date) + ): return convert_kv( key=item_name, val=obj.isoformat(), @@ -164,25 +197,39 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): ) if obj is None: - return convert_none(item_name, "", attr_type, cdata) + return convert_none(key=item_name, attr_type=attr_type, cdata=cdata) if isinstance(obj, dict): return convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap) - if isinstance(obj, collections.abc.Iterable): + if isinstance(obj, Sequence): return convert_list(obj, ids, parent, attr_type, item_func, cdata, item_wrap) raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})") -def is_primitive_type(val): +def is_primitive_type(val: Any) -> bool: t = get_xml_type(val) - return t in {'str', 'int', 'float', 'bool', 'number', 'null'} - - -def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, parentIsList): - keys_str = ', '.join(str(key) for key in item) - LOG.info(f'Inside dict_item2xml_str: type(obj)="{type(item).__name__}", keys="{keys_str}"') + return t in {"str", "int", "float", "bool", "number", "null"} + + +def dict2xml_str( + attr_type: bool, + attr: Dict[str, Any], + item: Dict[str, Any], + item_func: Callable[[str], str], + cdata: bool, + item_name: str, + item_wrap: bool, + parentIsList: bool, +) -> str: + """ + parse dict2xml + """ + keys_str = ", ".join(str(key) for key in item) + LOG.info( + f'Inside dict_item2xml_str: type(obj)="{type(item).__name__}", keys="{keys_str}"' + ) # avoid cpu consuming object serialization => extra if if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' item="{str(item)}"') @@ -195,40 +242,70 @@ def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, subtree = rawitem else: # we can not use convert_dict, because rawitem could be non-dict - subtree = convert(rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name) + subtree = convert( + rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name + ) if item.get("@flat", False) or (parentIsList and not item_wrap): return subtree attrstring = make_attrstring(attr) return f"<{item_name}{attrstring}>{subtree}" -def list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): +def list2xml_str( + attr_type: bool, + attr: Dict[str, Any], + item: Sequence[Any], + item_func: Callable[[str], str], + cdata: bool, + item_name: str, + item_wrap: bool, +) -> str: if attr_type: attr["type"] = get_xml_type(item) flat = False - if item_name.endswith('@flat'): + if item_name.endswith("@flat"): item_name = item_name[0:-5] flat = True - subtree = convert_list(item, ids, item_name, attr_type, item_func, cdata, item_wrap) + subtree = convert_list( + items=item, + ids=ids, + parent=item_name, + attr_type=attr_type, + item_func=item_func, + cdata=cdata, + item_wrap=item_wrap, + ) if flat or (len(item) > 0 and is_primitive_type(item[0]) and not item_wrap): return subtree attrstring = make_attrstring(attr) return f"<{item_name}{attrstring}>{subtree}" -def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): +def convert_dict( + obj: Dict[str, Any], + ids: List[str], + parent: str, + attr_type: bool, + item_func: Callable[[str], str], + cdata: bool, + item_wrap: bool, +) -> str: """Converts a dict into an XML string.""" - keys_str = ', '.join(str(key) for key in obj) - LOG.info(f'Inside convert_dict(): type(obj)="{type(obj).__name__}", keys="{keys_str}"') + keys_str = ", ".join(str(key) for key in obj) + LOG.info( + f'Inside convert_dict(): type(obj)="{type(obj).__name__}", keys="{keys_str}"' + ) # avoid cpu consuming object serialization => extra if if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') - output = [] + output: List[str] = [] addline = output.append for key, val in obj.items(): - LOG.info(f'Looping inside convert_dict(): key="{str(key)}", type(val)="{type(val).__name__}"') + LOG.info( + f'Looping inside convert_dict(): key="{str(key)}", type(val)="{type(val).__name__}"' + ) if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' val="{str(val)}"') @@ -262,13 +339,27 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): ) elif isinstance(val, dict): - addline(dict2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap, False)) + addline( + dict2xml_str( + attr_type, attr, val, item_func, cdata, key, item_wrap, False + ) + ) - elif isinstance(val, collections.abc.Iterable): - addline(list2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap)) + elif isinstance(val, Sequence): + addline( + list2xml_str( + attr_type=attr_type, + attr=attr, + item=val, + item_func=item_func, + cdata=cdata, + item_name=key, + item_wrap=item_wrap, + ) + ) elif not val: - addline(convert_none(key, val, attr_type, attr, cdata)) + addline(convert_none(key, attr_type, attr, cdata)) else: raise TypeError(f"Unsupported data type: {val} ({type(val).__name__})") @@ -276,25 +367,35 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): return "".join(output) -def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): +def convert_list( + items: Sequence[Any], + ids: List[str], + parent: str, + attr_type: bool, + item_func: Callable[[str], str], + cdata: bool, + item_wrap: bool, +) -> str: """Converts a list into an XML string.""" LOG.info(f'Inside convert_list(): type(items)="{type(items).__name__}"') # avoid cpu consuming object serialization => extra if if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' items="{str(items)}"') - output = [] + output: List[str] = [] addline = output.append item_name = item_func(parent) - if item_name.endswith('@flat'): + if item_name.endswith("@flat"): item_name = item_name[:-5] this_id = None if ids: this_id = get_unique_id(parent) for i, item in enumerate(items): - LOG.info(f'Looping inside convert_list(): index="{str(i)}", type="{type(item).__name__}"') + LOG.info( + f'Looping inside convert_list(): index="{str(i)}", type="{type(item).__name__}"' + ) # avoid cpu consuming object serialization => extra if if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' item="{str(item)}"') @@ -338,25 +439,44 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): ) elif isinstance(item, dict): - addline(dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, True)) + addline( + dict2xml_str( + attr_type, attr, item, item_func, cdata, item_name, item_wrap, True + ) + ) - elif isinstance(item, collections.abc.Iterable): - addline(list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap)) + elif isinstance(item, Sequence): + addline( + list2xml_str( + attr_type=attr_type, + attr=attr, + item=item, + item_func=item_func, + cdata=cdata, + item_name=item_name, + item_wrap=item_wrap, + ) + ) elif item is None: - addline(convert_none(item_name, None, attr_type, attr, cdata)) + addline(convert_none(item_name, attr_type, attr, cdata)) else: raise TypeError(f"Unsupported data type: {item} ({type(item).__name__})") return "".join(output) -def convert_kv(key, val, attr_type, attr={}, cdata: bool = False): +def convert_kv( + key: str, + val: Union[str, numbers.Number], + attr_type: bool, + attr: Dict[str, Any] = {}, + cdata: bool = False, +) -> str: """Converts a number or string into an XML element""" LOG.info( f'Inside convert_kv(): key="{str(key)}", val="{str(val)}", type(val) is: "{type(val).__name__}"' ) - key, attr = make_valid_xml_name(key, attr) if attr_type: @@ -365,12 +485,13 @@ def convert_kv(key, val, attr_type, attr={}, cdata: bool = False): return f"<{key}{attrstring}>{wrap_cdata(val) if cdata else escape_xml(val)}" -def convert_bool(key, val, attr_type, attr={}, cdata=False): +def convert_bool( + key: str, val: bool, attr_type: bool, attr: Dict[str, Any] = {}, cdata: bool = False +) -> str: """Converts a boolean into an XML element""" LOG.info( f'Inside convert_bool(): key="{str(key)}", val="{str(val)}", type(val) is: "{type(val).__name__}"' ) - key, attr = make_valid_xml_name(key, attr) if attr_type: @@ -379,29 +500,30 @@ def convert_bool(key, val, attr_type, attr={}, cdata=False): return f"<{key}{attrstring}>{str(val).lower()}" -def convert_none(key, val, attr_type, attr={}, cdata=False): +def convert_none( + key: str, attr_type: bool, attr: Dict[str, Any] = {}, cdata: bool = False +) -> str: """Converts a null value into an XML element""" - LOG.info(f'Inside convert_none(): key="{str(key)}"') - + # LOG.info(f'Inside convert_none(): key="{str(key)}" val={type(val)}') key, attr = make_valid_xml_name(key, attr) if attr_type: - attr["type"] = get_xml_type(val) + attr["type"] = get_xml_type(None) attrstring = make_attrstring(attr) return f"<{key}{attrstring}>" def dicttoxml( - obj, + obj: Dict[str, Any], root: bool = True, - custom_root="root", - ids=False, - attr_type=True, - item_wrap=True, - item_func=default_item_func, - cdata=False, - xml_namespaces={} -): + custom_root: str = "root", + ids: Optional[List[int]] = None, + attr_type: bool = True, + item_wrap: bool = True, + item_func: Callable[[str], str] = default_item_func, + cdata: bool = False, + xml_namespaces: dict[str, Any] = {}, +) -> bytes: """Converts a python object into XML. Arguments: - root specifies whether the output is wrapped in an XML root element @@ -432,13 +554,15 @@ def dicttoxml( {'list': {'@attrs': {'a':'b','c':'d'}, '@val': [4, 5, 6]} which results in 456 """ - LOG.info(f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}"') + LOG.info( + f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}", type(ids") is : {type(ids).__name__}' + ) # avoid cpu consuming object serialization (problem for large objects) => extra if if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') output = [] - namespacestr = '' + namespacestr = "" for prefix in xml_namespaces: ns = xml_namespaces[prefix] namespacestr += f' xmlns:{prefix}="{ns}"' diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index b85f1d13..7ab03711 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -12,7 +12,7 @@ import json from json2xml import json2xml -from json2xml.dicttoxml import dicttoxml +from json2xml import dicttoxml from json2xml.utils import InvalidDataError, readfromjson, readfromstring, readfromurl, \ JSONReadError, StringReadError, URLReadError @@ -157,17 +157,17 @@ def test_dicttoxml_bug(self): def test_dict2xml_no_root(self): payload = {'mock': 'payload'} - result = dicttoxml(payload, attr_type=False, root=False) + result = dicttoxml.dicttoxml(payload, attr_type=False, root=False) assert b'payload' == result def test_dict2xml_with_root(self): payload = {'mock': 'payload'} - result = dicttoxml(payload, attr_type=False) + result = dicttoxml.dicttoxml(payload, attr_type=False) assert b'payload' == result def test_dict2xml_with_custom_root(self): payload = {'mock': 'payload'} - result = dicttoxml(payload, attr_type=False, custom_root="element") + result = dicttoxml.dicttoxml(payload, attr_type=False, custom_root="element") assert b'payload' == result def test_bad_data(self): @@ -211,7 +211,7 @@ def test_read_boolean_data_from_json2(self): def test_dict2xml_with_namespaces(self): data = {'ns1:node1': 'data in namespace 1', 'ns2:node2': 'data in namespace 2'} namespaces = {'ns1': 'http://www.google.de/ns1', 'ns2': 'http://www.google.de/ns2'} - result = dicttoxml(data, attr_type=False, xml_namespaces=namespaces) + result = dicttoxml.dicttoxml(data, attr_type=False, xml_namespaces=namespaces) print(result) assert b'' \ b'' \ @@ -221,7 +221,7 @@ def test_dict2xml_with_namespaces(self): def test_dict2xml_with_flat(self): data = {'flat_list@flat': [1, 2, 3], 'non_flat_list': [4, 5, 6]} - result = dicttoxml(data, attr_type=False) + result = dicttoxml.dicttoxml(data, attr_type=False) print(result) assert b'' \ b'123' \ @@ -231,9 +231,26 @@ def test_dict2xml_with_flat(self): def test_dict2xml_with_val_and_custom_attr(self): # in order to use @attr in non-dict objects, we need to lift into a dict and combine with @val as key data = {'list1': [1, 2, 3], 'list2': {'@attrs': {'myattr1': 'myval1', 'myattr2': 'myval2'}, '@val': [4, 5, 6]}} - result = dicttoxml(data, attr_type=False) + result = dicttoxml.dicttoxml(data, attr_type=False) print(result) assert b'' \ b'123' \ b'456' \ b'' == result + + def test_make_id(self): + make_id_elem = dicttoxml.make_id("li") + assert 'li' in make_id_elem + + def test_get_unique_id(self): + unique_id_elem_1 = dicttoxml.get_unique_id("li") + unique_id_elem_2 = dicttoxml.get_unique_id("li") + unique_id_elem_3 = dicttoxml.get_unique_id("li") + unique_id_elem_4 = dicttoxml.get_unique_id("li") + assert len(list(set({unique_id_elem_1, unique_id_elem_2, unique_id_elem_3, unique_id_elem_4}))) == 4 + + def test_get_xml_type(self): + assert dicttoxml.get_xml_type("abc") == "str" + assert dicttoxml.get_xml_type(1) == "int" + assert dicttoxml.get_xml_type(True) == "bool" + assert dicttoxml.get_xml_type({}) == "dict"