diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index aef5bbe151cfeba..e13b2c9db490a14 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -496,7 +496,7 @@ subscript notation ``a[k]`` selects the item indexed by ``k`` from the mapping :keyword:`del` statements. The built-in function :func:`len` returns the number of items in a mapping. -There is currently a single intrinsic mapping type: +There are two intrinsic mapping types: Dictionaries @@ -535,6 +535,20 @@ module. an implementation detail at that time rather than a language guarantee. +Frozen dictionaries +^^^^^^^^^^^^^^^^^^^ + +.. index:: pair: object; frozendict + +These represent an immutable dictionary. They are created by the built-in +:func:`frozendict` constructor. A frozendict is :term:`hashable` if all of +its keys and values are hashable, in which case it can be used as an element +of a set, or as a key in another mapping. :class:`!frozendict` is not a +subclass of :class:`dict`; it inherits directly from :class:`object`. + +.. 
versionadded:: 3.15 + + Callable types -------------- diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 9681a8fe53ec480..37a86831ff94838 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -307,7 +307,7 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, - array_hook=None, **kw) + array_hook=array_hook, **kw) def loads(s, *, cls=None, object_hook=None, parse_float=None, diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index d846c8af7ec4345..1d51fb2de0e69e4 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -87,6 +87,13 @@ def test_array_hook(self): self.assertEqual(self.loads('[]', array_hook=tuple), ()) + def test_load_array_hook(self): + # json.load must forward array_hook to loads + fp = StringIO('[10, 20, 30]') + result = self.json.load(fp, array_hook=tuple) + self.assertEqual(result, (10, 20, 30)) + self.assertEqual(type(result), tuple) + def test_decoder_optimizations(self): # Several optimizations were made that skip over calls to # the whitespace regex, so this test is designed to try and diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 3f2c5f7021018de..060a509c1bd1c7a 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -426,6 +426,16 @@ def test_unknown_encoding(self): with self.assertRaises(LookupError): parser.Parse(data, True) + @support.subTests('sample,exception', [ + (b' \xa1', UnicodeDecodeError), # crashed + (b' \xa1 \xa1', expat.ExpatError), + ]) + def test_multibyte_encoding_errors(self, sample, exception): + parser = expat.ParserCreate() + data = b'\n' + sample + with self.assertRaises(exception): + parser.Parse(data, True) class NamespaceSeparatorTest(unittest.TestCase): def test_legal(self): diff --git a/Lib/test/test_xml_etree.py 
b/Lib/test/test_xml_etree.py index 3a4d4098fbf567a..acec4ec2ca257c4 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1064,6 +1064,17 @@ def bxml(encoding, body=''): self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii')) self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii')) + @support.subTests('sample,exception', [ + (b' \xa1', UnicodeDecodeError), # crashed + (b' \xa1 \xa1', None), # ET.ParseError + ]) + def test_multibyte_encoding_errors(self, sample, exception): + exception = exception or ET.ParseError + data = b'\n' + sample + with self.assertRaises(exception): + ET.XML(data) + def test_methods(self): # Test serialization methods. @@ -1287,7 +1298,15 @@ def check(p, expected, namespaces=None): {'': 'http://www.w3.org/2001/XMLSchema', 'ns': 'http://www.w3.org/2001/XMLSchema'}) - def test_processinginstruction(self): + def test_comment_serialization(self): + comm = ET.Comment(' & ham') + # comments are not escaped + self.assertEqual(ET.tostring(comm), b'') + self.assertEqual(ET.tostring(comm, method='html'), b'') + # no comments in text serialization + self.assertEqual(ET.tostring(comm, method='text'), b'') + + def test_processinginstruction_serialization(self): # Test ProcessingInstruction directly self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')), @@ -1296,12 +1315,32 @@ def test_processinginstruction(self): b'') # Issue #2746 - + # processing instructions are not escaped self.assertEqual(ET.tostring(ET.PI('test', '')), b'?>') self.assertEqual(ET.tostring(ET.PI('test', '\xe3'), 'latin-1'), b"\n" b"\xe3?>") + pi = ET.PI('test', 'ham & eggs < spam') + self.assertEqual(ET.tostring(pi), b'') + self.assertEqual(ET.tostring(pi, method='html'), b'') + # no processing instructions in text serialization + self.assertEqual(ET.tostring(pi, method='text'), b'') + + def test_empty_attribute_serialization(self): + # empty attrs only work in html + elem = ET.Element('tag', attrib={'attr': 
None}) + self.assertRaises(TypeError, ET.tostring, elem) + self.assertEqual(ET.tostring(elem, method='html'), b'') + + @support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes")) + def test_html_cdata_elems_serialization(self, tag): + # content of raw text elements is not escaped in html + tag = tag.title() + elem = ET.Element(tag) + elem.text = '&ham' + self.assertEqual(ET.tostring(elem, method='html'), + ('<%s>&ham' % (tag, tag)).encode()) def test_html_empty_elems_serialization(self): # issue 15970 @@ -1317,6 +1356,14 @@ def test_html_empty_elems_serialization(self): method='html') self.assertEqual(serialized, expected) + def test_html_plaintext_serialization(self): + # content of plaintext is not escaped in html + # no end tag for plaintext + elem = ET.Element('PlainText') + elem.text = '&ham' + self.assertEqual(ET.tostring(elem, method='html'), + b'<spam>&ham') + def test_dump_attribute_order(self): # See BPO 34160 e = ET.Element('cirriculum', status='public', company='example') diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 75bebc0b1668abd..53727d7940b3f2a 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -917,17 +917,20 @@ def _serialize_xml(write, elem, qnames, namespaces, if elem.tail: write(_escape_cdata(elem.tail)) +_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed", + "noframes", "plaintext"} + HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr", "img", "input", "isindex", "link", "meta", "param", "source", - "track", "wbr"} + "track", "wbr", "plaintext"} def _serialize_html(write, elem, qnames, namespaces, **kwargs): tag = elem.tag text = elem.text if tag is Comment: - write("<!--%s-->" % _escape_cdata(text)) + write("<!--%s-->" % text) elif tag is ProcessingInstruction: - write("<?%s?>" % _escape_cdata(text)) + write("<?%s?>" % text) else: tag = qnames[tag] if tag is None: @@ -951,16 +954,19 @@ def 
_serialize_html(write, elem, qnames, namespaces, **kwargs): for k, v in items: if isinstance(k, QName): k = k.text - if isinstance(v, QName): - v = qnames[v.text] + k = qnames[k] + if v is None: + write(" %s" % k) # empty attr else: - v = _escape_attrib_html(v) - # FIXME: handle boolean attributes - write(" %s=\"%s\"" % (qnames[k], v)) + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib_html(v) + write(" %s=\"%s\"" % (k, v)) write(">") ltag = tag.lower() if text: - if ltag == "script" or ltag == "style": + if ltag in _CDATA_CONTENT_ELEMENTS: write(text) else: write(_escape_cdata(text)) diff --git a/Misc/NEWS.d/next/Library/2026-04-29-08-10-17.gh-issue-149056.jnaD4W.rst b/Misc/NEWS.d/next/Library/2026-04-29-08-10-17.gh-issue-149056.jnaD4W.rst new file mode 100644 index 000000000000000..0026d02c8762570 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-29-08-10-17.gh-issue-149056.jnaD4W.rst @@ -0,0 +1,2 @@ +Fix :func:`json.load` not forwarding the *array_hook* argument to +:func:`json.loads`. Patch by Thomas Kowalski. diff --git a/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst new file mode 100644 index 000000000000000..1550c893fd7c45b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-07-14-18-47.gh-issue-149489.bX9iHe.rst @@ -0,0 +1,5 @@ +Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of +comments, processing instructions and elements "xmp", "iframe", "noembed", +"noframes", and "plaintext" is no longer escaped. The "plaintext" element no +longer has a closing tag. Add support for empty attributes (with value +``None``). 
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index aef6ebad9ce578e..53d42ad50e37b96 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1473,6 +1473,9 @@ pyexpat_encoding_create(const char *name, PyObject *mapping) static int pyexpat_encoding_convert(void *data, const char *s) { + if (PyErr_Occurred()) { + return -1; + } pyexpat_encoding_info *info = (pyexpat_encoding_info *)data; int i = (unsigned char)s[0]; assert(info->map[i] < -1);