From 08384ba14d0ace763f7b8f91e41ac1984f34c9be Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Fri, 3 Apr 2026 09:25:19 +0200 Subject: [PATCH 1/4] [GR-74513] Raise ImportError for missing multibyte cjk codecs --- .../com.oracle.graal.python.test/src/tests/test_codecs.py | 7 +++++++ .../builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java | 6 +++--- .../builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java | 6 +++--- .../modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java | 6 +++--- .../builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java | 6 +++--- .../builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java | 6 +++--- .../builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java | 6 +++--- 7 files changed, 25 insertions(+), 18 deletions(-) diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py b/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py index 1748a46564..cdb1d5c747 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py @@ -2,6 +2,7 @@ # Copyright (C) 1996-2017 Python Software Foundation # # Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +import importlib import sys @@ -892,6 +893,12 @@ def test_encode_dict_err_xmlcharrefreplace(self): class MultibyteCodecTest(unittest.TestCase): + def test_missing_multibyte_codecs_raise_import_error(self): + for module_name in ('_codecs_cn', '_codecs_hk', '_codecs_iso2022', '_codecs_jp', '_codecs_kr', '_codecs_tw'): + with self.subTest(module_name=module_name): + module = importlib.import_module(module_name) + self.assertRaises(ImportError, module.getcodec, '__missing_codec__') + # just a smoke test def test_encode(self): import _codecs_tw diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java index 742695466d..6d3a23b4e8 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_CN; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_CN; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; -import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -134,7 +134,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java index 54e5b17c7e..c487131edb 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_HK; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_HK; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; -import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -126,7 +126,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java index 34a5d0f42a..4d11870295 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_ISO2022; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_ISO2022; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; -import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -136,7 +136,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java index 1558fde04e..d92bc084c3 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_JP; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_JP; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; -import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -158,7 +158,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java index 2f48bc253c..135e0c5fb2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_KR; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_KR; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; -import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -131,7 +131,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java index 47c88f6a6f..15003556f8 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_TW; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_TW; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; -import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -126,7 +126,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); From 5fc3c37e55382cfc03752217a40831aa13cf7d5a Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Fri, 3 Apr 2026 10:34:54 +0200 Subject: [PATCH 2/4] [GR-74513] Limit missing codec regression to GraalPy semantics --- .../com.oracle.graal.python.test/src/tests/test_codecs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py b/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py index cdb1d5c747..bc249959f7 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py @@ -894,10 +894,11 @@ def test_encode_dict_err_xmlcharrefreplace(self): class MultibyteCodecTest(unittest.TestCase): def test_missing_multibyte_codecs_raise_import_error(self): + expected_exc = ImportError if sys.implementation.name == 'graalpy' else LookupError for module_name in ('_codecs_cn', '_codecs_hk', '_codecs_iso2022', '_codecs_jp', '_codecs_kr', '_codecs_tw'): with self.subTest(module_name=module_name): module = importlib.import_module(module_name) - self.assertRaises(ImportError, module.getcodec, '__missing_codec__') + self.assertRaises(expected_exc, module.getcodec, '__missing_codec__') # just a smoke test def test_encode(self): From 65482f451f994bdd914bcc164a3831d6ab850105 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Fri, 3 Apr 2026 10:49:02 +0200 Subject: [PATCH 3/4] [GR-74513] Raise ImportError when unsupported cjk codec modules import --- .../src/tests/test_codecs.py | 26 ++++++++++++++++--- .../cjkcodecs/CodecsCNModuleBuiltins.java | 4 +-- .../cjkcodecs/CodecsHKModuleBuiltins.java | 4 +-- .../CodecsISO2022ModuleBuiltins.java | 4 +-- .../cjkcodecs/CodecsJPModuleBuiltins.java | 4 +-- .../cjkcodecs/CodecsKRModuleBuiltins.java | 4 +-- .../cjkcodecs/CodecsTWModuleBuiltins.java | 4 +-- .../lib-python/3/encodings/euc_jis_2004.py | 5 +++- .../lib-python/3/encodings/euc_jisx0213.py | 5 +++- .../lib-python/3/encodings/iso2022_jp_1.py | 5 +++- .../lib-python/3/encodings/iso2022_jp_2004.py | 5 +++- .../lib-python/3/encodings/iso2022_jp_3.py | 5 +++- .../lib-python/3/encodings/iso2022_jp_ext.py | 5 +++- .../lib-python/3/encodings/shift_jis_2004.py | 5 +++- 14 files changed, 63 insertions(+), 22 deletions(-) diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py b/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py index bc249959f7..b00f2e2809 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py @@ -4,6 +4,7 @@ # Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 import importlib import sys +from pathlib import Path def coding_checker(self, coder): @@ -893,12 +894,31 @@ def test_encode_dict_err_xmlcharrefreplace(self): class MultibyteCodecTest(unittest.TestCase): - def test_missing_multibyte_codecs_raise_import_error(self): - expected_exc = ImportError if sys.implementation.name == 'graalpy' else LookupError + def test_missing_multibyte_codecs_raise_lookup_error(self): for module_name in ('_codecs_cn', '_codecs_hk', '_codecs_iso2022', '_codecs_jp', '_codecs_kr', '_codecs_tw'): with self.subTest(module_name=module_name): module = importlib.import_module(module_name) - self.assertRaises(expected_exc, module.getcodec, '__missing_codec__') + self.assertRaises(LookupError, module.getcodec, '__missing_codec__') + + def test_unsupported_multibyte_codec_modules_raise_import_error_on_graalpy(self): + encodings_dir = Path(__file__).resolve().parents[3] / 'lib-python' / '3' / 'encodings' + for module_name in ( + 'encodings.euc_jis_2004', + 'encodings.euc_jisx0213', + 'encodings.iso2022_jp_1', + 'encodings.iso2022_jp_2004', + 'encodings.iso2022_jp_3', + 'encodings.iso2022_jp_ext', + 'encodings.shift_jis_2004', + ): + with self.subTest(module_name=module_name): + module_path = encodings_dir / f'{module_name.rsplit(".", 1)[1]}.py' + spec = importlib.util.spec_from_file_location(f'test_{module_name.replace(".", "_")}', module_path) + module = importlib.util.module_from_spec(spec) + if sys.implementation.name == 'graalpy': + self.assertRaises(ImportError, spec.loader.exec_module, module) + else: + spec.loader.exec_module(module) # just a smoke test def test_encode(self): diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java index 6d3a23b4e8..624b65398c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsCNModuleBuiltins.java @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; -import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_CN; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_CN; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; +import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -134,7 +134,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java index c487131edb..57a8fe9d46 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsHKModuleBuiltins.java @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; -import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_HK; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_HK; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; +import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -126,7 +126,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java index 4d11870295..6c0279168a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsISO2022ModuleBuiltins.java @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; -import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_ISO2022; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_ISO2022; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; +import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -136,7 +136,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java index d92bc084c3..3f25996660 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsJPModuleBuiltins.java @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; -import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_JP; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_JP; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; +import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -158,7 +158,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java index 135e0c5fb2..f59c6cb254 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsKRModuleBuiltins.java @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; -import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_KR; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_KR; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; +import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -131,7 +131,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java index 15003556f8..86d6dc61e0 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cjkcodecs/CodecsTWModuleBuiltins.java @@ -44,11 +44,11 @@ import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.PyMultibyteCodec_CAPSULE_NAME; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.registerCodec; import static com.oracle.graal.python.builtins.modules.cjkcodecs.MultibytecodecModuleBuiltins.CreateCodecNode.createCodec; -import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ImportError; import static com.oracle.graal.python.nodes.BuiltinNames.J__CODECS_TW; import static com.oracle.graal.python.nodes.BuiltinNames.T__CODECS_TW; import static com.oracle.graal.python.nodes.ErrorMessages.ENCODING_NAME_MUST_BE_A_STRING; import static com.oracle.graal.python.nodes.ErrorMessages.NO_SUCH_CODEC_IS_SUPPORTED; +import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import java.util.List; @@ -126,7 +126,7 @@ static Object getcodec(Object encoding, MultibyteCodec codec = findCodec(CODEC_LIST, asUTF8Node.execute(inliningTarget, encoding), isEqual); if (codec == null) { - throw raiseNode.raise(inliningTarget, ImportError, NO_SUCH_CODEC_IS_SUPPORTED); + throw raiseNode.raise(inliningTarget, LookupError, NO_SUCH_CODEC_IS_SUPPORTED); } PyCapsule codecobj = PFactory.createCapsuleJavaName(language, codec, PyMultibyteCodec_CAPSULE_NAME); diff --git a/graalpython/lib-python/3/encodings/euc_jis_2004.py b/graalpython/lib-python/3/encodings/euc_jis_2004.py index 72b87aea68..25a7977031 100644 --- a/graalpython/lib-python/3/encodings/euc_jis_2004.py +++ b/graalpython/lib-python/3/encodings/euc_jis_2004.py @@ -7,7 +7,10 @@ import _codecs_jp, codecs import _multibytecodec as mbc -codec = _codecs_jp.getcodec('euc_jis_2004') +try: + codec = _codecs_jp.getcodec('euc_jis_2004') +except LookupError as e: + raise ImportError(str(e)) from e class Codec(codecs.Codec): encode = codec.encode diff --git a/graalpython/lib-python/3/encodings/euc_jisx0213.py b/graalpython/lib-python/3/encodings/euc_jisx0213.py index cc47d04112..b0f0e55766 100644 --- a/graalpython/lib-python/3/encodings/euc_jisx0213.py +++ b/graalpython/lib-python/3/encodings/euc_jisx0213.py @@ -7,7 +7,10 @@ import _codecs_jp, codecs import _multibytecodec as mbc -codec = _codecs_jp.getcodec('euc_jisx0213') +try: + codec = _codecs_jp.getcodec('euc_jisx0213') +except LookupError as e: + raise ImportError(str(e)) from e class Codec(codecs.Codec): encode = codec.encode diff --git a/graalpython/lib-python/3/encodings/iso2022_jp_1.py b/graalpython/lib-python/3/encodings/iso2022_jp_1.py index 997044dc37..fc98b94e9b 100644 --- a/graalpython/lib-python/3/encodings/iso2022_jp_1.py +++ b/graalpython/lib-python/3/encodings/iso2022_jp_1.py @@ -7,7 +7,10 @@ import _codecs_iso2022, codecs import _multibytecodec as mbc -codec = _codecs_iso2022.getcodec('iso2022_jp_1') +try: + codec = _codecs_iso2022.getcodec('iso2022_jp_1') +except LookupError as e: + raise ImportError(str(e)) from e class Codec(codecs.Codec): encode = codec.encode diff --git a/graalpython/lib-python/3/encodings/iso2022_jp_2004.py b/graalpython/lib-python/3/encodings/iso2022_jp_2004.py index 40198bf098..b89a085e91 100644 --- a/graalpython/lib-python/3/encodings/iso2022_jp_2004.py +++ b/graalpython/lib-python/3/encodings/iso2022_jp_2004.py @@ -7,7 +7,10 @@ import _codecs_iso2022, codecs import _multibytecodec as mbc -codec = _codecs_iso2022.getcodec('iso2022_jp_2004') +try: + codec = _codecs_iso2022.getcodec('iso2022_jp_2004') +except LookupError as e: + raise ImportError(str(e)) from e class Codec(codecs.Codec): encode = codec.encode diff --git a/graalpython/lib-python/3/encodings/iso2022_jp_3.py b/graalpython/lib-python/3/encodings/iso2022_jp_3.py index 346e08becc..dfec693c79 100644 --- a/graalpython/lib-python/3/encodings/iso2022_jp_3.py +++ b/graalpython/lib-python/3/encodings/iso2022_jp_3.py @@ -7,7 +7,10 @@ import _codecs_iso2022, codecs import _multibytecodec as mbc -codec = _codecs_iso2022.getcodec('iso2022_jp_3') +try: + codec = _codecs_iso2022.getcodec('iso2022_jp_3') +except LookupError as e: + raise ImportError(str(e)) from e class Codec(codecs.Codec): encode = codec.encode diff --git a/graalpython/lib-python/3/encodings/iso2022_jp_ext.py b/graalpython/lib-python/3/encodings/iso2022_jp_ext.py index 752bab9813..f9205a79ad 100644 --- a/graalpython/lib-python/3/encodings/iso2022_jp_ext.py +++ b/graalpython/lib-python/3/encodings/iso2022_jp_ext.py @@ -7,7 +7,10 @@ import _codecs_iso2022, codecs import _multibytecodec as mbc -codec = _codecs_iso2022.getcodec('iso2022_jp_ext') +try: + codec = _codecs_iso2022.getcodec('iso2022_jp_ext') +except LookupError as e: + raise ImportError(str(e)) from e class Codec(codecs.Codec): encode = codec.encode diff --git a/graalpython/lib-python/3/encodings/shift_jis_2004.py b/graalpython/lib-python/3/encodings/shift_jis_2004.py index 161b1e86f9..a0753658c8 100644 --- a/graalpython/lib-python/3/encodings/shift_jis_2004.py +++ b/graalpython/lib-python/3/encodings/shift_jis_2004.py @@ -7,7 +7,10 @@ import _codecs_jp, codecs import _multibytecodec as mbc -codec = _codecs_jp.getcodec('shift_jis_2004') +try: + codec = _codecs_jp.getcodec('shift_jis_2004') +except LookupError as e: + raise ImportError(str(e)) from e class Codec(codecs.Codec): encode = codec.encode From a22647e9d9e98b1c5260f2736bf198cc633e6005 Mon Sep 17 00:00:00 2001 From: Tim Felgentreff Date: Tue, 7 Apr 2026 15:39:57 +0200 Subject: [PATCH 4/4] Align multibyte codec aliases with actual JDK support Map shift_jis_2004 to the JDK charset name that actually exists, x-SJIS_0213, and stop advertising EUC/ISO-2022 Japanese codec aliases that still have no backing Java charset. This keeps encodings.aliases and CharsetMapping consistent with real runtime support, restores shift_jis_2004 for callers like charset-normalizer and requests, and preserves ImportError behavior for the codecs that remain unsupported. --- .../com.oracle.graal.python.test/src/tests/test_codecs.py | 5 ++++- .../src/com/oracle/graal/python/util/CharsetMapping.java | 6 +----- graalpython/lib-python/3/encodings/aliases.py | 7 +++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py b/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py index b00f2e2809..ed8071169d 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py @@ -3,6 +3,7 @@ # # Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 import importlib +import importlib.util import sys from pathlib import Path @@ -909,7 +910,6 @@ def test_unsupported_multibyte_codec_modules_raise_import_error_on_graalpy(self) 'encodings.iso2022_jp_2004', 'encodings.iso2022_jp_3', 'encodings.iso2022_jp_ext', - 'encodings.shift_jis_2004', ): with self.subTest(module_name=module_name): module_path = encodings_dir / f'{module_name.rsplit(".", 1)[1]}.py' @@ -920,6 +920,9 @@ def test_unsupported_multibyte_codec_modules_raise_import_error_on_graalpy(self) else: spec.loader.exec_module(module) + def test_shift_jis_2004_codec_module_imports(self): + import encodings.shift_jis_2004 + # just a smoke test def test_encode(self): import _codecs_tw diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java index 44271cac80..7f8b98ee6b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java @@ -306,7 +306,7 @@ private static void addAlias(String alias, String pythonName) { addMapping("mac_turkish", "x-MacTurkish"); addMapping("palmos", null); addMapping("ptcp154", null); - addMapping("shift_jis_2004", "Shift_JISX0213"); + addMapping("shift_jis_2004", "x-SJIS_0213"); addMapping("shift_jis", "Shift_JIS"); addMapping("shift_jisx0213", "x-SJIS_0213"); addMapping("utf_16_be", "UTF-16BE"); @@ -438,10 +438,6 @@ private static void addAlias(String alias, String pythonName) { addAlias("uhc", "cp949"); addAlias("950", "cp950"); addAlias("ms950", "cp950"); - addAlias("jisx0213", "euc_jis_2004"); - addAlias("eucjis2004", "euc_jis_2004"); - addAlias("euc_jis2004", "euc_jis_2004"); - addAlias("eucjisx0213", "euc_jisx0213"); addAlias("eucjp", "euc_jp"); addAlias("ujis", "euc_jp"); addAlias("u_jis", "euc_jp"); diff --git a/graalpython/lib-python/3/encodings/aliases.py b/graalpython/lib-python/3/encodings/aliases.py index 5e2113e4ad..b1fcadb76c 100644 --- a/graalpython/lib-python/3/encodings/aliases.py +++ b/graalpython/lib-python/3/encodings/aliases.py @@ -491,10 +491,9 @@ 's_jis' : 'shift_jis', # shift_jis_2004 codec - # GraalPy change: Java doesn't have this codec - # 'shiftjis2004' : 'shift_jis_2004', - # 'sjis_2004' : 'shift_jis_2004', - # 's_jis_2004' : 'shift_jis_2004', + 'shiftjis2004' : 'shift_jis_2004', + 'sjis_2004' : 'shift_jis_2004', + 's_jis_2004' : 'shift_jis_2004', # shift_jisx0213 codec 'shiftjisx0213' : 'shift_jisx0213',