diff --git a/Include/codecs.h b/Include/codecs.h
index 512a3c723eca18..d14f527dee75da 100644
--- a/Include/codecs.h
+++ b/Include/codecs.h
@@ -170,6 +170,12 @@ PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc);
PyAPI_DATA(const char *) Py_hexdigits;
#endif
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
+ const char *encoding,
+ const char *alternate_command);
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h
index 52dca1362592d6..bfa10eadf73573 100644
--- a/Include/internal/pycore_codecs.h
+++ b/Include/internal/pycore_codecs.h
@@ -45,7 +45,7 @@ extern int _PyCodec_UnregisterError(const char *name);
in Python 3.5+?
*/
-extern PyObject* _PyCodec_LookupTextEncoding(
+PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
const char *encoding,
const char *alternate_command);
diff --git a/Lib/codecs.py b/Lib/codecs.py
index e4a8010aba90a5..e99460a670a516 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -93,7 +93,8 @@ class CodecInfo(tuple):
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
incrementalencoder=None, incrementaldecoder=None, name=None,
- *, _is_text_encoding=None):
+ *, _is_text_encoding=None,
+ _is_single_byte=None):
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
self.name = name
self.encode = encode
@@ -104,6 +105,8 @@ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
self.streamreader = streamreader
if _is_text_encoding is not None:
self._is_text_encoding = _is_text_encoding
+ if _is_single_byte is not None:
+ self._is_single_byte = _is_single_byte
return self
def __repr__(self):
diff --git a/Lib/encodings/big5.py b/Lib/encodings/big5.py
index 7adeb0e1605274..8bed14b35c5899 100644
--- a/Lib/encodings/big5.py
+++ b/Lib/encodings/big5.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/big5hkscs.py b/Lib/encodings/big5hkscs.py
index 350df37baaedaf..eeeb7865895190 100644
--- a/Lib/encodings/big5hkscs.py
+++ b/Lib/encodings/big5hkscs.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/cp932.py b/Lib/encodings/cp932.py
index e01f59b7190576..3671a4387f96b6 100644
--- a/Lib/encodings/cp932.py
+++ b/Lib/encodings/cp932.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/cp949.py b/Lib/encodings/cp949.py
index 627c87125e2aff..df998ba3bad75c 100644
--- a/Lib/encodings/cp949.py
+++ b/Lib/encodings/cp949.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/cp950.py b/Lib/encodings/cp950.py
index 39eec5ed0ddef9..12c7bbd8d226ad 100644
--- a/Lib/encodings/cp950.py
+++ b/Lib/encodings/cp950.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/euc_jis_2004.py b/Lib/encodings/euc_jis_2004.py
index 72b87aea68862f..68604db3c30b2d 100644
--- a/Lib/encodings/euc_jis_2004.py
+++ b/Lib/encodings/euc_jis_2004.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/euc_jisx0213.py b/Lib/encodings/euc_jisx0213.py
index cc47d04112a187..cd2808965a6edd 100644
--- a/Lib/encodings/euc_jisx0213.py
+++ b/Lib/encodings/euc_jisx0213.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/euc_jp.py b/Lib/encodings/euc_jp.py
index 7bcbe4147f2ad4..bcdd0582d71902 100644
--- a/Lib/encodings/euc_jp.py
+++ b/Lib/encodings/euc_jp.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/euc_kr.py b/Lib/encodings/euc_kr.py
index c1fb1260e879f0..8a81356d8f9980 100644
--- a/Lib/encodings/euc_kr.py
+++ b/Lib/encodings/euc_kr.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/gb18030.py b/Lib/encodings/gb18030.py
index 34fb6c366a7614..98df7d4cbeec3d 100644
--- a/Lib/encodings/gb18030.py
+++ b/Lib/encodings/gb18030.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/gb2312.py b/Lib/encodings/gb2312.py
index 3c3b837d618ecd..ba915a2500f21a 100644
--- a/Lib/encodings/gb2312.py
+++ b/Lib/encodings/gb2312.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/gbk.py b/Lib/encodings/gbk.py
index 1b45db89859cdf..d597c7bb77e93e 100644
--- a/Lib/encodings/gbk.py
+++ b/Lib/encodings/gbk.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/hz.py b/Lib/encodings/hz.py
index 383442a3c9ac9a..43ee36a9286426 100644
--- a/Lib/encodings/hz.py
+++ b/Lib/encodings/hz.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index d31ee07ab45b76..98bf9462e36fbf 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -385,4 +385,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/iso2022_jp.py b/Lib/encodings/iso2022_jp.py
index ab0406069356e4..27129ce67aa884 100644
--- a/Lib/encodings/iso2022_jp.py
+++ b/Lib/encodings/iso2022_jp.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/iso2022_jp_1.py b/Lib/encodings/iso2022_jp_1.py
index 997044dc378749..0f41dd95cd4332 100644
--- a/Lib/encodings/iso2022_jp_1.py
+++ b/Lib/encodings/iso2022_jp_1.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/iso2022_jp_2.py b/Lib/encodings/iso2022_jp_2.py
index 9106bf762512fd..25f625819f5ea0 100644
--- a/Lib/encodings/iso2022_jp_2.py
+++ b/Lib/encodings/iso2022_jp_2.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/iso2022_jp_2004.py b/Lib/encodings/iso2022_jp_2004.py
index 40198bf098570b..1f0bd1b7874472 100644
--- a/Lib/encodings/iso2022_jp_2004.py
+++ b/Lib/encodings/iso2022_jp_2004.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/iso2022_jp_3.py b/Lib/encodings/iso2022_jp_3.py
index 346e08beccbbaf..2acdb3a2cd9be3 100644
--- a/Lib/encodings/iso2022_jp_3.py
+++ b/Lib/encodings/iso2022_jp_3.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/iso2022_jp_ext.py b/Lib/encodings/iso2022_jp_ext.py
index 752bab9813a094..a32a533e8bdf00 100644
--- a/Lib/encodings/iso2022_jp_ext.py
+++ b/Lib/encodings/iso2022_jp_ext.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/iso2022_kr.py b/Lib/encodings/iso2022_kr.py
index bf7018763eae38..51dd4ab560422a 100644
--- a/Lib/encodings/iso2022_kr.py
+++ b/Lib/encodings/iso2022_kr.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/johab.py b/Lib/encodings/johab.py
index 512aeeb732b522..e58c50a06c4b96 100644
--- a/Lib/encodings/johab.py
+++ b/Lib/encodings/johab.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 268fccbd53974e..335acb87cb9b28 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -250,4 +250,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/raw_unicode_escape.py b/Lib/encodings/raw_unicode_escape.py
index 46c8e070dd192e..5c5b41437a84b4 100644
--- a/Lib/encodings/raw_unicode_escape.py
+++ b/Lib/encodings/raw_unicode_escape.py
@@ -43,4 +43,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/shift_jis.py b/Lib/encodings/shift_jis.py
index 83381172764dea..bf7fded09468c8 100644
--- a/Lib/encodings/shift_jis.py
+++ b/Lib/encodings/shift_jis.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/shift_jis_2004.py b/Lib/encodings/shift_jis_2004.py
index 161b1e86f9918a..ae40b684a010f2 100644
--- a/Lib/encodings/shift_jis_2004.py
+++ b/Lib/encodings/shift_jis_2004.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/shift_jisx0213.py b/Lib/encodings/shift_jisx0213.py
index cb653f53055e67..5af8565618b40e 100644
--- a/Lib/encodings/shift_jisx0213.py
+++ b/Lib/encodings/shift_jisx0213.py
@@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/unicode_escape.py b/Lib/encodings/unicode_escape.py
index 9b1ce99b339ae0..d896cefc9596be 100644
--- a/Lib/encodings/unicode_escape.py
+++ b/Lib/encodings/unicode_escape.py
@@ -43,4 +43,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py
index d3b9980026666f..eac93bd17d07d1 100644
--- a/Lib/encodings/utf_16.py
+++ b/Lib/encodings/utf_16.py
@@ -152,4 +152,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_16_be.py b/Lib/encodings/utf_16_be.py
index 86b458eb9bcd96..d056cf9202a40f 100644
--- a/Lib/encodings/utf_16_be.py
+++ b/Lib/encodings/utf_16_be.py
@@ -39,4 +39,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_16_le.py b/Lib/encodings/utf_16_le.py
index ec454142eedf25..2e07f76cc3f742 100644
--- a/Lib/encodings/utf_16_le.py
+++ b/Lib/encodings/utf_16_le.py
@@ -39,4 +39,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py
index 1924bedbb74c68..aebe145ec95e71 100644
--- a/Lib/encodings/utf_32.py
+++ b/Lib/encodings/utf_32.py
@@ -147,4 +147,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_32_be.py b/Lib/encodings/utf_32_be.py
index fe272b5fafec69..ee1b41a11aa35f 100644
--- a/Lib/encodings/utf_32_be.py
+++ b/Lib/encodings/utf_32_be.py
@@ -34,4 +34,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_32_le.py b/Lib/encodings/utf_32_le.py
index 9e48210928ee65..4ac786bb73349b 100644
--- a/Lib/encodings/utf_32_le.py
+++ b/Lib/encodings/utf_32_le.py
@@ -34,4 +34,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_7.py b/Lib/encodings/utf_7.py
index 8e0567f2087d65..3127867fb5bff9 100644
--- a/Lib/encodings/utf_7.py
+++ b/Lib/encodings/utf_7.py
@@ -35,4 +35,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_8.py b/Lib/encodings/utf_8.py
index 1bf6336571547b..3801615ce34001 100644
--- a/Lib/encodings/utf_8.py
+++ b/Lib/encodings/utf_8.py
@@ -39,4 +39,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index 1bb479203f365d..b5e5c89f80b9eb 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -127,4 +127,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 79c8a7ef886482..03dd61a76db154 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1892,6 +1892,7 @@ def test_copy(self):
self.assertIsNot(dup, orig)
self.assertEqual(dup, orig)
self.assertTrue(orig._is_text_encoding)
+ self.assertFalse(orig._is_single_byte)
self.assertEqual(dup.encode, orig.encode)
self.assertEqual(dup.name, orig.name)
self.assertEqual(dup.incrementalencoder, orig.incrementalencoder)
@@ -1912,6 +1913,7 @@ def test_deepcopy(self):
self.assertIsNot(dup, orig)
self.assertEqual(dup, orig)
self.assertTrue(orig._is_text_encoding)
+ self.assertFalse(orig._is_single_byte)
self.assertEqual(dup.encode, orig.encode)
self.assertEqual(dup.name, orig.name)
self.assertEqual(dup.incrementalencoder, orig.incrementalencoder)
@@ -1940,6 +1942,7 @@ def test_pickle(self):
unpickled_codec_info.incrementalencoder
)
self.assertTrue(unpickled_codec_info._is_text_encoding)
+ self.assertFalse(unpickled_codec_info._is_single_byte)
# Test a CodecInfo with _is_text_encoding equal to false.
codec_info = codecs.lookup('base64')
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index aaa91aca36e3c4..0763bb19865167 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -227,7 +227,7 @@ def _verify_parse_output(self, operations):
"Character data: '\xb5'",
"End element: 'root'",
]
- for operation, expected_operation in zip(operations, expected_operations):
+ for operation, expected_operation in zip(operations, expected_operations, strict=True):
self.assertEqual(operation, expected_operation)
def test_parse_bytes(self):
@@ -276,6 +276,51 @@ def test_parse_again(self):
self.assertEqual(expat.ErrorString(cm.exception.code),
expat.errors.XML_ERROR_FINISHED)
+    @support.subTests('enc', ['UTF-8', 'utf-8', 'utf-16', 'koi8-u', 'cp1125',
+                              'cp1251', 'iso8859-5', 'mac_cyrillic'])
+    def test_supportes_ecodings(self, enc):
+        # A non-ASCII document declared and encoded in enc must parse intact.
+        out = self.Outputter()
+        parser = expat.ParserCreate()
+        self._hookup_callbacks(parser, out)
+        data = (f'<?xml version="1.0" encoding="{enc}"?>\n'
+                '<корінь атрибут="значення">зміст</корінь>').encode(enc)
+        parser.Parse(data, True)
+        self.assertEqual(out.out, [
+            ('XML declaration', ('1.0', enc, -1)),
+            "Start element: 'корінь' {'атрибут': 'значення'}",
+            "Character data: 'зміст'",
+            "End element: 'корінь'",
+        ])
+
+    @support.subTests('enc', [
+        'UTF8', 'UTF-7',
+        "unicode-escape", "raw-unicode-escape",
+        "Big5-HKSCS", "Big5",
+        "cp932", "cp949", "cp950",
+        "EUC_JIS-2004", "EUC_JISX0213", "EUC-JP", "EUC-KR",
+        "GB18030", "GB2312", "GBK",
+        "HZ-GB-2312",
+        "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2004",
+        "ISO-2022-JP-2", "ISO-2022-JP-3", "ISO-2022-JP-EXT",
+        "ISO-2022-KR", "johab",
+        "Shift_JIS", "Shift_JIS-2004", "Shift_JISX0213",
+    ])
+    def test_unsupportes_ecodings(self, enc):
+        # Codecs flagged _is_single_byte=False must be rejected with ValueError.
+        parser = expat.ParserCreate()
+        data = (f'<?xml version="1.0" encoding="{enc}"?>\n'
+                '<root/>').encode(enc)
+        with self.assertRaises(ValueError):
+            parser.Parse(data, True)
+
+    def test_unknown_ecoding(self):
+        parser = expat.ParserCreate()
+        data = b'<?xml version="1.0" encoding="unknown"?>\n<root/>'
+        with self.assertRaises(LookupError):  # no codec has this name
+            parser.Parse(data, True)
+
+
class NamespaceSeparatorTest(unittest.TestCase):
def test_legal(self):
# Tests that make sure we get errors when the namespace_separator value
diff --git a/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst b/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst
new file mode 100644
index 00000000000000..5dd95047178938
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst
@@ -0,0 +1,4 @@
+The :mod:`XML parser <xml.parsers.expat>` now raises :exc:`ValueError`
+for known unsupported multi-byte encodings such as "UTF8", "ISO-2022-JP"
+or "raw-unicode-escape" instead of failing later, when non-ASCII data is
+encountered.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 0f0afe17513ef1..e95dcb611a33e2 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -4,6 +4,7 @@
#include "Python.h"
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
+#include "pycore_codecs.h" // _PyCodec_LookupTextEncoding()
#include "pycore_import.h" // _PyImport_SetModule()
#include "pycore_pyhash.h" // _Py_HashSecret
#include "pycore_traceback.h" // _PyTraceback_Add()
@@ -1465,6 +1466,31 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
if (PyErr_Occurred())
return XML_STATUS_ERROR;
+ PyObject *codec = _PyCodec_LookupTextEncoding(name, NULL);
+ if (codec == NULL) {
+ return XML_STATUS_ERROR;
+ }
+ if (!PyTuple_CheckExact(codec)) {
+ PyObject *attr;
+ if (PyObject_GetOptionalAttrString(codec, "_is_single_byte", &attr) < 0) {
+ Py_DECREF(codec);
+ return XML_STATUS_ERROR;
+ }
+ if (attr != NULL) {
+ int is_single_byte = PyObject_IsTrue(attr);
+ Py_DECREF(attr);
+ if (is_single_byte <= 0) {
+ Py_DECREF(codec);
+ if (is_single_byte == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "multi-byte encodings are not supported");
+ }
+ return XML_STATUS_ERROR;
+ }
+ }
+ }
+ Py_DECREF(codec);
+
u = PyUnicode_Decode((const char*) template_buffer, 256, name, "replace");
if (u == NULL) {
Py_XDECREF(u);
diff --git a/Tools/unicode/gencjkcodecs.py b/Tools/unicode/gencjkcodecs.py
index 45866bf2f61062..eb04f67f2077eb 100644
--- a/Tools/unicode/gencjkcodecs.py
+++ b/Tools/unicode/gencjkcodecs.py
@@ -51,6 +51,7 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_single_byte=False,
)
""")