GeospatialPython · JamesParrott · Jun 24, 2026 · Jun 23, 2026
diff --git a/README.md b/README.md
@@ -8,8 +8,8 @@ The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Py
 
 - **Author**: [Joel Lawhead](https://github.com/GeospatialPython)
 - **Maintainers**: [James Parrott](https://github.com/JamesParrott) & [Karim Bahgat](https://github.com/karimbahgat)
-- **Version**: 3.0.13
-- **Date**: 19th June 2026
+- **Version**: 3.1.1
+- **Date**: 24th June 2026
 - **License**: [MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT)
 
 ## Contents
@@ -93,6 +93,11 @@ part of your geospatial project.
 
 # Version Changes
 
+## 3.1.1
+### Unicode support made even more robust and yet another encoding bug fixed!
+ - When reading, only use the minimum number of pad bytes needed to decode text successfully (fixes issue 423).
+ - When writing, warn (or raise in strict mode) if the encoded text ends in pad bytes.
+
 ## 3.1.0
 ### Unicode support made more robust and encoding bugs fixed
  - Truncation of field names and text fields now respects unicode code point boundaries (fixes issues -

diff --git a/changelog.txt b/changelog.txt
@@ -1,3 +1,8 @@
+VERSION 3.1.1
+	Unicode support made even more robust and yet another encoding bug fixed!
+	* When reading, only use the minimum number of pad bytes needed to decode text successfully (fixes issue 423).
+	* When writing, warn (or raise in strict mode) if the encoded text ends in pad bytes.
+
 VERSION 3.1.0
 
 2026-06-23

diff --git a/src/shapefile.py b/src/shapefile.py
@@ -8,7 +8,7 @@
 
 from __future__ import annotations
 
-__version__ = "3.1.0"
+__version__ = "3.1.1"
 
 import abc
 import array
@@ -251,6 +251,38 @@ def __call__(
     ) -> str: ...
 
 
+def _warn_if_string_ends_with_decoded_pad_bytes(
+    s: str,
+    pad_byte: bytes,
+    encoding: str = "utf-8",
+    encodingErrors: str = "strict",
+) -> None:
+    """Warn (PossibleDataLoss) if s ends with text that encodes to 1-4 pad
+    bytes, which may be mistaken for padding.  E.g. under utf-16-le, "†"
+    encodes to a pair of ascii spaces (b"  ", the pad byte for C and M fields).
+    """
+    # A single code point encodes to at most 4 bytes under UTF-8, UTF-16, and UTF-32.
+    for n in range(1, 5):
+        # TODO: test for encodings ending in a null terminator preceded
+        #       by pad bytes, that are exactly the field's size (length).
+        pad_bytes = pad_byte * n
+        try:
+            decoded_pad_bytes: str = pad_bytes.decode(encoding, encodingErrors)
+        except UnicodeDecodeError:
+            continue
+        # Lenient error handlers (e.g. "ignore") can decode to "", which every string ends with.
+        if decoded_pad_bytes and s.endswith(decoded_pad_bytes):
+            msg = (
+                f"Under the given encoding: {encoding}, "
+                f"the text (field name or 'C' or 'M' field): {s!r} "
+                f"ends with {decoded_pad_bytes!r}, which coincidentally "
+                f"encodes to the pad bytes: {pad_bytes!r}. "
+                "The real end of the actual data may be earlier. "
+            )
+            warnings.warn(msg, category=PossibleDataLoss)
+            break
+
+
 def _encode_dbf_string(
     s: str,
     size: int,
@@ -273,6 +305,8 @@ def _encode_dbf_string(
     N = len(s)
     trimmed: str
     encoded: bytes
+
+    # i - num of characters to keep.  Starts by trying to keep all N.
     for i in reversed(range(0, N + 1)):
         trimmed = s[:i]
         encoded = trimmed.encode(encoding, encodingErrors)
@@ -300,16 +334,27 @@ def _encode_dbf_string(
             f"to a short enough byte string, using {encoding=}, {encodingErrors=}"
         )
 
+    if pad_byte is not None:
+        _warn_if_string_ends_with_decoded_pad_bytes(
+            s=trimmed,
+            pad_byte=pad_byte,
+            encoding=encoding,
+            encodingErrors=encodingErrors,
+        )
+
     if len(encoded) < size and pad_byte is not None:
         padded = encoded.ljust(size, pad_byte)
     else:
         padded = encoded
 
-    decoded = decode(
-        b=padded,
-        encoding=encoding,
-        encodingErrors=encodingErrors,
-    )
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        decoded = decode(
+            b=padded,
+            encoding=encoding,
+            encodingErrors=encodingErrors,
+        )
+
     if decoded != trimmed:
         msg = f"Padded value: {padded!r} does not decode to {trimmed!r} using PyShp's decoder: {decode.__name__}"
         if len(trimmed) < len(s):
@@ -324,20 +369,67 @@ def _encode_dbf_string(
     return padded, trimmed
 
 
+def _try_to_decode_dbf_name_or_text_field(
+    b: bytes,
+    pad_bytes: bytes,  # Set of byte values to treat as padding (stripped from the R of b, in any order)
+    encoding: str = "utf8",
+    encodingErrors: str = "strict",
+) -> str:
+    """Decode b, first ignoring all trailing pad bytes, then restoring them
+    one at a time (warning with PossibleDataLoss if any were needed) until
+    decoding succeeds.  Raises dbfFileException if no attempt decodes.
+    """
+    N = len(b)
+    num_trailing_pad_bytes = N - len(b.rstrip(pad_bytes))
+
+    # num_to_trim counts down from num_trailing_pad_bytes (all trailing
+    # padding trimmed) to 0 (the whole of b is decoded).
+    for num_to_trim in reversed(range(num_trailing_pad_bytes + 1)):
+        i = N - num_to_trim
+        trimmed = b[:i]
+        try:
+            decoded = trimmed.decode(encoding, encodingErrors)
+        except UnicodeDecodeError:
+            continue
+        if num_to_trim < num_trailing_pad_bytes:
+            warnings.warn(
+                f"Used {num_trailing_pad_bytes - num_to_trim} pad bytes ({pad_bytes!r}) "
+                f"from padding to decode raw field: {b!r} "
+                f"to: {decoded!r} ({encoding=}, {encodingErrors=}) ",
+                category=PossibleDataLoss,
+            )
+        return decoded
+
+    raise dbfFileException(
+        f"Could not decode field name or text/memo field: {b!r} using {encoding=} and {encodingErrors=}"
+        f" no matter how many trailing pad bytes (if any) ({pad_bytes!r}) were used. "
+    )
+
+
 def _decode_C_or_M_field(
     b: bytes,
     encoding: str = "utf8",
     encodingErrors: str = "strict",
     strict: bool = True,
 ) -> str:
-    retval = b.decode(encoding, encodingErrors).rstrip("\x00").rstrip(" ")
-    if retval.rstrip("\x00") != retval and strict:
+    retval = _try_to_decode_dbf_name_or_text_field(
+        b=b,
+        pad_bytes=b" \x00",
+        encoding=encoding,
+        encodingErrors=encodingErrors,
+    )
+    # Non-strict mode: return the decoded text without any further checks.
+    if not strict:
+        return retval
+    # Strict mode: warn if the decoded text still ends in a null character.
+    if retval.rstrip("\x00") != retval:
         msg = (
-            f"More Trailing Null chars in: {b!r}"
-            " after removing trailing null chars and ascii spaces"
-            f", resulting in {retval!r}"
+            f"More trailing null chars in: {retval!r}"
+            " after removing one trailing null char and ascii spaces"
+            f" from {b!r}, and decoding (codec: {encoding}, errors: {encodingErrors}). "
         )
         warnings.warn(msg, category=PossibleDataLoss)
+
     return retval
 
 
@@ -360,34 +452,15 @@ def decode_name(
         encodingErrors: str = "strict",
         strict: bool = True,
     ) -> str:
-        N = len(b)
-        decoded: str
-        num_trailing_null_bytes = N - len(b.rstrip(b"\x00"))
-
-        # Test if we need to restore any of those null bytes to
-        # correctly decode the remaining bytes to a string.
-        for num_to_trim in reversed(range(num_trailing_null_bytes + 1)):
-            i = N - num_to_trim
-            trimmed = b[:i]
-            try:
-                decoded = trimmed.decode(encoding, encodingErrors)
-            except UnicodeDecodeError:
-                continue
-            if strict and num_to_trim < num_trailing_null_bytes:
-                warnings.warn(
-                    f"Used {num_trailing_null_bytes - num_to_trim} null bytes "
-                    f"from padding to decode {b!r} "
-                    f"to: {decoded!r} ({encoding=}, {encodingErrors=}) ",
-                    category=PossibleDataLoss,
-                )
-            if not strict:
-                decoded = decoded.lstrip()
-            return decoded
-
-        raise dbfFileException(
-            f"Could not decode field name: {b!r} using {encoding=} and {encodingErrors=}"
-            " no matter how many trailing null-bytes (if any) were used. "
+        decoded = _try_to_decode_dbf_name_or_text_field(
+            b=b,
+            pad_bytes=b"\x00",
+            encoding=encoding,
+            encodingErrors=encodingErrors,
         )
+        if not strict:
+            decoded = decoded.lstrip()
+        return decoded
 
     @classmethod
     def from_byte_stream(
@@ -445,6 +518,14 @@ def from_unchecked(
             size = 1
             decimal = 0
 
+        if not strict and " " in name:
+            warnings.warn(
+                f"Replacing ascii spaces (0x20, ' 's) with underscores ('_'s) in {name!r}. "
+                "Use a Writer(file, strict=True) to preserve the field name as it is. ",
+                category=PossibleDataLoss,
+            )
+            name = name.replace(" ", "_")
+
         # Only use the portion of the name that we are able to encode to
         # 10 bytes or less.
         _encoded_name, trimmed_name = cls.trim_name_until_encodable(
@@ -502,13 +583,6 @@ def encode_field_descriptor(
             encodingErrors=encodingErrors,
             strict=strict,
         )
-        if not strict and b" " in encoded_name:
-            warnings.warn(
-                "Replacing ascii spaces (0x20) with underscores "
-                f"in encoded bytes: {encoded_name!r}",
-                category=PossibleDataLoss,
-            )
-            encoded_name = encoded_name.replace(b" ", b"_")
 
         encoded_field_type = self.field_type.encode("ascii")
         return self.get_struct().pack(

diff --git a/tests/hypothesis_tests.py b/tests/hypothesis_tests.py
@@ -461,12 +461,14 @@ def code_and_shape_strat_from_triple(t):
 @pytest.mark.hypothesis
 @given(codes_and_shapes=codes_and_shapes)
 def test_shp_reader_writer_roundtrip(codes_and_shapes)-> None:
+
     code_ex, expected_shapes = codes_and_shapes
     stream = io.BytesIO()
+
     with shp.ShpWriter(shp=stream, shapeType=code_ex) as w:
         for shape in expected_shapes:
             w.shape(shape)
-    stream.seek(0)
+
     with shp.ShpReader(shp=stream) as r:
         assert r.shapeType == code_ex
 
@@ -495,8 +497,6 @@ def test_shp_reader_writer_roundtrip(codes_and_shapes)-> None:
                 assert not hasattr(expected, "partTypes")
 
 
-
-
 @pytest.mark.hypothesis
 @given(codes_and_shapes=codes_and_shapes)
 def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None:
@@ -516,8 +516,6 @@ def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None:
                 offsets_B.append(offset_B)
                 shx_w._shx_record(offset_B, size_B)
 
-    shx_stream.seek(0)
-
     with shp.ShxReader(shx=shx_stream) as r:
         assert r.numShapes == len(expected_shapes)
         assert r.offsets == offsets_B
@@ -655,7 +653,6 @@ def test_dbf_reader_writer_roundtrip(fields_and_records)-> None:
                 written_records.append(record)
 
 
-    stream.seek(0)
     with shp.DbfReader(dbf=stream) as r:
         actual_fields = iter(r.fields)
         next(actual_fields) # skip deletion flag