diff --git a/PyMemoryEditor/macos/process.py b/PyMemoryEditor/macos/process.py index fdb816a..70518cf 100644 --- a/PyMemoryEditor/macos/process.py +++ b/PyMemoryEditor/macos/process.py @@ -312,9 +312,11 @@ def write_process_memory( :param pytype: type of value to be written (bool, int, float, str, bytes). :param bufflength: value size in bytes. Optional — defaults to ``None``, which uses the default width for numeric types (int→4, float→8, - bool→1) and writes the exact encoded length for ``str`` / ``bytes``. - Since it is optional, pass ``value`` by keyword when omitting it - (``write_process_memory(addr, str, value="hi")``). + bool→1) and writes the whole value for ``str`` / ``bytes``. For + ``str`` / ``bytes`` an explicit value is a *maximum* that truncates + the value (``str`` counts characters, ``bytes`` counts bytes) and + never pads. Since it is optional, pass ``value`` by keyword when + omitting it (``write_process_memory(addr, str, value="hi")``). :param value: value to be written. """ self.__require_open() diff --git a/PyMemoryEditor/process/abstract.py b/PyMemoryEditor/process/abstract.py index d81623f..7d7792f 100644 --- a/PyMemoryEditor/process/abstract.py +++ b/PyMemoryEditor/process/abstract.py @@ -376,15 +376,15 @@ def write_process_memory( * For numeric types (int, float, bool) it is the exact write width; leave it as ``None`` to use the default — int→4, float→8, bool→1. - * For ``str`` / ``bytes`` it is a *minimum* field width, not a hard - cap. The whole value is always written: if its encoded form is - longer than ``bufflength`` every byte is still written (so - ``write(addr, str, 3, "olá")`` writes all 4 UTF-8 bytes instead - of raising — you may count characters, not bytes). If it is - shorter, the field is NUL-padded up to ``bufflength`` (handy to - clear a fixed-size buffer). ``None`` (the default) writes exactly - the encoded length. ``str`` is encoded as UTF-8; no NUL terminator - is appended. 
+ * For ``str`` / ``bytes`` it is a *maximum* width that truncates the + value; it never pads. For a ``str`` the cap counts **characters**, + applied before UTF-8 encoding, so multibyte characters are never + split: ``write(addr, str, 2, "óólá")`` keeps ``"óó"`` and writes + its 4 bytes, while ``write(addr, str, 2, "ola")`` writes ``b"ol"``. + For ``bytes`` the cap counts **bytes**. A value shorter than the + cap is written as-is (no NUL padding). ``None`` (the default) + writes the whole value. ``str`` is encoded as UTF-8; no NUL + terminator is appended. :param value: value to be written. Required — since ``bufflength`` is now optional, pass it by keyword when omitting ``bufflength``:: @@ -574,11 +574,11 @@ def write_string( — useful when overwriting a longer string in place so :meth:`read_string` stops where you intend rather than reading the stale tail. - Multi-byte characters are handled correctly — the field grows to the - encoded byte length, so you never have to count bytes yourself. - Returns ``text``. For a fixed-width / NUL-padded field, call - :meth:`write_process_memory` with ``pytype=str`` and an explicit - ``bufflength`` instead. + Multi-byte characters are handled correctly — the whole string is + encoded and written, so you never have to count bytes yourself. + Returns ``text``. To cap the write to a maximum number of characters, + call :meth:`write_process_memory` with ``pytype=str`` and an explicit + ``bufflength`` instead (it truncates, it does not pad). """ payload = text + "\x00" if null_terminator else text self.write_process_memory(address, str, None, payload) diff --git a/PyMemoryEditor/util/convert.py b/PyMemoryEditor/util/convert.py index 702a38e..0b1f2c0 100644 --- a/PyMemoryEditor/util/convert.py +++ b/PyMemoryEditor/util/convert.py @@ -115,17 +115,18 @@ def prepare_write( value via :func:`get_c_type_of` exactly as before. * **str / bytes** — the value is encoded to raw bytes (UTF-8 for ``str``) - and routed through the ``bytes`` path. 
Here ``bufflength`` is a - *minimum* field width, not a hard cap: - - - the **whole** value is always written, even when its encoded form is - longer than ``bufflength`` — so ``write(addr, str, 3, "olá")`` writes - all 4 UTF-8 bytes instead of raising ``ValueError`` because the caller - counted characters, not bytes; - - when the encoded form is **shorter** than ``bufflength`` the buffer is - NUL-padded up to it, which lets you clear a fixed-size field - (``write(addr, str, 16, "AB")`` writes ``b"AB"`` + 14 zero bytes); - - ``bufflength=None`` writes exactly the encoded length. + and routed through the ``bytes`` path. Here ``bufflength`` is a *maximum* + width that truncates the value; it never pads: + + - for a ``str`` value the cap counts **characters**, applied *before* + encoding — ``write(addr, str, 2, "óólá")`` keeps ``"óó"`` and writes + its 4 UTF-8 bytes, while ``write(addr, str, 2, "ola")`` keeps ``"ol"`` + and writes 2 bytes; + - for a ``bytes`` value the cap counts **bytes** (there are no + characters) — ``write(addr, bytes, 2, b"abc")`` writes ``b"ab"``; + - a value shorter than the cap is written as-is, with no NUL padding — + ``write(addr, str, 10, "ola")`` writes just ``b"ola"`` (3 bytes); + - ``bufflength=None`` writes the whole value (no cap). The caller is expected to return its *original* ``value`` to the user, so this routing through ``bytes`` stays invisible at the public API. @@ -138,15 +139,23 @@ def prepare_write( _validate_pytype(pytype) if pytype is str or pytype is bytes: - raw = value.encode("utf-8") if isinstance(value, str) else value - if not isinstance(raw, (bytes, bytearray)): + if isinstance(value, str): + # str: bufflength caps the number of *characters*, applied before + # encoding so multi-byte characters are never split mid-sequence. + if bufflength is not None: + value = value[:bufflength] + raw = value.encode("utf-8") + elif isinstance(value, (bytes, bytearray)): + # bytes: bufflength caps the number of *bytes*. 
+ raw = bytes(value) + if bufflength is not None: + raw = raw[:bufflength] + else: raise TypeError( "value must be str or bytes when pytype is str/bytes, got %s." % type(value).__name__ ) - raw = bytes(raw) - width = len(raw) if bufflength is None else max(bufflength, len(raw)) - return bytes, width, raw.ljust(width, b"\x00") + return bytes, len(raw), raw return pytype, resolve_bufflength(pytype, bufflength), value diff --git a/docs/api/openprocess.md b/docs/api/openprocess.md index 7256b27..05b27e3 100644 --- a/docs/api/openprocess.md +++ b/docs/api/openprocess.md @@ -111,11 +111,12 @@ with OpenProcess( :param Type pytype: one of the five supported types. :param int bufflength: value size in bytes. **Optional** (defaults to ``None``): numeric types fall back to their default width and ``str`` / - ``bytes`` write the exact encoded length. For ``str`` / ``bytes`` a value - *larger* than the data is a *minimum* width — the whole value is always - written, and the extra space zero-pads the field. Because it is optional, - pass ``value`` by keyword when omitting it (``write_process_memory(addr, - int, value=9999)``). + ``bytes`` write the whole value. For ``str`` / ``bytes`` an explicit value + is a *maximum* width that truncates the value and never pads — ``str`` + counts characters (applied before UTF-8 encoding, so multibyte characters + are never split), ``bytes`` counts bytes. Because it is optional, pass + ``value`` by keyword when omitting it (``write_process_memory(addr, int, + value=9999)``). :param value: the value to write. :returns: the written value. ``` diff --git a/docs/guide/read-write.md b/docs/guide/read-write.md index ca749c5..d9d58cb 100644 --- a/docs/guide/read-write.md +++ b/docs/guide/read-write.md @@ -96,11 +96,12 @@ with OpenProcess(process_name="notepad.exe") as process: ```{admonition} Writing text? Count characters, not bytes. 
:class: tip -For `str` / `bytes` writes, `bufflength` is just a **minimum** width — your -value is always written in full. So `write_process_memory(addr, str, 3, "olá")` -writes all of `"olá"` even though the `á` takes 2 bytes (4 bytes total): you can -think in characters and never worry about UTF-8 byte math. Pass a *larger* -size to clear a fixed-size field (the extra space is zero-filled). +For `str` writes, `bufflength` is a **maximum** number of *characters* — the +value is truncated to that many characters and then encoded, so you never have +to do UTF-8 byte math. `write_process_memory(addr, str, 2, "óólá")` writes just +`"óó"` (4 bytes), and `write_process_memory(addr, str, 3, "olá")` keeps all of +`"olá"` whole. A shorter value is written as-is (no padding); pass `None` to +write the whole string. For `bytes`, the cap counts bytes instead. ``` ### Method signature @@ -113,9 +114,10 @@ size to clear a fixed-size field (the extra space is zero-filled). :param Type pytype: one of ``bool``, ``int``, ``float``, ``str``, ``bytes``. :param int bufflength: value size in bytes. **Optional** — defaults to ``None``, which uses the default width for numeric types and writes the - exact encoded length for ``str`` / ``bytes``. Since it is optional, pass - ``value`` by keyword when you omit it: ``write_process_memory(addr, int, - value=9999)``. + whole value for ``str`` / ``bytes``. For ``str`` / ``bytes`` an explicit + ``bufflength`` is a *maximum* that truncates (``str`` counts characters, ``bytes`` + counts bytes) and never pads. Since it is optional, pass ``value`` by + keyword when you omit it: ``write_process_memory(addr, int, value=9999)``. :param value: the value to write. :return: the written value. 
``` diff --git a/tests/test_write_str_bytes_width.py b/tests/test_write_str_bytes_width.py index 1df42ea..3905cf4 100644 --- a/tests/test_write_str_bytes_width.py +++ b/tests/test_write_str_bytes_width.py @@ -4,10 +4,10 @@ Tests for the ``str`` / ``bytes`` write-width semantics of ``write_process_memory`` (see ``util.convert.prepare_write``). -``bufflength`` is a *minimum* field width for these types, not a hard cap: -the whole value is always written (counting characters, not bytes, must not -raise), shorter values NUL-pad up to ``bufflength``, and ``None`` writes -exactly the encoded length. +``bufflength`` is a *maximum* width for these types that truncates the value +and never pads: for ``str`` the cap counts characters (applied before UTF-8 +encoding, so multibyte characters are never split), for ``bytes`` it counts +bytes, shorter values are written as-is, and ``None`` writes the whole value. """ import ctypes @@ -36,19 +36,32 @@ def process(): # --- prepare_write unit tests (platform-independent) -------------------- # -def test_prepare_write_multibyte_grows_to_fit(): - """"olá" is 3 characters but 4 UTF-8 bytes — width must grow, not raise.""" - pytype, length, raw = prepare_write(str, 3, "olá") +def test_prepare_write_caps_by_characters_not_bytes(): + """The cap counts characters: "óólá" capped at 2 keeps "óó" (4 UTF-8 bytes).""" + pytype, length, raw = prepare_write(str, 2, "óólá") assert pytype is bytes + assert raw == "óó".encode("utf-8") assert length == 4 + + +def test_prepare_write_multibyte_within_cap_kept_whole(): + """"olá" is 3 characters (4 bytes); cap of 3 keeps it whole, never splits.""" + pytype, length, raw = prepare_write(str, 3, "olá") + assert pytype is bytes assert raw == "olá".encode("utf-8") + assert length == 4 -def test_prepare_write_pads_up_to_bufflength(): +def test_prepare_write_truncates_to_char_cap(): + assert prepare_write(str, 3, "ola") == (bytes, 3, b"ola") + assert prepare_write(str, 2, "ola") == (bytes, 2, b"ol") + + +def 
test_prepare_write_shorter_than_cap_is_not_padded(): pytype, length, raw = prepare_write(str, 16, "AB") assert pytype is bytes - assert length == 16 - assert raw == b"AB" + b"\x00" * 14 + assert length == 2 + assert raw == b"AB" def test_prepare_write_none_uses_encoded_length(): @@ -58,9 +71,9 @@ def test_prepare_write_none_uses_encoded_length(): assert raw == "héllo".encode("utf-8") -def test_prepare_write_bytes_grows_and_pads(): - assert prepare_write(bytes, 2, b"\x01\x02\x03\x04") == (bytes, 4, b"\x01\x02\x03\x04") - assert prepare_write(bytes, 4, b"\x01\x02") == (bytes, 4, b"\x01\x02\x00\x00") +def test_prepare_write_bytes_caps_by_bytes(): + assert prepare_write(bytes, 2, b"\x01\x02\x03\x04") == (bytes, 2, b"\x01\x02") + assert prepare_write(bytes, 4, b"\x01\x02") == (bytes, 2, b"\x01\x02") assert prepare_write(bytes, None, b"\x01\x02") == (bytes, 2, b"\x01\x02") @@ -88,11 +101,19 @@ def test_prepare_write_rejects_missing_value(): def test_write_multibyte_string_does_not_raise(process): """The headline case: counting characters must not raise on multibyte.""" buffer = ctypes.create_string_buffer(8) - # 3 characters, 4 bytes — would have raised ValueError before. + # 3 characters, 4 bytes — the cap counts characters, so it stays whole. assert process.write_process_memory(ctypes.addressof(buffer), str, 3, "olá") == "olá" assert process.read_string(ctypes.addressof(buffer), 8) == "olá" +def test_write_caps_string_to_char_count(process): + """A string longer than the cap is truncated by character count.""" + buffer = ctypes.create_string_buffer(8) + assert process.write_process_memory(ctypes.addressof(buffer), str, 2, "óólá") == "óólá" + # Only the first 2 characters ("óó", 4 bytes) reach memory. 
+ assert process.read_string(ctypes.addressof(buffer), 4) == "óó" + + def test_write_returns_original_value_not_bytes(process): """str writes must return the original str, not the routed-through bytes.""" buffer = ctypes.create_string_buffer(16) @@ -101,17 +122,19 @@ def test_write_returns_original_value_not_bytes(process): assert isinstance(result, str) -def test_write_pads_fixed_field(process): - """Writing a short string into a wider field clears the trailing bytes.""" +def test_write_shorter_than_cap_does_not_pad(process): + """A string shorter than the cap writes only its own bytes — no padding.""" buffer = (ctypes.c_uint8 * 8)(*([0xFF] * 8)) process.write_process_memory(ctypes.addressof(buffer), str, 8, "AB") - assert process.read_bytes(ctypes.addressof(buffer), 8) == b"AB" + b"\x00" * 6 + # Only the 2 written bytes change; the rest keep their previous value. + assert process.read_bytes(ctypes.addressof(buffer), 8) == b"AB" + b"\xff" * 6 -def test_write_bytes_round_trip_grows(process): +def test_write_bytes_capped_to_bufflength(process): buffer = (ctypes.c_uint8 * 4)() process.write_process_memory(ctypes.addressof(buffer), bytes, 2, b"\xde\xad\xbe\xef") - assert process.read_bytes(ctypes.addressof(buffer), 4) == b"\xde\xad\xbe\xef" + # Only the first 2 bytes are written; the rest stay zero. + assert process.read_bytes(ctypes.addressof(buffer), 4) == b"\xde\xad\x00\x00" # --- bufflength is now optional: value may be passed by keyword ---------- #