Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions PyMemoryEditor/macos/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,11 @@ def write_process_memory(
:param pytype: type of value to be written (bool, int, float, str, bytes).
:param bufflength: value size in bytes. Optional — defaults to ``None``,
which uses the default width for numeric types (int→4, float→8,
bool→1) and writes the exact encoded length for ``str`` / ``bytes``.
Since it is optional, pass ``value`` by keyword when omitting it
(``write_process_memory(addr, str, value="hi")``).
bool→1) and writes the whole value for ``str`` / ``bytes``. For
``str`` / ``bytes`` an explicit ``bufflength`` is a *maximum*: longer
data is truncated to it (``str`` counts characters, ``bytes`` counts
bytes) and shorter data is never padded. Since ``bufflength`` is
optional, pass ``value`` by keyword when omitting it
(``write_process_memory(addr, str, value="hi")``).
:param value: value to be written.
"""
self.__require_open()
Expand Down
28 changes: 14 additions & 14 deletions PyMemoryEditor/process/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,15 +376,15 @@ def write_process_memory(

* For numeric types (int, float, bool) it is the exact write width;
leave it as ``None`` to use the default — int→4, float→8, bool→1.
* For ``str`` / ``bytes`` it is a *minimum* field width, not a hard
cap. The whole value is always written: if its encoded form is
longer than ``bufflength`` every byte is still written (so
``write(addr, str, 3, "olá")`` writes all 4 UTF-8 bytes instead
of raising — you may count characters, not bytes). If it is
shorter, the field is NUL-padded up to ``bufflength`` (handy to
clear a fixed-size buffer). ``None`` (the default) writes exactly
the encoded length. ``str`` is encoded as UTF-8; no NUL terminator
is appended.
* For ``str`` / ``bytes`` it is a *maximum* width that truncates the
value; it never pads. For a ``str`` the cap counts **characters**,
applied before UTF-8 encoding, so multibyte characters are never
split: ``write(addr, str, 2, "óólá")`` keeps ``"óó"`` and writes
its 4 bytes, while ``write(addr, str, 2, "ola")`` writes ``b"ol"``.
For ``bytes`` the cap counts **bytes**. A value shorter than the
cap is written as-is (no NUL padding). ``None`` (the default)
writes the whole value. ``str`` is encoded as UTF-8; no NUL
terminator is appended.
:param value: value to be written. Required — since ``bufflength`` is
now optional, pass ``value`` by keyword when omitting ``bufflength``::

Expand Down Expand Up @@ -574,11 +574,11 @@ def write_string(
— useful when overwriting a longer string in place so :meth:`read_string`
stops where you intend rather than reading the stale tail.

Multi-byte characters are handled correctly — the field grows to the
encoded byte length, so you never have to count bytes yourself.
Returns ``text``. For a fixed-width / NUL-padded field, call
:meth:`write_process_memory` with ``pytype=str`` and an explicit
``bufflength`` instead.
Multi-byte characters are handled correctly — the whole string is
encoded and written, so you never have to count bytes yourself.
Returns ``text``. To cap the write to a maximum number of characters,
call :meth:`write_process_memory` with ``pytype=str`` and an explicit
``bufflength`` instead (it truncates, it does not pad).
"""
payload = text + "\x00" if null_terminator else text
self.write_process_memory(address, str, None, payload)
Expand Down
41 changes: 25 additions & 16 deletions PyMemoryEditor/util/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,18 @@ def prepare_write(
value via :func:`get_c_type_of` exactly as before.

* **str / bytes** — the value is encoded to raw bytes (UTF-8 for ``str``)
and routed through the ``bytes`` path. Here ``bufflength`` is a
*minimum* field width, not a hard cap:

- the **whole** value is always written, even when its encoded form is
longer than ``bufflength`` — so ``write(addr, str, 3, "olá")`` writes
all 4 UTF-8 bytes instead of raising ``ValueError`` because the caller
counted characters, not bytes;
- when the encoded form is **shorter** than ``bufflength`` the buffer is
NUL-padded up to it, which lets you clear a fixed-size field
(``write(addr, str, 16, "AB")`` writes ``b"AB"`` + 14 zero bytes);
- ``bufflength=None`` writes exactly the encoded length.
and routed through the ``bytes`` path. Here ``bufflength`` is a *maximum*
width that truncates the value; it never pads:

- for a ``str`` value the cap counts **characters**, applied *before*
encoding — ``write(addr, str, 2, "óólá")`` keeps ``"óó"`` and writes
its 4 UTF-8 bytes, while ``write(addr, str, 2, "ola")`` keeps ``"ol"``
and writes 2 bytes;
- for a ``bytes`` value the cap counts **bytes** (there are no
characters) — ``write(addr, bytes, 2, b"abc")`` writes ``b"ab"``;
- a value shorter than the cap is written as-is, with no NUL padding —
``write(addr, str, 10, "ola")`` writes just ``b"ola"`` (3 bytes);
- ``bufflength=None`` writes the whole value (no cap).

The caller is expected to return its *original* ``value`` to the user, so
this routing through ``bytes`` stays invisible at the public API.
Expand All @@ -138,15 +139,23 @@ def prepare_write(
_validate_pytype(pytype)

if pytype is str or pytype is bytes:
raw = value.encode("utf-8") if isinstance(value, str) else value
if not isinstance(raw, (bytes, bytearray)):
if isinstance(value, str):
# str: bufflength caps the number of *characters*, applied before
# encoding so multi-byte characters are never split mid-sequence.
if bufflength is not None:
value = value[:bufflength]
raw = value.encode("utf-8")
elif isinstance(value, (bytes, bytearray)):
# bytes: bufflength caps the number of *bytes*.
raw = bytes(value)
if bufflength is not None:
raw = raw[:bufflength]
else:
raise TypeError(
"value must be str or bytes when pytype is str/bytes, got %s."
% type(value).__name__
)
raw = bytes(raw)
width = len(raw) if bufflength is None else max(bufflength, len(raw))
return bytes, width, raw.ljust(width, b"\x00")
return bytes, len(raw), raw

return pytype, resolve_bufflength(pytype, bufflength), value

Expand Down
11 changes: 6 additions & 5 deletions docs/api/openprocess.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,12 @@ with OpenProcess(
:param Type pytype: one of the five supported types.
:param int bufflength: value size in bytes. **Optional** (defaults to
``None``): numeric types fall back to their default width and ``str`` /
``bytes`` write the exact encoded length. For ``str`` / ``bytes`` a value
*larger* than the data is a *minimum* width — the whole value is always
written, and the extra space zero-pads the field. Because it is optional,
pass ``value`` by keyword when omitting it (``write_process_memory(addr,
int, value=9999)``).
``bytes`` write the whole value. For ``str`` / ``bytes`` an explicit
``bufflength`` is a *maximum* width: longer data is truncated to it and
shorter data is never padded — ``str`` counts characters (applied before
UTF-8 encoding, so multibyte characters are never split), ``bytes`` counts
bytes. Because ``bufflength`` is optional, pass ``value`` by keyword when
omitting it (``write_process_memory(addr, int, value=9999)``).
:param value: the value to write.
:returns: the written value.
```
Expand Down
18 changes: 10 additions & 8 deletions docs/guide/read-write.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,12 @@ with OpenProcess(process_name="notepad.exe") as process:
```{admonition} Writing text? Count characters, not bytes.
:class: tip

For `str` / `bytes` writes, `bufflength` is just a **minimum** width — your
value is always written in full. So `write_process_memory(addr, str, 3, "olá")`
writes all of `"olá"` even though the `á` takes 2 bytes (4 bytes total): you can
think in characters and never worry about UTF-8 byte math. Pass a *larger*
size to clear a fixed-size field (the extra space is zero-filled).
For `str` writes, `bufflength` is a **maximum** number of *characters* — the
value is truncated to that many characters and then encoded, so you never have
to do UTF-8 byte math. `write_process_memory(addr, str, 2, "óólá")` writes just
`"óó"` (4 bytes), and `write_process_memory(addr, str, 3, "olá")` writes all of
`"olá"` (3 characters, 4 bytes). A shorter value is written as-is (no padding);
pass `None`, or omit `bufflength`, to write the whole string. For `bytes`, the
cap counts bytes instead.
```

### Method signature
Expand All @@ -113,9 +114,10 @@ size to clear a fixed-size field (the extra space is zero-filled).
:param Type pytype: one of ``bool``, ``int``, ``float``, ``str``, ``bytes``.
:param int bufflength: value size in bytes. **Optional** — defaults to
``None``, which uses the default width for numeric types and writes the
exact encoded length for ``str`` / ``bytes``. Since it is optional, pass
``value`` by keyword when you omit it: ``write_process_memory(addr, int,
value=9999)``.
whole value for ``str`` / ``bytes``. For ``str`` / ``bytes`` an explicit
``bufflength`` is a *maximum*: longer data is truncated to it (``str``
counts characters, ``bytes`` counts bytes) and shorter data is never padded.
Since ``bufflength`` is optional, pass ``value`` by keyword when you omit it:
``write_process_memory(addr, int, value=9999)``.
:param value: the value to write.
:return: the written value.
```
Expand Down
61 changes: 42 additions & 19 deletions tests/test_write_str_bytes_width.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
Tests for the ``str`` / ``bytes`` write-width semantics of
``write_process_memory`` (see ``util.convert.prepare_write``).

``bufflength`` is a *minimum* field width for these types, not a hard cap:
the whole value is always written (counting characters, not bytes, must not
raise), shorter values NUL-pad up to ``bufflength``, and ``None`` writes
exactly the encoded length.
``bufflength`` is a *maximum* width for these types that truncates the value
and never pads: for ``str`` the cap counts characters (applied before UTF-8
encoding, so multibyte characters are never split), for ``bytes`` it counts
bytes, shorter values are written as-is, and ``None`` writes the whole value.
"""

import ctypes
Expand Down Expand Up @@ -36,19 +36,32 @@ def process():
# --- prepare_write unit tests (platform-independent) -------------------- #


def test_prepare_write_multibyte_grows_to_fit():
""""olá" is 3 characters but 4 UTF-8 bytes — width must grow, not raise."""
pytype, length, raw = prepare_write(str, 3, "olá")
def test_prepare_write_caps_by_characters_not_bytes():
    """A ``str`` cap of 2 keeps two characters, even though "óó" needs 4 UTF-8 bytes."""
    expected_payload = "óó".encode("utf-8")
    routed_type, width, payload = prepare_write(str, 2, "óólá")
    assert routed_type is bytes
    assert payload == expected_payload
    assert width == 4


def test_prepare_write_multibyte_within_cap_kept_whole():
    """A 3-character cap leaves "olá" intact: all 4 of its UTF-8 bytes are kept."""
    text = "olá"
    routed_type, width, payload = prepare_write(str, 3, text)
    assert routed_type is bytes
    assert payload == text.encode("utf-8")
    assert width == 4


def test_prepare_write_pads_up_to_bufflength():
def test_prepare_write_truncates_to_char_cap():
    """ASCII text is cut to the cap; a cap equal to the length keeps it all."""
    exact_fit = prepare_write(str, 3, "ola")
    assert exact_fit == (bytes, 3, b"ola")
    truncated = prepare_write(str, 2, "ola")
    assert truncated == (bytes, 2, b"ol")


def test_prepare_write_shorter_than_cap_is_not_padded():
pytype, length, raw = prepare_write(str, 16, "AB")
assert pytype is bytes
assert length == 16
assert raw == b"AB" + b"\x00" * 14
assert length == 2
assert raw == b"AB"


def test_prepare_write_none_uses_encoded_length():
Expand All @@ -58,9 +71,9 @@ def test_prepare_write_none_uses_encoded_length():
assert raw == "héllo".encode("utf-8")


def test_prepare_write_bytes_grows_and_pads():
assert prepare_write(bytes, 2, b"\x01\x02\x03\x04") == (bytes, 4, b"\x01\x02\x03\x04")
assert prepare_write(bytes, 4, b"\x01\x02") == (bytes, 4, b"\x01\x02\x00\x00")
def test_prepare_write_bytes_caps_by_bytes():
    """For ``bytes`` the cap counts bytes: longer data is cut, shorter data is not padded."""
    # (cap, data handed in, bytes expected to come back)
    cases = (
        (2, b"\x01\x02\x03\x04", b"\x01\x02"),
        (4, b"\x01\x02", b"\x01\x02"),
        (None, b"\x01\x02", b"\x01\x02"),
    )
    for cap, data, written in cases:
        assert prepare_write(bytes, cap, data) == (bytes, len(written), written)


Expand Down Expand Up @@ -88,11 +101,19 @@ def test_prepare_write_rejects_missing_value():
def test_write_multibyte_string_does_not_raise(process):
"""The headline case: counting characters must not raise on multibyte."""
buffer = ctypes.create_string_buffer(8)
# 3 characters, 4 bytes — would have raised ValueError before.
# 3 characters, 4 bytes — the cap counts characters, so it stays whole.
assert process.write_process_memory(ctypes.addressof(buffer), str, 3, "olá") == "olá"
assert process.read_string(ctypes.addressof(buffer), 8) == "olá"


def test_write_caps_string_to_char_count(process):
    """A string longer than the cap is truncated by character count.

    :param process: fixture providing the open process under test.
    """
    buffer = ctypes.create_string_buffer(8)  # 8 zero bytes
    address = ctypes.addressof(buffer)
    # The call hands back the caller's original, untruncated string.
    assert process.write_process_memory(address, str, 2, "óólá") == "óólá"
    # Only the first 2 characters ("óó", 4 bytes) may reach memory. Inspect
    # the whole buffer: reading back just 4 bytes would also succeed if all
    # 7 bytes of "óólá" had been written, so it would not prove truncation.
    assert process.read_bytes(address, 8) == "óó".encode("utf-8") + b"\x00" * 4


def test_write_returns_original_value_not_bytes(process):
"""str writes must return the original str, not the routed-through bytes."""
buffer = ctypes.create_string_buffer(16)
Expand All @@ -101,17 +122,19 @@ def test_write_returns_original_value_not_bytes(process):
assert isinstance(result, str)


def test_write_pads_fixed_field(process):
"""Writing a short string into a wider field clears the trailing bytes."""
def test_write_shorter_than_cap_does_not_pad(process):
"""A string shorter than the cap writes only its own bytes — no padding."""
buffer = (ctypes.c_uint8 * 8)(*([0xFF] * 8))
process.write_process_memory(ctypes.addressof(buffer), str, 8, "AB")
assert process.read_bytes(ctypes.addressof(buffer), 8) == b"AB" + b"\x00" * 6
# Only the 2 written bytes change; the rest keep their previous value.
assert process.read_bytes(ctypes.addressof(buffer), 8) == b"AB" + b"\xff" * 6


def test_write_bytes_round_trip_grows(process):
def test_write_bytes_capped_to_bufflength(process):
buffer = (ctypes.c_uint8 * 4)()
process.write_process_memory(ctypes.addressof(buffer), bytes, 2, b"\xde\xad\xbe\xef")
assert process.read_bytes(ctypes.addressof(buffer), 4) == b"\xde\xad\xbe\xef"
# Only the first 2 bytes are written; the rest stay zero.
assert process.read_bytes(ctypes.addressof(buffer), 4) == b"\xde\xad\x00\x00"


# --- bufflength is now optional: value may be passed by keyword ---------- #
Expand Down
Loading