From 7e5923b37a81b05bd70d0f5c7ea9cba277be0e19 Mon Sep 17 00:00:00 2001 From: JeanExtreme002 Date: Thu, 4 Jun 2026 22:04:31 -0300 Subject: [PATCH] feat: drop the psutil dependency and fix documentation divergences Make the core library truly dependency-free by replacing psutil with native per-platform process discovery, and correct a set of doc/docstring claims that did not match runtime behavior. Drop psutil: - Native process enumeration and pid checks: CreateToolhelp32Snapshot (Windows), /proc (Linux), libproc proc_listpids/proc_name (macOS). - process.util now dispatches to the per-OS backend instead of psutil. - psutil moves from a hard runtime dependency to the [app] extra (the GUI process picker still uses its richer username/memory info). - Add tests/test_process_enumeration.py and rewire the lookup tests off psutil. Docs/docstring fixes: - DEFAULT_MAX_REGION_CHUNK is 256 MiB (was documented as 16 MiB). - iter_region_chunks chunks are contiguous, not overlapping. - partial reads raise OSError; they are not logged as WARNING. - ThreadInfo.start_address is always None on every backend. - RemotePointer bufflength is required only to read str/bytes, not write. - Correct the compile_pattern example output. - OpenProcess signature shows the real platform-specific defaults. - macOS ModuleInfo name/path are both empty when the path is unresolvable. - read_string reads exactly byte_count bytes (raises on a short read). - ptr_size defaults to None (auto-detected) across the pointer APIs. - Migrate search_by_value/search_by_addresses examples to keyword args. 
--- PyMemoryEditor/__init__.py | 5 +- PyMemoryEditor/linux/functions.py | 38 ++++++++ PyMemoryEditor/macos/functions.py | 74 ++++++++++++++- PyMemoryEditor/macos/libsystem.py | 36 +++++++ PyMemoryEditor/process/abstract.py | 12 ++- PyMemoryEditor/process/module_info.py | 6 +- PyMemoryEditor/process/remote_pointer.py | 6 +- PyMemoryEditor/process/thread_info.py | 5 +- PyMemoryEditor/process/util.py | 43 ++++++--- PyMemoryEditor/win32/functions.py | 72 ++++++++++++++ PyMemoryEditor/win32/types.py | 27 ++++++ README.md | 2 +- docs/api/enums.md | 4 +- docs/api/errors.md | 2 +- docs/api/memory-region.md | 2 +- docs/api/module-info.md | 10 +- docs/api/openprocess.md | 21 ++-- docs/api/remote-pointer.md | 11 ++- docs/api/thread-info.md | 6 +- docs/api/utilities.md | 13 ++- docs/app.md | 4 +- docs/guide/allocate-free.md | 2 +- docs/guide/logging.md | 11 ++- docs/guide/memory-regions.md | 6 +- docs/guide/modules-threads.md | 8 +- docs/guide/pattern-scan.md | 8 +- docs/guide/pointer-scan.md | 6 +- docs/guide/pointers.md | 36 +++---- docs/guide/read-write.md | 9 +- docs/index.md | 2 +- docs/installation.md | 4 +- docs/quickstart.md | 6 +- docs/why.md | 2 +- pyproject.toml | 13 ++- tests/test_errors.py | 3 +- tests/test_partial_name_match.py | 24 +++-- tests/test_process_enumeration.py | 116 +++++++++++++++++++++++ tests/test_process_lookup.py | 47 ++++----- 38 files changed, 572 insertions(+), 130 deletions(-) create mode 100644 tests/test_process_enumeration.py diff --git a/PyMemoryEditor/__init__.py b/PyMemoryEditor/__init__.py index 1789be9..4e5ab72 100644 --- a/PyMemoryEditor/__init__.py +++ b/PyMemoryEditor/__init__.py @@ -33,8 +33,9 @@ # Package-wide logger. Silent by default (NullHandler) — embedding apps opt in # with `logging.basicConfig(level=logging.DEBUG)` or by attaching a handler to # the "PyMemoryEditor" logger. 
Backends emit DEBUG for transient skips (pages -# vanished mid-scan) and WARNING for surprising-but-recovered conditions -# (partial reads, mach_vm_protect restore failure). +# vanished mid-scan, unreadable chunks) and WARNING for surprising-but-recovered +# conditions (the macOS mach_vm_protect restore failure). A partial read raises +# OSError rather than logging. logger = logging.getLogger("PyMemoryEditor") logger.addHandler(logging.NullHandler()) diff --git a/PyMemoryEditor/linux/functions.py b/PyMemoryEditor/linux/functions.py index 881ae9d..31bab38 100644 --- a/PyMemoryEditor/linux/functions.py +++ b/PyMemoryEditor/linux/functions.py @@ -541,3 +541,41 @@ def get_threads(pid: int) -> Generator[ThreadInfo, None, None]: priority=priority, raw=entry, ) + + +def get_processes() -> Generator[Tuple[int, str], None, None]: + """ + Yield ``(pid, name)`` for every process by listing ``/proc`` — each numeric + subdirectory is a live pid. + + ``name`` comes from ``/proc/<pid>/comm`` (the kernel truncates it to 15 + characters via ``TASK_COMM_LEN``). Processes that vanish mid-scan are + skipped silently (logged at DEBUG). + """ + try: + entries = os.listdir("/proc") + except OSError as exc: + _logger.debug("get_processes: could not list /proc: %s", exc) + return + + for entry in entries: + if not entry.isdigit(): + continue + pid = int(entry) + + try: + with open("/proc/{}/comm".format(entry), "r") as fh: + name = fh.readline().rstrip("\n") + except OSError as exc: + # Race (process exited) or permission issue — skip it. 
+ _logger.debug("get_processes: could not read comm for pid=%s: %s", entry, exc) + continue + + yield pid, name + + +def process_exists(pid: int) -> bool: + """Return whether a process with ``pid`` currently exists (``/proc/<pid>``).""" + if pid < 0: + return False + return os.path.isdir("/proc/{}".format(pid)) diff --git a/PyMemoryEditor/macos/functions.py b/PyMemoryEditor/macos/functions.py index d7c48bf..22000e2 100644 --- a/PyMemoryEditor/macos/functions.py +++ b/PyMemoryEditor/macos/functions.py @@ -32,7 +32,7 @@ ) from ..util.pattern import PatternLike, compile_pattern -from .libsystem import libsystem, mach_error_message, mach_task_self_ +from .libsystem import PROC_ALL_PIDS, libsystem, mach_error_message, mach_task_self_ from .types import ( KERN_INVALID_ADDRESS, KERN_INVALID_ARGUMENT, @@ -905,3 +905,75 @@ def search_values_by_addresses( raise_error=raise_error, transient_error_check=_is_transient, ) + + +def get_processes() -> Generator[Tuple[int, str], None, None]: + """ + Yield ``(pid, name)`` for every process via libproc's ``proc_listpids`` + + ``proc_name`` — no ``task_for_pid`` (and thus no debugger entitlement) + required, since both operate on the BSD process table. + + ``name`` is the executable name ``proc_name`` reports, falling back to the + basename of ``proc_pidpath``; for the rare process that denies both it is an + empty string. Needs no special privileges. + """ + # First call sizes the pid array: proc_listpids(.., NULL, 0) returns the + # number of bytes that would be written. + nbytes = libsystem.proc_listpids(PROC_ALL_PIDS, 0, None, 0) + if nbytes <= 0: + _logger.debug("get_processes: proc_listpids sizing returned %d", nbytes) + return + + count = nbytes // ctypes.sizeof(ctypes.c_int) + # Over-allocate slightly: the table can grow between the two calls. 
+ pids = (ctypes.c_int * (count + 16))() + written = libsystem.proc_listpids( + PROC_ALL_PIDS, 0, pids, ctypes.sizeof(pids) + ) + if written <= 0: + _logger.debug("get_processes: proc_listpids returned %d", written) + return + + name_buffer = ctypes.create_string_buffer(256) + path_buffer = ctypes.create_string_buffer(4096) + for pid in pids[: written // ctypes.sizeof(ctypes.c_int)]: + # pid 0 is the kernel and shows up as a zero-filled slot — skip it. + if pid <= 0: + continue + + length = libsystem.proc_name(pid, name_buffer, ctypes.sizeof(name_buffer)) + if length > 0: + name = name_buffer.raw[:length].decode("utf-8", errors="replace") + else: + # proc_name is denied for many root-owned processes; recover the + # name from the executable path's basename instead. + plen = libsystem.proc_pidpath(pid, path_buffer, ctypes.sizeof(path_buffer)) + if plen > 0: + path = path_buffer.raw[:plen].decode("utf-8", errors="replace") + name = os.path.basename(path) + else: + name = "" + + yield int(pid), name + + +def process_exists(pid: int) -> bool: + """ + Return whether a process with ``pid`` currently exists. + + Uses ``os.kill(pid, 0)``: it sends no signal but performs the existence / + permission check — ``ESRCH`` means no such process, ``EPERM`` means it + exists but is owned by another user (still True). + """ + # ``pid <= 0`` is rejected outright: os.kill(0, 0) targets the *caller's + # process group* (not "pid 0"), which would spuriously report True and + # diverge from the Linux/Windows backends. 
+ if pid <= 0: + return False + try: + os.kill(pid, 0) + except ProcessLookupError: + return False + except PermissionError: + return True + return True diff --git a/PyMemoryEditor/macos/libsystem.py b/PyMemoryEditor/macos/libsystem.py index e6ff479..9f41758 100644 --- a/PyMemoryEditor/macos/libsystem.py +++ b/PyMemoryEditor/macos/libsystem.py @@ -199,6 +199,42 @@ class rusage_info_v0(ctypes.Structure): ) libsystem.proc_pid_rusage.restype = ctypes.c_int +# libproc process enumeration (in <libproc.h>, exported by libSystem). +# PROC_ALL_PIDS lists every pid; the buffer is an array of pid_t (c_int). +PROC_ALL_PIDS = 1 + +# int proc_listpids(uint32_t type, uint32_t typeinfo, void *buffer, int buffersize); +# With buffer=NULL/buffersize=0 it returns the number of bytes that would be +# written, letting the caller size the pid array. Returns bytes written (or -1). +libsystem.proc_listpids.argtypes = ( + ctypes.c_uint32, + ctypes.c_uint32, + ctypes.c_void_p, + ctypes.c_int, +) +libsystem.proc_listpids.restype = ctypes.c_int + +# int proc_name(int pid, void *buffer, uint32_t buffersize); +# Fills buffer with the executable name and returns its length. Returns 0 for +# processes the caller can't query (e.g. root-owned daemons as a normal user). +libsystem.proc_name.argtypes = ( + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_uint32, +) +libsystem.proc_name.restype = ctypes.c_int + +# int proc_pidpath(int pid, void *buffer, uint32_t buffersize); +# Full executable path; its basename is the fallback name source when proc_name +# is denied — proc_pidpath succeeds for many processes proc_name refuses, +# including root-owned ones. 
+libsystem.proc_pidpath.argtypes = ( + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_uint32, +) +libsystem.proc_pidpath.restype = ctypes.c_int + # char *mach_error_string(mach_error_t error_value); libsystem.mach_error_string.argtypes = (ctypes.c_int,) libsystem.mach_error_string.restype = ctypes.c_char_p diff --git a/PyMemoryEditor/process/abstract.py b/PyMemoryEditor/process/abstract.py index 7d7792f..2dcb2c7 100644 --- a/PyMemoryEditor/process/abstract.py +++ b/PyMemoryEditor/process/abstract.py @@ -554,14 +554,16 @@ def write_bool(self, address: int, value: bool) -> bool: def read_string(self, address: int, byte_count: int) -> str: """ - Read up to ``byte_count`` bytes, decode them as UTF-8 and return the + Read exactly ``byte_count`` bytes, decode them as UTF-8 and return the text up to the first NUL terminator (C-string semantics). Goes through the ``str`` read path, so invalid UTF-8 becomes ``U+FFFD`` - (``errors="replace"``). ``byte_count`` is the maximum field width to - read; the NUL terminator and everything after it are dropped. To make a - shorter :meth:`write_string` read back cleanly here, write it with - ``null_terminator=True`` (or into an already-zeroed field). + (``errors="replace"``). ``byte_count`` is the field width to read, not an + upper bound — those bytes must all be readable or an ``OSError`` is + raised; the NUL terminator and everything after it are then dropped from + the returned text. To make a shorter :meth:`write_string` read back + cleanly here, write it with ``null_terminator=True`` (or into an + already-zeroed field). """ return self.read_process_memory(address, str, byte_count).split("\x00", 1)[0] diff --git a/PyMemoryEditor/process/module_info.py b/PyMemoryEditor/process/module_info.py index 46bc6ff..9d17491 100644 --- a/PyMemoryEditor/process/module_info.py +++ b/PyMemoryEditor/process/module_info.py @@ -24,8 +24,10 @@ class ModuleInfo: """A single module (executable or shared library) loaded in a process. 
:param name: file name of the module (e.g. ``"game.exe"``, ``"libc.so.6"``). - :param path: full path of the backing file on disk when the OS exposes it; - falls back to ``name`` when only the name is available. + May be empty on macOS when the image path can't be resolved. + :param path: full path of the backing file on disk. On Windows it falls back + to ``name`` when only the name is available; on macOS ``name`` derives + from ``path``, so an unresolvable image yields both as empty strings. :param base_address: address where the module is loaded for this run — the value to add static offsets to. Defeats ASLR for ``base + offset`` addressing. diff --git a/PyMemoryEditor/process/remote_pointer.py b/PyMemoryEditor/process/remote_pointer.py index 527a4ec..0f34af9 100644 --- a/PyMemoryEditor/process/remote_pointer.py +++ b/PyMemoryEditor/process/remote_pointer.py @@ -53,8 +53,10 @@ class RemotePointer: :param pytype: how to interpret the bytes at the resolved address (bool, int, float, str or bytes). Defaults to ``int``. :param bufflength: value size in bytes. May be ``None`` for numeric types - (defaults: int→4, float→8, bool→1); ``str`` and ``bytes`` require an - explicit size. + (defaults: int→4, float→8, bool→1). For ``str`` / ``bytes`` it is + required only to **read** (``.value`` / :meth:`read`) — there is no + value to infer the width from; **writing** accepts ``None`` and stores + the whole value (a set ``bufflength`` then caps the width, truncating). :param ptr_size: pointer width used when walking ``offsets`` — 8 for 64-bit targets, 4 for 32-bit. Leave ``None`` (the default) to use the target process's :attr:`~PyMemoryEditor.process.abstract.AbstractProcess.pointer_size`, diff --git a/PyMemoryEditor/process/thread_info.py b/PyMemoryEditor/process/thread_info.py index 48983da..405a0ae 100644 --- a/PyMemoryEditor/process/thread_info.py +++ b/PyMemoryEditor/process/thread_info.py @@ -29,8 +29,9 @@ class ThreadInfo: """A single thread inside a target process. 
:param tid: thread identifier (see module docstring — meaning is platform-dependent). - :param start_address: entry point of the thread, when the OS exposes it - cheaply. ``None`` when not available. + :param start_address: reserved for the thread's entry point. Currently + always ``None`` on every platform (no backend fetches it); kept as a + stable field for forward compatibility. :param state: short human-readable state — e.g. ``"R"`` / ``"S"`` on Linux. ``None`` when not available. :param priority: scheduling priority value as reported by the OS. The scale diff --git a/PyMemoryEditor/process/util.py b/PyMemoryEditor/process/util.py index aa4983a..4f82c5a 100644 --- a/PyMemoryEditor/process/util.py +++ b/PyMemoryEditor/process/util.py @@ -1,12 +1,37 @@ # -*- coding: utf-8 -*- -from typing import List, Optional - -import psutil +import sys +from typing import Iterator, List, Optional, Tuple from .errors import AmbiguousProcessNameError +# Native, dependency-free process enumeration. Each backend exposes: +# iter_processes() -> Iterator[(pid, name)] (name = executable name only) +# backend_process_exists(pid) -> bool +# Windows uses CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS); Linux walks /proc; +# macOS uses libproc's proc_listpids/proc_name. This is the same per-platform +# dispatch PyMemoryEditor already does for OpenProcess. +if sys.platform == "win32": + from ..win32.functions import GetProcesses as _iter_processes + from ..win32.functions import ProcessExists as _backend_process_exists + +elif sys.platform.startswith("linux"): + from ..linux.functions import get_processes as _iter_processes + from ..linux.functions import process_exists as _backend_process_exists + +elif sys.platform == "darwin": + from ..macos.functions import get_processes as _iter_processes + from ..macos.functions import process_exists as _backend_process_exists + +else: # pragma: no cover - importing the package already raises on these. 
+ def _iter_processes() -> Iterator[Tuple[int, str]]: + return iter(()) + + def _backend_process_exists(pid: int) -> bool: + return False + + def get_process_ids_by_process_name( process_name: str, *, @@ -30,12 +55,8 @@ def get_process_ids_by_process_name( matches: List[int] = [] - for process in psutil.process_iter(["name", "pid"]): - try: - name = process.info["name"] or "" - except (psutil.NoSuchProcess, psutil.AccessDenied): - continue - + for pid, name in _iter_processes(): + name = name or "" name_cmp = name if case_sensitive else name.casefold() if exact_match: @@ -44,7 +65,7 @@ def get_process_ids_by_process_name( hit = process_name_cmp in name_cmp if hit: - matches.append(process.info["pid"]) + matches.append(pid) return matches @@ -77,4 +98,4 @@ def pid_exists(pid: int) -> bool: """ Check if the process ID exists. """ - return psutil.pid_exists(pid) + return _backend_process_exists(pid) diff --git a/PyMemoryEditor/win32/functions.py b/PyMemoryEditor/win32/functions.py index 02d8945..adcb91f 100644 --- a/PyMemoryEditor/win32/functions.py +++ b/PyMemoryEditor/win32/functions.py @@ -38,9 +38,11 @@ MEMORY_BASIC_INFORMATION_32, MEMORY_BASIC_INFORMATION_64, MODULEENTRY32, + PROCESSENTRY32, SYSTEM_INFO, TH32CS_SNAPMODULE, TH32CS_SNAPMODULE32, + TH32CS_SNAPPROCESS, TH32CS_SNAPTHREAD, THREADENTRY32, ) @@ -150,6 +152,18 @@ ) kernel32.Module32Next.restype = ctypes.wintypes.BOOL +kernel32.Process32First.argtypes = ( + ctypes.wintypes.HANDLE, + ctypes.POINTER(PROCESSENTRY32), +) +kernel32.Process32First.restype = ctypes.wintypes.BOOL + +kernel32.Process32Next.argtypes = ( + ctypes.wintypes.HANDLE, + ctypes.POINTER(PROCESSENTRY32), +) +kernel32.Process32Next.restype = ctypes.wintypes.BOOL + # LPVOID VirtualAllocEx(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, # DWORD flAllocationType, DWORD flProtect); kernel32.VirtualAllocEx.argtypes = ( @@ -672,6 +686,64 @@ def GetModules(pid: int) -> Generator[ModuleInfo, None, None]: kernel32.CloseHandle(snapshot) +def 
GetProcesses() -> Generator[Tuple[int, str], None, None]: + """ + Yield ``(pid, name)`` for every process in the system. + + Uses ``CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS)`` followed by + Process32First/Next — the documented user-mode way to enumerate processes + without an extra dependency, mirroring how GetThreads/GetModules already + work. ``szExeFile`` is the executable name only (no path). + """ + snapshot = kernel32.CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0) + if not snapshot or snapshot == ctypes.wintypes.HANDLE(-1).value: + _raise_last_error("CreateToolhelp32Snapshot") + + entry = PROCESSENTRY32() + entry.dwSize = ctypes.sizeof(entry) + + try: + if not kernel32.Process32First(snapshot, ctypes.byref(entry)): + # Empty snapshot is theoretically possible; log and bail. + _logger.debug("GetProcesses: Process32First returned 0 (empty snapshot)") + return + + while True: + name = entry.szExeFile.decode("utf-8", errors="replace") + yield int(entry.th32ProcessID), name + if not kernel32.Process32Next(snapshot, ctypes.byref(entry)): + break + finally: + kernel32.CloseHandle(snapshot) + + +# Minimal right that lets OpenProcess succeed for a probe; available on every +# supported Windows version. +_PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 +_ERROR_ACCESS_DENIED = 5 + + +def ProcessExists(pid: int) -> bool: + """ + Return whether a process with ``pid`` currently exists. + + O(1) probe via ``OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION)``: a handle + means it exists; ``ERROR_ACCESS_DENIED`` means it exists but is protected + (still True); any other failure (notably ``ERROR_INVALID_PARAMETER`` for an + unknown pid) means it does not. 
+ """ + if pid < 0: + return False + + ctypes.set_last_error(0) + handle = kernel32.OpenProcess(_PROCESS_QUERY_LIMITED_INFORMATION, False, pid) + if handle: + kernel32.CloseHandle(handle) + return True + + return ctypes.get_last_error() == _ERROR_ACCESS_DENIED + + def AllocateMemory(process_handle: int, size: int, permission=None) -> int: """ Commit ``size`` bytes in the target process via VirtualAllocEx and return diff --git a/PyMemoryEditor/win32/types.py b/PyMemoryEditor/win32/types.py index 5e4fb57..184c0d5 100644 --- a/PyMemoryEditor/win32/types.py +++ b/PyMemoryEditor/win32/types.py @@ -114,3 +114,30 @@ class MODULEENTRY32(Structure): ("szModule", c_char * (MAX_MODULE_NAME32 + 1)), ("szExePath", c_char * MAX_PATH), ] + + +# TH32CS_SNAPPROCESS flag for CreateToolhelp32Snapshot — used by get_processes() +# to enumerate every process in the system (pid + executable name). +TH32CS_SNAPPROCESS = 0x00000002 + + +class PROCESSENTRY32(Structure): + """Layout matching the ANSI Win32 ``PROCESSENTRY32`` (Process32First/Next). + + ``th32DefaultHeapID`` is a ``ULONG_PTR`` — declared as a void pointer so it + stays pointer-sized on both 32- and 64-bit builds. ``szExeFile`` is a + ``c_char`` array holding the NUL-terminated executable name (no path). + """ + + _fields_ = [ + ("dwSize", wintypes.DWORD), + ("cntUsage", wintypes.DWORD), + ("th32ProcessID", wintypes.DWORD), + ("th32DefaultHeapID", c_void_p), + ("th32ModuleID", wintypes.DWORD), + ("cntThreads", wintypes.DWORD), + ("th32ParentProcessID", wintypes.DWORD), + ("pcPriClassBase", wintypes.LONG), + ("dwFlags", wintypes.DWORD), + ("szExeFile", c_char * MAX_PATH), + ] diff --git a/README.md b/README.md index 0ef2d43..717d468 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ from PyMemoryEditor import OpenProcess with OpenProcess(process_name="game.exe") as process: # Scan the whole process for every address holding the value 100. 
- for address in process.search_by_value(int, 4, 100): + for address in process.search_by_value(int, value=100): print(f"Found at 0x{address:X}") # Read the current value, then write a new one back. diff --git a/docs/api/enums.md b/docs/api/enums.md index 5556094..0eb5a87 100644 --- a/docs/api/enums.md +++ b/docs/api/enums.md @@ -31,9 +31,7 @@ from PyMemoryEditor import ScanTypesEnum from PyMemoryEditor import OpenProcess, ScanTypesEnum with OpenProcess(process_name="game.exe") as process: - for address in process.search_by_value( - int, 4, 1000, scan_type=ScanTypesEnum.BIGGER_THAN, - ): + for address in process.search_by_value(int, value=1000, scan_type=ScanTypesEnum.BIGGER_THAN): print(hex(address)) ``` diff --git a/docs/api/errors.md b/docs/api/errors.md index 948d903..4eeb93f 100644 --- a/docs/api/errors.md +++ b/docs/api/errors.md @@ -139,7 +139,7 @@ from PyMemoryEditor import ( try: with OpenProcess(process_name="game.exe") as process: - for address in process.search_by_value(int, 4, 100): + for address in process.search_by_value(int, value=100): try: value = process.read_process_memory(address, int) except OSError: diff --git a/docs/api/memory-region.md b/docs/api/memory-region.md index 11165c2..eb4f170 100644 --- a/docs/api/memory-region.md +++ b/docs/api/memory-region.md @@ -107,7 +107,7 @@ writable = [r for r in process.get_memory_regions() if r.is_writable] snapshot = process.snapshot_memory_regions() assert isinstance(snapshot, MemoryRegionSnapshot) # Reuse across many scans: -for addr in process.search_by_value(int, 4, 100, memory_regions=snapshot): +for addr in process.search_by_value(int, value=100, memory_regions=snapshot): ... ``` diff --git a/docs/api/module-info.md b/docs/api/module-info.md index f97157c..a1b682b 100644 --- a/docs/api/module-info.md +++ b/docs/api/module-info.md @@ -19,13 +19,17 @@ Linux, `.dylib` on macOS). .. py:attribute:: name :type: str - File name of the module (e.g. ``"game.exe"``, ``"libc.so.6"``). 
+ File name of the module (e.g. ``"game.exe"``, ``"libc.so.6"``). May be an + empty string on macOS for an image whose path can't be resolved (see + ``path``). .. py:attribute:: path :type: str - Full path of the backing file on disk when the OS exposes it; falls - back to ``name`` when only the name is available. + Full path of the backing file on disk. On Windows it falls back to + ``name`` when only the name is available. On macOS ``name`` is derived + from ``path``, so when the path can't be read **both** ``path`` and + ``name`` are empty strings (no fallback is possible). .. py:attribute:: base_address :type: int diff --git a/docs/api/openprocess.md b/docs/api/openprocess.md index 05b27e3..c8bb597 100644 --- a/docs/api/openprocess.md +++ b/docs/api/openprocess.md @@ -23,9 +23,14 @@ All three subclass `AbstractProcess` and share the API documented below. ## Construction ```{eval-rst} -.. py:class:: OpenProcess(*, process_name=None, pid=None, permission=None, case_sensitive=None, exact_match=True) +.. py:class:: OpenProcess(*, process_name=None, pid=None, permission=<platform default>, case_sensitive=<platform default>, exact_match=True) - Open a target process. + Open a target process. ``OpenProcess`` resolves to the concrete backend for + the host OS, so the ``permission`` and ``case_sensitive`` defaults are + platform-specific (see below): on Windows ``permission`` defaults to the + read+write mask and ``case_sensitive`` to ``False``; on Linux/macOS + ``permission`` defaults to ``None`` (ignored) and ``case_sensitive`` to + ``True``. :param str process_name: name of the target process. :param int pid: process ID. Takes precedence over ``process_name``. @@ -178,8 +183,10 @@ identical on every platform. .. py:method:: read_string(address, byte_count) :no-index: - Read up to ``byte_count`` bytes, decode UTF-8, return the text up to the - first NUL. Pair: ``write_string(address, text, *, null_terminator=False)``. 
+ Read exactly ``byte_count`` bytes (a short read raises ``OSError``), decode + UTF-8, and return the text up to the first NUL — so ``byte_count`` is the + field width to read, not an upper bound. Pair: + ``write_string(address, text, *, null_terminator=False)``. .. py:method:: read_bytes(address, length) :no-index: @@ -246,16 +253,16 @@ identical on every platform. ### Pointers ```{eval-rst} -.. py:method:: resolve_pointer_chain(base_address, offsets, *, ptr_size=8) +.. py:method:: resolve_pointer_chain(base_address, offsets, *, ptr_size=None) Walk a multi-level pointer chain and return the final address. -.. py:method:: get_pointer(base_address, offsets=None, *, pytype=int, bufflength=None, ptr_size=8) +.. py:method:: get_pointer(base_address, offsets=None, *, pytype=int, bufflength=None, ptr_size=None) Build a :py:class:`RemotePointer` bound to this process — a live, re-resolving handle. See :doc:`../guide/pointers`. -.. py:method:: scan_pointer_paths(target_address, *, max_depth=5, max_offset=0x400, ptr_size=8, aligned=True, writable_only=True, static_ranges=None, max_results=None, memory_regions=None, progress_callback=None) +.. py:method:: scan_pointer_paths(target_address, *, max_depth=5, max_offset=0x400, ptr_size=None, aligned=True, writable_only=True, static_ranges=None, max_results=None, memory_regions=None, progress_callback=None) Reverse pointer scan — yield :py:class:`PointerPath` recipes that resolve to ``target_address``. See :doc:`../guide/pointer-scan`. diff --git a/docs/api/remote-pointer.md b/docs/api/remote-pointer.md index 4f215f9..83a8e27 100644 --- a/docs/api/remote-pointer.md +++ b/docs/api/remote-pointer.md @@ -12,7 +12,7 @@ constructor is documented here for completeness. ## Construction ```{eval-rst} -.. py:class:: RemotePointer(process, base_address, offsets=None, *, pytype=int, bufflength=None, ptr_size=8) +.. 
py:class:: RemotePointer(process, base_address, offsets=None, *, pytype=int, bufflength=None, ptr_size=None) :param AbstractProcess process: the open process the value lives in. :param int base_address: starting address. For a direct handle this is the @@ -32,9 +32,14 @@ constructor is documented here for completeness. :param Type pytype: how to interpret the bytes — ``bool``, ``int``, ``float``, ``str`` or ``bytes``. Defaults to ``int``. :param int bufflength: value size in bytes. Optional for numeric types - (int→4, float→8, bool→1); required for ``str`` / ``bytes``. + (int→4, float→8, bool→1). For ``str`` / ``bytes`` it is required only to + **read** (nothing to infer the width from); **writing** accepts ``None`` + and stores the whole value, while a set ``bufflength`` caps the width + (truncating). :param int ptr_size: pointer width used when walking ``offsets`` — 8 for - 64-bit targets (default), 4 for 32-bit. Ignored for direct handles. + 64-bit targets, 4 for 32-bit. Leave ``None`` (the default) to use the + target's ``pointer_size``, detected automatically. Ignored for direct + handles. ``` ## Properties diff --git a/docs/api/thread-info.md b/docs/api/thread-info.md index 0b6ef96..80c7d3e 100644 --- a/docs/api/thread-info.md +++ b/docs/api/thread-info.md @@ -30,8 +30,10 @@ mean "this platform does not expose that attribute via the API we use". .. py:attribute:: start_address :type: Optional[int] - Entry point of the thread, when the OS exposes it cheaply. ``None`` - when not available. + Reserved for the thread's entry point. **Currently always ``None``** on + every platform — none of the backends fetch it, since obtaining it cheaply + isn't possible across Windows/Linux/macOS. Kept as a stable field for + forward compatibility. .. 
py:attribute:: state :type: Optional[str] diff --git a/docs/api/utilities.md b/docs/api/utilities.md index e0996b5..1c8f802 100644 --- a/docs/api/utilities.md +++ b/docs/api/utilities.md @@ -90,14 +90,19 @@ print(byte_length) # 5 .. py:data:: DEFAULT_MAX_REGION_CHUNK Maximum chunk size used by :py:func:`iter_region_chunks` (currently - 16 MiB). Tunes the trade-off between syscall overhead (small chunks) and + 256 MiB). Tunes the trade-off between syscall overhead (small chunks) and peak memory use (huge chunks). -.. py:function:: iter_region_chunks(region_size, item_size) +.. py:function:: iter_region_chunks(region_size, target_value_size, max_chunk=DEFAULT_MAX_REGION_CHUNK) Yield ``(offset, chunk_size)`` pairs that walk a single memory region in - bounded-size chunks. The chunks slightly **overlap** so a pattern straddling - a boundary is still emitted by the higher-level scanner. + bounded-size chunks. Regions up to ``max_chunk`` yield a single chunk; larger + ones are split into **contiguous, non-overlapping** chunks whose size is a + multiple of ``target_value_size`` so a typed numeric scan never splits a value + across a boundary. Boundary handling for *patterns* is done one level up by + the scanner (it overlaps consecutive chunks by ``pattern_length - 1`` bytes); + arbitrary ``str`` matches in a region larger than ``max_chunk`` may still be + missed at a chunk boundary — a documented limitation. .. py:function:: scan_memory(...) .. py:function:: scan_memory_for_exact_value(...) diff --git a/docs/app.md b/docs/app.md index 0ef686c..d6395b9 100644 --- a/docs/app.md +++ b/docs/app.md @@ -17,8 +17,8 @@ If you're new to memory editing, **start with the app** before writing code. pip install "PyMemoryEditor[app]" ``` -The `app` extra adds PySide6 to the install. The library itself stays -dependency-free. +The `app` extra adds PySide6 and psutil to the install (psutil powers the +GUI's process picker). The library itself stays dependency-free. 
## Launch diff --git a/docs/guide/allocate-free.md b/docs/guide/allocate-free.md index 255712c..de8c554 100644 --- a/docs/guide/allocate-free.md +++ b/docs/guide/allocate-free.md @@ -13,7 +13,7 @@ size: ```python with OpenProcess(process_name="game.exe") as process: address = process.allocate_memory(64) - process.write_process_memory(address, int, 4, 1337) + process.write_int(address, 1337) ``` ## Freeing diff --git a/docs/guide/logging.md b/docs/guide/logging.md index 4abf701..5132185 100644 --- a/docs/guide/logging.md +++ b/docs/guide/logging.md @@ -20,17 +20,22 @@ You'll start seeing messages like: ``` DEBUG PyMemoryEditor: skipping region 0x7FFD0000–0x7FFD2000 (read failed) -WARNING PyMemoryEditor: partial read at 0x14010F4F4 (got 6 of 8 bytes) +WARNING PyMemoryEditor: mach_vm_protect could not restore protection at 0x14010F4F4 ``` ## Log levels - - + +
 <tr><th>Level</th><th>When it fires</th></tr>
-<tr><td>DEBUG</td><td>Transient skips (pages vanished mid-scan, unreadable chunks).</td></tr>
-<tr><td>WARNING</td><td>Surprising-but-recovered conditions (partial reads, mach_vm_protect restore failure on macOS).</td></tr>
+<tr><td>DEBUG</td><td>Transient skips during enumeration/scans (pages vanished mid-scan, unreadable chunks, a thread/module/image that couldn't be read).</td></tr>
+<tr><td>WARNING</td><td>Surprising-but-recovered conditions — currently the macOS mach_vm_protect restore failure after a write to a read-only page.</td></tr>
+```{note} +A **partial read** (fewer bytes returned than requested) is *not* a log event — +it raises ``OSError`` so you never silently decode a half-populated buffer. +``` + ## Routing logs You can route the logger anywhere you like — to a file, to a Qt widget, to a diff --git a/docs/guide/memory-regions.md b/docs/guide/memory-regions.md index 3863b9f..efc17ec 100644 --- a/docs/guide/memory-regions.md +++ b/docs/guide/memory-regions.md @@ -68,8 +68,8 @@ calls can reuse it: regions = process.snapshot_memory_regions() # Pass the same snapshot to as many scans as you want. -candidates = list(process.search_by_value(int, 4, 100, memory_regions=regions)) -refined = list(process.search_by_addresses(int, 4, candidates, memory_regions=regions)) +candidates = list(process.search_by_value(int, value=100, memory_regions=regions)) +refined = list(process.search_by_addresses(int, addresses=candidates, memory_regions=regions)) ``` The return type is `MemoryRegionSnapshot` — a thin `list` subclass that @@ -98,7 +98,7 @@ it to a scan: # Only writable regions (skip read-only static data — much faster). writable = [r for r in regions if r.is_writable] -for address in process.search_by_value(int, 4, target, memory_regions=writable): +for address in process.search_by_value(int, value=target, memory_regions=writable): ... ``` diff --git a/docs/guide/modules-threads.md b/docs/guide/modules-threads.md index 106cf58..bdfa718 100644 --- a/docs/guide/modules-threads.md +++ b/docs/guide/modules-threads.md @@ -41,8 +41,9 @@ with OpenProcess(process_name="game.exe") as process: :no-index: :type: str - Full path of the backing file on disk when the OS exposes it; falls back - to ``name`` when only the name is available. + Full path of the backing file on disk. On Windows it falls back to + ``name`` when only the name is available; on macOS ``name`` derives from + ``path``, so an unresolvable image yields both as empty strings. .. 
py:attribute:: base_address :no-index: @@ -120,7 +121,8 @@ with OpenProcess(process_name="game.exe") as process: :no-index: :type: Optional[int] - Thread entry point when the OS exposes it cheaply; ``None`` otherwise. + Reserved for the thread entry point — **currently always ``None``** on + every platform (no backend fetches it). .. py:attribute:: state :no-index: diff --git a/docs/guide/pattern-scan.md b/docs/guide/pattern-scan.md index a7be8be..63b6330 100644 --- a/docs/guide/pattern-scan.md +++ b/docs/guide/pattern-scan.md @@ -137,8 +137,12 @@ without a live process: from PyMemoryEditor.util.pattern import compile_pattern regex, byte_length = compile_pattern("48 8B ? 00 00") -print(regex.pattern, byte_length) -# b'\\x48\\x8B.\\x00\\x00' 5 +print(repr(regex.pattern), byte_length) +# b'H\x8b.\x00\x00' 5 ``` +The compiled regex matches the right bytes regardless of how they print: +``re.escape`` renders a printable byte as its ASCII character (``0x48`` → ``H``) +and a non-printable one as ``\xNN`` (``0x8B`` → ``\x8b``); ``?`` becomes ``.``. + See [Utilities API](../api/utilities.md) for the full reference. diff --git a/docs/guide/pointer-scan.md b/docs/guide/pointer-scan.md index dc33643..37e7198 100644 --- a/docs/guide/pointer-scan.md +++ b/docs/guide/pointer-scan.md @@ -24,7 +24,7 @@ It carries everything you need to reconstruct the chain in another run. ## Method signature ```{eval-rst} -.. py:method:: scan_pointer_paths(target_address, *, max_depth=5, max_offset=0x400, ptr_size=8, aligned=True, writable_only=True, static_ranges=None, max_results=None, memory_regions=None, progress_callback=None) +.. 
py:method:: scan_pointer_paths(target_address, *, max_depth=5, max_offset=0x400, ptr_size=None, aligned=True, writable_only=True, static_ranges=None, max_results=None, memory_regions=None, progress_callback=None) :no-index: :param int target_address: the dynamic address to find pointer paths to @@ -34,7 +34,9 @@ It carries everything you need to reconstruct the chain in another run. :param int max_offset: largest positive offset a single hop may add (the struct-size window). Bigger values catch fields deeper inside objects at the cost of many more candidate paths. - :param int ptr_size: pointer width — ``8`` (default) for 64-bit, ``4`` for 32-bit. + :param int ptr_size: pointer width — ``8`` for 64-bit, ``4`` for 32-bit. + Leave ``None`` (the default) to use the target's ``pointer_size``, + detected automatically. :param bool aligned: only consider pointers at natural alignment (default, much faster). Set ``False`` to also scan misaligned slots (slow). :param bool writable_only: build the pointer map from writable memory only diff --git a/docs/guide/pointers.md b/docs/guide/pointers.md index 5a6f89a..389dbf9 100644 --- a/docs/guide/pointers.md +++ b/docs/guide/pointers.md @@ -33,13 +33,13 @@ module = next(m for m in process.get_modules() if m.name == "game.exe") base = module.base_address + 0x10F4F4 hp_address = process.resolve_pointer_chain(base, [0x0, 0x158]) -hp = process.read_process_memory(hp_address, int, 4) +hp = process.read_int(hp_address) ``` ### Method signature ```{eval-rst} -.. py:method:: resolve_pointer_chain(base_address, offsets, *, ptr_size=8) +.. py:method:: resolve_pointer_chain(base_address, offsets, *, ptr_size=None) :no-index: Walk a multi-level pointer chain. @@ -53,17 +53,19 @@ hp = process.read_process_memory(hp_address, int, 4) ``module_base + static_offset``. :param Sequence[int] offsets: sequence of offsets to walk. Pass ``[]`` to dereference ``base_address`` once and return that pointer. 
- :param int ptr_size: pointer width — ``8`` for 64-bit targets (default), - ``4`` for 32-bit. + :param int ptr_size: pointer width — ``8`` for 64-bit targets, ``4`` for + 32-bit. Leave ``None`` (the default) to use the target's + ``pointer_size``, detected automatically. :returns: the final address (an ``int``). ``` ```{admonition} 32-bit vs 64-bit :class: warning -Pass `ptr_size=4` when the target is a 32-bit process; pass `ptr_size=8` (the -default) for 64-bit. Mixing them up reads pointers of the wrong width and -yields garbage addresses. +By default (`ptr_size=None`) the pointer width is detected from the target — +`4` bytes for a 32-bit process, `8` for 64-bit. Pass `ptr_size` explicitly only +to override that; setting it to the wrong width reads pointers of the wrong +size and yields garbage addresses. ``` ## Live pointers: `RemotePointer` @@ -86,6 +88,10 @@ print(hp_ptr.value) # read it hp_ptr.value = 9999 # write it ``` +`process.get_pointer(...)` is a convenience wrapper around the +`RemotePointer` constructor — it accepts the same arguments +(`base_address`, `offsets`, `pytype`, `bufflength`, `ptr_size`). + ### Direct vs chained handles The `offsets` argument controls what `RemotePointer` does on every access: @@ -117,7 +123,7 @@ distance = mp_ptr - hp_ptr # 4 ### `RemotePointer` API ```{eval-rst} -.. py:class:: RemotePointer(process, base_address, offsets=None, *, pytype=int, bufflength=None, ptr_size=8) +.. py:class:: RemotePointer(process, base_address, offsets=None, *, pytype=int, bufflength=None, ptr_size=None) :no-index: A re-resolving, read/write handle to a typed value in a target process. @@ -178,20 +184,6 @@ distance = mp_ptr - hp_ptr # 4 The resolved :py:attr:`address` — handy for arithmetic and logging. 
``` -## Building a handle from `process.get_pointer()` - -A small convenience wrapper around the constructor: - -```python -ptr = process.get_pointer( - base_address=module.base_address + 0x10F4F4, - offsets=[0x0, 0x158], - pytype=int, - bufflength=4, - ptr_size=8, -) -``` - ```{seealso} - [Pointer scan](pointer-scan.md) — discover chains that resolve to a given address. diff --git a/docs/guide/read-write.md b/docs/guide/read-write.md index d9d58cb..04d8060 100644 --- a/docs/guide/read-write.md +++ b/docs/guide/read-write.md @@ -170,13 +170,14 @@ with OpenProcess(process_name="game.exe") as process: # Write your text — UTF-8 encoding (accents, emoji…) is handled for you. process.write_string(0x7FF40020, "Pedro") - # Read it back: read up to 32 bytes, stop at the first NUL terminator. + # Read it back: read a 32-byte field, stop at the first NUL terminator. name = process.read_string(0x7FF40020, 32) # -> "Pedro" ``` -`read_string` reads up to the size you pass and returns everything **before the -first `\0`**, so a generous size like `32` gives you the real string without the -trailing padding. `write_string` writes exactly your text — pass +`read_string` reads exactly the size you pass — that many bytes must be +readable or it raises `OSError` — and returns everything **before the first +`\0`**, so a generous field width like `32` gives you the real string without +the trailing padding. `write_string` writes exactly your text — pass `null_terminator=True` if you're overwriting a longer value and want a clean cut-off: diff --git a/docs/index.md b/docs/index.md index 4ac6f74..a8ad4e1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -40,7 +40,7 @@ from PyMemoryEditor import OpenProcess with OpenProcess(process_name="game.exe") as process: # Scan the whole process for every address holding the value 100. 
- for address in process.search_by_value(int, 4, 100): + for address in process.search_by_value(int, value=100): print(f"Found at 0x{address:X}") # Read the current value, then write a new one back. diff --git a/docs/installation.md b/docs/installation.md index 74f9b2f..e3882a9 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -33,8 +33,8 @@ Once installed, launch the app from any terminal: pymemoryeditor ``` -The library itself stays dependency-free — only the `app` extra pulls -PySide6 in. +The library itself stays dependency-free — only the `app` extra pulls in its +dependencies (PySide6 and psutil, used by the GUI's process picker). See the [GUI App guide](app.md) for a tour of every feature. diff --git a/docs/quickstart.md b/docs/quickstart.md index 67bf4bf..d65f3da 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -70,7 +70,7 @@ You rarely know the address of a value up front — you **find it by scanning**. ```python with OpenProcess(process_name="game.exe") as process: - for address in process.search_by_value(int, 4, 100): + for address in process.search_by_value(int, value=100): print(f"Found at 0x{address:X}") ``` @@ -92,12 +92,12 @@ The classic loop is: ```python with OpenProcess(process_name="game.exe") as process: # 1. First scan — every address currently holding 100. - candidates = list(process.search_by_value(int, 4, 100)) + candidates = list(process.search_by_value(int, value=100)) # 3. After the value drops to 95 in-game, keep only the matches that agree. survivors = [ address - for address, value in process.search_by_addresses(int, 4, candidates) + for address, value in process.search_by_addresses(int, addresses=candidates) if value == 95 ] diff --git a/docs/why.md b/docs/why.md index a2b13f4..3ffc9cb 100644 --- a/docs/why.md +++ b/docs/why.md @@ -97,7 +97,7 @@ from PyMemoryEditor import OpenProcess with OpenProcess(process_name="game.exe") as process: # Scan the whole process for every address holding the value 100. 
- for address in process.search_by_value(int, 4, 100): + for address in process.search_by_value(int, value=100): print(f"Found at 0x{address:X}") # Read the current value, then write a new one back. diff --git a/pyproject.toml b/pyproject.toml index 93a8f73..9c799e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,11 @@ classifiers = [ "Typing :: Typed", ] requires-python = ">=3.10" -dependencies = ["psutil>=5.9,<7"] +# The core library is dependency-free: process discovery and read/write/scan +# are implemented natively per platform (CreateToolhelp32Snapshot on Windows, +# /proc on Linux, libproc/Mach on macOS) via ctypes. Optional extras below add +# acceleration (NumPy) and the desktop app (PySide6 + psutil). +dependencies = [] [project.optional-dependencies] # Vectorized scan acceleration. Pulls in NumPy, which lights up the @@ -58,6 +62,10 @@ tests = [ ] app = [ "PySide6>=6.5", + # The desktop app uses psutil's richer per-process info (username, memory + # footprint) in the process picker — beyond the pid/name the core library + # needs. The library itself does not depend on psutil. + "psutil>=5.9,<7", ] # NOTE: NumPy is intentionally NOT listed here. The default test suite must # run on the pure-Python scan path so it stays covered. The NumPy fast path @@ -73,7 +81,10 @@ dev = [ "mypy", "build", "twine", + # GUI deps: the app smoke/worker tests import the desktop modules, which use + # PySide6 and psutil (the latter only in the app, not the core library). "PySide6>=6.5", + "psutil>=5.9,<7", ] [project.scripts] diff --git a/tests/test_errors.py b/tests/test_errors.py index 8528290..e2ed070 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -23,7 +23,8 @@ def test_version_exposed(): def test_open_invalid_pid_raises(): - # 2**31 - 1 is a very large pid unlikely to exist; psutil rejects negative. + # 2**31 - 1 is a very large pid unlikely to exist; the native existence + # check reports it as absent. 
with pytest.raises(ProcessIDNotExistsError): OpenProcess(pid=2**31 - 1) diff --git a/tests/test_partial_name_match.py b/tests/test_partial_name_match.py index b5899d3..5c70445 100644 --- a/tests/test_partial_name_match.py +++ b/tests/test_partial_name_match.py @@ -1,37 +1,47 @@ # -*- coding: utf-8 -*- """ -Tests for the ``exact_match`` flag on process-name lookup. Uses ``psutil`` -directly to derive the current process's real name from the OS, then -verifies that: +Tests for the ``exact_match`` flag on process-name lookup. Derives the current +process's real name from PyMemoryEditor's own native enumeration (the same +``(pid, name)`` source the lookup uses — Toolhelp on Windows, /proc on Linux, +libproc on macOS), then verifies that: * an exact-name lookup finds it, * a partial (substring) lookup finds it, * a substring lookup that cannot match anything returns nothing. -Avoids relying on a specific executable being installed. +Avoids relying on a specific executable being installed, and on any third-party +dependency. """ import os import sys -import psutil import pytest from PyMemoryEditor.process.util import ( + _iter_processes, get_process_id_by_process_name, get_process_ids_by_process_name, ) -_OWN_PROCESS_NAME = psutil.Process(os.getpid()).name() or "" +def _own_process_name() -> str: + """The OS-reported name of the test process, via native enumeration.""" + for pid, name in _iter_processes(): + if pid == os.getpid(): + return name or "" + return "" + + +_OWN_PROCESS_NAME = _own_process_name() @pytest.fixture(scope="module") def own_name(): """The OS-reported name of the test process (e.g. 
``python3.12``).""" if not _OWN_PROCESS_NAME: - pytest.skip("psutil cannot read this process's name on this platform") + pytest.skip("could not read this process's name on this platform") return _OWN_PROCESS_NAME diff --git a/tests/test_process_enumeration.py b/tests/test_process_enumeration.py new file mode 100644 index 0000000..98d3430 --- /dev/null +++ b/tests/test_process_enumeration.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- + +""" +Cross-platform tests for the native process enumeration that backs +``OpenProcess(process_name=...)`` — the per-platform ``(pid, name)`` source +(``CreateToolhelp32Snapshot`` on Windows, ``/proc`` on Linux, libproc on +macOS) exposed through ``PyMemoryEditor.process.util`` — plus the native +``pid_exists`` probe. These exercise the real OS code paths (no mocking; the +mocked ``util`` layer is covered separately in ``test_process_lookup.py``). +""" + +import os +import sys + +import pytest + +if sys.platform not in ("win32", "darwin") and not sys.platform.startswith("linux"): + pytest.skip("Platform not supported by PyMemoryEditor", allow_module_level=True) + + +from PyMemoryEditor.process.util import ( # noqa: E402 + _iter_processes, + get_process_ids_by_process_name, + pid_exists, +) + + +def _own_name() -> str: + for pid, name in _iter_processes(): + if pid == os.getpid(): + return name or "" + return "" + + +def test_iter_processes_yields_pid_name_pairs(): + """Every row is an ``(int pid >= 0, str name)`` pair and the list is non-empty.""" + rows = list(_iter_processes()) + assert rows, "expected at least one process" + for pid, name in rows: + assert isinstance(pid, int) + assert pid >= 0 + assert isinstance(name, str) + + +def test_enumeration_includes_self(): + """The current process must appear in the enumeration.""" + pids = [pid for pid, _ in _iter_processes()] + assert os.getpid() in pids + + +def test_current_process_has_a_name(): + """A process can always read its own name (proc_name/comm/szExeFile).""" + name = 
_own_name() + assert name, "expected a non-empty name for the current process" + # The exact spelling is platform-specific (e.g. "python3.11" / "python.exe" + # / "Python", and Linux truncates comm to 15 chars), so assert only that the + # backend produced a clean executable basename — decoded, no path + # separators, no embedded NUL — rather than a brittle exact match. + assert "\x00" not in name + assert "/" not in name and "\\" not in name + + +def test_pids_are_unique(): + """A snapshot lists each pid at most once.""" + pids = [pid for pid, _ in _iter_processes()] + assert len(pids) == len(set(pids)), "duplicate pids in enumeration" + + +def test_pid_exists_true_for_self(): + assert pid_exists(os.getpid()) is True + + +def test_pid_exists_false_for_dead_pid(): + # 2**31 - 1 is a very large pid extremely unlikely to be live. + assert pid_exists(2**31 - 1) is False + + +def test_pid_exists_false_for_negative_pid(): + assert pid_exists(-1) is False + + +def test_resolve_own_name_includes_self(): + """Resolving the current process's name returns a list containing our pid.""" + name = _own_name() + if not name: + pytest.skip("could not read this process's name on this platform") + + pids = get_process_ids_by_process_name(name, exact_match=True) + assert os.getpid() in pids + + +def test_case_insensitive_match_finds_self_native(): + """Case-insensitive matching works end-to-end on the native enumeration.""" + name = _own_name() + if not name: + pytest.skip("could not read this process's name on this platform") + swapped = name.swapcase() + if swapped == name: + pytest.skip("process name has no alphabetic characters to swap") + + # A list (≥1) — other processes may share the name; we only require ours. 
+ pids = get_process_ids_by_process_name( + swapped, exact_match=True, case_sensitive=False + ) + assert os.getpid() in pids + + +def test_substring_match_finds_self_native(): + """Substring (exact_match=False) matching works on the native enumeration.""" + name = _own_name() + if not name or len(name) <= 2: + pytest.skip("process name too short for a substring test") + + substring = name[: max(2, len(name) // 2)] + pids = get_process_ids_by_process_name(substring, exact_match=False) + assert os.getpid() in pids diff --git a/tests/test_process_lookup.py b/tests/test_process_lookup.py index f345961..51b151e 100644 --- a/tests/test_process_lookup.py +++ b/tests/test_process_lookup.py @@ -12,43 +12,38 @@ from PyMemoryEditor.process import util as lookup -class _FakeProcess: - """Stand-in for psutil.Process used by process_iter(["name", "pid"]).""" - - def __init__(self, name: str, pid: int): - self.info = {"name": name, "pid": pid} - - @pytest.fixture def fake_process_iter(monkeypatch): - """Replace psutil.process_iter with a callable returning the provided list.""" + """Replace the native process enumerator with one returning a fixed list. + + The seam is ``util._iter_processes``, which yields ``(pid, name)`` pairs + (the per-platform native enumerator: Toolhelp on Windows, /proc on Linux, + libproc on macOS). Tests supply their own ``(name, pid)`` rows. 
+ """ def install(processes): - monkeypatch.setattr( - lookup.psutil, - "process_iter", - lambda fields=None: iter(processes), - ) + rows = [(pid, name) for name, pid in processes] + monkeypatch.setattr(lookup, "_iter_processes", lambda: iter(rows)) return install def test_returns_none_when_no_match(fake_process_iter): - fake_process_iter([_FakeProcess("chrome", 1), _FakeProcess("firefox", 2)]) + fake_process_iter([("chrome", 1), ("firefox", 2)]) assert lookup.get_process_id_by_process_name("missing.exe") is None def test_returns_pid_on_single_match(fake_process_iter): - fake_process_iter([_FakeProcess("chrome", 1), _FakeProcess("firefox", 2)]) + fake_process_iter([("chrome", 1), ("firefox", 2)]) assert lookup.get_process_id_by_process_name("chrome") == 1 def test_raises_ambiguous_on_multiple_matches(fake_process_iter): fake_process_iter( [ - _FakeProcess("python", 100), - _FakeProcess("python", 200), - _FakeProcess("bash", 300), + ("python", 100), + ("python", 200), + ("bash", 300), ] ) with pytest.raises(AmbiguousProcessNameError) as exc: @@ -59,13 +54,13 @@ def test_raises_ambiguous_on_multiple_matches(fake_process_iter): def test_case_sensitive_default_distinguishes(fake_process_iter): - fake_process_iter([_FakeProcess("Notepad.exe", 42)]) + fake_process_iter([("Notepad.exe", 42)]) assert lookup.get_process_id_by_process_name("notepad.exe") is None assert lookup.get_process_id_by_process_name("Notepad.exe") == 42 def test_case_insensitive_matches(fake_process_iter): - fake_process_iter([_FakeProcess("Notepad.exe", 42)]) + fake_process_iter([("Notepad.exe", 42)]) assert ( lookup.get_process_id_by_process_name("notepad.exe", case_sensitive=False) == 42 ) @@ -77,14 +72,22 @@ def test_case_insensitive_matches(fake_process_iter): def test_get_process_ids_returns_full_list(fake_process_iter): fake_process_iter( [ - _FakeProcess("python", 100), - _FakeProcess("python", 200), + ("python", 100), + ("python", 200), ] ) pids = 
lookup.get_process_ids_by_process_name("python") assert pids == [100, 200] +def test_empty_name_rows_are_tolerated(fake_process_iter): + """A process with an empty name (macOS can yield this) must not crash the + lookup, and must not spuriously match a non-empty query.""" + fake_process_iter([("", 1), ("chrome", 2)]) + assert lookup.get_process_id_by_process_name("chrome") == 2 + assert lookup.get_process_id_by_process_name("") == 1 + + def test_ambiguous_error_has_args_and_str(): """Regression: errors used to lose information because __init__ didn't call super().""" err = AmbiguousProcessNameError("python", [100, 200])