From 1c807b517ebfecc551f5c3d7223c115a8ef9f004 Mon Sep 17 00:00:00 2001 From: JeanExtreme002 Date: Fri, 5 Jun 2026 01:07:57 -0300 Subject: [PATCH] fix: exclude the dyld shared cache from macOS value scans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On macOS the dyld shared cache (~5.8 GB of read-only library text/data the kernel maps into every process) was scanned on every value/pattern scan because vm_region_basic_info's `shared` flag reports FALSE for it. Linux and Windows already exclude their equivalent file-backed library mappings, so the same scan walked ~6.6 GB on macOS versus ~1 GB elsewhere — making scans (and the test suite) run 4-6x slower. Recognize the cache via its vm_region_extended_info user_tag (VM_MEMORY_SHARED_PMAP) and mark those regions as shared so default_scan_filter drops them, matching the Linux/Win32 backends. Address-list reads and pointer scans are unaffected — they filter on readability/writability, not the shared flag. 
--- PyMemoryEditor/macos/functions.py | 59 ++++++++++++++++++++++++++++--- PyMemoryEditor/macos/libsystem.py | 6 ++-- PyMemoryEditor/macos/types.py | 41 ++++++++++++++++++++- tests/test_macos_protect.py | 36 +++++++++++++++++++ 4 files changed, 135 insertions(+), 7 deletions(-) diff --git a/PyMemoryEditor/macos/functions.py b/PyMemoryEditor/macos/functions.py index 22000e2..5212ac4 100644 --- a/PyMemoryEditor/macos/functions.py +++ b/PyMemoryEditor/macos/functions.py @@ -43,17 +43,21 @@ TASK_DYLD_INFO, TASK_DYLD_INFO_COUNT, VM_FLAGS_ANYWHERE, + VM_MEMORY_SHARED_PMAP, VM_PROT_COPY, VM_PROT_READ, VM_PROT_WRITE, VM_REGION_BASIC_INFO_64, VM_REGION_BASIC_INFO_COUNT_64, + VM_REGION_EXTENDED_INFO, + VM_REGION_EXTENDED_INFO_COUNT, mach_msg_type_number_t, mach_port_t, mach_vm_address_t, mach_vm_size_t, task_dyld_info_data_t, vm_region_basic_info_64, + vm_region_extended_info, ) @@ -119,6 +123,53 @@ def release_task(task: int) -> None: libsystem.mach_port_deallocate(mach_task_self_.value, task) +def _region_user_tag(task: int, address: int) -> int: + """Return the ``vm_region_extended_info.user_tag`` for the region at ``address``. + + A second, extended-info ``mach_vm_region`` query (the enumeration uses + basic info, which has no ``user_tag``). Returns 0 — an unassigned tag — on + any failure, so callers treat an unknown region as "not the shared cache". 
+ """ + addr = mach_vm_address_t(address) + size = mach_vm_size_t(0) + info = vm_region_extended_info() + info_count = mach_msg_type_number_t(VM_REGION_EXTENDED_INFO_COUNT) + object_name = mach_port_t(0) + + kr = libsystem.mach_vm_region( + task, + ctypes.byref(addr), + ctypes.byref(size), + VM_REGION_EXTENDED_INFO, + ctypes.cast(ctypes.byref(info), ctypes.c_void_p), + ctypes.byref(info_count), + ctypes.byref(object_name), + ) + + if kr != KERN_SUCCESS: + return 0 + if object_name.value: + libsystem.mach_port_deallocate(mach_task_self_.value, object_name.value) + return info.user_tag + + +def _region_is_shared(task: int, address: int, basic_shared: int) -> bool: + """Whether the region is a shared/file-backed mapping value scans should skip. + + The basic-info ``shared`` flag is honored when set, but it reports FALSE for + the dyld shared cache — the multi-GB read-only library blob the kernel maps + into every process through a shared submap (tagged + :data:`VM_MEMORY_SHARED_PMAP`). Recognizing it here lets + :func:`default_scan_filter` exclude it, so a default value/pattern scan walks + only the target's own ~1 GB of private memory instead of ~6 GB — matching + the Linux/Win32 exclusion of file-backed library mappings and keeping macOS + scans (and the test suite) from running 4-6x slower than the other OSes. + """ + if basic_shared: + return True + return _region_user_tag(task, address) == VM_MEMORY_SHARED_PMAP + + def get_memory_regions(task: int) -> Generator[MemoryRegion, None, None]: """ Yield {address, size, struct} dicts describing each memory region of the task. 
@@ -143,7 +194,7 @@ def get_memory_regions(task: int) -> Generator[MemoryRegion, None, None]: ctypes.byref(address), ctypes.byref(size), VM_REGION_BASIC_INFO_64, - ctypes.byref(info), + ctypes.cast(ctypes.byref(info), ctypes.c_void_p), ctypes.byref(info_count), ctypes.byref(object_name), ) @@ -168,7 +219,7 @@ def get_memory_regions(task: int) -> Generator[MemoryRegion, None, None]: size.value, info.protection, info.max_protection, - info.shared, + _region_is_shared(task, address.value, info.shared), info.reserved, ) @@ -357,7 +408,7 @@ def _query_region(task: int, address: int): ctypes.byref(addr), ctypes.byref(size), VM_REGION_BASIC_INFO_64, - ctypes.byref(info), + ctypes.cast(ctypes.byref(info), ctypes.c_void_p), ctypes.byref(info_count), ctypes.byref(object_name), ) @@ -381,7 +432,7 @@ def _query_region(task: int, address: int): size.value, info.protection, info.max_protection, - info.shared, + _region_is_shared(task, addr.value, info.shared), info.reserved, ), ) diff --git a/PyMemoryEditor/macos/libsystem.py b/PyMemoryEditor/macos/libsystem.py index 9f41758..14ac6ec 100644 --- a/PyMemoryEditor/macos/libsystem.py +++ b/PyMemoryEditor/macos/libsystem.py @@ -24,7 +24,6 @@ mach_vm_size_t, task_t, vm_map_t, - vm_region_basic_info_64, ) @@ -82,7 +81,10 @@ POINTER(mach_vm_address_t), POINTER(mach_vm_size_t), ctypes.c_int, - POINTER(vm_region_basic_info_64), + # `vm_region_info_t` is a generic `int *` the caller sizes via `flavor` — + # declare it as an opaque pointer so both vm_region_basic_info_64 and + # vm_region_extended_info can be passed (each via ctypes.cast(byref, ...)). 
+ ctypes.c_void_p, POINTER(mach_msg_type_number_t), POINTER(mach_port_t), ) diff --git a/PyMemoryEditor/macos/types.py b/PyMemoryEditor/macos/types.py index 53d1810..aa19f6b 100644 --- a/PyMemoryEditor/macos/types.py +++ b/PyMemoryEditor/macos/types.py @@ -10,7 +10,7 @@ - mach/kern_return.h """ -from ctypes import Structure, c_int, c_uint, c_uint64, c_ushort, sizeof +from ctypes import Structure, c_int, c_ubyte, c_uint, c_uint64, c_ushort, sizeof # `info_count` in mach_vm_region is measured in mach_msg_type_number_t units @@ -33,6 +33,17 @@ # Region info flavors VM_REGION_BASIC_INFO_64 = 9 +VM_REGION_EXTENDED_INFO = 13 + +# user_tag (from vm_region_extended_info) of the dyld shared cache regions — +# the read-only library text/data blob the kernel maps into *every* process +# via a shared submap. On this machine it totals ~5.8 GB across three regions. +# Crucially, the basic-info `shared` flag reports FALSE for these regions, so +# without recognizing the tag a default value scan walks all ~6 GB of library +# memory — making macOS scans (and the test suite) 4-6x slower than Linux/Win32, +# which exclude the equivalent file-backed/shared mappings. See VM_MEMORY_* +# constants in <mach/vm_statistics.h>. +VM_MEMORY_SHARED_PMAP = 32 # task_info() flavor that returns dyld's image-list pointer (mach/task_info.h). TASK_DYLD_INFO = 17 @@ -76,6 +87,34 @@ class vm_region_basic_info_64(Structure): VM_REGION_BASIC_INFO_COUNT_64 = sizeof(vm_region_basic_info_64) // _NATURAL_T_SIZE +class vm_region_extended_info(Structure): + """Layout of struct vm_region_extended_info_data_t from <mach/vm_region.h>. + + Only ``user_tag`` is consumed today (to recognize the dyld shared cache — + see :data:`VM_MEMORY_SHARED_PMAP`); the remaining fields are declared so the + struct size — and therefore :data:`VM_REGION_EXTENDED_INFO_COUNT` — matches + what the kernel expects. 
+ """ + + _fields_ = [ + ("protection", vm_prot_t), + ("user_tag", c_uint), + ("pages_resident", c_uint), + ("pages_shared_now_private", c_uint), + ("pages_swapped_out", c_uint), + ("pages_dirtied", c_uint), + ("ref_count", c_uint), + ("shadow_depth", c_ushort), + ("external_pager", c_ubyte), + ("share_mode", c_ubyte), + ("pages_reusable", c_uint), + ] + + +# Number of mach_msg_type_number_t units in vm_region_extended_info. +VM_REGION_EXTENDED_INFO_COUNT = sizeof(vm_region_extended_info) // _NATURAL_T_SIZE + + class task_dyld_info_data_t(Structure): """Layout of struct task_dyld_info from <mach/task_info.h>. diff --git a/tests/test_macos_protect.py b/tests/test_macos_protect.py index e2a7c6d..f4ce638 100644 --- a/tests/test_macos_protect.py +++ b/tests/test_macos_protect.py @@ -20,6 +20,7 @@ from ctypes.util import find_library # noqa: E402 from PyMemoryEditor import OpenProcess # noqa: E402 +from PyMemoryEditor.process.region import default_scan_filter # noqa: E402 # Page size on macOS arm64 is 16 KB; x86_64 is 4 KB. mmap will pick the right one. @@ -68,6 +69,41 @@ def _mmap_readonly(size: int) -> int: return addr +def test_dyld_shared_cache_excluded_from_value_scans(): + """The dyld shared cache must be flagged shared so value scans skip it. + + macOS maps the read-only library blob (the dyld shared cache, ~6 GB) into + every process. Its ``vm_region_basic_info`` ``shared`` flag is FALSE, so + before the ``VM_MEMORY_SHARED_PMAP`` user_tag fix a default scan walked all + ~6 GB of it — making macOS scans (and this suite) 4-6x slower than the + other OSes, which exclude their equivalent file-backed library mappings. + + Regression guard: there is at least one large readable region flagged + ``is_shared``, and ``default_scan_filter`` drops enough that the scanned set + is a small fraction of all readable memory. 
+ """ + process = OpenProcess(pid=os.getpid()) + try: + regions = list(process.get_memory_regions()) + finally: + process.close() + + readable = [r for r in regions if r.is_readable] + readable_bytes = sum(r.size for r in readable) + scanned_bytes = sum(r.size for r in readable if default_scan_filter(r)) + + # The dyld shared cache shows up as one or more large readable regions that + # must now be classified as shared (256 MB is well below its real size). + big_shared = [ + r for r in readable if r.is_shared and r.size >= 256 * 1024 * 1024 + ] + assert big_shared, "dyld shared cache not recognized as a shared mapping" + + # With the cache excluded the scanned set is a small slice of all readable + # memory — guards against a regression that scans the whole address space. + assert scanned_bytes < readable_bytes * 0.5 + + def test_write_to_readonly_page_via_protect_flip(): size = 4096 address = _mmap_readonly(size)