Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 55 additions & 4 deletions PyMemoryEditor/macos/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,21 @@
TASK_DYLD_INFO,
TASK_DYLD_INFO_COUNT,
VM_FLAGS_ANYWHERE,
VM_MEMORY_SHARED_PMAP,
VM_PROT_COPY,
VM_PROT_READ,
VM_PROT_WRITE,
VM_REGION_BASIC_INFO_64,
VM_REGION_BASIC_INFO_COUNT_64,
VM_REGION_EXTENDED_INFO,
VM_REGION_EXTENDED_INFO_COUNT,
mach_msg_type_number_t,
mach_port_t,
mach_vm_address_t,
mach_vm_size_t,
task_dyld_info_data_t,
vm_region_basic_info_64,
vm_region_extended_info,
)


Expand Down Expand Up @@ -119,6 +123,53 @@ def release_task(task: int) -> None:
libsystem.mach_port_deallocate(mach_task_self_.value, task)


def _region_user_tag(task: int, address: int) -> int:
    """Look up ``vm_region_extended_info.user_tag`` for the region at ``address``.

    The region enumeration queries basic info, which carries no ``user_tag``,
    so this helper issues one additional ``mach_vm_region`` call using the
    ``VM_REGION_EXTENDED_INFO`` flavor.

    :param task: task handle forwarded unchanged to ``mach_vm_region``.
    :param address: address used to seed the region lookup.
    :return: the ``user_tag`` reported by the kernel, or 0 (an unassigned tag)
        when the call does not return ``KERN_SUCCESS`` — callers then treat
        the region as "not the shared cache".
    """
    region_start = mach_vm_address_t(address)
    region_size = mach_vm_size_t(0)
    extended = vm_region_extended_info()
    count = mach_msg_type_number_t(VM_REGION_EXTENDED_INFO_COUNT)
    name_port = mach_port_t(0)

    status = libsystem.mach_vm_region(
        task,
        ctypes.byref(region_start),
        ctypes.byref(region_size),
        VM_REGION_EXTENDED_INFO,
        # The info argument is declared as an opaque pointer, hence the cast.
        ctypes.cast(ctypes.byref(extended), ctypes.c_void_p),
        ctypes.byref(count),
        ctypes.byref(name_port),
    )

    if status == KERN_SUCCESS:
        # Release the object-name port if the call handed one back.
        port = name_port.value
        if port:
            libsystem.mach_port_deallocate(mach_task_self_.value, port)
        return extended.user_tag

    # Any failure is reported as the unassigned tag.
    return 0


def _region_is_shared(task: int, address: int, basic_shared: int) -> bool:
    """Decide whether a region counts as a shared mapping that scans should skip.

    A truthy basic-info ``shared`` flag is accepted as-is. That flag is FALSE
    for the dyld shared cache (the large read-only library blob mapped into
    every process through a shared submap), so when the flag is unset the
    region's extended-info ``user_tag`` is compared against
    :data:`VM_MEMORY_SHARED_PMAP` instead. Flagging the cache as shared lets
    :func:`default_scan_filter` leave it out of default value/pattern scans,
    in line with the Linux/Win32 exclusion of file-backed library mappings.

    :param task: task handle forwarded to :func:`_region_user_tag`.
    :param address: address of the region being classified.
    :param basic_shared: the ``shared`` field from the basic-info query.
    :return: ``True`` if the region should be treated as shared.
    """
    # Short-circuit: the extended-info query only runs when the flag is unset.
    return bool(basic_shared) or (
        _region_user_tag(task, address) == VM_MEMORY_SHARED_PMAP
    )


def get_memory_regions(task: int) -> Generator[MemoryRegion, None, None]:
"""
Yield {address, size, struct} dicts describing each memory region of the task.
Expand All @@ -143,7 +194,7 @@ def get_memory_regions(task: int) -> Generator[MemoryRegion, None, None]:
ctypes.byref(address),
ctypes.byref(size),
VM_REGION_BASIC_INFO_64,
ctypes.byref(info),
ctypes.cast(ctypes.byref(info), ctypes.c_void_p),
ctypes.byref(info_count),
ctypes.byref(object_name),
)
Expand All @@ -168,7 +219,7 @@ def get_memory_regions(task: int) -> Generator[MemoryRegion, None, None]:
size.value,
info.protection,
info.max_protection,
info.shared,
_region_is_shared(task, address.value, info.shared),
info.reserved,
)

Expand Down Expand Up @@ -357,7 +408,7 @@ def _query_region(task: int, address: int):
ctypes.byref(addr),
ctypes.byref(size),
VM_REGION_BASIC_INFO_64,
ctypes.byref(info),
ctypes.cast(ctypes.byref(info), ctypes.c_void_p),
ctypes.byref(info_count),
ctypes.byref(object_name),
)
Expand All @@ -381,7 +432,7 @@ def _query_region(task: int, address: int):
size.value,
info.protection,
info.max_protection,
info.shared,
_region_is_shared(task, addr.value, info.shared),
info.reserved,
),
)
Expand Down
6 changes: 4 additions & 2 deletions PyMemoryEditor/macos/libsystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
mach_vm_size_t,
task_t,
vm_map_t,
vm_region_basic_info_64,
)


Expand Down Expand Up @@ -82,7 +81,10 @@
POINTER(mach_vm_address_t),
POINTER(mach_vm_size_t),
ctypes.c_int,
POINTER(vm_region_basic_info_64),
# `vm_region_info_t` is a generic `int *` the caller sizes via `flavor` —
# declare it as an opaque pointer so both vm_region_basic_info_64 and
# vm_region_extended_info can be passed (each via ctypes.cast(byref, ...)).
ctypes.c_void_p,
POINTER(mach_msg_type_number_t),
POINTER(mach_port_t),
)
Expand Down
41 changes: 40 additions & 1 deletion PyMemoryEditor/macos/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
- mach/kern_return.h
"""

from ctypes import Structure, c_int, c_uint, c_uint64, c_ushort, sizeof
from ctypes import Structure, c_int, c_ubyte, c_uint, c_uint64, c_ushort, sizeof


# `info_count` in mach_vm_region is measured in mach_msg_type_number_t units
Expand All @@ -33,6 +33,17 @@

# Region info flavors
VM_REGION_BASIC_INFO_64 = 9
VM_REGION_EXTENDED_INFO = 13

# user_tag (from vm_region_extended_info) of the dyld shared cache regions —
# the read-only library text/data blob the kernel maps into *every* process
# via a shared submap. On the machine where this was measured it totalled
# ~5.8 GB across three regions (the exact figure will differ between systems).
# Crucially, the basic-info `shared` flag reports FALSE for these regions, so
# without recognizing the tag a default value scan walks all ~6 GB of library
# memory — making macOS scans (and the test suite) 4-6x slower than Linux/Win32,
# which exclude the equivalent file-backed/shared mappings. See the VM_MEMORY_*
# constants in <mach/vm_statistics.h>.
VM_MEMORY_SHARED_PMAP = 32

# task_info() flavor that returns dyld's image-list pointer (mach/task_info.h).
TASK_DYLD_INFO = 17
Expand Down Expand Up @@ -76,6 +87,34 @@ class vm_region_basic_info_64(Structure):
VM_REGION_BASIC_INFO_COUNT_64 = sizeof(vm_region_basic_info_64) // _NATURAL_T_SIZE


class vm_region_extended_info(Structure):
    """ctypes mirror of ``vm_region_extended_info_data_t`` (<mach/vm_region.h>).

    ``user_tag`` is the only member read today; it is what identifies the
    dyld shared cache (see :data:`VM_MEMORY_SHARED_PMAP`). Every other member
    is still declared so that ``sizeof`` of this structure — and with it
    :data:`VM_REGION_EXTENDED_INFO_COUNT` — matches what the kernel expects.
    """

    # Order and widths below define the binary layout; do not reorder.
    _fields_ = [
        ("protection", vm_prot_t),
        ("user_tag", c_uint),  # the one member consumed by this package
        ("pages_resident", c_uint),
        ("pages_shared_now_private", c_uint),
        ("pages_swapped_out", c_uint),
        ("pages_dirtied", c_uint),
        ("ref_count", c_uint),
        ("shadow_depth", c_ushort),
        ("external_pager", c_ubyte),
        ("share_mode", c_ubyte),
        ("pages_reusable", c_uint),
    ]


# Number of mach_msg_type_number_t units in vm_region_extended_info.
VM_REGION_EXTENDED_INFO_COUNT = sizeof(vm_region_extended_info) // _NATURAL_T_SIZE


class task_dyld_info_data_t(Structure):
"""Layout of struct task_dyld_info from <mach/task_info.h>.

Expand Down
36 changes: 36 additions & 0 deletions tests/test_macos_protect.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ctypes.util import find_library # noqa: E402

from PyMemoryEditor import OpenProcess # noqa: E402
from PyMemoryEditor.process.region import default_scan_filter # noqa: E402


# Page size on macOS arm64 is 16 KB; x86_64 is 4 KB. mmap will pick the right one.
Expand Down Expand Up @@ -68,6 +69,41 @@ def _mmap_readonly(size: int) -> int:
return addr


def test_dyld_shared_cache_excluded_from_value_scans():
    """Regression guard: the dyld shared cache is classified as shared.

    macOS maps the dyld shared cache (a read-only library blob of roughly
    6 GB) into every process, yet its ``vm_region_basic_info`` ``shared`` flag
    is FALSE. Before the ``VM_MEMORY_SHARED_PMAP`` user_tag check, a default
    scan therefore walked the whole blob, making macOS scans (and this suite)
    4-6x slower than the other OSes, which exclude their equivalent
    file-backed library mappings.

    Two things are checked against the current process:

    * at least one large readable region reports ``is_shared``;
    * the bytes kept by ``default_scan_filter`` are a small fraction of all
      readable bytes.
    """
    # Anything this large and shared is taken to be the cache; 256 MB is well
    # below its real size.
    min_cache_bytes = 256 * 1024 * 1024

    proc = OpenProcess(pid=os.getpid())
    try:
        all_regions = list(proc.get_memory_regions())
    finally:
        proc.close()

    readable_regions = [region for region in all_regions if region.is_readable]

    total_readable = 0
    for region in readable_regions:
        total_readable += region.size

    total_scanned = 0
    for region in readable_regions:
        if default_scan_filter(region):
            total_scanned += region.size

    # The cache must now show up as one or more large shared regions.
    cache_candidates = [
        region
        for region in readable_regions
        if region.is_shared and region.size >= min_cache_bytes
    ]
    assert cache_candidates, "dyld shared cache not recognized as a shared mapping"

    # With the cache excluded, the scanned set is under half of readable
    # memory — guards against a regression that scans the whole address space.
    assert total_scanned < total_readable * 0.5


def test_write_to_readonly_page_via_protect_flip():
size = 4096
address = _mmap_readonly(size)
Expand Down
Loading