JeanExtreme002 · JeanExtreme002 · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026
diff --git a/PyMemoryEditor/util/scan.py b/PyMemoryEditor/util/scan.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+import re
 import struct
 import sys
 from bisect import bisect_left
@@ -206,6 +207,57 @@ def scan_memory_for_exact_value(
             yield offset
 
 
+def _scan_string_ordered(
+    data: bytes,
+    end: int,
+    target_value_size: int,
+    lo_byte: int,
+    hi_byte: int,
+    boundary_bytes: frozenset,
+    predicate,
+) -> Generator[int, None, None]:
+    """C-accelerated string scan for ordered comparisons (>, <, >=, <=, between).
+
+    Strings compare big-endian, so a fixed-width window can only satisfy an
+    ordered comparison when its *first* byte lies in ``[lo_byte, hi_byte]``.
+    Those candidate positions are located with a regex byte class whose C engine
+    skips the long NUL runs of reserved/zeroed memory orders of magnitude faster
+    than a per-offset Python loop — the same idea that makes the EXACT path's
+    ``bytes.find`` fast. A candidate is accepted outright unless its first byte
+    *ties* a comparison bound (``boundary_bytes``), in which case the full window
+    is decoded and checked with ``predicate``. Yields offsets in ascending order
+    (``re.finditer`` walks left to right), identical to the byte-by-byte loop.
+
+    ``lo_byte > hi_byte`` denotes an empty candidate range — it arises for a
+    reversed VALUE_BETWEEN (``start > end``), where the byte-by-byte loop's
+    ``start <= v <= end`` also matches nothing. Return empty rather than let
+    ``re`` raise "bad character range" on a ``[hi-lo]`` class.
+    """
+    if end <= 0 or lo_byte > hi_byte:
+        return
+
+    # re.escape keeps every byte (incl. class-specials like ] ^ - [ and NUL)
+    # literal inside the class; the unescaped `-` between them is the range op.
+    # Compile with NO flags: on a bytes pattern `[lo-hi]` is the exact inclusive
+    # ordinal range. In particular do NOT pass re.IGNORECASE — it folds ASCII
+    # case *inside* a class, so a range overlapping A-Z/a-z would match the
+    # opposite case too and silently return non-matching offsets.
+    matcher = re.compile(
+        b"[" + re.escape(bytes((lo_byte,))) + b"-" + re.escape(bytes((hi_byte,))) + b"]"
+    )
+
+    for match in matcher.finditer(data):
+        offset = match.start()
+        if offset >= end:
+            break
+        if data[offset] in boundary_bytes:
+            value = int.from_bytes(data[offset : offset + target_value_size], "big")
+            if predicate(value):
+                yield offset
+        else:
+            yield offset
+
+
 def scan_memory(
     memory_region_data: Sequence,
     memory_region_data_size: int,
@@ -236,14 +288,25 @@ def scan_memory(
     # narrowing for the downstream int.from_bytes / struct.unpack calls.
     byte_order: _ByteOrder = cast(_ByteOrder, "big" if is_string else sys.byteorder)
 
+    # First byte of each target, used by the string fast path below to build the
+    # candidate-byte regex class. Captured here where `target_value`'s type is
+    # narrowed (tuple vs bytes); `None` means "unused or empty — no fast path".
+    first_byte: Optional[int]
+    start_first_byte: Optional[int]
+    end_first_byte: Optional[int]
     if isinstance(target_value, tuple):
         start_target_value = _decode_target(target_value[0], byte_order, pytype)
         end_target_value = _decode_target(target_value[1], byte_order, pytype)
         target_value_decoded: Union[int, float] = 0
+        first_byte = None
+        start_first_byte = target_value[0][0] if target_value[0] else None
+        end_first_byte = target_value[1][0] if target_value[1] else None
     else:
         target_value_decoded = _decode_target(target_value, byte_order, pytype)
         start_target_value = 0
         end_target_value = 0
+        first_byte = target_value[0] if target_value else None
+        start_first_byte = end_first_byte = None
 
     fmt = None if is_string else _struct_format(byte_order, target_value_size, pytype)
 
@@ -334,6 +397,37 @@ def scan_memory(
     int_from_bytes = int.from_bytes
     signed = pytype is int
 
+    # Fast path for ordered string comparisons. Strings compare big-endian, so a
+    # window can only match when its first byte falls in a known range; a regex
+    # byte-class prefilter finds those candidates in C, skipping the huge NUL
+    # runs of reserved memory instead of stepping every byte in Python. Numerics
+    # with unusual sizes (3/6/7) decode in sys.byteorder and fall through unchanged.
+    if is_string:
+        spec = None
+        if first_byte is not None and scan_type is ScanTypesEnum.BIGGER_THAN:
+            spec = (first_byte, 0xFF, frozenset((first_byte,)),
+                    lambda v: v > target_value_decoded)
+        elif first_byte is not None and scan_type is ScanTypesEnum.BIGGER_THAN_OR_EXACT_VALUE:
+            spec = (first_byte, 0xFF, frozenset((first_byte,)),
+                    lambda v: v >= target_value_decoded)
+        elif first_byte is not None and scan_type is ScanTypesEnum.SMALLER_THAN:
+            spec = (0x00, first_byte, frozenset((first_byte,)),
+                    lambda v: v < target_value_decoded)
+        elif first_byte is not None and scan_type is ScanTypesEnum.SMALLER_THAN_OR_EXACT_VALUE:
+            spec = (0x00, first_byte, frozenset((first_byte,)),
+                    lambda v: v <= target_value_decoded)
+        elif (
+            scan_type is ScanTypesEnum.VALUE_BETWEEN
+            and start_first_byte is not None
+            and end_first_byte is not None
+        ):
+            spec = (start_first_byte, end_first_byte,
+                    frozenset((start_first_byte, end_first_byte)),
+                    lambda v: start_target_value <= v <= end_target_value)
+        if spec is not None:
+            yield from _scan_string_ordered(data, end, target_value_size, *spec)
+            return
+
     if scan_type is ScanTypesEnum.EXACT_VALUE:
         for offset in range(0, end, step):
             value = int_from_bytes(

diff --git a/docs/guide/searching.md b/docs/guide/searching.md
@@ -198,10 +198,13 @@ missing.
 ## Scan acceleration (the `speed` extra)
 
 By default every scan runs in pure Python, with the hottest paths already
-delegated to C primitives (`bytes.find` for exact matches, `struct.iter_unpack`
-to decode a region). What stays in Python is the per-value **comparison loop**
-of the ordered scans (`BIGGER_THAN`, `SMALLER_THAN`, `VALUE_BETWEEN`, …): for a
-multi-megabyte region it boxes and compares millions of values one at a time.
+delegated to C primitives: `bytes.find` for exact matches, `struct.iter_unpack`
+to decode a region, and a **regex byte-class prefilter** for ordered *string*
+comparisons (`BIGGER_THAN` / `SMALLER_THAN` / `VALUE_BETWEEN` on `str`), which
+skips the long runs of non-matching bytes in C instead of stepping every offset.
+What stays in Python is the per-value **comparison loop** of the ordered
+*numeric* scans: for a multi-megabyte region it boxes and compares millions of
+values one at a time.
 
 Installing the optional [`speed`](../installation.md#install-with-scan-acceleration-speed)
 extra replaces that loop with a single vectorized NumPy comparison:
@@ -241,7 +244,8 @@ emitting matches.
 <tr><th>Scenario</th><th>Typical speedup</th></tr>
 <tr><td>Selective scan of a large region (few matches — the usual first scan / refine step)</td><td><b>10–60×</b></td></tr>
 <tr><td>Scan where most values match (e.g. <code>&gt; 0</code> on mostly-positive data)</td><td>~2× (result building dominates)</td></tr>
-<tr><td><code>str</code> / <code>bytes</code> scans, or unusual widths (3/6/7 bytes)</td><td>no change (no NumPy fast path; pure-Python loop)</td></tr>
+<tr><td><code>str</code> ordered scans (<code>&gt;</code>, <code>&lt;</code>, <code>between</code>)</td><td>no NumPy fast path — instead C-accelerated by the regex byte-class prefilter (independent of the <code>speed</code> extra)</td></tr>
+<tr><td><code>bytes</code> scans, or unusual widths (3/6/7 bytes)</td><td>no change (no NumPy fast path; pure-Python loop)</td></tr>
 <tr><td><code>EXACT_VALUE</code> via <code>search_by_value</code></td><td>already <code>bytes.find</code> in C — NumPy not used</td></tr>
 </table>
 
@@ -262,9 +266,14 @@ for address in process.search_by_value(str, 6, "PLAYER"):
     print(hex(address))
 ```
 
-For `bytes`, comparison ordering depends on your system's `byteorder` —
-something to keep in mind when using `BIGGER_THAN` / `SMALLER_THAN` on raw
-bytes.
+Ordering for the comparison modes differs by type:
+
+- **`str`** compares the UTF-8 bytes **lexicographically** (big-endian), so
+  `"AA" < "AB" < "B"`. The shorter of two values is NUL-padded to `bufflength`
+  before comparing, and a reversed `VALUE_BETWEEN` range (`start > end`) simply
+  matches nothing.
+- **`bytes`** compares using your system's `byteorder` — something to keep in
+  mind when using `BIGGER_THAN` / `SMALLER_THAN` on raw bytes.
 
 ```{seealso}
 - [Pattern scan](pattern-scan.md) — find data by **shape** with regex and AOB

diff --git a/tests/test_scan.py b/tests/test_scan.py
@@ -374,3 +374,64 @@ def test_scan_memory_double_bigger_than_negative():
 
     # -1.0 (offset 8), 1.0 (16), 3.0 (24) match; -3.0 (offset 0) does not.
     assert results == [8, 16, 24]
+
+
+# --- String ordered-comparison fast path (regex byte-class prefilter) ---------
+#
+# These exercise the prefilter directly with hand-checked expected offsets, on
+# top of the property-based equivalence tests in test_scan_properties.py.
+
+
+def _scan_str(data, target, size, scan_type):
+    return list(scan_memory(data, len(data), target, size, scan_type, str))
+
+
+def test_scan_string_bigger_than_first_byte_dominates():
+    # 2-byte windows, step 1. Target "MA" (0x4D41). Accept windows > it.
+    data = b"AAZZMAMBLZ"
+    results = _scan_str(data, b"MA", 2, ScanTypesEnum.BIGGER_THAN)
+    # Windows (big-endian) and whether > "MA": AA<,AZ<,ZZ>,ZM>,MA=,AM<,MB>,BL<,LZ<
+    assert results == [2, 3, 6]
+
+
+def test_scan_string_smaller_than_includes_low_bytes():
+    data = b"AAMAZZ"
+    results = _scan_str(data, b"MA", 2, ScanTypesEnum.SMALLER_THAN)
+    # AA<,AM<,MA=,AZ<,ZZ>  -> offsets 0,1,3 are smaller.
+    assert results == [0, 1, 3]
+
+
+def test_scan_string_value_between_skips_noise():
+    # Only windows whose value lands in ["EA","WZ"] inclusive should match.
+    data = b"AB" + b"EM" + b"ZZ" + b"WZ" + b"  "
+    results = _scan_str(data, (b"EA", b"WZ"), 2, ScanTypesEnum.VALUE_BETWEEN)
+    # offsets: 0 AB(no) 1 BE(no) 2 EM(yes) 3 MZ(yes) 4 ZZ(no) 5 ZW(no) 6 WZ(yes)
+    #          7 "Z "(no) 8 "  "(no)
+    assert results == [2, 3, 6]
+
+
+def test_scan_string_value_between_reversed_range_is_empty():
+    """Regression: a reversed range (start > end) must yield nothing, not crash.
+
+    The fast path builds a regex class ``[start_byte-end_byte]``; a reversed
+    range would compile to ``[hi-lo]`` and raise ``re.error: bad character
+    range``. The byte-by-byte loop returns [] for start > end, so the fast path
+    must too.
+    """
+    data = b"MMMMMM"
+    assert _scan_str(data, (b"ZZ", b"AA"), 2, ScanTypesEnum.VALUE_BETWEEN) == []
+    # Reversed but sharing a first byte still resolves to empty.
+    assert _scan_str(data, (b"MZ", b"MA"), 2, ScanTypesEnum.VALUE_BETWEEN) == []
+
+
+def test_scan_string_regex_special_bytes_as_bounds():
+    """Bytes that are special inside a regex class (]^-\\[) must be literal."""
+    data = bytes([0x5D, 0x5E, 0x2D, 0x5C, 0x5B, 0x41, 0xFF])  # ] ^ - \\ [ A 0xff
+    # 1-byte windows, BIGGER_THAN_OR_EXACT_VALUE with a class-special bound:
+    # bytes >= '-' (0x2D). Nothing in `data` is below 0x2D, so every offset matches.
+    results = _scan_str(data, b"\x2d", 1, ScanTypesEnum.BIGGER_THAN_OR_EXACT_VALUE)
+    assert results == [0, 1, 2, 3, 4, 5, 6]
+    # SMALLER_THAN ']' (0x5D): bytes < 0x5D are
+    #   '-'(0x2d=off2), '\\'(0x5c=off3), '['(0x5b=off4), 'A'(0x41=off5).
+    results = _scan_str(data, b"\x5d", 1, ScanTypesEnum.SMALLER_THAN)
+    assert results == [2, 3, 4, 5]
diff --git a/tests/test_scan_properties.py b/tests/test_scan_properties.py
@@ -68,6 +68,98 @@ def _int_payload(draw):
     return size, b"".join(struct.pack(fmt, v) for v in values), struct.pack(fmt, target)
 
 
+# Ordered string comparisons that scan_memory routes through the regex
+# byte-class fast path (NOT_* are dense and keep the byte-by-byte loop).
+_ORDERED_STRING_SCAN_TYPES = (
+    ScanTypesEnum.BIGGER_THAN,
+    ScanTypesEnum.SMALLER_THAN,
+    ScanTypesEnum.BIGGER_THAN_OR_EXACT_VALUE,
+    ScanTypesEnum.SMALLER_THAN_OR_EXACT_VALUE,
+)
+
+# Bias the byte alphabet toward the values most likely to trip the fast path:
+# the lexicographic extremes (0x00 / 0xff), boundary ties, and the bytes that
+# are special inside a regex character class ([ ] ^ - \ etc.).
+_TRICKY_BYTES = st.sampled_from([0x00, 0xFF] + list(b"[]^-\\&~|ABC"))
+_ANY_BYTE = st.integers(min_value=0, max_value=255)
+
+
+@st.composite
+def _string_payload(draw):
+    size = draw(st.integers(min_value=1, max_value=8))
+    count = draw(st.integers(min_value=0, max_value=40))
+    byte = st.one_of(_TRICKY_BYTES, _ANY_BYTE)
+    data = bytes(draw(st.lists(byte, min_size=count, max_size=count)))
+    target = bytes(draw(st.lists(byte, min_size=size, max_size=size)))
+    return size, data, target
+
+
+def _scan_string_slow(data, size, target_value, scan_type):
+    """Reference: byte-by-byte big-endian string scan (the pre-fast-path loop)."""
+    end = len(data) - size + 1
+    results = []
+    if isinstance(target_value, tuple):
+        lo = int.from_bytes(target_value[0], "big")
+        hi = int.from_bytes(target_value[1], "big")
+    else:
+        target = int.from_bytes(target_value, "big")
+    for offset in range(0, max(end, 0)):
+        value = int.from_bytes(data[offset : offset + size], "big")
+        if scan_type is ScanTypesEnum.BIGGER_THAN and value > target:
+            results.append(offset)
+        elif scan_type is ScanTypesEnum.SMALLER_THAN and value < target:
+            results.append(offset)
+        elif scan_type is ScanTypesEnum.BIGGER_THAN_OR_EXACT_VALUE and value >= target:
+            results.append(offset)
+        elif scan_type is ScanTypesEnum.SMALLER_THAN_OR_EXACT_VALUE and value <= target:
+            results.append(offset)
+        elif scan_type is ScanTypesEnum.VALUE_BETWEEN and lo <= value <= hi:
+            results.append(offset)
+    return results
+
+
+@settings(
+    suppress_health_check=[HealthCheck.too_slow],
+    deadline=None,
+    max_examples=300,
+)
+@given(
+    payload=_string_payload(),
+    scan_type=st.sampled_from(_ORDERED_STRING_SCAN_TYPES),
+)
+def test_ordered_string_scan_matches_reference(payload, scan_type):
+    """Regex byte-class fast path must agree with the byte-by-byte reference.
+
+    Strings step by one byte and compare big-endian, so the fast path uses a
+    first-byte prefilter; this checks it yields exactly the same offsets across
+    boundary ties and regex-special bytes.
+    """
+    size, data, target = payload
+    fast = list(scan_memory(data, len(data), target, size, scan_type, str))
+    slow = _scan_string_slow(data, size, target, scan_type)
+    assert fast == slow
+
+
+@settings(
+    suppress_health_check=[HealthCheck.too_slow],
+    deadline=None,
+    max_examples=300,
+)
+@given(payload=_string_payload())
+def test_value_between_string_matches_reference(payload):
+    """VALUE_BETWEEN over strings (the search_by_value_between path) must match."""
+    size, data, a = payload
+    # Build a valid [lo, hi] range from two same-width byte strings.
+    b = bytes((x + 1) % 256 for x in a)
+    lo, hi = (a, b) if a <= b else (b, a)
+    target = (lo, hi)
+    fast = list(
+        scan_memory(data, len(data), target, size, ScanTypesEnum.VALUE_BETWEEN, str)
+    )
+    slow = _scan_string_slow(data, size, target, ScanTypesEnum.VALUE_BETWEEN)
+    assert fast == slow
+
+
 @st.composite
 def _float_payload(draw):
     size = draw(st.sampled_from(_FLOAT_SIZES))