From 279935a5f3e73b454ef7ca81f8a39e66228ca456 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Thu, 30 Apr 2026 17:33:34 -0700
Subject: [PATCH 1/8] test: xfail Windows mempool OOM cases

Work around nvbugs5815123 by treating OOM returns from mempool setup in affected tests as expected failures on Windows. Unsupported configurations still skip normally, while other platforms continue to fail on unexpected OOMs.

Made-with: Cursor
---
 cuda_bindings/tests/test_cuda.py              | 12 ++++++
 cuda_bindings/tests/test_cudart.py            |  7 ++++
 cuda_core/tests/conftest.py                   | 23 +++++++++++-
 .../tests/test_managed_memory_warning.py      | 10 ++++-
 cuda_core/tests/test_memory.py                | 37 ++++++++++---------
 cuda_core/tests/test_tensor_map.py            |  6 +--
 6 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py
index e3eefb1fdd7..521c0ebc2e1 100644
--- a/cuda_bindings/tests/test_cuda.py
+++ b/cuda_bindings/tests/test_cuda.py
@@ -32,6 +32,11 @@ def supportsManagedMemory():
     return err == cudart.cudaError_t.cudaSuccess and isSupported
 
 
+def xfail_if_mempool_oom(err, api_name):
+    if platform.system() == "Windows" and err == cuda.CUresult.CUDA_ERROR_OUT_OF_MEMORY:
+        pytest.xfail(f"{api_name} could not reserve VA for mempool operations on this Windows platform")
+
+
 def supportsCudaAPI(name):
     return name in dir(cuda)
 
@@ -270,6 +275,7 @@ def test_cuda_memPool_attr():
 
     attr_list = [None] * 8
     err, pool = cuda.cuMemPoolCreate(poolProps)
+    xfail_if_mempool_oom(err, "cuMemPoolCreate")
     assert err == cuda.CUresult.CUDA_SUCCESS
 
     for idx, attr in enumerate(
@@ -468,6 +474,12 @@ def test_cuda_graphMem_attr(device):
     params.bytesize = allocSize
 
     err, allocNode = cuda.cuGraphAddMemAllocNode(graph, None, 0, params)
+    if err == cuda.CUresult.CUDA_ERROR_OUT_OF_MEMORY:
+        (destroy_err,) = cuda.cuGraphDestroy(graph)
+        assert destroy_err == cuda.CUresult.CUDA_SUCCESS
+        (destroy_err,) = cuda.cuStreamDestroy(stream)
+        assert destroy_err == cuda.CUresult.CUDA_SUCCESS
+        xfail_if_mempool_oom(err, "cuGraphAddMemAllocNode")
     assert err == cuda.CUresult.CUDA_SUCCESS
     err, freeNode = cuda.cuGraphAddMemFreeNode(graph, [allocNode], 1, params.dptr)
     assert err == cuda.CUresult.CUDA_SUCCESS
diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py
index 3fa5594a262..87e31c4160c 100644
--- a/cuda_bindings/tests/test_cudart.py
+++ b/cuda_bindings/tests/test_cudart.py
@@ -3,6 +3,7 @@
 
 import ctypes
 import math
+import platform
 
 import numpy as np
 import pytest
@@ -32,6 +33,11 @@ def supportsMemoryPool():
     return isSuccess(err) and isSupported
 
 
+def xfail_if_mempool_oom(err, api_name):
+    if platform.system() == "Windows" and err == cudart.cudaError_t.cudaErrorMemoryAllocation:
+        pytest.xfail(f"{api_name} could not reserve VA for mempool operations on this Windows platform")
+
+
 def supportsSparseTexturesDeviceFilter():
     err, isSupported = cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrSparseCudaArraySupported, 0)
     return isSuccess(err) and isSupported
@@ -432,6 +438,7 @@ def test_cudart_MemPool_attr():
 
     attr_list = [None] * 8
     err, pool = cudart.cudaMemPoolCreate(poolProps)
+    xfail_if_mempool_oom(err, "cudaMemPoolCreate")
     assertSuccess(err)
 
     for idx, attr in enumerate(
diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index 85c5e75ff78..86fa9be7811 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -27,7 +27,7 @@
     PinnedMemoryResourceOptions,
     _device,
 )
-from cuda.core._utils.cuda_utils import handle_return
+from cuda.core._utils.cuda_utils import CUDAError, handle_return
 
 # Import shared test helpers for tests across subprojects.
 # PLEASE KEEP IN SYNC with copies in other conftest.py in this repo.
@@ -61,21 +61,40 @@ def skip_if_managed_memory_unsupported(device):
         pytest.skip("ManagedMemoryResource requires CUDA 13.0 or later")
     try:
         ManagedMemoryResource()
+    except CUDAError as e:
+        xfail_if_mempool_oom(e)
+        raise
     except RuntimeError as e:
         if "requires CUDA 13.0" in str(e):
             pytest.skip("ManagedMemoryResource requires CUDA 13.0 or later")
         raise
 
 
-def create_managed_memory_resource_or_skip(*args, **kwargs):
+def create_managed_memory_resource_or_xfail(*args, **kwargs):
     try:
         return ManagedMemoryResource(*args, **kwargs)
+    except CUDAError as e:
+        xfail_if_mempool_oom(e)
+        raise
     except RuntimeError as e:
         if "requires CUDA 13.0" in str(e):
             pytest.skip("ManagedMemoryResource requires CUDA 13.0 or later")
         raise
 
 
+def create_pinned_memory_resource_or_skip(*args, **kwargs):
+    try:
+        return PinnedMemoryResource(*args, **kwargs)
+    except CUDAError as e:
+        xfail_if_mempool_oom(e)
+        raise
+
+
+def xfail_if_mempool_oom(exc):
+    if sys.platform == "win32" and "CUDA_ERROR_OUT_OF_MEMORY" in str(exc):
+        pytest.xfail("Driver could not reserve VA for mempool operations on this Windows platform")
+
+
 @pytest.fixture(scope="session", autouse=True)
 def session_setup():
     # Always init CUDA.
diff --git a/cuda_core/tests/test_managed_memory_warning.py b/cuda_core/tests/test_managed_memory_warning.py
index 78015978e72..c31052c82d1 100644
--- a/cuda_core/tests/test_managed_memory_warning.py
+++ b/cuda_core/tests/test_managed_memory_warning.py
@@ -13,8 +13,10 @@
 import pytest
 
 import cuda.bindings
+from conftest import xfail_if_mempool_oom
 from cuda.core import Device, ManagedMemoryResource, ManagedMemoryResourceOptions
 from cuda.core._memory._managed_memory_resource import reset_concurrent_access_warning
+from cuda.core._utils.cuda_utils import CUDAError
 
 _cuda_major = int(cuda.bindings.__version__.split(".")[0])
 
@@ -47,8 +49,12 @@ def device_without_concurrent_managed_access(init_cuda):
 @requires_cuda_13
 def test_default_pool_error_without_concurrent_access(device_without_concurrent_managed_access):
     """ManagedMemoryResource() raises RuntimeError when the default pool doesn't support managed."""
-    with pytest.raises(RuntimeError, match="does not support managed allocations"):
-        ManagedMemoryResource()
+    try:
+        with pytest.raises(RuntimeError, match="does not support managed allocations"):
+            ManagedMemoryResource()
+    except CUDAError as exc:
+        xfail_if_mempool_oom(exc)
+        raise
 
 
 @requires_cuda_13
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 85dd4a7ea2b..834d7a3a947 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -21,7 +21,8 @@
 from helpers.buffers import DummyUnifiedMemoryResource, TrackingMR
 
 from conftest import (
-    create_managed_memory_resource_or_skip,
+    create_managed_memory_resource_or_xfail,
+    create_pinned_memory_resource_or_skip,
     skip_if_managed_memory_unsupported,
     skip_if_pinned_memory_unsupported,
 )
@@ -617,7 +618,7 @@ def test_managed_memory_resource_buffer_dlpack_device_type():
     device = Device()
     device.set_current()
     skip_if_managed_memory_unsupported(device)
-    mr = create_managed_memory_resource_or_skip(ManagedMemoryResourceOptions(preferred_location=device.device_id))
+    mr = create_managed_memory_resource_or_xfail(ManagedMemoryResourceOptions(preferred_location=device.device_id))
     buf = mr.allocate(1024)
 
     assert mr.is_managed
@@ -639,7 +640,7 @@ def test_non_managed_resources_report_not_managed(mr_kind):
         mr = DeviceMemoryResource(device)
     else:
         skip_if_pinned_memory_unsupported(device)
-        mr = PinnedMemoryResource()
+        mr = create_pinned_memory_resource_or_skip()
     assert mr.is_managed is False
     buf = mr.allocate(1024)
     assert buf.is_managed is False
@@ -684,7 +685,7 @@ def test_pinned_memory_resource_initialization(init_cuda):
 
     device.set_current()
 
-    mr = PinnedMemoryResource()
+    mr = create_pinned_memory_resource_or_skip()
     assert mr.is_device_accessible
     assert mr.is_host_accessible
 
@@ -713,7 +714,7 @@ def test_managed_memory_resource_initialization(init_cuda):
 
     device.set_current()
 
-    mr = create_managed_memory_resource_or_skip()
+    mr = create_managed_memory_resource_or_xfail()
     assert mr.is_device_accessible
     assert mr.is_host_accessible
 
@@ -1028,7 +1029,7 @@ def test_managed_memory_resource_with_options(init_cuda):
 
     # Test basic pool creation
     options = ManagedMemoryResourceOptions()
-    mr = create_managed_memory_resource_or_skip(options)
+    mr = create_managed_memory_resource_or_xfail(options)
     assert mr.is_device_accessible
     assert mr.is_host_accessible
     assert not mr.is_ipc_enabled
@@ -1071,7 +1072,7 @@ def test_managed_memory_resource_preferred_location_default(init_cuda):
     skip_if_managed_memory_unsupported(device)
     device.set_current()
 
-    mr = create_managed_memory_resource_or_skip()
+    mr = create_managed_memory_resource_or_xfail()
     assert mr.preferred_location is None
 
 
@@ -1083,7 +1084,7 @@ def test_managed_memory_resource_preferred_location_device(init_cuda):
 
     # Legacy style
     opts = ManagedMemoryResourceOptions(preferred_location=device.device_id)
-    mr = create_managed_memory_resource_or_skip(opts)
+    mr = create_managed_memory_resource_or_xfail(opts)
     assert mr.preferred_location == ("device", device.device_id)
 
     # Explicit style
@@ -1091,7 +1092,7 @@ def test_managed_memory_resource_preferred_location_device(init_cuda):
         preferred_location=device.device_id,
         preferred_location_type="device",
     )
-    mr = create_managed_memory_resource_or_skip(opts)
+    mr = create_managed_memory_resource_or_xfail(opts)
     assert mr.preferred_location == ("device", device.device_id)
 
 
@@ -1103,12 +1104,12 @@ def test_managed_memory_resource_preferred_location_host(init_cuda):
 
     # Legacy style
     opts = ManagedMemoryResourceOptions(preferred_location=-1)
-    mr = create_managed_memory_resource_or_skip(opts)
+    mr = create_managed_memory_resource_or_xfail(opts)
     assert mr.preferred_location == ("host", None)
 
     # Explicit style
     opts = ManagedMemoryResourceOptions(preferred_location_type="host")
-    mr = create_managed_memory_resource_or_skip(opts)
+    mr = create_managed_memory_resource_or_xfail(opts)
     assert mr.preferred_location == ("host", None)
 
 
@@ -1124,7 +1125,7 @@ def test_managed_memory_resource_preferred_location_host_numa(init_cuda):
 
     # Auto-resolved from current device
     opts = ManagedMemoryResourceOptions(preferred_location_type="host_numa")
-    mr = create_managed_memory_resource_or_skip(opts)
+    mr = create_managed_memory_resource_or_xfail(opts)
     assert mr.preferred_location == ("host_numa", numa_id)
 
     # Explicit NUMA node ID
@@ -1132,7 +1133,7 @@ def test_managed_memory_resource_preferred_location_host_numa(init_cuda):
         preferred_location=numa_id,
         preferred_location_type="host_numa",
     )
-    mr = create_managed_memory_resource_or_skip(opts)
+    mr = create_managed_memory_resource_or_xfail(opts)
     assert mr.preferred_location == ("host_numa", numa_id)
 
 
@@ -1423,7 +1424,7 @@ def test_mempool_attributes(ipc_enabled, memory_resource_factory, property_name,
         assert mr.is_ipc_enabled == ipc_enabled
     elif MR is ManagedMemoryResource:
         options = MRops()
-        mr = create_managed_memory_resource_or_skip(options)
+        mr = create_managed_memory_resource_or_xfail(options)
         assert not mr.is_ipc_enabled
 
     # Get the property value
@@ -1476,7 +1477,7 @@ def test_mempool_attributes_repr(memory_resource_factory):
     elif MR is PinnedMemoryResource:
         mr = MR(options={"max_size": 2048})
     elif MR is ManagedMemoryResource:
-        mr = create_managed_memory_resource_or_skip(options={})
+        mr = create_managed_memory_resource_or_xfail(options={})
 
     buffer1 = mr.allocate(64)
     buffer2 = mr.allocate(64)
@@ -1513,7 +1514,7 @@ def test_mempool_attributes_ownership(memory_resource_factory):
     elif MR is PinnedMemoryResource:
         mr = MR({"max_size": POOL_SIZE})
     elif MR is ManagedMemoryResource:
-        mr = create_managed_memory_resource_or_skip({})
+        mr = create_managed_memory_resource_or_xfail({})
 
     attributes = mr.attributes
     mr.close()
@@ -1581,10 +1582,10 @@ def test_memory_resource_alloc_zero_bytes(init_cuda, memory_resource_factory):
         pytest.skip("Device does not support mempool operations")
     elif MR is PinnedMemoryResource:
         skip_if_pinned_memory_unsupported(device)
-        mr = MR()
+        mr = create_pinned_memory_resource_or_skip()
     elif MR is ManagedMemoryResource:
         skip_if_managed_memory_unsupported(device)
-        mr = create_managed_memory_resource_or_skip(MROps(preferred_location=device.device_id))
+        mr = create_managed_memory_resource_or_xfail(MROps(preferred_location=device.device_id))
     else:
         assert MR is DeviceMemoryResource
         mr = MR(device)
diff --git a/cuda_core/tests/test_tensor_map.py b/cuda_core/tests/test_tensor_map.py
index 9ca8790d2b8..17a1ad221e3 100644
--- a/cuda_core/tests/test_tensor_map.py
+++ b/cuda_core/tests/test_tensor_map.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pytest
 
-from conftest import create_managed_memory_resource_or_skip, skip_if_managed_memory_unsupported
+from conftest import create_managed_memory_resource_or_xfail, skip_if_managed_memory_unsupported
 from cuda.core import (
     Device,
     ManagedMemoryResourceOptions,
@@ -403,7 +403,7 @@ def test_replace_address_accepts_managed_buffer_on_nonzero_device(self, init_cud
             data_type=TensorMapDataType.FLOAT32,
         )
 
-        mr = create_managed_memory_resource_or_skip(ManagedMemoryResourceOptions(preferred_location=dev1.device_id))
+        mr = create_managed_memory_resource_or_xfail(ManagedMemoryResourceOptions(preferred_location=dev1.device_id))
         managed_buf = mr.allocate(1024 * 4)
 
         desc.replace_address(managed_buf)
@@ -442,7 +442,7 @@ def test_from_tiled_accepts_managed_buffer_on_nonzero_device(self, init_cuda):
         skip_if_managed_memory_unsupported(dev1)
 
         dev1.set_current()
-        mr = create_managed_memory_resource_or_skip(ManagedMemoryResourceOptions(preferred_location=dev1.device_id))
+        mr = create_managed_memory_resource_or_xfail(ManagedMemoryResourceOptions(preferred_location=dev1.device_id))
         managed_buf = mr.allocate(1024 * 4)
 
         desc = _as_view(managed_buf).as_tensor_map(

From 6f3ac7f98f82cd4cade4d8c94bece141ee955ded Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Thu, 30 Apr 2026 17:43:49 -0700
Subject: [PATCH 2/8] test: limit mempool OOM xfail to MCDM

Use NVML to confirm the CUDA device is running on Windows MCDM before treating mempool OOM setup failures as expected. If the MCDM check cannot be completed, leave the original test failure visible.

Made-with: Cursor
---
 cuda_bindings/tests/test_cuda.py              | 35 +++++++++--
 cuda_bindings/tests/test_cudart.py            | 33 ++++++++--
 cuda_core/tests/conftest.py                   | 61 ++++++++++++++++---
 .../tests/test_managed_memory_warning.py      |  2 +-
 cuda_core/tests/test_memory.py                |  6 +-
 5 files changed, 116 insertions(+), 21 deletions(-)

diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py
index 521c0ebc2e1..844d2cac138 100644
--- a/cuda_bindings/tests/test_cuda.py
+++ b/cuda_bindings/tests/test_cuda.py
@@ -32,9 +32,34 @@ def supportsManagedMemory():
     return err == cudart.cudaError_t.cudaSuccess and isSupported
 
 
-def xfail_if_mempool_oom(err, api_name):
-    if platform.system() == "Windows" and err == cuda.CUresult.CUDA_ERROR_OUT_OF_MEMORY:
-        pytest.xfail(f"{api_name} could not reserve VA for mempool operations on this Windows platform")
+def is_windows_mcdm_device(device):
+    if platform.system() != "Windows":
+        return False
+    try:
+        import cuda.bindings.nvml as nvml
+
+        (err,) = cuda.cuInit(0)
+        if err != cuda.CUresult.CUDA_SUCCESS:
+            return False
+        err, pci_bus_id = cuda.cuDeviceGetPCIBusId(13, device)
+        if err != cuda.CUresult.CUDA_SUCCESS:
+            return False
+        pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
+        nvml.init_v2()
+        try:
+            handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
+            current, _ = nvml.device_get_driver_model_v2(handle)
+            return current == nvml.DriverModel.DRIVER_MCDM
+        finally:
+            nvml.shutdown()
+    except Exception:
+        # If MCDM detection fails, leave the primary test failure visible.
+        return False
+
+
+def xfail_if_mempool_oom(err, api_name, device=0):
+    if err == cuda.CUresult.CUDA_ERROR_OUT_OF_MEMORY and is_windows_mcdm_device(device):
+        pytest.xfail(f"{api_name} could not reserve VA for mempool operations on Windows MCDM")
 
 
 def supportsCudaAPI(name):
@@ -275,7 +300,7 @@ def test_cuda_memPool_attr():
 
     attr_list = [None] * 8
     err, pool = cuda.cuMemPoolCreate(poolProps)
-    xfail_if_mempool_oom(err, "cuMemPoolCreate")
+    xfail_if_mempool_oom(err, "cuMemPoolCreate", poolProps.location.id)
     assert err == cuda.CUresult.CUDA_SUCCESS
 
     for idx, attr in enumerate(
@@ -479,7 +504,7 @@ def test_cuda_graphMem_attr(device):
         assert destroy_err == cuda.CUresult.CUDA_SUCCESS
         (destroy_err,) = cuda.cuStreamDestroy(stream)
         assert destroy_err == cuda.CUresult.CUDA_SUCCESS
-        xfail_if_mempool_oom(err, "cuGraphAddMemAllocNode")
+        xfail_if_mempool_oom(err, "cuGraphAddMemAllocNode", device)
     assert err == cuda.CUresult.CUDA_SUCCESS
     err, freeNode = cuda.cuGraphAddMemFreeNode(graph, [allocNode], 1, params.dptr)
     assert err == cuda.CUresult.CUDA_SUCCESS
diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py
index 87e31c4160c..8e011bff26f 100644
--- a/cuda_bindings/tests/test_cudart.py
+++ b/cuda_bindings/tests/test_cudart.py
@@ -33,9 +33,34 @@ def supportsMemoryPool():
     return isSuccess(err) and isSupported
 
 
-def xfail_if_mempool_oom(err, api_name):
-    if platform.system() == "Windows" and err == cudart.cudaError_t.cudaErrorMemoryAllocation:
-        pytest.xfail(f"{api_name} could not reserve VA for mempool operations on this Windows platform")
+def is_windows_mcdm_device(device):
+    if platform.system() != "Windows":
+        return False
+    try:
+        import cuda.bindings.nvml as nvml
+
+        (err,) = cuda.cuInit(0)
+        if err != cuda.CUresult.CUDA_SUCCESS:
+            return False
+        err, pci_bus_id = cuda.cuDeviceGetPCIBusId(13, device)
+        if err != cuda.CUresult.CUDA_SUCCESS:
+            return False
+        pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
+        nvml.init_v2()
+        try:
+            handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
+            current, _ = nvml.device_get_driver_model_v2(handle)
+            return current == nvml.DriverModel.DRIVER_MCDM
+        finally:
+            nvml.shutdown()
+    except Exception:
+        # If MCDM detection fails, leave the primary test failure visible.
+        return False
+
+
+def xfail_if_mempool_oom(err, api_name, device=0):
+    if err == cudart.cudaError_t.cudaErrorMemoryAllocation and is_windows_mcdm_device(device):
+        pytest.xfail(f"{api_name} could not reserve VA for mempool operations on Windows MCDM")
 
 
 def supportsSparseTexturesDeviceFilter():
@@ -438,7 +463,7 @@ def test_cudart_MemPool_attr():
 
     attr_list = [None] * 8
     err, pool = cudart.cudaMemPoolCreate(poolProps)
-    xfail_if_mempool_oom(err, "cudaMemPoolCreate")
+    xfail_if_mempool_oom(err, "cudaMemPoolCreate", poolProps.location.id)
     assertSuccess(err)
 
     for idx, attr in enumerate(
diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index 86fa9be7811..161180a74a0 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -62,7 +62,7 @@ def skip_if_managed_memory_unsupported(device):
     try:
         ManagedMemoryResource()
     except CUDAError as e:
-        xfail_if_mempool_oom(e)
+        xfail_if_mempool_oom(e, device)
         raise
     except RuntimeError as e:
         if "requires CUDA 13.0" in str(e):
@@ -70,11 +70,11 @@ def skip_if_managed_memory_unsupported(device):
         raise
 
 
-def create_managed_memory_resource_or_xfail(*args, **kwargs):
+def create_managed_memory_resource_or_xfail(*args, xfail_device=None, **kwargs):
     try:
         return ManagedMemoryResource(*args, **kwargs)
     except CUDAError as e:
-        xfail_if_mempool_oom(e)
+        xfail_if_mempool_oom(e, _device_id_from_resource_options(xfail_device, args, kwargs))
         raise
     except RuntimeError as e:
         if "requires CUDA 13.0" in str(e):
@@ -82,17 +82,62 @@ def create_managed_memory_resource_or_xfail(*args, **kwargs):
         raise
 
 
-def create_pinned_memory_resource_or_skip(*args, **kwargs):
+def create_pinned_memory_resource_or_skip(*args, xfail_device=None, **kwargs):
     try:
         return PinnedMemoryResource(*args, **kwargs)
     except CUDAError as e:
-        xfail_if_mempool_oom(e)
+        xfail_if_mempool_oom(e, xfail_device)
         raise
 
 
-def xfail_if_mempool_oom(exc):
-    if sys.platform == "win32" and "CUDA_ERROR_OUT_OF_MEMORY" in str(exc):
-        pytest.xfail("Driver could not reserve VA for mempool operations on this Windows platform")
+def is_windows_mcdm_device(device=0):
+    if sys.platform != "win32":
+        return False
+    try:
+        import cuda.bindings.nvml as nvml
+
+        device_id = int(device.device_id if hasattr(device, "device_id") else device)
+        (err,) = driver.cuInit(0)
+        if err != driver.CUresult.CUDA_SUCCESS:
+            return False
+        err, pci_bus_id = driver.cuDeviceGetPCIBusId(13, device_id)
+        if err != driver.CUresult.CUDA_SUCCESS:
+            return False
+        pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
+        nvml.init_v2()
+        try:
+            handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
+            current, _ = nvml.device_get_driver_model_v2(handle)
+            return current == nvml.DriverModel.DRIVER_MCDM
+        finally:
+            nvml.shutdown()
+    except Exception:
+        # If MCDM detection fails, leave the primary test failure visible.
+        return False
+
+
+def xfail_if_mempool_oom(exc, device=0):
+    if "CUDA_ERROR_OUT_OF_MEMORY" in str(exc) and is_windows_mcdm_device(device):
+        pytest.xfail("Driver could not reserve VA for mempool operations on Windows MCDM")
+
+
+def _device_id_from_resource_options(device, args, kwargs):
+    if device is not None:
+        return device
+    options = kwargs.get("options")
+    if options is None and args:
+        options = args[0]
+    if options is None:
+        return 0
+    if isinstance(options, dict):
+        preferred_location = options.get("preferred_location")
+        preferred_location_type = options.get("preferred_location_type")
+    else:
+        preferred_location = getattr(options, "preferred_location", None)
+        preferred_location_type = getattr(options, "preferred_location_type", None)
+    if preferred_location_type in (None, "device") and isinstance(preferred_location, int) and preferred_location >= 0:
+        return preferred_location
+    return 0
 
 
 @pytest.fixture(scope="session", autouse=True)
diff --git a/cuda_core/tests/test_managed_memory_warning.py b/cuda_core/tests/test_managed_memory_warning.py
index c31052c82d1..5e6032ebe9e 100644
--- a/cuda_core/tests/test_managed_memory_warning.py
+++ b/cuda_core/tests/test_managed_memory_warning.py
@@ -53,7 +53,7 @@ def test_default_pool_error_without_concurrent_access(device_without_concurrent_
         with pytest.raises(RuntimeError, match="does not support managed allocations"):
             ManagedMemoryResource()
     except CUDAError as exc:
-        xfail_if_mempool_oom(exc)
+        xfail_if_mempool_oom(exc, device_without_concurrent_managed_access)
         raise
 
 
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 834d7a3a947..a118ec81c1d 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -640,7 +640,7 @@ def test_non_managed_resources_report_not_managed(mr_kind):
         mr = DeviceMemoryResource(device)
     else:
         skip_if_pinned_memory_unsupported(device)
-        mr = create_pinned_memory_resource_or_skip()
+        mr = create_pinned_memory_resource_or_skip(xfail_device=device)
     assert mr.is_managed is False
     buf = mr.allocate(1024)
     assert buf.is_managed is False
@@ -685,7 +685,7 @@ def test_pinned_memory_resource_initialization(init_cuda):
 
     device.set_current()
 
-    mr = create_pinned_memory_resource_or_skip()
+    mr = create_pinned_memory_resource_or_skip(xfail_device=device)
     assert mr.is_device_accessible
     assert mr.is_host_accessible
 
@@ -1582,7 +1582,7 @@ def test_memory_resource_alloc_zero_bytes(init_cuda, memory_resource_factory):
         pytest.skip("Device does not support mempool operations")
     elif MR is PinnedMemoryResource:
         skip_if_pinned_memory_unsupported(device)
-        mr = create_pinned_memory_resource_or_skip()
+        mr = create_pinned_memory_resource_or_skip(xfail_device=device)
     elif MR is ManagedMemoryResource:
         skip_if_managed_memory_unsupported(device)
         mr = create_managed_memory_resource_or_xfail(MROps(preferred_location=device.device_id))

From d89a77bee73cd23eefb567de81a0ae129d5379fb Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Thu, 30 Apr 2026 19:59:35 -0700
Subject: [PATCH 3/8] test: centralize mempool OOM xfail helper

Move the Windows MCDM detection and mempool OOM xfail handling into a shared test helper so cuda.bindings and cuda.core tests use the same workaround logic.

Made-with: Cursor
---
 .../cuda/bindings/_test_helpers/mempool.py    | 51 +++++++++++++++++++
 cuda_bindings/tests/test_cuda.py              | 31 +----------
 cuda_bindings/tests/test_cudart.py            | 32 +-----------
 cuda_core/tests/conftest.py                   | 32 +-----------
 .../tests/test_managed_memory_warning.py      |  2 +-
 5 files changed, 55 insertions(+), 93 deletions(-)
 create mode 100644 cuda_bindings/cuda/bindings/_test_helpers/mempool.py

diff --git a/cuda_bindings/cuda/bindings/_test_helpers/mempool.py b/cuda_bindings/cuda/bindings/_test_helpers/mempool.py
new file mode 100644
index 00000000000..5bf5e8d6d10
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_test_helpers/mempool.py
@@ -0,0 +1,51 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import sys
+
+import pytest
+
+from cuda.bindings import driver, runtime
+
+
+def is_windows_mcdm_device(device=0):
+    if sys.platform != "win32":
+        return False
+    try:
+        import cuda.bindings.nvml as nvml
+
+        device_id = int(device.device_id if hasattr(device, "device_id") else device)
+        (err,) = driver.cuInit(0)
+        if err != driver.CUresult.CUDA_SUCCESS:
+            return False
+        err, pci_bus_id = driver.cuDeviceGetPCIBusId(13, device_id)
+        if err != driver.CUresult.CUDA_SUCCESS:
+            return False
+        pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
+        nvml.init_v2()
+        try:
+            handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
+            current, _ = nvml.device_get_driver_model_v2(handle)
+            return current == nvml.DriverModel.DRIVER_MCDM
+        finally:
+            nvml.shutdown()
+    except Exception:
+        # If MCDM detection fails, leave the primary test failure visible.
+        return False
+
+
+def xfail_if_mempool_oom(err_or_exc, api_name=None, device=0):
+    if api_name is not None and not isinstance(api_name, str):
+        device = api_name
+        api_name = None
+
+    is_oom = err_or_exc in (
+        driver.CUresult.CUDA_ERROR_OUT_OF_MEMORY,
+        runtime.cudaError_t.cudaErrorMemoryAllocation,
+    ) or "CUDA_ERROR_OUT_OF_MEMORY" in str(err_or_exc)
+
+    if not is_oom or not is_windows_mcdm_device(device):
+        return
+
+    api_context = f"{api_name} " if api_name else ""
+    pytest.xfail(f"{api_context}could not reserve VA for mempool operations on Windows MCDM")
diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py
index 844d2cac138..e12d53d9665 100644
--- a/cuda_bindings/tests/test_cuda.py
+++ b/cuda_bindings/tests/test_cuda.py
@@ -12,6 +12,7 @@
 import cuda.bindings.driver as cuda
 import cuda.bindings.runtime as cudart
 from cuda.bindings import driver
+from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
 
 
 def driverVersionLessThan(target):
@@ -32,36 +33,6 @@ def supportsManagedMemory():
     return err == cudart.cudaError_t.cudaSuccess and isSupported
 
 
-def is_windows_mcdm_device(device):
-    if platform.system() != "Windows":
-        return False
-    try:
-        import cuda.bindings.nvml as nvml
-
-        (err,) = cuda.cuInit(0)
-        if err != cuda.CUresult.CUDA_SUCCESS:
-            return False
-        err, pci_bus_id = cuda.cuDeviceGetPCIBusId(13, device)
-        if err != cuda.CUresult.CUDA_SUCCESS:
-            return False
-        pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
-        nvml.init_v2()
-        try:
-            handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
-            current, _ = nvml.device_get_driver_model_v2(handle)
-            return current == nvml.DriverModel.DRIVER_MCDM
-        finally:
-            nvml.shutdown()
-    except Exception:
-        # If MCDM detection fails, leave the primary test failure visible.
-        return False
-
-
-def xfail_if_mempool_oom(err, api_name, device=0):
-    if err == cuda.CUresult.CUDA_ERROR_OUT_OF_MEMORY and is_windows_mcdm_device(device):
-        pytest.xfail(f"{api_name} could not reserve VA for mempool operations on Windows MCDM")
-
-
 def supportsCudaAPI(name):
     return name in dir(cuda)
 
diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py
index 8e011bff26f..144d7e75b12 100644
--- a/cuda_bindings/tests/test_cudart.py
+++ b/cuda_bindings/tests/test_cudart.py
@@ -3,7 +3,6 @@
 
 import ctypes
 import math
-import platform
 
 import numpy as np
 import pytest
@@ -12,6 +11,7 @@
 import cuda.bindings.runtime as cudart
 from cuda import pathfinder
 from cuda.bindings import runtime
+from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
 
 
 def isSuccess(err):
@@ -33,36 +33,6 @@ def supportsMemoryPool():
     return isSuccess(err) and isSupported
 
 
-def is_windows_mcdm_device(device):
-    if platform.system() != "Windows":
-        return False
-    try:
-        import cuda.bindings.nvml as nvml
-
-        (err,) = cuda.cuInit(0)
-        if err != cuda.CUresult.CUDA_SUCCESS:
-            return False
-        err, pci_bus_id = cuda.cuDeviceGetPCIBusId(13, device)
-        if err != cuda.CUresult.CUDA_SUCCESS:
-            return False
-        pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
-        nvml.init_v2()
-        try:
-            handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
-            current, _ = nvml.device_get_driver_model_v2(handle)
-            return current == nvml.DriverModel.DRIVER_MCDM
-        finally:
-            nvml.shutdown()
-    except Exception:
-        # If MCDM detection fails, leave the primary test failure visible.
-        return False
-
-
-def xfail_if_mempool_oom(err, api_name, device=0):
-    if err == cudart.cudaError_t.cudaErrorMemoryAllocation and is_windows_mcdm_device(device):
-        pytest.xfail(f"{api_name} could not reserve VA for mempool operations on Windows MCDM")
-
-
 def supportsSparseTexturesDeviceFilter():
     err, isSupported = cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrSparseCudaArraySupported, 0)
     return isSuccess(err) and isSupported
diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index 161180a74a0..ff1be4de28a 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -17,6 +17,7 @@
     from cuda import cuda as driver
 
 import cuda.core
+from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
 from cuda.core import (
     Device,
     DeviceMemoryResource,
@@ -90,37 +91,6 @@ def create_pinned_memory_resource_or_skip(*args, xfail_device=None, **kwargs):
         raise
 
 
-def is_windows_mcdm_device(device=0):
-    if sys.platform != "win32":
-        return False
-    try:
-        import cuda.bindings.nvml as nvml
-
-        device_id = int(device.device_id if hasattr(device, "device_id") else device)
-        (err,) = driver.cuInit(0)
-        if err != driver.CUresult.CUDA_SUCCESS:
-            return False
-        err, pci_bus_id = driver.cuDeviceGetPCIBusId(13, device_id)
-        if err != driver.CUresult.CUDA_SUCCESS:
-            return False
-        pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
-        nvml.init_v2()
-        try:
-            handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
-            current, _ = nvml.device_get_driver_model_v2(handle)
-            return current == nvml.DriverModel.DRIVER_MCDM
-        finally:
-            nvml.shutdown()
-    except Exception:
-        # If MCDM detection fails, leave the primary test failure visible.
-        return False
-
-
-def xfail_if_mempool_oom(exc, device=0):
-    if "CUDA_ERROR_OUT_OF_MEMORY" in str(exc) and is_windows_mcdm_device(device):
-        pytest.xfail("Driver could not reserve VA for mempool operations on Windows MCDM")
-
-
 def _device_id_from_resource_options(device, args, kwargs):
     if device is not None:
         return device
diff --git a/cuda_core/tests/test_managed_memory_warning.py b/cuda_core/tests/test_managed_memory_warning.py
index 5e6032ebe9e..dff6622d504 100644
--- a/cuda_core/tests/test_managed_memory_warning.py
+++ b/cuda_core/tests/test_managed_memory_warning.py
@@ -13,7 +13,7 @@
 import pytest
 
 import cuda.bindings
-from conftest import xfail_if_mempool_oom
+from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
 from cuda.core import Device, ManagedMemoryResource, ManagedMemoryResourceOptions
 from cuda.core._memory._managed_memory_resource import reset_concurrent_access_warning
 from cuda.core._utils.cuda_utils import CUDAError

From 79543e47eb4019a0154b744c74b92edb6c5a6272 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Thu, 30 Apr 2026 20:09:49 -0700
Subject: [PATCH 4/8] test: keep MCDM detection fallback in xfail helper

Let is_windows_mcdm_device() report only the detected driver model and let exceptions raised during detection propagate; the broad exception fallback moves into xfail_if_mempool_oom(), where a failed detection should leave the original test failure visible.

Made-with: Cursor
---
 .../cuda/bindings/_test_helpers/mempool.py    | 45 ++++++++++---------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/cuda_bindings/cuda/bindings/_test_helpers/mempool.py b/cuda_bindings/cuda/bindings/_test_helpers/mempool.py
index 5bf5e8d6d10..22de522f570 100644
--- a/cuda_bindings/cuda/bindings/_test_helpers/mempool.py
+++ b/cuda_bindings/cuda/bindings/_test_helpers/mempool.py
@@ -11,27 +11,23 @@
 def is_windows_mcdm_device(device=0):
     if sys.platform != "win32":
         return False
-    try:
-        import cuda.bindings.nvml as nvml
-
-        device_id = int(device.device_id if hasattr(device, "device_id") else device)
-        (err,) = driver.cuInit(0)
-        if err != driver.CUresult.CUDA_SUCCESS:
-            return False
-        err, pci_bus_id = driver.cuDeviceGetPCIBusId(13, device_id)
-        if err != driver.CUresult.CUDA_SUCCESS:
-            return False
-        pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
-        nvml.init_v2()
-        try:
-            handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
-            current, _ = nvml.device_get_driver_model_v2(handle)
-            return current == nvml.DriverModel.DRIVER_MCDM
-        finally:
-            nvml.shutdown()
-    except Exception:
-        # If MCDM detection fails, leave the primary test failure visible.
+    import cuda.bindings.nvml as nvml
+
+    device_id = int(device.device_id if hasattr(device, "device_id") else device)
+    (err,) = driver.cuInit(0)
+    if err != driver.CUresult.CUDA_SUCCESS:
+        return False
+    err, pci_bus_id = driver.cuDeviceGetPCIBusId(13, device_id)
+    if err != driver.CUresult.CUDA_SUCCESS:
         return False
+    pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
+    nvml.init_v2()
+    try:
+        handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
+        current, _ = nvml.device_get_driver_model_v2(handle)
+        return current == nvml.DriverModel.DRIVER_MCDM
+    finally:
+        nvml.shutdown()
 
 
 def xfail_if_mempool_oom(err_or_exc, api_name=None, device=0):
@@ -44,7 +40,14 @@ def xfail_if_mempool_oom(err_or_exc, api_name=None, device=0):
         runtime.cudaError_t.cudaErrorMemoryAllocation,
     ) or "CUDA_ERROR_OUT_OF_MEMORY" in str(err_or_exc)
 
-    if not is_oom or not is_windows_mcdm_device(device):
+    if not is_oom:
+        return
+    try:
+        is_windows_mcdm = is_windows_mcdm_device(device)
+    except Exception:
+        # If MCDM detection fails, leave the primary test failure visible.
+        return
+    if not is_windows_mcdm:
         return
 
     api_context = f"{api_name} " if api_name else ""

From 20c8a7ac7aa0e648dc40bcbf98d3a1e3e2bea5b6 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Fri, 1 May 2026 10:30:00 -0700
Subject: [PATCH 5/8] test: simplify MCDM helper device lookup

Use getattr with a default in the shared mempool helper; it still accepts device objects and raw ordinals, now without the explicit hasattr branch.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 cuda_bindings/cuda/bindings/_test_helpers/mempool.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_bindings/cuda/bindings/_test_helpers/mempool.py b/cuda_bindings/cuda/bindings/_test_helpers/mempool.py
index 22de522f570..deee79f1aff 100644
--- a/cuda_bindings/cuda/bindings/_test_helpers/mempool.py
+++ b/cuda_bindings/cuda/bindings/_test_helpers/mempool.py
@@ -13,7 +13,7 @@ def is_windows_mcdm_device(device=0):
         return False
     import cuda.bindings.nvml as nvml
 
-    device_id = int(device.device_id if hasattr(device, "device_id") else device)
+    device_id = int(getattr(device, "device_id", device))
     (err,) = driver.cuInit(0)
     if err != driver.CUresult.CUDA_SUCCESS:
         return False

From 6e790f7145c177c21120b60ab45389de276fc54a Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Fri, 1 May 2026 10:40:57 -0700
Subject: [PATCH 6/8] test: restore managed helper skip naming

Keep the established managed-memory test helper name so call sites stay readable, while documenting that Windows MCDM mempool OOM setup failures are xfailed rather than skipped.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 cuda_core/tests/conftest.py        |  4 +++-
 cuda_core/tests/test_memory.py     | 30 +++++++++++++++---------------
 cuda_core/tests/test_tensor_map.py |  6 +++---
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index ff1be4de28a..d8a8b91bb84 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -71,7 +71,9 @@ def skip_if_managed_memory_unsupported(device):
         raise
 
 
-def create_managed_memory_resource_or_xfail(*args, xfail_device=None, **kwargs):
+def create_managed_memory_resource_or_skip(*args, xfail_device=None, **kwargs):
+    # Keep the established "skip" helper name for call-site readability, even though
+    # Windows MCDM mempool OOM setup failures are xfailed instead of skipped.
     try:
         return ManagedMemoryResource(*args, **kwargs)
     except CUDAError as e:
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index a118ec81c1d..90984db58b5 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -21,7 +21,7 @@
 from helpers.buffers import DummyUnifiedMemoryResource, TrackingMR
 
 from conftest import (
-    create_managed_memory_resource_or_xfail,
+    create_managed_memory_resource_or_skip,
     create_pinned_memory_resource_or_skip,
     skip_if_managed_memory_unsupported,
     skip_if_pinned_memory_unsupported,
@@ -618,7 +618,7 @@ def test_managed_memory_resource_buffer_dlpack_device_type():
     device = Device()
     device.set_current()
     skip_if_managed_memory_unsupported(device)
-    mr = create_managed_memory_resource_or_xfail(ManagedMemoryResourceOptions(preferred_location=device.device_id))
+    mr = create_managed_memory_resource_or_skip(ManagedMemoryResourceOptions(preferred_location=device.device_id))
     buf = mr.allocate(1024)
 
     assert mr.is_managed
@@ -714,7 +714,7 @@ def test_managed_memory_resource_initialization(init_cuda):
 
     device.set_current()
 
-    mr = create_managed_memory_resource_or_xfail()
+    mr = create_managed_memory_resource_or_skip()
     assert mr.is_device_accessible
     assert mr.is_host_accessible
 
@@ -1029,7 +1029,7 @@ def test_managed_memory_resource_with_options(init_cuda):
 
     # Test basic pool creation
     options = ManagedMemoryResourceOptions()
-    mr = create_managed_memory_resource_or_xfail(options)
+    mr = create_managed_memory_resource_or_skip(options)
     assert mr.is_device_accessible
     assert mr.is_host_accessible
     assert not mr.is_ipc_enabled
@@ -1072,7 +1072,7 @@ def test_managed_memory_resource_preferred_location_default(init_cuda):
     skip_if_managed_memory_unsupported(device)
     device.set_current()
 
-    mr = create_managed_memory_resource_or_xfail()
+    mr = create_managed_memory_resource_or_skip()
     assert mr.preferred_location is None
 
 
@@ -1084,7 +1084,7 @@ def test_managed_memory_resource_preferred_location_device(init_cuda):
 
     # Legacy style
     opts = ManagedMemoryResourceOptions(preferred_location=device.device_id)
-    mr = create_managed_memory_resource_or_xfail(opts)
+    mr = create_managed_memory_resource_or_skip(opts)
     assert mr.preferred_location == ("device", device.device_id)
 
     # Explicit style
@@ -1092,7 +1092,7 @@ def test_managed_memory_resource_preferred_location_device(init_cuda):
         preferred_location=device.device_id,
         preferred_location_type="device",
     )
-    mr = create_managed_memory_resource_or_xfail(opts)
+    mr = create_managed_memory_resource_or_skip(opts)
     assert mr.preferred_location == ("device", device.device_id)
 
 
@@ -1104,12 +1104,12 @@ def test_managed_memory_resource_preferred_location_host(init_cuda):
 
     # Legacy style
     opts = ManagedMemoryResourceOptions(preferred_location=-1)
-    mr = create_managed_memory_resource_or_xfail(opts)
+    mr = create_managed_memory_resource_or_skip(opts)
     assert mr.preferred_location == ("host", None)
 
     # Explicit style
     opts = ManagedMemoryResourceOptions(preferred_location_type="host")
-    mr = create_managed_memory_resource_or_xfail(opts)
+    mr = create_managed_memory_resource_or_skip(opts)
     assert mr.preferred_location == ("host", None)
 
 
@@ -1125,7 +1125,7 @@ def test_managed_memory_resource_preferred_location_host_numa(init_cuda):
 
     # Auto-resolved from current device
     opts = ManagedMemoryResourceOptions(preferred_location_type="host_numa")
-    mr = create_managed_memory_resource_or_xfail(opts)
+    mr = create_managed_memory_resource_or_skip(opts)
     assert mr.preferred_location == ("host_numa", numa_id)
 
     # Explicit NUMA node ID
@@ -1133,7 +1133,7 @@ def test_managed_memory_resource_preferred_location_host_numa(init_cuda):
         preferred_location=numa_id,
         preferred_location_type="host_numa",
     )
-    mr = create_managed_memory_resource_or_xfail(opts)
+    mr = create_managed_memory_resource_or_skip(opts)
     assert mr.preferred_location == ("host_numa", numa_id)
 
 
@@ -1424,7 +1424,7 @@ def test_mempool_attributes(ipc_enabled, memory_resource_factory, property_name,
         assert mr.is_ipc_enabled == ipc_enabled
     elif MR is ManagedMemoryResource:
         options = MRops()
-        mr = create_managed_memory_resource_or_xfail(options)
+        mr = create_managed_memory_resource_or_skip(options)
         assert not mr.is_ipc_enabled
 
     # Get the property value
@@ -1477,7 +1477,7 @@ def test_mempool_attributes_repr(memory_resource_factory):
     elif MR is PinnedMemoryResource:
         mr = MR(options={"max_size": 2048})
     elif MR is ManagedMemoryResource:
-        mr = create_managed_memory_resource_or_xfail(options={})
+        mr = create_managed_memory_resource_or_skip(options={})
 
     buffer1 = mr.allocate(64)
     buffer2 = mr.allocate(64)
@@ -1514,7 +1514,7 @@ def test_mempool_attributes_ownership(memory_resource_factory):
     elif MR is PinnedMemoryResource:
         mr = MR({"max_size": POOL_SIZE})
     elif MR is ManagedMemoryResource:
-        mr = create_managed_memory_resource_or_xfail({})
+        mr = create_managed_memory_resource_or_skip({})
 
     attributes = mr.attributes
     mr.close()
@@ -1585,7 +1585,7 @@ def test_memory_resource_alloc_zero_bytes(init_cuda, memory_resource_factory):
         mr = create_pinned_memory_resource_or_skip(xfail_device=device)
     elif MR is ManagedMemoryResource:
         skip_if_managed_memory_unsupported(device)
-        mr = create_managed_memory_resource_or_xfail(MROps(preferred_location=device.device_id))
+        mr = create_managed_memory_resource_or_skip(MROps(preferred_location=device.device_id))
     else:
         assert MR is DeviceMemoryResource
         mr = MR(device)
diff --git a/cuda_core/tests/test_tensor_map.py b/cuda_core/tests/test_tensor_map.py
index 17a1ad221e3..9ca8790d2b8 100644
--- a/cuda_core/tests/test_tensor_map.py
+++ b/cuda_core/tests/test_tensor_map.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pytest
 
-from conftest import create_managed_memory_resource_or_xfail, skip_if_managed_memory_unsupported
+from conftest import create_managed_memory_resource_or_skip, skip_if_managed_memory_unsupported
 from cuda.core import (
     Device,
     ManagedMemoryResourceOptions,
@@ -403,7 +403,7 @@ def test_replace_address_accepts_managed_buffer_on_nonzero_device(self, init_cud
             data_type=TensorMapDataType.FLOAT32,
         )
 
-        mr = create_managed_memory_resource_or_xfail(ManagedMemoryResourceOptions(preferred_location=dev1.device_id))
+        mr = create_managed_memory_resource_or_skip(ManagedMemoryResourceOptions(preferred_location=dev1.device_id))
         managed_buf = mr.allocate(1024 * 4)
 
         desc.replace_address(managed_buf)
@@ -442,7 +442,7 @@ def test_from_tiled_accepts_managed_buffer_on_nonzero_device(self, init_cuda):
         skip_if_managed_memory_unsupported(dev1)
 
         dev1.set_current()
-        mr = create_managed_memory_resource_or_xfail(ManagedMemoryResourceOptions(preferred_location=dev1.device_id))
+        mr = create_managed_memory_resource_or_skip(ManagedMemoryResourceOptions(preferred_location=dev1.device_id))
         managed_buf = mr.allocate(1024 * 4)
 
         desc = _as_view(managed_buf).as_tensor_map(

From d757fa0a5339cb388ebf7e7e6e3ff789abc5303e Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Fri, 1 May 2026 10:56:02 -0700
Subject: [PATCH 7/8] test: rename pinned helper for xfail flow

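Clarify pinned mempool test setup: capability checks keep their skip naming, while the constructor helper is renamed from create_pinned_memory_resource_or_skip to create_pinned_memory_resource_or_xfail to reflect the Windows MCDM workaround.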

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 cuda_core/tests/conftest.py    | 2 +-
 cuda_core/tests/test_memory.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index d8a8b91bb84..074a3f3942a 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -85,7 +85,7 @@ def create_managed_memory_resource_or_skip(*args, xfail_device=None, **kwargs):
         raise
 
 
-def create_pinned_memory_resource_or_skip(*args, xfail_device=None, **kwargs):
+def create_pinned_memory_resource_or_xfail(*args, xfail_device=None, **kwargs):
     try:
         return PinnedMemoryResource(*args, **kwargs)
     except CUDAError as e:
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 90984db58b5..fb99895616d 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -22,7 +22,7 @@
 
 from conftest import (
     create_managed_memory_resource_or_skip,
-    create_pinned_memory_resource_or_skip,
+    create_pinned_memory_resource_or_xfail,
     skip_if_managed_memory_unsupported,
     skip_if_pinned_memory_unsupported,
 )
@@ -640,7 +640,7 @@ def test_non_managed_resources_report_not_managed(mr_kind):
         mr = DeviceMemoryResource(device)
     else:
         skip_if_pinned_memory_unsupported(device)
-        mr = create_pinned_memory_resource_or_skip(xfail_device=device)
+        mr = create_pinned_memory_resource_or_xfail(xfail_device=device)
     assert mr.is_managed is False
     buf = mr.allocate(1024)
     assert buf.is_managed is False
@@ -685,7 +685,7 @@ def test_pinned_memory_resource_initialization(init_cuda):
 
     device.set_current()
 
-    mr = create_pinned_memory_resource_or_skip(xfail_device=device)
+    mr = create_pinned_memory_resource_or_xfail(xfail_device=device)
     assert mr.is_device_accessible
     assert mr.is_host_accessible
 
@@ -1582,7 +1582,7 @@ def test_memory_resource_alloc_zero_bytes(init_cuda, memory_resource_factory):
         pytest.skip("Device does not support mempool operations")
     elif MR is PinnedMemoryResource:
         skip_if_pinned_memory_unsupported(device)
-        mr = create_pinned_memory_resource_or_skip(xfail_device=device)
+        mr = create_pinned_memory_resource_or_xfail(xfail_device=device)
     elif MR is ManagedMemoryResource:
         skip_if_managed_memory_unsupported(device)
         mr = create_managed_memory_resource_or_skip(MROps(preferred_location=device.device_id))

From 04307fee6a68c224cc785bfd3df7192d2b7c8b0d Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Fri, 1 May 2026 12:50:15 -0700
Subject: [PATCH 8/8] test: tolerate missing mempool xfail helper

Allow cuda_core tests to run against older cuda.bindings artifacts by falling back to a no-op xfail_if_mempool_oom when cuda.bindings._test_helpers.mempool cannot be imported, so collection succeeds without the new OOM xfail behavior.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 cuda_core/tests/conftest.py                    | 11 ++++++++++-
 cuda_core/tests/test_managed_memory_warning.py |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index 074a3f3942a..9f48686c30c 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -17,7 +17,6 @@
     from cuda import cuda as driver
 
 import cuda.core
-from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
 from cuda.core import (
     Device,
     DeviceMemoryResource,
@@ -30,6 +29,16 @@
 )
 from cuda.core._utils.cuda_utils import CUDAError, handle_return
 
+try:
+    from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
+except ModuleNotFoundError:
+    # Older cuda.bindings artifacts (for example 12.9.x backports) do not ship
+    # this helper. Fall back to a no-op with the same signature so the primary
+    # failure stays visible instead of being xfailed as the Windows MCDM issue.
+    def xfail_if_mempool_oom(err_or_exc, api_name=None, device=0):
+        return
+
+
 # Import shared test helpers for tests across subprojects.
 # PLEASE KEEP IN SYNC with copies in other conftest.py in this repo.
 _test_helpers_root = pathlib.Path(__file__).resolve().parents[2] / "cuda_python_test_helpers"
diff --git a/cuda_core/tests/test_managed_memory_warning.py b/cuda_core/tests/test_managed_memory_warning.py
index dff6622d504..5e6032ebe9e 100644
--- a/cuda_core/tests/test_managed_memory_warning.py
+++ b/cuda_core/tests/test_managed_memory_warning.py
@@ -13,7 +13,7 @@
 import pytest
 
 import cuda.bindings
-from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
+from conftest import xfail_if_mempool_oom
 from cuda.core import Device, ManagedMemoryResource, ManagedMemoryResourceOptions
 from cuda.core._memory._managed_memory_resource import reset_concurrent_access_warning
 from cuda.core._utils.cuda_utils import CUDAError