54 changes: 54 additions & 0 deletions cuda_bindings/cuda/bindings/_test_helpers/mempool.py
@@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import sys

import pytest

from cuda.bindings import driver, runtime


def is_windows_mcdm_device(device=0):
    if sys.platform != "win32":
        return False
    import cuda.bindings.nvml as nvml

    device_id = int(getattr(device, "device_id", device))
    (err,) = driver.cuInit(0)
    if err != driver.CUresult.CUDA_SUCCESS:
        return False
    err, pci_bus_id = driver.cuDeviceGetPCIBusId(13, device_id)
    if err != driver.CUresult.CUDA_SUCCESS:
        return False
    pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii")
    nvml.init_v2()
    try:
        handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
        current, _ = nvml.device_get_driver_model_v2(handle)
        return current == nvml.DriverModel.DRIVER_MCDM
    finally:
        nvml.shutdown()
Contributor:
Doesn't this assume that nvml was uninitialized on entry to this function? Would it break callers that initialized nvml?

Contributor Author:

I checked the NVML API contract directly instead of relying on memory. The short answer is that nvmlInit_v2() and nvmlShutdown() are reference-counted, so the balanced nvml.init_v2() / nvml.shutdown() pair in our helper should not break callers that had already initialized NVML. A minimal sketch of the ref-count behavior follows the list below.

The most relevant NVIDIA doc is the current NVML "Initialization and Cleanup" page: https://docs.nvidia.com/deploy/nvml-api/group__nvmlInitializationAndCleanup.html.

Cursor-generated supporting details:

  • The current NVML docs for nvmlInit_v2() say: "A reference count of the number of initializations is maintained. Shutdown only occurs when the reference count reaches zero."
  • The current NVML docs for nvmlShutdown() say: "This method should be called ... once for each call to nvmlInit_v2(). A reference count of the number of initializations is maintained. Shutdown only occurs when the reference count reaches zero."
  • The same current docs also say this applies "For all products." Separately, the NVML API reference lists Windows as a supported OS platform, so there is no indication that the ref-count behavior is Linux-only.
  • The archived R525 docs use the same ref-count language, which suggests this is not a recent or unstable contract.
  • Our cuda.bindings.nvml layer is a thin pass-through here: init_v2() calls nvmlInit_v2() directly and shutdown() calls nvmlShutdown() directly, so there is no extra Python-side lifecycle logic changing the semantics.
  • The generated binding text in cuda_bindings/cuda/bindings/nvml.pyx also reflects the same contract: ERROR_ALREADY_INITIALIZED is described as deprecated because "Multiple initializations are now allowed through ref counting."
  • The repo already encodes this assumption in cuda_bindings/tests/nvml/test_init.py, whose test_init_ref_count() explicitly exercises repeated init_v2() / shutdown() calls and checks that NVML remains initialized until the matching final shutdown. That test is skipped on Windows, so it is not direct Windows coverage, but it does show the intended interpretation inside this repo.
  • One unrelated wrinkle: the current docs say extra nvmlShutdown() calls beyond the init count are tolerated for backwards compatibility, while our local test expects UninitializedError on a naked shutdown(). That mismatch is worth keeping in mind, but it does not affect this helper because the helper uses a balanced init/shutdown pair.
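
To make the ref-count contract concrete, here is a minimal sketch (it assumes a machine where NVML loads; the outer pair stands in for a caller that already initialized NVML before invoking our helper):

import cuda.bindings.nvml as nvml

nvml.init_v2()  # caller's own initialization; ref count: 1
try:
    # The helper's balanced pair only bumps and then drops the count.
    nvml.init_v2()  # ref count: 2
    try:
        pass  # ... helper body: look up the handle, query the driver model ...
    finally:
        nvml.shutdown()  # ref count back to 1; NVML stays initialized
    # The caller's NVML handles remain valid here.
finally:
    nvml.shutdown()  # ref count reaches 0; NVML actually shuts down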



def xfail_if_mempool_oom(err_or_exc, api_name=None, device=0):
    # Support calling as xfail_if_mempool_oom(err, device): a non-string second
    # positional argument is treated as the device, not the API name.
    if api_name is not None and not isinstance(api_name, str):
        device = api_name
        api_name = None

    is_oom = err_or_exc in (
        driver.CUresult.CUDA_ERROR_OUT_OF_MEMORY,
        runtime.cudaError_t.cudaErrorMemoryAllocation,
    ) or "CUDA_ERROR_OUT_OF_MEMORY" in str(err_or_exc)

    if not is_oom:
        return
    try:
        is_windows_mcdm = is_windows_mcdm_device(device)
    except Exception:
        # If MCDM detection fails, leave the primary test failure visible.
        return
    if not is_windows_mcdm:
        return

    api_context = f"{api_name} " if api_name else ""
    pytest.xfail(f"{api_context}could not reserve VA for mempool operations on Windows MCDM")
8 changes: 8 additions & 0 deletions cuda_bindings/tests/test_cuda.py
@@ -12,6 +12,7 @@
import cuda.bindings.driver as cuda
import cuda.bindings.runtime as cudart
from cuda.bindings import driver
from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom


def driverVersionLessThan(target):
@@ -270,6 +271,7 @@ def test_cuda_memPool_attr():

    attr_list = [None] * 8
    err, pool = cuda.cuMemPoolCreate(poolProps)
    xfail_if_mempool_oom(err, "cuMemPoolCreate", poolProps.location.id)
Contributor:
I was (perhaps naively) expecting the xfail logic to appear as a decorator on the test itself, or, at worst, a context manager. I guess that's not practical?

Contributor Author:

Helper-based local skip/xfail logic is a widely used pattern in this repo, especially under cuda_core/tests. I also believe a local helper is the right tool for this specific issue, because the condition is only knowable after a specific CUDA API call returns a specific failure on a specific runtime configuration.

Cursor-generated supporting details:

  • Under cuda_core/tests, runtime gating is frequently factored into helpers and fixtures rather than only using decorators. Examples include skip_if_pinned_memory_unsupported() and skip_if_managed_memory_unsupported() in cuda_core/tests/conftest.py, plus local helpers like _skip_if_no_mempool() / _skip_if_no_managed_mempool() in cuda_core/tests/graph/test_graph_definition.py, similar _skip_if_no_mempool() helpers in several other graph/object-protocol modules, and fixture-style runtime gates like skip_if_no_tma in cuda_core/tests/test_tensor_map.py.
  • So I think it is fair to describe helper-based runtime skip/xfail logic as a commonly used pattern under cuda-python, with the strongest examples living in cuda_core/tests.
  • Decorators are most natural when the condition is static up front: platform, version, missing import, permanently absent feature, etc. Here the interesting condition is narrower: a particular mempool setup call fails with the known Windows MCDM OOM-like failure. A decorator would tend to mark the whole test based on environment rather than on the actually observed failure.
  • A context manager is more plausible than a decorator, but still not a great fit here because cuda_bindings/tests is largely return-code driven. The test gets an err back from the CUDA API and then decides what to do. In that style, a helper like xfail_if_mempool_oom(err, api_name, device) is more natural than building an exception-oriented context manager around a return-code check (see the sketch after this list).
  • The local helper also keeps the xfail narrowly scoped. Unaffected systems still pass normally, affected systems only xfail when the known bug actually reproduces, and once the underlying issue is fixed the test can begin passing immediately instead of remaining broadly pre-marked.
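
For comparison, here is a rough sketch of the context-manager alternative (hypothetical; xfail_mempool_oom_cm is an invented name, not part of this PR). It only helps when the failure arrives as an exception, which is exactly what return-code-driven tests do not give us:

import contextlib

from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom

@contextlib.contextmanager
def xfail_mempool_oom_cm(api_name=None, device=0):
    # Translate a raised OOM into an xfail; re-raise anything else untouched.
    try:
        yield
    except Exception as exc:
        xfail_if_mempool_oom(exc, api_name, device)
        raise

# Exception-style code could wrap the risky call:
#     with xfail_mempool_oom_cm("cuMemPoolCreate", device_id):
#         pool = exception_raising_wrapper(...)
# But cuda_bindings tests get (err, value) tuples back and never raise, so the
# explicit return-code check would still be needed either way.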

Contributor:

I agree this follows the existing pattern. I'd be interested in exploring options to diminish the reliance on these helpers.

At this particular line of code, errors are being checked manually, so a helper makes sense. More broadly, it would be better if the tests could be written directly and some other mechanism could translate failures into skips or xfails as needed. An aspiration; a rough sketch of one possible mechanism follows.
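
As a purely illustrative sketch of that aspiration (not something this PR implements, and it leans on pytest report internals), a conftest.py hookwrapper could let tests be written directly and reclassify the known failure afterwards:

import pytest

from cuda.bindings._test_helpers.mempool import is_windows_mcdm_device

@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    outcome = yield
    report = outcome.get_result()
    if report.when != "call" or not report.failed or call.excinfo is None:
        return
    # With assertion rewriting, a failed return-code assert usually embeds the
    # CUresult name in the exception message.
    if "CUDA_ERROR_OUT_OF_MEMORY" not in str(call.excinfo.value):
        return
    try:
        if not is_windows_mcdm_device():
            return
    except Exception:
        return  # detection failed; keep the original failure visible
    # pytest represents an xfail as a skipped outcome carrying `wasxfail`.
    report.outcome = "skipped"
    report.wasxfail = "known Windows MCDM mempool OOM"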

    assert err == cuda.CUresult.CUDA_SUCCESS

    for idx, attr in enumerate(
@@ -468,6 +470,12 @@ def test_cuda_graphMem_attr(device):
    params.bytesize = allocSize

    err, allocNode = cuda.cuGraphAddMemAllocNode(graph, None, 0, params)
    if err == cuda.CUresult.CUDA_ERROR_OUT_OF_MEMORY:
        (destroy_err,) = cuda.cuGraphDestroy(graph)
        assert destroy_err == cuda.CUresult.CUDA_SUCCESS
        (destroy_err,) = cuda.cuStreamDestroy(stream)
        assert destroy_err == cuda.CUresult.CUDA_SUCCESS
        xfail_if_mempool_oom(err, "cuGraphAddMemAllocNode", device)
    assert err == cuda.CUresult.CUDA_SUCCESS
    err, freeNode = cuda.cuGraphAddMemFreeNode(graph, [allocNode], 1, params.dptr)
    assert err == cuda.CUresult.CUDA_SUCCESS
2 changes: 2 additions & 0 deletions cuda_bindings/tests/test_cudart.py
@@ -11,6 +11,7 @@
import cuda.bindings.runtime as cudart
from cuda import pathfinder
from cuda.bindings import runtime
from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom


def isSuccess(err):
@@ -432,6 +433,7 @@ def test_cudart_MemPool_attr():

    attr_list = [None] * 8
    err, pool = cudart.cudaMemPoolCreate(poolProps)
    xfail_if_mempool_oom(err, "cudaMemPoolCreate", poolProps.location.id)
    assertSuccess(err)

    for idx, attr in enumerate(
49 changes: 47 additions & 2 deletions cuda_core/tests/conftest.py
@@ -27,7 +27,17 @@
    PinnedMemoryResourceOptions,
    _device,
)
from cuda.core._utils.cuda_utils import handle_return
from cuda.core._utils.cuda_utils import CUDAError, handle_return

try:
    from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
except ModuleNotFoundError:
    # Older cuda.bindings artifacts (for example 12.9.x backports) do not ship
    # this helper yet. In that case, keep the primary failure visible instead of
    # xfail-ing the known Windows MCDM mempool setup issue.
    def xfail_if_mempool_oom(err_or_exc, api_name=None, device=0):
        return


# Import shared test helpers for tests across subprojects.
# PLEASE KEEP IN SYNC with copies in other conftest.py in this repo.
@@ -61,21 +71,56 @@ def skip_if_managed_memory_unsupported(device):
        pytest.skip("ManagedMemoryResource requires CUDA 13.0 or later")
    try:
        ManagedMemoryResource()
    except CUDAError as e:
        xfail_if_mempool_oom(e, device)
        raise
    except RuntimeError as e:
        if "requires CUDA 13.0" in str(e):
            pytest.skip("ManagedMemoryResource requires CUDA 13.0 or later")
        raise


def create_managed_memory_resource_or_skip(*args, **kwargs):
def create_managed_memory_resource_or_skip(*args, xfail_device=None, **kwargs):
    # Keep the established "skip" helper name for call-site readability, even though
    # Windows MCDM mempool OOM setup failures are xfailed instead of skipped.
    try:
        return ManagedMemoryResource(*args, **kwargs)
    except CUDAError as e:
        xfail_if_mempool_oom(e, _device_id_from_resource_options(xfail_device, args, kwargs))
        raise
    except RuntimeError as e:
        if "requires CUDA 13.0" in str(e):
            pytest.skip("ManagedMemoryResource requires CUDA 13.0 or later")
        raise


def create_pinned_memory_resource_or_xfail(*args, xfail_device=None, **kwargs):
    try:
        return PinnedMemoryResource(*args, **kwargs)
    except CUDAError as e:
        xfail_if_mempool_oom(e, xfail_device)
        raise


def _device_id_from_resource_options(device, args, kwargs):
    # Resolve which device to probe for MCDM: an explicit device wins; otherwise
    # fall back to the resource options' preferred_location when it names a device.
    if device is not None:
        return device
    options = kwargs.get("options")
    if options is None and args:
        options = args[0]
    if options is None:
        return 0
    if isinstance(options, dict):
        preferred_location = options.get("preferred_location")
        preferred_location_type = options.get("preferred_location_type")
    else:
        preferred_location = getattr(options, "preferred_location", None)
        preferred_location_type = getattr(options, "preferred_location_type", None)
    if preferred_location_type in (None, "device") and isinstance(preferred_location, int) and preferred_location >= 0:
        return preferred_location
    return 0


@pytest.fixture(scope="session", autouse=True)
def session_setup():
# Always init CUDA.
10 changes: 8 additions & 2 deletions cuda_core/tests/test_managed_memory_warning.py
@@ -13,8 +13,10 @@
import pytest

import cuda.bindings
from conftest import xfail_if_mempool_oom
from cuda.core import Device, ManagedMemoryResource, ManagedMemoryResourceOptions
from cuda.core._memory._managed_memory_resource import reset_concurrent_access_warning
from cuda.core._utils.cuda_utils import CUDAError

_cuda_major = int(cuda.bindings.__version__.split(".")[0])

@@ -47,8 +49,12 @@ def device_without_concurrent_managed_access(init_cuda):
@requires_cuda_13
def test_default_pool_error_without_concurrent_access(device_without_concurrent_managed_access):
    """ManagedMemoryResource() raises RuntimeError when the default pool doesn't support managed."""
    with pytest.raises(RuntimeError, match="does not support managed allocations"):
        ManagedMemoryResource()
    try:
        with pytest.raises(RuntimeError, match="does not support managed allocations"):
            ManagedMemoryResource()
    except CUDAError as exc:
        xfail_if_mempool_oom(exc, device_without_concurrent_managed_access)
        raise


@requires_cuda_13
7 changes: 4 additions & 3 deletions cuda_core/tests/test_memory.py
@@ -22,6 +22,7 @@

from conftest import (
    create_managed_memory_resource_or_skip,
    create_pinned_memory_resource_or_xfail,
    skip_if_managed_memory_unsupported,
    skip_if_pinned_memory_unsupported,
)
@@ -639,7 +640,7 @@ def test_non_managed_resources_report_not_managed(mr_kind):
        mr = DeviceMemoryResource(device)
    else:
        skip_if_pinned_memory_unsupported(device)
        mr = PinnedMemoryResource()
        mr = create_pinned_memory_resource_or_xfail(xfail_device=device)
    assert mr.is_managed is False
    buf = mr.allocate(1024)
    assert buf.is_managed is False
@@ -684,7 +685,7 @@ def test_pinned_memory_resource_initialization(init_cuda):

    device.set_current()

    mr = PinnedMemoryResource()
    mr = create_pinned_memory_resource_or_xfail(xfail_device=device)
    assert mr.is_device_accessible
    assert mr.is_host_accessible

@@ -1581,7 +1582,7 @@ def test_memory_resource_alloc_zero_bytes(init_cuda, memory_resource_factory):
        pytest.skip("Device does not support mempool operations")
    elif MR is PinnedMemoryResource:
        skip_if_pinned_memory_unsupported(device)
        mr = MR()
        mr = create_pinned_memory_resource_or_xfail(xfail_device=device)
    elif MR is ManagedMemoryResource:
        skip_if_managed_memory_unsupported(device)
        mr = create_managed_memory_resource_or_skip(MROps(preferred_location=device.device_id))