Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions architecture/compute-runtimes.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ template resource limits. Docker and Podman apply them as runtime limits.
Kubernetes mirrors each limit into the matching request. VM accepts the fields
but currently ignores them.

GPU requests enter the driver layer through `SandboxSpec.gpu` and
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this will change in #1156

`SandboxSpec.gpu_device`. Docker and Podman map default GPU requests to one
concrete NVIDIA CDI device when individual CDI devices are available, use
`nvidia.com/gpu=all` only for WSL2/all-only compatibility, and pass explicit
driver-native device IDs through.

VM runtime state paths are derived only from driver-validated sandbox IDs
matching `[A-Za-z0-9._-]{1,128}`. The gateway-owned VM driver socket uses a
private `run/` directory plus Unix peer UID/PID checks. Standalone
Expand Down
314 changes: 296 additions & 18 deletions crates/openshell-core/src/gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,183 @@

//! Shared GPU request helpers.

use std::fmt;
use std::sync::atomic::{AtomicUsize, Ordering};

use crate::config::CDI_GPU_DEVICE_ALL;

/// Resolve the existing GPU request fields into CDI device identifiers.
///
/// `None` means no GPU was requested. A GPU request with no explicit device
/// ID uses the CDI all-GPU request; otherwise the driver-native ID passes
/// through unchanged.
#[must_use]
pub fn cdi_gpu_device_ids(gpu: bool, gpu_device: &str) -> Option<Vec<String>> {
gpu.then(|| {
if gpu_device.is_empty() {
vec![CDI_GPU_DEVICE_ALL.to_string()]
const CDI_NVIDIA_GPU_PREFIX: &str = "nvidia.com/gpu=";
const CDI_NVIDIA_GPU_ALL_SUFFIX: &str = "all";

/// Normalized CDI GPU inventory used by local container drivers.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CdiGpuInventory {
device_ids: Vec<String>,
}

impl CdiGpuInventory {
/// Build a normalized inventory from runtime-reported CDI device IDs.
#[must_use]
pub fn new(device_ids: impl IntoIterator<Item = impl AsRef<str>>) -> Self {
let mut device_ids = device_ids
.into_iter()
.filter_map(|id| {
let id = id.as_ref().trim();
id.starts_with(CDI_NVIDIA_GPU_PREFIX)
.then(|| id.to_string())
})
.collect::<Vec<_>>();
device_ids.sort();
device_ids.dedup();
Self { device_ids }
}

#[must_use]
pub fn as_slice(&self) -> &[String] {
&self.device_ids
}

#[must_use]
pub fn is_empty(&self) -> bool {
self.device_ids.is_empty()
}

fn default_device_family(
&self,
allow_all_devices: bool,
) -> Result<Vec<String>, CdiGpuSelectionError> {
let mut indexed = self
.device_ids
.iter()
.filter_map(|id| {
let suffix = cdi_nvidia_gpu_suffix(id)?;
let index = suffix.parse::<u64>().ok()?;
Some((index, id.clone()))
})
.collect::<Vec<_>>();
if !indexed.is_empty() {
indexed.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
return Ok(indexed.into_iter().map(|(_, id)| id).collect());
}

let mut named = self
.device_ids
.iter()
.filter_map(|id| {
let suffix = cdi_nvidia_gpu_suffix(id)?;
(suffix != CDI_NVIDIA_GPU_ALL_SUFFIX).then(|| id.clone())
})
.collect::<Vec<_>>();
if !named.is_empty() {
named.sort();
return Ok(named);
}

if self.device_ids.iter().any(|id| id == CDI_GPU_DEVICE_ALL) {
if !allow_all_devices {
return Err(CdiGpuSelectionError::AllDevicesDefaultUnsupported);
}
return Ok(vec![CDI_GPU_DEVICE_ALL.to_string()]);
}

Err(CdiGpuSelectionError::NoAvailableDevices)
}
}

/// Concurrency-safe round-robin cursor for default CDI GPU selection.
#[derive(Debug, Default)]
pub struct CdiGpuRoundRobin {
next: AtomicUsize,
}

impl CdiGpuRoundRobin {
#[must_use]
pub const fn new() -> Self {
Self {
next: AtomicUsize::new(0),
}
}

/// Return the next default device ID and advance the cursor.
pub fn next_default_device_id(
&self,
inventory: &CdiGpuInventory,
allow_all_devices: bool,
) -> Result<String, CdiGpuSelectionError> {
self.selected_default_device_id(inventory, true, allow_all_devices)
}

/// Return the current default device ID without advancing the cursor.
pub fn peek_default_device_id(
&self,
inventory: &CdiGpuInventory,
allow_all_devices: bool,
) -> Result<String, CdiGpuSelectionError> {
self.selected_default_device_id(inventory, false, allow_all_devices)
}

fn selected_default_device_id(
&self,
inventory: &CdiGpuInventory,
consume: bool,
allow_all_devices: bool,
) -> Result<String, CdiGpuSelectionError> {
let devices = inventory.default_device_family(allow_all_devices)?;
let base = if consume {
self.next.fetch_add(1, Ordering::Relaxed)
} else {
vec![gpu_device.to_string()]
self.next.load(Ordering::Relaxed)
};
Ok(devices[base % devices.len()].clone())
}
}

/// CDI GPU selection failed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CdiGpuSelectionError {
NoAvailableDevices,
MissingDefaultDevice,
AllDevicesDefaultUnsupported,
}

impl fmt::Display for CdiGpuSelectionError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::NoAvailableDevices => f.write_str("no NVIDIA CDI GPU devices were discovered"),
Self::MissingDefaultDevice => {
f.write_str("GPU request requires a selected default CDI GPU device")
}
Self::AllDevicesDefaultUnsupported => f.write_str(
"default GPU request resolved only to nvidia.com/gpu=all, which is not allowed on this platform; pass --gpu-device nvidia.com/gpu=all explicitly to request all GPUs",
),
}
})
}
}

impl std::error::Error for CdiGpuSelectionError {}

/// Resolve the existing GPU request fields into CDI device identifiers.
///
/// `None` means no GPU was requested. A GPU request with an explicit device ID
/// passes through unchanged. A default GPU request uses the driver-selected
/// default CDI ID.
pub fn cdi_gpu_device_ids(
gpu: bool,
gpu_device: &str,
selected_default_device: Option<&str>,
) -> Result<Option<Vec<String>>, CdiGpuSelectionError> {
if !gpu {
return Ok(None);
}
if !gpu_device.is_empty() {
return Ok(Some(vec![gpu_device.to_string()]));
}
let device = selected_default_device.ok_or(CdiGpuSelectionError::MissingDefaultDevice)?;
Ok(Some(vec![device.to_string()]))
}

fn cdi_nvidia_gpu_suffix(id: &str) -> Option<&str> {
id.strip_prefix(CDI_NVIDIA_GPU_PREFIX)
}

#[cfg(test)]
Expand All @@ -27,22 +188,139 @@ mod tests {

#[test]
fn cdi_gpu_device_ids_returns_none_when_absent() {
assert_eq!(cdi_gpu_device_ids(false, ""), None);
assert_eq!(cdi_gpu_device_ids(false, "", None), Ok(None));
}

#[test]
fn cdi_gpu_device_ids_defaults_empty_request_to_all_gpus() {
fn cdi_gpu_device_ids_uses_selected_default_device() {
assert_eq!(
cdi_gpu_device_ids(true, ""),
Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
cdi_gpu_device_ids(true, "", Some("nvidia.com/gpu=0")),
Ok(Some(vec!["nvidia.com/gpu=0".to_string()]))
);
}

#[test]
fn cdi_gpu_device_ids_rejects_missing_default_device() {
assert_eq!(
cdi_gpu_device_ids(true, "", None),
Err(CdiGpuSelectionError::MissingDefaultDevice)
);
}

#[test]
fn cdi_gpu_device_ids_passes_explicit_device_id_through() {
assert_eq!(
cdi_gpu_device_ids(true, "nvidia.com/gpu=0"),
Some(vec!["nvidia.com/gpu=0".to_string()])
cdi_gpu_device_ids(true, "nvidia.com/gpu=0", None),
Ok(Some(vec!["nvidia.com/gpu=0".to_string()]))
);
}

#[test]
fn inventory_filters_and_deduplicates_nvidia_gpu_ids() {
let inventory = CdiGpuInventory::new([
"nvidia.com/gpu=1",
"vendor.example/device=0",
"nvidia.com/gpu=1",
" nvidia.com/gpu=0 ",
]);

assert_eq!(
inventory.as_slice(),
&vec![
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string()
]
);
}

#[test]
fn round_robin_prefers_indexed_family_and_sorts_numerically() {
let inventory = CdiGpuInventory::new([
"nvidia.com/gpu=10",
"nvidia.com/gpu=UUID-b",
"nvidia.com/gpu=2",
"nvidia.com/gpu=all",
]);
let selector = CdiGpuRoundRobin::new();

assert_eq!(
selector.next_default_device_id(&inventory, false),
Ok("nvidia.com/gpu=2".to_string())
);
assert_eq!(
selector.next_default_device_id(&inventory, false),
Ok("nvidia.com/gpu=10".to_string())
);
assert_eq!(
selector.next_default_device_id(&inventory, false),
Ok("nvidia.com/gpu=2".to_string())
);
}

#[test]
fn round_robin_uses_named_family_when_no_indexed_ids_exist() {
let inventory = CdiGpuInventory::new(["nvidia.com/gpu=UUID-b", "nvidia.com/gpu=UUID-a"]);
let selector = CdiGpuRoundRobin::new();

assert_eq!(
selector.next_default_device_id(&inventory, false),
Ok("nvidia.com/gpu=UUID-a".to_string())
);
}

#[test]
fn round_robin_uses_all_only_inventory_when_allowed() {
let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]);
let selector = CdiGpuRoundRobin::new();

assert_eq!(
selector.next_default_device_id(&inventory, true),
Ok(CDI_GPU_DEVICE_ALL.to_string())
);
}

#[test]
fn round_robin_rejects_all_only_inventory_when_not_allowed() {
let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]);
let selector = CdiGpuRoundRobin::new();

assert_eq!(
selector.next_default_device_id(&inventory, false),
Err(CdiGpuSelectionError::AllDevicesDefaultUnsupported)
);
}

#[test]
fn round_robin_rejects_empty_inventory() {
let inventory = CdiGpuInventory::new(["vendor.example/device=0"]);
let selector = CdiGpuRoundRobin::new();

assert_eq!(
selector.next_default_device_id(&inventory, false),
Err(CdiGpuSelectionError::NoAvailableDevices)
);
}

#[test]
fn peek_does_not_advance_round_robin_cursor() {
let inventory = CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]);
let selector = CdiGpuRoundRobin::new();

assert_eq!(
selector.peek_default_device_id(&inventory, false),
Ok("nvidia.com/gpu=0".to_string())
);
assert_eq!(
selector.peek_default_device_id(&inventory, false),
Ok("nvidia.com/gpu=0".to_string())
);
assert_eq!(
selector.next_default_device_id(&inventory, false),
Ok("nvidia.com/gpu=0".to_string())
);
assert_eq!(
selector.next_default_device_id(&inventory, false),
Ok("nvidia.com/gpu=1".to_string())
);
}
}
2 changes: 1 addition & 1 deletion crates/openshell-driver-docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ contract:
| `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. |
| `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. |
| `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. |
| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. |
| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise selects one concrete NVIDIA CDI GPU when the sandbox spec asks for GPU support and daemon CDI support is detected. Docker daemon `/info` can permit `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback. |

The agent child process does not retain these supervisor privileges.

Expand Down
Loading
Loading