diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 63748bb33..3b5fecfd8 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -76,6 +76,10 @@ pub struct KubernetesComputeConfig { pub host_gateway_ip: String, pub enable_user_namespaces: bool, pub workspace_default_storage_size: String, + /// Default Kubernetes `runtimeClassName` for sandbox pods. + /// Applied when a `CreateSandbox` request does not specify one. + /// Empty string (default) = omit the field, using the cluster default. + pub default_runtime_class_name: String, /// Lifetime (seconds) of the projected `ServiceAccount` token kubelet /// writes into each sandbox pod. Used only for the one-shot /// `IssueSandboxToken` bootstrap exchange — the gateway-minted JWT @@ -116,6 +120,7 @@ impl Default for KubernetesComputeConfig { host_gateway_ip: String::new(), enable_user_namespaces: false, workspace_default_storage_size: DEFAULT_WORKSPACE_STORAGE_SIZE.to_string(), + default_runtime_class_name: String::new(), sa_token_ttl_secs: 3600, } } @@ -176,6 +181,21 @@ mod tests { assert_eq!(cfg.service_account_name, "openshell-sandbox"); } + #[test] + fn serde_override_default_runtime_class_name() { + let json = serde_json::json!({ + "default_runtime_class_name": "nvidia" + }); + let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); + assert_eq!(cfg.default_runtime_class_name, "nvidia"); + } + + #[test] + fn default_runtime_class_name_is_empty() { + let cfg = KubernetesComputeConfig::default(); + assert!(cfg.default_runtime_class_name.is_empty()); + } + #[test] fn serde_override_image_pull_secrets() { let json = serde_json::json!({ diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 5a43eb980..34ab44a2e 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ 
b/crates/openshell-driver-kubernetes/src/driver.rs @@ -329,6 +329,7 @@ impl KubernetesComputeDriver { host_gateway_ip: &self.config.host_gateway_ip, enable_user_namespaces: self.config.enable_user_namespaces, workspace_default_storage_size: &self.config.workspace_default_storage_size, + default_runtime_class_name: &self.config.default_runtime_class_name, sa_token_ttl_secs: self.config.effective_sa_token_ttl_secs(), }; obj.data = sandbox_to_k8s_spec(sandbox.spec.as_ref(), &params); @@ -1041,6 +1042,7 @@ struct SandboxPodParams<'a> { host_gateway_ip: &'a str, enable_user_namespaces: bool, workspace_default_storage_size: &'a str, + default_runtime_class_name: &'a str, /// Lifetime (seconds) of the projected `ServiceAccount` token used /// for the bootstrap `IssueSandboxToken` exchange. sa_token_ttl_secs: i64, @@ -1064,6 +1066,7 @@ impl Default for SandboxPodParams<'_> { host_gateway_ip: "", enable_user_namespaces: false, workspace_default_storage_size: DEFAULT_WORKSPACE_STORAGE_SIZE, + default_runtime_class_name: "", sa_token_ttl_secs: 3600, } } @@ -1184,7 +1187,11 @@ fn sandbox_template_to_k8s( } let mut spec = serde_json::Map::new(); - if let Some(runtime_class) = platform_config_string(template, "runtime_class_name") { + let runtime_class_name = platform_config_string(template, "runtime_class_name").or_else(|| { + (!params.default_runtime_class_name.is_empty()) + .then(|| params.default_runtime_class_name.to_string()) + }); + if let Some(runtime_class) = runtime_class_name { spec.insert( "runtimeClassName".to_string(), serde_json::json!(runtime_class), @@ -2075,6 +2082,84 @@ mod tests { ); } + #[test] + fn default_runtime_class_name_applied_when_template_omits_it() { + let template = SandboxTemplate::default(); + let pod_template = { + let params = SandboxPodParams { + default_runtime_class_name: "kata-containers", + ..SandboxPodParams::default() + }; + sandbox_template_to_k8s( + &template, + false, + &std::collections::HashMap::new(), + true, + &params, + ) + }; + +
assert_eq!( + pod_template["spec"]["runtimeClassName"], + serde_json::json!("kata-containers") + ); + } + + #[test] + fn template_runtime_class_name_overrides_config_default() { + let template = SandboxTemplate { + platform_config: Some(Struct { + fields: std::iter::once(( + "runtime_class_name".to_string(), + Value { + kind: Some(Kind::StringValue("gvisor".to_string())), + }, + )) + .collect(), + }), + ..SandboxTemplate::default() + }; + + let pod_template = { + let params = SandboxPodParams { + default_runtime_class_name: "kata-containers", + ..SandboxPodParams::default() + }; + sandbox_template_to_k8s( + &template, + false, + &std::collections::HashMap::new(), + true, + &params, + ) + }; + + assert_eq!( + pod_template["spec"]["runtimeClassName"], + serde_json::json!("gvisor") + ); + } + + #[test] + fn runtime_class_name_omitted_when_both_template_and_default_empty() { + let template = SandboxTemplate::default(); + let pod_template = { + let params = SandboxPodParams::default(); + sandbox_template_to_k8s( + &template, + false, + &std::collections::HashMap::new(), + true, + &params, + ) + }; + + assert_eq!( + pod_template["spec"]["runtimeClassName"], + serde_json::json!(null) + ); + } + #[test] fn gpu_sandbox_preserves_existing_resource_limits() { use openshell_core::proto::compute::v1::DriverResourceRequirements; diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index fd5d902e5..b0316b66a 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -122,6 +122,8 @@ async fn main() -> Result<()> { .unwrap_or_else(|_| { openshell_driver_kubernetes::DEFAULT_WORKSPACE_STORAGE_SIZE.to_string() }), + default_runtime_class_name: std::env::var("OPENSHELL_K8S_DEFAULT_RUNTIME_CLASS_NAME") + .unwrap_or_default(), sa_token_ttl_secs: args.sa_token_ttl_secs, }) .await diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index 62b313e0b..315fc5f47
100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -187,6 +187,7 @@ JWT signing Secret. | securityContext.runAsUser | int | `1000` | UID assigned to the gateway container. | | server.auth.allowUnauthenticatedUsers | bool | `false` | UNSAFE: accept unauthenticated CLI/user requests as a local developer principal. Intended only for trusted local Skaffold/k3d development or a fully trusted fronting proxy. Leave false for shared or production clusters. | | server.dbUrl | string | `"sqlite:/var/openshell/openshell.db"` | Gateway database URL (used for the default SQLite backend). | +| server.defaultRuntimeClassName | string | `""` | Default Kubernetes runtimeClassName for sandbox pods. Applied when a CreateSandbox request does not specify one. Empty (default) = omit the field, using the cluster's default RuntimeClass. Set to a RuntimeClass name (e.g. "kata-containers", "nvidia") to apply it to all sandboxes that don't explicitly override it. | | server.disableTls | bool | `false` | Disable TLS entirely - the server listens on plaintext HTTP. Set to true when a reverse proxy / tunnel terminates TLS at the edge. | | server.enableLoopbackServiceHttp | bool | `true` | Enable plaintext HTTP routing for loopback sandbox service URLs on TLS-enabled gateways. | | server.enableUserNamespaces | bool | `false` | Enable Kubernetes user namespace isolation (hostUsers: false) for sandbox pods. Requires Kubernetes 1.33+ with user namespace support available (beta through 1.35, GA in 1.36+), plus a supporting container runtime and Linux 5.12+. When enabled, container UID 0 maps to an unprivileged host UID and capabilities become namespaced. 
| diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index ac7478de2..6c1805036 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -118,6 +118,9 @@ data: {{- if .Values.server.workspaceDefaultStorageSize }} workspace_default_storage_size = {{ .Values.server.workspaceDefaultStorageSize | quote }} {{- end }} + {{- if .Values.server.defaultRuntimeClassName }} + default_runtime_class_name = {{ .Values.server.defaultRuntimeClassName | quote }} + {{- end }} {{- if .Values.supervisor.image.pullPolicy }} supervisor_image_pull_policy = {{ .Values.supervisor.image.pullPolicy | quote }} {{- end }} diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index 26b1a2486..848bc816e 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -158,6 +158,12 @@ server: # Uses Kubernetes quantity syntax (e.g. "2Gi", "10Gi", "500Mi"). # Empty = built-in default (2Gi). workspaceDefaultStorageSize: "" + # -- Default Kubernetes runtimeClassName for sandbox pods. + # Applied when a CreateSandbox request does not specify one. + # Empty (default) = omit the field, using the cluster's default RuntimeClass. + # Set to a RuntimeClass name (e.g. "kata-containers", "nvidia") to apply it + # to all sandboxes that don't explicitly override it. + defaultRuntimeClassName: "" # -- gRPC endpoint sandboxes call back into the gateway. Leave empty to derive # it from the chart fullname, release namespace, service port, and # disableTls flag, for example https://openshell.openshell.svc.cluster.local:8080. 
diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index 0d49a6096..55e2f4aba 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -178,6 +178,9 @@ client_tls_secret_name = "openshell-client-tls" host_gateway_ip = "10.0.0.1" enable_user_namespaces = false workspace_default_storage_size = "10Gi" +# Kubernetes RuntimeClass applied to sandbox pods when the API request does +# not specify one. Empty (default) = omit the field, using the cluster default. +# default_runtime_class_name = "kata-containers" # Kubelet clamps projected tokens below 600 seconds. The driver caps values at 86400. sa_token_ttl_secs = 3600 ```