NVIDIA · ericcurtin · May 11, 2026 · johntmyers · Jun 3, 2026 · maxamillion
@@ -154,6 +154,19 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
     passthrough_headers: &["x-model-id"],
 };
 
+static MODEL_RUNNER_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
+    provider_type: "model-runner",
+    // Reached from Docker containers via the Docker-internal hostname over plain HTTP (port 80).
+    default_base_url: "http://model-runner.docker.internal/engines/llama.cpp/v1",
+    protocols: OPENAI_PROTOCOLS,
+    // No credential keys: Docker Model Runner requires no authentication by default.
+    credential_key_names: &[],
+    base_url_config_keys: &["MODEL_RUNNER_BASE_URL"],
+    auth: AuthHeader::Bearer, // NOTE(review): confirm Bearer is omitted when no credential is set
+    default_headers: &[],
+    passthrough_headers: &["x-model-id"],
+};
+
 /// Canonicalize an inference provider type string to a well-known identifier.
 ///
 /// Returns `Some(canonical_name)` for recognized inference providers,
@@ -169,6 +182,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> {
         "google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => {
             Some("google-vertex-ai")
         }
+        "model-runner" | "model_runner" => Some("model-runner"),
         _ => None,
     }
 }
@@ -183,6 +197,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf
         "anthropic" => Some(&ANTHROPIC_PROFILE),
         "nvidia" => Some(&NVIDIA_PROFILE),
         "google-vertex-ai" => Some(&VERTEX_AI_PROFILE),
+        "model-runner" => Some(&MODEL_RUNNER_PROFILE),
         _ => None,
     }
 }
@@ -360,6 +375,19 @@ mod tests {
         }
     }
 
+    #[test]
+    fn profile_for_model_runner() {
+        let profile = profile_for("model-runner").expect("model-runner profile should be Some");
+        assert_eq!(profile.provider_type, "model-runner");
+        assert!(
+            profile
+                .default_base_url
+                .contains("model-runner.docker.internal"),
+            "default base URL should use Docker-internal hostname"
+        );
+        assert!(profile.credential_key_names.is_empty()); // the profile declares no credential keys
+    }
+
     #[test]
     fn auth_for_vertex_uses_bearer() {
         let (auth, headers) = auth_for_provider_type("google-vertex-ai");

@@ -118,6 +118,7 @@ impl ProviderRegistry {
         registry.register(providers::nvidia::SPEC);
         registry.register(providers::gitlab::SPEC);
         registry.register(providers::github::SPEC);
+        registry.register(providers::model_runner::ModelRunnerProvider);
         registry.register(providers::outlook::OutlookProvider);
         registry
     }
@@ -182,6 +183,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> {
         "generic" => Some("generic"),
         "gitlab" | "glab" => Some("gitlab"),
         "github" | "gh" => Some("github"),
+        "model-runner" | "model_runner" => Some("model-runner"),
         "outlook" => Some("outlook"),
         _ => None,
     }

@@ -23,6 +23,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[
     include_str!("../../../providers/cursor.yaml"),
     include_str!("../../../providers/github.yaml"),
     include_str!("../../../providers/google-vertex-ai.yaml"),
+    include_str!("../../../providers/model-runner.yaml"),
     include_str!("../../../providers/nvidia.yaml"),
     include_str!("../../../providers/pypi.yaml"),
 ];

@@ -37,6 +37,7 @@ pub mod copilot;
 pub mod generic;
 pub mod github;
 pub mod gitlab;
+pub mod model_runner;
 pub mod nvidia;
 pub mod openai;
 pub mod opencode;

@@ -0,0 +1,35 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::{DiscoveredProvider, ProviderError, ProviderPlugin};
+
+pub struct ModelRunnerProvider; // unit struct: the plugin carries no state
+
+impl ProviderPlugin for ModelRunnerProvider {
+    fn id(&self) -> &'static str {
+        "model-runner" // same string as the `model-runner` provider type
+    }
+
+    fn discover_existing(&self) -> Result<Option<DiscoveredProvider>, ProviderError> {
+        Ok(Some(DiscoveredProvider::default())) // unconditional: nothing is probed here
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ModelRunnerProvider;
+    use crate::ProviderPlugin;
+
+    #[test]
+    fn model_runner_provider_id_is_correct() {
+        assert_eq!(ModelRunnerProvider.id(), "model-runner");
+    }
+
+    #[test]
+    fn model_runner_discover_returns_default_provider() {
+        let result = ModelRunnerProvider
+            .discover_existing()
+            .expect("discovery should succeed");
+        assert!(result.is_some()); // checks presence only, not the provider's contents
+    }
+}
@@ -620,7 +620,7 @@ fn resolve_provider_route(
     let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| {
         Status::invalid_argument(format!(
             "provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \
-                 (supported: openai, anthropic, nvidia, google-vertex-ai)",
+                 (supported: openai, anthropic, nvidia, google-vertex-ai, model-runner)",
             name = provider.object_name()
         ))
     })?;

@@ -27,6 +27,11 @@ Launch Claude Code in a sandbox, diagnose a policy denial, and iterate on a cust
 Configure a Providers v2 Microsoft Graph provider with gateway-managed OAuth2 refresh-token rotation.
 </Card>
 
+<Card title="Inference with Docker Model Runner" href="/get-started/tutorials/inference-docker-model-runner">
+
+Route inference to Docker Model Runner using the built-in `model-runner` provider type. No credentials are required.
+</Card>
+
 <Card title="Inference with Ollama" href="/get-started/tutorials/inference-ollama">
 
 Route inference through Ollama using cloud-hosted or local models, and verify it from a sandbox.

@@ -0,0 +1,128 @@
+---
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+title: "Run Local Inference with Docker Model Runner"
+sidebar-title: "Inference with Docker Model Runner"
+slug: "get-started/tutorials/inference-docker-model-runner"
+description: "Route sandbox inference requests to Docker Model Runner running on your host machine using the built-in model-runner provider type."
+keywords: "Generative AI, Cybersecurity, Tutorial, Inference Routing, Docker Model Runner, Local Inference, Sandbox"
+---
+
+This tutorial shows how to route `inference.local` requests from OpenShell sandboxes to Docker Model Runner on your host machine.
+
+Docker Model Runner is built into Docker Desktop. It runs models locally and serves them through an OpenAI-compatible API, with no external service or API key required.
+
+After completing this tutorial, you will know how to:
+
+- Pull and run a model with Docker Model Runner.
+- Create a `model-runner` provider in OpenShell.
+- Set Docker Model Runner as the `inference.local` backend.
+- Verify inference from inside a sandbox.
+
+## Prerequisites
+
+- A working OpenShell installation. Complete the [Quickstart](/get-started/quickstart) before proceeding.
+- Docker Desktop with Docker Model Runner enabled (Docker Desktop 4.40 or later).
+
+## Verify Docker Model Runner Is Available
+
+Confirm Docker Model Runner is running on your host:
+
+```shell
+docker model version
+```
+
+If Docker Model Runner is not available, upgrade Docker Desktop or enable the feature in Docker Desktop settings under the **Beta Features** tab.
+
+<Steps toc={true}>
+
+### Pull a Model
+
+Pull a model to use for inference. A small model is a good starting point:
+
+```shell
+docker model pull ai/smollm2
+```
+
+Verify the model is available:
+
+```shell
+docker model list
+```
+
+### Create a Provider
+
+Create a `model-runner` provider. No credentials are needed because Docker Model Runner is accessed over the Docker-internal network:
+
+```shell
+openshell provider create --name model-runner --type model-runner
+```
+
+### Set Inference Routing
+
+Point `inference.local` at the model-runner provider and choose a model:
+
+```shell
+openshell inference set --provider model-runner --model ai/smollm2
+```
+
+OpenShell verifies that the upstream endpoint is reachable before saving the route. If the model has not fully loaded yet, wait a few seconds and retry.
+
+Confirm:
+
+```shell
+openshell inference get
+```
+
+### Verify from a Sandbox
+
+Run a request through `https://inference.local`:
+
+```shell
+openshell sandbox create -- \
+    curl https://inference.local/v1/chat/completions \
+    --json '{"messages":[{"role":"user","content":"hello"}],"max_tokens":10}'
+```
+
+A JSON response from the model confirms end-to-end connectivity.
+
+</Steps>
+
+## Model Recommendations
+
+| Use case | Model | Notes |
+|---|---|---|
+| Smoke test | `ai/smollm2` | Small, fast, good for verifying setup |
+| Coding and reasoning | `ai/llama3.2` | Strong general-purpose model |
+| Chat | `ai/gemma3` | Lightweight with good instruction following |
+
+Search for additional models with:
+
+```shell
+docker model search <query>
+```
+
+## Troubleshooting
+
+Common issues and fixes:
+
+- **`docker model version` fails** — Docker Desktop is not running or Docker Model Runner is disabled. Enable it in Docker Desktop settings.
+- **`openshell inference set` fails with connection refused** — The model may still be loading. Run `docker model ps` to check. If no model is loaded, run `docker model run --detach ai/smollm2` to pre-load it.
+- **Model not found** — Run `docker model list` to confirm the model is present. Run `docker model pull <model>` if needed.
+- **HTTPS vs HTTP** — Code inside sandboxes must call `https://inference.local`, not `http://inference.local`.
+
+Useful commands:
+
+```shell
+openshell status
+openshell inference get
+openshell provider get model-runner
+docker model ps
+docker model list
+```
+
+## Next Steps
+
+- To learn more about managed inference, refer to [Inference Routing](/sandboxes/inference-routing).
+- To configure a different self-hosted backend, refer to [Inference Routing](/sandboxes/inference-routing#configure-inference-routing).
+- To learn how to use Ollama for local inference, refer to [Inference with Ollama](/get-started/tutorials/inference-ollama).
@@ -118,6 +118,18 @@ Use [Google Vertex AI](/providers/google-vertex-ai) for the full auth flows, inc
 
 </Tab>
 
+<Tab title="Docker Model Runner">
+
+Docker Model Runner is built into Docker Desktop and exposes an OpenAI-compatible API over the Docker-internal network. No credentials are required.
+
+```shell
+openshell provider create --name model-runner --type model-runner
+```
+
+The `model-runner` type uses `model-runner.docker.internal` as the default endpoint, so no base URL or API key configuration is needed. To use a different endpoint, set `--config MODEL_RUNNER_BASE_URL`. For a step-by-step setup, refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner).
+
+</Tab>
+
 <Tab title="Local Endpoint">
 
 ```shell
@@ -287,6 +299,7 @@ A successful response confirms the privacy router can reach the configured backe
 
 Explore related topics:
 
+- To follow a complete Docker Model Runner setup, refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner).
 - To follow a complete Ollama-based local setup, refer to [Inference Ollama](/get-started/tutorials/inference-ollama).
 - To follow a complete LM Studio-based local setup, refer to [Local Inference LM Studio](/get-started/tutorials/local-inference-lmstudio).
 - To control external endpoints, refer to [Policies](/sandboxes/policies).

@@ -253,6 +253,7 @@ The following provider types are supported.
 | `generic` | User-defined | Any service with custom credentials |
 | `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API and `gh` CLI. Refer to [GitHub Sandbox](/get-started/tutorials/github-sandbox). |
 | `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI |
+| `model-runner` | (none) | Docker Model Runner local inference. No credentials required. Refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner). |
 | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog |
 | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to [Inference Routing](/sandboxes/inference-routing). |
 | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | OpenCode |
@@ -280,6 +281,7 @@ The following providers have been tested with `inference.local`. Any provider th
 | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` |
 | Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` |
 | Groq | `groq` | `openai` | `https://api.groq.com/openai/v1` | `OPENAI_API_KEY` |
+| Docker Model Runner | `model-runner` | `model-runner` | `http://model-runner.docker.internal/engines/llama.cpp/v1` | (none required) |
 | Ollama (local) | `ollama` | `openai` | `http://host.openshell.internal:11434/v1` | `OPENAI_API_KEY` |
 | LM Studio (local) | `lmstudio` | `openai` | `http://host.openshell.internal:1234/v1` | `OPENAI_API_KEY` |
 

@@ -0,0 +1,15 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+id: model-runner
+display_name: Docker Model Runner
+description: Local AI inference via Docker Model Runner
+category: inference
+inference_capable: true
+endpoints:
+  - host: model-runner.docker.internal
+    port: 80
+    protocol: rest
+    access: read-write
+    enforcement: enforce
+binaries: [/usr/local/bin/docker]