diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index c04feb6b4..1d6b4a18a 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -154,6 +154,19 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { passthrough_headers: &["x-model-id"], }; +static MODEL_RUNNER_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "model-runner", + // Accessible from Docker containers via the Docker-internal hostname on port 80. + default_base_url: "http://model-runner.docker.internal/engines/llama.cpp/v1", + protocols: OPENAI_PROTOCOLS, + // Docker Model Runner requires no authentication by default. + credential_key_names: &[], + base_url_config_keys: &["MODEL_RUNNER_BASE_URL"], + auth: AuthHeader::Bearer, + default_headers: &[], + passthrough_headers: &["x-model-id"], +}; + /// Canonicalize an inference provider type string to a well-known identifier. /// /// Returns `Some(canonical_name)` for recognized inference providers, @@ -169,6 +182,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> { "google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => { Some("google-vertex-ai") } + "model-runner" | "model_runner" => Some("model-runner"), _ => None, } } @@ -183,6 +197,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), "google-vertex-ai" => Some(&VERTEX_AI_PROFILE), + "model-runner" => Some(&MODEL_RUNNER_PROFILE), _ => None, } } @@ -360,6 +375,19 @@ mod tests { } } + #[test] + fn profile_for_model_runner() { + let profile = profile_for("model-runner").expect("model-runner profile should be Some"); + assert_eq!(profile.provider_type, "model-runner"); + assert!( + profile + .default_base_url + .contains("model-runner.docker.internal"), + "default base URL should use Docker-internal 
hostname" + ); + assert!(profile.credential_key_names.is_empty()); + } + #[test] fn auth_for_vertex_uses_bearer() { let (auth, headers) = auth_for_provider_type("google-vertex-ai"); diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index 1d0d5a192..60c485e5e 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -118,6 +118,7 @@ impl ProviderRegistry { registry.register(providers::nvidia::SPEC); registry.register(providers::gitlab::SPEC); registry.register(providers::github::SPEC); + registry.register(providers::model_runner::ModelRunnerProvider); registry.register(providers::outlook::OutlookProvider); registry } @@ -182,6 +183,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "generic" => Some("generic"), "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), + "model-runner" | "model_runner" => Some("model-runner"), "outlook" => Some("outlook"), _ => None, } diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 63a6b2eb3..0b647a076 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -23,6 +23,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[ include_str!("../../../providers/cursor.yaml"), include_str!("../../../providers/github.yaml"), include_str!("../../../providers/google-vertex-ai.yaml"), + include_str!("../../../providers/model-runner.yaml"), include_str!("../../../providers/nvidia.yaml"), include_str!("../../../providers/pypi.yaml"), ]; diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs index dfe5935a1..13a1ed319 100644 --- a/crates/openshell-providers/src/providers/mod.rs +++ b/crates/openshell-providers/src/providers/mod.rs @@ -37,6 +37,7 @@ pub mod copilot; pub mod generic; pub mod github; pub mod gitlab; +pub mod model_runner; pub mod nvidia; pub mod openai; pub mod 
opencode; diff --git a/crates/openshell-providers/src/providers/model_runner.rs b/crates/openshell-providers/src/providers/model_runner.rs new file mode 100644 index 000000000..564474daa --- /dev/null +++ b/crates/openshell-providers/src/providers/model_runner.rs @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{DiscoveredProvider, ProviderError, ProviderPlugin}; + +pub struct ModelRunnerProvider; + +impl ProviderPlugin for ModelRunnerProvider { + fn id(&self) -> &'static str { + "model-runner" + } + + fn discover_existing(&self) -> Result<Option<DiscoveredProvider>, ProviderError> { + Ok(Some(DiscoveredProvider::default())) + } +} + +#[cfg(test)] +mod tests { + use super::ModelRunnerProvider; + use crate::ProviderPlugin; + + #[test] + fn model_runner_provider_id_is_correct() { + assert_eq!(ModelRunnerProvider.id(), "model-runner"); + } + + #[test] + fn model_runner_discover_returns_default_provider() { + let result = ModelRunnerProvider + .discover_existing() + .expect("discovery should succeed"); + assert!(result.is_some()); + } +} diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 58b5feb2a..8164208f3 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -620,7 +620,7 @@ fn resolve_provider_route( let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| { Status::invalid_argument(format!( "provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \ - (supported: openai, anthropic, nvidia, google-vertex-ai)", + (supported: openai, anthropic, nvidia, google-vertex-ai, model-runner)", name = provider.object_name() )) })?; diff --git a/docs/get-started/tutorials/index.mdx b/docs/get-started/tutorials/index.mdx index bc30bcfc4..02e5d22f6 100644 --- a/docs/get-started/tutorials/index.mdx +++ 
b/docs/get-started/tutorials/index.mdx @@ -27,6 +27,11 @@ Launch Claude Code in a sandbox, diagnose a policy denial, and iterate on a cust Configure a Providers v2 Microsoft Graph provider with gateway-managed OAuth2 refresh-token rotation. + + +Route inference to Docker Model Runner using the built-in model-runner provider type. No credentials required. + + Route inference through Ollama using cloud-hosted or local models, and verify it from a sandbox. diff --git a/docs/get-started/tutorials/inference-docker-model-runner.mdx b/docs/get-started/tutorials/inference-docker-model-runner.mdx new file mode 100644 index 000000000..250ed5ae8 --- /dev/null +++ b/docs/get-started/tutorials/inference-docker-model-runner.mdx @@ -0,0 +1,128 @@ +--- +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +title: "Run Local Inference with Docker Model Runner" +sidebar-title: "Inference with Docker Model Runner" +slug: "get-started/tutorials/inference-docker-model-runner" +description: "Route sandbox inference requests to Docker Model Runner running on your host machine using the built-in model-runner provider type." +keywords: "Generative AI, Cybersecurity, Tutorial, Inference Routing, Docker Model Runner, Local Inference, Sandbox" +--- + +This tutorial shows how to route `inference.local` requests from OpenShell sandboxes to Docker Model Runner on your host machine. + +Docker Model Runner is built into Docker Desktop. It runs models locally using the OpenAI-compatible API with no external service or API key required. + +After completing this tutorial, you will know how to: + +- Pull and run a model with Docker Model Runner. +- Create a `model-runner` provider in OpenShell. +- Set Docker Model Runner as the `inference.local` backend. +- Verify inference from inside a sandbox. + +## Prerequisites + +- A working OpenShell installation. 
Complete the [Quickstart](/get-started/quickstart) before proceeding. +- Docker Desktop with Docker Model Runner enabled (Docker Desktop 4.40 or later). + +## Verify Docker Model Runner Is Available + +Confirm Docker Model Runner is running on your host: + +```shell +docker model version +``` + +If Docker Model Runner is not available, upgrade Docker Desktop or enable the feature in Docker Desktop settings under the **Beta Features** tab. + + + +### Pull a Model + +Pull a model to use for inference. A small model is a good starting point: + +```shell +docker model pull ai/smollm2 +``` + +Verify the model is available: + +```shell +docker model list +``` + +### Create a Provider + +Create a `model-runner` provider. No credentials are needed because Docker Model Runner is accessed over the Docker-internal network: + +```shell +openshell provider create --name model-runner --type model-runner +``` + +### Set Inference Routing + +Point `inference.local` at the model-runner provider and choose a model: + +```shell +openshell inference set --provider model-runner --model ai/smollm2 +``` + +OpenShell will verify that the upstream endpoint is reachable before saving. If the model has not fully loaded yet, wait a few seconds and retry. + +Confirm: + +```shell +openshell inference get +``` + +### Verify from a Sandbox + +Run a request through `https://inference.local`: + +```shell +openshell sandbox create -- \ + curl https://inference.local/v1/chat/completions \ + --json '{"messages":[{"role":"user","content":"hello"}],"max_tokens":10}' +``` + +A JSON response from the model confirms end-to-end connectivity. 
+ + + +## Model Recommendations + +| Use case | Model | Notes | +|---|---|---| +| Smoke test | `ai/smollm2` | Small, fast, good for verifying setup | +| Coding and reasoning | `ai/llama3.2` | Strong general-purpose model | +| Chat | `ai/gemma3` | Lightweight with good instruction following | + +Search for additional models with: + +```shell +docker model search +``` + +## Troubleshooting + +Common issues and fixes: + +- **`docker model version` fails** — Docker Desktop is not running or Docker Model Runner is disabled. Enable it in Docker Desktop settings. +- **`openshell inference set` fails with connection refused** — The model may still be loading. Run `docker model ps` to check. If no model is loaded, run `docker model run --detach ai/smollm2` to pre-load it. +- **Model not found** — Run `docker model list` to confirm the model is present. Run `docker model pull <model>` if needed. +- **HTTPS vs HTTP** — Code inside sandboxes must call `https://inference.local`, not `http://`. + +Useful commands: + +```shell +openshell status +openshell inference get +openshell provider get model-runner +docker model ps +docker model list +``` + +## Next Steps + +- To learn more about managed inference, refer to [Inference Routing](/sandboxes/inference-routing). +- To configure a different self-hosted backend, refer to [Inference Routing](/sandboxes/inference-routing#configure-inference-routing). +- To learn how to use Ollama for local inference, refer to [Inference with Ollama](/get-started/tutorials/inference-ollama). diff --git a/docs/sandboxes/inference-routing.mdx b/docs/sandboxes/inference-routing.mdx index 3d8c48cd8..cc3789c7d 100644 --- a/docs/sandboxes/inference-routing.mdx +++ b/docs/sandboxes/inference-routing.mdx @@ -118,6 +118,18 @@ Use [Google Vertex AI](/providers/google-vertex-ai) for the full auth flows, inc + + +Docker Model Runner is built into Docker Desktop and exposes an OpenAI-compatible API over the Docker-internal network. No credentials are required. 
+ +```shell +openshell provider create --name model-runner --type model-runner +``` + +The `model-runner` type uses `model-runner.docker.internal` as the default endpoint. No base URL or API key configuration is needed. For a step-by-step setup, refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner). + + + ```shell @@ -287,6 +299,7 @@ A successful response confirms the privacy router can reach the configured backe Explore related topics: +- To follow a complete Docker Model Runner setup, refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner). - To follow a complete Ollama-based local setup, refer to [Inference Ollama](/get-started/tutorials/inference-ollama). - To follow a complete LM Studio-based local setup, refer to [Local Inference LM Studio](/get-started/tutorials/local-inference-lmstudio). - To control external endpoints, refer to [Policies](/sandboxes/policies). diff --git a/docs/sandboxes/manage-providers.mdx b/docs/sandboxes/manage-providers.mdx index a6b9654d0..12000ac23 100644 --- a/docs/sandboxes/manage-providers.mdx +++ b/docs/sandboxes/manage-providers.mdx @@ -253,6 +253,7 @@ The following provider types are supported. | `generic` | User-defined | Any service with custom credentials | | `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API and `gh` CLI. Refer to [GitHub Sandbox](/get-started/tutorials/github-sandbox). | | `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI | +| `model-runner` | (none) | Docker Model Runner local inference. No credentials required. Refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner). | | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog | | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to [Inference Routing](/sandboxes/inference-routing). 
| | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | OpenCode | @@ -280,6 +281,7 @@ The following providers have been tested with `inference.local`. Any provider th | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` | | Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` | | Groq | `groq` | `openai` | `https://api.groq.com/openai/v1` | `OPENAI_API_KEY` | +| Docker Model Runner | `model-runner` | `model-runner` | `http://model-runner.docker.internal/engines/llama.cpp/v1` | (none required) | | Ollama (local) | `ollama` | `openai` | `http://host.openshell.internal:11434/v1` | `OPENAI_API_KEY` | | LM Studio (local) | `lmstudio` | `openai` | `http://host.openshell.internal:1234/v1` | `OPENAI_API_KEY` | diff --git a/providers/model-runner.yaml b/providers/model-runner.yaml new file mode 100644 index 000000000..90caf1a1e --- /dev/null +++ b/providers/model-runner.yaml @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: model-runner +display_name: Docker Model Runner +description: Local AI inference via Docker Model Runner +category: inference +inference_capable: true +endpoints: + - host: model-runner.docker.internal + port: 80 + protocol: rest + access: read-write + enforcement: enforce +binaries: [/usr/local/bin/docker]