Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions crates/openshell-core/src/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,19 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
passthrough_headers: &["x-model-id"],
};

/// Built-in inference profile for the `model-runner` provider type
/// (Docker Model Runner).
///
/// Reuses `OPENAI_PROTOCOLS` and declares no credential keys.
static MODEL_RUNNER_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
provider_type: "model-runner",
// Accessible from Docker containers via the Docker-internal hostname on port 80.
default_base_url: "http://model-runner.docker.internal/engines/llama.cpp/v1",
protocols: OPENAI_PROTOCOLS,
// Docker Model Runner requires no authentication by default.
credential_key_names: &[],
// NOTE(review): presumably the provider config key consulted to override
// `default_base_url` — confirm against the route resolver.
base_url_config_keys: &["MODEL_RUNNER_BASE_URL"],
// NOTE(review): Bearer is declared although no credential keys exist —
// confirm the router omits the Authorization header when no credential
// is resolved.
auth: AuthHeader::Bearer,
default_headers: &[],
passthrough_headers: &["x-model-id"],
};

/// Canonicalize an inference provider type string to a well-known identifier.
///
/// Returns `Some(canonical_name)` for recognized inference providers,
Expand All @@ -169,6 +182,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> {
"google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => {
Some("google-vertex-ai")
}
"model-runner" | "model_runner" => Some("model-runner"),
_ => None,
}
}
Expand All @@ -183,6 +197,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf
"anthropic" => Some(&ANTHROPIC_PROFILE),
"nvidia" => Some(&NVIDIA_PROFILE),
"google-vertex-ai" => Some(&VERTEX_AI_PROFILE),
"model-runner" => Some(&MODEL_RUNNER_PROFILE),
_ => None,
}
}
Expand Down Expand Up @@ -360,6 +375,19 @@ mod tests {
}
}

#[test]
fn profile_for_model_runner() {
    // The canonical id must resolve to a built-in profile.
    let model_runner =
        profile_for("model-runner").expect("model-runner profile should be Some");
    assert_eq!(model_runner.provider_type, "model-runner");

    // The default endpoint targets the Docker-internal hostname.
    let base_url = &model_runner.default_base_url;
    assert!(
        base_url.contains("model-runner.docker.internal"),
        "default base URL should use Docker-internal hostname"
    );

    // No credential keys are declared for this provider type.
    assert!(model_runner.credential_key_names.is_empty());
}

#[test]
fn auth_for_vertex_uses_bearer() {
let (auth, headers) = auth_for_provider_type("google-vertex-ai");
Expand Down
2 changes: 2 additions & 0 deletions crates/openshell-providers/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ impl ProviderRegistry {
registry.register(providers::nvidia::SPEC);
registry.register(providers::gitlab::SPEC);
registry.register(providers::github::SPEC);
registry.register(providers::model_runner::ModelRunnerProvider);
registry.register(providers::outlook::OutlookProvider);
registry
}
Expand Down Expand Up @@ -182,6 +183,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> {
"generic" => Some("generic"),
"gitlab" | "glab" => Some("gitlab"),
"github" | "gh" => Some("github"),
"model-runner" | "model_runner" => Some("model-runner"),
"outlook" => Some("outlook"),
_ => None,
}
Expand Down
1 change: 1 addition & 0 deletions crates/openshell-providers/src/profiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[
include_str!("../../../providers/cursor.yaml"),
include_str!("../../../providers/github.yaml"),
include_str!("../../../providers/google-vertex-ai.yaml"),
include_str!("../../../providers/model-runner.yaml"),
include_str!("../../../providers/nvidia.yaml"),
include_str!("../../../providers/pypi.yaml"),
];
Expand Down
1 change: 1 addition & 0 deletions crates/openshell-providers/src/providers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub mod copilot;
pub mod generic;
pub mod github;
pub mod gitlab;
pub mod model_runner;
pub mod nvidia;
pub mod openai;
pub mod opencode;
Expand Down
35 changes: 35 additions & 0 deletions crates/openshell-providers/src/providers/model_runner.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::{DiscoveredProvider, ProviderError, ProviderPlugin};

/// Provider plugin for Docker Model Runner (provider id `model-runner`).
///
/// This is a stateless unit type; its behavior is supplied by the
/// `ProviderPlugin` implementation in this module.
#[derive(Debug, Clone, Copy, Default)]
pub struct ModelRunnerProvider;

impl ProviderPlugin for ModelRunnerProvider {
    /// Returns the canonical provider id, `"model-runner"`.
    fn id(&self) -> &'static str {
        "model-runner"
    }

    /// Unconditionally reports a discovered provider.
    ///
    /// Nothing is inspected here: the result is always `Ok(Some(..))`
    /// wrapping `DiscoveredProvider::default()`.
    fn discover_existing(&self) -> Result<Option<DiscoveredProvider>, ProviderError> {
        let discovered = DiscoveredProvider::default();
        Ok(Some(discovered))
    }
}

#[cfg(test)]
mod tests {
    use super::ModelRunnerProvider;
    use crate::ProviderPlugin;

    /// The plugin id is the canonical `model-runner` string.
    #[test]
    fn model_runner_provider_id_is_correct() {
        let provider = ModelRunnerProvider;
        assert_eq!(provider.id(), "model-runner");
    }

    /// Discovery succeeds and yields a provider.
    #[test]
    fn model_runner_discover_returns_default_provider() {
        let provider = ModelRunnerProvider;
        let discovered = provider
            .discover_existing()
            .expect("discovery should succeed");
        assert!(discovered.is_some());
    }
}
2 changes: 1 addition & 1 deletion crates/openshell-server/src/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ fn resolve_provider_route(
let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| {
Status::invalid_argument(format!(
"provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \
(supported: openai, anthropic, nvidia, google-vertex-ai)",
(supported: openai, anthropic, nvidia, google-vertex-ai, model-runner)",
name = provider.object_name()
))
})?;
Expand Down
5 changes: 5 additions & 0 deletions docs/get-started/tutorials/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ Launch Claude Code in a sandbox, diagnose a policy denial, and iterate on a cust
Configure a Providers v2 Microsoft Graph provider with gateway-managed OAuth2 refresh-token rotation.
</Card>

<Card title="Inference with Docker Model Runner" href="/get-started/tutorials/inference-docker-model-runner">

Route inference to Docker Model Runner using the built-in model-runner provider type. No credentials required.
</Card>

<Card title="Inference with Ollama" href="/get-started/tutorials/inference-ollama">

Route inference through Ollama using cloud-hosted or local models, and verify it from a sandbox.
Expand Down
128 changes: 128 additions & 0 deletions docs/get-started/tutorials/inference-docker-model-runner.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
---
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
title: "Run Local Inference with Docker Model Runner"
sidebar-title: "Inference with Docker Model Runner"
slug: "get-started/tutorials/inference-docker-model-runner"
description: "Route sandbox inference requests to Docker Model Runner running on your host machine using the built-in model-runner provider type."
keywords: "Generative AI, Cybersecurity, Tutorial, Inference Routing, Docker Model Runner, Local Inference, Sandbox"
---

This tutorial shows how to route `inference.local` requests from OpenShell sandboxes to Docker Model Runner on your host machine.

Docker Model Runner is built into Docker Desktop. It runs models locally and exposes an OpenAI-compatible API, so no external service or API key is required.

After completing this tutorial, you will know how to:

- Pull and run a model with Docker Model Runner.
- Create a `model-runner` provider in OpenShell.
- Set Docker Model Runner as the `inference.local` backend.
- Verify inference from inside a sandbox.

## Prerequisites

- A working OpenShell installation. Complete the [Quickstart](/get-started/quickstart) before proceeding.
- Docker Desktop with Docker Model Runner enabled (Docker Desktop 4.40 or later).

## Verify Docker Model Runner Is Available

Confirm Docker Model Runner is running on your host:

```shell
docker model version
```

If Docker Model Runner is not available, upgrade Docker Desktop or enable the feature in Docker Desktop settings under the **Beta Features** tab.

<Steps toc={true}>

### Pull a Model

Pull a model to use for inference. A small model is a good starting point:

```shell
docker model pull ai/smollm2
```

Verify the model is available:

```shell
docker model list
```

### Create a Provider

Create a `model-runner` provider. No credentials are needed because Docker Model Runner is accessed over the Docker-internal network:

```shell
openshell provider create --name model-runner --type model-runner
```

### Set Inference Routing

Point `inference.local` at the model-runner provider and choose a model:

```shell
openshell inference set --provider model-runner --model ai/smollm2
```

OpenShell verifies that the upstream endpoint is reachable before saving. If the model has not fully loaded yet, wait a few seconds and retry.

Confirm:

```shell
openshell inference get
```

### Verify from a Sandbox

Run a request through `https://inference.local`:

```shell
openshell sandbox create -- \
curl https://inference.local/v1/chat/completions \
--json '{"messages":[{"role":"user","content":"hello"}],"max_tokens":10}'
```

A JSON response from the model confirms end-to-end connectivity.

</Steps>

## Model Recommendations

| Use case | Model | Notes |
|---|---|---|
| Smoke test | `ai/smollm2` | Small, fast, good for verifying setup |
| Coding and reasoning | `ai/llama3.2` | Strong general-purpose model |
| Chat | `ai/gemma3` | Lightweight with good instruction following |

Search for additional models with:

```shell
docker model search <query>
```

## Troubleshooting

Common issues and fixes:

- **`docker model version` fails** — Docker Desktop is not running or Docker Model Runner is disabled. Enable it in Docker Desktop settings.
- **`openshell inference set` fails with connection refused** — The model may still be loading. Run `docker model ps` to check. If no model is loaded, run `docker model run --detach ai/smollm2` to pre-load it.
- **Model not found** — Run `docker model list` to confirm the model is present. Run `docker model pull <model>` if needed.
- **HTTPS vs HTTP** — Code inside sandboxes must call `https://inference.local`, not `http://inference.local`.

Useful commands:

```shell
openshell status
openshell inference get
openshell provider get model-runner
docker model ps
docker model list
```

## Next Steps

- To learn more about managed inference, refer to [Inference Routing](/sandboxes/inference-routing).
- To configure a different self-hosted backend, refer to [Configure Inference Routing](/sandboxes/inference-routing#configure-inference-routing).
- To learn how to use Ollama for local inference, refer to [Inference with Ollama](/get-started/tutorials/inference-ollama).
13 changes: 13 additions & 0 deletions docs/sandboxes/inference-routing.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,18 @@ Use [Google Vertex AI](/providers/google-vertex-ai) for the full auth flows, inc

</Tab>

<Tab title="Docker Model Runner">

Docker Model Runner is built into Docker Desktop and exposes an OpenAI-compatible API over the Docker-internal network. No credentials are required.

```shell
openshell provider create --name model-runner --type model-runner
```

The `model-runner` type uses `http://model-runner.docker.internal/engines/llama.cpp/v1` as the default base URL, so no base URL or API key configuration is needed. To point at a different endpoint, set the `MODEL_RUNNER_BASE_URL` config key on the provider. For a step-by-step setup, refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner).

</Tab>

<Tab title="Local Endpoint">

```shell
Expand Down Expand Up @@ -287,6 +299,7 @@ A successful response confirms the privacy router can reach the configured backe

Explore related topics:

- To follow a complete Docker Model Runner setup, refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner).
- To follow a complete Ollama-based local setup, refer to [Inference with Ollama](/get-started/tutorials/inference-ollama).
- To follow a complete LM Studio-based local setup, refer to [Local Inference LM Studio](/get-started/tutorials/local-inference-lmstudio).
- To control external endpoints, refer to [Policies](/sandboxes/policies).
Expand Down
2 changes: 2 additions & 0 deletions docs/sandboxes/manage-providers.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ The following provider types are supported.
| `generic` | User-defined | Any service with custom credentials |
| `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API and `gh` CLI. Refer to [GitHub Sandbox](/get-started/tutorials/github-sandbox). |
| `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI |
| `model-runner` | (none) | Docker Model Runner local inference. No credentials required. Refer to [Inference with Docker Model Runner](/get-started/tutorials/inference-docker-model-runner). |
| `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog |
| `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to [Inference Routing](/sandboxes/inference-routing). |
| `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | OpenCode |
Expand Down Expand Up @@ -280,6 +281,7 @@ The following providers have been tested with `inference.local`. Any provider th
| Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` |
| Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` |
| Groq | `groq` | `openai` | `https://api.groq.com/openai/v1` | `OPENAI_API_KEY` |
| Docker Model Runner | `model-runner` | `model-runner` | `http://model-runner.docker.internal/engines/llama.cpp/v1` | (none required) |
| Ollama (local) | `ollama` | `openai` | `http://host.openshell.internal:11434/v1` | `OPENAI_API_KEY` |
| LM Studio (local) | `lmstudio` | `openai` | `http://host.openshell.internal:1234/v1` | `OPENAI_API_KEY` |

Expand Down
15 changes: 15 additions & 0 deletions providers/model-runner.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Built-in provider profile for Docker Model Runner.
id: model-runner
display_name: Docker Model Runner
description: Local AI inference via Docker Model Runner
category: inference
inference_capable: true
# Endpoint reached through the Docker-internal hostname on port 80.
endpoints:
- host: model-runner.docker.internal
port: 80
protocol: rest
access: read-write
enforcement: enforce
# NOTE(review): confirm which binary actually issues requests to this
# endpoint. Review feedback suggests the caller is the inference workload
# rather than the docker CLI.
binaries: [/usr/local/bin/docker]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is docker making the API call or would it be a model harness, agent workload, etc?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this instance it's the inference provider. Docker model runner is effectively an alternative to ollama.

Loading