Skip to content
3 changes: 2 additions & 1 deletion cmd/odek/browser_tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ type browserState struct {
// ── Browser Tool ──────────────────────────────────────────────────────

type browserTool struct {
ctxTool
state *browserState
client *http.Client
dangerousConfig danger.DangerousConfig
Expand Down Expand Up @@ -196,7 +197,7 @@ func (t *browserTool) doNavigate(rawURL string) (string, error) {
return jsonError(fmt.Sprintf("invalid URL %q: must start with http:// or https://", rawURL))
}

req, err := http.NewRequest("GET", rawURL, nil)
req, err := http.NewRequestWithContext(t.toolCtx(), "GET", rawURL, nil)
if err != nil {
return jsonError(fmt.Sprintf("cannot create request: %v", err))
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/odek/perf_tools.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ func (t *parallelShellTool) runOne(cmd parallelShellCmd) parallelShellEntry {
const maxHTTPBatchURLs = 10

type httpBatchTool struct {
ctxTool
dangerousConfig danger.DangerousConfig
client *http.Client
}
Expand Down Expand Up @@ -556,7 +557,7 @@ func (t *httpBatchTool) fetchOne(r httpBatchReq) httpBatchEntry {
}

entry := httpBatchEntry{URL: r.URL}
httpReq, err := http.NewRequest(method, r.URL, nil)
httpReq, err := http.NewRequestWithContext(t.toolCtx(), method, r.URL, nil)
if err != nil {
entry.Error = err.Error()
return entry
Expand Down
65 changes: 61 additions & 4 deletions cmd/odek/shell.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,26 @@ package main

import (
"bytes"
"context"
"encoding/json"
"fmt"
"os/exec"
"strings"
"sync"
"syscall"
"time"

"github.com/BackendStack21/odek/internal/danger"
)

// defaultShellTimeout bounds a single shell command. It is deliberately
// generous — the goal is to stop a genuinely stuck command (a network read
// that never returns, an interactive prompt, an infinite loop) from wedging
// the agent forever, NOT to kill legitimate long builds or test suites. When
// the agent context is cancelled (Ctrl-C, turn timeout) the command is killed
// immediately regardless of this backstop.
const defaultShellTimeout = 30 * time.Minute

// shellTool is odek's built-in tool that lets the agent run shell commands.
//
// This is the only built-in tool — it's enough for reading files, running
Expand Down Expand Up @@ -61,6 +72,13 @@ type shellTool struct {
// ttyPath is the path to the terminal device for approval prompts.
// Overridden in tests to mock user input. Only used when approver is nil.
ttyPath string

// ctxTool provides SetContext/toolCtx so cancelling the agent context
// (Ctrl-C, turn timeout) kills the running command.
ctxTool

// timeout bounds a single command. Zero falls back to defaultShellTimeout.
timeout time.Duration
}

func (t *shellTool) Name() string { return "shell" }
Expand Down Expand Up @@ -113,13 +131,52 @@ func (t *shellTool) Call(args string) (string, error) {
return "", err
}

cmd := t.buildCmd(input.Command)
// Bound execution: cancel with the agent context (Ctrl-C / turn timeout)
// and a generous backstop timeout so a stuck command can never wedge the
// agent forever. Note: in sandbox mode this kills the host-side
// `docker exec` client, which unblocks the agent, but Docker does not
// propagate the signal to the in-container process — that lingers until the
// container is torn down at session end.
base := t.toolCtx()
timeout := t.timeout
if timeout <= 0 {
timeout = defaultShellTimeout
}
ctx, cancel := context.WithTimeout(base, timeout)
defer cancel()

cmd := t.buildCmd(ctx, input.Command)
// Run the command in its own process group and, on cancel/timeout, kill the
// WHOLE group — not just the `sh` leader. `sh -c "<cmd>"` may fork children
// (e.g. `sleep`); killing only the leader leaves them alive holding the
// output pipes, so Run() would block until WaitDelay. Signalling the group
// (negative pid) tears the whole tree down at once.
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Cancel = func() error {
if cmd.Process != nil {
// Best-effort group kill; ignore ESRCH if it already exited.
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
return nil
}
// WaitDelay is a backstop in case a process somehow outlives the group kill.
cmd.WaitDelay = 3 * time.Second

var outBuf, errBuf bytes.Buffer
cmd.Stdout = &outBuf
cmd.Stderr = &errBuf

err := cmd.Run()

// Surface cancellation/timeout as a clear, actionable error rather than an
// opaque "signal: killed".
if ctxErr := ctx.Err(); ctxErr != nil {
if ctxErr == context.DeadlineExceeded {
return "", fmt.Errorf("shell: command timed out after %s (still running? it was killed): %s", timeout, input.Command)
}
return "", fmt.Errorf("shell: command cancelled: %s", input.Command)
}

output := strings.TrimSpace(outBuf.String())
stderrStr := strings.TrimSpace(errBuf.String())
if stderrStr != "" {
Expand Down Expand Up @@ -200,9 +257,9 @@ func (t *shellTool) promptUser(cmd, description string) error {
//
// When running on the host (default), the command executes via "sh -c"
// in odek's current working directory.
func (t *shellTool) buildCmd(command string) *exec.Cmd {
func (t *shellTool) buildCmd(ctx context.Context, command string) *exec.Cmd {
if t.containerName != "" {
return exec.Command("docker", "exec", "-w", "/workspace", t.containerName, "sh", "-c", command)
return exec.CommandContext(ctx, "docker", "exec", "-w", "/workspace", t.containerName, "sh", "-c", command)
}
return exec.Command("sh", "-c", command)
return exec.CommandContext(ctx, "sh", "-c", command)
}
62 changes: 59 additions & 3 deletions cmd/odek/shell_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package main

import (
"context"
"encoding/json"
"os"
"os/exec"
"strings"
"testing"
"time"
)

func TestShellTool_Name(t *testing.T) {
Expand All @@ -15,6 +17,60 @@ func TestShellTool_Name(t *testing.T) {
}
}

// TestShellTool_Timeout verifies a stuck command can no longer wedge the agent:
// a tiny per-tool timeout kills the command and Call returns promptly with a
// clear timeout error instead of blocking forever.
func TestShellTool_Timeout(t *testing.T) {
st := &shellTool{timeout: 200 * time.Millisecond}
done := make(chan struct{})
var out string
var err error
go func() {
out, err = st.Call(`{"command":"sleep 30 | cat"}`)
close(done)
}()
select {
case <-done:
case <-time.After(5 * time.Second):
t.Fatal("Call did not return after the command timeout — agent would hang")
}
if err == nil {
t.Fatalf("expected a timeout error, got output %q", out)
}
if !strings.Contains(err.Error(), "timed out") {
t.Errorf("error should mention the timeout, got: %v", err)
}
}

// TestShellTool_ContextCancellation verifies Ctrl-C / turn cancellation kills a
// running command immediately via the agent context.
func TestShellTool_ContextCancellation(t *testing.T) {
st := &shellTool{}
ctx, cancel := context.WithCancel(context.Background())
st.SetContext(ctx)

done := make(chan struct{})
var err error
go func() {
_, err = st.Call(`{"command":"sleep 30 | cat"}`)
close(done)
}()
time.Sleep(100 * time.Millisecond)
cancel()

select {
case <-done:
case <-time.After(5 * time.Second):
t.Fatal("Call did not return after context cancellation — Ctrl-C would not work")
}
if err == nil {
t.Fatal("expected a cancellation error")
}
if !strings.Contains(err.Error(), "cancelled") {
t.Errorf("error should mention cancellation, got: %v", err)
}
}

func TestShellTool_Description(t *testing.T) {
st := &shellTool{}
desc := st.Description()
Expand Down Expand Up @@ -130,7 +186,7 @@ func TestShellTool_Call_StdoutAndStderr(t *testing.T) {

func TestShellTool_BuildCmd_Local(t *testing.T) {
st := &shellTool{}
cmd := st.buildCmd("echo test")
cmd := st.buildCmd(context.Background(), "echo test")
args := cmd.Args
if args[0] != "sh" || args[1] != "-c" || args[2] != "echo test" {
t.Errorf("local cmd args = %v, want [sh -c 'echo test']", args)
Expand All @@ -139,7 +195,7 @@ func TestShellTool_BuildCmd_Local(t *testing.T) {

func TestShellTool_BuildCmd_Docker(t *testing.T) {
st := &shellTool{containerName: "odek-12345"}
cmd := st.buildCmd("echo test")
cmd := st.buildCmd(context.Background(), "echo test")
args := cmd.Args
expected := []string{"docker", "exec", "-w", "/workspace", "odek-12345", "sh", "-c", "echo test"}
if !stringSlicesEqual(args, expected) {
Expand Down Expand Up @@ -303,7 +359,7 @@ func TestShellTool_CheckApproval(t *testing.T) {

func TestShellTool_BuildCmd_Default(t *testing.T) {
st := &shellTool{}
cmd := st.buildCmd("echo hello")
cmd := st.buildCmd(context.Background(), "echo hello")
if cmd.Args[0] != "sh" {
t.Errorf("expected sh, got %s", cmd.Args[0])
}
Expand Down
40 changes: 40 additions & 0 deletions cmd/odek/toolctx.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package main

import (
"context"
"sync"
)

// ctxTool is embedded by tools that support agent-context cancellation. The
// agent loop calls SetContext on any tool implementing it (see internal/loop)
// right before invoking the tool, so cancelling the agent context — Ctrl-C, a
// turn timeout — interrupts the tool's in-flight network request or subprocess
// instead of letting it run to completion (or hang) unsupervised.
//
// The mutex matters: when the LLM emits two calls to the SAME tool in one
// turn, the loop runs them in parallel goroutines and calls SetContext on the
// shared instance from each. Without synchronisation that is a data race on the
// context field even though the value is identical for the turn.
type ctxTool struct {
mu sync.Mutex
ctx context.Context
}

// SetContext records the agent context for the next Call. Safe for concurrent
// use by parallel invocations of the same tool instance.
func (c *ctxTool) SetContext(ctx context.Context) {
c.mu.Lock()
c.ctx = ctx
c.mu.Unlock()
}

// toolCtx returns the recorded agent context, or context.Background() if none
// was set (e.g. tools invoked directly in tests or outside the agent loop).
func (c *ctxTool) toolCtx() context.Context {
c.mu.Lock()
defer c.mu.Unlock()
if c.ctx == nil {
return context.Background()
}
return c.ctx
}
68 changes: 68 additions & 0 deletions cmd/odek/toolctx_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package main

import (
"context"
"strings"
"sync"
"testing"
)

func TestCtxTool_DefaultsToBackground(t *testing.T) {
var c ctxTool
if c.toolCtx() != context.Background() {
t.Error("unset ctxTool should return context.Background()")
}
}

func TestCtxTool_SetAndGet(t *testing.T) {
var c ctxTool
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c.SetContext(ctx)
if c.toolCtx() != ctx {
t.Error("toolCtx should return the context set via SetContext")
}
}

// TestCtxTool_ConcurrentSetContext mirrors the loop calling SetContext on a
// shared tool instance from parallel goroutines — it must be race-free.
func TestCtxTool_ConcurrentSetContext(t *testing.T) {
var c ctxTool
ctx := context.Background()
var wg sync.WaitGroup
for i := 0; i < 100; i++ {
wg.Add(1)
go func() {
defer wg.Done()
c.SetContext(ctx)
_ = c.toolCtx()
}()
}
wg.Wait()
}

// TestHTTPBatch_ContextCancelled verifies the propagated context aborts the
// fetch: a cancelled context yields an error entry instead of a real request.
func TestHTTPBatch_ContextCancelled(t *testing.T) {
tool := newHTTPBatchTool(allowAllDanger())
ctx, cancel := context.WithCancel(context.Background())
cancel() // already cancelled
tool.SetContext(ctx)

result := callJSON(t, tool, `{"requests":[{"url":"http://example.com/"}]}`)
var r struct {
Results []struct {
Error string `json:"error"`
} `json:"results"`
}
mustUnmarshal(t, result, &r)
if len(r.Results) != 1 {
t.Fatalf("Results = %d, want 1", len(r.Results))
}
if r.Results[0].Error == "" {
t.Fatal("expected an error from the cancelled context")
}
if !strings.Contains(r.Results[0].Error, "context canceled") {
t.Errorf("error should reflect cancellation, got: %s", r.Results[0].Error)
}
}
Loading
Loading