diff --git a/internal/shellwrap/testhooks.go b/internal/shellwrap/testhooks.go new file mode 100644 index 00000000..72296866 --- /dev/null +++ b/internal/shellwrap/testhooks.go @@ -0,0 +1,32 @@ +package shellwrap + +// This file exposes a few narrowly-scoped test seams so that packages OTHER +// than shellwrap (notably internal/upstream/core) can build hermetic +// integration tests over the docker-path resolution + spawn chain — e.g. the +// GitHub #696 "Docker Desktop installed but docker is not on the spawn PATH" +// scenario, which can only be reproduced end-to-end by driving the REAL +// ResolveDockerPath (not a stub) under a controlled PATH and well-known-path +// list, then feeding its result through core.setupDockerIsolation. +// +// They are intentionally named *ForTest and do nothing a caller would want in +// production; keeping them in a non-_test.go file is the only way to make them +// reachable from a sibling package's test binary (Go's export_test.go trick is +// package-local). Do NOT call these outside tests. + +// SetWellKnownDockerPathsForTest overrides the well-known docker install +// locations probed by ResolveDockerPath and returns a restore func that +// reinstalls the previous list. Pass a func returning the absolute path(s) of a +// fake docker binary to simulate a Docker Desktop bundle that is reachable only +// off the standard spawn PATH. +func SetWellKnownDockerPathsForTest(fn func() []string) (restore func()) { + prev := wellKnownDockerPathsFn + wellKnownDockerPathsFn = fn + return func() { wellKnownDockerPathsFn = prev } +} + +// ResetDockerPathCacheForTest clears the process-wide docker-path resolution +// cache (path, source, error, expiry) so a test starts from a clean slate +// regardless of what an earlier test or the host environment resolved. +func ResetDockerPathCacheForTest() { + resetDockerPathCacheForTest() +} diff --git a/internal/upstream/core/connection_docker.go b/internal/upstream/core/connection_docker.go index 0e53de8f..fd97b620 100644 --- a/internal/upstream/core/connection_docker.go +++ b/internal/upstream/core/connection_docker.go @@ -110,6 +110,14 @@ func (c *Client) setupDockerIsolation(command string, args []string) (dockerComm resolved, resErr := resolveDockerBinary(c.logger) switch { case resErr == nil && isDirectExecutable(resolved) && dockerDaemonEnvGuaranteed(): + // INFO (not Debug): make the spawn decision observable in main.log so a + // field report like #696 ("docker installed but not on PATH") can be + // triaged from one line — which docker binary was chosen and that it was + // exec'd DIRECTLY rather than shell-wrapped with bare `docker`. + c.logger.Info("Docker spawn: direct-exec of resolved docker binary", + zap.String("server", c.config.Name), + zap.String("docker_path", resolved), + zap.Bool("shell_wrapped", false)) return resolved, finalArgs, false case resErr != nil: c.logger.Warn("Could not resolve docker to an absolute path; falling back to bare 'docker' via login shell (isolated server may fail if docker is not on the spawn PATH)", @@ -132,6 +140,14 @@ func (c *Client) setupDockerIsolation(command string, args []string) (dockerComm if isDirectExecutable(resolved) { dockerBin = resolved } + // INFO: the login-shell fallback is the ONLY path that can produce #696's + // `command not found: docker`, so surface that we took it (and whether we at + // least wrap the resolved absolute path vs. bare `docker`). + c.logger.Info("Docker spawn: login-shell wrap fallback", + zap.String("server", c.config.Name), + zap.String("docker_command", dockerBin), + zap.Bool("docker_resolved", isDirectExecutable(resolved)), + zap.Bool("shell_wrapped", true)) shellCmd, shellArgs := c.wrapWithUserShell(dockerBin, finalArgs) return shellCmd, shellArgs, true } diff --git a/internal/upstream/core/connection_docker_exec_e2e_test.go b/internal/upstream/core/connection_docker_exec_e2e_test.go new file mode 100644 index 00000000..406b18a7 --- /dev/null +++ b/internal/upstream/core/connection_docker_exec_e2e_test.go @@ -0,0 +1,89 @@ +package core + +import ( + "context" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/smart-mcp-proxy/mcpproxy-go/internal/shellwrap" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeRecordingDockerShim writes a fake `docker` shell script that appends its +// own argv[0] (the path it was invoked as) to recordFile and exits 0. It lets a +// test prove, at the OS-exec level, HOW docker was launched: a direct-exec +// records the absolute bundle path, while a bare-`docker` login-shell wrap would +// either fail (`command not found`) or record just "docker". +func writeRecordingDockerShim(t *testing.T, dir, recordFile string) string { + t.Helper() + name := "docker" + if runtime.GOOS == osWindows { + name = "docker.bat" + } + p := filepath.Join(dir, name) + script := "#!/bin/sh\nprintf '%s\\n' \"$0\" >> " + shellSingleQuote(recordFile) + "\nexit 0\n" + if err := os.WriteFile(p, []byte(script), 0o755); err != nil { + t.Fatalf("write recording docker shim: %v", err) + } + return p +} + +func shellSingleQuote(s string) string { + return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'" +} + +// TestE2E_DockerSpawnExecsAbsoluteBundlePath is the behavioral end-to-end proof +// for #696. It drives the REAL shellwrap.ResolveDockerPath + setupDockerIsolation +// chain under the field scenario (Docker Desktop bundle present, docker absent +// from the spawn PATH and login shell), then ACTUALLY execs the resulting +// (command, args) the way connection_stdio would. A recording shim captures the +// path it was invoked as. +// +// The assertion is the crux of the bug report: the process is launched by its +// ABSOLUTE bundle path — so it CANNOT produce `zsh:1: command not found: docker` +// — proving the spawn pipeline (not just setupDockerIsolation's return value) +// uses the resolved binary. +func TestE2E_DockerSpawnExecsAbsoluteBundlePath(t *testing.T) { + if runtime.GOOS == osWindows { + t.Skip("shim is a /bin/sh script; unix-only") + } + useRealDockerResolver(t) + forceDockerDaemonEnvGOOS(t, osDarwin) + + bundleDir := t.TempDir() + recordFile := filepath.Join(t.TempDir(), "invocations.log") + bundleDocker := writeRecordingDockerShim(t, bundleDir, recordFile) + + restore := shellwrap.SetWellKnownDockerPathsForTest(func() []string { return []string{bundleDocker} }) + t.Cleanup(restore) + // Spawn PATH and login shell cannot resolve docker — only the well-known + // bundle probe can (the exact #696 condition). + t.Setenv("PATH", t.TempDir()) + t.Setenv("SHELL", "/nonexistent/shell-must-not-be-invoked") + + c := newIsolatedTestClient() + cmd, args, shellWrapped := c.setupDockerIsolation(c.config.Command, c.config.Args) + require.False(t, shellWrapped, "#696: bundle-resolved docker must be direct-exec'd") + + // Exec it exactly as connection_stdio would (exec.CommandContext(cmd, args...)). + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + execCmd := exec.CommandContext(ctx, cmd, args...) + require.NoError(t, execCmd.Run(), "spawning the resolved docker binary must succeed") + + recorded, err := os.ReadFile(recordFile) + require.NoError(t, err, "the docker shim must have been invoked and recorded its argv[0]") + got := strings.TrimSpace(string(recorded)) + + assert.Equal(t, bundleDocker, got, + "#696: docker must be exec'd by its absolute bundle path, got %q", got) + assert.NotEqual(t, "docker", got, "#696: must not be launched as bare 'docker'") + require.True(t, filepath.IsAbs(got), "argv[0] must be absolute, got %q", got) +} diff --git a/internal/upstream/core/connection_docker_integration_test.go b/internal/upstream/core/connection_docker_integration_test.go new file mode 100644 index 00000000..9ee4bd28 --- /dev/null +++ b/internal/upstream/core/connection_docker_integration_test.go @@ -0,0 +1,229 @@ +package core + +import ( + "fmt" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/smart-mcp-proxy/mcpproxy-go/internal/shellwrap" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zaptest/observer" +) + +// newObservedIsolatedClient returns an isolated test client whose logger feeds +// an in-memory observer, so tests can assert on the spawn-decision INFO logs. +func newObservedIsolatedClient(t *testing.T) (*Client, *observer.ObservedLogs) { + t.Helper() + core, recorded := observer.New(zap.InfoLevel) + c := newIsolatedTestClient() + c.logger = zap.New(core) + return c, recorded +} + +// TestSpawnDecisionIsObservable_DirectExec asserts the #696 diagnosability fix: +// the direct-exec branch emits an INFO line carrying the resolved docker_path so +// a field report can be triaged from main.log alone. +func TestSpawnDecisionIsObservable_DirectExec(t *testing.T) { + if runtime.GOOS == osWindows { + t.Skip("unix fake-binary fixture") + } + useRealDockerResolver(t) + forceDockerDaemonEnvGOOS(t, osDarwin) + + bundleDocker := writeFakeDockerExecutable(t) + restore := shellwrap.SetWellKnownDockerPathsForTest(func() []string { return []string{bundleDocker} }) + t.Cleanup(restore) + t.Setenv("PATH", t.TempDir()) + t.Setenv("SHELL", "/nonexistent/shell-must-not-be-invoked") + + c, recorded := newObservedIsolatedClient(t) + _, _, shellWrapped := c.setupDockerIsolation(c.config.Command, c.config.Args) + require.False(t, shellWrapped) + + entries := recorded.FilterMessage("Docker spawn: direct-exec of resolved docker binary").All() + require.Len(t, entries, 1, "direct-exec must log exactly one observable spawn-decision line") + fields := entries[0].ContextMap() + assert.Equal(t, bundleDocker, fields["docker_path"], "spawn log must record the resolved absolute docker path") + assert.Equal(t, false, fields["shell_wrapped"]) +} + +// TestSpawnDecisionIsObservable_ShellWrapFallback asserts the fallback branch is +// equally observable and flags whether docker even resolved — the one path that +// can yield #696's `command not found: docker`. +func TestSpawnDecisionIsObservable_ShellWrapFallback(t *testing.T) { + orig := resolveDockerBinary + t.Cleanup(func() { resolveDockerBinary = orig }) + resolveDockerBinary = func(_ *zap.Logger) (string, error) { + return "", fmt.Errorf("docker not found") + } + + c, recorded := newObservedIsolatedClient(t) + _, _, shellWrapped := c.setupDockerIsolation(c.config.Command, c.config.Args) + require.True(t, shellWrapped) + + entries := recorded.FilterMessage("Docker spawn: login-shell wrap fallback").All() + require.Len(t, entries, 1, "shell-wrap fallback must log exactly one observable spawn-decision line") + fields := entries[0].ContextMap() + assert.Equal(t, true, fields["shell_wrapped"]) + assert.Equal(t, false, fields["docker_resolved"], "unresolved docker must be flagged in the spawn log") + assert.Equal(t, cmdDocker, fields["docker_command"], "fallback with no resolution wraps bare 'docker'") +} + +// These tests close the integration gap behind GitHub #696: the existing +// connection_docker_test.go suite STUBS resolveDockerBinary, so it proves the +// gating logic in isolation but never exercises the REAL +// shellwrap.ResolveDockerPath -> setupDockerIsolation chain. The #696 field +// failure ("docker installed via Docker Desktop but not on the spawn PATH" +// still spawns bare `docker` and dies with `command not found: docker`) can +// only be reproduced — or refuted — by driving the real resolver end-to-end. +// +// useRealDockerResolver pins resolveDockerBinary to the production resolver +// (un-stubbing any leakage from a sibling test) and clears the process-wide +// docker-path cache so resolution starts from a clean slate. +func useRealDockerResolver(t *testing.T) { + t.Helper() + orig := resolveDockerBinary + resolveDockerBinary = shellwrap.ResolveDockerPath + shellwrap.ResetDockerPathCacheForTest() + t.Cleanup(func() { + resolveDockerBinary = orig + shellwrap.ResetDockerPathCacheForTest() + }) +} + +// TestIntegration_DockerOnlyAtBundlePath_DirectExecs is the faithful #696 +// reproduction: on macOS, Docker Desktop's CLI lives only inside the app bundle +// (/Applications/Docker.app/Contents/Resources/bin/docker) and is absent from +// the spawn PATH. The real resolver must discover it via the well-known-path +// probe, and setupDockerIsolation must DIRECT-EXEC that absolute path — never +// fall back to a `$SHELL -l -c "docker run …"` wrap with bare `docker` (which is +// what the login shell cannot resolve and what the field report shows failing). +func TestIntegration_DockerOnlyAtBundlePath_DirectExecs(t *testing.T) { + if runtime.GOOS == osWindows { + t.Skip("well-known bundle probe + login-shell fallback are unix-only") + } + useRealDockerResolver(t) + forceDockerDaemonEnvGOOS(t, osDarwin) // matches the affected hosts + + // Docker exists ONLY at a "bundle" path, off the spawn PATH. + bundleDocker := writeFakeDockerExecutable(t) + restore := shellwrap.SetWellKnownDockerPathsForTest(func() []string { return []string{bundleDocker} }) + t.Cleanup(restore) + + // Spawn PATH excludes the bundle dir (the #696 condition) and the login + // shell can't find docker either, so the well-known probe is the ONLY way to + // resolve it. + t.Setenv("PATH", t.TempDir()) + t.Setenv("SHELL", "/nonexistent/shell-must-not-be-invoked") + + c := newIsolatedTestClient() + cmd, args, shellWrapped := c.setupDockerIsolation(c.config.Command, c.config.Args) + + require.False(t, shellWrapped, + "#696: docker resolved at the bundle path MUST be direct-exec'd, not shell-wrapped with bare docker") + assert.Equal(t, bundleDocker, cmd, + "#696: spawn must exec the resolved absolute bundle path, got %q", cmd) + assert.NotEqual(t, cmdDocker, cmd, "#696: spawn must not degrade to bare 'docker'") + require.True(t, filepath.IsAbs(cmd), "spawn command must be absolute, got %q", cmd) + require.NotEmpty(t, args) + assert.Equal(t, cmdRun, args[0], "args must be raw argv (first token 'run'), got %v", args) + for _, a := range args { + assert.NotContains(t, a, " run ", "args must be raw argv tokens, not a shell -c string: %v", args) + } +} + +// TestIntegration_DockerOnPath_DirectExecs covers the other real resolution +// leg: when docker IS on the spawn PATH, exec.LookPath resolves it to an +// absolute path and the spawn direct-execs it. +func TestIntegration_DockerOnPath_DirectExecs(t *testing.T) { + if runtime.GOOS == osWindows { + t.Skip("unix fake-binary fixture") + } + useRealDockerResolver(t) + forceDockerDaemonEnvGOOS(t, osDarwin) + + pathDocker := writeFakeDockerExecutable(t) + // Put the fake docker's dir on PATH so exec.LookPath finds it first. + t.Setenv("PATH", filepath.Dir(pathDocker)) + // No well-known override needed; LookPath should win before the probe. + t.Setenv("SHELL", "/nonexistent/shell-must-not-be-invoked") + + c := newIsolatedTestClient() + cmd, args, shellWrapped := c.setupDockerIsolation(c.config.Command, c.config.Args) + + require.False(t, shellWrapped, "docker on PATH must resolve to an absolute path and direct-exec") + assert.Equal(t, pathDocker, cmd, "spawn must exec the LookPath-resolved absolute path, got %q", cmd) + require.NotEmpty(t, args) + assert.Equal(t, cmdRun, args[0]) +} + +// TestIntegration_DockerUnresolvable_FallsBackToBareDocker proves the worst +// case (#696 absent): when docker is nowhere — not on PATH, not at a well-known +// path, not in the login shell — the spawn falls back to a login-shell wrap of +// bare `docker`. This is the ONLY path that should ever produce the field +// failure's `command not found: docker`, and it requires docker to be genuinely +// absent (not merely off PATH). +func TestIntegration_DockerUnresolvable_FallsBackToBareDocker(t *testing.T) { + if runtime.GOOS == osWindows { + t.Skip("unix login-shell fallback") + } + useRealDockerResolver(t) + forceDockerDaemonEnvGOOS(t, osDarwin) + + // No docker anywhere: empty PATH, well-known list points at nothing, login + // shell sabotaged so its lookup fails too. + restore := shellwrap.SetWellKnownDockerPathsForTest(func() []string { return nil }) + t.Cleanup(restore) + t.Setenv("PATH", t.TempDir()) + t.Setenv("SHELL", "/nonexistent/shell-must-not-be-invoked") + + c := newIsolatedTestClient() + _, shellArgs, shellWrapped := c.setupDockerIsolation(c.config.Command, c.config.Args) + + require.True(t, shellWrapped, "unresolvable docker must fall back to the login-shell wrap") + require.NotEmpty(t, shellArgs) + cmdStr := shellArgs[len(shellArgs)-1] + assert.True(t, strings.HasPrefix(cmdStr, "docker run"), + "only a genuinely-absent docker should degrade to bare 'docker', got %q", cmdStr) +} + +// TestIntegration_StatusAndSpawnResolverDoNotDiverge guards the invariant the +// #696 v0.39.1 report worried about ("detection discovers docker_path but the +// spawn doesn't use it"). docker_status.docker_path (server.dockerPathResolver) +// and the spawn (core.resolveDockerBinary) both call shellwrap.ResolveDockerPath +// and share ONE process-wide cache, so for a given environment they MUST agree. +// If a future refactor gives the status panel its own resolver, this test +// fails — surfacing exactly the divergence the field reports feared. +func TestIntegration_StatusAndSpawnResolverDoNotDiverge(t *testing.T) { + if runtime.GOOS == osWindows { + t.Skip("unix fake-binary fixture") + } + useRealDockerResolver(t) + + bundleDocker := writeFakeDockerExecutable(t) + restore := shellwrap.SetWellKnownDockerPathsForTest(func() []string { return []string{bundleDocker} }) + t.Cleanup(restore) + t.Setenv("PATH", t.TempDir()) + t.Setenv("SHELL", "/nonexistent/shell-must-not-be-invoked") + + // Spawn side (the value setupDockerIsolation would exec). + spawnPath, err := resolveDockerBinary(zap.NewNop()) + require.NoError(t, err) + + // Status side reports the SAME resolution branch + path from the shared cache. + source := shellwrap.ResolveDockerSource(zap.NewNop()) + statusPath, err := shellwrap.ResolveDockerPath(zap.NewNop()) + require.NoError(t, err) + + assert.Equal(t, bundleDocker, spawnPath, "spawn must resolve the bundle path") + assert.Equal(t, spawnPath, statusPath, "status and spawn must resolve to the SAME docker path (no divergence)") + assert.Equal(t, shellwrap.DockerSourceBundled, source, + "status must report the bundled source matching the spawn resolution") + assert.True(t, isDirectExecutable(statusPath), + "the path docker_status reports must itself satisfy the direct-exec contract") +} diff --git a/internal/upstream/core/connection_stdio.go b/internal/upstream/core/connection_stdio.go index bf7594a3..6e4ac0a7 100644 --- a/internal/upstream/core/connection_stdio.go +++ b/internal/upstream/core/connection_stdio.go @@ -280,8 +280,16 @@ func (c *Client) connectStdio(ctx context.Context) error { // was launched and whether Docker isolation was in effect) so // users can tell from one log line whether to look at the host // command or the Docker layer. + // + // Report the RESOLVED command actually exec'd (finalCommand), not + // c.config.Command. For a Docker-isolated server config.Command is + // always "docker", so reporting it made a successful direct-exec of an + // absolute path (e.g. /Applications/Docker.app/.../docker, #696) look + // identical in the error to a bare-`docker` spawn — actively + // misdirecting diagnosis. finalCommand is the real argv[0]: an absolute + // path on direct-exec, or the login shell on the shell-wrap fallback. return fmt.Errorf("stdio transport (command=%q, docker_isolation=%t): %w", - c.config.Command, c.isDockerCommand, err) + finalCommand, c.isDockerCommand, err) } // CRITICAL FIX: Extract underlying process from mcp-go transport for lifecycle management