From cab08042f2770a28c7ee3a44bbcee1715bcc7b43 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 15:20:11 +0200 Subject: [PATCH 1/4] Add local-env pipeline, detection, and package-manager interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fifth in the stacked local-env series. This completes the libs/localenv engine; the uv backend and the CLI command follow in later PRs. - pipeline.go: the six-phase orchestrator (preflight → resolve → fetch → merge → provision → validate) that ties the earlier layers together and records per-phase status into the Result. It drives everything through the PackageManager interface, so it is unit-tested end to end against a fake package manager and a stub compute client. This file also owns the filesystem/artifact constants and the canonical phase-order slice, which the foundation PR intentionally left to their consumer. - pkgmanager.go: the PackageManager interface the pipeline provisions through (implemented by uv in the next PR). - detect.go: package-manager detection (uv vs conda vs pip) and the writability preflight, biased toward uv since uv's native project file is the pyproject.toml this command already merges. A read error on an existing pyproject.toml that is not a not-exist error is treated as a failure rather than as greenfield, so a permission or transient I/O error can never cause the project to be overwritten with a fresh file and no backup. Depends on the foundation, target, constraints, and merge PRs. The package is still dormant: nothing under cmd/ imports it. Co-authored-by: Isaac --- libs/localenv/detect.go | 80 ++++++ libs/localenv/detect_test.go | 45 +++ libs/localenv/pipeline.go | 486 +++++++++++++++++++++++++++++++++ libs/localenv/pipeline_test.go | 369 +++++++++++++++++++++++++ libs/localenv/pkgmanager.go | 31 +++ 5 files changed, 1011 insertions(+) create mode 100644 libs/localenv/detect.go create mode 100644 libs/localenv/detect_test.go create mode 100644 libs/localenv/pipeline.go create mode 100644 libs/localenv/pipeline_test.go create mode 100644 libs/localenv/pkgmanager.go diff --git a/libs/localenv/detect.go b/libs/localenv/detect.go new file mode 100644 index 0000000000..8ccb84d117 --- /dev/null +++ b/libs/localenv/detect.go @@ -0,0 +1,80 @@ +package localenv + +import ( + "os" + "path/filepath" +) + +// manager identifies the Python package manager a project uses. P0 only +// supports uv; every other value results in a clean E_MANAGER_UNSUPPORTED exit. +type manager string + +const ( + managerUv manager = "uv" + managerConda manager = "conda" + managerPip manager = "pip" +) + +// detectManager inspects projectDir for package-manager markers, only deep +// enough to branch uv vs. not-uv (spec §5). It emits no telemetry (spec §5). +// +// Detection is deliberately biased toward uv, because uv's native project file +// is pyproject.toml (PEP 621) — the same format this command writes and merges: +// - A uv marker (uv.lock or a [tool.uv] table) → uv. +// - A pyproject.toml with no competing marker → uv (a plain PEP 621 project is +// exactly the "existing project merge" case; uv can drive it). +// - conda (environment.yml) or pip (requirements.txt) with no pyproject.toml → +// that manager; automated setup is P1, so the caller exits cleanly. +// - Greenfield (no markers at all) → uv, the manager this command provisions. +// +// A conda/pip marker that sits alongside a pyproject.toml still resolves to uv: +// the project already has the file we drive, so we proceed rather than block. +func detectManager(projectDir string) manager { + // uv markers take precedence: an existing uv project or lockfile. + if fileExists(filepath.Join(projectDir, "uv.lock")) { + return managerUv + } + if fileExists(filepath.Join(projectDir, pyprojectFile)) { + // A pyproject.toml — with or without a [tool.uv] table — is uv-drivable. + return managerUv + } + + // No pyproject.toml: a conda or pip marker means a non-uv project we cannot + // yet automate. conda before pip (environment.yml is the more specific signal). + if fileExists(filepath.Join(projectDir, "environment.yml")) || + fileExists(filepath.Join(projectDir, "environment.yaml")) { + return managerConda + } + if fileExists(filepath.Join(projectDir, "requirements.txt")) { + return managerPip + } + + // Greenfield: nothing to disambiguate; this command provisions uv. + return managerUv +} + +// managerGuidance returns the actionable, non-blaming message shown when a +// non-uv manager is detected (spec §5). +func managerGuidance(m manager) string { + return "detected a " + string(m) + " project; automated setup for " + string(m) + + " is not yet available (P1). Use a uv project (add a pyproject.toml with a [tool.uv] table, or run `uv init`) to provision automatically" +} + +// fileExists reports whether path exists and is a regular file. +func fileExists(path string) bool { + info, err := os.Stat(path) + return err == nil && !info.IsDir() +} + +// ensureWritable verifies the process can create files in dir by creating and +// removing a temporary file. A permission failure is reported so preflight can +// stop before any real write (invariant 1). +func ensureWritable(dir string) error { + f, err := os.CreateTemp(dir, ".localenv-writecheck-*") + if err != nil { + return err + } + name := f.Name() + f.Close() + return os.Remove(name) +} diff --git a/libs/localenv/detect_test.go b/libs/localenv/detect_test.go new file mode 100644 index 0000000000..304a0e8118 --- /dev/null +++ b/libs/localenv/detect_test.go @@ -0,0 +1,45 @@ +package localenv + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDetectManager(t *testing.T) { + write := func(t *testing.T, files ...string) string { + dir := t.TempDir() + for _, f := range files { + require.NoError(t, os.WriteFile(filepath.Join(dir, f), []byte("x"), 0o644)) + } + return dir + } + + cases := []struct { + name string + files []string + want manager + }{ + {"greenfield", nil, managerUv}, + {"uv lock", []string{"uv.lock"}, managerUv}, + {"plain pyproject", []string{"pyproject.toml"}, managerUv}, + {"pyproject wins over requirements", []string{"pyproject.toml", "requirements.txt"}, managerUv}, + {"conda only", []string{"environment.yml"}, managerConda}, + {"conda yaml", []string{"environment.yaml"}, managerConda}, + {"pip only", []string{"requirements.txt"}, managerPip}, + {"conda before pip", []string{"environment.yml", "requirements.txt"}, managerConda}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, detectManager(write(t, tc.files...))) + }) + } +} + +func TestEnsureWritable(t *testing.T) { + assert.NoError(t, ensureWritable(t.TempDir())) + assert.Error(t, ensureWritable(filepath.Join(t.TempDir(), "does-not-exist"))) +} diff --git a/libs/localenv/pipeline.go b/libs/localenv/pipeline.go new file mode 100644 index 0000000000..7b7320d4bd --- /dev/null +++ b/libs/localenv/pipeline.go @@ -0,0 +1,486 @@ +package localenv + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/log" + "github.com/hexops/gotextdiff" + "github.com/hexops/gotextdiff/myers" + "github.com/hexops/gotextdiff/span" +) + +// Filenames and directories the pipeline reads and writes. venvDir is also the +// virtualenv directory the package manager provisions. +const ( + pyprojectFile = "pyproject.toml" + backupFile = "pyproject.toml.bak" + venvDir = ".venv" +) + +// artifactSource values reported in --json resolved.artifactSource (spec §6). +const ( + artifactNetwork = "network" + artifactCache = "cache" +) + +// allPhases is the canonical phase order for the --json "phases" array. The +// pipeline seeds every phase pending from this slice and flips each to ok/error +// as the run progresses. +var allPhases = []PhaseName{ + PhasePreflight, + PhaseResolve, + PhaseFetch, + PhaseMerge, + PhaseProvision, + PhaseValidate, +} + +// Pipeline orchestrates the dbconnect sync phases against a project directory. +type Pipeline struct { + Mode Mode + Check bool + ProjectDir string + ConstraintBaseURL string + CacheDir string + Flags TargetFlags + Bundle BundleTarget + Compute ComputeClient + PM PackageManager + + // res accumulates phase statuses and result fields as the run progresses. + res *Result +} + +// Run executes all pipeline phases in order and returns a fully populated Result. +// On a phase error, Result.Error is set and the same error is also returned. The +// Result always carries the full canonical phase list: phases completed before a +// failure are "ok", the failing phase is "error", and the rest are "pending". +func (p *Pipeline) Run(ctx context.Context) (*Result, error) { + log.Debugf(ctx, "local-env: mode=%s check=%v project=%s cacheDir=%s constraintBaseURL=%s flags=%+v", + p.Mode, + p.Check, + filepath.ToSlash(p.ProjectDir), + filepath.ToSlash(p.CacheDir), + p.ConstraintBaseURL, + p.Flags, + ) + + p.res = &Result{ + SchemaVersion: SchemaVersion, + Command: CommandName, + Mode: p.Mode.String(), + DryRun: p.Check, + // Phases start as pending and flip to ok/error as the run progresses. + Phases: initialPhases(), + Warnings: []Warning{}, + } + + if err := p.run(ctx); err != nil { + return p.res, err + } + p.res.OK = true + return p.res, nil +} + +// run drives the phases and returns the first phase error. Result bookkeeping +// (phase status, error object) is handled by fail / markOK. +func (p *Pipeline) run(ctx context.Context) error { + // Phase: preflight — manager detection, writability, package-manager availability. + // P0 supports only uv; any other detected manager is a clean, non-blaming exit. + if m := detectManager(p.ProjectDir); m != managerUv { + return p.fail(PhasePreflight, false, NewError(ErrManagerUnsupported, nil, "%s", managerGuidance(m))) + } + if err := ensureWritable(p.ProjectDir); err != nil { + return p.fail(PhasePreflight, false, NewError(ErrNotWritable, err, "project directory %s is not writable", filepath.ToSlash(p.ProjectDir))) + } + version, err := p.PM.EnsureAvailable(ctx) + if err != nil { + return p.fail(PhasePreflight, false, asPipelineError(err, ErrUvMissing, "%s unavailable", p.PM.Name())) + } + p.markOK(PhasePreflight, p.PM.Name()+" "+version) + + // Phase: resolve — compute target → environment key. + target, err := p.resolve(ctx) + if err != nil { + return err + } + + // Phase: fetch — constraint artifact for the resolved env key. + c, err := p.fetch(ctx, target) + if err != nil { + return err + } + + // Parse the required Python minor from the artifact; a failure here reflects a + // bad artifact, reported at the fetch phase. + pyMinor, err := PythonMinorFromRequires(c.RequiresPython) + if err != nil { + return p.fail(PhaseFetch, false, NewError(ErrFetch, err, "cannot parse python version from constraints %q", c.RequiresPython)) + } + + dbcPin := c.DatabricksConnect + if p.Mode == ModeConstraintsOnly { + // constraints-only omits the databricks-connect dependency entirely. + dbcPin = "" + } + p.res.Resolved = &ResolvedInfo{ + PythonVersion: pyMinor, + DBConnectVersion: versionFromPin(dbcPin), + ArtifactSource: artifactSource(c.FromCache), + } + + // Phase: merge — compute the merged pyproject.toml (in-memory, no writes yet). + mergedBytes, greenfield, err := p.mergePlan(ctx, pyMinor, c, dbcPin) + if err != nil { + return err + } + p.res.Greenfield = greenfield + + // Check mode stops after planning — nothing below mutates disk. + if p.Check { + p.markOK(PhaseMerge, "") + p.markOK(PhaseProvision, "") + p.markOK(PhaseValidate, "") + return nil + } + + // Apply the merged content to disk (backup first for existing projects). + if err := p.applyMerge(ctx, mergedBytes, greenfield); err != nil { + return err + } + p.markOK(PhaseMerge, "") + + // Phase: provision — ensure Python, run uv sync, seed pip. + if err := p.provision(ctx, pyMinor); err != nil { + return err + } + + // Phase: validate — assert the venv matches the target. + return p.validate(ctx, pyMinor, dbcPin) +} + +// resolve runs ResolveTarget and records the resolve phase. +func (p *Pipeline) resolve(ctx context.Context) (*TargetInfo, error) { + target, err := ResolveTarget(ctx, p.Flags, p.Compute, p.Bundle) + if err != nil { + return nil, p.fail(PhaseResolve, false, asPipelineError(err, ErrResolve, "target resolution failed")) + } + p.res.Target = target + p.markOK(PhaseResolve, fmt.Sprintf("source=%s envKey=%s", target.Source, target.EnvKey)) + return target, nil +} + +// fetch fetches constraints for the resolved target and records the fetch phase. +func (p *Pipeline) fetch(ctx context.Context, target *TargetInfo) (*Constraints, error) { + c, err := FetchConstraints(ctx, p.ConstraintBaseURL, target.EnvKey, p.CacheDir) + if err != nil { + // FetchConstraints classifies the cause: E_ENV_UNSUPPORTED for a missing + // key (404) versus E_FETCH for transport failure with no cache. Both are + // discovered here, so both are reported at the fetch phase (the JSON keeps + // the failing phase and error.failurePhase in agreement). + return nil, p.fail(PhaseFetch, false, asPipelineError(err, ErrFetch, "fetch constraints failed")) + } + p.markOK(PhaseFetch, fmt.Sprintf("source=%s fromCache=%v", c.SourceURL, c.FromCache)) + return c, nil +} + +// pyprojectPath returns the path to pyproject.toml in the project directory. +func (p *Pipeline) pyprojectPath() string { + return filepath.Join(p.ProjectDir, pyprojectFile) +} + +// backupPath returns the path to the pyproject.toml backup file. +func (p *Pipeline) backupPath() string { + return filepath.Join(p.ProjectDir, backupFile) +} + +// mergePlan computes the merged pyproject.toml bytes (without writing to disk), +// decides greenfield vs. existing, and builds the Plan (populated only under +// --check). dbcPin is the databricks-connect pin to inject, or "" in +// constraints-only mode. +func (p *Pipeline) mergePlan(_ context.Context, pyMinor string, c *Constraints, dbcPin string) (merged []byte, greenfield bool, err error) { + pyproject := p.pyprojectPath() + backup := p.backupPath() + + // Determine base bytes for the merge. For an existing project with a backup, + // the backup is the canonical base so the merge starts from the original + // unmanaged state. + var baseBytes []byte + if data, rerr := os.ReadFile(backup); rerr == nil { + baseBytes = data + } else if !errors.Is(rerr, os.ErrNotExist) { + // A backup that exists but can't be read (permissions, I/O) must not be + // silently ignored: falling through would treat the project as greenfield + // and overwrite it. Fail loudly instead. + return nil, false, p.fail(PhaseMerge, false, NewError(ErrMerge, rerr, "read backup %s failed", filepath.ToSlash(backup))) + } + if baseBytes == nil { + data, rerr := os.ReadFile(pyproject) + switch { + case rerr == nil: + baseBytes = data + case !errors.Is(rerr, os.ErrNotExist): + // Only a genuine not-exist means greenfield. Any other read error on an + // existing pyproject.toml (permission change, transient I/O, delete race + // after detectManager saw it) must not be misread as greenfield — that + // would render a fresh file and overwrite the user's project with no + // backup. Fail instead of destroying unrecoverable state (invariant 2). + return nil, false, p.fail(PhaseMerge, false, NewError(ErrMerge, rerr, "read pyproject.toml %s failed", filepath.ToSlash(pyproject))) + } + } + greenfield = baseBytes == nil + + // The artifact drives the merge; in constraints-only mode we clear the + // databricks-connect pin so it is neither written nor asserted. + effective := *c + effective.DatabricksConnect = dbcPin + + var changedRegions []string + if greenfield { + // No existing pyproject.toml — render a fresh one. The project name comes + // from the directory name as a reasonable default. + projectName := filepath.Base(p.ProjectDir) + merged = RenderFreshPyproject(projectName, effective) + changedRegions = []string{regionRequiresPython, regionToolUv} + if dbcPin != "" { + changedRegions = append(changedRegions, regionDatabricksConnect) + } + } else { + merged, changedRegions, err = MergeManaged(baseBytes, effective) + if err != nil { + return nil, greenfield, p.fail(PhaseMerge, false, NewError(ErrMerge, err, "merge managed regions failed")) + } + } + + // Under --check, build the plan (with a diff) for reporting. A real run does + // not need the diff. + if p.Check { + oldStr := "" + newStr := string(merged) + oldName := pyprojectFile + newName := pyprojectFile + if !greenfield { + oldStr = string(baseBytes) + newName = pyprojectFile + ".new" + } + edits := myers.ComputeEdits(span.URIFromPath(oldName), oldStr, newStr) + diff := fmt.Sprint(gotextdiff.ToUnified(oldName, newName, oldStr, edits)) + + plan := &Plan{ + WouldWrite: filepath.ToSlash(pyproject), + Diff: diff, + ChangedRegions: changedRegions, + WouldInstallPython: pyMinor, + } + if !greenfield { + plan.WouldBackup = filepath.ToSlash(backup) + } + p.res.Plan = plan + } + return merged, greenfield, nil +} + +// applyMerge writes the merged bytes to disk, backing up an existing +// pyproject.toml first. From this point on, disk has been mutated. +func (p *Pipeline) applyMerge(_ context.Context, mergedBytes []byte, greenfield bool) error { + pyproject := p.pyprojectPath() + backup := p.backupPath() + + if !greenfield { + // Back up before modifying so the user's original is recoverable + // (invariant 2). Only create the backup when one does not already exist: + // on a re-run the existing .bak is the canonical original unmanaged state + // (mergePlan used it as the base), so overwriting it with the already-merged + // pyproject.toml would destroy that baseline. + if _, err := os.Stat(backup); err != nil { + if err := copyFile(pyproject, backup); err != nil { + return p.fail(PhaseMerge, false, NewError(ErrMerge, err, "backup pyproject.toml failed")) + } + } + p.res.BackupPath = filepath.ToSlash(backup) + } + + if err := os.WriteFile(pyproject, mergedBytes, 0o644); err != nil { + code := ErrMerge + if greenfield { + code = ErrWrite + } + return p.fail(PhaseMerge, true, NewError(code, err, "write pyproject.toml failed")) + } + return nil +} + +// provision ensures the required Python version is installed, runs uv sync, and +// seeds pip. All three are reported under the provision phase. +func (p *Pipeline) provision(ctx context.Context, pyMinor string) error { + if err := p.PM.EnsurePython(ctx, pyMinor); err != nil { + return p.fail(PhaseProvision, true, asPipelineError(err, ErrPythonInstall, "ensure python %s failed", pyMinor)) + } + if err := p.PM.Provision(ctx, p.ProjectDir); err != nil { + return p.fail(PhaseProvision, true, asPipelineError(err, ErrProvision, "provision failed")) + } + if err := p.PM.PostProvision(ctx, p.ProjectDir); err != nil { + return p.fail(PhaseProvision, true, asPipelineError(err, ErrProvision, "post-provision failed")) + } + p.markOK(PhaseProvision, "") + return nil +} + +// validate reads the Python and databricks-connect versions from the venv and +// populates the venv path. dbcPin is "" in constraints-only mode, where the DB +// Connect assertion is skipped. +func (p *Pipeline) validate(ctx context.Context, expectedPyMinor, dbcPin string) error { + pyVer, dbcVer, err := p.PM.Validate(ctx, p.ProjectDir) + if err != nil { + return p.fail(PhaseValidate, true, asPipelineError(err, ErrValidate, "validation failed")) + } + + // Assert the installed Python minor matches the target. + if pyVer != expectedPyMinor { + return p.fail(PhaseValidate, true, NewError(ErrValidate, nil, + "python version mismatch: want %s, got %s", expectedPyMinor, pyVer)) + } + + // In default mode, assert the installed databricks-connect major matches the + // pin's major. dbcPin is e.g. "databricks-connect~=17.2.0"; dbcVer is "17.2.0". + if dbcPin != "" { + pinMajor := dbcMajorFromPin(dbcPin) + if pinMajor == "" { + return p.fail(PhaseValidate, true, NewError(ErrValidate, nil, + "cannot determine databricks-connect major version from pin %q", dbcPin)) + } + installedMajor := majorVersion(dbcVer) + if installedMajor == "" { + return p.fail(PhaseValidate, true, NewError(ErrValidate, nil, + "cannot determine installed databricks-connect major version from %q", dbcVer)) + } + if pinMajor != installedMajor { + return p.fail(PhaseValidate, true, NewError(ErrValidate, nil, + "databricks-connect major version mismatch: want %s.x, got %s", pinMajor, dbcVer)) + } + } + + // Report the installed databricks-connect version only in default mode. In + // constraints-only mode databricks-connect is not a managed dependency, so the + // spec omits dbconnectVersion even if the package is present transitively. + defaultMode := dbcPin != "" + + detail := "python=" + pyVer + if defaultMode && dbcVer != "" { + detail += " databricks-connect=" + dbcVer + } + p.markOK(PhaseValidate, detail) + + p.res.VenvPath = filepath.ToSlash(filepath.Join(p.ProjectDir, venvDir)) + if p.res.Resolved != nil { + if defaultMode { + p.res.Resolved.DBConnectVersion = dbcVer + } else { + p.res.Resolved.DBConnectVersion = "" + } + } + return nil +} + +// initialPhases returns the canonical phase list with every phase pending. +func initialPhases() []PhaseStatus { + phases := make([]PhaseStatus, len(allPhases)) + for i, name := range allPhases { + phases[i] = PhaseStatus{Phase: name, Status: StatusPending} + } + return phases +} + +// markOK marks a phase ok with an optional human-readable detail. +func (p *Pipeline) markOK(name PhaseName, detail string) { + for i := range p.res.Phases { + if p.res.Phases[i].Phase == name { + p.res.Phases[i].Status = StatusOK + p.res.Phases[i].Detail = detail + return + } + } +} + +// fail marks the given phase as errored, attaches the error (with its phase and +// disk-mutation flag) to the Result, and returns it. Phases after the failing +// one remain pending. +func (p *Pipeline) fail(phase PhaseName, diskMutated bool, pe *PipelineError) error { + pe.FailurePhase = phase + pe.DiskMutated = diskMutated + for i := range p.res.Phases { + if p.res.Phases[i].Phase == phase { + p.res.Phases[i].Status = StatusError + p.res.Phases[i].Detail = pe.Error() + break + } + } + p.res.Error = pe + return pe +} + +// asPipelineError returns err as a *PipelineError if it already is one, otherwise +// wraps it with the fallback code and message. +func asPipelineError(err error, fallback ErrorCode, format string, args ...any) *PipelineError { + if pe, ok := errors.AsType[*PipelineError](err); ok { + return pe + } + return NewError(fallback, err, format, args...) +} + +// artifactSource maps the from-cache flag to the JSON artifactSource value. +func artifactSource(fromCache bool) string { + if fromCache { + return artifactCache + } + return artifactNetwork +} + +// versionFromPin extracts the bare version from a dependency pin such as +// "databricks-connect~=17.2.0" → "17.2.0". Returns "" when pin is empty or has +// no version component. +func versionFromPin(pin string) string { + for i, c := range pin { + if c >= '0' && c <= '9' { + return pin[i:] + } + } + return "" +} + +// dbcMajorFromPin extracts the major version number from a databricks-connect +// pin string such as "databricks-connect~=17.2.0". Returns "" if unparseable. +func dbcMajorFromPin(pin string) string { + return majorVersion(versionFromPin(pin)) +} + +// majorVersion returns the major portion of a version string (digits before the +// first dot), e.g. "17" from "17.2.0". A bare integer like "17" returns "17". +// Returns "" for an empty string. +func majorVersion(v string) string { + if v == "" { + return "" + } + before, _, ok := strings.Cut(v, ".") + if !ok { + return v + } + return before +} + +// copyFile copies src to dst, creating or overwriting dst. +func copyFile(src, dst string) error { + data, err := os.ReadFile(src) + if err != nil { + return fmt.Errorf("read %s: %w", src, err) + } + if err := os.WriteFile(dst, data, 0o644); err != nil { + return fmt.Errorf("write %s: %w", dst, err) + } + return nil +} diff --git a/libs/localenv/pipeline_test.go b/libs/localenv/pipeline_test.go new file mode 100644 index 0000000000..a9eb80c5aa --- /dev/null +++ b/libs/localenv/pipeline_test.go @@ -0,0 +1,369 @@ +package localenv + +import ( + "context" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type fakePM struct{ py, dbc string } + +func (fakePM) Name() string { return "fake" } +func (fakePM) EnsureAvailable(context.Context) (string, error) { return "fake 1.0", nil } +func (fakePM) EnsurePython(context.Context, string) error { return nil } +func (fakePM) Provision(context.Context, string) error { return nil } +func (fakePM) PostProvision(context.Context, string) error { return nil } +func (f fakePM) Validate(context.Context, string) (string, string, error) { + return f.py, f.dbc, nil +} + +func writeProject(t *testing.T) string { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "pyproject.toml"), []byte(`[project] +name = "demo" +requires-python = ">=3.10" + +[dependency-groups] +dev = ["databricks-connect~=16.0.0"] +`), 0o644)) + return dir +} + +func newTestServer(t *testing.T) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(sampleToml)) + })) +} + +func TestPipelineCheckMutatesNothing(t *testing.T) { + dir := writeProject(t) + before, _ := os.ReadFile(filepath.Join(dir, "pyproject.toml")) + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, Check: true, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + assert.True(t, res.DryRun) + assert.True(t, res.OK) + require.NotNil(t, res.Plan) + assert.Contains(t, res.Plan.Diff, "==3.12.*") + after, _ := os.ReadFile(filepath.Join(dir, "pyproject.toml")) + assert.Equal(t, string(before), string(after)) // unchanged +} + +func TestPipelineProvisionsAndValidatesExisting(t *testing.T) { + dir := writeProject(t) + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + assert.True(t, res.OK) + assert.False(t, res.Greenfield) + require.NotNil(t, res.Resolved) + assert.Equal(t, "3.12", res.Resolved.PythonVersion) + assert.Equal(t, "17.2.0", res.Resolved.DBConnectVersion) + assert.Equal(t, ".venv", filepath.Base(res.VenvPath)) + merged, _ := os.ReadFile(filepath.Join(dir, "pyproject.toml")) + assert.Contains(t, string(merged), `"databricks-connect~=17.2.0"`) + assert.FileExists(t, filepath.Join(dir, "pyproject.toml.bak")) +} + +func TestPipelineGreenfieldCreatesNewPyproject(t *testing.T) { + dir := t.TempDir() + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + assert.True(t, res.OK) + assert.True(t, res.Greenfield) + data, readErr := os.ReadFile(filepath.Join(dir, "pyproject.toml")) + require.NoError(t, readErr) + assert.Contains(t, string(data), `"databricks-connect~=17.2.0",`) + // No backup created when pyproject.toml did not previously exist. + assert.NoFileExists(t, filepath.Join(dir, "pyproject.toml.bak")) +} + +func TestPipelineExistingBacksUp(t *testing.T) { + dir := writeProject(t) + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + assert.True(t, res.OK) + assert.FileExists(t, filepath.Join(dir, "pyproject.toml.bak")) + assert.Equal(t, "pyproject.toml.bak", filepath.Base(res.BackupPath)) +} + +func TestPipelineConstraintsOnlyOmitsDBConnect(t *testing.T) { + dir := t.TempDir() + srv := newTestServer(t) + defer srv.Close() + + // Even though databricks-connect is present in the venv (fakePM reports a + // version), constraints-only must not assert it, not write the pin, and not + // report dbconnectVersion (spec §6 omits it in this mode). + p := &Pipeline{ + Mode: ModeConstraintsOnly, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + assert.True(t, res.OK) + assert.Equal(t, "constraints-only", res.Mode) + require.NotNil(t, res.Resolved) + assert.Empty(t, res.Resolved.DBConnectVersion, "constraints-only must omit dbconnectVersion") + data, readErr := os.ReadFile(filepath.Join(dir, "pyproject.toml")) + require.NoError(t, readErr) + assert.NotContains(t, string(data), "databricks-connect") + // The dev group is emitted empty, not with a blank-string entry. + assert.Contains(t, string(data), "dev = []") + assert.NotContains(t, string(data), `""`) +} + +func TestPipelineNoTarget(t *testing.T) { + dir := writeProject(t) + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{}, + Compute: stubCompute{}, PM: fakePM{}, + } + res, err := p.Run(t.Context()) + require.Error(t, err) + require.NotNil(t, res) + require.NotNil(t, res.Error) + assert.False(t, res.OK) + assert.Equal(t, ErrNoTarget, res.Error.Code) + // No-target is detected during target resolution, so it is reported at the + // resolve phase (the failing phase in phases[] and error.failurePhase agree). + assert.Equal(t, PhaseResolve, res.Error.FailurePhase) + // Preflight succeeded, resolve errored, everything after stays pending. + assert.Equal(t, StatusOK, phaseStatus(res, PhasePreflight)) + assert.Equal(t, StatusError, phaseStatus(res, PhaseResolve)) + assert.Equal(t, StatusPending, phaseStatus(res, PhaseProvision)) +} + +func TestPipelineRestoresBackupBeforeMerge(t *testing.T) { + dir := t.TempDir() + // Write an original pyproject.toml and a pre-existing .bak. + original := []byte(`[project] +name = "demo" +requires-python = ">=3.9" + +[dependency-groups] +dev = ["databricks-connect~=15.0.0"] +`) + require.NoError(t, os.WriteFile(filepath.Join(dir, "pyproject.toml.bak"), original, 0o644)) + // Current pyproject.toml has been mutated by a previous run. + mutated := []byte(`[project] +name = "demo" +requires-python = "==3.12.*" + +[dependency-groups] +dev = ["databricks-connect~=17.2.0"] +`) + require.NoError(t, os.WriteFile(filepath.Join(dir, "pyproject.toml"), mutated, 0o644)) + + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + require.NotNil(t, res) + // The bak content (requires-python = ">=3.9") was the base; merged result should + // contain the newly pinned version. + data, _ := os.ReadFile(filepath.Join(dir, "pyproject.toml")) + assert.Contains(t, string(data), `"databricks-connect~=17.2.0"`) + assert.Contains(t, string(data), `requires-python = "==3.12.*"`) +} + +func TestPipelineUnreadableExistingIsNotTreatedAsGreenfield(t *testing.T) { + if runtime.GOOS == "windows" || os.Getuid() == 0 { + // chmod 000 does not block reads for root or on Windows. + t.Skip("read-permission enforcement not available") + } + dir := t.TempDir() + pyproject := filepath.Join(dir, "pyproject.toml") + original := []byte(`[project] +name = "demo" +requires-python = ">=3.10" + +[dependency-groups] +dev = ["databricks-connect~=16.0.0"] +`) + require.NoError(t, os.WriteFile(pyproject, original, 0o644)) + // Make it unreadable so os.ReadFile fails with a non-not-exist error after + // detectManager has already seen the file exists. + require.NoError(t, os.Chmod(pyproject, 0o000)) + t.Cleanup(func() { _ = os.Chmod(pyproject, 0o644) }) + + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + // The run must fail at merge rather than silently overwrite the user's file. + require.Error(t, err) + var pe *PipelineError + require.ErrorAs(t, err, &pe) + assert.Equal(t, ErrMerge, pe.Code) + assert.Equal(t, PhaseMerge, pe.FailurePhase) + assert.False(t, res.Greenfield, "an unreadable existing file must not be treated as greenfield") + + // The original file must be untouched (still unreadable, not overwritten). + require.NoError(t, os.Chmod(pyproject, 0o644)) + after, readErr := os.ReadFile(pyproject) + require.NoError(t, readErr) + assert.Equal(t, string(original), string(after), "the user's pyproject.toml must not be overwritten") +} + +func TestPipelineResultPopulatesResolved(t *testing.T) { + dir := writeProject(t) + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, Check: true, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + require.NotNil(t, res.Resolved) + assert.Equal(t, "3.12", res.Resolved.PythonVersion) + assert.Equal(t, "17.2.0", res.Resolved.DBConnectVersion) + assert.Equal(t, "network", res.Resolved.ArtifactSource) +} + +// newServerWithDBC returns a test server that serves a constraints TOML with the +// given databricks-connect pin value in the dev dependency group. +func newServerWithDBC(t *testing.T, dbcPin string) *httptest.Server { + t.Helper() + body := `[project] +requires-python = "==3.12.*" + +[dependency-groups] +dev = ["` + dbcPin + `"] + +[tool.uv] +constraint-dependencies = [] +` + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(body)) + })) +} + +func TestPipelineValidateRejectsUnparseablePin(t *testing.T) { + dir := writeProject(t) + // Serve a TOML whose dev group has a malformed databricks-connect entry + // (no version digits after the package name). + srv := newServerWithDBC(t, "databricks-connect") + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.Error(t, err) + require.NotNil(t, res.Error) + assert.Equal(t, ErrValidate, res.Error.Code) + assert.Equal(t, PhaseValidate, res.Error.FailurePhase) +} + +func TestPipelineValidateRejectsUnparseableInstalledVersion(t *testing.T) { + dir := writeProject(t) + // sampleToml has databricks-connect~=17.2.0 as the pin; use an empty installed + // version string to simulate an installed version that can't be parsed. + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: ""}, + } + res, err := p.Run(t.Context()) + require.Error(t, err) + require.NotNil(t, res.Error) + assert.Equal(t, ErrValidate, res.Error.Code) +} + +func TestMajorVersion(t *testing.T) { + cases := []struct { + input string + want string + }{ + {"17.2.0", "17"}, + {"17", "17"}, + {"", ""}, + {"3.12", "3"}, + } + for _, tc := range cases { + assert.Equal(t, tc.want, majorVersion(tc.input), "input=%q", tc.input) + } +} + +// phaseStatus returns the status recorded for the named phase in res. +func phaseStatus(res *Result, name PhaseName) string { + for _, ph := range res.Phases { + if ph.Phase == name { + return ph.Status + } + } + return "" +} diff --git a/libs/localenv/pkgmanager.go b/libs/localenv/pkgmanager.go new file mode 100644 index 0000000000..fff370c76f --- /dev/null +++ b/libs/localenv/pkgmanager.go @@ -0,0 +1,31 @@ +package localenv + +import "context" + +// PackageManager manages the Python environment for a dbconnect project. +type PackageManager interface { + // Name returns the name of the package manager (e.g. "uv"). + Name() string + + // EnsureAvailable ensures the package manager binary is present, installing + // it if necessary. It returns the version string on success. + EnsureAvailable(ctx context.Context) (version string, err error) + + // EnsurePython ensures the requested Python minor version (e.g. "3.12") is + // available via the package manager. + EnsurePython(ctx context.Context, minor string) error + + // Provision installs the project dependencies inside projectDir. + Provision(ctx context.Context, projectDir string) error + + // PostProvision seeds pip into the virtual environment inside projectDir. + // This step is required because VS Code's ms-python.vscode-python-envs + // extension falls back to `python -m pip list` when its `uv --version` + // probe fails on the GUI PATH; uv venvs contain no pip; and `uv sync` + // strips pip, so seeding must run after every sync. + PostProvision(ctx context.Context, projectDir string) error + + // Validate reads the Python minor version and databricks-connect version + // from the virtual environment inside projectDir. + Validate(ctx context.Context, projectDir string) (pythonVersion, dbconnectVersion string, err error) +} From 117bee1d5a19335e7dfdd3bb9786c0f356b50f44 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 20:01:00 +0200 Subject: [PATCH 2/4] Harden pipeline backup safety and version-major parsing Review of the pipeline layer found: - applyMerge treated every os.Stat error on the backup as "does not exist" and proceeded to copyFile over it. An existing-but-unstattable backup (permission or I/O error) would thus be overwritten with the already-merged content, destroying the canonical original the merge base relies on. It now only creates the backup on os.ErrNotExist and fails on any other stat error. - A backup-copy failure was reported with DiskMutated=false even though copyFile creates/truncates the .bak path and can leave a partial file. That path now reports DiskMutated=true. - majorVersion accepted any non-empty prefix before the first dot, so a malformed version like "bad.version" yielded major "bad" and could be compared as a real major. It now returns "" for a non-numeric major so the validate phase rejects malformed versions. Adds a direct applyMerge test for the unstattable-backup branch and extends the majorVersion cases with non-numeric inputs. Co-authored-by: Isaac --- libs/localenv/pipeline.go | 36 +++++++++++++++++++++++++++++---- libs/localenv/pipeline_test.go | 37 ++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/libs/localenv/pipeline.go b/libs/localenv/pipeline.go index 7b7320d4bd..f7b20b5c1d 100644 --- a/libs/localenv/pipeline.go +++ b/libs/localenv/pipeline.go @@ -297,10 +297,20 @@ func (p *Pipeline) applyMerge(_ context.Context, mergedBytes []byte, greenfield // on a re-run the existing .bak is the canonical original unmanaged state // (mergePlan used it as the base), so overwriting it with the already-merged // pyproject.toml would destroy that baseline. - if _, err := os.Stat(backup); err != nil { + _, statErr := os.Stat(backup) + switch { + case statErr == nil: + // Backup already exists — keep it as the canonical baseline. + case errors.Is(statErr, os.ErrNotExist): + // copyFile creates/truncates the backup path, so a failure mid-copy may + // leave a partial .bak: report disk as mutated. if err := copyFile(pyproject, backup); err != nil { - return p.fail(PhaseMerge, false, NewError(ErrMerge, err, "backup pyproject.toml failed")) + return p.fail(PhaseMerge, true, NewError(ErrMerge, err, "backup pyproject.toml failed")) } + default: + // An existing-but-unstattable backup must not be overwritten (that would + // destroy the recoverable original); fail before any write instead. + return p.fail(PhaseMerge, false, NewError(ErrMerge, statErr, "cannot stat backup %s", filepath.ToSlash(backup))) } p.res.BackupPath = filepath.ToSlash(backup) } @@ -461,18 +471,36 @@ func dbcMajorFromPin(pin string) string { // majorVersion returns the major portion of a version string (digits before the // first dot), e.g. "17" from "17.2.0". A bare integer like "17" returns "17". -// Returns "" for an empty string. +// Returns "" for an empty string or a non-numeric major component, so the +// validate phase rejects a malformed version rather than comparing arbitrary +// strings as major versions. func majorVersion(v string) string { if v == "" { return "" } before, _, ok := strings.Cut(v, ".") if !ok { - return v + before = v + } + if before == "" || !isAllDigits(before) { + return "" } return before } +// isAllDigits reports whether s is non-empty and every rune is an ASCII digit. +func isAllDigits(s string) bool { + if s == "" { + return false + } + for _, c := range s { + if c < '0' || c > '9' { + return false + } + } + return true +} + // copyFile copies src to dst, creating or overwriting dst. func copyFile(src, dst string) error { data, err := os.ReadFile(src) diff --git a/libs/localenv/pipeline_test.go b/libs/localenv/pipeline_test.go index a9eb80c5aa..52f7f297b3 100644 --- a/libs/localenv/pipeline_test.go +++ b/libs/localenv/pipeline_test.go @@ -128,6 +128,38 @@ func TestPipelineExistingBacksUp(t *testing.T) { assert.Equal(t, "pyproject.toml.bak", filepath.Base(res.BackupPath)) } +func TestApplyMergeFailsOnUnstattableBackupWithoutOverwrite(t *testing.T) { + if runtime.GOOS == "windows" || os.Getuid() == 0 { + // chmod-based stat blocking does not apply for root or on Windows. + t.Skip("stat-permission enforcement not available") + } + // Both pyproject.toml and its .bak live in a project dir that is made + // unsearchable, so os.Stat of the backup fails with a permission error rather + // than not-exist — isolating applyMerge's "can't stat" branch. applyMerge is + // called directly, bypassing the writability preflight. + dir := filepath.Join(t.TempDir(), "proj") + require.NoError(t, os.Mkdir(dir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "pyproject.toml"), []byte("[project]\n"), 0o644)) + backup := filepath.Join(dir, "pyproject.toml.bak") + require.NoError(t, os.WriteFile(backup, []byte("ORIGINAL BACKUP\n"), 0o644)) + require.NoError(t, os.Chmod(dir, 0o000)) + t.Cleanup(func() { _ = os.Chmod(dir, 0o755) }) + + p := &Pipeline{ProjectDir: dir, res: &Result{Phases: initialPhases()}} + err := p.applyMerge(t.Context(), []byte("merged"), false) + require.Error(t, err) + var pe *PipelineError + require.ErrorAs(t, err, &pe) + assert.Equal(t, PhaseMerge, pe.FailurePhase) + assert.False(t, pe.DiskMutated, "no write should have happened before the stat check") + + // The original backup must be intact. + require.NoError(t, os.Chmod(dir, 0o755)) + got, readErr := os.ReadFile(backup) + require.NoError(t, readErr) + assert.Equal(t, "ORIGINAL BACKUP\n", string(got), "the canonical backup must not be overwritten") +} + func TestPipelineConstraintsOnlyOmitsDBConnect(t *testing.T) { dir := t.TempDir() srv := newTestServer(t) @@ -352,6 +384,11 @@ func TestMajorVersion(t *testing.T) { {"17", "17"}, {"", ""}, {"3.12", "3"}, + // Non-numeric major components are rejected (empty) so validation does + // not compare arbitrary strings as versions. + {"bad.version", ""}, + {"v17.2", ""}, + {"abc", ""}, } for _, tc := range cases { assert.Equal(t, tc.want, majorVersion(tc.input), "input=%q", tc.input) From 27e56f9f5063838d6abd4bc210ae39b588795a39 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 21:13:56 +0200 Subject: [PATCH 3/4] Skip the writability probe under --check Round-4 review noted preflight ran ensureWritable even under --check, which creates and removes a temp file (a disk mutation in a dry run) and fails a read-only project the user only wants to inspect. The probe now runs only for a real (non-check) run; it exists to fail fast before a write that --check never performs. Co-authored-by: Isaac --- libs/localenv/pipeline.go | 10 ++++++++-- libs/localenv/pipeline_test.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/libs/localenv/pipeline.go b/libs/localenv/pipeline.go index f7b20b5c1d..ddcb1204f1 100644 --- a/libs/localenv/pipeline.go +++ b/libs/localenv/pipeline.go @@ -95,8 +95,14 @@ func (p *Pipeline) run(ctx context.Context) error { if m := detectManager(p.ProjectDir); m != managerUv { return p.fail(PhasePreflight, false, NewError(ErrManagerUnsupported, nil, "%s", managerGuidance(m))) } - if err := ensureWritable(p.ProjectDir); err != nil { - return p.fail(PhasePreflight, false, NewError(ErrNotWritable, err, "project directory %s is not writable", filepath.ToSlash(p.ProjectDir))) + // Skip the writability probe under --check: it creates and removes a temp + // file, which would be a disk mutation in a dry run, and a read-only project + // the user only wants to inspect must not fail. The probe exists to fail fast + // before a real write, which --check never performs. + if !p.Check { + if err := ensureWritable(p.ProjectDir); err != nil { + return p.fail(PhasePreflight, false, NewError(ErrNotWritable, err, "project directory %s is not writable", filepath.ToSlash(p.ProjectDir))) + } } version, err := p.PM.EnsureAvailable(ctx) if err != nil { diff --git a/libs/localenv/pipeline_test.go b/libs/localenv/pipeline_test.go index 52f7f297b3..00db3d0482 100644 --- a/libs/localenv/pipeline_test.go +++ b/libs/localenv/pipeline_test.go @@ -64,6 +64,37 @@ func TestPipelineCheckMutatesNothing(t *testing.T) { assert.Equal(t, string(before), string(after)) // unchanged } +func TestPipelineCheckWorksOnReadOnlyDir(t *testing.T) { + if runtime.GOOS == "windows" || os.Getuid() == 0 { + t.Skip("read-only-dir enforcement not available") + } + dir := writeProject(t) + srv := newTestServer(t) + defer srv.Close() + + // Make the project dir read-only: --check must still compute the plan without + // a writability probe (which would both mutate disk and fail here). + require.NoError(t, os.Chmod(dir, 0o555)) + t.Cleanup(func() { _ = os.Chmod(dir, 0o755) }) + + p := &Pipeline{ + Mode: ModeDefault, Check: true, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: fakePM{py: "3.12", dbc: "17.2.0"}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + assert.True(t, res.OK) + require.NotNil(t, res.Plan) + // No writecheck temp file was left behind in the project dir. + entries, err := os.ReadDir(dir) + require.NoError(t, err) + for _, e := range entries { + assert.NotContains(t, e.Name(), "writecheck", "dry run must not create temp files") + } +} + func TestPipelineProvisionsAndValidatesExisting(t *testing.T) { dir := writeProject(t) srv := newTestServer(t) From d8f0de9cf58a9333b8bcfd83c3367b8a1bc89024 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 21:22:15 +0200 Subject: [PATCH 4/4] Skip package-manager availability under --check too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-5 review noted that although --check now skips the writability probe, it still called PackageManager.EnsureAvailable, which may install the manager (uv) when missing — another disk mutation in a dry run. Preflight now performs neither write-side step under --check (neither is needed to compute the plan) and reports the phase as "check". Adds a test with a PackageManager that errors on every method, asserting --check still succeeds and produces a plan. Co-authored-by: Isaac --- libs/localenv/pipeline.go | 25 ++++++++++-------- libs/localenv/pipeline_test.go | 47 ++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 10 deletions(-) diff --git a/libs/localenv/pipeline.go b/libs/localenv/pipeline.go index ddcb1204f1..004271eb69 100644 --- a/libs/localenv/pipeline.go +++ b/libs/localenv/pipeline.go @@ -95,20 +95,25 @@ func (p *Pipeline) run(ctx context.Context) error { if m := detectManager(p.ProjectDir); m != managerUv { return p.fail(PhasePreflight, false, NewError(ErrManagerUnsupported, nil, "%s", managerGuidance(m))) } - // Skip the writability probe under --check: it creates and removes a temp - // file, which would be a disk mutation in a dry run, and a read-only project - // the user only wants to inspect must not fail. The probe exists to fail fast - // before a real write, which --check never performs. - if !p.Check { + // Under --check the pipeline only reads and reports a plan, so it must not + // mutate anything at preflight. Two preflight steps can write: + // - ensureWritable creates and removes a temp file (and would fail a + // read-only project the user only wants to inspect); + // - PackageManager.EnsureAvailable may install the manager (uv) if missing. + // Both exist to fail fast before real writes, which --check never performs, so + // they are skipped in a dry run. Neither result is needed to compute the plan. + if p.Check { + p.markOK(PhasePreflight, "check") + } else { if err := ensureWritable(p.ProjectDir); err != nil { return p.fail(PhasePreflight, false, NewError(ErrNotWritable, err, "project directory %s is not writable", filepath.ToSlash(p.ProjectDir))) } + version, err := p.PM.EnsureAvailable(ctx) + if err != nil { + return p.fail(PhasePreflight, false, asPipelineError(err, ErrUvMissing, "%s unavailable", p.PM.Name())) + } + p.markOK(PhasePreflight, p.PM.Name()+" "+version) } - version, err := p.PM.EnsureAvailable(ctx) - if err != nil { - return p.fail(PhasePreflight, false, asPipelineError(err, ErrUvMissing, "%s unavailable", p.PM.Name())) - } - p.markOK(PhasePreflight, p.PM.Name()+" "+version) // Phase: resolve — compute target → environment key. target, err := p.resolve(ctx) diff --git a/libs/localenv/pipeline_test.go b/libs/localenv/pipeline_test.go index 00db3d0482..fdc76fbd24 100644 --- a/libs/localenv/pipeline_test.go +++ b/libs/localenv/pipeline_test.go @@ -2,6 +2,7 @@ package localenv import ( "context" + "errors" "net/http" "net/http/httptest" "os" @@ -24,6 +25,32 @@ func (f fakePM) Validate(context.Context, string) (string, string, error) { return f.py, f.dbc, nil } +// noProvisionPM fails any method that could touch the machine (install the +// manager, install Python, sync, seed pip, validate). It asserts that --check +// never reaches those write-side operations. +type noProvisionPM struct{} + +func (noProvisionPM) Name() string { return "noprov" } +func (noProvisionPM) EnsureAvailable(context.Context) (string, error) { + return "", errors.New("EnsureAvailable must not be called under --check") +} + +func (noProvisionPM) EnsurePython(context.Context, string) error { + return errors.New("EnsurePython must not be called under --check") +} + +func (noProvisionPM) Provision(context.Context, string) error { + return errors.New("Provision must not be called under --check") +} + +func (noProvisionPM) PostProvision(context.Context, string) error { + return errors.New("PostProvision must not be called under --check") +} + +func (noProvisionPM) Validate(context.Context, string) (string, string, error) { + return "", "", errors.New("Validate must not be called under --check") +} + func writeProject(t *testing.T) string { dir := t.TempDir() require.NoError(t, os.WriteFile(filepath.Join(dir, "pyproject.toml"), []byte(`[project] @@ -64,6 +91,26 @@ func TestPipelineCheckMutatesNothing(t *testing.T) { assert.Equal(t, string(before), string(after)) // unchanged } +func TestPipelineCheckDoesNotProvision(t *testing.T) { + // --check must not call any PackageManager method that could mutate the + // machine (EnsureAvailable may install uv). noProvisionPM errors on all of + // them; the dry run must still succeed and produce a plan. + dir := writeProject(t) + srv := newTestServer(t) + defer srv.Close() + + p := &Pipeline{ + Mode: ModeDefault, Check: true, ProjectDir: dir, + ConstraintBaseURL: srv.URL, CacheDir: t.TempDir(), + Flags: TargetFlags{Serverless: "v4"}, + Compute: stubCompute{}, PM: noProvisionPM{}, + } + res, err := p.Run(t.Context()) + require.NoError(t, err) + assert.True(t, res.OK) + require.NotNil(t, res.Plan) +} + func TestPipelineCheckWorksOnReadOnlyDir(t *testing.T) { if runtime.GOOS == "windows" || os.Getuid() == 0 { t.Skip("read-only-dir enforcement not available")