diff --git a/libs/localenv/envkey.go b/libs/localenv/envkey.go new file mode 100644 index 0000000000..39cff1a833 --- /dev/null +++ b/libs/localenv/envkey.go @@ -0,0 +1,80 @@ +package localenv + +import ( + "fmt" + "regexp" + "strconv" + "strings" +) + +// clauseRe splits a single requires-python clause into its operator (optional) +// and MAJOR.MINOR version. A clause with no operator is a bare floor. +var clauseRe = regexp.MustCompile(`^(>=|<=|===|==|~=|!=|<|>)?\s*(\d+)\.(\d+)`) + +// NormalizeServerless returns the canonical "vN" spelling of a serverless +// version accepting "4", "v4", or "V4". +func NormalizeServerless(version string) string { + return "v" + strings.TrimPrefix(strings.ToLower(version), "v") +} + +// EnvKeyForServerless returns the environment key for a serverless version. +func EnvKeyForServerless(version string) string { + return "serverless/serverless-" + NormalizeServerless(version) +} + +// EnvKeyForSparkVersion returns the environment key for a Spark version. +func EnvKeyForSparkVersion(sparkVersion string) string { + return "dbr/" + sparkVersion +} + +// PythonMinorFromRequires parses a PEP 440 requires-python string and returns +// the MAJOR.MINOR of the Python version to install: the effective lower bound. +// +// A requires-python is a comma-separated list of clauses in any order (e.g. +// "<3.13,>=3.10"). Each clause is classified by operator: +// - lower-bound / pinning (>=, >, ==, ~=, ===) or a bare MAJOR.MINOR with no +// operator establishes a floor; +// - upper-bound / exclusion (<, <=, !=) does not — those versions are capped +// or forbidden and must never be installed. +// +// The result is the highest floor across all floor clauses (so ">=3.8,>=3.11" +// yields 3.11, the version that satisfies every clause). A spec with no floor +// clause at all (e.g. "<3.13" or "!=3.12") is an error rather than a guess. 
+func PythonMinorFromRequires(requiresPython string) (string, error) { + bestMajor, bestMinor := -1, -1 + sawClause := false + for clause := range strings.SplitSeq(requiresPython, ",") { + clause = strings.TrimSpace(clause) + if clause == "" { + continue + } + m := clauseRe.FindStringSubmatch(clause) + if m == nil { + continue + } + sawClause = true + op := m[1] + // Upper-bound and exclusion operators never establish a floor. + if op == "<" || op == "<=" || op == "!=" { + continue + } + major, _ := strconv.Atoi(m[2]) + minor, _ := strconv.Atoi(m[3]) + // A strict ">" excludes the whole given minor series (PEP 440: ">3.10" + // matches neither 3.10 nor any 3.10.x), so the lowest installable minor is + // the next one up. + if op == ">" { + minor++ + } + if major > bestMajor || (major == bestMajor && minor > bestMinor) { + bestMajor, bestMinor = major, minor + } + } + if bestMajor >= 0 { + return fmt.Sprintf("%d.%d", bestMajor, bestMinor), nil + } + if sawClause { + return "", fmt.Errorf("requires-python %q has no lower bound to install from", requiresPython) + } + return "", fmt.Errorf("cannot parse python version from %q", requiresPython) +} diff --git a/libs/localenv/envkey_test.go b/libs/localenv/envkey_test.go new file mode 100644 index 0000000000..8c5f3be671 --- /dev/null +++ b/libs/localenv/envkey_test.go @@ -0,0 +1,62 @@ +package localenv + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestEnvKeyForServerless(t *testing.T) { + for _, in := range []string{"4", "v4", "V4"} { + assert.Equal(t, "serverless/serverless-v4", EnvKeyForServerless(in)) + } +} + +func TestEnvKeyForSparkVersion(t *testing.T) { + assert.Equal(t, "dbr/15.4.x-scala2.12", EnvKeyForSparkVersion("15.4.x-scala2.12")) +} + +func TestPythonMinorFromRequires(t *testing.T) { + cases := map[string]string{ + "==3.12.*": "3.12", + ">=3.12": "3.12", + "==3.12.3": "3.12", + "~=3.11": "3.11", + // Multi-clause specifiers: the lower 
bound is the version to install, + // regardless of clause order. Taking the first number would pick the + // excluded upper bound (e.g. 3.13 from "<3.13"). + "<3.13,>=3.10": "3.10", + ">=3.10,<3.13": "3.10", + ">=3.10, <3.13": "3.10", + "<4.0,>=3.9": "3.9", + "===3.11": "3.11", + // The effective floor is the HIGHEST lower bound, regardless of order. + ">=3.8,>=3.11": "3.11", + ">=3.11,>=3.8": "3.11", + // A bare floor alongside an exclusion is still a floor. + "!=3.11,3.12": "3.12", + "3.12,!=3.12.4": "3.12", + // Bare version with no operator. + "3.12": "3.12", + // Whitespace and patch components tolerated. + ">= 3.10 , < 3.13": "3.10", + // Strict ">" excludes the whole minor series (PEP 440), so the floor is + // the next minor up. + ">3.10": "3.11", + ">3.10,<3.13": "3.11", + ">=3.9,>3.10": "3.11", + } + for in, want := range cases { + got, err := PythonMinorFromRequires(in) + require.NoError(t, err) + assert.Equal(t, want, got, "input %q", in) + } + + // No usable floor: only upper-bound / exclusion clauses. Must error rather + // than select a forbidden/capped version. + for _, in := range []string{"<3.13", "<=3.12", "!=3.12", "<3.13,!=3.12", "garbage", ""} { + _, err := PythonMinorFromRequires(in) + assert.Error(t, err, "input %q must error", in) + } +} diff --git a/libs/localenv/result.go b/libs/localenv/result.go new file mode 100644 index 0000000000..82faed1b1d --- /dev/null +++ b/libs/localenv/result.go @@ -0,0 +1,180 @@ +package localenv + +import "fmt" + +// Command path components, defined once so a rename touches a single place +// (spec §0 / invariant 8 / scenario 21). The cmd layer builds the Cobra +// command tree from CommandGroup/CommandSubgroup/CommandVerb; the --json +// "command" field uses CommandName. No other string re-spells the command path. 
const (
	// The three words of the command path.
	CommandGroup    = "local-env"
	CommandSubgroup = "python"
	CommandVerb     = "sync"

	// CommandName is the three words above joined by single spaces.
	CommandName = CommandGroup + " " + CommandSubgroup + " " + CommandVerb

	// SchemaVersion is the version of the --json output contract (spec §6).
	// Bump it on any breaking change to the JSON shape.
	SchemaVersion = 1
)

// Mode is the provisioning mode: a full environment (default) or the
// constraints-only variant that omits the databricks-connect dependency.
type Mode int

const (
	ModeDefault Mode = iota
	ModeConstraintsOnly
)

// String returns the JSON/text spelling of the mode. ModeConstraintsOnly maps
// to "constraints-only"; every other value, ModeDefault included, maps to
// "default".
func (m Mode) String() string {
	switch m {
	case ModeConstraintsOnly:
		return "constraints-only"
	default:
		return "default"
	}
}

// PhaseName is a canonical execution phase (spec §3 / §6). The set is fixed and
// ordered; the --json "phases" array reports every phase in this order.
type PhaseName string

const (
	PhasePreflight PhaseName = "preflight"
	PhaseResolve   PhaseName = "resolve"
	PhaseFetch     PhaseName = "fetch"
	PhaseMerge     PhaseName = "merge"
	PhaseProvision PhaseName = "provision"
	PhaseValidate  PhaseName = "validate"
)

// Phase status values (spec §6.2).
const (
	StatusOK      = "ok"
	StatusError   = "error"
	StatusPending = "pending"
)

// ErrorCode is a stable failure-class identifier surfaced in --json error.code
// (spec §7). Values are compared via the ErrorCode constants, never by
// string-matching messages, and are defined once here.
+type ErrorCode string + +const ( + ErrNoTarget ErrorCode = "E_NO_TARGET" + ErrManagerUnsupported ErrorCode = "E_MANAGER_UNSUPPORTED" + ErrUvMissing ErrorCode = "E_UV_MISSING" + ErrNotWritable ErrorCode = "E_NOT_WRITABLE" + ErrResolve ErrorCode = "E_RESOLVE" + ErrEnvUnsupported ErrorCode = "E_ENV_UNSUPPORTED" + ErrFetch ErrorCode = "E_FETCH" + ErrWrite ErrorCode = "E_WRITE" + ErrMerge ErrorCode = "E_MERGE" + ErrPythonInstall ErrorCode = "E_PYTHON_INSTALL" + ErrProvision ErrorCode = "E_PROVISION" + ErrValidate ErrorCode = "E_VALIDATE" +) + +// PipelineError is a failure carrying a stable code, the phase at which it +// occurred, and whether disk was mutated before the failure. It marshals to the +// --json error object (spec §6.2). Code and FailurePhase are the stable +// contract; Err holds the wrapped cause for errors.Is/As and is not serialized. +type PipelineError struct { + Code ErrorCode `json:"code"` + FailurePhase PhaseName `json:"failurePhase"` + Msg string `json:"message"` + DiskMutated bool `json:"diskMutated"` + Err error `json:"-"` +} + +func (e *PipelineError) Error() string { + if e.Err != nil { + return e.Msg + ": " + e.Err.Error() + } + return e.Msg +} + +func (e *PipelineError) Unwrap() error { + return e.Err +} + +// NewError creates a PipelineError with a code and message. FailurePhase and +// DiskMutated are filled in by the pipeline when it records the failure. The +// message is formatted with fmt.Sprintf(format, args...); err may be nil. +func NewError(code ErrorCode, err error, format string, args ...any) *PipelineError { + return &PipelineError{ + Code: code, + Msg: fmt.Sprintf(format, args...), + Err: err, + } +} + +// TargetInfo is the resolved compute target (spec §6 "target"). Source records +// which of the four precedence sources was used. SparkVersion is the raw cluster +// runtime string the resolver read; it is folded into EnvKey (dbr/) +// and is not part of the JSON contract, kept only as intermediate resolver state. 
+type TargetInfo struct { + Source string `json:"source"` + ClusterID string `json:"clusterId,omitempty"` + ServerlessVersion string `json:"serverlessVersion,omitempty"` + EnvKey string `json:"envKey"` + + SparkVersion string `json:"-"` +} + +// ResolvedInfo is the resolved environment definition (spec §6 "resolved"). +// DBConnectVersion is omitted in constraints-only mode. +type ResolvedInfo struct { + PythonVersion string `json:"pythonVersion"` + DBConnectVersion string `json:"dbconnectVersion,omitempty"` + ArtifactSource string `json:"artifactSource"` +} + +// Plan describes the changes a --check run would apply (spec §6.3). +// ChangedRegions is retained for text output only and is not serialized. +type Plan struct { + WouldWrite string `json:"wouldWrite"` + WouldBackup string `json:"wouldBackup,omitempty"` + WouldInstallPython string `json:"wouldInstallPython,omitempty"` + Diff string `json:"diff"` + + ChangedRegions []string `json:"-"` +} + +// PhaseStatus is one entry in the --json "phases" array (spec §6). Detail is +// used for human-readable text output only and is not serialized. +type PhaseStatus struct { + Phase PhaseName `json:"phase"` + Status string `json:"status"` + + Detail string `json:"-"` +} + +// Warning is a non-fatal advisory surfaced in --json "warnings" (spec §6). +type Warning struct { + Code string `json:"code"` + Message string `json:"message"` +} + +// Result is the full outcome of a sync run and the root of the --json object +// (spec §6). Field order matches the spec's schema so JSON key order is stable. 
+type Result struct { + SchemaVersion int `json:"schemaVersion"` + Command string `json:"command"` + OK bool `json:"ok"` + Mode string `json:"mode"` + DryRun bool `json:"dryRun"` + Target *TargetInfo `json:"target,omitempty"` + Resolved *ResolvedInfo `json:"resolved,omitempty"` + Greenfield bool `json:"greenfield"` + Plan *Plan `json:"plan,omitempty"` + VenvPath string `json:"venvPath,omitempty"` + Phases []PhaseStatus `json:"phases"` + Warnings []Warning `json:"warnings"` + Error *PipelineError `json:"error"` + BackupPath string `json:"backupPath,omitempty"` + // DurationMs is part of the §6 contract but reserved for now: the pipeline + // does not measure wall time (a real clock would make acceptance goldens + // non-deterministic), so it is always emitted as 0 until timing is wired + // through a clock the tests can control. + DurationMs int64 `json:"durationMs"` +} diff --git a/libs/localenv/result_test.go b/libs/localenv/result_test.go new file mode 100644 index 0000000000..76d30ec22b --- /dev/null +++ b/libs/localenv/result_test.go @@ -0,0 +1,27 @@ +package localenv + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestPipelineErrorWrapsAndExposesCode(t *testing.T) { + base := errors.New("boom") + err := NewError(ErrFetch, base, "fetch %s", "x") + assert.Equal(t, "fetch x: boom", err.Error()) + assert.Equal(t, ErrFetch, err.Code) + assert.ErrorIs(t, err, base) +} + +func TestModeString(t *testing.T) { + assert.Equal(t, "default", ModeDefault.String()) + assert.Equal(t, "constraints-only", ModeConstraintsOnly.String()) +} + +func TestCommandName(t *testing.T) { + // The --json "command" field and all help text derive from these; the + // three-part path must join to the full command a user types. + assert.Equal(t, "local-env python sync", CommandName) +}