From dfb47cda9b54888e49f0acc8719154b7a8f972b7 Mon Sep 17 00:00:00 2001 From: Mike Landau Date: Sun, 21 Jun 2026 19:03:42 -0700 Subject: [PATCH 1/2] test(ci): speed up cli-tests by raising parallelism and de-slowing add_insecure The cli-tests `test` job was dominated by the testscripts package (~380s wall). Two changes cut that down: - Pass `-parallel 8` to both the fast tests (project-tests-off) and the project tests (test-projects-only). The testscripts are I/O-bound on nix downloads/builds, so oversubscribing the 4 vCPU runner lets more scripts overlap their waits instead of capping concurrency at GOMAXPROCS. - Switch the add_insecure testscript from python@2.7.18.6 to nodejs@16 (16.20.2). The Python version is not in the binary cache and compiled from source (~260s, the single longest pole). nodejs_16 is flagged insecure in the pinned nixpkgs (EOL) yet has a prebuilt binary on cache.nixos.org, so it still exercises the --allow-insecure flow but substitutes in seconds. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/cli-tests.yaml | 6 +++++- devbox.json | 2 +- testscripts/add/add_insecure.tst.txt | 11 ++++++----- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cli-tests.yaml b/.github/workflows/cli-tests.yaml index 47b8b5f1ee7..7490d14f482 100644 --- a/.github/workflows/cli-tests.yaml +++ b/.github/workflows/cli-tests.yaml @@ -227,7 +227,11 @@ jobs: echo "::group::Resolved Nix config" nix show-config --extra-experimental-features nix-command echo "::endgroup::" - devbox run go test -v -timeout $DEVBOX_GOLANG_TEST_TIMEOUT ./... + # The testscripts are heavily I/O-bound (waiting on nix downloads and + # builds), so we oversubscribe the 4 vCPU runner with -parallel to let + # more scripts overlap their nix waits. See devbox.json for the + # equivalent flag on the project tests. + devbox run go test -v -parallel 8 -timeout $DEVBOX_GOLANG_TEST_TIMEOUT ./... - name: Run project (slow) tests if: matrix.run-project-tests == 'project-tests-only' run: devbox run test-projects-only diff --git a/devbox.json b/devbox.json index a9325696106..1e79aaa2cde 100644 --- a/devbox.json +++ b/devbox.json @@ -41,7 +41,7 @@ "lint": "go tool golangci-lint run --timeout 5m && scripts/gofumpt.sh", "fmt": "scripts/gofumpt.sh", "test": "go test -race -cover ./...", - "test-projects-only": "DEVBOX_RUN_PROJECT_TESTS=1 go test -v -timeout ${DEVBOX_GOLANG_TEST_TIMEOUT:-30m} ./... -run \"TestExamples|TestScriptsWithProjects\"", + "test-projects-only": "DEVBOX_RUN_PROJECT_TESTS=1 go test -v -parallel 8 -timeout ${DEVBOX_GOLANG_TEST_TIMEOUT:-30m} ./... -run \"TestExamples|TestScriptsWithProjects\"", "update-examples": "devbox run build && go run testscripts/testrunner/updater/main.go", // Updates the Flake's vendorHash: First run `go mod vendor` to vendor // the dependencies, then hash the vendor directory with Nix. diff --git a/testscripts/add/add_insecure.tst.txt b/testscripts/add/add_insecure.tst.txt index 2836fc56fbe..7dfd330dcbe 100644 --- a/testscripts/add/add_insecure.tst.txt +++ b/testscripts/add/add_insecure.tst.txt @@ -1,9 +1,10 @@ # Tests installing an insecure package. -# This test is pretty slow, maybe there's a different package we can -# use for testing. - -# we could also isolate this test and run on its own. +# +# We use nodejs@16 (16.20.2) because it is flagged insecure in nixpkgs (Node 16 +# is EOL) yet has a prebuilt binary on cache.nixos.org, so the test exercises the +# --allow-insecure flow with a fast substitution instead of a slow source build. +# (The previous python@2.7.18.6 was not cached and compiled from source, ~4min.) exec devbox init -exec devbox add python@2.7.18.6 --allow-insecure python-2.7.18.6 +exec devbox add nodejs@16 --allow-insecure nodejs-16.20.2 exec devbox install From b5274fde5e458873fed2936713ad59546ecb5bd9 Mon Sep 17 00:00:00 2001 From: Mike Landau Date: Sun, 21 Jun 2026 19:50:01 -0700 Subject: [PATCH 2/2] test(ci): shard testscripts across runners instead of oversubscribing one The earlier -parallel 8 bump did not help: the testscripts are bound by per-runner nix work (downloading ~677 package closures and evaluating flakes on each devbox invocation), which is serialized through a single nix-daemon and does not parallelize within one runner. Raising in-process parallelism just made scripts contend, roughly doubling each script's wall time for no net gain. Revert it. Instead, shard the scripts across runners, where each runner has its own daemon, cores, and network: - Add DEVBOX_TEST_SHARD_INDEX / DEVBOX_TEST_SHARD_TOTAL support to the testscript runners. RunTestscripts now runs each script via its own testscript.Run (params.Files) so scripts can be partitioned individually; RunDevboxTestscripts shards example projects in WalkDir order. Assignment is round-robin over a sorted list so heavy scripts spread across shards. - Add a `shard: [0, 1, 2]` matrix axis to the cli-tests `test` job and a `test-result` job that aggregates the shards into one stable status check (the per-shard job names are unstable for branch protection). Keeps the add_insecure -> nodejs@16 package swap from the previous commit. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/cli-tests.yaml | 36 ++++++++-- devbox.json | 2 +- testscripts/testrunner/examplesrunner.go | 13 ++++ testscripts/testrunner/testrunner.go | 88 ++++++++++++++++++------ 4 files changed, 112 insertions(+), 27 deletions(-) diff --git a/.github/workflows/cli-tests.yaml b/.github/workflows/cli-tests.yaml index 7490d14f482..568953c6052 100644 --- a/.github/workflows/cli-tests.yaml +++ b/.github/workflows/cli-tests.yaml @@ -133,6 +133,12 @@ jobs: # the devbox.json tests. We can require the other tests to complete before # merging, while keeping the others as an additional non-required signal run-project-tests: ["project-tests-only", "project-tests-off"] + # The testscripts are bound by per-runner nix work (downloading package + # closures and evaluating flakes), which barely parallelizes within a + # single runner. We instead shard the scripts across multiple runners. + # The runner count here must match DEVBOX_TEST_SHARD_TOTAL below. The + # `test-result` job aggregates all shards into one required status check. + shard: [0, 1, 2] # NOTE: this job used to run a `nix-version` matrix, but the # devbox-install-action we now use to install Nix doesn't let us pin a # specific Nix version. The matrix legs were therefore running identical @@ -153,6 +159,10 @@ jobs: # But we allow overriding via inputs.example-debug DEVBOX_DEBUG: ${{ (matrix.run-project-tests == 'project-tests-off' || inputs.example-debug) && '1' || '0' }} DEVBOX_GOLANG_TEST_TIMEOUT: "${{ (github.ref == 'refs/heads/main' || inputs.run-mac-tests) && '1h' || '30m' }}" + # Shard the testscripts across runners. DEVBOX_TEST_SHARD_TOTAL must match + # the length of the `shard` matrix axis above. + DEVBOX_TEST_SHARD_INDEX: ${{ matrix.shard }} + DEVBOX_TEST_SHARD_TOTAL: 3 steps: - name: clear directories to reduce disk usage # https://github.com/actions/runner-images/issues/2840#issuecomment-1284059930 @@ -227,15 +237,31 @@ jobs: echo "::group::Resolved Nix config" nix show-config --extra-experimental-features nix-command echo "::endgroup::" - # The testscripts are heavily I/O-bound (waiting on nix downloads and - # builds), so we oversubscribe the 4 vCPU runner with -parallel to let - # more scripts overlap their nix waits. See devbox.json for the - # equivalent flag on the project tests. - devbox run go test -v -parallel 8 -timeout $DEVBOX_GOLANG_TEST_TIMEOUT ./... + devbox run go test -v -timeout $DEVBOX_GOLANG_TEST_TIMEOUT ./... - name: Run project (slow) tests if: matrix.run-project-tests == 'project-tests-only' run: devbox run test-projects-only + # Aggregates the sharded `test` matrix into a single status check. Because the + # shard matrix produces per-shard job names (e.g. + # "test (not-main, ubuntu-latest, project-tests-off, 0)"), branch protection + # should require this stable `test-result` check instead of the individual + # matrix jobs. + test-result: + name: test-result + if: always() + needs: test + runs-on: ubuntu-latest + steps: + - name: Check that all test shards passed + run: | + result="${{ needs.test.result }}" + echo "Aggregate result of the test matrix: $result" + if [ "$result" != "success" ]; then + echo "::error::One or more test shards did not succeed ($result)." + exit 1 + fi + auto-nix-install: # ensure Devbox installs nix and works properly after installation. needs: build-devbox strategy: diff --git a/devbox.json b/devbox.json index 1e79aaa2cde..a9325696106 100644 --- a/devbox.json +++ b/devbox.json @@ -41,7 +41,7 @@ "lint": "go tool golangci-lint run --timeout 5m && scripts/gofumpt.sh", "fmt": "scripts/gofumpt.sh", "test": "go test -race -cover ./...", - "test-projects-only": "DEVBOX_RUN_PROJECT_TESTS=1 go test -v -parallel 8 -timeout ${DEVBOX_GOLANG_TEST_TIMEOUT:-30m} ./... -run \"TestExamples|TestScriptsWithProjects\"", + "test-projects-only": "DEVBOX_RUN_PROJECT_TESTS=1 go test -v -timeout ${DEVBOX_GOLANG_TEST_TIMEOUT:-30m} ./... -run \"TestExamples|TestScriptsWithProjects\"", "update-examples": "devbox run build && go run testscripts/testrunner/updater/main.go", // Updates the Flake's vendorHash: First run `go mod vendor` to vendor // the dependencies, then hash the vendor directory with Nix. diff --git a/testscripts/testrunner/examplesrunner.go b/testscripts/testrunner/examplesrunner.go index 87ee6adad67..7270f3a552c 100644 --- a/testscripts/testrunner/examplesrunner.go +++ b/testscripts/testrunner/examplesrunner.go @@ -33,6 +33,11 @@ func RunDevboxTestscripts(t *testing.T, dir string) { t.Error(err) } + shard := shardFromEnv(t) + // projectIdx counts the projects that are actually run (in deterministic + // WalkDir order) so we can assign each to a shard. It is incremented only + // after all skip conditions, keeping the numbering identical on every runner. + projectIdx := 0 err = filepath.WalkDir(dir, func(path string, entry os.DirEntry, err error) error { if err != nil { return err @@ -75,6 +80,14 @@ func RunDevboxTestscripts(t *testing.T, dir string) { return nil } + // Assign this project a stable shard slot, then skip it if it does not + // belong to the current runner. + idx := projectIdx + projectIdx++ + if !shard.includes(idx) { + return nil + } + t.Logf("running testscript for example: %s\n", path) runSingleDevboxTestscript(t, dir, path) return nil diff --git a/testscripts/testrunner/testrunner.go b/testscripts/testrunner/testrunner.go index 869f6e82efa..83854e9ec2f 100644 --- a/testscripts/testrunner/testrunner.go +++ b/testscripts/testrunner/testrunner.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "sort" "strconv" "strings" "testing" @@ -30,41 +31,86 @@ func Main(m *testing.M) { func RunTestscripts(t *testing.T, testscriptsDir string) { globPattern := filepath.Join(testscriptsDir, "**/*.test.txt") - dirs := globDirs(globPattern) - require.NotEmpty(t, dirs, "no test scripts found") - - // Loop through all the directories and run all tests scripts (files ending - // in .test.txt) - for _, dir := range dirs { - // The testrunner dir has the testscript we use for projects in examples/ directory. - // We should skip that one since it is run separately (see RunExamplesTestscripts). - if filepath.Base(dir) == "testrunner" { + scripts := globScripts(globPattern) + require.NotEmpty(t, scripts, "no test scripts found") + + shard := shardFromEnv(t) + + // Run each test script (a file ending in .test.txt) in its own + // testscript.Run call so that we can shard at the granularity of an + // individual script. The scripts still run as parallel subtests. + for i, script := range scripts { + if !shard.includes(i) { continue } - - testscript.Run(t, getTestscriptParams(dir)) + params := getTestscriptParams(filepath.Dir(script)) + // Pass the single script explicitly rather than its directory so that + // sharding partitions scripts, not whole directories. + params.Dir = "" + params.Files = []string{script} + testscript.Run(t, params) } } -// Return directories that contain files matching the pattern. -func globDirs(pattern string) []string { +// globScripts returns the test script files matching pattern, sorted for a +// deterministic order (so sharding is stable across runners). The testrunner +// dir is skipped: it holds the generic testscript used for projects in the +// examples/ directory, which is run separately (see RunDevboxTestscripts). +func globScripts(pattern string) []string { scripts, err := doublestar.FilepathGlob(pattern) if err != nil { return nil } - // List of directories with test scripts. - directories := []string{} - dups := map[string]bool{} + filtered := scripts[:0] for _, script := range scripts { - dir := filepath.Dir(script) - if _, ok := dups[dir]; !ok { - directories = append(directories, dir) - dups[dir] = true + if filepath.Base(filepath.Dir(script)) == "testrunner" { + continue } + filtered = append(filtered, script) + } + sort.Strings(filtered) + return filtered +} + +// shard partitions the test scripts across CI runners. The testscripts are +// bound by per-runner nix work (downloading package closures and evaluating +// flakes), which does not parallelize well within a single runner, so we split +// the scripts across several runners instead. +type shard struct { + index int // 0-based index of this runner + total int // total number of runners +} + +// shardFromEnv reads the shard configuration from the environment. When +// DEVBOX_TEST_SHARD_TOTAL is unset (or 1), all scripts run on a single runner. +// Otherwise each runner sets DEVBOX_TEST_SHARD_INDEX (0-based) and runs only the +// scripts assigned to it. +func shardFromEnv(t *testing.T) shard { + total := 1 + if v := os.Getenv("DEVBOX_TEST_SHARD_TOTAL"); v != "" { + n, err := strconv.Atoi(v) + require.NoError(t, err, "invalid DEVBOX_TEST_SHARD_TOTAL=%q", v) + require.Positive(t, n, "DEVBOX_TEST_SHARD_TOTAL must be positive") + total = n } - return directories + index := 0 + if v := os.Getenv("DEVBOX_TEST_SHARD_INDEX"); v != "" { + n, err := strconv.Atoi(v) + require.NoError(t, err, "invalid DEVBOX_TEST_SHARD_INDEX=%q", v) + index = n + } + require.GreaterOrEqual(t, index, 0, "DEVBOX_TEST_SHARD_INDEX must be >= 0") + require.Less(t, index, total, "DEVBOX_TEST_SHARD_INDEX must be < DEVBOX_TEST_SHARD_TOTAL") + return shard{index: index, total: total} +} + +// includes reports whether the item at position i (in a deterministic ordering) +// belongs to this shard. Round-robin assignment keeps heavy scripts spread +// across shards rather than clustered on one runner. +func (s shard) includes(i int) bool { + return i%s.total == s.index } // copyFileCmd enables copying files within the WORKDIR