From b6df6903a2759c917fe450bd87d6fcd320bba5dd Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 15 May 2026 15:11:21 +0800 Subject: [PATCH 1/8] feat: update breaking changes detection workflows - Modified `.github/workflows/breaking_changes_detector.yml`: - Replaced current semver job with an advisory PR-local check. - Introduced a new blocking latest-release check job. - Ensured both jobs retain a green status on semver breaks. - Updated latest stable tag to exclude -* tags. - Updated `.github/workflows/breaking_changes_detector_comment.yml`: - Added downloads for both artifacts. - Adjusted comment to show: - Blocking latest-release signal. - Advisory PR-local signal. - Updated label to track blocking latest-release failure only. - Added issues: documentation for label operations. --- .../workflows/breaking_changes_detector.yml | 132 +++++++++++++++--- .../breaking_changes_detector_comment.yml | 124 ++++++++++++---- 2 files changed, 212 insertions(+), 44 deletions(-) diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index 67ab985228b47..29c48c644094b 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -50,8 +50,8 @@ permissions: contents: read jobs: - check-semver: - name: Check semver + check-semver-advisory: + name: Check semver - advisory PR-local runs-on: ubuntu-latest steps: - name: Checkout @@ -93,7 +93,7 @@ jobs: with: tool: cargo-semver-checks - - name: Run cargo-semver-checks + - name: Run cargo-semver-checks against PR base branch id: check_semver if: steps.changed_crates.outputs.packages != '' env: @@ -103,13 +103,13 @@ jobs: set +e # `tee` lets cargo's output stream live into the Actions log # while we also keep a copy for the PR comment. - # Using `apache` remote here to point to the repository the pull request is against + # Using `apache` remote here to point to the repository the pull request is against. ci/scripts/changed_crates.sh semver-check "apache/${BASE_REF}" $PACKAGES \ - 2>&1 | tee /tmp/semver-output.txt + 2>&1 | tee /tmp/advisory-semver-output.txt EXIT_CODE=${PIPESTATUS[0]} # Pass the result through an output instead of failing the job: - # a detected breaking change should surface as a PR comment, not a - # red check, so PR authors aren't confused by an intentional break. + # a detected PR-local breaking change should surface as an advisory + # PR comment, not a red check. if [ "$EXIT_CODE" -eq 0 ]; then echo "result=success" >> "$GITHUB_OUTPUT" else @@ -120,25 +120,123 @@ jobs: # directory. We default the result to "success" so the comment # workflow clears any stale comment when the check step is skipped # (e.g. no published crates changed). - - name: Stage artifact for comment workflow + - name: Stage advisory artifact for comment workflow + if: always() + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + BASE_REF: ${{ github.base_ref }} + CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }} + run: | + mkdir -p semver-advisory-artifact + echo "$PR_NUMBER" > semver-advisory-artifact/pr_number + echo "$CHECK_RESULT" > semver-advisory-artifact/result + echo "apache/${BASE_REF}" > semver-advisory-artifact/baseline_ref + if [ -f /tmp/advisory-semver-output.txt ]; then + sed 's/\x1b\[[0-9;]*m//g' /tmp/advisory-semver-output.txt > semver-advisory-artifact/logs + else + : > semver-advisory-artifact/logs + fi + + - name: Upload advisory artifact + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: semver-advisory-check-result + path: semver-advisory-artifact/ + retention-days: 1 + + check-semver-blocking: + name: Check semver - blocking latest release + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + + - name: Fetch base branch + env: + BASE_REF: ${{ github.base_ref }} + REPO: ${{ github.repository }} + run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}" + + - name: Determine latest stable release tag + id: latest_release + run: | + git fetch --tags --force + LATEST_RELEASE_TAG=$(git tag --list '[0-9]*.[0-9]*.[0-9]*' \ + | grep -Ev '-' \ + | sort -V \ + | tail -n1) + if [ -z "$LATEST_RELEASE_TAG" ]; then + echo "Could not determine latest stable release tag" >&2 + exit 1 + fi + echo "tag=$LATEST_RELEASE_TAG" >> "$GITHUB_OUTPUT" + echo "Latest stable release tag: $LATEST_RELEASE_TAG" + + - name: Determine changed crates + id: changed_crates + env: + BASE_REF: ${{ github.base_ref }} + run: | + PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}") + echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" + echo "Changed crates: $PACKAGES" + + - name: Install Protobuf Compiler + if: steps.changed_crates.outputs.packages != '' + run: | + sudo apt-get update + sudo apt-get install -y protobuf-compiler + + - name: Install cargo-semver-checks + if: steps.changed_crates.outputs.packages != '' + uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6 + with: + tool: cargo-semver-checks + + - name: Run cargo-semver-checks against latest stable release + id: check_semver + if: steps.changed_crates.outputs.packages != '' + env: + LATEST_RELEASE_TAG: ${{ steps.latest_release.outputs.tag }} + PACKAGES: ${{ steps.changed_crates.outputs.packages }} + run: | + set +e + ci/scripts/changed_crates.sh semver-check "$LATEST_RELEASE_TAG" $PACKAGES \ + 2>&1 | tee /tmp/blocking-semver-output.txt + EXIT_CODE=${PIPESTATUS[0]} + # Pass the result through an output instead of failing the job: + # release-baseline breakage should emit a blocking warning comment + # and label, while this CI job status stays green. + if [ "$EXIT_CODE" -eq 0 ]; then + echo "result=success" >> "$GITHUB_OUTPUT" + else + echo "result=failure" >> "$GITHUB_OUTPUT" + fi + + - name: Stage blocking artifact for comment workflow if: always() env: PR_NUMBER: ${{ github.event.pull_request.number }} CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }} + LATEST_RELEASE_TAG: ${{ steps.latest_release.outputs.tag }} run: | - mkdir -p semver-artifact - echo "$PR_NUMBER" > semver-artifact/pr_number - echo "$CHECK_RESULT" > semver-artifact/result - if [ -f /tmp/semver-output.txt ]; then - sed 's/\x1b\[[0-9;]*m//g' /tmp/semver-output.txt > semver-artifact/logs + mkdir -p semver-blocking-artifact + echo "$PR_NUMBER" > semver-blocking-artifact/pr_number + echo "$CHECK_RESULT" > semver-blocking-artifact/result + echo "$LATEST_RELEASE_TAG" > semver-blocking-artifact/baseline_ref + if [ -f /tmp/blocking-semver-output.txt ]; then + sed 's/\x1b\[[0-9;]*m//g' /tmp/blocking-semver-output.txt > semver-blocking-artifact/logs else - : > semver-artifact/logs + : > semver-blocking-artifact/logs fi - - name: Upload artifact + - name: Upload blocking artifact if: always() uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: - name: semver-check-result - path: semver-artifact/ + name: semver-blocking-check-result + path: semver-blocking-artifact/ retention-days: 1 diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml index 579c61cb9d5c7..e2f728532b450 100644 --- a/.github/workflows/breaking_changes_detector_comment.yml +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -53,7 +53,8 @@ permissions: # A dedicated label, separate from the existing `api change` label. # `api change` may be applied manually for behavioral changes that aren't # strictly API changes, so we can't safely auto-remove it when this check -# passes. This auto-managed label is fully owned by the workflow. +# passes. This auto-managed label is fully owned by the workflow and tracks +# only the blocking latest-release semver signal. env: BREAKING_CHANGE_LABEL: "auto detected api change" @@ -62,53 +63,99 @@ jobs: name: Comment on pull request if: github.event.workflow_run.event == 'pull_request' runs-on: ubuntu-latest - # Scoped to the minimum needed to upsert/delete the sticky comment. + # Scoped to the minimum needed to upsert/delete the sticky comment and label. permissions: actions: read + issues: write pull-requests: write steps: - - name: Download semver-check artifact + - name: Download advisory semver-check artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: - name: semver-check-result + name: semver-advisory-check-result run-id: ${{ github.event.workflow_run.id }} github-token: ${{ github.token }} - path: ./semver-artifact + path: ./semver-advisory-artifact - - name: Read and validate artifact + - name: Download blocking semver-check artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: semver-blocking-check-result + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ github.token }} + path: ./semver-blocking-artifact + + - name: Read and validate artifacts id: read run: | set -euo pipefail - # Validate every field: the artifact comes from a workflow run - # that compiled fork-controlled code, so its contents are untrusted. - PR_NUMBER=$(cat ./semver-artifact/pr_number) - if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Invalid PR number: $PR_NUMBER" >&2 + # Validate every field: the artifacts come from workflow runs + # that compiled fork-controlled code, so their contents are untrusted. + ADVISORY_PR_NUMBER=$(cat ./semver-advisory-artifact/pr_number) + BLOCKING_PR_NUMBER=$(cat ./semver-blocking-artifact/pr_number) + if ! [[ "$ADVISORY_PR_NUMBER" =~ ^[0-9]+$ ]]; then + echo "Invalid advisory PR number: $ADVISORY_PR_NUMBER" >&2 + exit 1 + fi + if ! [[ "$BLOCKING_PR_NUMBER" =~ ^[0-9]+$ ]]; then + echo "Invalid blocking PR number: $BLOCKING_PR_NUMBER" >&2 + exit 1 + fi + if [ "$ADVISORY_PR_NUMBER" != "$BLOCKING_PR_NUMBER" ]; then + echo "Mismatched PR numbers: $ADVISORY_PR_NUMBER != $BLOCKING_PR_NUMBER" >&2 + exit 1 + fi + + ADVISORY_RESULT=$(cat ./semver-advisory-artifact/result) + BLOCKING_RESULT=$(cat ./semver-blocking-artifact/result) + if [[ "$ADVISORY_RESULT" != "success" && "$ADVISORY_RESULT" != "failure" ]]; then + echo "Invalid advisory check result: $ADVISORY_RESULT" >&2 + exit 1 + fi + if [[ "$BLOCKING_RESULT" != "success" && "$BLOCKING_RESULT" != "failure" ]]; then + echo "Invalid blocking check result: $BLOCKING_RESULT" >&2 + exit 1 + fi + + ADVISORY_BASELINE=$(cat ./semver-advisory-artifact/baseline_ref) + BLOCKING_BASELINE=$(cat ./semver-blocking-artifact/baseline_ref) + if ! [[ "$ADVISORY_BASELINE" =~ ^[A-Za-z0-9._/-]+$ ]]; then + echo "Invalid advisory baseline: $ADVISORY_BASELINE" >&2 exit 1 fi - CHECK_RESULT=$(cat ./semver-artifact/result) - if [[ "$CHECK_RESULT" != "success" && "$CHECK_RESULT" != "failure" ]]; then - echo "Invalid check result: $CHECK_RESULT" >&2 + if ! [[ "$BLOCKING_BASELINE" =~ ^[A-Za-z0-9._/-]+$ ]]; then + echo "Invalid blocking baseline: $BLOCKING_BASELINE" >&2 exit 1 fi - echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT" - echo "result=$CHECK_RESULT" >> "$GITHUB_OUTPUT" + + echo "pr_number=$ADVISORY_PR_NUMBER" >> "$GITHUB_OUTPUT" + echo "advisory_result=$ADVISORY_RESULT" >> "$GITHUB_OUTPUT" + echo "blocking_result=$BLOCKING_RESULT" >> "$GITHUB_OUTPUT" + echo "advisory_baseline=$ADVISORY_BASELINE" >> "$GITHUB_OUTPUT" + echo "blocking_baseline=$BLOCKING_BASELINE" >> "$GITHUB_OUTPUT" # Multi-line output: random delimiter so a malicious log line can't # close the heredoc and inject extra output keys. See: # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings - DELIM="EOF_$(openssl rand -hex 16)" + ADVISORY_DELIM="EOF_$(openssl rand -hex 16)" { - echo "logs<<${DELIM}" - cat ./semver-artifact/logs - echo "${DELIM}" + echo "advisory_logs<<${ADVISORY_DELIM}" + cat ./semver-advisory-artifact/logs + echo "${ADVISORY_DELIM}" + } >> "$GITHUB_OUTPUT" + + BLOCKING_DELIM="EOF_$(openssl rand -hex 16)" + { + echo "blocking_logs<<${BLOCKING_DELIM}" + cat ./semver-blocking-artifact/logs + echo "${BLOCKING_DELIM}" } >> "$GITHUB_OUTPUT" # The marker `` is what makes the comment # "sticky": maintain-one-comment uses it to find and replace (or # delete) the existing comment instead of stacking new ones. - name: Upsert sticky comment - if: steps.read.outputs.result != 'success' + if: steps.read.outputs.advisory_result != 'success' || steps.read.outputs.blocking_result != 'success' uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -118,19 +165,42 @@ jobs: Thank you for opening this pull request! - Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch). + [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported semver compatibility warnings for changed published crates. + + ## Blocking latest-release signal + + Result: `${{ steps.read.outputs.blocking_result }}` + Baseline: `${{ steps.read.outputs.blocking_baseline }}` + + If this result is `failure`, the pull request contains API changes that are not SemVer-compatible with the latest stable release. This is the blocking semver warning signal and applies the `${{ env.BREAKING_CHANGE_LABEL }}` label. + +
+ Blocking latest-release details + + ``` + ${{ steps.read.outputs.blocking_logs }} + ``` + +
+ + ## Advisory PR-local signal + + Result: `${{ steps.read.outputs.advisory_result }}` + Baseline: `${{ steps.read.outputs.advisory_baseline }}` + + If this result is `failure`, the pull request contains API changes that are not SemVer-compatible with its base branch. This is advisory review information only.
- Details + Advisory PR-local details ``` - ${{ steps.read.outputs.logs }} + ${{ steps.read.outputs.advisory_logs }} ```
- name: Delete sticky comment - if: steps.read.outputs.result == 'success' + if: steps.read.outputs.advisory_result == 'success' && steps.read.outputs.blocking_result == 'success' uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -139,7 +209,7 @@ jobs: delete: true - name: Add "auto detected api change" label - if: steps.read.outputs.result != 'success' + if: steps.read.outputs.blocking_result != 'success' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPO: ${{ github.repository }} @@ -149,7 +219,7 @@ jobs: --add-label "$BREAKING_CHANGE_LABEL" - name: Remove "auto detected api change" label - if: steps.read.outputs.result == 'success' + if: steps.read.outputs.blocking_result == 'success' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPO: ${{ github.repository }} From 68a6a15517241e31b429eddecd0ae0bdc6946328 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 15 May 2026 15:23:44 +0800 Subject: [PATCH 2/8] feat(ci): enhance scripts and workflows for crate versioning - Added `latest-release-tag` functionality to `ci/scripts/changed_crates.sh` - Implemented filtering for only stable semantic version tags and error handling for the absence of stable tags - Updated documentation for `changed_crates.sh` - Enhanced `ci/scripts/test_changed_crates.sh` with new shell tests for: - Stable tags preference over newer release candidates (RC) - Semantic version sorting - Ignoring malformed or namespaced tags - Failing on missing tags - Failing on presence of only RC tags - Updated `.github/workflows/breaking_changes_detector.yml`: - Blocking job now invokes `ci/scripts/changed_crates.sh` for `latest-release-tag` - Artifacts now include `latest_release_tag` - Consolidated duplicated install steps into a local action - Modified `.github/actions/setup-semver-check/action.yml` for shared setup of protobuf and cargo-semver checks - Improved `.github/workflows/breaking_changes_detector_comment.yml` to validate and print the `latest_release_tag` --- .github/actions/setup-semver-check/action.yml | 35 ++++++ .../workflows/breaking_changes_detector.yml | 37 +------ .../breaking_changes_detector_comment.yml | 14 ++- ci/scripts/changed_crates.sh | 38 +++++-- ci/scripts/test_changed_crates.sh | 103 ++++++++++++++++++ 5 files changed, 186 insertions(+), 41 deletions(-) create mode 100644 .github/actions/setup-semver-check/action.yml create mode 100755 ci/scripts/test_changed_crates.sh diff --git a/.github/actions/setup-semver-check/action.yml b/.github/actions/setup-semver-check/action.yml new file mode 100644 index 0000000000000..da5b9be856ea9 --- /dev/null +++ b/.github/actions/setup-semver-check/action.yml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Setup semver check +description: Install tools required by cargo-semver-checks for changed crates. +runs: + using: composite + steps: + # `datafusion-substrait` (and crates that depend on it via sqllogictest) + # have a build script that calls protoc, which is not preinstalled on + # ubuntu-latest runners. + - name: Install Protobuf Compiler + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y protobuf-compiler + + - name: Install cargo-semver-checks + uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6 + with: + tool: cargo-semver-checks diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index 29c48c644094b..eab26f12065cd 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -78,20 +78,9 @@ jobs: echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" echo "Changed crates: $PACKAGES" - # `datafusion-substrait` (and crates that depend on it via sqllogictest) - # have a build script that calls protoc, which is not preinstalled on - # ubuntu-latest runners. - - name: Install Protobuf Compiler + - name: Setup semver check if: steps.changed_crates.outputs.packages != '' - run: | - sudo apt-get update - sudo apt-get install -y protobuf-compiler - - - name: Install cargo-semver-checks - if: steps.changed_crates.outputs.packages != '' - uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6 - with: - tool: cargo-semver-checks + uses: ./.github/actions/setup-semver-check - name: Run cargo-semver-checks against PR base branch id: check_semver @@ -164,14 +153,7 @@ jobs: id: latest_release run: | git fetch --tags --force - LATEST_RELEASE_TAG=$(git tag --list '[0-9]*.[0-9]*.[0-9]*' \ - | grep -Ev '-' \ - | sort -V \ - | tail -n1) - if [ -z "$LATEST_RELEASE_TAG" ]; then - echo "Could not determine latest stable release tag" >&2 - exit 1 - fi + LATEST_RELEASE_TAG=$(ci/scripts/changed_crates.sh latest-release-tag) echo "tag=$LATEST_RELEASE_TAG" >> "$GITHUB_OUTPUT" echo "Latest stable release tag: $LATEST_RELEASE_TAG" @@ -184,17 +166,9 @@ jobs: echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" echo "Changed crates: $PACKAGES" - - name: Install Protobuf Compiler + - name: Setup semver check if: steps.changed_crates.outputs.packages != '' - run: | - sudo apt-get update - sudo apt-get install -y protobuf-compiler - - - name: Install cargo-semver-checks - if: steps.changed_crates.outputs.packages != '' - uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6 - with: - tool: cargo-semver-checks + uses: ./.github/actions/setup-semver-check - name: Run cargo-semver-checks against latest stable release id: check_semver @@ -227,6 +201,7 @@ jobs: echo "$PR_NUMBER" > semver-blocking-artifact/pr_number echo "$CHECK_RESULT" > semver-blocking-artifact/result echo "$LATEST_RELEASE_TAG" > semver-blocking-artifact/baseline_ref + echo "$LATEST_RELEASE_TAG" > semver-blocking-artifact/latest_release_tag if [ -f /tmp/blocking-semver-output.txt ]; then sed 's/\x1b\[[0-9;]*m//g' /tmp/blocking-semver-output.txt > semver-blocking-artifact/logs else diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml index e2f728532b450..fad51ba00f736 100644 --- a/.github/workflows/breaking_changes_detector_comment.yml +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -119,6 +119,7 @@ jobs: ADVISORY_BASELINE=$(cat ./semver-advisory-artifact/baseline_ref) BLOCKING_BASELINE=$(cat ./semver-blocking-artifact/baseline_ref) + LATEST_RELEASE_TAG=$(cat ./semver-blocking-artifact/latest_release_tag) if ! [[ "$ADVISORY_BASELINE" =~ ^[A-Za-z0-9._/-]+$ ]]; then echo "Invalid advisory baseline: $ADVISORY_BASELINE" >&2 exit 1 @@ -127,12 +128,21 @@ jobs: echo "Invalid blocking baseline: $BLOCKING_BASELINE" >&2 exit 1 fi + if ! [[ "$LATEST_RELEASE_TAG" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "Invalid latest release tag: $LATEST_RELEASE_TAG" >&2 + exit 1 + fi + if [ "$BLOCKING_BASELINE" != "$LATEST_RELEASE_TAG" ]; then + echo "Mismatched blocking baseline and latest release tag: $BLOCKING_BASELINE != $LATEST_RELEASE_TAG" >&2 + exit 1 + fi echo "pr_number=$ADVISORY_PR_NUMBER" >> "$GITHUB_OUTPUT" echo "advisory_result=$ADVISORY_RESULT" >> "$GITHUB_OUTPUT" echo "blocking_result=$BLOCKING_RESULT" >> "$GITHUB_OUTPUT" echo "advisory_baseline=$ADVISORY_BASELINE" >> "$GITHUB_OUTPUT" echo "blocking_baseline=$BLOCKING_BASELINE" >> "$GITHUB_OUTPUT" + echo "latest_release_tag=$LATEST_RELEASE_TAG" >> "$GITHUB_OUTPUT" # Multi-line output: random delimiter so a malicious log line can't # close the heredoc and inject extra output keys. See: @@ -170,9 +180,9 @@ jobs: ## Blocking latest-release signal Result: `${{ steps.read.outputs.blocking_result }}` - Baseline: `${{ steps.read.outputs.blocking_baseline }}` + Latest release tag: `${{ steps.read.outputs.latest_release_tag }}` - If this result is `failure`, the pull request contains API changes that are not SemVer-compatible with the latest stable release. This is the blocking semver warning signal and applies the `${{ env.BREAKING_CHANGE_LABEL }}` label. + If this result is `failure`, the pull request contains API changes that are not SemVer-compatible with the latest stable release tag. This is the blocking semver warning signal and applies the `${{ env.BREAKING_CHANGE_LABEL }}` label.
Blocking latest-release details diff --git a/ci/scripts/changed_crates.sh b/ci/scripts/changed_crates.sh index 6d014a9492632..17f148e3349a3 100755 --- a/ci/scripts/changed_crates.sh +++ b/ci/scripts/changed_crates.sh @@ -24,10 +24,15 @@ # Only published workspace members (those without `publish = false`) are # considered. # -# semver-check -# Run cargo-semver-checks for the given packages against base_ref. -# Output and exit code are passed through unchanged; the caller is -# responsible for capturing/formatting them. +# latest-release-tag +# Print the latest stable release tag. RC and other pre-release tags are +# ignored. Tags must be plain semver values like `53.1.0`. +# +# semver-check +# Run cargo-semver-checks for the given packages against baseline_ref. +# baseline_ref can be a tag or any git ref. Output and exit code are +# passed through unchanged; the caller is responsible for capturing and +# formatting them. set -euo pipefail @@ -59,9 +64,25 @@ cmd_changed_crates() { done <<<"$crates" | xargs } +# ── latest-release-tag ────────────────────────────────────────────── +cmd_latest_release_tag() { + local latest_release_tag + latest_release_tag=$(git tag --list \ + | grep -E '^[0-9]+\.[0-9]+\.[0-9]+$' \ + | sort -V \ + | tail -n1 || true) + + if [ -z "$latest_release_tag" ]; then + echo "No stable release tags found" >&2 + return 1 + fi + + echo "$latest_release_tag" +} + # ── semver-check ──────────────────────────────────────────────────── cmd_semver_check() { - local base_ref="${1:?Usage: changed_crates.sh semver-check }" + local base_ref="${1:?Usage: changed_crates.sh semver-check }" shift local args=() @@ -73,11 +94,12 @@ cmd_semver_check() { } # ── main ──────────────────────────────────────────────────────────── -cmd="${1:?Usage: changed_crates.sh [args...]}" +cmd="${1:?Usage: changed_crates.sh [args...]}" shift case "$cmd" in - changed-crates) cmd_changed_crates "$@" ;; - semver-check) cmd_semver_check "$@" ;; + changed-crates) cmd_changed_crates "$@" ;; + latest-release-tag) cmd_latest_release_tag "$@" ;; + semver-check) cmd_semver_check "$@" ;; *) echo "Unknown command: $cmd" >&2; exit 1 ;; esac diff --git a/ci/scripts/test_changed_crates.sh b/ci/scripts/test_changed_crates.sh new file mode 100755 index 0000000000000..64279135ca04f --- /dev/null +++ b/ci/scripts/test_changed_crates.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euo pipefail + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) +CHANGED_CRATES_SH="$SCRIPT_DIR/changed_crates.sh" +TMP_ROOT=$(mktemp -d) +trap 'rm -rf "$TMP_ROOT"' EXIT + +setup_git_repo() { + local repo_dir=$1 + git -C "$repo_dir" init --quiet + git -C "$repo_dir" config user.email test@example.com + git -C "$repo_dir" config user.name test + git -C "$repo_dir" commit --quiet --allow-empty -m init +} + +new_git_repo() { + local repo_dir + repo_dir=$(mktemp -d "$TMP_ROOT/repo.XXXXXX") + setup_git_repo "$repo_dir" + echo "$repo_dir" +} + +run_latest_release_tag() { + local repo_dir=$1 + (cd "$repo_dir" && "$CHANGED_CRATES_SH" latest-release-tag) +} + +assert_eq() { + local expected=$1 + local actual=$2 + local message=$3 + if [ "$actual" != "$expected" ]; then + echo "FAIL: $message" >&2 + echo "expected: $expected" >&2 + echo "actual: $actual" >&2 + exit 1 + fi +} + +assert_latest_release_tag() { + local test_name=$1 + local expected=$2 + shift 2 + + local repo_dir + repo_dir=$(new_git_repo) + + for tag in "$@"; do + git -C "$repo_dir" tag "$tag" + done + + local actual + actual=$(run_latest_release_tag "$repo_dir") + assert_eq "$expected" "$actual" "$test_name" +} + +assert_latest_release_tag "stable tag wins over newer RC" \ + "53.1.0" \ + "53.0.0" "53.1.0-rc1" "53.1.0" "54.0.0-rc1" + +assert_latest_release_tag "semver sort handles double-digit versions" \ + "10.0.0" \ + "9.9.9" "10.0.0" "10.0.1-rc1" + +assert_latest_release_tag "malformed and namespaced tags are ignored" \ + "2.0.0" \ + "ballista-9.0.0" "python-99.0.0" "2.0" "2.0.0" "3.0.0-alpha1" + +no_tags_repo=$(new_git_repo) +if run_latest_release_tag "$no_tags_repo" >"$TMP_ROOT/out" 2>"$TMP_ROOT/err"; then + echo "FAIL: no tags should fail" >&2 + exit 1 +fi +assert_eq "No stable release tags found" "$(cat "$TMP_ROOT/err")" "no tags error" + +only_rc_repo=$(new_git_repo) +git -C "$only_rc_repo" tag "53.1.0-rc1" +git -C "$only_rc_repo" tag "54.0.0-rc1" +if run_latest_release_tag "$only_rc_repo" >"$TMP_ROOT/out" 2>"$TMP_ROOT/err"; then + echo "FAIL: only RC tags should fail" >&2 + exit 1 +fi +assert_eq "No stable release tags found" "$(cat "$TMP_ROOT/err")" "only RC tags error" + +echo "changed_crates.sh tests passed" From 49d8e286c3ff956c3fb391ac02cc856196a0eff2 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 15 May 2026 15:29:47 +0800 Subject: [PATCH 3/8] feat: centralize setup and streamline workflows for semver checks - Updated `.github/actions/setup-semver-check/action.yml` to centralize: - Fetching PR base branch - Determining changed crates - Installing protobuf - Installing cargo-semver-checks - Added output for packages - Modified `.github/workflows/breaking_changes_detector.yml` to: - Reuse shared action for advisory and blocking jobs - Remove duplicated steps for fetching, changed crates, and installation --- .github/actions/setup-semver-check/action.yml | 36 +++++++++++- .../workflows/breaking_changes_detector.yml | 56 +++++-------------- 2 files changed, 49 insertions(+), 43 deletions(-) diff --git a/.github/actions/setup-semver-check/action.yml b/.github/actions/setup-semver-check/action.yml index da5b9be856ea9..c414214807c46 100644 --- a/.github/actions/setup-semver-check/action.yml +++ b/.github/actions/setup-semver-check/action.yml @@ -16,20 +16,54 @@ # under the License. name: Setup semver check -description: Install tools required by cargo-semver-checks for changed crates. +description: Fetch PR base, determine changed crates, and install tools required by cargo-semver-checks. +inputs: + base_ref: + description: Pull request base branch ref. + required: true + repository: + description: Repository to fetch the base branch from. + required: true +outputs: + packages: + description: Space-separated changed publishable crate names. + value: ${{ steps.changed_crates.outputs.packages }} runs: using: composite steps: + # `origin` may point at a fork (when a contributor runs this locally) or + # at a stale ref. Fetch the base branch from the PR's upstream repo into + # a dedicated `apache/` ref so the baseline is unambiguous and the + # same ref name works locally (`git remote add apache ...`) and in CI. + - name: Fetch base branch + shell: bash + env: + BASE_REF: ${{ inputs.base_ref }} + REPO: ${{ inputs.repository }} + run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}" + + - name: Determine changed crates + id: changed_crates + shell: bash + env: + BASE_REF: ${{ inputs.base_ref }} + run: | + PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}") + echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" + echo "Changed crates: $PACKAGES" + # `datafusion-substrait` (and crates that depend on it via sqllogictest) # have a build script that calls protoc, which is not preinstalled on # ubuntu-latest runners. - name: Install Protobuf Compiler + if: steps.changed_crates.outputs.packages != '' shell: bash run: | sudo apt-get update sudo apt-get install -y protobuf-compiler - name: Install cargo-semver-checks + if: steps.changed_crates.outputs.packages != '' uses: taiki-e/install-action@c070f87102a1c75b3183910f391c1cb887fe13c8 # v2.77.6 with: tool: cargo-semver-checks diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index eab26f12065cd..e153023d7ffbe 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -59,35 +59,19 @@ jobs: with: fetch-depth: 0 - # `origin` may point at a fork (when a contributor runs this locally) or - # at a stale ref. Fetch the base branch from the PR's upstream repo into - # a dedicated `apache/` ref so the baseline is unambiguous and the - # same ref name works locally (`git remote add apache ...`) and in CI. - - name: Fetch base branch - env: - BASE_REF: ${{ github.base_ref }} - REPO: ${{ github.repository }} - run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}" - - - name: Determine changed crates - id: changed_crates - env: - BASE_REF: ${{ github.base_ref }} - run: | - PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}") - echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" - echo "Changed crates: $PACKAGES" - - name: Setup semver check - if: steps.changed_crates.outputs.packages != '' + id: semver_setup uses: ./.github/actions/setup-semver-check + with: + base_ref: ${{ github.base_ref }} + repository: ${{ github.repository }} - name: Run cargo-semver-checks against PR base branch id: check_semver - if: steps.changed_crates.outputs.packages != '' + if: steps.semver_setup.outputs.packages != '' env: BASE_REF: ${{ github.base_ref }} - PACKAGES: ${{ steps.changed_crates.outputs.packages }} + PACKAGES: ${{ steps.semver_setup.outputs.packages }} run: | set +e # `tee` lets cargo's output stream live into the Actions log @@ -143,11 +127,12 @@ jobs: with: fetch-depth: 0 - - name: Fetch base branch - env: - BASE_REF: ${{ github.base_ref }} - REPO: ${{ github.repository }} - run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/apache/${BASE_REF}" + - name: Setup semver check + id: semver_setup + uses: ./.github/actions/setup-semver-check + with: + base_ref: ${{ github.base_ref }} + repository: ${{ github.repository }} - name: Determine latest stable release tag id: latest_release @@ -157,25 +142,12 @@ jobs: echo "tag=$LATEST_RELEASE_TAG" >> "$GITHUB_OUTPUT" echo "Latest stable release tag: $LATEST_RELEASE_TAG" - - name: Determine changed crates - id: changed_crates - env: - BASE_REF: ${{ github.base_ref }} - run: | - PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "apache/${BASE_REF}") - echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT" - echo "Changed crates: $PACKAGES" - - - name: Setup semver check - if: steps.changed_crates.outputs.packages != '' - uses: ./.github/actions/setup-semver-check - - name: Run cargo-semver-checks against latest stable release id: check_semver - if: steps.changed_crates.outputs.packages != '' + if: steps.semver_setup.outputs.packages != '' env: LATEST_RELEASE_TAG: ${{ steps.latest_release.outputs.tag }} - PACKAGES: ${{ steps.changed_crates.outputs.packages }} + PACKAGES: ${{ steps.semver_setup.outputs.packages }} run: | set +e ci/scripts/changed_crates.sh semver-check "$LATEST_RELEASE_TAG" $PACKAGES \ From c1b73887c0f32fbb617bd3da19d147d505651ff0 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 15 May 2026 15:43:09 +0800 Subject: [PATCH 4/8] feat: enhance GitHub Actions and workflows - Added `.github/actions/run-semver-check/action.yml` for shared semver run with log teeing and result output. - Added `.github/actions/stage-semver-artifact/action.yml` for shared artifact staging and ANSI stripping. - Updated `.github/workflows/breaking_changes_detector.yml` to use both actions in advisory/blocking jobs and removed duplicate run/stage shell blocks. - Updated `.github/workflows/breaking_changes_detector_comment.yml` by adding local bash helpers: `require_regex`, `require_result`, `write_output`, and `write_multiline_output`, while removing repeated validation/heredoc logic. - Updated `ci/scripts/test_changed_crates.sh` to add `tag_repo`, `assert_latest_release_tag_fails`, and removed duplicate negative-test setup. --- .github/actions/run-semver-check/action.yml | 58 ++++++++++ .../actions/stage-semver-artifact/action.yml | 64 +++++++++++ .../workflows/breaking_changes_detector.yml | 89 +++++---------- .../breaking_changes_detector_comment.yml | 106 +++++++++--------- ci/scripts/test_changed_crates.sh | 48 ++++---- 5 files changed, 231 insertions(+), 134 deletions(-) create mode 100644 .github/actions/run-semver-check/action.yml create mode 100644 .github/actions/stage-semver-artifact/action.yml diff --git a/.github/actions/run-semver-check/action.yml b/.github/actions/run-semver-check/action.yml new file mode 100644 index 0000000000000..4fc1ac729d879 --- /dev/null +++ b/.github/actions/run-semver-check/action.yml @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Run semver check +description: Run cargo-semver-checks, preserve logs, and expose success/failure without failing the job. +inputs: + baseline_ref: + description: Baseline git ref or tag for cargo-semver-checks. + required: true + packages: + description: Space-separated package names to check. + required: true + log_path: + description: Path where semver output should be copied. + required: true +outputs: + result: + description: "Semver check result: success or failure." + value: ${{ steps.check.outputs.result }} +runs: + using: composite + steps: + - name: Run cargo-semver-checks + id: check + shell: bash + env: + BASELINE_REF: ${{ inputs.baseline_ref }} + PACKAGES: ${{ inputs.packages }} + LOG_PATH: ${{ inputs.log_path }} + run: | + set +e + # `tee` lets cargo's output stream live into the Actions log + # while we also keep a copy for the PR comment. + ci/scripts/changed_crates.sh semver-check "$BASELINE_REF" $PACKAGES \ + 2>&1 | tee "$LOG_PATH" + EXIT_CODE=${PIPESTATUS[0]} + # Pass the result through an output instead of failing the job: + # semver breakage should surface as a comment/label signal while this + # CI job status stays green. + if [ "$EXIT_CODE" -eq 0 ]; then + echo "result=success" >> "$GITHUB_OUTPUT" + else + echo "result=failure" >> "$GITHUB_OUTPUT" + fi diff --git a/.github/actions/stage-semver-artifact/action.yml b/.github/actions/stage-semver-artifact/action.yml new file mode 100644 index 0000000000000..3e6e4b1d80542 --- /dev/null +++ b/.github/actions/stage-semver-artifact/action.yml @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Stage semver artifact +description: Stage semver result fields and sanitized logs for the comment workflow. +inputs: + artifact_dir: + description: Directory to write artifact files into. + required: true + pr_number: + description: Pull request number. + required: true + result: + description: "Semver result: success or failure." + required: true + baseline_ref: + description: Baseline git ref or tag used by the semver check. + required: true + log_path: + description: Path to raw semver output log. + required: true + latest_release_tag: + description: Latest release tag, for blocking release-baseline checks. + required: false + default: "" +runs: + using: composite + steps: + - name: Stage artifact files + shell: bash + env: + ARTIFACT_DIR: ${{ inputs.artifact_dir }} + PR_NUMBER: ${{ inputs.pr_number }} + RESULT: ${{ inputs.result }} + BASELINE_REF: ${{ inputs.baseline_ref }} + LOG_PATH: ${{ inputs.log_path }} + LATEST_RELEASE_TAG: ${{ inputs.latest_release_tag }} + run: | + mkdir -p "$ARTIFACT_DIR" + echo "$PR_NUMBER" > "$ARTIFACT_DIR/pr_number" + echo "$RESULT" > "$ARTIFACT_DIR/result" + echo "$BASELINE_REF" > "$ARTIFACT_DIR/baseline_ref" + if [ -n "$LATEST_RELEASE_TAG" ]; then + echo "$LATEST_RELEASE_TAG" > "$ARTIFACT_DIR/latest_release_tag" + fi + if [ -f "$LOG_PATH" ]; then + sed 's/\x1b\[[0-9;]*m//g' "$LOG_PATH" > "$ARTIFACT_DIR/logs" + else + : > "$ARTIFACT_DIR/logs" + fi diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index e153023d7ffbe..f932f603d9499 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -69,25 +69,11 @@ jobs: - name: Run cargo-semver-checks against PR base branch id: check_semver if: steps.semver_setup.outputs.packages != '' - env: - BASE_REF: ${{ github.base_ref }} - PACKAGES: ${{ steps.semver_setup.outputs.packages }} - run: | - set +e - # `tee` lets cargo's output stream live into the Actions log - # while we also keep a copy for the PR comment. - # Using `apache` remote here to point to the repository the pull request is against. - ci/scripts/changed_crates.sh semver-check "apache/${BASE_REF}" $PACKAGES \ - 2>&1 | tee /tmp/advisory-semver-output.txt - EXIT_CODE=${PIPESTATUS[0]} - # Pass the result through an output instead of failing the job: - # a detected PR-local breaking change should surface as an advisory - # PR comment, not a red check. - if [ "$EXIT_CODE" -eq 0 ]; then - echo "result=success" >> "$GITHUB_OUTPUT" - else - echo "result=failure" >> "$GITHUB_OUTPUT" - fi + uses: ./.github/actions/run-semver-check + with: + baseline_ref: apache/${{ github.base_ref }} + packages: ${{ steps.semver_setup.outputs.packages }} + log_path: /tmp/advisory-semver-output.txt # Stage the data the companion comment workflow needs into a single # directory. We default the result to "success" so the comment @@ -95,20 +81,13 @@ jobs: # (e.g. no published crates changed). - name: Stage advisory artifact for comment workflow if: always() - env: - PR_NUMBER: ${{ github.event.pull_request.number }} - BASE_REF: ${{ github.base_ref }} - CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }} - run: | - mkdir -p semver-advisory-artifact - echo "$PR_NUMBER" > semver-advisory-artifact/pr_number - echo "$CHECK_RESULT" > semver-advisory-artifact/result - echo "apache/${BASE_REF}" > semver-advisory-artifact/baseline_ref - if [ -f /tmp/advisory-semver-output.txt ]; then - sed 's/\x1b\[[0-9;]*m//g' /tmp/advisory-semver-output.txt > semver-advisory-artifact/logs - else - : > semver-advisory-artifact/logs - fi + uses: ./.github/actions/stage-semver-artifact + with: + artifact_dir: semver-advisory-artifact + pr_number: ${{ github.event.pull_request.number }} + result: ${{ steps.check_semver.outputs.result || 'success' }} + baseline_ref: apache/${{ github.base_ref }} + log_path: /tmp/advisory-semver-output.txt - name: Upload advisory artifact if: always() @@ -145,40 +124,22 @@ jobs: - name: Run cargo-semver-checks against latest stable release id: check_semver if: steps.semver_setup.outputs.packages != '' - env: - LATEST_RELEASE_TAG: ${{ steps.latest_release.outputs.tag }} - PACKAGES: ${{ steps.semver_setup.outputs.packages }} - run: | - set +e - ci/scripts/changed_crates.sh semver-check "$LATEST_RELEASE_TAG" $PACKAGES \ - 2>&1 | tee /tmp/blocking-semver-output.txt - EXIT_CODE=${PIPESTATUS[0]} - # Pass the result through an output instead of failing the job: - # release-baseline breakage should emit a blocking warning comment - # and label, while this CI job status stays green. - if [ "$EXIT_CODE" -eq 0 ]; then - echo "result=success" >> "$GITHUB_OUTPUT" - else - echo "result=failure" >> "$GITHUB_OUTPUT" - fi + uses: ./.github/actions/run-semver-check + with: + baseline_ref: ${{ steps.latest_release.outputs.tag }} + packages: ${{ steps.semver_setup.outputs.packages }} + log_path: /tmp/blocking-semver-output.txt - name: Stage blocking artifact for comment workflow if: always() - env: - PR_NUMBER: ${{ github.event.pull_request.number }} - CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }} - LATEST_RELEASE_TAG: ${{ steps.latest_release.outputs.tag }} - run: | - mkdir -p semver-blocking-artifact - echo "$PR_NUMBER" > semver-blocking-artifact/pr_number - echo "$CHECK_RESULT" > semver-blocking-artifact/result - echo "$LATEST_RELEASE_TAG" > semver-blocking-artifact/baseline_ref - echo "$LATEST_RELEASE_TAG" > semver-blocking-artifact/latest_release_tag - if [ -f /tmp/blocking-semver-output.txt ]; then - sed 's/\x1b\[[0-9;]*m//g' /tmp/blocking-semver-output.txt > semver-blocking-artifact/logs - else - : > semver-blocking-artifact/logs - fi + uses: ./.github/actions/stage-semver-artifact + with: + artifact_dir: semver-blocking-artifact + pr_number: ${{ github.event.pull_request.number }} + result: ${{ steps.check_semver.outputs.result || 'success' }} + baseline_ref: ${{ steps.latest_release.outputs.tag }} + latest_release_tag: ${{ steps.latest_release.outputs.tag }} + log_path: /tmp/blocking-semver-output.txt - name: Upload blocking artifact if: always() diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml index fad51ba00f736..8a7f6d1387af0 100644 --- a/.github/workflows/breaking_changes_detector_comment.yml +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -89,18 +89,52 @@ jobs: id: read run: | set -euo pipefail + require_regex() { + local name=$1 + local value=$2 + local regex=$3 + if ! [[ "$value" =~ $regex ]]; then + echo "Invalid $name: $value" >&2 + exit 1 + fi + } + + require_result() { + local name=$1 + local value=$2 + if [[ "$value" != "success" && "$value" != "failure" ]]; then + echo "Invalid $name check result: $value" >&2 + exit 1 + fi + } + + write_output() { + local key=$1 + local value=$2 + echo "$key=$value" >> "$GITHUB_OUTPUT" + } + + write_multiline_output() { + local key=$1 + local file=$2 + # Random delimiter so a malicious log line can't close the heredoc + # and inject extra output keys. See: + # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings + local delim + delim="EOF_$(openssl rand -hex 16)" + { + echo "${key}<<${delim}" + cat "$file" + echo "$delim" + } >> "$GITHUB_OUTPUT" + } + # Validate every field: the artifacts come from workflow runs # that compiled fork-controlled code, so their contents are untrusted. ADVISORY_PR_NUMBER=$(cat ./semver-advisory-artifact/pr_number) BLOCKING_PR_NUMBER=$(cat ./semver-blocking-artifact/pr_number) - if ! [[ "$ADVISORY_PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Invalid advisory PR number: $ADVISORY_PR_NUMBER" >&2 - exit 1 - fi - if ! [[ "$BLOCKING_PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Invalid blocking PR number: $BLOCKING_PR_NUMBER" >&2 - exit 1 - fi + require_regex "advisory PR number" "$ADVISORY_PR_NUMBER" '^[0-9]+$' + require_regex "blocking PR number" "$BLOCKING_PR_NUMBER" '^[0-9]+$' if [ "$ADVISORY_PR_NUMBER" != "$BLOCKING_PR_NUMBER" ]; then echo "Mismatched PR numbers: $ADVISORY_PR_NUMBER != $BLOCKING_PR_NUMBER" >&2 exit 1 @@ -108,58 +142,28 @@ jobs: ADVISORY_RESULT=$(cat ./semver-advisory-artifact/result) BLOCKING_RESULT=$(cat ./semver-blocking-artifact/result) - if [[ "$ADVISORY_RESULT" != "success" && "$ADVISORY_RESULT" != "failure" ]]; then - echo "Invalid advisory check result: $ADVISORY_RESULT" >&2 - exit 1 - fi - if [[ "$BLOCKING_RESULT" != "success" && "$BLOCKING_RESULT" != "failure" ]]; then - echo "Invalid blocking check result: $BLOCKING_RESULT" >&2 - exit 1 - fi + require_result "advisory" "$ADVISORY_RESULT" + require_result "blocking" "$BLOCKING_RESULT" ADVISORY_BASELINE=$(cat ./semver-advisory-artifact/baseline_ref) BLOCKING_BASELINE=$(cat ./semver-blocking-artifact/baseline_ref) LATEST_RELEASE_TAG=$(cat ./semver-blocking-artifact/latest_release_tag) - if ! [[ "$ADVISORY_BASELINE" =~ ^[A-Za-z0-9._/-]+$ ]]; then - echo "Invalid advisory baseline: $ADVISORY_BASELINE" >&2 - exit 1 - fi - if ! [[ "$BLOCKING_BASELINE" =~ ^[A-Za-z0-9._/-]+$ ]]; then - echo "Invalid blocking baseline: $BLOCKING_BASELINE" >&2 - exit 1 - fi - if ! [[ "$LATEST_RELEASE_TAG" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - echo "Invalid latest release tag: $LATEST_RELEASE_TAG" >&2 - exit 1 - fi + require_regex "advisory baseline" "$ADVISORY_BASELINE" '^[A-Za-z0-9._/-]+$' + require_regex "blocking baseline" "$BLOCKING_BASELINE" '^[A-Za-z0-9._/-]+$' + require_regex "latest release tag" "$LATEST_RELEASE_TAG" '^[0-9]+\.[0-9]+\.[0-9]+$' if [ "$BLOCKING_BASELINE" != "$LATEST_RELEASE_TAG" ]; then echo "Mismatched blocking baseline and latest release tag: $BLOCKING_BASELINE != $LATEST_RELEASE_TAG" >&2 exit 1 fi - echo "pr_number=$ADVISORY_PR_NUMBER" >> "$GITHUB_OUTPUT" - echo "advisory_result=$ADVISORY_RESULT" >> "$GITHUB_OUTPUT" - echo "blocking_result=$BLOCKING_RESULT" >> "$GITHUB_OUTPUT" - echo "advisory_baseline=$ADVISORY_BASELINE" >> "$GITHUB_OUTPUT" - echo "blocking_baseline=$BLOCKING_BASELINE" >> "$GITHUB_OUTPUT" - echo "latest_release_tag=$LATEST_RELEASE_TAG" >> "$GITHUB_OUTPUT" - - # Multi-line output: random delimiter so a malicious log line can't - # close the heredoc and inject extra output keys. See: - # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings - ADVISORY_DELIM="EOF_$(openssl rand -hex 16)" - { - echo "advisory_logs<<${ADVISORY_DELIM}" - cat ./semver-advisory-artifact/logs - echo "${ADVISORY_DELIM}" - } >> "$GITHUB_OUTPUT" - - BLOCKING_DELIM="EOF_$(openssl rand -hex 16)" - { - echo "blocking_logs<<${BLOCKING_DELIM}" - cat ./semver-blocking-artifact/logs - echo "${BLOCKING_DELIM}" - } >> "$GITHUB_OUTPUT" + write_output "pr_number" "$ADVISORY_PR_NUMBER" + write_output "advisory_result" "$ADVISORY_RESULT" + write_output "blocking_result" "$BLOCKING_RESULT" + write_output "advisory_baseline" "$ADVISORY_BASELINE" + write_output "blocking_baseline" "$BLOCKING_BASELINE" + write_output "latest_release_tag" "$LATEST_RELEASE_TAG" + write_multiline_output "advisory_logs" ./semver-advisory-artifact/logs + write_multiline_output "blocking_logs" ./semver-blocking-artifact/logs # The marker `` is what makes the comment # "sticky": maintain-one-comment uses it to find and replace (or diff --git a/ci/scripts/test_changed_crates.sh b/ci/scripts/test_changed_crates.sh index 64279135ca04f..6d3d5886f5221 100755 --- a/ci/scripts/test_changed_crates.sh +++ b/ci/scripts/test_changed_crates.sh @@ -55,6 +55,15 @@ assert_eq() { fi } +tag_repo() { + local repo_dir=$1 + shift + + for tag in "$@"; do + git -C "$repo_dir" tag "$tag" + done +} + assert_latest_release_tag() { local test_name=$1 local expected=$2 @@ -62,16 +71,28 @@ assert_latest_release_tag() { local repo_dir repo_dir=$(new_git_repo) - - for tag in "$@"; do - git -C "$repo_dir" tag "$tag" - done + tag_repo "$repo_dir" "$@" local actual actual=$(run_latest_release_tag "$repo_dir") assert_eq "$expected" "$actual" "$test_name" } +assert_latest_release_tag_fails() { + local test_name=$1 + shift + + local repo_dir + repo_dir=$(new_git_repo) + tag_repo "$repo_dir" "$@" + + if run_latest_release_tag "$repo_dir" >"$TMP_ROOT/out" 2>"$TMP_ROOT/err"; then + echo "FAIL: $test_name" >&2 + exit 1 + fi + assert_eq "No stable release tags found" "$(cat "$TMP_ROOT/err")" "$test_name" +} + assert_latest_release_tag "stable tag wins over newer RC" \ "53.1.0" \ "53.0.0" "53.1.0-rc1" "53.1.0" "54.0.0-rc1" @@ -84,20 +105,9 @@ assert_latest_release_tag "malformed and namespaced tags are ignored" \ "2.0.0" \ "ballista-9.0.0" "python-99.0.0" "2.0" "2.0.0" "3.0.0-alpha1" -no_tags_repo=$(new_git_repo) -if run_latest_release_tag "$no_tags_repo" >"$TMP_ROOT/out" 2>"$TMP_ROOT/err"; then - echo "FAIL: no tags should fail" >&2 - exit 1 -fi -assert_eq "No stable release tags found" "$(cat "$TMP_ROOT/err")" "no tags error" - -only_rc_repo=$(new_git_repo) -git -C "$only_rc_repo" tag "53.1.0-rc1" -git -C "$only_rc_repo" tag "54.0.0-rc1" -if run_latest_release_tag "$only_rc_repo" >"$TMP_ROOT/out" 2>"$TMP_ROOT/err"; then - echo "FAIL: only RC tags should fail" >&2 - exit 1 -fi -assert_eq "No stable release tags found" "$(cat "$TMP_ROOT/err")" "only RC tags error" +assert_latest_release_tag_fails "no tags error" + +assert_latest_release_tag_fails "only RC tags error" \ + "53.1.0-rc1" "54.0.0-rc1" echo "changed_crates.sh tests passed" From 0b61961639f78efef93baffcc6650a5d3884d7ae Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 15 May 2026 16:15:44 +0800 Subject: [PATCH 5/8] feat: add ci-script-tests job to dev workflow - Updated .github/workflows/dev.yml to include a new job that runs ci/scripts/test_changed_crates.sh. --- .github/workflows/dev.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 43a441f1ab576..19aee2674c916 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -78,6 +78,14 @@ jobs: - run: pip install pyyaml - run: python3 ci/scripts/check_asf_yaml_status_checks.py + ci-script-tests: + name: Test CI scripts + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Test changed_crates.sh + run: ci/scripts/test_changed_crates.sh + typos: name: Spell Check with Typos runs-on: ubuntu-latest From c8c9878b78498a65d2068ecd34e080fc4d5ceb45 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 15 May 2026 16:23:40 +0800 Subject: [PATCH 6/8] feat: update breaking changes detector workflow to support legacy-compatible semver-check-result uploads and retain new artifacts --- .../workflows/breaking_changes_detector.yml | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml index f932f603d9499..41340391d8dfa 100644 --- a/.github/workflows/breaking_changes_detector.yml +++ b/.github/workflows/breaking_changes_detector.yml @@ -148,3 +148,26 @@ jobs: name: semver-blocking-check-result path: semver-blocking-artifact/ retention-days: 1 + + # workflow_run workflows execute from the default branch. While this PR is + # under review, the default-branch comment workflow still expects the old + # single-artifact contract. Upload a legacy-compatible copy sourced from + # the blocking latest-release signal so in-flight PR checks keep working. + - name: Stage legacy artifact for default-branch comment workflow + if: always() + uses: ./.github/actions/stage-semver-artifact + with: + artifact_dir: semver-artifact + pr_number: ${{ github.event.pull_request.number }} + result: ${{ steps.check_semver.outputs.result || 'success' }} + baseline_ref: ${{ steps.latest_release.outputs.tag }} + latest_release_tag: ${{ steps.latest_release.outputs.tag }} + log_path: /tmp/blocking-semver-output.txt + + - name: Upload legacy artifact + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: semver-check-result + path: semver-artifact/ + retention-days: 1 From ef8409c04be922f4437159225210ad58b42b88b9 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 15 May 2026 17:14:40 +0800 Subject: [PATCH 7/8] Revert "feat: add ci-script-tests job to dev workflow" This reverts commit 0b61961639f78efef93baffcc6650a5d3884d7ae. --- .github/workflows/dev.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 19aee2674c916..43a441f1ab576 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -78,14 +78,6 @@ jobs: - run: pip install pyyaml - run: python3 ci/scripts/check_asf_yaml_status_checks.py - ci-script-tests: - name: Test CI scripts - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Test changed_crates.sh - run: ci/scripts/test_changed_crates.sh - typos: name: Spell Check with Typos runs-on: ubuntu-latest From 6c1d2152e990d3c2de673d1ac38ef2378122a1cd Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Fri, 15 May 2026 18:02:20 +0800 Subject: [PATCH 8/8] chore: update wording in breaking_changes_detector_comment.yml - Renamed sections for clarity: - Changed "Latest release compatibility" to "blocking" - Changed "Base branch compatibility" to "advisory" - Added an interpretation guide: - "base warns + release passes" indicates advisory unreleased API churn - "release warns + base passes" indicates blocking release-user risk --- .../breaking_changes_detector_comment.yml | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/breaking_changes_detector_comment.yml b/.github/workflows/breaking_changes_detector_comment.yml index 8a7f6d1387af0..d2178036ead68 100644 --- a/.github/workflows/breaking_changes_detector_comment.yml +++ b/.github/workflows/breaking_changes_detector_comment.yml @@ -181,15 +181,25 @@ jobs: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported semver compatibility warnings for changed published crates. - ## Blocking latest-release signal + DataFusion runs two SemVer checks: + + - **Latest release compatibility** compares this PR with the latest stable release tag. This is the blocking user-facing compatibility signal and applies the `${{ env.BREAKING_CHANGE_LABEL }}` label when it fails. + - **Base branch compatibility** compares this PR with its base branch. This is advisory reviewer information for API churn on unreleased `main`. + + Interpretation guide: + + - **Base branch warning, latest release passes**: advisory only. The PR changes API that exists on unreleased `main`, but not in the latest release. Reviewers should confirm the unreleased API change is intentional. + - **Latest release warning, base branch passes**: blocking. The base branch may already contain an unreleased breaking API change, but users upgrading from the latest release can still be affected. + + ## Latest release compatibility — blocking Result: `${{ steps.read.outputs.blocking_result }}` Latest release tag: `${{ steps.read.outputs.latest_release_tag }}` - If this result is `failure`, the pull request contains API changes that are not SemVer-compatible with the latest stable release tag. This is the blocking semver warning signal and applies the `${{ env.BREAKING_CHANGE_LABEL }}` label. + A `failure` here means changed published crates are not SemVer-compatible with the latest stable release tag.
- Blocking latest-release details + Latest release compatibility details ``` ${{ steps.read.outputs.blocking_logs }} @@ -197,15 +207,15 @@ jobs:
- ## Advisory PR-local signal + ## Base branch compatibility — advisory Result: `${{ steps.read.outputs.advisory_result }}` Baseline: `${{ steps.read.outputs.advisory_baseline }}` - If this result is `failure`, the pull request contains API changes that are not SemVer-compatible with its base branch. This is advisory review information only. + A `failure` here means changed published crates are not SemVer-compatible with the PR base branch. This is advisory review information only.
- Advisory PR-local details + Base branch compatibility details ``` ${{ steps.read.outputs.advisory_logs }}