diff --git a/.github/workflows/breaking_changes_detector.yml b/.github/workflows/breaking_changes_detector.yml
new file mode 100644
index 0000000000000..ebe33edcbe6a5
--- /dev/null
+++ b/.github/workflows/breaking_changes_detector.yml
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Detect semver-incompatible (breaking) API changes in crates modified by a PR.
+#
+# Only public workspace crates that have file changes are checked.
+# Internal crates (benchmarks, test-utils, sqllogictest, doc) are excluded.
+#
+# If breaking changes are found, a sticky comment is posted on the PR.
+# The comment is removed automatically once the issues are resolved.
+
+name: "Detect breaking changes"
+
+on:
+  pull_request:
+    branches:
+      - main
+
+permissions:
+  contents: read
+
+jobs:
+  check-semver:
+    name: Check semver
+    runs-on: ubuntu-latest
+    outputs:
+      logs: ${{ steps.check_semver.outputs.logs }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # For fork PRs, `origin` points to the fork, not the upstream repo.
+      # Explicitly fetch the base branch from the upstream repo so we have
+      # a valid baseline ref for both diff and semver-checks.
+      - name: Fetch base branch
+        env:
+          BASE_REF: ${{ github.base_ref }}
+          REPO: ${{ github.repository }}
+        run: git fetch "https://github.com/${REPO}.git" "${BASE_REF}:refs/remotes/origin/${BASE_REF}"
+
+      - name: Determine changed crates
+        id: changed_crates
+        env:
+          BASE_REF: ${{ github.base_ref }}
+        run: |
+          PACKAGES=$(ci/scripts/changed_crates.sh changed-crates "origin/${BASE_REF}")
+          echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT"
+          echo "Changed crates: $PACKAGES"
+
+      # --locked installs with the Cargo.lock shipped in the published crate.
+      - name: Install cargo-semver-checks
+        if: steps.changed_crates.outputs.packages != ''
+        run: cargo install cargo-semver-checks --locked
+
+      - name: Run cargo-semver-checks
+        id: check_semver
+        if: steps.changed_crates.outputs.packages != ''
+        env:
+          BASE_REF: ${{ github.base_ref }}
+          PACKAGES: ${{ steps.changed_crates.outputs.packages }}
+        run: |
+          # Keep going after a failed check so the logs are still exported.
+          set +e
+          # PACKAGES is intentionally unquoted: one argument per crate name.
+          # shellcheck disable=SC2086
+          OUTPUT=$(ci/scripts/changed_crates.sh semver-check "origin/${BASE_REF}" $PACKAGES)
+          EXIT_CODE=$?
+          # Multiline outputs use `name<<DELIMITER` ... `DELIMITER`. A random
+          # delimiter keeps tool output from terminating the value early.
+          DELIMITER="EOF_$(openssl rand -hex 16)"
+          {
+            echo "logs<<${DELIMITER}"
+            echo "$OUTPUT"
+            echo "${DELIMITER}"
+          } >> "$GITHUB_OUTPUT"
+          exit "$EXIT_CODE"
+
+  # Post or remove a sticky comment on the PR based on the semver check result.
+  comment-on-pr:
+    name: Comment on pull request
+    runs-on: ubuntu-latest
+    needs: check-semver
+    if: ${{ !cancelled() }}  # run after a pass or a failure, but not for cancelled runs
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: ci/scripts
+
+      - name: Update PR comment
+        env:
+          GH_TOKEN: ${{ github.token }}
+          REPO: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          CHECK_RESULT: ${{ needs.check-semver.result }}
+          SEMVER_LOGS: ${{ needs.check-semver.outputs.logs }}
+        run: |
+          ci/scripts/changed_crates.sh comment \
+            "$REPO" "$PR_NUMBER" "$CHECK_RESULT" "$SEMVER_LOGS"
diff --git a/ci/scripts/changed_crates.sh b/ci/scripts/changed_crates.sh
new file mode 100755
index 0000000000000..50fb68601dcdd
--- /dev/null
+++ b/ci/scripts/changed_crates.sh
@@ -0,0 +1,161 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Helper script for the breaking-changes-detector workflow.
+#
+# Subcommands:
+#   changed-crates <base_ref>
+#       Print space-separated list of crate names whose files changed vs base_ref.
+#
+#   semver-check <base_ref> <packages...>
+#       Run cargo-semver-checks for the given packages against base_ref.
+#       Prints the (ANSI-stripped) log output to stdout.
+#       Exit code matches cargo-semver-checks (0 = pass, non-zero = breaking).
+#
+#   comment <repo> <pr_number> <check_result> [logs]
+#       Upsert or delete a sticky PR comment based on check_result.
+#       check_result: "success" deletes any existing comment,
+#       anything else upserts the comment with the provided logs.
+#       Requires GH_TOKEN to be set.
+
+set -euo pipefail
+
+# Hidden HTML marker identifying this script's sticky comment. It must be
+# non-empty: jq's contains("") is true for every comment body.
+MARKER="<!-- semver-check-comment -->"
+
+# ── changed-crates ──────────────────────────────────────────────────
+# Print the names of public workspace crates with files changed vs base_ref.
+cmd_changed_crates() {
+    local base_ref="${1:?Usage: changed_crates.sh changed-crates <base_ref>}"
+
+    # Parse workspace members from root Cargo.toml, excluding internal crates
+    # that are not published / not part of the public API.
+    local members
+    members=$(sed -n '/^members = \[/,/\]/p' Cargo.toml | grep '"' | sed 's/.*"\(.*\)".*/\1/' \
+        | grep -v -e '^benchmarks$' -e '^test-utils$' -e '^datafusion/sqllogictest$' -e '^datafusion/doc$')
+
+    local changed_files
+    changed_files=$(git diff --name-only "${base_ref}...HEAD")
+
+    local packages="" member pkg
+    for member in $members; do
+        # A here-string avoids `echo | grep -q`, which can fail under pipefail
+        # when grep exits early and echo receives SIGPIPE.
+        if grep -q "^${member}/" <<< "$changed_files"; then
+            # `|| true`: a manifest without a name line is skipped, not fatal.
+            pkg=$(grep -m1 '^name\s*=' "$member/Cargo.toml" | sed 's/.*=\s*"\(.*\)"/\1/' || true)
+            if [ -n "$pkg" ]; then
+                packages="$packages $pkg"
+            fi
+        fi
+    done
+
+    # xargs collapses the padding into one space-separated line.
+    echo "$packages" | xargs
+}
+
+# ── semver-check ────────────────────────────────────────────────────
+# Run cargo-semver-checks for the given crates and print the ANSI-stripped log.
+cmd_semver_check() {
+    local base_ref="${1:?Usage: changed_crates.sh semver-check <base_ref> <packages...>}"
+    shift
+
+    local pkg log_file exit_code
+    local args=()
+    for pkg in "$@"; do
+        args+=(--package "$pkg")
+    done
+    log_file=$(mktemp)
+
+    set +e
+    # Compare the PR's code against the base branch to detect breaking changes.
+    # tee mirrors the live output to stderr (visible in the Actions log) so
+    # stdout carries only the cleaned copy printed below, not both copies.
+    # ${args[@]+...} expands to nothing for an empty list, even under `set -u`.
+    cargo semver-checks --baseline-rev "$base_ref" ${args[@]+"${args[@]}"} 2>&1 | tee "$log_file" >&2
+    exit_code=${PIPESTATUS[0]}
+    set -e
+
+    # Strip ANSI escape codes from the captured output for the PR comment.
+    sed 's/\x1b\[[0-9;]*m//g' "$log_file"
+    rm -f "$log_file"
+    return "$exit_code"
+}
+
+# ── comment ─────────────────────────────────────────────────────────
+# Upsert the sticky PR comment, or delete it when check_result is "success".
+cmd_comment() {
+    local repo="${1:?Usage: changed_crates.sh comment <repo> <pr_number> <check_result> [logs]}"
+    local pr_number="${2:?}"
+    local check_result="${3:?}"
+    local logs="${4:-}"
+
+    # Find existing comment with our marker. --paginate covers PRs with more
+    # comments than one API page; keep the first id without piping to head.
+    local comment_id
+    comment_id=$(gh api --paginate "repos/${repo}/issues/${pr_number}/comments" \
+        --jq ".[] | select(.body | contains(\"${MARKER}\")) | .id")
+    comment_id=${comment_id%%$'\n'*}
+
+    if [ "$check_result" = "success" ]; then
+        # Delete the comment if one exists
+        if [ -n "$comment_id" ]; then
+            gh api "repos/${repo}/issues/comments/${comment_id}" --method DELETE
+        fi
+    else
+        # Cap the embedded log: GitHub rejects over-long comment bodies
+        # (65536 characters at the time of writing — TODO confirm), so leave
+        # headroom for the surrounding template.
+        local max_log_chars=60000
+        if [ "${#logs}" -gt "$max_log_chars" ]; then
+            logs="${logs:0:max_log_chars}"$'\n... (output truncated)'
+        fi
+
+        local body="${MARKER}
+Thank you for opening this pull request!
+
+Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes made since the last release.
+
+Details:
+
+\`\`\`
+${logs}
+\`\`\`"
+
+        # --raw-field sends the body as a plain string (no gh type conversion).
+        if [ -n "$comment_id" ]; then
+            gh api "repos/${repo}/issues/comments/${comment_id}" \
+                --method PATCH --raw-field body="$body"
+        else
+            gh api "repos/${repo}/issues/${pr_number}/comments" \
+                --method POST --raw-field body="$body"
+        fi
+    fi
+}
+
+# ── main ────────────────────────────────────────────────────────────
+cmd="${1:?Usage: changed_crates.sh <command> [args...]}"
+shift
+
+case "$cmd" in
+    changed-crates) cmd_changed_crates "$@" ;;
+    semver-check)   cmd_semver_check "$@" ;;
+    comment)        cmd_comment "$@" ;;
+    *) echo "Unknown command: $cmd" >&2; exit 1 ;;
+esac