From 515af66b314bdf1a2b0dd78e06a6c455c7ee0aab Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:01:46 +0800 Subject: [PATCH 1/6] ci: remove production changelog gate --- .github/workflows/README.md | 14 +- .github/workflows/run-sweep.yml | 130 +++++++-------- KLAUD_DEBUG.md | 7 +- .../test_run_sweep_gating.py | 153 +++++------------- .../test_validate_perf_changelog.py | 47 ++---- utils/merge_with_reuse.sh | 51 +----- utils/validate_perf_changelog.py | 2 +- 7 files changed, 128 insertions(+), 276 deletions(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index ee5330602..f2e513843 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -204,20 +204,14 @@ must still contain complete artifacts for the merge run's expected matrix. The comment is the reuse authorization, so adding it does not trigger or cancel a PR sweep. Once the comment is present, later commits pushed to a PR with a -full-sweep label do not start another benchmark sweep. GitHub still runs the -CPU-only `check-changelog` job on the new commit before inspecting the reuse -authorization. That job validates the complete YAML/schema, append-only entry -ordering, duplicate YAML keys, byte-for-byte preservation of historical -content, PR links, the generated sweep config, and sweep-label exclusivity. -Link-only correction PRs stop after this CPU check because they have no -benchmark matrix to generate. Only appended entries can continue to the reuse -gate and sweep setup. Removing and re-adding a sweep label explicitly starts a -new sweep. +full-sweep label do not start another benchmark sweep. The synchronize run +checks the authorization before setup. Removing and re-adding a sweep label +explicitly starts a new sweep. `utils/merge_with_reuse.sh ` is the supported merge path for reuse. It merges `main`, preserves the current main changelog bytes, canonicalizes an appended `XXX` link to the PR URL, pushes a fresh synchronization commit, and -waits for `check-changelog` on that exact SHA before merging. +waits for the PR checks before merging. On the push-to-main run, `run-sweep.yml` resolves the merged PR from the merge commit, verifies the source run is an eligible `pull_request` `run-sweep.yml` diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index e496f372b..71b37fc60 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -34,76 +34,13 @@ on: - "perf-changelog.yaml" jobs: - check-changelog: - runs-on: ubuntu-latest - if: >- - github.event_name != 'pull_request' || - !github.event.pull_request.draft - outputs: - has-additions: ${{ steps.validate.outputs.has-additions }} - metadata-only: ${{ steps.validate.outputs.metadata-only }} - steps: - - name: Reject conflicting sweep labels - if: github.event_name == 'pull_request' - env: - SWEEP_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} - run: | - count=$(jq ' - [ - .[] | - select( - . == "sweep-enabled" or - . == "full-sweep-enabled" or - . == "non-canary-full-sweep-enabled" or - . == "full-sweep-fail-fast" or - . == "full-sweep-fail-fast-no-canary" - ) - ] | - length - ' <<<"$SWEEP_LABELS") - if [ "$count" -gt 1 ]; then - echo "::error::PR has multiple conflicting sweep labels. Pick exactly one." - exit 1 - fi - - - name: Checkout code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - fetch-depth: 0 - - - name: Validate perf-changelog.yaml - id: validate - run: | - pip install pydantic pyyaml - - PR_ARGS=() - if [ "${{ github.event_name }}" = "pull_request" ]; then - BASE_REF="origin/${{ github.base_ref }}" - HEAD_REF="${{ github.event.pull_request.head.sha }}" - PR_ARGS=(--pr-number "${{ github.event.pull_request.number }}") - else - BASE_REF="${{ github.event.before }}" - HEAD_REF="${{ github.event.after }}" - fi - - python3 utils/validate_perf_changelog.py \ - --changelog-file perf-changelog.yaml \ - --base-ref "$BASE_REF" \ - --head-ref "$HEAD_REF" \ - --github-output "$GITHUB_OUTPUT" \ - "${PR_ARGS[@]}" - reuse-sweep-gate: - needs: check-changelog runs-on: ubuntu-latest permissions: contents: read issues: read pull-requests: read if: >- - always() && - needs.check-changelog.result == 'success' && - needs.check-changelog.outputs.has-additions == 'true' && github.event_name == 'pull_request' && github.event.action == 'synchronize' && !github.event.pull_request.draft && @@ -133,13 +70,45 @@ jobs: --ref "${{ github.ref }}" \ --workflow-id "run-sweep.yml" + check-newline: + needs: reuse-sweep-gate + runs-on: ubuntu-latest + if: >- + always() && + ( + needs.reuse-sweep-gate.result == 'skipped' || + ( + needs.reuse-sweep-gate.result == 'success' && + needs.reuse-sweep-gate.outputs.skip-pr-sweep != 'true' + ) + ) && + github.event_name == 'pull_request' && + !github.event.pull_request.draft && + ( + (github.event.action != 'labeled' && github.event.action != 'unlabeled') || + github.event.label.name == 'sweep-enabled' || + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' || + github.event.label.name == 'full-sweep-fail-fast' || + github.event.label.name == 'full-sweep-fail-fast-no-canary' + ) + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Check perf-changelog.yaml ends with newline + run: | + if [ -n "$(tail -c 1 perf-changelog.yaml)" ]; then + echo "::error::perf-changelog.yaml must end with a newline character" + echo "Please add a newline at the end of the file to avoid diff issues in subsequent PRs." + exit 1 + fi + setup: - needs: [check-changelog, reuse-sweep-gate] + needs: reuse-sweep-gate runs-on: ubuntu-latest if: >- always() && - needs.check-changelog.result == 'success' && - needs.check-changelog.outputs.has-additions == 'true' && ( needs.reuse-sweep-gate.result == 'skipped' || ( @@ -181,6 +150,29 @@ jobs: reuse-source-pr-number: ${{ steps.setup.outputs.reuse-source-pr-number }} reuse-source-head-sha: ${{ steps.setup.outputs.reuse-source-head-sha }} steps: + - name: Reject conflicting sweep labels + if: github.event_name == 'pull_request' + env: + SWEEP_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} + run: | + count=$(jq ' + [ + .[] | + select( + . == "sweep-enabled" or + . == "full-sweep-enabled" or + . == "non-canary-full-sweep-enabled" or + . == "full-sweep-fail-fast" or + . == "full-sweep-fail-fast-no-canary" + ) + ] | + length + ' <<<"$SWEEP_LABELS") + if [ "$count" -gt 1 ]; then + echo "::error::PR has multiple conflicting sweep labels. Pick exactly one." + exit 1 + fi + - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -626,9 +618,9 @@ jobs: reuse-ingest-artifacts: needs: setup - # `setup` runs via always() and depends on `check-changelog` plus - # `reuse-sweep-gate`, which is skipped on every push-to-main. Without - # always() here, that skipped gate poisons this job's implicit + # `setup` runs via always() and depends on `reuse-sweep-gate`, which is + # skipped on every push-to-main. Without always() here, that skipped + # gate poisons this job's implicit # success() check and it is skipped even when setup succeeded with # reuse-enabled=true — silently breaking the merge-time ingest. Guard # explicitly on setup success instead (same pattern as diff --git a/KLAUD_DEBUG.md b/KLAUD_DEBUG.md index 06290692d..118c279a1 100644 --- a/KLAUD_DEBUG.md +++ b/KLAUD_DEBUG.md @@ -33,10 +33,9 @@ python3 -c "import yaml; yaml.safe_load(open('perf-changelog.yaml'))" Do **not** try a 3-way merge of `perf-changelog.yaml` — whitespace edits will silently re-trigger the deletion check. -After committing and pushing the resolution, wait for `check-changelog` on the -new head SHA. `/reuse-sweep-run` skips setup and GPU jobs only after this -CPU-only validation succeeds. `utils/merge_with_reuse.sh ` performs this -wait automatically. +After committing and pushing the resolution, the synchronize run checks the +reuse authorization before setup. `utils/merge_with_reuse.sh ` performs +the push and waits for the PR checks automatically. --- diff --git a/utils/changelog_gate_tests/test_run_sweep_gating.py b/utils/changelog_gate_tests/test_run_sweep_gating.py index e8530aafa..e158e18f1 100644 --- a/utils/changelog_gate_tests/test_run_sweep_gating.py +++ b/utils/changelog_gate_tests/test_run_sweep_gating.py @@ -1,15 +1,10 @@ -"""Exhaustively verify run-sweep.yml's changelog gating for every case. +"""Exhaustively verify run-sweep.yml's sweep gating for every case. The simulation jobs in `.github/workflows/test-changelog-gate.yml` hand-copy -two of the gating `if` conditions and exercise two scenarios. This test instead -parses the REAL `check-changelog` -> `reuse-sweep-gate` -> `setup` conditions -out of `run-sweep.yml` and evaluates them with a minimal GitHub Actions -expression engine, so it cannot drift from production and it covers every -distinct skip/run decision (PR and push, draft, label, reuse, metadata-only, -and validation-failure paths). - -The engine is grounded against a real GitHub outcome in -`test_engine_matches_real_github_run`. +two of the gating `if` conditions and exercise two scenarios. This test parses +the real `reuse-sweep-gate` -> `setup` conditions out of `run-sweep.yml` and +evaluates them with a minimal GitHub Actions expression engine, so it cannot +drift from production and it covers every distinct skip/run decision. """ from __future__ import annotations @@ -27,7 +22,6 @@ (REPO_ROOT / ".github/workflows/run-sweep.yml").read_text(), Loader=yaml.BaseLoader, ) -CHECK_IF = _WF["jobs"]["check-changelog"]["if"] GATE_IF = _WF["jobs"]["reuse-sweep-gate"]["if"] SETUP_IF = _WF["jobs"]["setup"]["if"] PR_TYPES = set(_WF["on"]["pull_request"]["types"]) @@ -188,7 +182,7 @@ def _eval(expr: str, ctx: dict) -> bool: # -------------------------------------------------------------------------- -# DAG evaluation: check-changelog -> reuse-sweep-gate -> setup +# DAG evaluation: reuse-sweep-gate -> setup # -------------------------------------------------------------------------- def _ctx(sc: dict) -> dict: return { @@ -201,35 +195,10 @@ def _ctx(sc: dict) -> dict: } -def _ran_check_outcome(sc: dict) -> tuple[str, str]: - """Outcome of a check-changelog job that actually runs (not a draft PR). - - Models the PR-only "Reject conflicting sweep labels" step (which fails the - job when more than one sweep label is present); otherwise the validator - step's success/failure governs, and has-additions is empty unless success. - """ - labels = set(sc.get("labels", [])) - if sc["event"] == "pull_request" and len(labels & SWEEP_LABELS) > 1: - return "failure", "" - result = sc["check"] - return result, (sc.get("has", "") if result == "success" else "") - - -def run_dag(sc: dict) -> tuple[str, str, str]: - """Return (check-changelog result, reuse-sweep-gate result, setup decision). - - The job `if` conditions are evaluated from the REAL run-sweep.yml strings; - only the in-job step outcomes (validator / conflicting-labels) are modelled. - """ +def run_dag(sc: dict) -> tuple[str, str]: + """Return (reuse-sweep-gate result, setup decision).""" ctx = _ctx(sc) - if not _eval(CHECK_IF, ctx): - cc_result, has = "skipped", "" - else: - cc_result, has = _ran_check_outcome(sc) - ctx["needs.check-changelog.result"] = cc_result - ctx["needs.check-changelog.outputs.has-additions"] = has - if not _eval(GATE_IF, ctx): gate_result, skip = "skipped", "" else: @@ -239,83 +208,54 @@ def run_dag(sc: dict) -> tuple[str, str, str]: ctx["needs.reuse-sweep-gate.outputs.skip-pr-sweep"] = skip setup = "RUN" if _eval(SETUP_IF, ctx) else "SKIP" - return cc_result, gate_result, setup + return gate_result, setup -_PR = {"event": "pull_request", "draft": False, "check": "success"} +_PR = {"event": "pull_request", "draft": False} -# (id, scenario, expected (check, reuse, setup)) +# (id, scenario, expected (reuse, setup)) CASES = [ ("PR-sync-full-noreuse", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "has": "true", "reuse_auth": False}, ("success", "success", "RUN")), + "reuse_auth": False}, ("success", "RUN")), ("PR-sync-full-reuse-authorized", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "has": "true", "reuse_auth": True}, ("success", "success", "SKIP")), + "reuse_auth": True}, ("success", "SKIP")), ("PR-sync-trim-sweep-enabled", - {**_PR, "action": "synchronize", "labels": ["sweep-enabled"], - "has": "true"}, ("success", "skipped", "RUN")), + {**_PR, "action": "synchronize", "labels": ["sweep-enabled"]}, + ("skipped", "RUN")), ("PR-sync-no-sweep-label", - {**_PR, "action": "synchronize", "labels": [], "has": "true"}, - ("success", "skipped", "SKIP")), + {**_PR, "action": "synchronize", "labels": []}, + ("skipped", "SKIP")), ("PR-labeled-with-sweep-label", {**_PR, "action": "labeled", "label_name": "full-sweep-enabled", - "labels": ["full-sweep-enabled"], "has": "true"}, - ("success", "skipped", "RUN")), + "labels": ["full-sweep-enabled"]}, ("skipped", "RUN")), ("PR-labeled-with-unrelated-label", {**_PR, "action": "labeled", "label_name": "documentation", - "labels": ["full-sweep-enabled"], "has": "true"}, - ("success", "skipped", "SKIP")), + "labels": ["full-sweep-enabled"]}, ("skipped", "SKIP")), ("PR-unlabeled-removed-sweep-label", {**_PR, "action": "unlabeled", "label_name": "full-sweep-enabled", - "labels": [], "has": "true"}, ("success", "skipped", "SKIP")), + "labels": []}, ("skipped", "SKIP")), ("PR-draft", {**_PR, "action": "synchronize", "draft": True, - "labels": ["full-sweep-enabled"], "has": "true"}, - ("skipped", "skipped", "SKIP")), + "labels": ["full-sweep-enabled"]}, ("skipped", "SKIP")), ("PR-ready-for-review", {**_PR, "action": "ready_for_review", "labels": ["full-sweep-enabled"], - "has": "true", "reuse_auth": False}, ("success", "skipped", "RUN")), - ("PR-sync-metadata-only", - {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "has": "false"}, ("success", "skipped", "SKIP")), - ("PR-sync-changelog-invalid", - {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "check": "failure"}, ("failure", "skipped", "SKIP")), - ("PR-sync-conflicting-sweep-labels", - {**_PR, "action": "synchronize", - "labels": ["sweep-enabled", "full-sweep-enabled"], "check": "failure"}, - ("failure", "skipped", "SKIP")), + "reuse_auth": False}, ("skipped", "RUN")), ("push-additions-no-skip", - {"event": "push", "check": "success", "has": "true", - "msg": "feat: add model"}, ("success", "skipped", "RUN")), + {"event": "push", "msg": "feat: add model"}, ("skipped", "RUN")), ("push-skip-sweep-tag", - {"event": "push", "check": "success", "has": "true", - "msg": "fix: x [skip-sweep]"}, ("success", "skipped", "SKIP")), - ("push-metadata-only", - {"event": "push", "check": "success", "has": "false", - "msg": "fix: link"}, ("success", "skipped", "SKIP")), - # malformed changelog merged to main -> ingest blocked (recovery needed). - ("push-changelog-invalid", - {"event": "push", "check": "failure", "msg": "feat: add model"}, - ("failure", "skipped", "SKIP")), + {"event": "push", "msg": "fix: x [skip-sweep]"}, + ("skipped", "SKIP")), ] @pytest.mark.parametrize("scenario,expected", [(c[1], c[2]) for c in CASES], ids=[c[0] for c in CASES]) -def test_gating_decision(scenario: dict, expected: tuple[str, str, str]) -> None: +def test_gating_decision(scenario: dict, expected: tuple[str, str]) -> None: assert run_dag(scenario) == expected -def test_engine_matches_real_github_run() -> None: - # Ground truth: run-sweep run 27737489942 on PR #1821 (no labels, not a - # draft, metadata-only) recorded check-changelog=success, - # reuse-sweep-gate=skipped, setup=skipped. - real = {**_PR, "action": "synchronize", "labels": [], "has": "false"} - assert run_dag(real) == ("success", "skipped", "SKIP") - - def test_engine_self_consistency() -> None: checks = [ ("always()", {}, True), @@ -349,24 +289,14 @@ def test_trigger_types_enable_gated_events() -> None: # both the reference spec and the engine driving the REAL run-sweep.yml `if` # strings; any disagreement is either a spec error or a gating bug. # -------------------------------------------------------------------------- -def reference_gate(sc: dict) -> tuple[str, str, str]: - """Hand-written reference for (check, reuse, setup) from documented intent.""" +def reference_gate(sc: dict) -> tuple[str, str]: + """Hand-written reference for (reuse, setup) from documented intent.""" labels = set(sc.get("labels", [])) draft = sc.get("draft", False) is_pr = sc["event"] == "pull_request" - if is_pr and draft: - check, has = "skipped", "" - elif is_pr and len(labels & SWEEP_LABELS) > 1: - check, has = "failure", "" - else: - check = sc["check"] - has = sc.get("has", "") if check == "success" else "" - gate_runs = ( - check == "success" - and has == "true" - and is_pr + is_pr and sc.get("action") == "synchronize" and not draft and bool(labels & REUSE_ELIGIBLE_LABELS) @@ -384,8 +314,8 @@ def reference_gate(sc: dict) -> tuple[str, str, str]: else: event_ok = "[skip-sweep]" not in sc.get("msg", "") - runs = check == "success" and has == "true" and reuse_clause and event_ok - return check, reuse, ("RUN" if runs else "SKIP") + runs = reuse_clause and event_ok + return reuse, ("RUN" if runs else "SKIP") def _all_scenarios() -> list[dict]: @@ -405,23 +335,16 @@ def _all_scenarios() -> list[dict]: [False, True], # draft label_cfgs, # labels ["full-sweep-enabled", "sweep-enabled", "documentation", None], # label.name - ["success", "failure"], # validator outcome - ["true", "false"], # has-additions [False, True], # reuse authorized ) scenarios = [ {"event": "pull_request", "action": a, "draft": d, "labels": labs, - "label_name": ln, "check": chk, "has": h, "reuse_auth": r} - for a, d, labs, ln, chk, h, r in pr_axes + "label_name": ln, "reuse_auth": r} + for a, d, labs, ln, r in pr_axes ] - push_axes = itertools.product( - ["success", "failure"], # validator outcome - ["true", "false"], # has-additions - ["feat: add model", "fix: thing [skip-sweep]"], # commit message - ) scenarios += [ - {"event": "push", "check": chk, "has": h, "msg": m} - for chk, h, m in push_axes + {"event": "push", "msg": msg} + for msg in ("feat: add model", "fix: thing [skip-sweep]") ] return scenarios @@ -435,9 +358,9 @@ def test_exhaustive_cross_product() -> None: ] assert not mismatches, mismatches[:10] # Sanity: confirm the sweep actually covered the whole input space - # (4 actions x 2 draft x 9 label-configs x 4 label-names x 2 check x - # 2 has-additions x 2 reuse = 2304 PR cases, plus 8 push cases). - assert len(scenarios) == 2312 + # (4 actions x 2 draft x 9 label-configs x 4 label-names x 2 reuse = + # 576 PR cases, plus 2 push cases). + assert len(scenarios) == 578 def test_named_cases_match_reference_spec() -> None: diff --git a/utils/changelog_gate_tests/test_validate_perf_changelog.py b/utils/changelog_gate_tests/test_validate_perf_changelog.py index 34316e7c9..1c67d9514 100644 --- a/utils/changelog_gate_tests/test_validate_perf_changelog.py +++ b/utils/changelog_gate_tests/test_validate_perf_changelog.py @@ -203,7 +203,7 @@ def test_raw_correction_rejects_whitespace_only_history_change() -> None: ) -def test_run_sweep_checks_changelog_before_reuse_and_setup() -> None: +def test_run_sweep_does_not_prevalidate_changelog_before_setup() -> None: repo_root = Path(__file__).resolve().parents[2] workflow = yaml.load( (repo_root / ".github/workflows/run-sweep.yml").read_text(), @@ -212,59 +212,40 @@ def test_run_sweep_checks_changelog_before_reuse_and_setup() -> None: jobs = workflow["jobs"] pull_request_types = workflow["on"]["pull_request"]["types"] - assert "needs" not in jobs["check-changelog"] + assert "check-changelog" not in jobs + assert "check-newline" in jobs # opened/reopened are intentionally excluded so opening or reopening a PR # that already carries a sweep label does not start a sweep. assert {"synchronize", "labeled", "unlabeled", "ready_for_review"}.issubset( set(pull_request_types) ) assert {"opened", "reopened"}.isdisjoint(set(pull_request_types)) - check_step_names = [ - step.get("name") - for step in jobs["check-changelog"]["steps"] - ] setup_step_names = [ step.get("name") for step in jobs["setup"]["steps"] ] - assert "Reject conflicting sweep labels" in check_step_names - assert "Reject conflicting sweep labels" not in setup_step_names - assert jobs["check-changelog"]["outputs"]["has-additions"] == ( - "${{ steps.validate.outputs.has-additions }}" - ) - assert jobs["reuse-sweep-gate"]["needs"] == "check-changelog" - assert ( - "needs.check-changelog.result == 'success'" - in jobs["reuse-sweep-gate"]["if"] - ) - assert ( - "needs.check-changelog.outputs.has-additions == 'true'" - in jobs["reuse-sweep-gate"]["if"] - ) - assert jobs["setup"]["needs"] == [ - "check-changelog", - "reuse-sweep-gate", - ] - assert "needs.check-changelog.result == 'success'" in jobs["setup"]["if"] - assert ( - "needs.check-changelog.outputs.has-additions == 'true'" - in jobs["setup"]["if"] - ) + assert "Reject conflicting sweep labels" in setup_step_names + assert "needs" not in jobs["reuse-sweep-gate"] + assert jobs["setup"]["needs"] == "reuse-sweep-gate" + assert "needs.check-changelog" not in jobs["reuse-sweep-gate"]["if"] + assert "needs.check-changelog" not in jobs["setup"]["if"] -def test_merge_helper_waits_for_changelog_check_before_merge() -> None: +def test_merge_helper_waits_for_pr_checks_before_merge() -> None: repo_root = Path(__file__).resolve().parents[2] script = (repo_root / "utils/merge_with_reuse.sh").read_text() push_index = script.index('git push origin "${LOCAL_BRANCH}:${HEAD_BRANCH}"') - wait_index = script.index( - 'wait_for_check "$POST_MERGE" "check-changelog"' + checks_index = script.index( + 'gh pr checks "$PR" --repo "$REPO" --watch --fail-fast' ) merge_index = script.index( 'gh pr merge "$PR" --repo "$REPO" --squash --admin' ) - assert push_index < wait_index < merge_index + assert push_index < checks_index < merge_index + assert "wait_for_check" not in script + assert "check-changelog" not in script assert "prepare_perf_changelog_merge.py" in script assert "git commit --allow-empty" in script assert script.count('CURRENT_HEAD="$(gh pr view') == 2 diff --git a/utils/merge_with_reuse.sh b/utils/merge_with_reuse.sh index e5387d296..a29004640 100755 --- a/utils/merge_with_reuse.sh +++ b/utils/merge_with_reuse.sh @@ -6,21 +6,18 @@ # 2. Merge origin/main into the PR branch. Any `perf-changelog.yaml` # conflict is auto-resolved by accepting main's entries and re-appending # the PR's entry at the bottom with `XXX` -> the canonical PR URL. -# 3. Canonicalize appended links, push a fresh commit, and wait for -# `check-changelog` on that exact SHA. -# The PR synchronize run then observes the reuse authorization and skips -# sweep setup and benchmark jobs. -# 4. Squash-merge the PR to main (--admin) only after validation succeeds. +# 3. Canonicalize appended links and push a fresh synchronization commit. +# The PR run observes the reuse authorization and skips sweep setup and +# benchmark jobs. +# 4. Wait for the PR checks, then squash-merge the PR to main (--admin). # # Usage: utils/merge_with_reuse.sh # Env: REPO (default SemiAnalysisAI/InferenceX) -# CHECK_TIMEOUT_SECONDS (default 900) set -euo pipefail REPO="${REPO:-SemiAnalysisAI/InferenceX}" CHANGELOG="perf-changelog.yaml" -CHECK_TIMEOUT_SECONDS="${CHECK_TIMEOUT_SECONDS:-900}" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" if [ $# -ne 1 ] || ! [[ "$1" =~ ^[0-9]+$ ]]; then @@ -35,38 +32,6 @@ die() { printf '\033[1;31m✗\033[0m %s\n' "$*" >&2; exit 1; } [ -z "$(git status --porcelain)" ] || die "Working tree is not clean" -wait_for_check() { - local sha="$1" - local check_name="$2" - local deadline=$((SECONDS + CHECK_TIMEOUT_SECONDS)) - - log "Waiting for ${check_name} on ${sha:0:8}" - while ((SECONDS < deadline)); do - local checks check status conclusion details - checks="$(gh api "repos/${REPO}/commits/${sha}/check-runs?per_page=100")" - check="$(jq -c --arg name "$check_name" ' - [.check_runs[] | select(.name == $name)] - | sort_by(.started_at) - | last // {} - ' <<<"$checks")" - status="$(jq -r '.status // ""' <<<"$check")" - conclusion="$(jq -r '.conclusion // ""' <<<"$check")" - details="$(jq -r '.details_url // ""' <<<"$check")" - - if [ "$status" = "completed" ]; then - if [ "$conclusion" = "success" ]; then - ok "${check_name} passed${details:+ — ${details}}" - return 0 - fi - die "${check_name} concluded ${conclusion:-unknown}${details:+ — ${details}}" - fi - - sleep 5 - done - - die "Timed out after ${CHECK_TIMEOUT_SECONDS}s waiting for ${check_name} on ${sha}" -} - ORIGINAL_BRANCH="$(git symbolic-ref --quiet --short HEAD || git rev-parse HEAD)" LOCAL_BRANCH="" cleanup() { @@ -194,19 +159,17 @@ if ! git diff --quiet -- "$CHANGELOG"; then fi # Always create a synchronize event when the branch was already prepared. -# This guarantees that check-changelog exists for the exact SHA we validate. +# This guarantees the reuse gate sees the authorization on the current SHA. if [ "$PRE_MERGE" = "$(git rev-parse HEAD)" ]; then git commit --allow-empty \ - -m "chore: validate PR #${PR} changelog before reuse [skip-sweep]" + -m "chore: refresh PR #${PR} for sweep reuse [skip-sweep]" fi # --- step 3: push prepared commit -------------------------------------------- POST_MERGE="$(git rev-parse HEAD)" log "Pushing prepared commit ${POST_MERGE:0:8}" git push origin "${LOCAL_BRANCH}:${HEAD_BRANCH}" -ok "Push complete; changelog validation will run before the reuse gate" - -wait_for_check "$POST_MERGE" "check-changelog" +ok "Push complete; reuse authorization will be evaluated on the new head" # --- step 4: squash-merge to main ------------------------------------------- CURRENT_HEAD="$(gh pr view "$PR" --repo "$REPO" --json headRefOid --jq '.headRefOid')" diff --git a/utils/validate_perf_changelog.py b/utils/validate_perf_changelog.py index a020365aa..d1d8c6bfc 100644 --- a/utils/validate_perf_changelog.py +++ b/utils/validate_perf_changelog.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Validate perf-changelog.yaml before sweep reuse can skip setup.""" +"""Validate append-only perf-changelog.yaml changes.""" from __future__ import annotations From 92d1aad4ba9871f7902431772ca4fa2139a5df96 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:04:30 +0800 Subject: [PATCH 2/6] ci: run newline check before reuse gate --- .github/workflows/README.md | 5 +- .github/workflows/run-sweep.yml | 72 ++++++++------- KLAUD_DEBUG.md | 5 +- .../test_run_sweep_gating.py | 90 ++++++++++++------- .../test_validate_perf_changelog.py | 16 +++- 5 files changed, 113 insertions(+), 75 deletions(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index f2e513843..2c34cf332 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -205,8 +205,9 @@ must still contain complete artifacts for the merge run's expected matrix. The comment is the reuse authorization, so adding it does not trigger or cancel a PR sweep. Once the comment is present, later commits pushed to a PR with a full-sweep label do not start another benchmark sweep. The synchronize run -checks the authorization before setup. Removing and re-adding a sweep label -explicitly starts a new sweep. +checks the changelog's final newline before inspecting the authorization and +continuing to setup. Removing and re-adding a sweep label explicitly starts a +new sweep. `utils/merge_with_reuse.sh ` is the supported merge path for reuse. It merges `main`, preserves the current main changelog bytes, canonicalizes an diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 71b37fc60..86018e303 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -34,13 +34,41 @@ on: - "perf-changelog.yaml" jobs: + check-newline: + runs-on: ubuntu-latest + if: >- + github.event_name == 'pull_request' && + !github.event.pull_request.draft && + ( + (github.event.action != 'labeled' && github.event.action != 'unlabeled') || + github.event.label.name == 'sweep-enabled' || + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' || + github.event.label.name == 'full-sweep-fail-fast' || + github.event.label.name == 'full-sweep-fail-fast-no-canary' + ) + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Check perf-changelog.yaml ends with newline + run: | + if [ -n "$(tail -c 1 perf-changelog.yaml)" ]; then + echo "::error::perf-changelog.yaml must end with a newline character" + echo "Please add a newline at the end of the file to avoid diff issues in subsequent PRs." + exit 1 + fi + reuse-sweep-gate: + needs: check-newline runs-on: ubuntu-latest permissions: contents: read issues: read pull-requests: read if: >- + always() && + needs.check-newline.result == 'success' && github.event_name == 'pull_request' && github.event.action == 'synchronize' && !github.event.pull_request.draft && @@ -70,45 +98,15 @@ jobs: --ref "${{ github.ref }}" \ --workflow-id "run-sweep.yml" - check-newline: - needs: reuse-sweep-gate + setup: + needs: [check-newline, reuse-sweep-gate] runs-on: ubuntu-latest if: >- always() && ( - needs.reuse-sweep-gate.result == 'skipped' || - ( - needs.reuse-sweep-gate.result == 'success' && - needs.reuse-sweep-gate.outputs.skip-pr-sweep != 'true' - ) + needs.check-newline.result == 'success' || + needs.check-newline.result == 'skipped' ) && - github.event_name == 'pull_request' && - !github.event.pull_request.draft && - ( - (github.event.action != 'labeled' && github.event.action != 'unlabeled') || - github.event.label.name == 'sweep-enabled' || - github.event.label.name == 'full-sweep-enabled' || - github.event.label.name == 'non-canary-full-sweep-enabled' || - github.event.label.name == 'full-sweep-fail-fast' || - github.event.label.name == 'full-sweep-fail-fast-no-canary' - ) - steps: - - name: Checkout code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - name: Check perf-changelog.yaml ends with newline - run: | - if [ -n "$(tail -c 1 perf-changelog.yaml)" ]; then - echo "::error::perf-changelog.yaml must end with a newline character" - echo "Please add a newline at the end of the file to avoid diff issues in subsequent PRs." - exit 1 - fi - - setup: - needs: reuse-sweep-gate - runs-on: ubuntu-latest - if: >- - always() && ( needs.reuse-sweep-gate.result == 'skipped' || ( @@ -618,9 +616,9 @@ jobs: reuse-ingest-artifacts: needs: setup - # `setup` runs via always() and depends on `reuse-sweep-gate`, which is - # skipped on every push-to-main. Without always() here, that skipped - # gate poisons this job's implicit + # `setup` runs via always() and depends on `check-newline` and + # `reuse-sweep-gate`, which are skipped on every push-to-main. Without + # always() here, those skipped jobs poison this job's implicit # success() check and it is skipped even when setup succeeded with # reuse-enabled=true — silently breaking the merge-time ingest. Guard # explicitly on setup success instead (same pattern as diff --git a/KLAUD_DEBUG.md b/KLAUD_DEBUG.md index 118c279a1..9d1995e44 100644 --- a/KLAUD_DEBUG.md +++ b/KLAUD_DEBUG.md @@ -34,8 +34,9 @@ python3 -c "import yaml; yaml.safe_load(open('perf-changelog.yaml'))" Do **not** try a 3-way merge of `perf-changelog.yaml` — whitespace edits will silently re-trigger the deletion check. After committing and pushing the resolution, the synchronize run checks the -reuse authorization before setup. `utils/merge_with_reuse.sh ` performs -the push and waits for the PR checks automatically. +changelog's final newline and then the reuse authorization before setup. +`utils/merge_with_reuse.sh ` performs the push and waits for the PR checks +automatically. --- diff --git a/utils/changelog_gate_tests/test_run_sweep_gating.py b/utils/changelog_gate_tests/test_run_sweep_gating.py index e158e18f1..7e6622159 100644 --- a/utils/changelog_gate_tests/test_run_sweep_gating.py +++ b/utils/changelog_gate_tests/test_run_sweep_gating.py @@ -2,9 +2,10 @@ The simulation jobs in `.github/workflows/test-changelog-gate.yml` hand-copy two of the gating `if` conditions and exercise two scenarios. This test parses -the real `reuse-sweep-gate` -> `setup` conditions out of `run-sweep.yml` and -evaluates them with a minimal GitHub Actions expression engine, so it cannot -drift from production and it covers every distinct skip/run decision. +the real `check-newline` -> `reuse-sweep-gate` -> `setup` conditions out of +`run-sweep.yml` and evaluates them with a minimal GitHub Actions expression +engine, so it cannot drift from production and it covers every distinct +skip/run decision. """ from __future__ import annotations @@ -22,6 +23,7 @@ (REPO_ROOT / ".github/workflows/run-sweep.yml").read_text(), Loader=yaml.BaseLoader, ) +NEWLINE_IF = _WF["jobs"]["check-newline"]["if"] GATE_IF = _WF["jobs"]["reuse-sweep-gate"]["if"] SETUP_IF = _WF["jobs"]["setup"]["if"] PR_TYPES = set(_WF["on"]["pull_request"]["types"]) @@ -182,7 +184,7 @@ def _eval(expr: str, ctx: dict) -> bool: # -------------------------------------------------------------------------- -# DAG evaluation: reuse-sweep-gate -> setup +# DAG evaluation: check-newline -> reuse-sweep-gate -> setup # -------------------------------------------------------------------------- def _ctx(sc: dict) -> dict: return { @@ -195,10 +197,16 @@ def _ctx(sc: dict) -> dict: } -def run_dag(sc: dict) -> tuple[str, str]: - """Return (reuse-sweep-gate result, setup decision).""" +def run_dag(sc: dict) -> tuple[str, str, str]: + """Return (check-newline result, reuse-sweep-gate result, setup decision).""" ctx = _ctx(sc) + if not _eval(NEWLINE_IF, ctx): + newline_result = "skipped" + else: + newline_result = sc.get("newline", "success") + ctx["needs.check-newline.result"] = newline_result + if not _eval(GATE_IF, ctx): gate_result, skip = "skipped", "" else: @@ -208,51 +216,58 @@ def run_dag(sc: dict) -> tuple[str, str]: ctx["needs.reuse-sweep-gate.outputs.skip-pr-sweep"] = skip setup = "RUN" if _eval(SETUP_IF, ctx) else "SKIP" - return gate_result, setup + return newline_result, gate_result, setup _PR = {"event": "pull_request", "draft": False} -# (id, scenario, expected (reuse, setup)) +# (id, scenario, expected (newline, reuse, setup)) CASES = [ ("PR-sync-full-noreuse", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "reuse_auth": False}, ("success", "RUN")), + "reuse_auth": False}, ("success", "success", "RUN")), ("PR-sync-full-reuse-authorized", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "reuse_auth": True}, ("success", "SKIP")), + "reuse_auth": True}, ("success", "success", "SKIP")), + ("PR-sync-full-newline-failure", + {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], + "newline": "failure"}, ("failure", "skipped", "SKIP")), ("PR-sync-trim-sweep-enabled", {**_PR, "action": "synchronize", "labels": ["sweep-enabled"]}, - ("skipped", "RUN")), + ("success", "skipped", "RUN")), ("PR-sync-no-sweep-label", {**_PR, "action": "synchronize", "labels": []}, - ("skipped", "SKIP")), + ("success", "skipped", "SKIP")), ("PR-labeled-with-sweep-label", {**_PR, "action": "labeled", "label_name": "full-sweep-enabled", - "labels": ["full-sweep-enabled"]}, ("skipped", "RUN")), + "labels": ["full-sweep-enabled"]}, ("success", "skipped", "RUN")), ("PR-labeled-with-unrelated-label", {**_PR, "action": "labeled", "label_name": "documentation", - "labels": ["full-sweep-enabled"]}, ("skipped", "SKIP")), + "labels": ["full-sweep-enabled"]}, ("skipped", "skipped", "SKIP")), ("PR-unlabeled-removed-sweep-label", {**_PR, "action": "unlabeled", "label_name": "full-sweep-enabled", - "labels": []}, ("skipped", "SKIP")), + "labels": []}, ("success", "skipped", "SKIP")), ("PR-draft", {**_PR, "action": "synchronize", "draft": True, - "labels": ["full-sweep-enabled"]}, ("skipped", "SKIP")), + "labels": ["full-sweep-enabled"]}, ("skipped", "skipped", "SKIP")), ("PR-ready-for-review", {**_PR, "action": "ready_for_review", "labels": ["full-sweep-enabled"], - "reuse_auth": False}, ("skipped", "RUN")), + "reuse_auth": False}, ("success", "skipped", "RUN")), ("push-additions-no-skip", - {"event": "push", "msg": "feat: add model"}, ("skipped", "RUN")), + {"event": "push", "msg": "feat: add model"}, + ("skipped", "skipped", "RUN")), ("push-skip-sweep-tag", {"event": "push", "msg": "fix: x [skip-sweep]"}, - ("skipped", "SKIP")), + ("skipped", "skipped", "SKIP")), ] @pytest.mark.parametrize("scenario,expected", [(c[1], c[2]) for c in CASES], ids=[c[0] for c in CASES]) -def test_gating_decision(scenario: dict, expected: tuple[str, str]) -> None: +def test_gating_decision( + scenario: dict, + expected: tuple[str, str, str], +) -> None: assert run_dag(scenario) == expected @@ -289,14 +304,26 @@ def test_trigger_types_enable_gated_events() -> None: # both the reference spec and the engine driving the REAL run-sweep.yml `if` # strings; any disagreement is either a spec error or a gating bug. # -------------------------------------------------------------------------- -def reference_gate(sc: dict) -> tuple[str, str]: - """Hand-written reference for (reuse, setup) from documented intent.""" +def reference_gate(sc: dict) -> tuple[str, str, str]: + """Hand-written reference for (newline, reuse, setup) from intent.""" labels = set(sc.get("labels", [])) draft = sc.get("draft", False) is_pr = sc["event"] == "pull_request" + action = sc.get("action") - gate_runs = ( + newline_runs = ( is_pr + and not draft + and ( + action not in ("labeled", "unlabeled") + or sc.get("label_name") in SWEEP_LABELS + ) + ) + newline = sc.get("newline", "success") if newline_runs else "skipped" + + gate_runs = ( + newline == "success" + and is_pr and sc.get("action") == "synchronize" and not draft and bool(labels & REUSE_ELIGIBLE_LABELS) @@ -306,7 +333,6 @@ def reference_gate(sc: dict) -> tuple[str, str]: reuse_clause = (reuse == "skipped") or (reuse == "success" and not authorized) if is_pr: - action = sc.get("action") action_ok = action not in ("labeled", "unlabeled") or ( sc.get("label_name") in SWEEP_LABELS ) @@ -314,8 +340,9 @@ def reference_gate(sc: dict) -> tuple[str, str]: else: event_ok = "[skip-sweep]" not in sc.get("msg", "") - runs = reuse_clause and event_ok - return reuse, ("RUN" if runs else "SKIP") + newline_clause = newline in ("success", "skipped") + runs = newline_clause and reuse_clause and event_ok + return newline, reuse, ("RUN" if runs else "SKIP") def _all_scenarios() -> list[dict]: @@ -336,11 +363,12 @@ def _all_scenarios() -> list[dict]: label_cfgs, # labels ["full-sweep-enabled", "sweep-enabled", "documentation", None], # label.name [False, True], # reuse authorized + ["success", "failure"], # newline outcome when it runs ) scenarios = [ {"event": "pull_request", "action": a, "draft": d, "labels": labs, - "label_name": ln, "reuse_auth": r} - for a, d, labs, ln, r in pr_axes + "label_name": ln, "reuse_auth": r, "newline": nl} + for a, d, labs, ln, r, nl in pr_axes ] scenarios += [ {"event": "push", "msg": msg} @@ -358,9 +386,9 @@ def test_exhaustive_cross_product() -> None: ] assert not mismatches, mismatches[:10] # Sanity: confirm the sweep actually covered the whole input space - # (4 actions x 2 draft x 9 label-configs x 4 label-names x 2 reuse = - # 576 PR cases, plus 2 push cases). - assert len(scenarios) == 578 + # (4 actions x 2 draft x 9 label-configs x 4 label-names x 2 reuse x + # 2 newline outcomes = 1152 PR cases, plus 2 push cases). + assert len(scenarios) == 1154 def test_named_cases_match_reference_spec() -> None: diff --git a/utils/changelog_gate_tests/test_validate_perf_changelog.py b/utils/changelog_gate_tests/test_validate_perf_changelog.py index 1c67d9514..aacefa3c3 100644 --- a/utils/changelog_gate_tests/test_validate_perf_changelog.py +++ b/utils/changelog_gate_tests/test_validate_perf_changelog.py @@ -203,7 +203,7 @@ def test_raw_correction_rejects_whitespace_only_history_change() -> None: ) -def test_run_sweep_does_not_prevalidate_changelog_before_setup() -> None: +def test_run_sweep_checks_newline_before_reuse_and_setup() -> None: repo_root = Path(__file__).resolve().parents[2] workflow = yaml.load( (repo_root / ".github/workflows/run-sweep.yml").read_text(), @@ -225,8 +225,18 @@ def test_run_sweep_does_not_prevalidate_changelog_before_setup() -> None: for step in jobs["setup"]["steps"] ] assert "Reject conflicting sweep labels" in setup_step_names - assert "needs" not in jobs["reuse-sweep-gate"] - assert jobs["setup"]["needs"] == "reuse-sweep-gate" + assert "needs" not in jobs["check-newline"] + assert jobs["reuse-sweep-gate"]["needs"] == "check-newline" + assert jobs["setup"]["needs"] == [ + "check-newline", + "reuse-sweep-gate", + ] + assert ( + "needs.check-newline.result == 'success'" + in jobs["reuse-sweep-gate"]["if"] + ) + assert "needs.check-newline.result == 'success'" in jobs["setup"]["if"] + assert "needs.check-newline.result == 'skipped'" in jobs["setup"]["if"] assert "needs.check-changelog" not in jobs["reuse-sweep-gate"]["if"] assert "needs.check-changelog" not in jobs["setup"]["if"] From c73adedca15c31081bf8250c2fafd3e7c128b980 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:10:43 +0800 Subject: [PATCH 3/6] ci: validate changelog matrix before sweep reuse --- .github/workflows/README.md | 7 +- .github/workflows/run-sweep.yml | 29 ++++--- KLAUD_DEBUG.md | 7 +- .../test_run_sweep_gating.py | 46 +++++------ .../test_validate_perf_changelog.py | 61 +++++++++++--- utils/validate_perf_changelog.py | 81 ++++++++++++++++++- 6 files changed, 177 insertions(+), 54 deletions(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 2c34cf332..5916afc3c 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -205,9 +205,10 @@ must still contain complete artifacts for the merge run's expected matrix. The comment is the reuse authorization, so adding it does not trigger or cancel a PR sweep. Once the comment is present, later commits pushed to a PR with a full-sweep label do not start another benchmark sweep. The synchronize run -checks the changelog's final newline before inspecting the authorization and -continuing to setup. Removing and re-adding a sweep label explicitly starts a -new sweep. +checks that the changelog diff can generate the setup matrix before inspecting +the authorization and continuing to setup. This catches malformed conflict +resolutions before reuse can skip setup. Removing and re-adding a sweep label +explicitly starts a new sweep. `utils/merge_with_reuse.sh ` is the supported merge path for reuse. It merges `main`, preserves the current main changelog bytes, canonicalizes an diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 86018e303..77c0f7623 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -34,7 +34,7 @@ on: - "perf-changelog.yaml" jobs: - check-newline: + check-changelog: runs-on: ubuntu-latest if: >- github.event_name == 'pull_request' && @@ -50,17 +50,20 @@ jobs: steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 - - name: Check perf-changelog.yaml ends with newline + - name: Validate perf-changelog matrix run: | - if [ -n "$(tail -c 1 perf-changelog.yaml)" ]; then - echo "::error::perf-changelog.yaml must end with a newline character" - echo "Please add a newline at the end of the file to avoid diff issues in subsequent PRs." - exit 1 - fi + pip install pydantic pyyaml + python3 utils/validate_perf_changelog.py \ + --matrix-compatible \ + --changelog-file perf-changelog.yaml \ + --base-ref "origin/${{ github.base_ref }}" \ + --head-ref "${{ github.event.pull_request.head.sha }}" reuse-sweep-gate: - needs: check-newline + needs: check-changelog runs-on: ubuntu-latest permissions: contents: read @@ -68,7 +71,7 @@ jobs: pull-requests: read if: >- always() && - needs.check-newline.result == 'success' && + needs.check-changelog.result == 'success' && github.event_name == 'pull_request' && github.event.action == 'synchronize' && !github.event.pull_request.draft && @@ -99,13 +102,13 @@ jobs: --workflow-id "run-sweep.yml" setup: - needs: [check-newline, reuse-sweep-gate] + needs: [check-changelog, reuse-sweep-gate] runs-on: ubuntu-latest if: >- always() && ( - needs.check-newline.result == 'success' || - needs.check-newline.result == 'skipped' + needs.check-changelog.result == 'success' || + needs.check-changelog.result == 'skipped' ) && ( needs.reuse-sweep-gate.result == 'skipped' || @@ -616,7 +619,7 @@ jobs: reuse-ingest-artifacts: needs: setup - # `setup` runs via always() and depends on `check-newline` and + # `setup` runs via always() and depends on `check-changelog` and # `reuse-sweep-gate`, which are skipped on every push-to-main. Without # always() here, those skipped jobs poison this job's implicit # success() check and it is skipped even when setup succeeded with diff --git a/KLAUD_DEBUG.md b/KLAUD_DEBUG.md index 9d1995e44..29360e056 100644 --- a/KLAUD_DEBUG.md +++ b/KLAUD_DEBUG.md @@ -34,9 +34,10 @@ python3 -c "import yaml; yaml.safe_load(open('perf-changelog.yaml'))" Do **not** try a 3-way merge of `perf-changelog.yaml` — whitespace edits will silently re-trigger the deletion check. After committing and pushing the resolution, the synchronize run checks the -changelog's final newline and then the reuse authorization before setup. -`utils/merge_with_reuse.sh ` performs the push and waits for the PR checks -automatically. +changelog with the same matrix processor used by setup, then checks the reuse +authorization. This catches deleted history or malformed appended entries +before reuse can skip setup. `utils/merge_with_reuse.sh ` performs the push +and waits for the PR checks automatically. --- diff --git a/utils/changelog_gate_tests/test_run_sweep_gating.py b/utils/changelog_gate_tests/test_run_sweep_gating.py index 7e6622159..9ce38ba42 100644 --- a/utils/changelog_gate_tests/test_run_sweep_gating.py +++ b/utils/changelog_gate_tests/test_run_sweep_gating.py @@ -2,7 +2,7 @@ The simulation jobs in `.github/workflows/test-changelog-gate.yml` hand-copy two of the gating `if` conditions and exercise two scenarios. This test parses -the real `check-newline` -> `reuse-sweep-gate` -> `setup` conditions out of +the real `check-changelog` -> `reuse-sweep-gate` -> `setup` conditions out of `run-sweep.yml` and evaluates them with a minimal GitHub Actions expression engine, so it cannot drift from production and it covers every distinct skip/run decision. @@ -23,7 +23,7 @@ (REPO_ROOT / ".github/workflows/run-sweep.yml").read_text(), Loader=yaml.BaseLoader, ) -NEWLINE_IF = _WF["jobs"]["check-newline"]["if"] +CHECK_IF = _WF["jobs"]["check-changelog"]["if"] GATE_IF = _WF["jobs"]["reuse-sweep-gate"]["if"] SETUP_IF = _WF["jobs"]["setup"]["if"] PR_TYPES = set(_WF["on"]["pull_request"]["types"]) @@ -184,7 +184,7 @@ def _eval(expr: str, ctx: dict) -> bool: # -------------------------------------------------------------------------- -# DAG evaluation: check-newline -> reuse-sweep-gate -> setup +# DAG evaluation: check-changelog -> reuse-sweep-gate -> setup # -------------------------------------------------------------------------- def _ctx(sc: dict) -> dict: return { @@ -198,14 +198,14 @@ def _ctx(sc: dict) -> dict: def run_dag(sc: dict) -> tuple[str, str, str]: - """Return (check-newline result, reuse-sweep-gate result, setup decision).""" + """Return (check-changelog result, reuse-sweep-gate result, setup decision).""" ctx = _ctx(sc) - if not _eval(NEWLINE_IF, ctx): - newline_result = "skipped" + if not _eval(CHECK_IF, ctx): + check_result = "skipped" else: - newline_result = sc.get("newline", "success") - ctx["needs.check-newline.result"] = newline_result + check_result = sc.get("check", "success") + ctx["needs.check-changelog.result"] = check_result if not _eval(GATE_IF, ctx): gate_result, skip = "skipped", "" @@ -216,12 +216,12 @@ def run_dag(sc: dict) -> tuple[str, str, str]: ctx["needs.reuse-sweep-gate.outputs.skip-pr-sweep"] = skip setup = "RUN" if _eval(SETUP_IF, ctx) else "SKIP" - return newline_result, gate_result, setup + return check_result, gate_result, setup _PR = {"event": "pull_request", "draft": False} -# (id, scenario, expected (newline, reuse, setup)) +# (id, scenario, expected (check, reuse, setup)) CASES = [ ("PR-sync-full-noreuse", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], @@ -229,9 +229,9 @@ def run_dag(sc: dict) -> tuple[str, str, str]: ("PR-sync-full-reuse-authorized", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], "reuse_auth": True}, ("success", "success", "SKIP")), - ("PR-sync-full-newline-failure", + ("PR-sync-full-changelog-failure", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "newline": "failure"}, ("failure", "skipped", "SKIP")), + "check": "failure"}, ("failure", "skipped", "SKIP")), ("PR-sync-trim-sweep-enabled", {**_PR, "action": "synchronize", "labels": ["sweep-enabled"]}, ("success", "skipped", "RUN")), @@ -305,13 +305,13 @@ def test_trigger_types_enable_gated_events() -> None: # strings; any disagreement is either a spec error or a gating bug. # -------------------------------------------------------------------------- def reference_gate(sc: dict) -> tuple[str, str, str]: - """Hand-written reference for (newline, reuse, setup) from intent.""" + """Hand-written reference for (check, reuse, setup) from intent.""" labels = set(sc.get("labels", [])) draft = sc.get("draft", False) is_pr = sc["event"] == "pull_request" action = sc.get("action") - newline_runs = ( + check_runs = ( is_pr and not draft and ( @@ -319,10 +319,10 @@ def reference_gate(sc: dict) -> tuple[str, str, str]: or sc.get("label_name") in SWEEP_LABELS ) ) - newline = sc.get("newline", "success") if newline_runs else "skipped" + check = sc.get("check", "success") if check_runs else "skipped" gate_runs = ( - newline == "success" + check == "success" and is_pr and sc.get("action") == "synchronize" and not draft @@ -340,9 +340,9 @@ def reference_gate(sc: dict) -> tuple[str, str, str]: else: event_ok = "[skip-sweep]" not in sc.get("msg", "") - newline_clause = newline in ("success", "skipped") - runs = newline_clause and reuse_clause and event_ok - return newline, reuse, ("RUN" if runs else "SKIP") + check_clause = check in ("success", "skipped") + runs = check_clause and reuse_clause and event_ok + return check, reuse, ("RUN" if runs else "SKIP") def _all_scenarios() -> list[dict]: @@ -363,12 +363,12 @@ def _all_scenarios() -> list[dict]: label_cfgs, # labels ["full-sweep-enabled", "sweep-enabled", "documentation", None], # label.name [False, True], # reuse authorized - ["success", "failure"], # newline outcome when it runs + ["success", "failure"], # changelog outcome when it runs ) scenarios = [ {"event": "pull_request", "action": a, "draft": d, "labels": labs, - "label_name": ln, "reuse_auth": r, "newline": nl} - for a, d, labs, ln, r, nl in pr_axes + "label_name": ln, "reuse_auth": r, "check": chk} + for a, d, labs, ln, r, chk in pr_axes ] scenarios += [ {"event": "push", "msg": msg} @@ -387,7 +387,7 @@ def test_exhaustive_cross_product() -> None: assert not mismatches, mismatches[:10] # Sanity: confirm the sweep actually covered the whole input space # (4 actions x 2 draft x 9 label-configs x 4 label-names x 2 reuse x - # 2 newline outcomes = 1152 PR cases, plus 2 push cases). + # 2 changelog outcomes = 1152 PR cases, plus 2 push cases). assert len(scenarios) == 1154 diff --git a/utils/changelog_gate_tests/test_validate_perf_changelog.py b/utils/changelog_gate_tests/test_validate_perf_changelog.py index aacefa3c3..ffdf520c1 100644 --- a/utils/changelog_gate_tests/test_validate_perf_changelog.py +++ b/utils/changelog_gate_tests/test_validate_perf_changelog.py @@ -5,6 +5,7 @@ import pytest import yaml +import validate_perf_changelog as validator from validate_perf_changelog import ( ChangelogValidationError, compare_entries, @@ -203,7 +204,41 @@ def test_raw_correction_rejects_whitespace_only_history_change() -> None: ) -def test_run_sweep_checks_newline_before_reuse_and_setup() -> None: +def test_matrix_compatible_check_allows_pr_link_only_correction( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + validator, + "read_git_file", + lambda *_args: b" pr-link: https://example.com\n", + ) + + def reject_matrix(*_args: object) -> None: + raise ChangelogValidationError("matrix rejected") + + monkeypatch.setattr(validator, "validate_generated_config", reject_matrix) + monkeypatch.setattr( + validator, + "is_pr_link_only_correction", + lambda *_args: True, + ) + + assert not validator.validate_matrix_compatible_change("base", "head", "file") + + +def test_matrix_compatible_check_rejects_pr_1717_conflict_resolution() -> None: + with pytest.raises( + ChangelogValidationError, + match=r"Found deleted line: +pr-link: .*pull/1798", + ): + validator.validate_matrix_compatible_change( + "add33814cce15d0b71e3c98eca4bb2f7ad8aba96", + "60bf726a7f324a01e8850d228c8f0f7a6f203dbd", + "perf-changelog.yaml", + ) + + +def test_run_sweep_checks_changelog_before_reuse_and_setup() -> None: repo_root = Path(__file__).resolve().parents[2] workflow = yaml.load( (repo_root / ".github/workflows/run-sweep.yml").read_text(), @@ -212,8 +247,8 @@ def test_run_sweep_checks_newline_before_reuse_and_setup() -> None: jobs = workflow["jobs"] pull_request_types = workflow["on"]["pull_request"]["types"] - assert "check-changelog" not in jobs - assert "check-newline" in jobs + assert "check-changelog" in jobs + assert "check-newline" not in jobs # opened/reopened are intentionally excluded so opening or reopening a PR # that already carries a sweep label does not start a sweep. assert {"synchronize", "labeled", "unlabeled", "ready_for_review"}.issubset( @@ -225,20 +260,24 @@ def test_run_sweep_checks_newline_before_reuse_and_setup() -> None: for step in jobs["setup"]["steps"] ] assert "Reject conflicting sweep labels" in setup_step_names - assert "needs" not in jobs["check-newline"] - assert jobs["reuse-sweep-gate"]["needs"] == "check-newline" + assert "needs" not in jobs["check-changelog"] + assert jobs["reuse-sweep-gate"]["needs"] == "check-changelog" assert jobs["setup"]["needs"] == [ - "check-newline", + "check-changelog", "reuse-sweep-gate", ] assert ( - "needs.check-newline.result == 'success'" + "needs.check-changelog.result == 'success'" in jobs["reuse-sweep-gate"]["if"] ) - assert "needs.check-newline.result == 'success'" in jobs["setup"]["if"] - assert "needs.check-newline.result == 'skipped'" in jobs["setup"]["if"] - assert "needs.check-changelog" not in jobs["reuse-sweep-gate"]["if"] - assert "needs.check-changelog" not in jobs["setup"]["if"] + assert "needs.check-changelog.result == 'success'" in jobs["setup"]["if"] + assert "needs.check-changelog.result == 'skipped'" in jobs["setup"]["if"] + check_script = "\n".join( + step.get("run", "") + for step in jobs["check-changelog"]["steps"] + ) + assert "validate_perf_changelog.py" in check_script + assert "--matrix-compatible" in check_script def test_merge_helper_waits_for_pr_checks_before_merge() -> None: diff --git a/utils/validate_perf_changelog.py b/utils/validate_perf_changelog.py index d1d8c6bfc..56cf3dee4 100644 --- a/utils/validate_perf_changelog.py +++ b/utils/validate_perf_changelog.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Validate append-only perf-changelog.yaml changes.""" +"""Validate perf-changelog.yaml changes.""" from __future__ import annotations @@ -343,6 +343,63 @@ def validate_generated_config(base_ref: str, head_ref: str, path: str) -> None: ) from exc +def is_pr_link_only_correction( + base_ref: str, + head_ref: str, + path: str, +) -> bool: + """Return whether every substantive changed line is a pr-link replacement.""" + diff = run_git( + "diff", + "--unified=0", + base_ref, + head_ref, + "--", + path, + ).stdout + removed: list[str] = [] + added: list[str] = [] + + for line in diff.splitlines(): + if line.startswith("---") or line.startswith("+++"): + continue + if line.startswith("-") and line[1:].strip(): + removed.append(line[1:]) + elif line.startswith("+") and line[1:].strip(): + added.append(line[1:]) + + return ( + bool(removed) + and bool(added) + and len(removed) == len(added) + and all(line.startswith(" pr-link:") for line in [*removed, *added]) + ) + + +def validate_matrix_compatible_change( + base_ref: str, + head_ref: str, + path: str, +) -> bool: + """Validate the diff accepted by sweep setup, or a pr-link-only correction. + + Returns whether the change generated a sweep matrix. + """ + head_raw = read_git_file(head_ref, path) + if not head_raw.endswith(b"\n"): + raise ChangelogValidationError( + f"{path} at {head_ref} does not end with a newline" + ) + + try: + validate_generated_config(base_ref, head_ref, path) + except ChangelogValidationError: + if is_pr_link_only_correction(base_ref, head_ref, path): + return False + raise + return True + + def validate_changelog( base_ref: str, head_ref: str, @@ -419,6 +476,14 @@ def main() -> int: parser.add_argument("--head-ref", required=True) parser.add_argument("--changelog-file", default="perf-changelog.yaml") parser.add_argument("--pr-number", type=int) + parser.add_argument( + "--matrix-compatible", + action="store_true", + help=( + "run the same matrix validation as sweep setup, while allowing " + "pr-link-only corrections that do not produce a matrix" + ), + ) parser.add_argument( "--github-output", default=os.environ.get("GITHUB_OUTPUT"), @@ -426,6 +491,20 @@ def main() -> int: args = parser.parse_args() try: + if args.matrix_compatible: + generated_matrix = validate_matrix_compatible_change( + args.base_ref, + args.head_ref, + args.changelog_file, + ) + detail = ( + "matrix generated" + if generated_matrix + else "pr-link-only correction; no matrix required" + ) + print(f"Validated {args.changelog_file}: {detail}") + return 0 + additions, corrections = validate_changelog( args.base_ref, args.head_ref, From 3ff26c740dabe13a38c1a128e202af822e36eef0 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:14:19 +0800 Subject: [PATCH 4/6] ci: require changelog matrix generation --- .../test_validate_perf_changelog.py | 25 +++++--- utils/validate_perf_changelog.py | 62 +++---------------- 2 files changed, 26 insertions(+), 61 deletions(-) diff --git a/utils/changelog_gate_tests/test_validate_perf_changelog.py b/utils/changelog_gate_tests/test_validate_perf_changelog.py index ffdf520c1..5a9798908 100644 --- a/utils/changelog_gate_tests/test_validate_perf_changelog.py +++ b/utils/changelog_gate_tests/test_validate_perf_changelog.py @@ -204,26 +204,35 @@ def test_raw_correction_rejects_whitespace_only_history_change() -> None: ) -def test_matrix_compatible_check_allows_pr_link_only_correction( +def test_matrix_compatible_check_rejects_missing_final_newline( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setattr( validator, "read_git_file", - lambda *_args: b" pr-link: https://example.com\n", + lambda *_args: b"- config-keys: []", ) - def reject_matrix(*_args: object) -> None: - raise ChangelogValidationError("matrix rejected") + with pytest.raises(ChangelogValidationError, match="end with a newline"): + validator.validate_matrix_compatible_change("base", "head", "file") - monkeypatch.setattr(validator, "validate_generated_config", reject_matrix) + +def test_matrix_compatible_check_propagates_matrix_rejection( + monkeypatch: pytest.MonkeyPatch, +) -> None: monkeypatch.setattr( validator, - "is_pr_link_only_correction", - lambda *_args: True, + "read_git_file", + lambda *_args: b"- config-keys: []\n", ) - assert not validator.validate_matrix_compatible_change("base", "head", "file") + def reject_matrix(*_args: object) -> None: + raise ChangelogValidationError("matrix rejected") + + monkeypatch.setattr(validator, "validate_generated_config", reject_matrix) + + with pytest.raises(ChangelogValidationError, match="matrix rejected"): + validator.validate_matrix_compatible_change("base", "head", "file") def test_matrix_compatible_check_rejects_pr_1717_conflict_resolution() -> None: diff --git a/utils/validate_perf_changelog.py b/utils/validate_perf_changelog.py index 56cf3dee4..fc1df5a21 100644 --- a/utils/validate_perf_changelog.py +++ b/utils/validate_perf_changelog.py @@ -343,61 +343,19 @@ def validate_generated_config(base_ref: str, head_ref: str, path: str) -> None: ) from exc -def is_pr_link_only_correction( - base_ref: str, - head_ref: str, - path: str, -) -> bool: - """Return whether every substantive changed line is a pr-link replacement.""" - diff = run_git( - "diff", - "--unified=0", - base_ref, - head_ref, - "--", - path, - ).stdout - removed: list[str] = [] - added: list[str] = [] - - for line in diff.splitlines(): - if line.startswith("---") or line.startswith("+++"): - continue - if line.startswith("-") and line[1:].strip(): - removed.append(line[1:]) - elif line.startswith("+") and line[1:].strip(): - added.append(line[1:]) - - return ( - bool(removed) - and bool(added) - and len(removed) == len(added) - and all(line.startswith(" pr-link:") for line in [*removed, *added]) - ) - - def validate_matrix_compatible_change( base_ref: str, head_ref: str, path: str, -) -> bool: - """Validate the diff accepted by sweep setup, or a pr-link-only correction. - - Returns whether the change generated a sweep matrix. - """ +) -> None: + """Validate the final newline and the diff accepted by sweep setup.""" head_raw = read_git_file(head_ref, path) if not head_raw.endswith(b"\n"): raise ChangelogValidationError( f"{path} at {head_ref} does not end with a newline" ) - try: - validate_generated_config(base_ref, head_ref, path) - except ChangelogValidationError: - if is_pr_link_only_correction(base_ref, head_ref, path): - return False - raise - return True + validate_generated_config(base_ref, head_ref, path) def validate_changelog( @@ -480,8 +438,8 @@ def main() -> int: "--matrix-compatible", action="store_true", help=( - "run the same matrix validation as sweep setup, while allowing " - "pr-link-only corrections that do not produce a matrix" + "check the final newline and run the same matrix validation as " + "sweep setup" ), ) parser.add_argument( @@ -492,17 +450,15 @@ def main() -> int: try: if args.matrix_compatible: - generated_matrix = validate_matrix_compatible_change( + validate_matrix_compatible_change( args.base_ref, args.head_ref, args.changelog_file, ) - detail = ( - "matrix generated" - if generated_matrix - else "pr-link-only correction; no matrix required" + print( + f"Validated {args.changelog_file}: " + "final newline present and matrix generated" ) - print(f"Validated {args.changelog_file}: {detail}") return 0 additions, corrections = validate_changelog( From 60928c8637a5f3c665634f0f5a43cc35d0e251a8 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:16:23 +0800 Subject: [PATCH 5/6] ci: remove obsolete strict changelog validation --- .github/workflows/run-sweep.yml | 1 - .github/workflows/test-changelog-gate.yml | 33 ----- .../test_validate_perf_changelog.py | 3 +- utils/validate_perf_changelog.py | 118 +----------------- 4 files changed, 4 insertions(+), 151 deletions(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 77c0f7623..82b8eaf47 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -57,7 +57,6 @@ jobs: run: | pip install pydantic pyyaml python3 utils/validate_perf_changelog.py \ - --matrix-compatible \ --changelog-file perf-changelog.yaml \ --base-ref "origin/${{ github.base_ref }}" \ --head-ref "${{ github.event.pull_request.head.sha }}" diff --git a/.github/workflows/test-changelog-gate.yml b/.github/workflows/test-changelog-gate.yml index 484b9c982..f5d1d27eb 100644 --- a/.github/workflows/test-changelog-gate.yml +++ b/.github/workflows/test-changelog-gate.yml @@ -63,36 +63,3 @@ jobs: utils/test_validate_reusable_sweep_artifacts.py \ utils/changelog_gate_tests/test_run_sweep_gating.py \ -v - - - name: Test historical push validation - run: | - expect_rejected() { - local label="$1" - shift - local output - if output="$("$@" 2>&1)"; then - echo "::error::${label} was unexpectedly accepted" - return 1 - fi - echo "${label} was rejected as expected" - printf '%s\n' "$output" - } - - python utils/validate_perf_changelog.py \ - --changelog-file perf-changelog.yaml \ - --base-ref d99c824b1c4f0b1b007631191657e458ef2a332c^ \ - --head-ref d99c824b1c4f0b1b007631191657e458ef2a332c - - expect_rejected "PR #1767 malformed merge state" \ - python utils/validate_perf_changelog.py \ - --changelog-file perf-changelog.yaml \ - --base-ref 7b9843d3a6e1fe7a2d92d327e25aae57ed3506c5^ \ - --head-ref 7b9843d3a6e1fe7a2d92d327e25aae57ed3506c5 \ - --pr-number 1767 - - expect_rejected "PR #1798 invalid merge state" \ - python utils/validate_perf_changelog.py \ - --changelog-file perf-changelog.yaml \ - --base-ref 8aaff9fbb645ae9df9ba7593fa63273f839b62fb^1 \ - --head-ref 8aaff9fbb645ae9df9ba7593fa63273f839b62fb \ - --pr-number 1798 diff --git a/utils/changelog_gate_tests/test_validate_perf_changelog.py b/utils/changelog_gate_tests/test_validate_perf_changelog.py index 5a9798908..d8bf314b1 100644 --- a/utils/changelog_gate_tests/test_validate_perf_changelog.py +++ b/utils/changelog_gate_tests/test_validate_perf_changelog.py @@ -286,7 +286,8 @@ def test_run_sweep_checks_changelog_before_reuse_and_setup() -> None: for step in jobs["check-changelog"]["steps"] ) assert "validate_perf_changelog.py" in check_script - assert "--matrix-compatible" in check_script + assert "--base-ref" in check_script + assert "--head-ref" in check_script def test_merge_helper_waits_for_pr_checks_before_merge() -> None: diff --git a/utils/validate_perf_changelog.py b/utils/validate_perf_changelog.py index fc1df5a21..204687965 100644 --- a/utils/validate_perf_changelog.py +++ b/utils/validate_perf_changelog.py @@ -5,7 +5,6 @@ import argparse import json -import os import re import subprocess import sys @@ -71,21 +70,6 @@ def construct_unique_mapping( ) -def run_git(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]: - """Run git and return captured text output.""" - result = subprocess.run( - ["git", *args], - capture_output=True, - text=True, - ) - if check and result.returncode != 0: - detail = result.stderr.strip() or result.stdout.strip() - raise ChangelogValidationError( - f"git {' '.join(args)} failed: {detail}" - ) - return result - - def read_git_file(ref: str, path: str) -> bytes: """Read a repository file exactly as stored at a git ref.""" result = subprocess.run( @@ -358,114 +342,19 @@ def validate_matrix_compatible_change( validate_generated_config(base_ref, head_ref, path) -def validate_changelog( - base_ref: str, - head_ref: str, - path: str, - pr_number: int | None, -) -> tuple[int, int]: - """Validate the complete head file and its change from base to head.""" - diff_check = run_git( - "diff", - "--check", - base_ref, - head_ref, - "--", - path, - check=False, - ) - if diff_check.returncode != 0: - detail = diff_check.stdout.strip() or diff_check.stderr.strip() - raise ChangelogValidationError( - f"git diff --check failed for {path}:\n{detail}" - ) - - base_raw = read_git_file(base_ref, path) - head_raw = read_git_file(head_ref, path) - base_entries = parse_changelog( - base_raw, - f"{path} at {base_ref}", - ) - head_entries = parse_changelog( - head_raw, - f"{path} at {head_ref}", - ) - additions, corrections = compare_entries( - base_entries, - head_entries, - pr_number, - ) - validate_raw_change( - base_raw, - head_raw, - len(additions), - corrections, - ) - - if additions: - validate_generated_config(base_ref, head_ref, path) - elif not corrections: - raise ChangelogValidationError(f"{path} has no changes") - - return len(additions), corrections - - -def write_github_outputs( - path: str | None, - additions: int, - corrections: int, -) -> None: - """Expose changelog change type to downstream GitHub Actions jobs.""" - if not path: - return - with open(path, "a") as handle: - handle.write(f"has-additions={'true' if additions else 'false'}\n") - handle.write( - f"metadata-only={'true' if corrections and not additions else 'false'}\n" - ) - handle.write(f"addition-count={additions}\n") - handle.write(f"correction-count={corrections}\n") - - def main() -> int: """CLI entry point.""" parser = argparse.ArgumentParser() parser.add_argument("--base-ref", required=True) parser.add_argument("--head-ref", required=True) parser.add_argument("--changelog-file", default="perf-changelog.yaml") - parser.add_argument("--pr-number", type=int) - parser.add_argument( - "--matrix-compatible", - action="store_true", - help=( - "check the final newline and run the same matrix validation as " - "sweep setup" - ), - ) - parser.add_argument( - "--github-output", - default=os.environ.get("GITHUB_OUTPUT"), - ) args = parser.parse_args() try: - if args.matrix_compatible: - validate_matrix_compatible_change( - args.base_ref, - args.head_ref, - args.changelog_file, - ) - print( - f"Validated {args.changelog_file}: " - "final newline present and matrix generated" - ) - return 0 - - additions, corrections = validate_changelog( + validate_matrix_compatible_change( args.base_ref, args.head_ref, args.changelog_file, - args.pr_number, ) except ChangelogValidationError as exc: print(f"ERROR: {exc}", file=sys.stderr) @@ -473,11 +362,8 @@ def main() -> int: print( f"Validated {args.changelog_file}: " - f"{additions} appended entr{'y' if additions == 1 else 'ies'}, " - f"{corrections} pr-link correction" - f"{'' if corrections == 1 else 's'}" + "final newline present and matrix generated" ) - write_github_outputs(args.github_output, additions, corrections) return 0 From 11f5456251da26df00a5cb40f7e55363209d69dd Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:22:37 +0800 Subject: [PATCH 6/6] fix: preserve sweep gate invariants --- .github/workflows/run-sweep.yml | 45 +++++++++---------- .../test_run_sweep_gating.py | 13 +++++- .../test_validate_perf_changelog.py | 35 ++++++++------- utils/merge_with_reuse.sh | 37 +++++++++++++++ 4 files changed, 91 insertions(+), 39 deletions(-) diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index 82b8eaf47..00798aece 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -48,6 +48,28 @@ jobs: github.event.label.name == 'full-sweep-fail-fast-no-canary' ) steps: + - name: Reject conflicting sweep labels + env: + SWEEP_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} + run: | + count=$(jq ' + [ + .[] | + select( + . == "sweep-enabled" or + . == "full-sweep-enabled" or + . == "non-canary-full-sweep-enabled" or + . == "full-sweep-fail-fast" or + . == "full-sweep-fail-fast-no-canary" + ) + ] | + length + ' <<<"$SWEEP_LABELS") + if [ "$count" -gt 1 ]; then + echo "::error::PR has multiple conflicting sweep labels. Pick exactly one." + exit 1 + fi + - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -150,29 +172,6 @@ jobs: reuse-source-pr-number: ${{ steps.setup.outputs.reuse-source-pr-number }} reuse-source-head-sha: ${{ steps.setup.outputs.reuse-source-head-sha }} steps: - - name: Reject conflicting sweep labels - if: github.event_name == 'pull_request' - env: - SWEEP_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} - run: | - count=$(jq ' - [ - .[] | - select( - . == "sweep-enabled" or - . == "full-sweep-enabled" or - . == "non-canary-full-sweep-enabled" or - . == "full-sweep-fail-fast" or - . == "full-sweep-fail-fast-no-canary" - ) - ] | - length - ' <<<"$SWEEP_LABELS") - if [ "$count" -gt 1 ]; then - echo "::error::PR has multiple conflicting sweep labels. Pick exactly one." - exit 1 - fi - - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: diff --git a/utils/changelog_gate_tests/test_run_sweep_gating.py b/utils/changelog_gate_tests/test_run_sweep_gating.py index 9ce38ba42..ea0aae528 100644 --- a/utils/changelog_gate_tests/test_run_sweep_gating.py +++ b/utils/changelog_gate_tests/test_run_sweep_gating.py @@ -203,6 +203,8 @@ def run_dag(sc: dict) -> tuple[str, str, str]: if not _eval(CHECK_IF, ctx): check_result = "skipped" + elif len(set(sc.get("labels", [])) & SWEEP_LABELS) > 1: + check_result = "failure" else: check_result = sc.get("check", "success") ctx["needs.check-changelog.result"] = check_result @@ -229,6 +231,10 @@ def run_dag(sc: dict) -> tuple[str, str, str]: ("PR-sync-full-reuse-authorized", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], "reuse_auth": True}, ("success", "success", "SKIP")), + ("PR-sync-conflicting-labels-reuse-authorized", + {**_PR, "action": "synchronize", + "labels": ["full-sweep-enabled", "full-sweep-fail-fast"], + "reuse_auth": True}, ("failure", "skipped", "SKIP")), ("PR-sync-full-changelog-failure", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], "check": "failure"}, ("failure", "skipped", "SKIP")), @@ -319,7 +325,12 @@ def reference_gate(sc: dict) -> tuple[str, str, str]: or sc.get("label_name") in SWEEP_LABELS ) ) - check = sc.get("check", "success") if check_runs else "skipped" + if not check_runs: + check = "skipped" + elif len(labels & SWEEP_LABELS) > 1: + check = "failure" + else: + check = sc.get("check", "success") gate_runs = ( check == "success" diff --git a/utils/changelog_gate_tests/test_validate_perf_changelog.py b/utils/changelog_gate_tests/test_validate_perf_changelog.py index d8bf314b1..8b31d8f94 100644 --- a/utils/changelog_gate_tests/test_validate_perf_changelog.py +++ b/utils/changelog_gate_tests/test_validate_perf_changelog.py @@ -5,11 +5,11 @@ import pytest import yaml -import validate_perf_changelog as validator from validate_perf_changelog import ( ChangelogValidationError, compare_entries, parse_changelog, + validate_matrix_compatible_change, validate_raw_change, ) @@ -208,31 +208,32 @@ def test_matrix_compatible_check_rejects_missing_final_newline( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setattr( - validator, - "read_git_file", + "validate_perf_changelog.read_git_file", lambda *_args: b"- config-keys: []", ) with pytest.raises(ChangelogValidationError, match="end with a newline"): - validator.validate_matrix_compatible_change("base", "head", "file") + validate_matrix_compatible_change("base", "head", "file") def test_matrix_compatible_check_propagates_matrix_rejection( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setattr( - validator, - "read_git_file", + "validate_perf_changelog.read_git_file", lambda *_args: b"- config-keys: []\n", ) def reject_matrix(*_args: object) -> None: raise ChangelogValidationError("matrix rejected") - monkeypatch.setattr(validator, "validate_generated_config", reject_matrix) + monkeypatch.setattr( + "validate_perf_changelog.validate_generated_config", + reject_matrix, + ) with pytest.raises(ChangelogValidationError, match="matrix rejected"): - validator.validate_matrix_compatible_change("base", "head", "file") + validate_matrix_compatible_change("base", "head", "file") def test_matrix_compatible_check_rejects_pr_1717_conflict_resolution() -> None: @@ -240,7 +241,7 @@ def test_matrix_compatible_check_rejects_pr_1717_conflict_resolution() -> None: ChangelogValidationError, match=r"Found deleted line: +pr-link: .*pull/1798", ): - validator.validate_matrix_compatible_change( + validate_matrix_compatible_change( "add33814cce15d0b71e3c98eca4bb2f7ad8aba96", "60bf726a7f324a01e8850d228c8f0f7a6f203dbd", "perf-changelog.yaml", @@ -264,11 +265,13 @@ def test_run_sweep_checks_changelog_before_reuse_and_setup() -> None: set(pull_request_types) ) assert {"opened", "reopened"}.isdisjoint(set(pull_request_types)) - setup_step_names = [ + check_step_names = [ step.get("name") - for step in jobs["setup"]["steps"] + for step in jobs["check-changelog"]["steps"] ] - assert "Reject conflicting sweep labels" in setup_step_names + setup_step_names = [step.get("name") for step in jobs["setup"]["steps"]] + assert "Reject conflicting sweep labels" in check_step_names + assert "Reject conflicting sweep labels" not in setup_step_names assert "needs" not in jobs["check-changelog"] assert jobs["reuse-sweep-gate"]["needs"] == "check-changelog" assert jobs["setup"]["needs"] == [ @@ -295,6 +298,9 @@ def test_merge_helper_waits_for_pr_checks_before_merge() -> None: script = (repo_root / "utils/merge_with_reuse.sh").read_text() push_index = script.index('git push origin "${LOCAL_BRANCH}:${HEAD_BRANCH}"') + wait_index = script.index( + 'wait_for_check "$POST_MERGE" "check-changelog"' + ) checks_index = script.index( 'gh pr checks "$PR" --repo "$REPO" --watch --fail-fast' ) @@ -302,9 +308,8 @@ def test_merge_helper_waits_for_pr_checks_before_merge() -> None: 'gh pr merge "$PR" --repo "$REPO" --squash --admin' ) - assert push_index < checks_index < merge_index - assert "wait_for_check" not in script - assert "check-changelog" not in script + assert push_index < wait_index < checks_index < merge_index + assert "CHECK_TIMEOUT_SECONDS" in script assert "prepare_perf_changelog_merge.py" in script assert "git commit --allow-empty" in script assert script.count('CURRENT_HEAD="$(gh pr view') == 2 diff --git a/utils/merge_with_reuse.sh b/utils/merge_with_reuse.sh index a29004640..1655746b5 100755 --- a/utils/merge_with_reuse.sh +++ b/utils/merge_with_reuse.sh @@ -13,11 +13,13 @@ # # Usage: utils/merge_with_reuse.sh # Env: REPO (default SemiAnalysisAI/InferenceX) +# CHECK_TIMEOUT_SECONDS (default 900) set -euo pipefail REPO="${REPO:-SemiAnalysisAI/InferenceX}" CHANGELOG="perf-changelog.yaml" +CHECK_TIMEOUT_SECONDS="${CHECK_TIMEOUT_SECONDS:-900}" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" if [ $# -ne 1 ] || ! [[ "$1" =~ ^[0-9]+$ ]]; then @@ -32,6 +34,38 @@ die() { printf '\033[1;31m✗\033[0m %s\n' "$*" >&2; exit 1; } [ -z "$(git status --porcelain)" ] || die "Working tree is not clean" +wait_for_check() { + local sha="$1" + local check_name="$2" + local deadline=$((SECONDS + CHECK_TIMEOUT_SECONDS)) + + log "Waiting for ${check_name} on ${sha:0:8}" + while ((SECONDS < deadline)); do + local checks check status conclusion details + checks="$(gh api "repos/${REPO}/commits/${sha}/check-runs?per_page=100")" + check="$(jq -c --arg name "$check_name" ' + [.check_runs[] | select(.name == $name)] + | sort_by(.started_at) + | last // {} + ' <<<"$checks")" + status="$(jq -r '.status // ""' <<<"$check")" + conclusion="$(jq -r '.conclusion // ""' <<<"$check")" + details="$(jq -r '.details_url // ""' <<<"$check")" + + if [ "$status" = "completed" ]; then + if [ "$conclusion" = "success" ]; then + ok "${check_name} passed${details:+ - ${details}}" + return 0 + fi + die "${check_name} concluded ${conclusion:-unknown}${details:+ - ${details}}" + fi + + sleep 5 + done + + die "Timed out after ${CHECK_TIMEOUT_SECONDS}s waiting for ${check_name} on ${sha}" +} + ORIGINAL_BRANCH="$(git symbolic-ref --quiet --short HEAD || git rev-parse HEAD)" LOCAL_BRANCH="" cleanup() { @@ -176,6 +210,9 @@ CURRENT_HEAD="$(gh pr view "$PR" --repo "$REPO" --json headRefOid --jq '.headRef [ "$CURRENT_HEAD" = "$POST_MERGE" ] \ || die "PR head changed to ${CURRENT_HEAD:0:8}; expected ${POST_MERGE:0:8}" +# `gh pr checks --watch` fails if GitHub has not registered any checks yet. +wait_for_check "$POST_MERGE" "check-changelog" + log "Waiting for all PR checks" gh pr checks "$PR" --repo "$REPO" --watch --fail-fast ok "All PR checks passed"