diff --git a/.github/workflows/README.md b/.github/workflows/README.md index ee5330602..5916afc3c 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -204,20 +204,16 @@ must still contain complete artifacts for the merge run's expected matrix. The comment is the reuse authorization, so adding it does not trigger or cancel a PR sweep. Once the comment is present, later commits pushed to a PR with a -full-sweep label do not start another benchmark sweep. GitHub still runs the -CPU-only `check-changelog` job on the new commit before inspecting the reuse -authorization. That job validates the complete YAML/schema, append-only entry -ordering, duplicate YAML keys, byte-for-byte preservation of historical -content, PR links, the generated sweep config, and sweep-label exclusivity. -Link-only correction PRs stop after this CPU check because they have no -benchmark matrix to generate. Only appended entries can continue to the reuse -gate and sweep setup. Removing and re-adding a sweep label explicitly starts a -new sweep. +full-sweep label do not start another benchmark sweep. The synchronize run +checks that the changelog diff can generate the setup matrix before inspecting +the authorization and continuing to setup. This catches malformed conflict +resolutions before reuse can skip setup. Removing and re-adding a sweep label +explicitly starts a new sweep. `utils/merge_with_reuse.sh ` is the supported merge path for reuse. It merges `main`, preserves the current main changelog bytes, canonicalizes an appended `XXX` link to the PR URL, pushes a fresh synchronization commit, and -waits for `check-changelog` on that exact SHA before merging. +waits for the PR checks before merging. On the push-to-main run, `run-sweep.yml` resolves the merged PR from the merge commit, verifies the source run is an eligible `pull_request` `run-sweep.yml` diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index e496f372b..00798aece 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -37,14 +37,18 @@ jobs: check-changelog: runs-on: ubuntu-latest if: >- - github.event_name != 'pull_request' || - !github.event.pull_request.draft - outputs: - has-additions: ${{ steps.validate.outputs.has-additions }} - metadata-only: ${{ steps.validate.outputs.metadata-only }} + github.event_name == 'pull_request' && + !github.event.pull_request.draft && + ( + (github.event.action != 'labeled' && github.event.action != 'unlabeled') || + github.event.label.name == 'sweep-enabled' || + github.event.label.name == 'full-sweep-enabled' || + github.event.label.name == 'non-canary-full-sweep-enabled' || + github.event.label.name == 'full-sweep-fail-fast' || + github.event.label.name == 'full-sweep-fail-fast-no-canary' + ) steps: - name: Reject conflicting sweep labels - if: github.event_name == 'pull_request' env: SWEEP_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} run: | @@ -71,27 +75,13 @@ jobs: with: fetch-depth: 0 - - name: Validate perf-changelog.yaml - id: validate + - name: Validate perf-changelog matrix run: | pip install pydantic pyyaml - - PR_ARGS=() - if [ "${{ github.event_name }}" = "pull_request" ]; then - BASE_REF="origin/${{ github.base_ref }}" - HEAD_REF="${{ github.event.pull_request.head.sha }}" - PR_ARGS=(--pr-number "${{ github.event.pull_request.number }}") - else - BASE_REF="${{ github.event.before }}" - HEAD_REF="${{ github.event.after }}" - fi - python3 utils/validate_perf_changelog.py \ --changelog-file perf-changelog.yaml \ - --base-ref "$BASE_REF" \ - --head-ref "$HEAD_REF" \ - --github-output "$GITHUB_OUTPUT" \ - "${PR_ARGS[@]}" + --base-ref "origin/${{ github.base_ref }}" \ + --head-ref "${{ github.event.pull_request.head.sha }}" reuse-sweep-gate: needs: check-changelog @@ -103,7 +93,6 @@ jobs: if: >- always() && needs.check-changelog.result == 'success' && - needs.check-changelog.outputs.has-additions == 'true' && github.event_name == 'pull_request' && github.event.action == 'synchronize' && !github.event.pull_request.draft && @@ -138,8 +127,10 @@ jobs: runs-on: ubuntu-latest if: >- always() && - needs.check-changelog.result == 'success' && - needs.check-changelog.outputs.has-additions == 'true' && + ( + needs.check-changelog.result == 'success' || + needs.check-changelog.result == 'skipped' + ) && ( needs.reuse-sweep-gate.result == 'skipped' || ( @@ -626,9 +617,9 @@ jobs: reuse-ingest-artifacts: needs: setup - # `setup` runs via always() and depends on `check-changelog` plus - # `reuse-sweep-gate`, which is skipped on every push-to-main. Without - # always() here, that skipped gate poisons this job's implicit + # `setup` runs via always() and depends on `check-changelog` and + # `reuse-sweep-gate`, which are skipped on every push-to-main. Without + # always() here, those skipped jobs poison this job's implicit # success() check and it is skipped even when setup succeeded with # reuse-enabled=true — silently breaking the merge-time ingest. Guard # explicitly on setup success instead (same pattern as diff --git a/.github/workflows/test-changelog-gate.yml b/.github/workflows/test-changelog-gate.yml index 484b9c982..f5d1d27eb 100644 --- a/.github/workflows/test-changelog-gate.yml +++ b/.github/workflows/test-changelog-gate.yml @@ -63,36 +63,3 @@ jobs: utils/test_validate_reusable_sweep_artifacts.py \ utils/changelog_gate_tests/test_run_sweep_gating.py \ -v - - - name: Test historical push validation - run: | - expect_rejected() { - local label="$1" - shift - local output - if output="$("$@" 2>&1)"; then - echo "::error::${label} was unexpectedly accepted" - return 1 - fi - echo "${label} was rejected as expected" - printf '%s\n' "$output" - } - - python utils/validate_perf_changelog.py \ - --changelog-file perf-changelog.yaml \ - --base-ref d99c824b1c4f0b1b007631191657e458ef2a332c^ \ - --head-ref d99c824b1c4f0b1b007631191657e458ef2a332c - - expect_rejected "PR #1767 malformed merge state" \ - python utils/validate_perf_changelog.py \ - --changelog-file perf-changelog.yaml \ - --base-ref 7b9843d3a6e1fe7a2d92d327e25aae57ed3506c5^ \ - --head-ref 7b9843d3a6e1fe7a2d92d327e25aae57ed3506c5 \ - --pr-number 1767 - - expect_rejected "PR #1798 invalid merge state" \ - python utils/validate_perf_changelog.py \ - --changelog-file perf-changelog.yaml \ - --base-ref 8aaff9fbb645ae9df9ba7593fa63273f839b62fb^1 \ - --head-ref 8aaff9fbb645ae9df9ba7593fa63273f839b62fb \ - --pr-number 1798 diff --git a/KLAUD_DEBUG.md b/KLAUD_DEBUG.md index 06290692d..29360e056 100644 --- a/KLAUD_DEBUG.md +++ b/KLAUD_DEBUG.md @@ -33,10 +33,11 @@ python3 -c "import yaml; yaml.safe_load(open('perf-changelog.yaml'))" Do **not** try a 3-way merge of `perf-changelog.yaml` — whitespace edits will silently re-trigger the deletion check. -After committing and pushing the resolution, wait for `check-changelog` on the -new head SHA. `/reuse-sweep-run` skips setup and GPU jobs only after this -CPU-only validation succeeds. `utils/merge_with_reuse.sh ` performs this -wait automatically. +After committing and pushing the resolution, the synchronize run checks the +changelog with the same matrix processor used by setup, then checks the reuse +authorization. This catches deleted history or malformed appended entries +before reuse can skip setup. `utils/merge_with_reuse.sh ` performs the push +and waits for the PR checks automatically. --- diff --git a/utils/changelog_gate_tests/test_run_sweep_gating.py b/utils/changelog_gate_tests/test_run_sweep_gating.py index e8530aafa..ea0aae528 100644 --- a/utils/changelog_gate_tests/test_run_sweep_gating.py +++ b/utils/changelog_gate_tests/test_run_sweep_gating.py @@ -1,15 +1,11 @@ -"""Exhaustively verify run-sweep.yml's changelog gating for every case. +"""Exhaustively verify run-sweep.yml's sweep gating for every case. The simulation jobs in `.github/workflows/test-changelog-gate.yml` hand-copy -two of the gating `if` conditions and exercise two scenarios. This test instead -parses the REAL `check-changelog` -> `reuse-sweep-gate` -> `setup` conditions -out of `run-sweep.yml` and evaluates them with a minimal GitHub Actions -expression engine, so it cannot drift from production and it covers every -distinct skip/run decision (PR and push, draft, label, reuse, metadata-only, -and validation-failure paths). - -The engine is grounded against a real GitHub outcome in -`test_engine_matches_real_github_run`. +two of the gating `if` conditions and exercise two scenarios. This test parses +the real `check-changelog` -> `reuse-sweep-gate` -> `setup` conditions out of +`run-sweep.yml` and evaluates them with a minimal GitHub Actions expression +engine, so it cannot drift from production and it covers every distinct +skip/run decision. """ from __future__ import annotations @@ -201,34 +197,17 @@ def _ctx(sc: dict) -> dict: } -def _ran_check_outcome(sc: dict) -> tuple[str, str]: - """Outcome of a check-changelog job that actually runs (not a draft PR). - - Models the PR-only "Reject conflicting sweep labels" step (which fails the - job when more than one sweep label is present); otherwise the validator - step's success/failure governs, and has-additions is empty unless success. - """ - labels = set(sc.get("labels", [])) - if sc["event"] == "pull_request" and len(labels & SWEEP_LABELS) > 1: - return "failure", "" - result = sc["check"] - return result, (sc.get("has", "") if result == "success" else "") - - def run_dag(sc: dict) -> tuple[str, str, str]: - """Return (check-changelog result, reuse-sweep-gate result, setup decision). - - The job `if` conditions are evaluated from the REAL run-sweep.yml strings; - only the in-job step outcomes (validator / conflicting-labels) are modelled. - """ + """Return (check-changelog result, reuse-sweep-gate result, setup decision).""" ctx = _ctx(sc) if not _eval(CHECK_IF, ctx): - cc_result, has = "skipped", "" + check_result = "skipped" + elif len(set(sc.get("labels", [])) & SWEEP_LABELS) > 1: + check_result = "failure" else: - cc_result, has = _ran_check_outcome(sc) - ctx["needs.check-changelog.result"] = cc_result - ctx["needs.check-changelog.outputs.has-additions"] = has + check_result = sc.get("check", "success") + ctx["needs.check-changelog.result"] = check_result if not _eval(GATE_IF, ctx): gate_result, skip = "skipped", "" @@ -239,83 +218,65 @@ def run_dag(sc: dict) -> tuple[str, str, str]: ctx["needs.reuse-sweep-gate.outputs.skip-pr-sweep"] = skip setup = "RUN" if _eval(SETUP_IF, ctx) else "SKIP" - return cc_result, gate_result, setup + return check_result, gate_result, setup -_PR = {"event": "pull_request", "draft": False, "check": "success"} +_PR = {"event": "pull_request", "draft": False} # (id, scenario, expected (check, reuse, setup)) CASES = [ ("PR-sync-full-noreuse", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "has": "true", "reuse_auth": False}, ("success", "success", "RUN")), + "reuse_auth": False}, ("success", "success", "RUN")), ("PR-sync-full-reuse-authorized", {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "has": "true", "reuse_auth": True}, ("success", "success", "SKIP")), + "reuse_auth": True}, ("success", "success", "SKIP")), + ("PR-sync-conflicting-labels-reuse-authorized", + {**_PR, "action": "synchronize", + "labels": ["full-sweep-enabled", "full-sweep-fail-fast"], + "reuse_auth": True}, ("failure", "skipped", "SKIP")), + ("PR-sync-full-changelog-failure", + {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], + "check": "failure"}, ("failure", "skipped", "SKIP")), ("PR-sync-trim-sweep-enabled", - {**_PR, "action": "synchronize", "labels": ["sweep-enabled"], - "has": "true"}, ("success", "skipped", "RUN")), + {**_PR, "action": "synchronize", "labels": ["sweep-enabled"]}, + ("success", "skipped", "RUN")), ("PR-sync-no-sweep-label", - {**_PR, "action": "synchronize", "labels": [], "has": "true"}, + {**_PR, "action": "synchronize", "labels": []}, ("success", "skipped", "SKIP")), ("PR-labeled-with-sweep-label", {**_PR, "action": "labeled", "label_name": "full-sweep-enabled", - "labels": ["full-sweep-enabled"], "has": "true"}, - ("success", "skipped", "RUN")), + "labels": ["full-sweep-enabled"]}, ("success", "skipped", "RUN")), ("PR-labeled-with-unrelated-label", {**_PR, "action": "labeled", "label_name": "documentation", - "labels": ["full-sweep-enabled"], "has": "true"}, - ("success", "skipped", "SKIP")), + "labels": ["full-sweep-enabled"]}, ("skipped", "skipped", "SKIP")), ("PR-unlabeled-removed-sweep-label", {**_PR, "action": "unlabeled", "label_name": "full-sweep-enabled", - "labels": [], "has": "true"}, ("success", "skipped", "SKIP")), + "labels": []}, ("success", "skipped", "SKIP")), ("PR-draft", {**_PR, "action": "synchronize", "draft": True, - "labels": ["full-sweep-enabled"], "has": "true"}, - ("skipped", "skipped", "SKIP")), + "labels": ["full-sweep-enabled"]}, ("skipped", "skipped", "SKIP")), ("PR-ready-for-review", {**_PR, "action": "ready_for_review", "labels": ["full-sweep-enabled"], - "has": "true", "reuse_auth": False}, ("success", "skipped", "RUN")), - ("PR-sync-metadata-only", - {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "has": "false"}, ("success", "skipped", "SKIP")), - ("PR-sync-changelog-invalid", - {**_PR, "action": "synchronize", "labels": ["full-sweep-enabled"], - "check": "failure"}, ("failure", "skipped", "SKIP")), - ("PR-sync-conflicting-sweep-labels", - {**_PR, "action": "synchronize", - "labels": ["sweep-enabled", "full-sweep-enabled"], "check": "failure"}, - ("failure", "skipped", "SKIP")), + "reuse_auth": False}, ("success", "skipped", "RUN")), ("push-additions-no-skip", - {"event": "push", "check": "success", "has": "true", - "msg": "feat: add model"}, ("success", "skipped", "RUN")), + {"event": "push", "msg": "feat: add model"}, + ("skipped", "skipped", "RUN")), ("push-skip-sweep-tag", - {"event": "push", "check": "success", "has": "true", - "msg": "fix: x [skip-sweep]"}, ("success", "skipped", "SKIP")), - ("push-metadata-only", - {"event": "push", "check": "success", "has": "false", - "msg": "fix: link"}, ("success", "skipped", "SKIP")), - # malformed changelog merged to main -> ingest blocked (recovery needed). - ("push-changelog-invalid", - {"event": "push", "check": "failure", "msg": "feat: add model"}, - ("failure", "skipped", "SKIP")), + {"event": "push", "msg": "fix: x [skip-sweep]"}, + ("skipped", "skipped", "SKIP")), ] @pytest.mark.parametrize("scenario,expected", [(c[1], c[2]) for c in CASES], ids=[c[0] for c in CASES]) -def test_gating_decision(scenario: dict, expected: tuple[str, str, str]) -> None: +def test_gating_decision( + scenario: dict, + expected: tuple[str, str, str], +) -> None: assert run_dag(scenario) == expected -def test_engine_matches_real_github_run() -> None: - # Ground truth: run-sweep run 27737489942 on PR #1821 (no labels, not a - # draft, metadata-only) recorded check-changelog=success, - # reuse-sweep-gate=skipped, setup=skipped. - real = {**_PR, "action": "synchronize", "labels": [], "has": "false"} - assert run_dag(real) == ("success", "skipped", "SKIP") - - def test_engine_self_consistency() -> None: checks = [ ("always()", {}, True), @@ -350,22 +311,29 @@ def test_trigger_types_enable_gated_events() -> None: # strings; any disagreement is either a spec error or a gating bug. # -------------------------------------------------------------------------- def reference_gate(sc: dict) -> tuple[str, str, str]: - """Hand-written reference for (check, reuse, setup) from documented intent.""" + """Hand-written reference for (check, reuse, setup) from intent.""" labels = set(sc.get("labels", [])) draft = sc.get("draft", False) is_pr = sc["event"] == "pull_request" + action = sc.get("action") - if is_pr and draft: - check, has = "skipped", "" - elif is_pr and len(labels & SWEEP_LABELS) > 1: - check, has = "failure", "" + check_runs = ( + is_pr + and not draft + and ( + action not in ("labeled", "unlabeled") + or sc.get("label_name") in SWEEP_LABELS + ) + ) + if not check_runs: + check = "skipped" + elif len(labels & SWEEP_LABELS) > 1: + check = "failure" else: - check = sc["check"] - has = sc.get("has", "") if check == "success" else "" + check = sc.get("check", "success") gate_runs = ( check == "success" - and has == "true" and is_pr and sc.get("action") == "synchronize" and not draft @@ -376,7 +344,6 @@ def reference_gate(sc: dict) -> tuple[str, str, str]: reuse_clause = (reuse == "skipped") or (reuse == "success" and not authorized) if is_pr: - action = sc.get("action") action_ok = action not in ("labeled", "unlabeled") or ( sc.get("label_name") in SWEEP_LABELS ) @@ -384,7 +351,8 @@ def reference_gate(sc: dict) -> tuple[str, str, str]: else: event_ok = "[skip-sweep]" not in sc.get("msg", "") - runs = check == "success" and has == "true" and reuse_clause and event_ok + check_clause = check in ("success", "skipped") + runs = check_clause and reuse_clause and event_ok return check, reuse, ("RUN" if runs else "SKIP") @@ -405,23 +373,17 @@ def _all_scenarios() -> list[dict]: [False, True], # draft label_cfgs, # labels ["full-sweep-enabled", "sweep-enabled", "documentation", None], # label.name - ["success", "failure"], # validator outcome - ["true", "false"], # has-additions [False, True], # reuse authorized + ["success", "failure"], # changelog outcome when it runs ) scenarios = [ {"event": "pull_request", "action": a, "draft": d, "labels": labs, - "label_name": ln, "check": chk, "has": h, "reuse_auth": r} - for a, d, labs, ln, chk, h, r in pr_axes + "label_name": ln, "reuse_auth": r, "check": chk} + for a, d, labs, ln, r, chk in pr_axes ] - push_axes = itertools.product( - ["success", "failure"], # validator outcome - ["true", "false"], # has-additions - ["feat: add model", "fix: thing [skip-sweep]"], # commit message - ) scenarios += [ - {"event": "push", "check": chk, "has": h, "msg": m} - for chk, h, m in push_axes + {"event": "push", "msg": msg} + for msg in ("feat: add model", "fix: thing [skip-sweep]") ] return scenarios @@ -435,9 +397,9 @@ def test_exhaustive_cross_product() -> None: ] assert not mismatches, mismatches[:10] # Sanity: confirm the sweep actually covered the whole input space - # (4 actions x 2 draft x 9 label-configs x 4 label-names x 2 check x - # 2 has-additions x 2 reuse = 2304 PR cases, plus 8 push cases). - assert len(scenarios) == 2312 + # (4 actions x 2 draft x 9 label-configs x 4 label-names x 2 reuse x + # 2 changelog outcomes = 1152 PR cases, plus 2 push cases). + assert len(scenarios) == 1154 def test_named_cases_match_reference_spec() -> None: diff --git a/utils/changelog_gate_tests/test_validate_perf_changelog.py b/utils/changelog_gate_tests/test_validate_perf_changelog.py index 34316e7c9..8b31d8f94 100644 --- a/utils/changelog_gate_tests/test_validate_perf_changelog.py +++ b/utils/changelog_gate_tests/test_validate_perf_changelog.py @@ -9,6 +9,7 @@ ChangelogValidationError, compare_entries, parse_changelog, + validate_matrix_compatible_change, validate_raw_change, ) @@ -203,6 +204,50 @@ def test_raw_correction_rejects_whitespace_only_history_change() -> None: ) +def test_matrix_compatible_check_rejects_missing_final_newline( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "validate_perf_changelog.read_git_file", + lambda *_args: b"- config-keys: []", + ) + + with pytest.raises(ChangelogValidationError, match="end with a newline"): + validate_matrix_compatible_change("base", "head", "file") + + +def test_matrix_compatible_check_propagates_matrix_rejection( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "validate_perf_changelog.read_git_file", + lambda *_args: b"- config-keys: []\n", + ) + + def reject_matrix(*_args: object) -> None: + raise ChangelogValidationError("matrix rejected") + + monkeypatch.setattr( + "validate_perf_changelog.validate_generated_config", + reject_matrix, + ) + + with pytest.raises(ChangelogValidationError, match="matrix rejected"): + validate_matrix_compatible_change("base", "head", "file") + + +def test_matrix_compatible_check_rejects_pr_1717_conflict_resolution() -> None: + with pytest.raises( + ChangelogValidationError, + match=r"Found deleted line: +pr-link: .*pull/1798", + ): + validate_matrix_compatible_change( + "add33814cce15d0b71e3c98eca4bb2f7ad8aba96", + "60bf726a7f324a01e8850d228c8f0f7a6f203dbd", + "perf-changelog.yaml", + ) + + def test_run_sweep_checks_changelog_before_reuse_and_setup() -> None: repo_root = Path(__file__).resolve().parents[2] workflow = yaml.load( @@ -212,7 +257,8 @@ def test_run_sweep_checks_changelog_before_reuse_and_setup() -> None: jobs = workflow["jobs"] pull_request_types = workflow["on"]["pull_request"]["types"] - assert "needs" not in jobs["check-changelog"] + assert "check-changelog" in jobs + assert "check-newline" not in jobs # opened/reopened are intentionally excluded so opening or reopening a PR # that already carries a sweep label does not start a sweep. assert {"synchronize", "labeled", "unlabeled", "ready_for_review"}.issubset( @@ -223,36 +269,31 @@ def test_run_sweep_checks_changelog_before_reuse_and_setup() -> None: step.get("name") for step in jobs["check-changelog"]["steps"] ] - setup_step_names = [ - step.get("name") - for step in jobs["setup"]["steps"] - ] + setup_step_names = [step.get("name") for step in jobs["setup"]["steps"]] assert "Reject conflicting sweep labels" in check_step_names assert "Reject conflicting sweep labels" not in setup_step_names - assert jobs["check-changelog"]["outputs"]["has-additions"] == ( - "${{ steps.validate.outputs.has-additions }}" - ) + assert "needs" not in jobs["check-changelog"] assert jobs["reuse-sweep-gate"]["needs"] == "check-changelog" - assert ( - "needs.check-changelog.result == 'success'" - in jobs["reuse-sweep-gate"]["if"] - ) - assert ( - "needs.check-changelog.outputs.has-additions == 'true'" - in jobs["reuse-sweep-gate"]["if"] - ) assert jobs["setup"]["needs"] == [ "check-changelog", "reuse-sweep-gate", ] - assert "needs.check-changelog.result == 'success'" in jobs["setup"]["if"] assert ( - "needs.check-changelog.outputs.has-additions == 'true'" - in jobs["setup"]["if"] + "needs.check-changelog.result == 'success'" + in jobs["reuse-sweep-gate"]["if"] ) + assert "needs.check-changelog.result == 'success'" in jobs["setup"]["if"] + assert "needs.check-changelog.result == 'skipped'" in jobs["setup"]["if"] + check_script = "\n".join( + step.get("run", "") + for step in jobs["check-changelog"]["steps"] + ) + assert "validate_perf_changelog.py" in check_script + assert "--base-ref" in check_script + assert "--head-ref" in check_script -def test_merge_helper_waits_for_changelog_check_before_merge() -> None: +def test_merge_helper_waits_for_pr_checks_before_merge() -> None: repo_root = Path(__file__).resolve().parents[2] script = (repo_root / "utils/merge_with_reuse.sh").read_text() @@ -260,11 +301,15 @@ def test_merge_helper_waits_for_changelog_check_before_merge() -> None: wait_index = script.index( 'wait_for_check "$POST_MERGE" "check-changelog"' ) + checks_index = script.index( + 'gh pr checks "$PR" --repo "$REPO" --watch --fail-fast' + ) merge_index = script.index( 'gh pr merge "$PR" --repo "$REPO" --squash --admin' ) - assert push_index < wait_index < merge_index + assert push_index < wait_index < checks_index < merge_index + assert "CHECK_TIMEOUT_SECONDS" in script assert "prepare_perf_changelog_merge.py" in script assert "git commit --allow-empty" in script assert script.count('CURRENT_HEAD="$(gh pr view') == 2 diff --git a/utils/merge_with_reuse.sh b/utils/merge_with_reuse.sh index e5387d296..1655746b5 100755 --- a/utils/merge_with_reuse.sh +++ b/utils/merge_with_reuse.sh @@ -6,11 +6,10 @@ # 2. Merge origin/main into the PR branch. Any `perf-changelog.yaml` # conflict is auto-resolved by accepting main's entries and re-appending # the PR's entry at the bottom with `XXX` -> the canonical PR URL. -# 3. Canonicalize appended links, push a fresh commit, and wait for -# `check-changelog` on that exact SHA. -# The PR synchronize run then observes the reuse authorization and skips -# sweep setup and benchmark jobs. -# 4. Squash-merge the PR to main (--admin) only after validation succeeds. +# 3. Canonicalize appended links and push a fresh synchronization commit. +# The PR run observes the reuse authorization and skips sweep setup and +# benchmark jobs. +# 4. Wait for the PR checks, then squash-merge the PR to main (--admin). # # Usage: utils/merge_with_reuse.sh # Env: REPO (default SemiAnalysisAI/InferenceX) @@ -55,10 +54,10 @@ wait_for_check() { if [ "$status" = "completed" ]; then if [ "$conclusion" = "success" ]; then - ok "${check_name} passed${details:+ — ${details}}" + ok "${check_name} passed${details:+ - ${details}}" return 0 fi - die "${check_name} concluded ${conclusion:-unknown}${details:+ — ${details}}" + die "${check_name} concluded ${conclusion:-unknown}${details:+ - ${details}}" fi sleep 5 @@ -194,25 +193,26 @@ if ! git diff --quiet -- "$CHANGELOG"; then fi # Always create a synchronize event when the branch was already prepared. -# This guarantees that check-changelog exists for the exact SHA we validate. +# This guarantees the reuse gate sees the authorization on the current SHA. if [ "$PRE_MERGE" = "$(git rev-parse HEAD)" ]; then git commit --allow-empty \ - -m "chore: validate PR #${PR} changelog before reuse [skip-sweep]" + -m "chore: refresh PR #${PR} for sweep reuse [skip-sweep]" fi # --- step 3: push prepared commit -------------------------------------------- POST_MERGE="$(git rev-parse HEAD)" log "Pushing prepared commit ${POST_MERGE:0:8}" git push origin "${LOCAL_BRANCH}:${HEAD_BRANCH}" -ok "Push complete; changelog validation will run before the reuse gate" - -wait_for_check "$POST_MERGE" "check-changelog" +ok "Push complete; reuse authorization will be evaluated on the new head" # --- step 4: squash-merge to main ------------------------------------------- CURRENT_HEAD="$(gh pr view "$PR" --repo "$REPO" --json headRefOid --jq '.headRefOid')" [ "$CURRENT_HEAD" = "$POST_MERGE" ] \ || die "PR head changed to ${CURRENT_HEAD:0:8}; expected ${POST_MERGE:0:8}" +# `gh pr checks --watch` fails if GitHub has not registered any checks yet. +wait_for_check "$POST_MERGE" "check-changelog" + log "Waiting for all PR checks" gh pr checks "$PR" --repo "$REPO" --watch --fail-fast ok "All PR checks passed" diff --git a/utils/validate_perf_changelog.py b/utils/validate_perf_changelog.py index a020365aa..204687965 100644 --- a/utils/validate_perf_changelog.py +++ b/utils/validate_perf_changelog.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 -"""Validate perf-changelog.yaml before sweep reuse can skip setup.""" +"""Validate perf-changelog.yaml changes.""" from __future__ import annotations import argparse import json -import os import re import subprocess import sys @@ -71,21 +70,6 @@ def construct_unique_mapping( ) -def run_git(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]: - """Run git and return captured text output.""" - result = subprocess.run( - ["git", *args], - capture_output=True, - text=True, - ) - if check and result.returncode != 0: - detail = result.stderr.strip() or result.stdout.strip() - raise ChangelogValidationError( - f"git {' '.join(args)} failed: {detail}" - ) - return result - - def read_git_file(ref: str, path: str) -> bytes: """Read a repository file exactly as stored at a git ref.""" result = subprocess.run( @@ -343,73 +327,19 @@ def validate_generated_config(base_ref: str, head_ref: str, path: str) -> None: ) from exc -def validate_changelog( +def validate_matrix_compatible_change( base_ref: str, head_ref: str, path: str, - pr_number: int | None, -) -> tuple[int, int]: - """Validate the complete head file and its change from base to head.""" - diff_check = run_git( - "diff", - "--check", - base_ref, - head_ref, - "--", - path, - check=False, - ) - if diff_check.returncode != 0: - detail = diff_check.stdout.strip() or diff_check.stderr.strip() +) -> None: + """Validate the final newline and the diff accepted by sweep setup.""" + head_raw = read_git_file(head_ref, path) + if not head_raw.endswith(b"\n"): raise ChangelogValidationError( - f"git diff --check failed for {path}:\n{detail}" + f"{path} at {head_ref} does not end with a newline" ) - base_raw = read_git_file(base_ref, path) - head_raw = read_git_file(head_ref, path) - base_entries = parse_changelog( - base_raw, - f"{path} at {base_ref}", - ) - head_entries = parse_changelog( - head_raw, - f"{path} at {head_ref}", - ) - additions, corrections = compare_entries( - base_entries, - head_entries, - pr_number, - ) - validate_raw_change( - base_raw, - head_raw, - len(additions), - corrections, - ) - - if additions: - validate_generated_config(base_ref, head_ref, path) - elif not corrections: - raise ChangelogValidationError(f"{path} has no changes") - - return len(additions), corrections - - -def write_github_outputs( - path: str | None, - additions: int, - corrections: int, -) -> None: - """Expose changelog change type to downstream GitHub Actions jobs.""" - if not path: - return - with open(path, "a") as handle: - handle.write(f"has-additions={'true' if additions else 'false'}\n") - handle.write( - f"metadata-only={'true' if corrections and not additions else 'false'}\n" - ) - handle.write(f"addition-count={additions}\n") - handle.write(f"correction-count={corrections}\n") + validate_generated_config(base_ref, head_ref, path) def main() -> int: @@ -418,19 +348,13 @@ def main() -> int: parser.add_argument("--base-ref", required=True) parser.add_argument("--head-ref", required=True) parser.add_argument("--changelog-file", default="perf-changelog.yaml") - parser.add_argument("--pr-number", type=int) - parser.add_argument( - "--github-output", - default=os.environ.get("GITHUB_OUTPUT"), - ) args = parser.parse_args() try: - additions, corrections = validate_changelog( + validate_matrix_compatible_change( args.base_ref, args.head_ref, args.changelog_file, - args.pr_number, ) except ChangelogValidationError as exc: print(f"ERROR: {exc}", file=sys.stderr) @@ -438,11 +362,8 @@ def main() -> int: print( f"Validated {args.changelog_file}: " - f"{additions} appended entr{'y' if additions == 1 else 'ies'}, " - f"{corrections} pr-link correction" - f"{'' if corrections == 1 else 's'}" + "final newline present and matrix generated" ) - write_github_outputs(args.github_output, additions, corrections) return 0