diff --git a/errors/concurrency-timing/ct-103.yml b/errors/concurrency-timing/ct-103.yml new file mode 100644 index 0000000..7c1ab1d --- /dev/null +++ b/errors/concurrency-timing/ct-103.yml @@ -0,0 +1,100 @@ +id: ct-103 +title: '`github.head_ref` Is Empty String on Push Events — Concurrency Key Collapses All Push Runs Into One Group' +category: concurrency-timing +severity: silent-failure +tags: + - concurrency + - head_ref + - push + - pull_request + - empty-string + - cancel-in-progress + - branch-name +patterns: + - regex: 'group:\s*\$\{\{\s*github\.head_ref\s*\}\}' + flags: 'i' + - regex: 'group:\s*[''"][^''"]*\$\{\{\s*github\.head_ref\s*\}\}[^''"]*[''"]' + flags: 'i' + - regex: 'This run was cancelled' + flags: 'i' +error_messages: + - "This run was cancelled." + - "Run was cancelled." +root_cause: | + `github.head_ref` is populated only for `pull_request` and `pull_request_target` + events — it contains the source branch name of the PR (e.g., `feature/my-change`). + + For `push` events, `github.head_ref` is an **empty string** because push events + are not associated with a pull request and have no "head" vs "base" distinction. + + When developers write a concurrency group key using only `github.head_ref`: + + ```yaml + concurrency: + group: ${{ github.head_ref }} + cancel-in-progress: true + ``` + + All `push` event workflow runs evaluate the concurrency group key to `""` (empty + string). Every push run to any branch — `main`, `develop`, `feature/x` — shares + the same empty concurrency key. With `cancel-in-progress: true`, each new push + cancels ALL other in-progress push runs across every branch simultaneously. + + This is a silent failure because: + - Push runs from unrelated branches unexpectedly cancel each other. + - PR runs still work correctly (they get a proper `head_ref` key). + - The concurrency group `""` is valid YAML, so no error is shown. 
+fix: | + Use the `||` fallback operator to substitute `github.ref_name` (available for + push events) when `github.head_ref` is empty: + + ```yaml + concurrency: + group: ${{ github.head_ref || github.ref_name }} + cancel-in-progress: true + ``` + + `github.ref_name` returns the short branch name for push events (e.g., `main`). + For PR events, `github.head_ref` takes precedence and provides the source branch. + + Alternatively, include the workflow name to avoid cross-workflow concurrency collisions: + + ```yaml + concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} + cancel-in-progress: true + ``` +fix_code: + - language: yaml + label: 'WRONG — head_ref is empty on push, all pushes share empty concurrency group' + code: | + on: [push, pull_request] + concurrency: + group: ${{ github.head_ref }} # empty string for all push events! + cancel-in-progress: true + - language: yaml + label: 'CORRECT — fallback to ref_name when head_ref is empty' + code: | + on: [push, pull_request] + concurrency: + # head_ref is populated for PRs; ref_name is the branch name for push events + group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} + cancel-in-progress: true + - language: yaml + label: 'CORRECT — using github.ref for a stable, always-populated key' + code: | + on: [push, pull_request] + concurrency: + # github.ref is always set: refs/heads/main (push) or refs/pull/42/merge (PR) + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +prevention: + - 'Never use `github.head_ref` alone as a concurrency key if the workflow runs on both `push` and `pull_request` events.' + - 'Use `github.head_ref || github.ref_name` or `github.ref` to ensure a non-empty concurrency group key for all event types.' + - 'Test concurrency configuration by triggering push events from multiple branches simultaneously and verifying only intra-branch cancellations occur.' 
+ - 'Always prefix the concurrency group with `${{ github.workflow }}` to prevent cross-workflow concurrency collisions.' +docs: + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-concurrency' + label: 'GitHub Docs — Using concurrency' + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/accessing-contextual-information-about-workflow-runs#github-context' + label: 'GitHub Docs — github context (head_ref, ref_name, ref)' diff --git a/errors/silent-failures/sf-225.yml b/errors/silent-failures/sf-225.yml new file mode 100644 index 0000000..6f99263 --- /dev/null +++ b/errors/silent-failures/sf-225.yml @@ -0,0 +1,114 @@ +id: sf-225 +title: '`if: always()` Combined With `needs.result` Conditions Still Causes Job Skip When All Dependencies Are Skipped' +category: silent-failures +severity: silent-failure +tags: + - needs + - always + - skipped + - job-conditions + - status-check + - if-expression + - pipeline +patterns: + - regex: 'always\(\)\s*&&\s*needs\.\w+\.result\s*!=\s*[''"](?:failure|failed)[''"]' + flags: 'i' + - regex: 'always\(\)\s*&&\s*needs\.\w+\.result\s*==\s*[''"](?:success|skipped)[''"]' + flags: 'i' + - regex: 'This job was skipped' + flags: 'i' +error_messages: + - "This job was skipped." + - "Skipping this job because a previous job in the chain was skipped." +root_cause: | + GitHub Actions evaluates `if:` expressions on jobs in two passes. First, it applies + an implicit default status check: if no status check function is present, only jobs + whose `needs` dependencies all succeeded will run. + + `always()` overrides this implicit check and forces evaluation. However, when + `always()` is combined with additional boolean conditions that inspect `needs.X.result` + — such as `if: always() && needs.deploy.result != 'failure'` — the behavior can + still produce a skip in certain multi-dependency graphs. 
+ + The root cause (documented in actions/runner#2205, 83 reactions) is that + `always()` does NOT make all sub-expressions of the `&&` chain evaluate in a + "permissive" mode. If any `needs` dependency is in a `skipped` state and the + combined condition evaluates to a result that the runner interprets as "the + pre-condition for running this job was not met," the job is silently skipped. + + Community-confirmed workaround: replace `if: always()` or + `if: always() && needs.X.result != 'failure'` with + `if: !failure() && !cancelled()`. This idiom explicitly allows the job to run + when all predecessors either succeeded or were skipped, without triggering + on genuine failures or cancellations. +fix: | + Replace `if: always()` (or `if: always() && needs.X.result != 'failure'`) with + `if: !failure() && !cancelled()` on downstream jobs that should run whenever + upstream jobs either succeeded OR were skipped — but NOT when something actively + failed. + + Use `if: always()` only for jobs that must run unconditionally, regardless of + any upstream failure (e.g., final cleanup or notification steps). +fix_code: + - language: yaml + label: 'PROBLEMATIC — job may still be skipped when needs deps are skipped' + code: | + jobs: + optional-build: + runs-on: ubuntu-latest + if: github.ref == 'refs/heads/main' + steps: + - run: make build + + notify: + needs: optional-build + runs-on: ubuntu-latest + # WRONG: always() + needs condition can still cause skip + if: always() && needs.optional-build.result != 'failure' + steps: + - run: echo "Notifying..." 
+ - language: yaml + label: 'CORRECT — use !failure() && !cancelled() for reliable skip-tolerant jobs' + code: | + jobs: + optional-build: + runs-on: ubuntu-latest + if: github.ref == 'refs/heads/main' + steps: + - run: make build + + notify: + needs: optional-build + runs-on: ubuntu-latest + # CORRECT: runs when build succeeded OR was skipped; does not run on failure/cancel + if: '!failure() && !cancelled()' + steps: + - run: echo "Notifying..." + - language: yaml + label: 'CORRECT — unconditional run (cleanup/final step always runs)' + code: | + jobs: + deploy: + runs-on: ubuntu-latest + steps: + - run: ./deploy.sh + + report: + needs: [deploy] + runs-on: ubuntu-latest + # Use always() only when the job must run regardless of ANY outcome + if: always() + steps: + - run: echo "Pipeline result for ${{ needs.deploy.result }}" +prevention: + - 'Use `if: !failure() && !cancelled()` for jobs that should run when predecessors succeeded or were skipped, but NOT on failure.' + - 'Use `if: always()` only for unconditional jobs (cleanup, summaries) that must run regardless of any failure.' + - 'Avoid `if: always() && needs.X.result != ''failure''` — this combined form can behave unexpectedly when all needs are in skipped state.' + - 'Test workflows with `workflow_dispatch` and manually skip a dependency to verify downstream jobs behave correctly.' 
+docs: + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-conditions-to-control-job-execution' + label: 'GitHub Docs — Using conditions to control job execution' + - url: 'https://github.com/actions/runner/issues/2205' + label: 'actions/runner#2205 — Jobs skipped when NEEDS job ran successfully (83 reactions)' + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#status-check-functions' + label: 'GitHub Docs — Status check functions (always, failure, cancelled, success)' diff --git a/errors/silent-failures/sf-226.yml b/errors/silent-failures/sf-226.yml new file mode 100644 index 0000000..4dad2fc --- /dev/null +++ b/errors/silent-failures/sf-226.yml @@ -0,0 +1,115 @@ +id: sf-226 +title: '`github.event.commits` Is Absent in `pull_request` Events — Expressions Accessing Commit Data Silently Return Empty' +category: silent-failures +severity: silent-failure +tags: + - github-context + - event-payload + - commits + - pull-request + - push + - null-context + - commit-message + - silent-skip +patterns: + - regex: 'github\.event\.commits\[?\d*\]?\.' + flags: 'i' + - regex: 'github\.event\.commits.*pull_request' + flags: 'i' + - regex: 'contains\(toJSON\(github\.event\.commits\)' + flags: 'i' +error_messages: + - "Error: Unhandled error: TypeError: Cannot read properties of undefined (reading '0')" + - "Expression evaluation result is empty for github.event.commits[0].message" + - "Step was skipped because the condition was false: contains(toJSON(github.event.commits), '[deploy]')" +root_cause: | + The `github.event.commits` array is only present in the event payload for `push` + events (and related events like `create`). It is **absent** — not null but completely + missing — from the `pull_request`, `pull_request_target`, `workflow_dispatch`, + `schedule`, and most other event payloads. 
+ + Developers commonly write expressions like: + - `contains(toJSON(github.event.commits), '[deploy]')` — to check commit messages + - `github.event.commits[0].message` — to read the latest commit's message + - `github.event.commits.*.author.name` — to iterate commit authors + + These expressions silently return empty string or evaluate to `false` (not an error) + when the workflow runs on a `pull_request` event, because the `commits` key is + undefined in the JSON payload. This causes: + - Conditional steps to be silently skipped. + - Commit message parsing logic to silently do nothing on PRs. + - Downstream job logic that depends on commit message flags to never fire on PRs. + + The closely related `github.event.head_commit` is null (not absent) on PRs — see + sf-037 for that pattern. `github.event.commits` follows the same spirit but uses + array absence rather than null. +fix: | + Guard all `github.event.commits` accesses behind an `event_name` check, or use + alternative sources for commit information that work across event types. + + For commit message inspection: use `git log` inside a `run:` step and set an output + variable, which works for both push and PR events after checkout. + + For PR events specifically: `github.event.pull_request.head.sha` gives the HEAD + commit SHA, but NOT the commit message — you need to use the API or `git log`. 
+fix_code: + - language: yaml + label: 'WRONG — commits array absent on pull_request events; step silently skipped' + code: | + jobs: + check-commit: + runs-on: ubuntu-latest + steps: + - if: contains(toJSON(github.event.commits), '[deploy]') + # This condition is always FALSE on pull_request events because + # github.event.commits is absent (undefined), not an empty array + run: echo "Deploy flag found in commits" + - language: yaml + label: 'CORRECT — guard with event_name or use git log for cross-event commit access' + code: | + jobs: + check-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # Option 1: guard with event_name + - if: github.event_name == 'push' && contains(toJSON(github.event.commits), '[deploy]') + run: echo "Deploy flag found in push commits" + + # Option 2: use git log for both push and PR events (requires checkout; on PRs HEAD is the merge commit unless ref is set to the PR head SHA) + - id: check-flag + run: | + if git log -1 --format='%s' | grep -q '\[deploy\]'; then + echo "deploy=true" >> "$GITHUB_OUTPUT" + fi + - if: steps.check-flag.outputs.deploy == 'true' + run: echo "Deploy flag found" + - language: yaml + label: 'CORRECT — per-event payload for commit SHA on PR vs push' + code: | + jobs: + get-sha: + runs-on: ubuntu-latest + steps: + - run: | + if [ "${{ github.event_name }}" = "pull_request" ]; then + # Use PR head SHA for pull_request events + SHA="${{ github.event.pull_request.head.sha }}" + else + # Use github.sha for push and other events + SHA="${{ github.sha }}" + fi + echo "Commit SHA: $SHA" +prevention: + - 'Never access `github.event.commits` without first checking `github.event_name == ''push''`.' + - 'Use `git log` after `actions/checkout` for commit message inspection that works on both push and PR events.' + - 'Review the GitHub Actions event payload reference to understand which fields are present for each event type.' + - 'Add explicit `event_name` guards when a workflow is triggered by multiple events with different payload shapes.' 
+docs: + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#push' + label: 'GitHub Docs — push event payload (commits array present here)' + - url: 'https://docs.github.com/en/webhooks/webhook-events-and-payloads#pull_request' + label: 'GitHub Docs — pull_request event payload (no commits array)' + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/accessing-contextual-information-about-workflow-runs#github-context' + label: 'GitHub Docs — github context reference' diff --git a/errors/silent-failures/sf-227.yml b/errors/silent-failures/sf-227.yml new file mode 100644 index 0000000..d474c2f --- /dev/null +++ b/errors/silent-failures/sf-227.yml @@ -0,0 +1,104 @@ +id: sf-227 +title: '`if: failure()` on a Step After a `continue-on-error: true` Step Never Triggers — Failure Is Absorbed' +category: silent-failures +severity: silent-failure +tags: + - continue-on-error + - failure + - if-condition + - step-context + - status-check-function + - cleanup + - silent-skip +patterns: + - regex: 'continue-on-error:\s*true' + flags: 'i' + - regex: 'if:\s*failure\(\)' + flags: 'i' + - regex: 'if:\s*\$\{\{\s*failure\(\)\s*\}\}' + flags: 'i' +error_messages: + - "Step was skipped because the condition was false: failure()" + - "Cleanup step never runs despite previous step failing" +root_cause: | + `continue-on-error: true` on a step instructs the runner to continue executing the + job even if that step exits with a non-zero code. Crucially, it also marks the step's + `conclusion` as `success` in the `steps` context — the failure is absorbed at the + job level. + + The `failure()` status check function evaluates whether the **current job is in a + failed state** at the point where the `if:` condition is evaluated. 
Because + `continue-on-error: true` prevents the previous step's failure from changing the + job's status to "failed," `failure()` evaluates to `false` on all subsequent steps. + + As a result, any step guarded by `if: failure()` that comes AFTER a + `continue-on-error: true` step that actually failed will be **silently skipped**. + This is the most common cause of cleanup or error-handling steps never running + despite a step failure. + + Note: The raw failure IS still accessible via `steps.<step_id>.outcome == 'failure'` + (the `outcome` property reflects the exit code before `continue-on-error` is + applied). See sf-068 for the `conclusion` vs `outcome` distinction. +fix: | + Replace `if: failure()` with a direct check on `steps.<step_id>.outcome == 'failure'` + to detect the raw failure of a specific step, regardless of `continue-on-error`. + + If you need cleanup that runs after ANY failure in the job (not just one specific + step), restructure so that the failing step does NOT use `continue-on-error`. + Instead, capture its exit code into an output and make the cleanup conditional on + that output. +fix_code: + - language: yaml + label: 'WRONG — failure() never triggers after continue-on-error: true step' + code: | + steps: + - id: risky-step + run: ./might-fail.sh + continue-on-error: true # absorbs the failure at job level + + - name: cleanup + # WRONG: failure() evaluates to false because the job is not in failed state + if: failure() + run: echo "This never runs even when risky-step fails!" 
+ - language: yaml + label: 'CORRECT — use steps.<step_id>.outcome to detect absorbed failures' + code: | + steps: + - id: risky-step + run: ./might-fail.sh + continue-on-error: true # absorbs the failure at job level + + - name: cleanup-on-failure + # CORRECT: outcome reflects the raw exit code before continue-on-error + if: steps.risky-step.outcome == 'failure' + run: echo "Runs only when risky-step actually failed" + + - name: always-cleanup + # CORRECT: always() or no condition to always run + if: always() + run: echo "Runs regardless of risky-step outcome" + - language: yaml + label: 'CORRECT — avoid continue-on-error when downstream failure() checks are needed' + code: | + steps: + - id: risky-step + run: | + ./might-fail.sh || echo "failed=true" >> "$GITHUB_OUTPUT" + + - name: cleanup + # CORRECT: risky-step records its failure in a step output instead of + # using continue-on-error, so this condition reads that output directly + if: steps.risky-step.outputs.failed == 'true' + run: echo "Runs when risky-step reports failure via output" +prevention: + - 'Never use `if: failure()` on a step that comes after a step with `continue-on-error: true` — use `steps.<step_id>.outcome == ''failure''` instead.' + - 'Remember: `steps.<step_id>.outcome` = raw exit code result; `steps.<step_id>.conclusion` = result after applying `continue-on-error`.' + - 'For unconditional cleanup steps, use `if: always()` — not `if: failure()` — when `continue-on-error` is in play anywhere in the job.' + - 'Prefer `|| echo "failed=true" >> "$GITHUB_OUTPUT"` patterns over `continue-on-error: true` when downstream steps need to react to failures via expressions.' 
+docs: + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#status-check-functions' + label: 'GitHub Docs — Status check functions (failure, success, always, cancelled)' + - url: 'https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#jobsjob_idstepscontinue-on-error' + label: 'GitHub Docs — continue-on-error' + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/accessing-contextual-information-about-workflow-runs#steps-context' + label: 'GitHub Docs — steps context (outcome vs conclusion)'