diff --git a/errors/caching-artifacts/alpine-busybox-tar-p-flag-unsupported.yml b/errors/caching-artifacts/alpine-busybox-tar-p-flag-unsupported.yml new file mode 100644 index 0000000..c1d4131 --- /dev/null +++ b/errors/caching-artifacts/alpine-busybox-tar-p-flag-unsupported.yml @@ -0,0 +1,84 @@ +id: caching-artifacts-150 +title: 'actions/cache Fails in Alpine Containers — BusyBox tar Does Not Support -P Flag' +category: caching-artifacts +severity: error +tags: + - alpine + - busybox + - tar + - container + - cache + - linux +patterns: + - regex: 'tar: unrecognized option: P' + flags: 'i' + - regex: 'BusyBox.*?tar.*?unrecognized.*?option' + flags: 'i' + - regex: 'Tar failed with error: The process .*/bin/tar. failed with exit code 1' + flags: 'i' +error_messages: + - "/bin/tar: unrecognized option: P" + - "BusyBox v1.31.1 () multi-call binary." + - "[warning]Tar failed with error: The process '/bin/tar' failed with exit code 1" +root_cause: | + The `actions/cache` action uses GNU tar with the `-P` flag (preserve absolute path names) + when creating and extracting cache archives. Alpine Linux containers ship with BusyBox, + which provides a minimal tar implementation that does not recognise the `-P` flag. + + When the cache step runs inside an Alpine container, `/bin/tar` is BusyBox tar and the + command fails immediately with "unrecognized option: P". The restore step returns exit + code 1 and the workflow stops or the cache is silently skipped depending on fail-on-cache-miss. + + This is a long-standing issue first reported in actions/cache#352 and re-surfaced in + actions/cache#1765 (June 2026). No workaround has been added to the action itself. 
+fix: | + Install GNU tar in the Alpine container before the cache step using apk: + + steps: + - name: Install GNU tar (required for actions/cache) + run: apk add --no-cache tar + - name: Cache dependencies + uses: actions/cache@v4 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + + Alternatively, switch the job to a Debian/Ubuntu-based container image where + GNU tar is already the default (/usr/bin/tar). +fix_code: + - language: yaml + label: Install GNU tar in Alpine before using actions/cache + code: | + - name: Install GNU tar + run: apk add --no-cache tar + + - name: Cache dependencies + uses: actions/cache@v4 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - language: yaml + label: Switch to Debian/Ubuntu container to avoid BusyBox tar + code: | + jobs: + build: + runs-on: ubuntu-latest + container: + image: debian:bookworm-slim # GNU tar available by default + steps: + - uses: actions/cache@v4 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} +prevention: + - "Run tar --version inside your Alpine container; if it shows BusyBox, add apk add --no-cache tar as the first step." + - "Set container: debian:bookworm-slim or ubuntu:latest instead of alpine when the job uses actions/cache." + - "Add apk add --no-cache tar as the very first step in any job that uses actions/cache inside an Alpine container." + - "Check the actions/cache documentation for container compatibility notes before choosing a base image." 
+docs: + - url: "https://github.com/actions/cache/issues/1765" + label: "actions/cache issue #1765 — Post cache not working on alpine runners (2026)" + - url: "https://github.com/actions/cache/issues/352" + label: "actions/cache issue #352 — Original Alpine BusyBox tar report" + - url: "https://docs.github.com/en/actions/using-containerized-services/about-service-containers" + label: "GitHub Docs — About service containers" diff --git a/errors/concurrency-timing/environment-deployment-branch-scoped-concurrency-cross-branch-collision.yml b/errors/concurrency-timing/environment-deployment-branch-scoped-concurrency-cross-branch-collision.yml new file mode 100644 index 0000000..bd02803 --- /dev/null +++ b/errors/concurrency-timing/environment-deployment-branch-scoped-concurrency-cross-branch-collision.yml @@ -0,0 +1,130 @@ +id: ct-104 +title: 'Branch-Scoped Concurrency Group Allows Simultaneous Deployments to the Same Environment from Different Branches' +category: concurrency-timing +severity: silent-failure +tags: + - concurrency + - environment + - deployment + - branch + - github-ref + - cross-branch + - concurrent-deploy + - job-environment +patterns: + - regex: 'group:\s*[''"]?[^''"\n]*\$\{\{\s*github\.ref[^}]*\}\}[^''"\n]*' + flags: 'i' + - regex: 'group:\s*[''"]?[^''"\n]*\$\{\{\s*github\.ref_name[^}]*\}\}[^''"\n]*' + flags: 'i' + - regex: 'group:\s*[''"]?[^''"\n]*\$\{\{\s*github\.head_ref[^}]*\}\}[^''"\n]*' + flags: 'i' +error_messages: + - "# No error — two branches deploy to the same environment simultaneously; may cause partial state or deployment races" +root_cause: | + Workflows that include `github.ref`, `github.ref_name`, or `github.head_ref` in + their concurrency group key create **separate concurrency groups per branch**. When + two branches push to a workflow that deploys to the same environment, they get + different concurrency groups and do NOT queue or cancel each other. 
+ + Example: + - Branch `feature/alpha` pushes → group: `deploy-staging-refs/heads/feature/alpha` + - Branch `main` pushes → group: `deploy-staging-refs/heads/main` + - Groups are different → both jobs run simultaneously → both deploy to `staging` + + This pattern is correct for CI workflows (each branch's tests should run + independently), but incorrect for shared deployment environments where only one + deployment should be active at a time. + + The confusion is common because teams copy a per-branch concurrency pattern from + CI into deploy workflows without adjusting the key. Environment protection rules + (required reviewers, wait timers) gate each individual job but do NOT prevent + multiple simultaneous deployments from different concurrency groups. + + GitHub Actions provides the `job.environment` context — the environment name string + for the current job — which creates a stable, per-environment concurrency key that + applies across all branches deploying to that environment. +fix: | + Key the concurrency group on the environment name, not the branch ref, so all + branches deploying to the same environment share one concurrency slot: + + concurrency: + group: deploy-${{ job.environment }} + cancel-in-progress: false # queue; do not discard deploys + + Important: `job.environment` is only populated inside a job that declares + `environment:`. Set the concurrency group at the **job level**, not at the + workflow level, when using `job.environment`. + + Use `cancel-in-progress: false` for deployments to ensure every triggered deploy + runs in order rather than being silently dropped. 
+fix_code: + - language: yaml + label: 'WRONG — branch-scoped group; feature/ and main can deploy to staging simultaneously' + code: | + on: [push] + + jobs: + deploy: + runs-on: ubuntu-latest + environment: staging + concurrency: + # BAD: different branches get different concurrency slots + # feature/alpha and main can both deploy to staging at the same time + group: deploy-staging-${{ github.ref }} + cancel-in-progress: true + steps: + - run: ./deploy.sh staging + - language: yaml + label: 'CORRECT — environment-scoped group; only one deploy to staging at a time' + code: | + on: [push] + + jobs: + deploy: + runs-on: ubuntu-latest + environment: staging + concurrency: + # GOOD: all branches deploying to staging share one concurrency slot + # job.environment is the environment name ("staging") + group: deploy-${{ job.environment }} + cancel-in-progress: false # queue — do not skip any deploys + steps: + - run: ./deploy.sh staging + - language: yaml + label: 'MULTI-ENV — staging and production each get their own independent slot' + code: | + on: + push: + branches: [main] + + jobs: + deploy-staging: + runs-on: ubuntu-latest + environment: staging + concurrency: + group: deploy-${{ job.environment }} # "staging" slot + cancel-in-progress: false + steps: + - run: ./deploy.sh staging + + deploy-production: + needs: deploy-staging + runs-on: ubuntu-latest + environment: production + concurrency: + group: deploy-${{ job.environment }} # "production" slot (separate) + cancel-in-progress: false + steps: + - run: ./deploy.sh production +prevention: + - 'For deployment workflows, key concurrency groups on the environment name (`job.environment`), not on the branch ref.' + - 'Use `cancel-in-progress: false` for deployment jobs — silently dropping a deploy means a commit never reaches the environment.' + - 'Apply per-branch concurrency groups to CI jobs only; apply per-environment concurrency groups to deploy jobs.' 
+ - 'Set job-level `concurrency:` (not workflow-level) when using `job.environment`, since that context is only available within a job.' +docs: + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-concurrency' + label: 'GitHub Docs — Using concurrency' + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/accessing-contextual-information-about-workflow-runs#job-context' + label: 'GitHub Docs — job context (job.environment)' + - url: 'https://docs.github.com/en/actions/managing-workflow-runs-and-deployments/managing-deployments/managing-environments-for-deployment' + label: 'GitHub Docs — Managing environments for deployment' diff --git a/errors/known-unsolved/ku-143.yml b/errors/known-unsolved/ku-143.yml new file mode 100644 index 0000000..f79351e --- /dev/null +++ b/errors/known-unsolved/ku-143.yml @@ -0,0 +1,126 @@ +id: known-unsolved-143 +title: 'JIT Token Expires During Sequential max-parallel:1 Matrix Workflows' +category: known-unsolved +severity: error +tags: + - jit-runner + - self-hosted + - sequential + - max-parallel + - matrix + - token-expiry + - ephemeral +patterns: + - regex: 'The operation was canceled\.' + flags: 'i' + - regex: 'jit.*token.*expir|token.*expir.*jit' + flags: 'i' + - regex: 'Failed to connect to the GitHub Actions service' + flags: 'i' +error_messages: + - 'The operation was canceled.' + - 'Error: The operation was canceled.' + - 'Jobs 11+ fail with "The operation was canceled"' +root_cause: | + JIT (Just-In-Time) runner tokens are scoped to a single job and expire after + approximately 60 minutes. When a matrix workflow uses `max-parallel: 1` to + enforce sequential execution, later jobs queue behind earlier ones. If the total + cumulative runtime of all preceding jobs exceeds ~60 minutes, the JIT token for + the waiting runner expires before GitHub dispatches the job to it. + + The failure mode: + 1. 
A serverless runner (Modal, AWS Lambda, Fargate, etc.) fetches a JIT config + on webhook receipt for job N. + 2. Jobs 1–(N-1) run sequentially (max-parallel: 1), each taking several minutes. + 3. By the time jobs 10+ become eligible to run, the 60-minute JIT token for those + runners has expired. + 4. The runner attempts to connect to the GitHub Actions service, but the token + is no longer valid — the connection is cancelled. + 5. The job fails with "The operation was canceled." even though the runner process + started and the worker code is intact. + + This is a fundamental GitHub Actions JIT architecture limitation: JIT tokens are + not renewable and have no configurable TTL. There is no server-side mechanism to + refresh a JIT token while it is waiting in the queue. + + This differs from the false-positive "lost communication" error (known-unsolved-058), + which affects already-running ephemeral jobs that complete successfully but appear + to disconnect. The JIT sequential expiry causes actual job failure before the job + begins executing user steps. +fix: | + There is no direct fix — the 60-minute JIT token TTL is enforced by GitHub and is + not configurable. The following architectural workarounds are available: + + 1. Reduce total sequential runtime below 60 minutes: + - Combine short jobs into fewer, longer jobs to reduce the number of sequential + steps that must queue. + - Profile which matrix slices are slow and optimize or parallelize them. + + 2. Increase max-parallel (remove the strict sequential constraint): + - If ordering is required only between specific jobs, use `needs:` chains instead + of `max-parallel: 1` on a single matrix. + - This allows later jobs to obtain fresh JIT tokens earlier without waiting. + + 3. Use persistent (non-ephemeral) self-hosted runners: + - Persistent runners hold a long-lived registration token, not a JIT token. + - They do not expire while waiting in the queue. 
+ - Trade-off: persistent runners have higher operational overhead. + + 4. Delay JIT token fetch until job dispatch (not webhook receipt): + - If the runner platform supports it, fetch the JIT config lazily at dispatch + time rather than pre-fetching on webhook receipt. + - This avoids holding a token that expires before the job starts. +fix_code: + - language: yaml + label: 'Replace max-parallel:1 matrix with needs: chain to avoid JIT expiry' + code: | + # ❌ Problematic: max-parallel:1 matrix — job 11+ JIT token expires after 60 min + jobs: + sequential-work: + runs-on: [self-hosted, ephemeral] + strategy: + max-parallel: 1 + matrix: + job_id: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + + # ✅ Workaround A: Remove max-parallel constraint so JIT tokens are fetched + # when jobs actually start, not when the workflow is dispatched + jobs: + parallel-work: + runs-on: [self-hosted, ephemeral] + strategy: + matrix: + job_id: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + + # ✅ Workaround B: Use needs: chains for true ordering without JIT expiry + jobs: + job-1: + runs-on: [self-hosted, ephemeral] + steps: [...] + job-2: + needs: job-1 + runs-on: [self-hosted, ephemeral] + steps: [...] + - language: yaml + label: 'Use persistent runners when sequential execution with long total runtime is required' + code: | + # Persistent runners are not affected by the 60-min JIT token TTL + jobs: + sequential-build: + runs-on: [self-hosted, linux, persistent] # NOT ephemeral + strategy: + max-parallel: 1 + matrix: + job_id: [1, 2, 3, ..., 37] # 37 x 6 min = 222 min total — safe on persistent +prevention: + - 'Estimate total sequential runtime before using max-parallel:1 with JIT runners: N_jobs × avg_job_minutes must stay under 60 minutes.' + - 'Prefer needs: dependency chains over max-parallel:1 for ordered execution with ephemeral JIT runners.' + - 'Use persistent self-hosted runners for long-running sequential workflows that cannot be parallelized.' 
+ - 'Monitor for "The operation was canceled." errors on jobs with high matrix indices — they are the signature of JIT token expiry, not infra failures.' +docs: + - url: 'https://github.com/actions/runner/issues/4248' + label: 'actions/runner#4248 — JIT Token Expiration with Long-Running Sequential Workflows (2 comments, June 2026)' + - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/autoscaling-with-self-hosted-runners#using-just-in-time-runners' + label: 'GitHub Docs — Just-in-time (JIT) runners' + - url: 'https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#jobsjob_idstrategymax-parallel' + label: 'Workflow syntax — jobs.<job_id>.strategy.max-parallel' diff --git a/errors/runner-environment/checkout-git-config-global-auth-placeholder-fail.yml b/errors/runner-environment/checkout-git-config-global-auth-placeholder-fail.yml new file mode 100644 index 0000000..2c03885 --- /dev/null +++ b/errors/runner-environment/checkout-git-config-global-auth-placeholder-fail.yml @@ -0,0 +1,89 @@ +id: runner-environment-497 +title: 'actions/checkout Auth Setup Fails When GIT_CONFIG_GLOBAL Is Set in Runner Environment' +category: runner-environment +severity: error +tags: + - checkout + - git-config + - GIT_CONFIG_GLOBAL + - self-hosted + - auth + - submodules + - git-credentials +patterns: + - regex: 'Unable to replace auth placeholder in.*?\.gitconfig' + flags: 'i' + - regex: 'Error.*?Unable to replace.*?placeholder.*?gitconfig' + flags: 'i' + - regex: 'Failed to configure the global config' + flags: 'i' +error_messages: + - "##[error]Unable to replace auth placeholder in /home/runner/work/_temp/<uuid>/.gitconfig" + - "Error: Unable to replace auth placeholder" +root_cause: | + When actions/checkout sets up global git authentication (triggered by submodules: true, + persist-credentials: true, or private-repo checkout), it creates a temporary gitconfig + by following these steps: + + 1. 
Copies ~/.gitconfig into a temp directory (e.g. /home/runner/work/_temp/<uuid>/). + 2. Overrides the HOME environment variable to point at that temp directory. + 3. Writes an auth token placeholder via `git config --global url.<base>.insteadOf` + — expecting it to land in the temp config because HOME now points there. + 4. Reads the temp config back and replaces the placeholder with the real token. + + The problem: git honours GIT_CONFIG_GLOBAL OVER HOME when locating the global + config file. If GIT_CONFIG_GLOBAL is already set in the runner environment (a common + practice on self-hosted runners to isolate per-job git config), step 3 writes to + THAT file instead of the temp config. Step 4 reads the temp config, finds no + placeholder, and fails with "Unable to replace auth placeholder". + + This is an open bug in actions/checkout (issue #2449). The checkout action only + overrides HOME, but never pins GIT_CONFIG_GLOBAL. +fix: | + Unset GIT_CONFIG_GLOBAL for the checkout step using the step-level env block: + + - name: Checkout + uses: actions/checkout@v4 + env: + GIT_CONFIG_GLOBAL: "" # unset so checkout controls global config + with: + submodules: true + + If using actions/checkout@v6, also try persist-credentials: false combined with + manually configured HTTPS credentials. Track actions/checkout#2449 for an + official fix that pins GIT_CONFIG_GLOBAL alongside HOME. 
+fix_code: + - language: yaml + label: Unset GIT_CONFIG_GLOBAL for the checkout step + code: | + - name: Checkout + uses: actions/checkout@v4 + env: + GIT_CONFIG_GLOBAL: "" # prevent git from redirecting config writes + with: + submodules: true + token: ${{ secrets.GITHUB_TOKEN }} + - language: yaml + label: Unset at job level to protect all checkout steps + code: | + jobs: + build: + runs-on: self-hosted + env: + GIT_CONFIG_GLOBAL: "" # reset for all steps in this job + steps: + - uses: actions/checkout@v4 + with: + submodules: true +prevention: + - "Avoid setting GIT_CONFIG_GLOBAL at workflow or job level when using actions/checkout." + - "On self-hosted runners, use GIT_CONFIG_NOSYSTEM=1 instead of GIT_CONFIG_GLOBAL for job isolation where possible." + - "If GIT_CONFIG_GLOBAL must be set on the runner host, override it to an empty string in the checkout step env block." + - "Track actions/checkout#2449 and upgrade to the patched version once released." +docs: + - url: "https://github.com/actions/checkout/issues/2449" + label: "actions/checkout issue #2449 — Fix global auth when GIT_CONFIG_GLOBAL is set" + - url: "https://git-scm.com/docs/git-config#Documentation/git-config.txt-GITCONFIGGLOBAL" + label: "Git documentation — GIT_CONFIG_GLOBAL environment variable" + - url: "https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions" + label: "GitHub Docs — Security hardening for GitHub Actions" diff --git a/errors/runner-environment/checkout-v6-credentials-docker-container-action-not-available.yml b/errors/runner-environment/checkout-v6-credentials-docker-container-action-not-available.yml new file mode 100644 index 0000000..7d757a1 --- /dev/null +++ b/errors/runner-environment/checkout-v6-credentials-docker-container-action-not-available.yml @@ -0,0 +1,111 @@ +id: runner-environment-498 +title: 'actions/checkout@v6 Persisted Credentials Not Available Inside Subsequent Docker Container Actions' +category: 
runner-environment +severity: error +tags: + - checkout + - checkout-v6 + - docker + - container-action + - credentials + - git-auth + - persist-credentials + - v6-regression +patterns: + - regex: 'fatal: could not read Username for .https://github\.com.: terminal prompts disabled' + flags: 'i' + - regex: 'fatal: repository .https://github\.com/.*?. not found' + flags: 'i' + - regex: 'remote: Repository not found\.' + flags: 'i' + - regex: 'Authentication failed for .https://github\.com' + flags: 'i' +error_messages: + - "fatal: could not read Username for 'https://github.com': terminal prompts disabled" + - "remote: Repository not found." + - "fatal: repository 'https://github.com/<owner>/<repo>/' not found" + - "Error: Authentication failed for 'https://github.com/'" +root_cause: | + actions/checkout@v6 introduced a new credential persistence mechanism that requires + runner v2.329.0+. In this design the checkout action stores git credentials via the + runner's credential-store rather than writing to the container's filesystem, enabling + persistent access across steps. + + However, subsequent DOCKER CONTAINER ACTIONS run inside an isolated container + filesystem that does not mount or inherit the host runner's credential store. Git + operations inside a Docker container action that attempt to authenticate against + GitHub will fail because: + + 1. The credential helper configured by checkout@v6 points to a host-side file or + socket that is not mounted inside the container. + 2. The container's git has no fallback credentials and interactive prompts are + disabled in CI (GIT_TERMINAL_PROMPT=0). + 3. Even with persist-credentials: true (the default), the container cannot read + the stored credentials. + + The v6-beta release notes stated that Docker container action support would be + available from runner v2.329.0+, but this was not fully implemented as of June 2026. + This is an open upstream bug (actions/checkout#2359). 
+fix: | + Downgrade to actions/checkout@v4 in any workflow that includes Docker container + actions that require git access. checkout@v4 stores credentials in ~/.git-credentials + on the host filesystem, which IS accessible to Docker container actions via the + default workspace mount. + + - name: Checkout + uses: actions/checkout@v4 # v4 credentials are accessible to Docker container actions + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + persist-credentials: true + + If v6 is required, pass the token to the Docker container action explicitly as an + environment variable and configure git inside the container manually. +fix_code: + - language: yaml + label: Downgrade to checkout@v4 when workflow uses Docker container actions + code: | + steps: + # v4 stores credentials in ~/.git-credentials accessible to Docker containers + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + persist-credentials: true + + # Docker container action can now access git credentials + - name: Run Docker container action + uses: org/docker-container-action@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + - language: yaml + label: Pass token explicitly to Docker container action (v6 workaround) + code: | + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + persist-credentials: false # disable v6 mechanism + + # Manually configure credentials accessible inside the container + - name: Configure git credentials + run: | + git config --global credential.helper store + echo "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com" \ + >> ~/.git-credentials + + - name: Run Docker container action + uses: org/docker-container-action@v1 +prevention: + - "Check the release notes before upgrading checkout versions in workflows that include Docker container actions." + - "Pin checkout to @v4 in workflows that use Docker container actions until actions/checkout#2359 is resolved." 
+ - "After upgrading checkout versions, verify end-to-end in a test branch before rolling out to all workflows." + - "Track actions/checkout#2359 for the upstream fix and re-evaluate once it ships." +docs: + - url: "https://github.com/actions/checkout/issues/2359" + label: "actions/checkout issue #2359 — v6 credentials don't work with Docker container actions" + - url: "https://github.com/actions/checkout/releases/tag/v6.0.0" + label: "actions/checkout v6 release notes" + - url: "https://docs.github.com/en/actions/sharing-automations/creating-actions/creating-a-docker-container-action" + label: "GitHub Docs — Creating a Docker container action" diff --git a/errors/silent-failures/sf-231.yml b/errors/silent-failures/sf-231.yml new file mode 100644 index 0000000..92dcbdb --- /dev/null +++ b/errors/silent-failures/sf-231.yml @@ -0,0 +1,107 @@ +id: silent-failures-231 +title: '`job.workflow_ref` and `job.workflow_*` Context Properties Return Empty String on Runner < v2.334.0' +category: silent-failures +severity: silent-failure +tags: + - job-context + - workflow-ref + - workflow-sha + - self-hosted + - runner-version + - context-empty + - reusable-workflow +patterns: + - regex: 'job\.workflow_ref|job\.workflow_sha|job\.workflow_repository|job\.workflow_file_path' + flags: 'i' +error_messages: + - '${{ job.workflow_ref }} expands to empty string' + - 'job.workflow_ref is empty' + - 'workflow_ref context not available' +root_cause: | + GitHub Actions runner v2.334.0 (released April 21, 2026) added four new typed + accessors to the `job` expression context (PR #4335): + + - `job.workflow_ref` — full ref (owner/repo/.github/workflows/file.yml@refs/heads/branch) + - `job.workflow_sha` — commit SHA of the resolved workflow file + - `job.workflow_repository` — owner/repo of the workflow file + - `job.workflow_file_path` — .github/workflows/file.yml path only + + These properties are particularly useful in reusable workflows that need to identify + their own source identity 
at runtime. + + On any self-hosted runner older than v2.334.0, these properties are not populated + by the runner's JobContext hydration. Because GitHub Actions silently treats unknown + context properties as empty strings (rather than erroring), workflows that reference + `${{ job.workflow_ref }}` will appear to succeed but the expression expands to an + empty string. Downstream steps that depend on a non-empty value — such as conditional + checks, logging, or attestation steps — will silently produce wrong results. + + GitHub-hosted runners (ubuntu-latest, windows-latest, macos-latest) are always on a + recent runner version and are not affected. Only self-hosted runners that have not been + updated since before April 21, 2026 are impacted. +fix: | + Update self-hosted runners to v2.334.0 or later. The runner auto-updates when a new + version is available if `ACTIONS_RUNNER_UPDATE_VERSION` is set, or update manually via + the runner configuration script. + + To verify a runner's version: check the "Set up job" step in any workflow run — it logs + "Runner name: ..., Runner version: 2.XXX.X". Alternatively, check the runner's + _diag/Worker_*.log file for the version string. + + As a short-term workaround before updating, use the `github` context instead: + - `github.workflow_ref` — available in all runner versions as a top-level context + - `github.workflow_sha` — similarly available top-level + + Note: `github.workflow_ref` has been available since runner v2.304.0 and is safe + to use as a drop-in substitute for `job.workflow_ref` in most scenarios. 
+fix_code: + - language: yaml + label: 'Use github.workflow_ref instead of job.workflow_ref for compatibility' + code: | + jobs: + identify: + runs-on: self-hosted + steps: + # ❌ Empty on runner < v2.334.0: + - run: 'echo "Workflow ref: ${{ job.workflow_ref }}"' + + # ✅ Available on all modern runners (since v2.304.0): + - run: 'echo "Workflow ref: ${{ github.workflow_ref }}"' + + # ✅ job.workflow_* safe to use once runners are updated to v2.334.0+: + - run: | + echo "ref: ${{ job.workflow_ref }}" + echo "sha: ${{ job.workflow_sha }}" + echo "repo: ${{ job.workflow_repository }}" + echo "file path: ${{ job.workflow_file_path }}" + - language: yaml + label: 'Check runner version in a prior step before relying on job.workflow_ref' + code: | + jobs: + debug: + runs-on: self-hosted + steps: + - name: Check runner version (must be >= 2.334.0 for job.workflow_*) + run: echo "Runner version is shown in the Set up job log header" + + - name: Safe usage — falls back to github.workflow_ref if needed + run: | + # job.workflow_ref is safe on v2.334.0+; use github.workflow_ref as fallback + JOB_REF="${{ job.workflow_ref }}" + GITHUB_REF="${{ github.workflow_ref }}" + EFFECTIVE_REF="${JOB_REF:-$GITHUB_REF}" + echo "Effective workflow ref: $EFFECTIVE_REF" +prevention: + - 'Keep self-hosted runners updated to the latest version to receive new context properties as they are added.' + - 'In reusable workflows, prefer `github.workflow_ref` over `job.workflow_ref` for maximum runner version compatibility.' + - 'When upgrading workflows to use new context properties, add a version note comment indicating the minimum required runner version.' + - 'Test workflows against self-hosted runners with the actual installed runner version before relying on recently added context properties.' 
+docs: + - url: 'https://github.com/actions/runner/pull/4335' + label: 'actions/runner PR #4335 — feat: add job.workflow_* typed accessors to JobContext (shipped in v2.334.0)' + - url: 'https://github.com/actions/runner/releases/tag/v2.334.0' + label: 'GitHub Actions Runner v2.334.0 release notes (April 21, 2026)' + - url: 'https://docs.github.com/en/actions/writing-workflows/contexts#job-context' + label: 'GitHub Docs — job context properties' + - url: 'https://docs.github.com/en/actions/writing-workflows/contexts#github-context' + label: 'GitHub Docs — github context (github.workflow_ref available as fallback)' diff --git a/errors/silent-failures/sf-232.yml b/errors/silent-failures/sf-232.yml new file mode 100644 index 0000000..5599919 --- /dev/null +++ b/errors/silent-failures/sf-232.yml @@ -0,0 +1,127 @@ +id: silent-failures-232 +title: '`actions/checkout` v6.0.0–v6.0.1 Silently Converts Annotated Tags to Lightweight Tags' +category: silent-failures +severity: silent-failure +tags: + - checkout + - annotated-tags + - lightweight-tags + - git-tag + - tagger-metadata + - v6 + - tag-annotation + - version-pinned +patterns: + - regex: 'taggeremail|taggername|taggerdate' + flags: 'i' + - regex: 'git tag.*--format.*tagger|tag.*points-at.*tagger' + flags: 'i' +error_messages: + - 'git tag --list --points-at HEAD --format ''%(taggeremail)'' returns empty' + - 'git cat-file tag v1.2.3 — not a tag object' + - 'error: v1.2.3 is not a tag object' +root_cause: | + `actions/checkout` v6.0.0 and v6.0.1 fetch a tag by its commit hash rather than + by the tag reference itself. The fetch refspec used was: + + git fetch origin +{COMMIT_SHA}:refs/tags/{TAG_NAME} + + This creates a local tag that is a direct reference to the commit object, stripping + any tag object that wrapped it. Annotated tags created with `git tag -a` contain a + tag object with tagger metadata (name, email, date, message), which is separate from + the commit object. 
By fetching the commit SHA directly instead of the tag refspec, + the tag object is never downloaded — only the commit is fetched. + + The result: the local tag exists and `git checkout refs/tags/v1.2.3` works, but: + - `git tag -l --points-at HEAD --format '%(taggeremail)'` returns empty + - `git cat-file -t v1.2.3` returns "commit" instead of "tag" + - `git log --no-walk --tags --pretty=format:%D` omits the annotation + - Tools that read tagger information (release scripts, changelogs, signing verifiers) + silently get wrong or empty metadata + + This is a silent failure because: the checkout step succeeds with exit 0, the tag + name is present, git commands that only need the commit (e.g. `git log`) work fine. + Only operations that specifically need the tag object annotation fail. + + Fixed in `actions/checkout` v6.0.2 (PR #2356, released January 9, 2026). The fix + uses `+refs/tags/*:refs/tags/*` as the refspec, fetching the actual tag objects. +fix: | + Upgrade `actions/checkout` to v6.0.2 or later. The fix is already included in the + floating `v6` tag, so workflows using `uses: actions/checkout@v6` are already + unaffected. + + Workflows pinned to a specific SHA or to `v6.0.0` / `v6.0.1` must be updated. + + To verify you are on a fixed version: check the "Set up job" log which shows the + resolved action SHA, or add a diagnostic step to verify tag annotation is preserved. +fix_code: + - language: yaml + label: 'Upgrade checkout to v6.0.2+ to preserve annotated tag metadata' + code: | + steps: + # ❌ Broken — annotated tag annotation is stripped: + - uses: actions/checkout@v6.0.0 # or v6.0.1 + with: + ref: ${{ github.ref }} # e.g. 
refs/tags/v1.2.3 + + # ✅ Fixed — upgrade to v6.0.2 or floating v6 tag: + - uses: actions/checkout@v6.0.2 + with: + ref: ${{ github.ref }} + + # ✅ Or use floating v6 (always gets the latest patch): + - uses: actions/checkout@v6 + with: + ref: ${{ github.ref }} + - language: yaml + label: 'Diagnostic step to verify annotated tag annotation is preserved' + code: | + steps: + - uses: actions/checkout@v6.0.2 + with: + ref: ${{ github.ref }} + + - name: Verify annotated tag annotation is present + if: startsWith(github.ref, 'refs/tags/') + run: | + TAG="${{ github.ref_name }}" + TYPE=$(git cat-file -t "$TAG" 2>/dev/null || echo "not-found") + if [ "$TYPE" = "tag" ]; then + echo "✅ Annotated tag object preserved" + git tag -l --points-at HEAD --format '%(taggername) %(taggeremail) %(taggerdate)' + elif [ "$TYPE" = "commit" ]; then + echo "❌ Tag is a lightweight reference to commit — annotation was stripped" + echo "Upgrade to actions/checkout@v6.0.2 or later" + exit 1 + else + echo "⚠️ Tag $TAG not found locally" + fi + - language: yaml + label: 'Workaround for pinned old versions: re-fetch tag by refspec after checkout' + code: | + steps: + # If upgrading checkout is not immediately possible, re-fetch the tag object: + - uses: actions/checkout@v6.0.0 # or v6.0.1 + with: + ref: ${{ github.ref }} + + - name: Re-fetch tag to restore annotation (workaround for v6.0.0/v6.0.1) + if: startsWith(github.ref, 'refs/tags/') + run: | + # Fetch the actual tag object (not just the commit SHA) + git fetch -f origin "${{ github.ref }}:${{ github.ref }}" + echo "Tag type is now: $(git cat-file -t '${{ github.ref_name }}')" +prevention: + - 'Pin to `actions/checkout@v6` (floating) or `v6.0.2+` to avoid this regression in v6.0.0/v6.0.1.' + - 'In tag-triggered release workflows, add a diagnostic step verifying `git cat-file -t ${{ github.ref_name }}` returns "tag" not "commit".' 
+ - 'Avoid leaving checkout pinned to pre-patch SHAs (v6.0.0/v6.0.1) — re-pin to the v6.0.2 commit or use the floating major tag (`v6`), which always includes the latest bug fixes.' + - 'Tools that read tagger metadata (GPG signing verifiers, changelog generators, `git describe` with annotation) should include a pre-flight check for tag type.' +docs: + - url: 'https://github.com/actions/checkout/pull/2356' + label: 'actions/checkout PR #2356 — Fix tag handling: preserve annotations and explicit fetch-tags (fixed in v6.0.2)' + - url: 'https://github.com/actions/checkout/issues/290' + label: 'actions/checkout#290 — Preserve tag annotations (original report, 2020)' + - url: 'https://github.com/actions/checkout/releases/tag/v6.0.2' + label: 'actions/checkout v6.0.2 release notes (January 9, 2026)' + - url: 'https://git-scm.com/book/en/v2/Git-Basics-Tagging' + label: 'Git documentation — Annotated Tags vs Lightweight Tags' diff --git a/errors/triggers/workflow-run-requested-type-fires-before-upstream-executes.yml b/errors/triggers/workflow-run-requested-type-fires-before-upstream-executes.yml new file mode 100644 index 0000000..841da6e --- /dev/null +++ b/errors/triggers/workflow-run-requested-type-fires-before-upstream-executes.yml @@ -0,0 +1,131 @@ +id: tr-118 +title: '`workflow_run` `requested` Activity Type Fires When Upstream Is Queued — Before Execution, Artifacts Unavailable' +category: triggers +severity: silent-failure +tags: + - workflow_run + - requested + - activity-types + - artifacts + - default-branch + - timing + - pre-execution +patterns: + - regex: 'types:\s*\[.*\brequested\b' + flags: 'si' + - regex: 'types:\s*\n(\s+- [^\n]+\n)*\s+- requested' + flags: 'si' +error_messages: + - "Unable to find artifact" + - "Error: Artifact not found for associated workflow run" + - "No artifact found with name" +root_cause: | + The `workflow_run` event supports three activity types: `completed`, `in_progress`, + and `requested`. 
The `requested` type fires when the upstream workflow run is + **queued** — before any steps have executed. + + Developers who use `types: [requested]` expecting to access upstream workflow + results (artifacts, job outputs, step conclusions) will find nothing is available. + The upstream workflow has not run yet. + + Key behaviors of the `requested` type: + + 1. **No artifacts** — The upstream workflow has not uploaded anything yet. + 2. **Null conclusion** — `github.event.workflow_run.conclusion` is `null` at + request time; the run has not completed. + 3. **Fires for subsequently cancelled runs** — If the upstream workflow is cancelled + before execution (e.g., superseded by a concurrency group), the downstream + `requested`-triggered workflow still runs to completion, wasting runner minutes + processing a run that never produced output. + 4. **Default-branch requirement** — Like ALL `workflow_run` triggers, the listening + workflow must be on the repository's default branch. Even if the upstream runs + on a feature branch, the downstream always executes from the default branch. + 5. **No upstream-blocking capability** — `workflow_run` is downstream-only. A + `requested` listener cannot pause or influence the upstream workflow. + + The docs say `requested` fires "when a workflow run is requested", which developers + interpret as "just before my CI runs — time to prepare". In practice, it fires the + moment the run is queued, with zero upstream data available. +fix: | + Use `completed` to access upstream artifacts, outputs, and conclusion: + + on: + workflow_run: + workflows: ["CI"] + types: [completed] + + Use `in_progress` to react as soon as the upstream workflow begins executing + (artifacts still unavailable but the run is confirmed started and not cancelled). 
+ + Use `requested` only for pure notification/audit workflows that need no upstream + results — for example, posting "CI run queued" to a webhook using only metadata + fields like `github.event.workflow_run.html_url` and `github.event.workflow_run.id`. + + If you need to set up resources BEFORE an upstream workflow runs, `workflow_run` + is not the right tool — it cannot block the upstream. Use job-dependency ordering + within a single workflow, or a `repository_dispatch` from a setup job. +fix_code: + - language: yaml + label: 'WRONG — requested type; artifacts unavailable; upstream may be queued but not run' + code: | + on: + workflow_run: + workflows: ["Build"] + types: [requested] # fires when Build is QUEUED, not when it finishes + + jobs: + process: + runs-on: ubuntu-latest + steps: + # FAILS: No artifact exists — Build has not executed yet + - uses: actions/download-artifact@v4 + with: + run-id: ${{ github.event.workflow_run.id }} + name: build-output + - language: yaml + label: 'CORRECT — use completed type and guard on conclusion' + code: | + on: + workflow_run: + workflows: ["Build"] + types: [completed] # fires AFTER Build finishes (artifacts available) + jobs: + process: + if: ${{ github.event.workflow_run.conclusion == 'success' }} + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ github.token }} # required for run-id to target another workflow run + name: build-output + - run: echo "Processing build artifact..." 
+ - language: yaml + label: 'VALID use of requested — notification only (no upstream artifacts needed)' + code: | + on: + workflow_run: + workflows: ["CI"] + types: [requested] # appropriate: notification only, no artifacts needed + + jobs: + notify: + runs-on: ubuntu-latest + steps: + - name: Post CI started notification + run: | + curl -s -X POST "$WEBHOOK_URL" \ + -d "CI run queued: ${{ github.event.workflow_run.html_url }}" + env: + WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }} +prevention: + - 'Use `completed` type when your downstream workflow needs upstream artifacts, outputs, or conclusion.' + - 'Use `in_progress` to react as soon as the upstream workflow begins executing (no artifacts yet, but run is confirmed started).' + - '`requested` is only appropriate for workflows that need NO upstream results — pure notifications or audit logs.' + - 'Always guard `completed`-triggered jobs with `if: github.event.workflow_run.conclusion == ''success''` to skip failed upstream runs.' + - 'Remember: all `workflow_run` listeners execute from the default branch regardless of the upstream workflow''s branch.' 
+docs: + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflow_run' + label: 'GitHub Docs — workflow_run event and activity types' + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#using-data-from-the-triggering-workflow' + label: 'GitHub Docs — Using data from the triggering workflow' diff --git a/errors/yaml-syntax/startswith-endswith-contains-case-insensitive-expression.yml b/errors/yaml-syntax/startswith-endswith-contains-case-insensitive-expression.yml new file mode 100644 index 0000000..21e135a --- /dev/null +++ b/errors/yaml-syntax/startswith-endswith-contains-case-insensitive-expression.yml @@ -0,0 +1,101 @@ +id: ys-120 +title: '`startsWith()`, `endsWith()`, and `contains()` Are Case-Insensitive in GitHub Actions Expressions' +category: yaml-syntax +severity: silent-failure +tags: + - expressions + - startsWith + - endsWith + - contains + - case-insensitive + - if-condition +patterns: + - regex: '\$\{\{[^}]*\bstartsWith\s*\([^}]*\}\}' + flags: 'i' + - regex: '\$\{\{[^}]*\bendsWith\s*\([^}]*\}\}' + flags: 'i' + - regex: '\$\{\{[^}]*\bcontains\s*\([^}]*\}\}' + flags: 'i' +error_messages: + - "# No error message — case-insensitive match succeeds silently when developer expects case-sensitive behavior" +root_cause: | + The `startsWith()`, `endsWith()`, and `contains()` functions in GitHub Actions + expressions are **case-insensitive** — they match regardless of letter casing. This + is documented but commonly overlooked, because the same functions in most programming + languages (JavaScript, Python, Go) perform case-sensitive comparisons. + + Example: `startsWith('refs/heads/Release/1.0', 'refs/heads/release/')` evaluates + to `true` despite the capital `R` in `Release`, because the comparison ignores case. + + This causes two categories of problems: + + 1. 
**Unintended matches** — A condition meant to fire only for lowercase `release/` + branches also fires for `RELEASE/`, `Release/`, or any mixed-case variant. Branch + protection gates, deploy guards, and environment filters may activate unexpectedly. + + 2. **Misleading specificity** — Developers write precise-looking conditions such as + `startsWith(github.ref, 'refs/heads/Hotfix/')` assuming only that exact casing + matches, but the condition matches `hotfix/`, `HOTFIX/`, and every other variant. + + GitHub Actions expressions use `System.String.StartsWith` with `OrdinalIgnoreCase` + internally, making these three functions behave differently from most languages. +fix: | + Expressions have no case-sensitive string comparison: `==` and `!=` also ignore + case, and no `toLower()` / `toUpper()` functions exist to normalize casing: + + - `github.ref == 'refs/heads/Release/1.0'` is `true` for `refs/heads/release/1.0` + - For a **case-sensitive** check, compare in a shell step instead (for example + `[ "$GITHUB_REF" = "refs/heads/release/1.0" ]` in bash) and branch on the + result inside that step + + If you WANT case-insensitive matching, no change is needed — just add a comment + to document that the behavior is intentionally case-insensitive so future + maintainers do not "fix" it. +fix_code: + - language: yaml + label: 'SURPRISING — startsWith matches Release/ even though pattern is lowercase release/' + code: | + on: [push] + jobs: + deploy: + runs-on: ubuntu-latest + # Developer assumes this only matches refs/heads/release/* (lowercase) + # ACTUAL: also matches refs/heads/Release/*, refs/heads/RELEASE/*, etc. 
+ if: ${{ startsWith(github.ref, 'refs/heads/release/') }} + steps: + - run: echo "Deploying release branch" + - language: yaml + label: 'EXPLICIT — comment documents the intentional case-insensitive match' + code: | + on: [push] + jobs: + deploy: + runs-on: ubuntu-latest + # Intentional: startsWith() ignores case, so Release/ and RELEASE/ + # branches deploy too — do not "fix" this with extra normalization + if: ${{ startsWith(github.ref, 'refs/heads/release/') }} + steps: + - run: echo "Deploying release branch (any casing)" + - language: yaml + label: 'CASE-SENSITIVE — compare in a shell step (== also ignores case)' + code: | + on: [push] + jobs: + deploy-exact: + runs-on: ubuntu-latest + steps: + # The shell string test is case-sensitive; expression == is not + - run: | + if [ "$GITHUB_REF" = "refs/heads/release/1.0" ]; then echo "Deploying exactly refs/heads/release/1.0"; fi +prevention: + - 'Treat `startsWith()`, `endsWith()`, and `contains()` as case-insensitive in GitHub Actions — document this in workflow comments when the behavior matters.' + - 'Do not rely on `toLower()` / `toUpper()` — GitHub Actions expressions have no such functions; state case-insensitive intent in a workflow comment instead.' + - 'Do not use `==` to get case-sensitive matching — it also ignores case; compare in a shell step when casing must be preserved.' + - 'Test branch-name filters with uppercase and mixed-case branch names to confirm the intended match behavior.' 
+docs: + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#startswith' + label: 'GitHub Docs — startsWith function (case-insensitive note)' + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#endswith' + label: 'GitHub Docs — endsWith function (case-insensitive note)' + - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#contains' + label: 'GitHub Docs — contains function (case-insensitive note)'