From acfcc1700a90cd2b11b4c3852774ac40bd31fd3c Mon Sep 17 00:00:00 2001 From: Hector Flores Date: Thu, 11 Jun 2026 21:27:20 -0500 Subject: [PATCH] feat: add 3 new error entries (runner-environment x3) --- ...runner-oom-score-adj-permission-denied.yml | 130 ++++++++++++++++++ ...-arm-wine-programs-hang-broken-package.yml | 114 +++++++++++++++ ...m64-larger-runner-docker-not-installed.yml | 127 +++++++++++++++++ 3 files changed, 371 insertions(+) create mode 100644 errors/runner-environment/kubernetes-arc-runner-oom-score-adj-permission-denied.yml create mode 100644 errors/runner-environment/ubuntu-2404-arm-wine-programs-hang-broken-package.yml create mode 100644 errors/runner-environment/ubuntu-arm64-larger-runner-docker-not-installed.yml diff --git a/errors/runner-environment/kubernetes-arc-runner-oom-score-adj-permission-denied.yml b/errors/runner-environment/kubernetes-arc-runner-oom-score-adj-permission-denied.yml new file mode 100644 index 0000000..1dffb67 --- /dev/null +++ b/errors/runner-environment/kubernetes-arc-runner-oom-score-adj-permission-denied.yml @@ -0,0 +1,130 @@ +id: runner-environment-477 +title: 'Self-hosted Kubernetes/ARC runner: "Access to the path /proc/[pid]/oom_score_adj is denied" cancels job' +category: runner-environment +severity: error +tags: + - self-hosted + - kubernetes + - arc + - eks + - oom-score-adj + - unprivileged-container + - docker-in-docker + - dind +patterns: + - regex: 'Failed to update oom_score_adj for PID' + flags: 'i' + - regex: 'System\.UnauthorizedAccessException: Access to the path ''/proc/\d+/oom_score_adj'' is denied' + flags: 'i' + - regex: 'Access to the path.*oom_score_adj.*is denied' + flags: 'i' +error_messages: + - "Failed to update oom_score_adj for PID: 554." + - "System.UnauthorizedAccessException: Access to the path '/proc/554/oom_score_adj' is denied." 
+ - "System.IO.IOException: Permission denied" + - "at GitHub.Runner.Sdk.ProcessInvoker.WriteProcessOomScoreAdj(Int32 processId, Int32 oomScoreAdj)" +root_cause: | + The GitHub Actions runner writes an OOM (Out-Of-Memory) score adjustment to + `/proc/[pid]/oom_score_adj` for every subprocess it spawns. This helps the Linux + kernel's OOM killer preserve the runner process if the system runs low on memory. + + On **unprivileged containerized runners** — including: + - Actions Runner Controller (ARC) on Kubernetes + - Self-hosted runners on EKS with Docker-in-Docker (DinD) + - Custom Kubernetes deployments where the runner container runs without elevated privileges + + …the `/proc/[pid]/oom_score_adj` file is not writable by the runner process. + When the runner tries to set the OOM score for each spawned process, it receives + `System.UnauthorizedAccessException: Permission denied`, which causes the runner + to cancel the job mid-execution. + + This error repeats on every subprocess launch (once per step, per shell invocation), + flooding the runner diagnostic logs (`Worker_*.log`) and causing the job to be cancelled + or marked as failed depending on the runner version. + + **Fix introduced in runner v2.335.x:** A new environment variable + `ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ=true` (or `=1`) was added. When set, the runner + skips all `oom_score_adj` writes and emits a Verbose trace instead of throwing an exception. + Self-hosted runner operators on Kubernetes/ARC should set this variable to suppress the error. +fix: | + **Runner v2.335.x and later — set the env var (recommended):** + Set `ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ=true` in the runner container's environment. + This suppresses all `/proc/[pid]/oom_score_adj` writes without affecting job execution.
+ + For ARC (Actions Runner Controller) using a `RunnerDeployment` or `HorizontalRunnerAutoscaler`, + add the env var to the runner container spec: + + template: + spec: + containers: + - name: runner + env: + - name: ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ + value: "true" + + **Runner < v2.335.x — grant SYS_PTRACE or proc access:** + If upgrading the runner is not immediately possible, grant the container sufficient + privilege to write to `/proc/[pid]/oom_score_adj`. Options include: + - Adding `SYS_ADMIN` or `SYS_PTRACE` capability + - Running the container as root (not recommended for security) + - Using a Pod Security Policy / Pod Security Admission that allows procfs writes + + **Alternative — use `securityContext.allowPrivilegeEscalation: false` + upgrade runner:** + The `ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ` flag is the cleanest long-term fix. +fix_code: + - language: yaml + label: 'ARC RunnerDeployment — set ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ (runner v2.335.x+)' + code: | + apiVersion: actions.summerwind.dev/v1alpha1 + kind: RunnerDeployment + metadata: + name: my-runner + spec: + template: + spec: + containers: + - name: runner + image: ghcr.io/actions/actions-runner:latest + env: + - name: ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ + value: "true" + + - language: yaml + label: 'Docker Compose DinD runner — set env var' + code: | + services: + runner: + image: ghcr.io/actions/actions-runner:latest + environment: + - ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ=true + volumes: + - /var/run/docker.sock:/var/run/docker.sock + + - language: yaml + label: 'Kubernetes pod spec — securityContext + env var' + code: | + spec: + containers: + - name: runner + image: ghcr.io/actions/actions-runner:latest + env: + - name: ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ + value: "true" + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false + runAsNonRoot: true +prevention: + - "Always set ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ=true in the runner container environment
when deploying on Kubernetes or any unprivileged container environment." + - "Keep self-hosted runners updated to v2.335.x or later to benefit from the ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ env var support." + - "Monitor runner diagnostic logs in _diag/Worker_*.log for repeated oom_score_adj exceptions if jobs are unexpectedly cancelled." + - "When running on EKS, ARC, or custom K8s deployments, test a workflow that spawns multiple steps to verify no oom_score_adj errors appear before deploying to production." +docs: + - url: 'https://github.com/actions/runner/issues/3380' + label: 'actions/runner#3380: Self-hosted K8s runner fails with oom_score_adj permission denied' + - url: 'https://github.com/actions/runner/pull/4478' + label: 'actions/runner PR#4478: Allow skipping oom_score_adj write via ACTIONS_RUNNER_DISABLE_OOM_SCORE_ADJ env var (v2.335.x)' + - url: 'https://github.com/actions/actions-runner-controller/issues/3132' + label: 'actions-runner-controller#3132: Related ARC oom_score_adj issue' + - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners' + label: 'GitHub Docs: About self-hosted runners' diff --git a/errors/runner-environment/ubuntu-2404-arm-wine-programs-hang-broken-package.yml b/errors/runner-environment/ubuntu-2404-arm-wine-programs-hang-broken-package.yml new file mode 100644 index 0000000..c21c38f --- /dev/null +++ b/errors/runner-environment/ubuntu-2404-arm-wine-programs-hang-broken-package.yml @@ -0,0 +1,114 @@ +id: runner-environment-478 +title: 'ubuntu-24.04-arm: Wine programs hang or time out — broken Ubuntu 24.04 ARM Wine packaging' +category: runner-environment +severity: error +tags: + - ubuntu-24.04-arm + - arm64 + - wine + - cross-compile + - windows-arm + - packaging-bug +patterns: + - regex: 'wine.*timeout|wine.*hung|wine.*hang' + flags: 'i' + - regex: 'virtual_setup_exception stack overflow.*wine|022c:err:virtual' + flags: 'i' + - regex: 
'wine.*killed|timeout.*wine.*process' + flags: 'i' +error_messages: + - "Process killed after timeout running wine program" + - "022c:err:virtual:virtual_setup_exception stack overflow 880 bytes addr 0x6fffffd95894" + - "wine: could not exec the wine loader" +root_cause: | + The `wine` package shipped in **Ubuntu 24.04 (Noble Numbat)** for ARM64 was compiled without + a required build fix (tracked as Ubuntu bug #2102681 / Debian bug #1100695). The resulting + binary is incorrectly built and cannot reliably execute Windows ARM programs — programs + either hang on startup and are eventually killed by the job timeout, or throw + stack-overflow / exception-handling errors. + + This affects the `ubuntu-24.04-arm` GitHub Actions runner image, which uses the system + Ubuntu 24.04 ARM64 Wine package. The same issue does not affect: + - `ubuntu-22.04-arm` (older, working Wine build) + - `ubuntu-26.04` and `ubuntu-26.04-arm` (Wine packaging bug was fixed in 26.04) + + Workflows cross-compiling Windows ARM binaries with Meson, Cargo (via cross-compilation), + or other toolchains and then running them under Wine for testing will intermittently fail + because Wine hangs indefinitely waiting to handle the program's startup exception. + + On `ubuntu-26.04-arm`, Wine programs that throw C++ exceptions may additionally fail with: + `022c:err:virtual:virtual_setup_exception stack overflow` + This is a separate but related ARM Wine issue in the Ubuntu 26.04 package (as of June 2026). +fix: | + **Option 1 — Use ubuntu-22.04-arm (recommended short-term):** + Downgrade the runner to `ubuntu-22.04-arm`. The Wine package in Ubuntu 22.04 ARM64 + is correctly built and executes Windows ARM programs reliably. + + **Option 2 — Use a Docker container with a correctly compiled Wine:** + Use a container image with Wine compiled from source (without the packaging bug): + container: ghcr.io/mstorsjo/wine + + This image contains Wine correctly compiled for ARM64 and avoids the Ubuntu packaging issue.
+ + **Option 3 — Build Wine from source in the workflow:** + Install build dependencies and compile Wine from upstream source on the runner. This is + time-consuming but produces a correctly built binary. Only practical if compile time is + acceptable. + + **Option 4 — Wait for ubuntu-24.04-arm update or use ubuntu-26.04-arm:** + The fix is already in Ubuntu 26.04. Once Ubuntu 24.04 ARM receives a backported Wine fix + (unlikely given Ubuntu's backport policy for this type of packaging regression), or + ubuntu-26.04-arm becomes stable, migrating to that image will resolve the issue. +fix_code: + - language: yaml + label: 'Failing — ubuntu-24.04-arm with Wine programs hanging' + code: | + jobs: + test-wine: + runs-on: ubuntu-24.04-arm # Wine packaging bug causes hangs + steps: + - uses: actions/checkout@v4 + - name: Install Wine + run: sudo apt-get install -y wine + - name: Run Windows binary under Wine + run: wine ./my-windows-arm64.exe # hangs indefinitely + + - language: yaml + label: 'Fixed — use ubuntu-22.04-arm with working Wine' + code: | + jobs: + test-wine: + runs-on: ubuntu-22.04-arm # Wine package correctly built in 22.04 + steps: + - uses: actions/checkout@v4 + - name: Install Wine + run: sudo apt-get install -y wine + - name: Run Windows binary under Wine + run: wine ./my-windows-arm64.exe + + - language: yaml + label: 'Alternative — use Docker container with correctly compiled Wine' + code: | + jobs: + test-wine: + runs-on: ubuntu-24.04-arm + container: ghcr.io/mstorsjo/wine # correctly compiled Wine for ARM64 + steps: + - uses: actions/checkout@v4 + - name: Run Windows binary under Wine + run: wine ./my-windows-arm64.exe + +prevention: + - "Test Wine-dependent workflows on ubuntu-22.04-arm before relying on ubuntu-24.04-arm for ARM Wine execution." + - "Add a `wine --version` step early in the workflow and verify the Wine binary launches a trivial test program before running production binaries under Wine." 
+ - "If using Wine on ubuntu-26.04-arm, validate that C++ exception handling works correctly before relying on it in CI." + - "Pin Wine-dependent jobs to known-good runner images or use a Docker container image where Wine is compiled from upstream source." +docs: + - url: 'https://github.com/actions/runner-images/issues/14094' + label: 'runner-images#14094: [ubuntu-24.04-arm] Wine programs seem to hang' + - url: 'https://bugs.launchpad.net/ubuntu/+source/wine/+bug/2102681' + label: 'Ubuntu bug #2102681: wine incorrectly compiled in Ubuntu 24.04 ARM' + - url: 'https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1100695' + label: 'Debian bug #1100695: wine ARM packaging regression (fixed in Debian, backported to Ubuntu 26.04)' + - url: 'https://github.com/mstorsjo/wine' + label: 'mstorsjo/wine: Docker image with correctly compiled Wine for ARM64' diff --git a/errors/runner-environment/ubuntu-arm64-larger-runner-docker-not-installed.yml b/errors/runner-environment/ubuntu-arm64-larger-runner-docker-not-installed.yml new file mode 100644 index 0000000..5f05565 --- /dev/null +++ b/errors/runner-environment/ubuntu-arm64-larger-runner-docker-not-installed.yml @@ -0,0 +1,127 @@ +id: runner-environment-476 +title: 'Ubuntu ARM64 larger/partner runners missing Docker — "Unable to locate executable file: docker"' +category: runner-environment +severity: error +tags: + - ubuntu + - arm64 + - docker + - larger-runners + - partner-runners + - docker-buildx + - ecr +patterns: + - regex: 'Unable to locate executable file: docker' + flags: 'i' + - regex: 'failed to connect to the docker API at unix:///var/run/docker\.sock: connect: no such file or directory' + flags: 'i' + - regex: 'failed to initialize builder.*unix:///var/run/docker\.sock' + flags: 'i' +error_messages: + - "Unable to locate executable file: docker." + - "ERROR: failed to initialize builder ... 
failed to connect to the docker API at unix:///var/run/docker.sock: connect: no such file or directory" + - "ERROR: failed to initialize builder: could not create builder: failed to connect to the docker API at unix:///var/run/docker.sock: connect: no such file or directory" +root_cause: | + On GitHub-hosted Ubuntu ARM64 runners — particularly **partner runner images** (exposed via + `runs-on: ubuntu-24.04-arm`, `runs-on: ubuntu-22.04-arm`, and GitHub Actions larger runners + such as 8-core, 16-core, 32-core, and 64-core ARM64 variants) — Docker may be absent or + mis-configured in some image versions. + + The `partner-runner-images` repository documents Docker as pre-installed, but certain + image builds (particularly older variants and some larger-runner pool images) shipped + without Docker or with the Docker daemon socket absent from `/var/run/docker.sock`. + + This causes any step that invokes `docker`, `docker/setup-buildx-action`, `docker/login-action`, + `aws-actions/amazon-ecr-login`, or any Docker-dependent CLI to fail immediately. + + The issue was partially fixed for standard 2-core and 4-core ARM64 runners in June 2026 + (image version ~20260611+). However, some **64-core ARM64 larger runners** continued to + exhibit the same `docker.sock` missing symptom even after the standard-runner fix was + deployed, suggesting the larger runner pool images are on a slower rollout cadence. + + Root causes vary by pool: + - **Partner images** — Docker was missing from initial ARM64 image builds, corrected in the + June 2026 image update cycle. + - **Larger runners (64-core)** — still potentially affected after the fix shipped for smaller runner sizes, + indicating the fix did not propagate to all pool sizes simultaneously. +fix: | + **Option 1 — Re-run the failing job (short-term):** + The image rollout is incremental. Re-running the job may land it on an updated host where + Docker is present. This is only reliable once the fix has been fully deployed across all pool + sizes.
+ + **Option 2 — Install Docker in the workflow:** + If Docker is absent, add an explicit installation step before the first Docker-dependent step: + + - name: Install Docker + run: | + sudo apt-get update -y + sudo apt-get install -y docker.io + sudo systemctl start docker + sudo usermod -aG docker runner + + **Option 3 — Use a self-hosted ARM64 runner with Docker pre-installed:** + For consistent Docker availability, use a self-hosted ARM64 runner whose image is fully + controlled and includes Docker. + + **Option 4 — Check runner image version:** + Add a diagnostic step to confirm the Docker socket presence: + + - name: Check Docker availability + run: | + echo "Runner image version:" && cat /etc/image-release 2>/dev/null || true + which docker && docker version || echo "Docker not found on PATH" + ls -l /var/run/docker.sock 2>/dev/null || echo "docker.sock absent" +fix_code: + - language: yaml + label: 'Failing — Ubuntu ARM64 runner missing Docker' + code: | + jobs: + build: + runs-on: ubuntu-24.04-arm # Docker may be absent on partner/larger ARM runners + steps: + - uses: docker/setup-buildx-action@v3 # fails: Unable to locate executable file: docker + + - language: yaml + label: 'Workaround — install Docker explicitly if absent' + code: | + jobs: + build: + runs-on: ubuntu-24.04-arm + steps: + - name: Install Docker if absent + run: | + if ! 
command -v docker &>/dev/null; then + sudo apt-get update -y + sudo apt-get install -y docker.io + sudo systemctl start docker + sudo usermod -aG docker runner + fi + + - uses: docker/setup-buildx-action@v3 + + - language: yaml + label: 'Diagnostic — check runner image and Docker availability' + code: | + jobs: + diagnose: + runs-on: ubuntu-24.04-arm + steps: + - name: Runner and Docker diagnostic + run: | + uname -m + cat /etc/image-release 2>/dev/null || true + which docker && docker version || echo "docker not found" + ls -l /var/run/docker.sock 2>/dev/null || echo "docker.sock absent" +prevention: + - "Pin to a known-good image version using a specific `runs-on` image tag if your ARM64 workflow requires Docker and you experience intermittent failures." + - "Add a `docker version` or `docker info` step early in ARM64 workflows to fail fast with a clear error rather than a confusing downstream failure." + - "For production ARM64 + Docker workloads, prefer self-hosted runners where the image is fully controlled and validated." + - "Monitor runner-images release notes for ARM64 image updates that confirm Docker availability across all pool sizes." +docs: + - url: 'https://github.com/actions/runner-images/issues/14051' + label: 'runner-images#14051: Ubuntu 24.04 ARM64 missing Docker (June 2026, 7 reactions)' + - url: 'https://github.com/actions/partner-runner-images/blob/main/images/Ubuntu2404-Readme.md' + label: 'Partner runner images: Ubuntu 24.04 ARM64 README (Docker listed as installed)' + - url: 'https://docs.github.com/en/actions/using-github-hosted-runners/using-larger-runners/about-larger-runners' + label: 'GitHub Docs: About larger runners'