diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..43ad5c7 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,27 @@ +# EditorConfig — https://editorconfig.org. Encodes RigForge's house style so editors match CI +# (shfmt -i 4, yamllint, markdownlint) without per-editor setup. +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_style = space +indent_size = 4 + +# Shell is the core of the repo: 4-space indent, matching `shfmt -i 4` (see the Makefile lint target). +[*.sh] +indent_size = 4 + +# Make needs real tabs in recipes. +[Makefile] +indent_style = tab + +# YAML and JSON conventionally use 2-space indent. +[*.{yml,yaml,json,json.template}] +indent_size = 2 + +# In Markdown, two trailing spaces is a hard line break — don't strip it. +[*.md] +trim_trailing_whitespace = false diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 8ba6044..65ef1b1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -8,7 +8,7 @@ assignees: '' ## What happened -A clear description of the bug — what you expected and what actually happened. +A clear description of the bug: what you expected and what actually happened. ## Environment @@ -27,14 +27,14 @@ A clear description of the bug — what you expected and what actually happened. Relevant output from the setup script and/or the miner. On Linux: -``` +```bash sudo journalctl -u xmrig --no-pager | tail -n 50 ```
Logs -``` +```text paste logs here ``` diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 6464862..326b851 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -21,5 +21,5 @@ Other approaches you thought about, and why they fall short. ## Additional context -Anything else — links, related issues, or notes on portability across +Anything else: links, related issues, or notes on portability across Ubuntu/Debian/macOS. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 71e4f3c..53daf44 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,5 +6,5 @@ - [ ] `make lint` (ShellCheck + shfmt) passes locally - [ ] Changes are portable bash (Ubuntu/Debian and macOS) -- [ ] Docs updated (README / other) if behaviour or options changed +- [ ] Docs updated (README / other) if behavior or options changed - [ ] PR is focused and ready for review diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..f4e6d77 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,22 @@ +# Keep the SHA-pinned GitHub Actions current. Dependabot bumps the commit pin AND the trailing +# "# vX.Y.Z" comment together, and opens PRs for any security advisories affecting an action we use. +# +# Scope is github-actions ONLY (#117): RigForge is pure shell — no pip, npm, or docker ecosystems +# to track. The XMRig source is pinned by version+commit and verified at build time, not via a +# package manager, so it isn't a Dependabot ecosystem either. +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" # github-actions ecosystem watches .github/workflows/ + schedule: + interval: "weekly" + commit-message: + prefix: "ci" # -> "ci(deps): bump actions/checkout ..." 
+ include: "scope" + labels: + - "infra" + groups: + # One rollup PR for all action bumps rather than one-per-action — low-noise for a repo this small. + github-actions: + patterns: + - "*" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d6b6c56..f50ca1e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,43 @@ name: CI on: push: - branches: [main] + branches: [main, develop] # develop is the integration branch; main is the release branch pull_request: +# Least privilege: every job here only reads the repo to lint/test/build — nothing publishes or +# writes. Set the floor once at the top so all jobs inherit a read-only GITHUB_TOKEN (zizmor: +# excessive-permissions). +permissions: + contents: read + jobs: + lint-yaml: + name: Lint (yamllint) + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false # zizmor: artipacked + # pipx is preinstalled on ubuntu-24.04 (same path as diff-cover/zizmor); pin the version so the + # ruleset can't drift with the runner image. + - name: Install pinned yamllint + run: pipx install "yamllint==1.38.0" + # Via `make lint-yaml` so the file set + .yamllint config live in one place and can't drift from local. + - name: Run yamllint (make lint-yaml) + run: make lint-yaml + + lint-md: + name: Lint (markdownlint) + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false # zizmor: artipacked + # node is preinstalled on ubuntu-24.04; `make lint-md` runs the version-pinned markdownlint-cli2 + # via npx, reading .markdownlint-cli2.yaml — same invocation as local. 
+ - name: Run markdownlint (make lint-md) + run: make lint-md + lint: name: Lint (shellcheck + shfmt) runs-on: ubuntu-24.04 @@ -15,7 +48,9 @@ jobs: SHFMT_VERSION: "3.13.1" SHFMT_SHA256: "fb096c5d1ac6beabbdbaa2874d025badb03ee07929f0c9ff67563ce8c75398b1" steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false # no pushes from CI; don't leave the token in .git/config (zizmor: artipacked) # Install PINNED, checksum-verified shellcheck + shfmt instead of the runner's preinstalled # builds, so lint/format results are reproducible and don't drift with the runner image (#6). # Downloaded straight from the upstream releases — no `apt-get update`, avoiding the mirror @@ -45,7 +80,9 @@ jobs: name: Test suite runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false # zizmor: artipacked # Ubuntu is RigForge's supported target. The suite still exercises the macOS code path here via # stubs (OS_TYPE=Darwin + faked sysctl), so the macOS config profile is covered without a Mac # runner; contributors on macOS can also run `make test` locally. jq is preinstalled on the runner. @@ -56,7 +93,9 @@ jobs: name: Test suite (macOS) runs-on: macos-14 # Apple silicon; pinned (not macos-latest) to avoid image drift steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false # zizmor: artipacked # The Linux job only SIMULATES macOS (STUB_UNAME_S=Darwin + stubbed sed/launchctl). 
Here the suite # runs on real macOS, natively exercising the Darwin paths — BSD `sed` (the donate.h patch), the # macOS config profile, `mac_*` process control + the launchd login agent, and BSD `tar`/`date` @@ -78,7 +117,9 @@ jobs: name: End-to-end (Docker) runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false # zizmor: artipacked # Runs the real script end-to-end inside a disposable Ubuntu container (RigForge's documented # Linux target), exercising the genuine Linux deploy path and /etc idempotency with real tools. - name: Run end-to-end suite @@ -91,12 +132,16 @@ jobs: # Pin diff-cover (the patch-coverage gate); kcov + jq are pinned inside tests/coverage.sh. DIFF_COVER_VERSION: "10.3.0" PATCH_COVERAGE_MIN: "90" # new/changed lines in rigforge.sh + util must be this % covered + # Diff the PR against the branch it targets (develop or main). On a push, github.base_ref is empty, + # so fall back to the pushed branch itself — that's an empty diff that trivially passes. + BASE_REF: ${{ github.base_ref || github.ref_name }} steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 with: - fetch-depth: 0 # diff-cover needs history to diff the PR against origin/main + fetch-depth: 0 # diff-cover needs history to diff the PR against its base branch + persist-credentials: false # the fetch below is read-only on a public repo (zizmor: artipacked) - name: Fetch base branch for diff-cover - run: git fetch --no-tags origin main + run: git fetch --no-tags origin "$BASE_REF" # Runs tests/run.sh under a digest-pinned kcov image, reports rigforge.sh + util/proposed-grub.sh # coverage, and fails if the total drops below the committed floor (tests/coverage-floor.txt). 
- name: Measure coverage (kcov) + enforce the total floor @@ -106,4 +151,4 @@ jobs: - name: Install pinned diff-cover run: pipx install "diff-cover==${DIFF_COVER_VERSION}" - name: Enforce patch coverage on changed lines - run: diff-cover coverage/cobertura.xml --compare-branch=origin/main --fail-under="${PATCH_COVERAGE_MIN}" + run: diff-cover coverage/cobertura.xml --compare-branch="origin/$BASE_REF" --fail-under="${PATCH_COVERAGE_MIN}" diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml new file mode 100644 index 0000000..235222d --- /dev/null +++ b/.github/workflows/links.yml @@ -0,0 +1,41 @@ +name: Links + +# Link-checking the docs is kept OFF the PR path on purpose: external links are flaky-by-nature +# (rate limits, transient downtime) and shouldn't block unrelated PRs (#118). Instead it runs on a +# weekly schedule against the default branch, plus on demand via the Actions tab. Run it locally any +# time with `make lint-links`. +on: + schedule: + - cron: "0 6 * * 1" # Mondays 06:00 UTC + workflow_dispatch: + +permissions: + contents: read + +jobs: + lychee: + name: Link check (lychee) + runs-on: ubuntu-24.04 + env: + # Pinned + checksum-verified, like the gitleaks/shellcheck installs. Keep in lockstep with the + # version `make lint-links` expects locally. + LYCHEE_VERSION: "0.24.2" + LYCHEE_SHA256: "1f4e0ef7f6554a6ed33dd7ac144fb2e1bbed98598e7af973042fc5cd43951c9a" + # lychee uses GITHUB_TOKEN to make authenticated GitHub requests and dodge the anon rate limit. 
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false # zizmor: artipacked + - name: Install pinned lychee + run: | + set -euo pipefail + tarball="lychee-x86_64-unknown-linux-gnu.tar.gz" + curl -fsSL "https://github.com/lycheeverse/lychee/releases/download/lychee-v${LYCHEE_VERSION}/${tarball}" -o "$tarball" + echo "${LYCHEE_SHA256} ${tarball}" | sha256sum -c - + tar -xzf "$tarball" + sudo install "lychee-x86_64-unknown-linux-gnu/lychee" /usr/local/bin/lychee + lychee --version + # Via `make lint-links` so the file set + .lychee.toml config are the same locally and in CI. + - name: Link-check docs (make lint-links) + run: make lint-links diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6b19816..ffdbd06 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -21,7 +21,11 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} TAG: ${{ github.ref_name }} steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + # Releasing goes through `gh` with GH_TOKEN below, not `git push`, so the checkout never + # needs the token left in .git/config (zizmor: artipacked). + persist-credentials: false - name: Verify the tag matches VERSION run: | @@ -38,7 +42,7 @@ jobs: stage="rigforge-$TAG" mkdir -p "$stage" # Runtime files a user needs to deploy a worker — no tests/, .github/, or dev cruft. 
- cp -a rigforge.sh util systemd config.json.template config.advanced.example.json README.md docs images LICENSE VERSION "$stage/" + cp -a rigforge.sh util systemd config.minimal.json config.reference.json README.md docs images LICENSE VERSION "$stage/" zip -rq "$stage.zip" "$stage" tar -czf "$stage.tar.gz" "$stage" sha256sum "$stage.zip" "$stage.tar.gz" > SHA256SUMS diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..c08a064 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,75 @@ +name: Security + +# Supply-chain & secrets gates (#117): +# - gitleaks: scan the full git history for committed secrets (pool creds, the stratum +# access-password from #113, tokens) on every push and PR. +# - zizmor: static-audit the GitHub Actions workflows themselves (template injection, +# over-broad GITHUB_TOKEN, unpinned actions, credential persistence) AND cross-reference the +# actions we pin against the GitHub Advisory Database (online audit). +# Dependabot (github-actions) lives in .github/dependabot.yml; the matching gitleaks pre-commit +# hook lives in .pre-commit-config.yaml. + +on: + push: + branches: [main, develop] # develop is the integration branch; main is the release branch + pull_request: + # Re-audit the default branch (develop) on a schedule so a newly-published advisory against an action we pin trips the gate + # even during quiet periods with no pushes — the online zizmor audit is time-varying by design. + schedule: + - cron: "0 7 * * 1" # Mondays 07:00 UTC + +# Both jobs only read the tree to scan it. Pin the floor to read-only (zizmor: excessive-permissions). +permissions: + contents: read + +jobs: + gitleaks: + name: Secret scan (gitleaks) + runs-on: ubuntu-24.04 + # The weekly tick exists for zizmor's advisory re-audit; history doesn't change between pushes, so + # there's nothing new for gitleaks to scan on a schedule. 
+ if: github.event_name != 'schedule' + env: + # Pinned + checksum-verified, same as the shellcheck/shfmt installs in ci.yml — reproducible and + # immune to runner-image drift. Keep GITLEAKS_VERSION in lockstep with .pre-commit-config.yaml. + GITLEAKS_VERSION: "8.30.1" + GITLEAKS_SHA256: "551f6fc83ea457d62a0d98237cbad105af8d557003051f41f3e7ca7b3f2470eb" + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + fetch-depth: 0 # scan EVERY commit, not just the tip — a secret is still a leak once pushed + persist-credentials: false # zizmor: artipacked + - name: Install pinned gitleaks + run: | + set -euo pipefail + tarball="gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz" + curl -fsSL "https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/${tarball}" -o "$tarball" + echo "${GITLEAKS_SHA256} ${tarball}" | sha256sum -c - + tar -xzf "$tarball" gitleaks + sudo install gitleaks /usr/local/bin/gitleaks + gitleaks version + # Full-history scan with the built-in ruleset. --redact keeps any match out of the public logs; + # the job still fails (non-zero exit) so a leak blocks the merge. + - name: Scan git history for secrets + run: gitleaks git . --redact --no-banner --verbose + + zizmor: + name: Workflow audit (zizmor) + runs-on: ubuntu-24.04 + env: + ZIZMOR_VERSION: "1.25.2" + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false # zizmor: artipacked + # pipx is preinstalled on ubuntu-24.04; same install path as diff-cover in ci.yml's coverage job. + - name: Install pinned zizmor + run: pipx install "zizmor==${ZIZMOR_VERSION}" + # Online audits ON (zizmor's default): GH_TOKEN lets the `known-vulnerable-actions` audit query + # the GitHub Advisory Database, so a CVE disclosed against an action we pin fails the gate. The + # built-in token (read-only here) is enough — advisory data is public; it's only for API access. 
+ # This complements Dependabot: zizmor blocks the merge, Dependabot opens the bump. + - name: Audit GitHub Actions workflows + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: zizmor .github/workflows/ diff --git a/.gitignore b/.gitignore index e2d1f9c..5816397 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -# User-generated config (created from config.json.template) +# User-generated config (created from config.minimal.json) config.json # Runtime artifacts diff --git a/.lychee.toml b/.lychee.toml new file mode 100644 index 0000000..2ba152a --- /dev/null +++ b/.lychee.toml @@ -0,0 +1,12 @@ +# lychee link-checker config for RigForge docs. Run locally via `make lint-links`; in CI it runs on +# a weekly schedule (external links are flaky-by-nature, so link-checking doesn't gate PRs). +max_retries = 3 +retry_wait_time = 2 +timeout = 20 + +# GitHub is the main rate-limiter; CI passes a GITHUB_TOKEN so authenticated requests dodge the +# anonymous limit. Still accept 429 so a transient rate-limit on some other host can't fail the run. +accept = ["200..=299", "429"] + +# Some hosts reject non-browser user agents; present a common one. +user_agent = "Mozilla/5.0 (compatible; lychee/0.24; +https://github.com/lycheeverse/lychee)" diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml new file mode 100644 index 0000000..ae1ebdf --- /dev/null +++ b/.markdownlint-cli2.yaml @@ -0,0 +1,30 @@ +# markdownlint-cli2 config for RigForge docs. Run via `make lint-md`. +# Structural rules (blank lines around headings/lists/fences, code-fence languages, heading levels) +# are kept and the docs fixed to satisfy them; purely stylistic rules that fight the repo's +# deliberate house style are turned off below. +config: + default: true + # Long, deliberate prose/command lines — length is a review concern, not a linter one. + MD013: false + # Inline HTML is used on purpose:
in issue templates, /badges in the README. + MD033: false + # Bold lead-ins like **HugePages** are inline labels, not headings. + MD036: false + # Templates and partials legitimately don't open with a top-level H1. + MD041: false + # Adjacent-but-separate blockquote callouts are intentional; don't force them to merge. + MD028: false + # Table pipe-spacing is cosmetic; the existing tables are readable and consistent as written. + MD060: false + # Keep-a-Changelog repeats "### Added"/"### Fixed" across version sections — allow non-sibling dups. + MD024: + siblings_only: true + # The repo uses *asterisk* emphasis and **asterisk** strong consistently. + MD049: + style: asterisk + MD050: + style: asterisk +globs: + - "**/*.md" +ignores: + - "node_modules" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..bf08da7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,35 @@ +# RigForge pre-commit hooks. Install once per clone: +# +# pipx install pre-commit # or: pip install pre-commit +# pre-commit install +# +# Local == CI: the shell hook calls `make lint`, so the Makefile's SHELL_FILES stays the single +# source of truth (no duplicated file list here). gitleaks is pinned to the same version CI runs +# (.github/workflows/security.yml). The config/docs linters (yamllint, markdownlint, lychee) aren't +# commit-time hooks — run them with `make lint-yaml` / `make lint-md` / `make lint-links`; they gate +# in CI. 
+repos: + - repo: https://github.com/gitleaks/gitleaks + rev: v8.30.1 # keep in lockstep with GITLEAKS_VERSION in .github/workflows/security.yml + hooks: + - id: gitleaks + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: detect-private-key + - id: check-added-large-files + - id: end-of-file-fixer + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] # two trailing spaces is a hard line break in Markdown + + - repo: local + hooks: + # Reuse `make lint` rather than re-listing files, so shellcheck/shfmt run over the exact same + # SHELL_FILES set as CI and `make lint`. Requires shellcheck + shfmt on PATH (see CONTRIBUTING). + - id: make-lint + name: shellcheck + shfmt (make lint) + entry: make lint + language: system + files: \.sh$ + pass_filenames: false diff --git a/.yamllint b/.yamllint new file mode 100644 index 0000000..b37a77d --- /dev/null +++ b/.yamllint @@ -0,0 +1,18 @@ +--- +# yamllint config for RigForge, tuned to the repo's house style rather than yamllint's strict +# defaults: workflows carry long, deliberate explanatory comments, and inline comments use a single +# leading space (the same `key: value # note` style as the shell code). Run via `make lint-yaml`. +extends: default + +rules: + # The workflows intentionally wrap explanation into long inline comments; enforcing 80 cols here + # would mean butchering them. Length is already governed by review, not a linter. + line-length: disable + # Single-document files — the leading `---` adds nothing, so don't require it. + document-start: disable + comments: + # Match the repo style of one space before an inline comment (yamllint defaults to two). + min-spaces-from-content: 1 + truthy: + # Don't flag GitHub Actions' `on:` key as a "truthy" value. + check-keys: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b29b6f..f74d565 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,61 @@ All notable changes to RigForge are documented here. 
The format is based on ## [Unreleased] +## [1.1.0] - 2026-07-01 + +### Added + +- **Supply-chain & secret-scanning CI gates (#117).** Three cross-cutting hardening gates on top of + the existing SHA-pinned actions and commit-verified XMRig build: + - **gitleaks** — a new `Security` workflow scans the full git history for committed secrets (pool + credentials, tokens, the stratum access-password) on every push and PR, plus a matching + [`.pre-commit-config.yaml`](./.pre-commit-config.yaml) hook so a leak is caught before it's pushed. + The binary is version- and checksum-pinned, like the existing shellcheck/shfmt installs. + - **Dependabot** ([`.github/dependabot.yml`](./.github/dependabot.yml)) — keeps the hand-pinned + GitHub Actions current (`github-actions` ecosystem only; RigForge has no pip/npm/docker deps) and + surfaces action security advisories. + - **zizmor** — static-audits the workflows for template injection, over-broad `GITHUB_TOKEN`, and + credential persistence, and (online) cross-references the actions we pin against the GitHub + Advisory Database. Runs on push/PR plus a weekly schedule, so a CVE disclosed against a pinned + action trips the gate even with no open PRs. Hardened the existing `ci.yml`/`release.yml` to a + read-only default token and `persist-credentials: false` on checkout to make the audit clean. +- **DX glue + config/docs lint (#118).** Rounds out the non-shell tooling around the existing + shellcheck/shfmt + kcov core: + - **`.editorconfig`** — encodes the whitespace house style (`shfmt -i 4`, LF, final newline) so + editors match CI without per-editor setup. + - **pre-commit** — `.pre-commit-config.yaml` now orchestrates `make lint` (shellcheck/shfmt via the + Makefile's `SHELL_FILES`, no duplicated list), the existing gitleaks hook, and freebie hygiene + hooks (private-key detection, large-file guard, end-of-file + trailing-whitespace fixers). 
+ - **yamllint + markdownlint** — new CI gates (and `make lint-yaml` / `make lint-md`) over the + workflows/configs and the docs, each with a tuned config (`.yamllint`, `.markdownlint-cli2.yaml`). + - **lychee** — a link-checker (`make lint-links`) that runs on a weekly schedule rather than per-PR, + since external links are flaky-by-nature. +- **Contributing: inbound contributions are MIT-licensed (#119).** `CONTRIBUTING.md` now states that + contributions are licensed under the project's MIT License — a lightweight alternative to a CLA. + +### Changed + +- **Worker HTTP API is now OPEN (read-only) by default.** `ACCESS_TOKEN` no longer defaults to the rig + name; left unset, the rig's `:8080` API is served `restricted` (read-only) with **no token** — which + matches Pithead's new default no-auth stats probe, so a stock rig needs zero token coordination. Set + `ACCESS_TOKEN` to require a `Bearer` token (then match it on the dashboard with `workers.api_auth: + token`/`name`). Pairs with pithead [#171](https://github.com/p2pool-starter-stack/pithead/issues/171) + / [#172](https://github.com/p2pool-starter-stack/pithead/issues/172). + +### Fixed + +- **Live tuning works with the new open API default.** Every live-hashrate read — `autotune` and its + monthly timer, `tune --live`, `tune --confirm`, and the `upgrade` re-tune — always sent an + `Authorization: Bearer` header. Once the API defaulted to open with no token, that empty Bearer drew a + `401`, and under `set -e` the failed `curl -f` aborted the read, silently breaking live tuning on a + stock config. The header is now sent only when `ACCESS_TOKEN` is set. The dependency-free suite stubs + the API, so this surfaced only on the real-hardware release gate — which now sends its warmup probe the + same way. 
+ ## [1.0.1] - 2026-06-13 ### Fixed + - **HugePage sizing is now NUMA-aware (1 GB pages) (#111).** RandomX fast mode keeps a NUMA-local copy of the ~2080 MB dataset **per NUMA node**, but the reservation math multiplied the per-dataset 1 GB pages by the **socket** count, not the NUMA-node count. On a single-socket, multi-NUMA CPU — e.g. an EPYC 7642 with 4 @@ -83,6 +135,7 @@ The full walkthrough — prerequisites, the Linux reboot, and verification — i Full 1.0.0 feature list — every capability and hardening that went into this release #### Added + - **Privacy & security, documented up front (#109).** A new README "Privacy & security" section and a SECURITY.md "What RigForge exposes (and what it doesn't)" section state it plainly: **no telemetry** (the only outbound traffic is your pool, the commit-verified XMRig clone, and your distro's package mirrors); @@ -308,6 +361,7 @@ The full walkthrough — prerequisites, the Linux reboot, and verification — i Release with `.zip`/`.tar.gz` deploy bundles, `SHA256SUMS`, and changelog-derived notes (#3, #36). #### Changed + - **Live auto-tuning converges in one run.** Each `autotune` run **live-sweeps every prefetch mode** against the running miner and adopts the fastest (median measurement + a margin gate, else it keeps the current mode), converging in a single ~minutes-long pass. The safety-net timer fires **monthly** by @@ -373,6 +427,7 @@ The full walkthrough — prerequisites, the Linux reboot, and verification — i fixes across the docs. #### Fixed + - **`setup` re-run now applies your `config.json` edits (#109).** On a no-rebuild re-run, the regenerated config was written to the worker root instead of the build directory the service loads (`--config=$BUILD_DIR/config.json`), so an edit-then-`setup` silently kept mining the old config. setup @@ -451,6 +506,7 @@ The full walkthrough — prerequisites, the Linux reboot, and verification — i
-[Unreleased]: https://github.com/p2pool-starter-stack/rigforge/compare/v1.0.1...main +[Unreleased]: https://github.com/p2pool-starter-stack/rigforge/compare/v1.1.0...main +[1.1.0]: https://github.com/p2pool-starter-stack/rigforge/releases/tag/v1.1.0 [1.0.1]: https://github.com/p2pool-starter-stack/rigforge/releases/tag/v1.0.1 [1.0.0]: https://github.com/p2pool-starter-stack/rigforge/releases/tag/v1.0.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d855b35..595823f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,27 +1,26 @@ # Contributing to RigForge -Thanks for your interest in improving RigForge! Whether it's a bug fix, a new -CPU tuning profile, or a docs tweak, contributions are welcome. - RigForge is the companion miner for the -[Pithead](https://github.com/p2pool-starter-stack/pithead) P2Pool stack. If your -idea is really about the stack as a whole rather than the miner, that repo may be -the better home for it. +[Pithead](https://github.com/p2pool-starter-stack/pithead) P2Pool stack. Bug +fixes, CPU tuning profiles, and docs changes are all welcome. + +If your idea is about the stack as a whole rather than the miner, the Pithead +repo may be the better home for it. ## Before you start -- For anything beyond a small fix, **open an issue first** so we can agree on the - approach before you spend time on it. This avoids duplicated or wasted effort. +- For anything beyond a small fix, open an issue first so we can agree on the + approach before you spend time on it. This avoids duplicated work. - Check the existing issues to see if someone is already on it. ## Making changes -RigForge is portable Bash that has to run on Ubuntu/Debian and macOS, so: +RigForge is portable Bash that has to run on Ubuntu/Debian and macOS: -- Keep it **portable bash** — avoid GNU-only flags and other Linux-isms where a +- Keep it portable. Avoid GNU-only flags and other Linux-isms where a POSIX-friendly alternative exists, and guard platform-specific code paths. 
-- Run **`make lint`** before you push and fix any warnings — it runs ShellCheck and `shfmt` over the - script, utilities, **and** the test scripts, exactly as CI does: +- Run `make lint` before you push and fix any warnings. It runs ShellCheck and `shfmt` over the + script, utilities, and the test scripts, exactly as CI does: ```bash make lint # or: make test (lint + the full dependency-free suite) @@ -29,16 +28,53 @@ RigForge is portable Bash that has to run on Ubuntu/Debian and macOS, so: CI runs the same checks, so a clean local run keeps your PR green. (`make fmt` auto-applies the `shfmt` formatting.) -- Update the README or other docs when you change behaviour or add options. +- Update the README or other docs when you change behavior or add options. + +## Pre-commit hooks + +Install the hooks once and they run on every commit, catching issues before they reach CI: + +```bash +pipx install pre-commit # or: pip install pre-commit +pre-commit install +``` + +This runs `make lint` (ShellCheck + shfmt over the Makefile's `SHELL_FILES`), +[gitleaks](https://github.com/gitleaks/gitleaks) secret scanning (the same pinned version CI runs, so +a committed token or pool credential is caught before it's pushed), and a few hygiene checks: +private-key detection, a large-file guard, and final-newline and trailing-whitespace fixers. + +### Config & docs linting + +The YAML, Markdown, and link checks gate in CI and have matching Make targets for local runs: + +```bash +make lint-yaml # yamllint the workflows + configs (.yamllint) +make lint-md # markdownlint the docs (.markdownlint-cli2.yaml; needs node) +make lint-links # lychee link-check the docs (.lychee.toml; needs lychee — runs weekly in CI) +make lint-all # shell + yaml + markdown in one go +``` + +An [`.editorconfig`](./.editorconfig) encodes the whitespace conventions (`shfmt -i 4`, LF, final +newline) so most editors match these checks automatically. 
+ +## Branching + +RigForge uses a two-branch model (same as [Pithead](https://github.com/p2pool-starter-stack/pithead)): + +- `develop` is the default, integration branch. All PRs target `develop`. +- `main` is the release branch. `develop` is merged into `main` at each release, and version tags + are cut from `main`. ## Submitting a pull request -1. Fork the repo and create a topic branch off `main`. +1. Fork the repo and create a topic branch off `develop`. 2. Make your change and confirm `shellcheck` passes. -3. Open a PR against `main` and fill out the template. -4. **All PRs require review** before merging — a code owner will take a look. +3. Open a PR against `develop` and fill out the template. +4. All PRs require review before merging; a code owner will take a look. -Keep PRs focused and the description clear about *what* changed and *why*. Small, +Keep PRs focused and the description clear about what changed and why. Small, reviewable changes get merged faster. -Thanks again for contributing! 🔥 +By contributing, you agree that your contributions are licensed under the project's +[MIT License](LICENSE). diff --git a/Makefile b/Makefile index 164a8eb..0ef0321 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,13 @@ # Local test entry points (mirror the GitHub Actions CI jobs). -.PHONY: help test test-suite test-e2e test-e2e-macos smoke coverage e2e-real lint fmt +.PHONY: help test test-suite test-e2e test-e2e-macos smoke coverage e2e-real lint fmt lint-yaml lint-md lint-links lint-all SHELL_FILES = rigforge.sh util/proposed-grub.sh tests/run.sh tests/e2e/linux.sh tests/e2e/in-container.sh tests/e2e/macos.sh tests/smoke.sh tests/coverage.sh tests/e2e-real.sh +# Config/docs lint file sets, derived from what's tracked so CI and local stay in sync (like SHELL_FILES). 
+YAML_FILES = $(shell git ls-files '*.yml' '*.yaml') +MD_FILES = $(shell git ls-files '*.md') +MARKDOWNLINT_VERSION = 0.22.1 + # Keep `make` (no target) running the default dev check; `make help` lists every target. .DEFAULT_GOAL := test @@ -35,3 +40,14 @@ lint: ## shellcheck + shfmt (check) the script, utilities, and test scripts fmt: ## auto-format all shell scripts with shfmt (resolves shfmt lint failures) shfmt -i 4 -w $(SHELL_FILES) + +lint-yaml: ## yamllint the YAML (workflows, dependabot, configs) — uses .yamllint, strict + yamllint --strict $(YAML_FILES) + +lint-md: ## markdownlint the docs — uses .markdownlint-cli2.yaml (needs node/npx) + npx --yes markdownlint-cli2@$(MARKDOWNLINT_VERSION) $(MD_FILES) + +lint-links: ## lychee link-check the docs — uses .lychee.toml (needs lychee; hits external links) + lychee $(MD_FILES) + +lint-all: lint lint-yaml lint-md ## run every fast linter (shell + yaml + markdown; not the link check) diff --git a/README.md b/README.md index c018fbb..99960dd 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # RigForge -### Provision a hardware-tuned XMRig miner in one command. +

Provision a hardware-tuned XMRig miner in one command

[![CI](https://github.com/p2pool-starter-stack/rigforge/actions/workflows/ci.yml/badge.svg)](https://github.com/p2pool-starter-stack/rigforge/actions/workflows/ci.yml) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE) @@ -12,41 +12,41 @@ [![Miner: XMRig](https://img.shields.io/badge/Miner-XMRig-F26822?logo=monero&logoColor=white)](https://github.com/xmrig/xmrig) [![Companion: Pithead](https://img.shields.io/badge/Companion-Pithead-F26822)](https://github.com/p2pool-starter-stack/pithead) -RigForge turns a fresh Ubuntu/Debian (or macOS) machine into a fully tuned [XMRig](https://github.com/xmrig/xmrig) -mining worker — it installs the toolchain, compiles XMRig from source, applies kernel- and CPU-level -tuning for maximum RandomX hashrate, and runs it as a managed service. You point it at a pool and -walk away. +RigForge provisions a tuned [XMRig](https://github.com/xmrig/xmrig) mining worker on a fresh +Ubuntu/Debian (or macOS) machine. It installs the toolchain, compiles XMRig from source, applies +kernel- and CPU-level tuning for RandomX hashrate, and runs it as a managed service. Point it at a +pool to start mining. -It works against **any RandomX Stratum pool**, and it's built as the companion miner for -**[Pithead](https://github.com/p2pool-starter-stack/pithead)** — connect as many RigForge workers as +It works against any RandomX Stratum pool, and it's the companion miner for +[Pithead](https://github.com/p2pool-starter-stack/pithead). Connect as many RigForge workers as you like to your stack's single endpoint. -> **RigForge is not a custom miner.** It compiles stock, upstream XMRig and wraps it in the setup, +> RigForge is not a custom miner. It compiles stock, upstream XMRig and wraps it in the setup, > hardware tuning, and service management that are otherwise fiddly to get right by hand. 
--- ## ✨ What it does -- **Automated setup** — installs build dependencies (`cmake`, `libuv`, `hwloc`, …) and compiles a - pinned, commit-verified XMRig from source. -- **Hardware-aware tuning** — leans on XMRig's cache-aware auto-detection (thread count, assembly - path, MSR preset, NUMA) and layers on dedicated-miner defaults for maximum hashrate. -- **Kernel & system tuning (Linux)** — topology-aware HugePages (1 GB and 2 MB), MSR access for - hardware-prefetcher control, and `hugetlbfs` mounts + memlock limits. -- **Service management (Linux)** — runs XMRig as a `systemd` service with a `cpupower` performance - governor and automatic log rotation. -- **Interactive config** — if no config exists, it asks for the one thing it needs: your pool URL. -- **Idempotent** — re-running skips the recompile when the pinned XMRig is already built and never +- Installs build dependencies (`cmake`, `libuv`, `hwloc`, …) and compiles a pinned, commit-verified + XMRig from source. +- Tunes for the detected CPU: XMRig's cache-aware auto-detection (thread count, assembly path, MSR + preset, NUMA), plus dedicated-miner defaults. +- Applies kernel and system tuning on Linux: topology-aware HugePages (1 GB and 2 MB), MSR access + for hardware-prefetcher control, and `hugetlbfs` mounts + memlock limits. +- Runs XMRig as a `systemd` service on Linux, with a `cpupower` performance governor and log + rotation. +- Asks for your pool URL on first run if no config exists. +- Is idempotent: re-running skips the recompile when the pinned XMRig is already built and never double-applies system tuning. --- ## 📊 Does it actually help? 
-Measured on a Ryzen **7800X3D**, **mining live** to a real pool (not a synthetic `--bench`): +Measured on a Ryzen **7800X3D**, mining live to a real pool (not a synthetic `--bench`): | | Stock XMRig | RigForge | | |---|--:|--:|:--| @@ -54,10 +54,10 @@ Measured on a Ryzen **7800X3D**, **mining live** to a real pool (not a synthetic | **Power** | 86.8 W | **83.5 W** | **−3.8%** | | **Efficiency** | 120.1 H/s/W | **129.2 H/s/W** | **+7.6%** | -Stock XMRig burns *more* power for *less* work — without HugePages the CPU stalls on memory. RigForge is -faster **and** cooler, free, in one command. On a **48-core EPYC** the gap is bigger (**+6.6%**), and -RigForge even **matched an expert's hand-tuned config** while dodging a prefetch setting that *halves* -RandomX on that chip. Full method, both CPUs, and honest caveats: **[Benchmarks →](docs/benchmarks.md)** +Without HugePages the CPU stalls on memory, so stock XMRig draws more power for less work. RigForge is +faster and runs cooler. On a 48-core EPYC the gap is +6.6%, and RigForge matched an expert's +hand-tuned config while avoiding a prefetch setting that halves RandomX on that chip. Full method, +both CPUs, and the caveats: [Benchmarks](docs/benchmarks.md). --- @@ -73,13 +73,13 @@ sudo ./rigforge.sh ``` The script needs root to install packages and tune the system. On first run it asks for your pool URL -and writes a minimal `config.json`. On **Linux**, reboot once afterwards to apply the -HugePages tuning — the `xmrig` service then starts automatically. +and writes a minimal `config.json`. On Linux, reboot once afterward to apply the HugePages tuning; +the `xmrig` service then starts automatically. -> **Mining to a public pool like SupportXMR?** Point `url` at the pool and set your **Monero wallet** -> as the pool `user` (most pools also want their TLS port) — see +> Mining to a public pool like SupportXMR? 
Point `url` at the pool and set your Monero wallet +> as the pool `user` (most pools also want their TLS port). See > [Configuration › Connecting to a public pool](docs/configuration.md#connecting-to-a-public-pool-supportxmr-etc). -> With a [Pithead](https://github.com/p2pool-starter-stack/pithead) stack you need no wallet — just the +> With a [Pithead](https://github.com/p2pool-starter-stack/pithead) stack you need no wallet, just the > stack's `host:3333`. ➡️ **Full walkthrough:** [docs/getting-started.md](docs/getting-started.md) @@ -95,7 +95,7 @@ HugePages tuning — the `xmrig` service then starts automatically. | **[Benchmarks](docs/benchmarks.md)** | Measured stock-vs-tuned hashrate and efficiency on real hardware, with the method and caveats. | | **[Configuration](docs/configuration.md)** | Every `config.json` key and default, and how the XMRig config is generated. | | **[Operations & Maintenance](docs/operations.md)** | The full command reference, service management, logs, upgrades, and troubleshooting. | -| **[How It Works](docs/how-it-works.md)** | What the script actually does — compile, HugePages, MSR, NUMA, governor, service. | +| **[How It Works](docs/how-it-works.md)** | What the script actually does: compile, HugePages, MSR, NUMA, governor, service. | | **[Pithead Integration](docs/pithead-integration.md)** | The worker ↔ dashboard contract: discovery via `:3333`, the read-only API on `:8080`, and the token rules. | | **[FAQ](docs/faq.md)** | Common questions, plus why RigForge vs. doing it by hand. | @@ -105,8 +105,8 @@ Browse the full index at **[docs/](docs/README.md)**. ## 🛠️ Common commands -The everyday tasks, each a single command — there's a task-by-task cheat sheet in -[Operations › Common tasks](docs/operations.md#common-tasks): +The everyday tasks, each a single command. There's a task-by-task cheat sheet in +[Operations › Common tasks](docs/operations.md#common-tasks). 
```bash # Change a setting — edit config.json first, then: @@ -129,17 +129,17 @@ See [Operations › Commands](docs/operations.md#commands) for the full referenc RigForge runs as root, so it's worth being explicit about what it does and doesn't do: -- **No telemetry, ever.** No analytics, no version ping, no usage beacon. The only outbound traffic is - to *your* pool, to the pinned XMRig source on GitHub (cloned and **commit-verified** before building), +- No telemetry. No analytics, no version ping, no usage beacon. The only outbound traffic is + to *your* pool, to the pinned XMRig source on GitHub (cloned and commit-verified before building), and to your distro's package mirrors. -- **Honest dev fee.** The XMRig donation defaults to **1%** — XMRig's own upstream default, not a - RigForge markup — and goes to the XMRig project. Set `"DONATION": 0` to turn it off. -- **Read-only stats API.** Each worker exposes XMRig's HTTP API on `:8080` for the +- The XMRig donation defaults to **1%**. That's XMRig's own upstream default, not a RigForge markup, + and it goes to the XMRig project. Set `"DONATION": 0` to turn it off. +- Each worker exposes XMRig's HTTP API on `:8080` for the [Pithead](https://github.com/p2pool-starter-stack/pithead) dashboard. It's `restricted` (read-only, can't control the miner) and can be token-gated by setting `ACCESS_TOKEN` (empty by default, which leaves it open for Pithead's no-auth stats probe). It binds to all interfaces by default, so it is reachable from the LAN; if you mine solo or to a public pool you don't need it at all and can firewall the port off. -Full detail — and the exact `ufw` commands to lock down `:8080` — are in [SECURITY.md](./SECURITY.md). +Full details, and the exact `ufw` commands to lock down `:8080`, are in [SECURITY.md](./SECURITY.md). 
--- @@ -158,21 +158,21 @@ make smoke # release pre-tag gate (quick): real xmrig --bench proves the b make e2e-real # release pre-tag gate (full): real build+tune+bench+doctor+uninstall on a rig (root) ``` -**What `make test` covers** — it sources `rigforge.sh` and exercises its functions in isolation, with +**What `make test` covers.** It sources `rigforge.sh` and exercises its functions in isolation, with every external/privileged command (`git`, `make`, `cmake`, `sudo`, `systemctl`, `modprobe`, `apt-get`, …) and all hardware detection (`uname`, `lscpu`, `sysctl`, `nproc`, `hostname`) replaced by -fakes on `PATH`. Because the hardware is faked, **one run on any machine simulates every supported -platform** — it asserts the generated XMRig config for EPYC / Ryzen X3D / generic-Linux inputs and the +fakes on `PATH`. Because the hardware is faked, one run on any machine simulates every supported +platform. It asserts the generated XMRig config for EPYC / Ryzen X3D / generic-Linux inputs and the macOS path, plus config parsing, `DONATION` validation, host resolution, and a full stubbed deployment run (executed twice to prove idempotency). -**What `make test-e2e` adds** — it runs the *real* `rigforge.sh` end-to-end inside a throwaway +**What `make test-e2e` adds.** It runs the *real* `rigforge.sh` end-to-end inside a throwaway `ubuntu` container (RigForge's documented Linux target, `linux/amd64`), against a real, disposable -`/etc`. This validates the Linux-only deploy path with genuine tools — GNU `sed`, `envsubst`, the -`fstab`/`limits`/GRUB edits and their idempotency — which can't run natively on a macOS host. Only the +`/etc`. This validates the Linux-only deploy path with genuine tools (GNU `sed`, `envsubst`, the +`fstab`/`limits`/GRUB edits and their idempotency) which can't run natively on a macOS host. Only the heavy XMRig compile and the package install are stubbed. It skips cleanly if Docker isn't available. 
-No XMRig binary is compiled by the tests — the heavy native build is stubbed; the suite asserts the +No XMRig binary is compiled by the tests; the heavy native build is stubbed. The suite asserts the *orchestration* (clone → patch `donate.h` → cmake → make) and the generated configuration instead. **`make coverage`** measures line coverage of `rigforge.sh` + `util/proposed-grub.sh` by running the @@ -180,13 +180,13 @@ suite under [kcov](https://github.com/SimonKagstrom/kcov) (in a digest-pinned co Linux/ptrace based). The black-box tests run the *real* script against a sandbox via `RIGFORGE_HOME`, so both the sourced functions and the command-dispatch paths are credited. CI enforces two gates: a committed **total floor** ([`tests/coverage-floor.txt`](tests/coverage-floor.txt), ratcheted up over -time) and, the important lever, **patch coverage** — new/changed lines must be tested (`diff-cover` -against `main`). Neither needs an external service. +time) and, the important lever, **patch coverage** (`diff-cover` against `main`) so new/changed lines +must be tested. Neither needs an external service. **`make smoke`** closes that gap at release time. Because the suites never compile or run XMRig, they can't prove the shipped binary actually starts and hashes. `make smoke` benches a real worker (`xmrig --bench`, fully offline) on a real rig and passes only if a hashrate is reported and the run is -clean — it's a manual, Linux-only-for-full-effect pre-tag gate, not a CI job. See +clean. It's a manual, Linux-only-for-full-effect pre-tag gate, not a CI job. See [RELEASING.md](./RELEASING.md). 
For how RigForge is versioned and released, see [RELEASING.md](./RELEASING.md) and @@ -198,7 +198,9 @@ For how RigForge is versioned and released, see [RELEASING.md](./RELEASING.md) a If RigForge saved you time and you'd like to support it, donations to this XMR wallet are appreciated: - 486aGn4qhH1MkaASjnEWMDN7stD1SVtPF5fvihmjffeBE5ACL1u1jU95KxiqmoiaPZMexi4R4W11MLXut66XWVVF8wjAE5R +```text +486aGn4qhH1MkaASjnEWMDN7stD1SVtPF5fvihmjffeBE5ACL1u1jU95KxiqmoiaPZMexi4R4W11MLXut66XWVVF8wjAE5R +``` --- diff --git a/RELEASING.md b/RELEASING.md index b019711..ab881d5 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -5,70 +5,96 @@ tracked in [`VERSION`](./VERSION) and the history in [`CHANGELOG.md`](./CHANGELO ## Versioning -- **MAJOR** — incompatible `config.json` / CLI / behaviour changes. -- **MINOR** — new, backwards-compatible functionality. -- **PATCH** — backwards-compatible fixes. +- MAJOR: incompatible `config.json` / CLI / behavior changes. +- MINOR: new, backwards-compatible functionality. +- PATCH: backwards-compatible fixes. -From `1.0.0` on, the `config.json` and CLI surface is stable — a breaking change bumps **MAJOR**. (Pre-1.0 +From `1.0.0` on, the `config.json` and CLI surface is stable, so a breaking change bumps MAJOR. (Pre-1.0 `0.x` releases could break the interface between minor versions while it settled.) ## Cutting a release -1. Ensure `main` is green: `make test` (and `make test-e2e` if Docker is available). -2. **Full real-hardware e2e (the release gate).** CI exercises everything it can (lint, the - dependency-free suite, the Docker `/etc` e2e, the coverage gate) — but it can't compile XMRig, - reserve HugePages, write MSRs, set the governor, or actually hash. So on a **real Linux rig**, run +Work lands on `develop` (the integration branch); a release is the point where `develop` is +promoted to `main` and tagged. The steps below build the release commit on `develop`, merge it to +`main`, and tag from `main`. + +1. 
Ensure `develop` is green: `make test` (and `make test-e2e` if Docker is available). +2. Full real-hardware e2e (the release gate). CI exercises everything it can (lint, the + dependency-free suite, the Docker `/etc` e2e, the coverage gate), but it can't compile XMRig, reserve HugePages, write MSRs, set the governor, or actually hash. So on a real Linux rig, run the genuine deploy end to end and assert each step: + ```bash sudo bash tests/e2e-real.sh provision # real deps + XMRig build + tuning + kernel tuning + service sudo reboot # HugePages (1G + GRUB cmdline) take effect on boot; reconnect sudo bash tests/e2e-real.sh verify # doctor (HugePages/MSR/governor/service) + bench (real H/s) + a short tune + a live auto-tune pass sudo bash tests/e2e-real.sh teardown # uninstall + assert a clean revert ``` - Each phase must report `E2E-REAL (PHASE): PASS`. This is what proves a release bundle actually - builds, tunes, and hashes on real hardware — the suites all stub XMRig and can't. - - **Put a real, reachable pool in `config.json` first.** Without one, `setup` writes an unroutable - placeholder and `verify` **fails** the connect + share-submission round-trip — that round-trip is + + Each phase must report `E2E-REAL (PHASE): PASS`, where `PHASE` is `provision`, `verify`, or + `teardown`. This proves a release bundle actually + builds, tunes, and hashes on real hardware, which the suites can't since they all stub XMRig. + - Put a real, reachable pool in `config.json` first. Without one, `setup` writes an unroutable + placeholder and `verify` fails the connect + share-submission round-trip. That round-trip is mandatory, since proving the rig really mines is the whole point of the gate. Point `pools[0].url` at a real low-difficulty pool you control (e.g. the stack's test pool). For a deliberate offline smoke run with no pool on hand, set `E2E_ALLOW_OFFLINE_POOL=1` to downgrade it to an explicit skip. 
- - **Quick subset:** `make smoke` (bench-only) is the fast version when you just need to confirm a + - Quick subset: `make smoke` (bench-only) is the fast version when you just need to confirm a built worker still hashes; the full `e2e-real` flow above supersedes it for a real release. - - Kept **out of CI** on purpose (a real build + HugePages + mining are flaky-by-nature and against - Actions' ToS) — it's a manual pre-tag gate the releaser runs. + - Kept out of CI on purpose (a real build + HugePages + mining are flaky by nature and against + Actions' ToS); it's a manual pre-tag gate the releaser runs. 3. In [`CHANGELOG.md`](./CHANGELOG.md), move the `## [Unreleased]` entries under a new `## [X.Y.Z] - YYYY-MM-DD` heading, then leave a fresh empty `## [Unreleased]` above it. 4. Bump [`VERSION`](./VERSION) to `X.Y.Z`. -5. Commit the two together: +5. Commit the two together on `develop`: + ```bash git commit -am "release: vX.Y.Z" + git push origin develop + ``` + +6. Promote `develop` to `main` **through a pull request** — `main` is a protected release branch, so the + promotion goes through a reviewable PR (its own gate + audit trail), not a direct push: + + ```bash + gh pr create --base main --head develop --title "release: vX.Y.Z" \ + --body "Promote develop to main for the vX.Y.Z release." ``` -6. Tag and push (annotated tag, **matching `VERSION`**): + + Review and merge it. Keep `main` linear with a **fast-forward (rebase) merge** so the tag sits on the + same commit as `develop`'s release commit: + + ```bash + gh pr merge --rebase --admin # fast-forward main to develop; --admin lets the releaser merge + ``` + +7. 
Tag and push from `main` (annotated tag, matching `VERSION`) once the PR is merged: + ```bash + git checkout main && git pull --ff-only origin main git tag -a vX.Y.Z -m "RigForge vX.Y.Z" git push origin main --follow-tags ``` -That's it — pushing the tag triggers the **release pipeline** +Pushing the tag triggers the release pipeline ([`.github/workflows/release.yml`](./.github/workflows/release.yml)), which: -- **verifies** the tag matches `VERSION` (the build fails otherwise), -- packages the deploy bundle (`rigforge.sh`, `util/`, `systemd/`, `config.json.template`, - `config.advanced.example.json`, `README.md`, `docs/`, `images/`, `LICENSE`, `VERSION`) as - `rigforge-vX.Y.Z.zip` **and** `.tar.gz` — `tests/`, `.github/`, and other dev files are excluded, +- verifies the tag matches `VERSION` (the build fails otherwise), +- packages the deploy bundle (`rigforge.sh`, `util/`, `systemd/`, `config.minimal.json`, + `config.reference.json`, `README.md`, `docs/`, `images/`, `LICENSE`, `VERSION`) as + `rigforge-vX.Y.Z.zip` and `.tar.gz` (`tests/`, `.github/`, and other dev files are excluded), - generates `SHA256SUMS` for the artifacts, - pulls that version's section from [`CHANGELOG.md`](./CHANGELOG.md) as the release notes, -- creates the GitHub Release as a **draft** — review the generated notes and bundles, then click - **Publish** (pre-1.0 `0.x` tags are marked pre-release; `1.0.0`+ are full releases). +- creates the GitHub Release as a draft. Review the generated notes and bundles, then click + Publish (pre-1.0 `0.x` tags are marked pre-release; `1.0.0`+ are full releases). To verify a downloaded bundle: `sha256sum -c SHA256SUMS`. -> The release is created as a **draft** so a human reviews it before it goes public — a deliberate gate +> The release is created as a draft so a human reviews it before it goes public, a deliberate gate > for a tool that installs a root miner. Drop `--draft` from `release.yml` to auto-publish on tag instead. 
## Notes -- Keep `VERSION` and the latest `CHANGELOG.md` heading in lock-step — the test suite checks `VERSION` +- Keep `VERSION` and the latest `CHANGELOG.md` heading in lock-step; the test suite checks `VERSION` is valid SemVer. - `VERSION` is also surfaced at runtime: `rigforge.sh version` (or `--version` / `-v`) reads it, so a release tag, the changelog heading, and what the script reports all stay in agreement. diff --git a/SECURITY.md b/SECURITY.md index 614503c..63e2067 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,34 +1,34 @@ # Security Policy RigForge compiles XMRig from upstream source and applies privileged system -tuning — it runs as root, configures kernel HugePages and MSR access, and -installs a `systemd` service. Because of that footprint, we take security -reports seriously and appreciate responsible disclosure. +tuning. It runs as root, configures kernel HugePages and MSR access, and +installs a `systemd` service. Given that footprint, we take security reports +seriously and appreciate responsible disclosure. ## What RigForge exposes (and what it doesn't) -**No telemetry, ever.** RigForge never phones home — there is no analytics, no +No telemetry, ever. RigForge never phones home. There is no analytics, no version ping, and no usage beacon. The only outbound connections it makes are to *your* pool, to the pinned XMRig source on GitHub (a shallow clone whose commit is verified against a hardcoded hash before it's built), and to your distro's package -mirrors. The XMRig developer donation defaults to **1%** — XMRig's own upstream -default, not a RigForge markup — goes to the XMRig project's address (RigForge -substitutes no wallet of its own into the mining path), and is set to **0** with +mirrors. The XMRig developer donation defaults to 1%, XMRig's own upstream +default rather than a RigForge markup. 
It goes to the XMRig project's address (RigForge +substitutes no wallet of its own into the mining path), and is set to 0 with `"DONATION": 0` in `config.json`. -**The worker stats API (`:8080`).** Each worker runs XMRig's HTTP API so a +The worker stats API (`:8080`). Each worker runs XMRig's HTTP API so a [Pithead](https://github.com/p2pool-starter-stack/pithead) dashboard can read per-rig stats over the LAN. Know exactly what it is: -- **Read-only.** It's configured `restricted: true` — the API can be *read* but - never used to *control* the miner (no remote pause, config change, or shutdown). -- **Token-gated.** Reads require a bearer token (the rig name by default), so it +- Read-only. It's configured `restricted: true`, so the API can be read but + never used to control the miner (no remote pause, config change, or shutdown). +- Token-gated when `ACCESS_TOKEN` is set. An empty `ACCESS_TOKEN` (the default) leaves the API open to + match Pithead's no-auth stats probe; set a value to require a bearer token on reads, so it isn't an open endpoint. -- **LAN-bound by default.** It binds `0.0.0.0:8080` because the Pithead dashboard +- LAN-bound by default. It binds `0.0.0.0:8080` because the Pithead dashboard polls each worker from the stack host. The data it can return is mining stats: hashrate, the configured pool URL, the worker label, and the CPU model. -**Not running Pithead?** Nothing else needs the port — `tune` and `doctor` read +Not running Pithead? Nothing else needs the port; `tune` and `doctor` read the API over `127.0.0.1`. So if you mine solo or to a public pool, you can firewall `:8080` off entirely without losing anything: @@ -37,6 +37,19 @@ sudo ufw deny 8080/tcp # block it outright … sudo ufw allow from PITHEAD_HOST_IP to any port 8080 proto tcp # … or scope it to one host ``` +## Supply chain & secret scanning + +RigForge is built to be reproducible and tamper-evident: + +- Pinned, verified inputs. 
XMRig is cloned at a pinned commit and verified against a hardcoded + hash before it builds; GitHub Actions are SHA-pinned; CI tool installs (shellcheck, shfmt, gitleaks) + are version- and checksum-verified. Dependabot keeps the action pins current and flags advisories. +- Secret scanning. [gitleaks](https://github.com/gitleaks/gitleaks) scans the full git history on + every push and PR, and runs as a pre-commit hook, so credentials can't slip into the repo. +- Workflow auditing. [zizmor](https://github.com/zizmorcore/zizmor) static-audits the CI workflows + for template injection, over-broad token scopes, and credential persistence; jobs run with a + least-privilege, read-only `GITHUB_TOKEN` by default. + ## Supported versions Only the latest `main` is supported. Please reproduce against current `main` @@ -49,7 +62,7 @@ before reporting. ## Reporting a vulnerability -**Please do not open a public issue for security problems.** +Please do not open a public issue for security problems. Instead, use GitHub's private vulnerability reporting on this repository: diff --git a/VERSION b/VERSION index 7dea76e..9084fa2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.1 +1.1.0 diff --git a/config.json.template b/config.minimal.json similarity index 100% rename from config.json.template rename to config.minimal.json diff --git a/config.advanced.example.json b/config.reference.json similarity index 57% rename from config.advanced.example.json rename to config.reference.json index 4356348..b38ea7b 100644 --- a/config.advanced.example.json +++ b/config.reference.json @@ -1,5 +1,5 @@ { - "_docs": "Reference for every config.json key, each shown with its default value. Copy ONLY the keys you actually want to change into config.json; any key you omit keeps the default shown here. The pool target is the native XMRig 'pools' array: each pool needs a 'url' (host:port); every other field falls back to a Pithead-friendly default. 
A pool's 'user' is the rig's label on the dashboard (defaults to the machine hostname); list multiple entries for failover. An empty ACCESS_TOKEN means 'use the rig name' (the first pool's user). 'autotune' is one of \"disabled\" (default — no scheduled tuning), \"performance\" (monthly tune for raw hashrate), or \"efficiency\" (monthly tune for hashrate-per-watt). Full descriptions: https://github.com/p2pool-starter-stack/rigforge/blob/main/docs/configuration.md#configuration-reference", + "_docs": "Reference for every config.json key, each shown with its default value. Copy ONLY the keys you actually want to change into config.json; any key you omit keeps the default shown here. The pool target is the native XMRig 'pools' array: each pool needs a 'url' (host:port); every other field falls back to a Pithead-friendly default. A pool's 'user' is the rig's label on the dashboard (defaults to the machine hostname); list multiple entries for failover. An empty ACCESS_TOKEN (the default) leaves the rig's read-only HTTP API open (no token) — which matches Pithead's default no-auth stats probe; set a value only to require a Bearer token (then match it dashboard-side). 'autotune' is one of \"disabled\" (default — no scheduled tuning), \"performance\" (monthly tune for raw hashrate), or \"efficiency\" (monthly tune for hashrate-per-watt). Full descriptions: https://github.com/p2pool-starter-stack/rigforge/blob/main/docs/configuration.md#configuration-reference", "pools": [ { "url": ":3333", "user": "", "pass": "x", "keepalive": true, "tls": false, "enabled": true } diff --git a/docs/README.md b/docs/README.md index b902ac1..5b6e486 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,8 +1,8 @@ # Documentation -Everything you need to provision, configure, and operate a **RigForge** mining worker. +Reference for provisioning, configuring, and operating a RigForge mining worker. -New here? 
Start with the [Getting Started](getting-started.md) guide — it takes you from a fresh +New here? Start with the [Getting Started](getting-started.md) guide. It takes you from a fresh Ubuntu machine to a tuned, running XMRig worker in one command. The other guides go deeper on individual topics once you're up and running. @@ -12,10 +12,10 @@ individual topics once you're up and running. |---|---| | [Getting Started](getting-started.md) | Prerequisites, installation, first-run setup, the Linux reboot, and how to verify the worker is mining. | | [Hardware Requirements](hardware.md) | Worker CPU / RAM / HugePages requirements and the per-CPU tuning profiles RigForge applies. | -| [Benchmarks](benchmarks.md) | Measured stock-vs-tuned hashrate and efficiency (H/s per watt) on real hardware, mining live — with the method and honest caveats. | +| [Benchmarks](benchmarks.md) | Measured stock-vs-tuned hashrate and efficiency (H/s per watt) on real hardware, mining live, with the method and caveats. | | [Configuration](configuration.md) | Every `config.json` key and default, minimal vs. advanced setups, and how the XMRig config is generated. | | [Operations & Maintenance](operations.md) | The full command reference, service management, logs, upgrades, and troubleshooting. | -| [How It Works](how-it-works.md) | What the script actually does — dependencies, compile-from-source, HugePages, MSR, NUMA, the governor, and the systemd service. | +| [How It Works](how-it-works.md) | What the script actually does: dependencies, compile-from-source, HugePages, MSR, NUMA, the governor, and the systemd service. | | [Pithead Integration](pithead-integration.md) | The worker ↔ dashboard contract: discovery via `:3333`, the read-only HTTP API on `:8080`, and the token rules. | | [FAQ](faq.md) | Common questions, plus why RigForge vs. setting XMRig up by hand. 
| diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 393efff..7c1124d 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -1,29 +1,27 @@ -# Benchmarks — what the tuning actually buys you +# Benchmarks -RigForge's whole pitch is "stock XMRig, but with the setup and tuning that are fiddly to get right by -hand." So: how much does that tuning actually move the needle? Every number here is **measured on real -hardware, mining live** — not synthetic `--bench` runs — so it reflects what you'd see in the wild. +What RigForge's tuning buys you over stock XMRig. Every number here is measured on real hardware, mining +live, not synthetic `--bench` runs. -> **TL;DR** — across two very different CPUs, RigForge's one-command tuning beats stock XMRig by -> **+3.5%** (desktop Ryzen 7800X3D) to **+6.6%** (48-core EPYC 7642) in hashrate, and **+7.6% / +6.0%** -> in efficiency (H/s per watt). On the EPYC it also **matched an expert's hand-tuned config**, and -> auto-dodged a CPU-specific landmine — a prefetch setting that *halves* RandomX there but *wins* on the -> X3D. All free, in one command, with the honest nuance kept in. +> TL;DR: across two very different CPUs, RigForge's one-command tuning beats stock XMRig by +3.5% +> (desktop Ryzen 7800X3D) to +6.6% (48-core EPYC 7642) in hashrate, and +7.6% / +6.0% in efficiency (H/s +> per watt). On the EPYC it also matched an expert's hand-tuned config, and auto-dodged a CPU-specific +> landmine: a prefetch setting that halves RandomX there but wins on the X3D. ## How it's measured -Each configuration runs as its own XMRig process **mining to the live pool**. After a warm-up to steady -state, we sample the **hashrate** (XMRig's HTTP API, 60-second average) and **CPU-package power** (RAPL -energy counter) over several-minute windows, repeated across rounds. RandomX is low-variance and both -rigs were thermally steady, so the means are solid (hashrate within ~0.1%). 
+Each configuration runs as its own XMRig process mining to the live pool. After a warm-up to steady +state, we sample the hashrate (XMRig's HTTP API, 60-second average) and CPU-package power (RAPL energy +counter) over several-minute windows, repeated across rounds. RandomX is low-variance and both rigs were +thermally steady, so the means are solid (hashrate within ~0.1%). The baselines: -- **Stock XMRig** — upstream `./xmrig` on a fresh box: **no explicit HugePages**, prefetcher **MSRs at - firmware default**, **default governor**. (Transparent HugePages stay at Ubuntu's `madvise` default, as - a real user would have.) -- **RigForge** — `setup`'s kernel work (2 MB + 1 GB **HugePages**, the per-family **MSR prefetcher** - preset, **`performance`** governor) plus the winning knobs from a full live `tune`. +- Stock XMRig: upstream `./xmrig` on a fresh box, with no explicit HugePages, prefetcher MSRs at firmware + default, and the default governor. Transparent HugePages stay at Ubuntu's `madvise` default, as a real user + would have. +- RigForge: `setup`'s kernel work (2 MB + 1 GB HugePages, the per-family MSR prefetcher preset, + `performance` governor) plus the winning knobs from a full live `tune`. ## Rig 1 — Ryzen 7800X3D (desktop) @@ -41,16 +39,16 @@ The baselines: | **RigForge** — tuned (efficiency) | 10,779 | 83.5 | **129.2** | | **Stock → tuned** | **+3.5%** | **−3.8%** | **+7.6%** | -Stock XMRig here burns *more* watts for *less* work — without HugePages the CPU stalls on memory, drawing -~87 W to produce *fewer* hashes; tuned is faster **and** cooler. Performance and efficiency tuning landed -on the **same** config: RigForge measured the power and found this chip pins ~84 W in any all-core setup, -so there's no hashrate-for-watts trade-off to make — and it correctly didn't invent one. +Stock XMRig here burns more watts for less work. Without HugePages the CPU stalls on memory, drawing +~87 W to produce fewer hashes; tuned is faster and cooler.
Performance and efficiency tuning landed on the +same config: RigForge measured the power and found this chip pins ~84 W in any all-core setup, so there's +no hashrate-for-watts trade-off to make, and it didn't invent one. ## Rig 2 — EPYC 7642 (48-core server) · RigForge vs an expert hand-tune -This box is the interesting one: it was already running a **hand-tuned** miner (the worker an operator had -configured for this EPYC by hand) at 36,860 H/s. So it's not just "RigForge vs naive XMRig" — it's -**RigForge's one-command auto-tune vs a human who tuned it themselves**. +This box was already running a hand-tuned miner (the worker an operator had configured for this EPYC by +hand) at 36,860 H/s. So it's not just "RigForge vs naive XMRig"; it's RigForge's one-command auto-tune +against a human who tuned it themselves. | | | |---|---| @@ -64,28 +62,27 @@ configured for this EPYC by hand) at 36,860 H/s. So it's not just "RigForge vs n | **RigForge** — tuned (XMRig 6.26) | **36,866** | 229.5 | **160.6** | | **Stock → RigForge** | **+6.6%** | ~flat | **+6.0%** | -- **RigForge matched the human expert** (within 0.02%) — and did it with a *newer* XMRig (6.26 vs 6.25), - `cpu.yield` off (the expert left it on), and **5× fewer HugePages** (266 vs 1,280 reserved, both hitting - 100%) — i.e. the same result with ~2 GB less RAM tied up. -- **The auto-tune dodged a landmine.** On this EPYC, **prefetch mode 2 *halves* the hashrate** (~17,900 - H/s) — the *exact opposite* of the 7800X3D, where mode 2 is the winner. A fixed "golden profile" would - get one of these two chips badly wrong; the per-CPU live tune measured it and stayed on the right mode - for each. -- The HugePages win is **bigger here** (+6.6% vs the X3D's +3.5%) — a 4-NUMA EPYC with a per-node dataset - leans much harder on huge pages than a single-die desktop chip. 
Efficiency and performance tuning again +- RigForge matched the human expert (within 0.02%), and did it with a newer XMRig (6.26 vs 6.25), + `cpu.yield` off (the expert left it on), and 5× fewer HugePages (266 vs 1,280 reserved, both hitting + 100%): the same result with ~2 GB less RAM tied up. +- The auto-tune dodged a landmine. On this EPYC, prefetch mode 2 halves the hashrate (~17,900 H/s), the + exact opposite of the 7800X3D, where mode 2 is the winner. A fixed "golden profile" would get one of + these two chips badly wrong; the per-CPU live tune measured it and stayed on the right mode for each. +- The HugePages win is bigger here (+6.6% vs the X3D's +3.5%): a 4-NUMA EPYC with a per-node dataset leans + much harder on huge pages than a single-die desktop chip. Efficiency and performance tuning again converged (power ~230 W in any config). ## Caveats (read before quoting a number) -- **Two CPUs, two systems — RandomX gains vary a lot.** A desktop X3D and a 48-core EPYC already differ by +- Two CPUs, two systems, and RandomX gains vary a lot. A desktop X3D and a 48-core EPYC already differ by more than 3× in raw hashrate; your CPU, RAM speed, NUMA layout, and kernel all matter. Treat the percentages as illustrative, not a guarantee. -- **Modern kernels narrow the stock gap.** Ubuntu 24.04's Transparent HugePages (`madvise`) back some of - even the "stock" allocation with 2 MB pages, so the stock baseline is *closer* to tuned than on an older - kernel or with THP off — don't expect the 20–30% some older write-ups quote. -- **`setup` does the heavy lifting; `tune` refines.** Most of the win is the system tuning; the knob - search confirms you're at the optimum (and, as the EPYC shows, keeps you off the landmines) rather than - adding a big jump on top. +- Modern kernels narrow the stock gap. 
Ubuntu 24.04's Transparent HugePages (`madvise`) back some of even + the "stock" allocation with 2 MB pages, so the stock baseline is closer to tuned than on an older kernel + or with THP off. Don't expect the 20–30% some older write-ups quote. +- `setup` does the heavy lifting; `tune` refines. Most of the win is the system tuning; the knob search + confirms you're at the optimum (and, as the EPYC shows, keeps you off the landmines) rather than adding + a big jump on top. ## Reproduce it diff --git a/docs/configuration.md b/docs/configuration.md index 0e0847c..e647218 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,23 +1,22 @@ # Configuration -RigForge is driven by a small `config.json` in the repo root. It holds only the handful of things the -script can't infer — everything else (CPU profile, thread count, HugePage sizing) is detected and -applied for you. +RigForge reads a small `config.json` in the repo root. It holds only what the script can't infer. The +rest (CPU profile, thread count, HugePage sizing) is detected and applied for you. On first run, if there's no `config.json`, `setup` creates a minimal one interactively (it asks for your pool URL). You can also pre-create one from -[`config.json.template`](../config.json.template). +[`config.minimal.json`](../config.minimal.json). -Every field is **validated** when setup parses the config — a malformed pool URL, an out-of-range port, -a bad hostname, a non-boolean flag, or an unsafe `HOME_DIR` stops setup with a clear message rather than +Setup validates every field when it parses the config. A malformed pool URL, an out-of-range port, a +bad hostname, a non-boolean flag, or an unsafe `HOME_DIR` stops setup with a message rather than producing a config the miner would reject. --- ## Minimal config -The only thing you must set is the **pool** — RigForge uses XMRig's native `pools` array, and a pool -only needs its `url` (a `host:port`). 
Everything else falls back to a sensible default: +The only required field is the pool. RigForge uses XMRig's native `pools` array, and a pool only needs +its `url` (a `host:port`). Everything else falls back to a default: ```json { @@ -27,18 +26,17 @@ only needs its `url` (a `host:port`). Everything else falls back to a sensible d } ``` -That's a complete config — replace `:3333` with your pool's host and port (Pithead's +That's a complete config. Replace `:3333` with your pool's host and port (Pithead's proxy listens on `3333`). The interactive first-run setup writes exactly this minimal shape. -> **Mining to a public pool like [SupportXMR](https://www.supportxmr.com)?** A `url` alone isn't -> enough — public pools also need your **Monero wallet** as the pool `user` (and usually a TLS port). -> Jump to [Connecting to a public pool](#connecting-to-a-public-pool-supportxmr-etc) for a copy-paste -> example. +> Mining to a public pool like [SupportXMR](https://www.supportxmr.com)? A `url` alone isn't enough: +> public pools also need your Monero wallet as the pool `user` (and usually a TLS port). See +> [Connecting to a public pool](#connecting-to-a-public-pool-supportxmr-etc) for a copy-paste example. -> **Two-tier config (like Pithead).** Keep `config.json` minimal and only add the keys you actually -> want to change. [`config.advanced.example.json`](../config.advanced.example.json) is a reference that -> lists **every** key with its default — copy in only what you need; anything you omit keeps the -> default. The reference table below documents each key. +> Two-tier config (like Pithead): keep `config.json` minimal and add only the keys you want to change. +> [`config.reference.json`](../config.reference.json) lists every key with its default. +> Copy in what you need; anything you omit keeps the default. The reference table below documents each +> key. --- @@ -46,64 +44,65 @@ proxy listens on `3333`). 
The interactive first-run setup writes exactly this mi | Key | Default | What it does | |---|---|---| -| `pools` | _(required)_ | XMRig's native pools array — the pool(s) to mine to. Each entry needs a `url` (`host:port`); every other field falls back to a Pithead default. A pool's `user` is the rig's dashboard label (defaults to the hostname). List multiple entries for failover. See [Pools](#pools-full-control). | -| `ACCESS_TOKEN` | the rig name (first pool's `user`) | The XMRig HTTP API bearer token. Leave it unset so it defaults to the rig name — **Pithead authenticates as `Bearer `**, so the token must equal the rig name (or be unset). See [Pithead Integration](pithead-integration.md). | -| `DONATION` | `1` | XMRig donate level, an integer **0–100** (percent). Patched into the build (`donate.h`) **and** written to the generated config, so it must be a valid integer or setup fails fast. | +| `pools` | *(required)* | XMRig's native pools array — the pool(s) to mine to. Each entry needs a `url` (`host:port`); every other field falls back to a Pithead default. A pool's `user` is the rig's dashboard label (defaults to the hostname). List multiple entries for failover. See [Pools](#pools-full-control). | +| `ACCESS_TOKEN` | `""` *(open)* | Optional bearer token for the XMRig HTTP API. Unset (default) leaves the read-only API open, which matches Pithead's default no-auth probe. Set a value to require a `Bearer` token, then match it on the dashboard (`workers.api_auth: token` + `workers.api_token`, or `name` if you set it to the rig name). See [Pithead Integration](pithead-integration.md). | +| `DONATION` | `1` | XMRig donate level, an integer 0–100 (percent). Patched into the build (`donate.h`) and written to the generated config, so it must be a valid integer or setup fails fast. | | `HOME_DIR` | `DYNAMIC_HOME` | Where worker files live. `DYNAMIC_HOME` puts them in `data/worker` inside the repo; set an absolute path to use `/worker` instead. 
| -| `autotune` | `"disabled"` | Periodic live tuning, as a target: `"disabled"` (default) installs no timer; `"performance"` schedules a periodic tune for **raw hashrate**; `"efficiency"` schedules one for **hashrate-per-watt** (needs a power source — built-in RAPL or `TUNE_POWER_CMD` — else it falls back to `performance` with a warning). Legacy booleans still parse (`true` → `performance`, `false` → `disabled`). This key controls the *schedule*; to run one live pass by hand, use `tune --now` (or `tune --now --long` for a full all-knob sweep). See [Operations › Live auto-tuning](operations.md#live-auto-tuning-opt-in). | -| `add_to_path` | `false` | When `true`, setup installs a `rigforge` command on your PATH (a symlink in `/usr/local/bin`) so you can run `sudo rigforge ` from any directory. Off by default — setup makes no system-wide convenience change you didn't ask for. `uninstall` removes it. | +| `autotune` | `"disabled"` | Periodic live tuning, as a target: `"disabled"` (default) installs no timer; `"performance"` schedules a periodic tune for raw hashrate; `"efficiency"` schedules one for hashrate-per-watt (needs a power source: built-in RAPL or `TUNE_POWER_CMD`; otherwise it falls back to `performance` with a warning). Legacy booleans still parse (`true` → `performance`, `false` → `disabled`). This key controls the schedule; to run one live pass by hand, use `tune --now` (or `tune --now --long` for a full all-knob sweep). See [Operations › Live auto-tuning](operations.md#live-auto-tuning-opt-in). | +| `add_to_path` | `false` | When `true`, setup installs a `rigforge` command on your PATH (a symlink in `/usr/local/bin`) so you can run `sudo rigforge ` from any directory. Off by default. `uninstall` removes it. | --- ## How the generated XMRig config is built -You don't write XMRig's config — RigForge generates the whole thing in-script and writes it into the -worker root as the live `config.json` the service runs from. +You don't write XMRig's config. RigForge generates it in-script and writes it into the worker root as +the live `config.json` the service runs from.
There's no template file to keep in sync -and no config key for it. Every run (re-runs included) rebuilds the config from four sources: +You don't write XMRig's config. RigForge generates it in-script and writes it into the worker root as +the live `config.json` the service runs from. There's no template file to keep in sync and no config +key for it. Every run (re-runs included) rebuilds the config from four sources: -1. **Your `config.json`** — the `pools` array (with `user`/`pass`/`keepalive`/`tls` and failover +1. Your `config.json`: the `pools` array (with `user`/`pass`/`keepalive`/`tls` and failover defaults filled in), the `donate-level`, and the `http` API block (bound to the LAN, read-only, - token = rig name). These are the keys documented in the [reference table](#configuration-reference). -2. **Detected hardware** — the per-CPU `cpu`/`randomx` tuning (thread count, `asm`, MSR, NUMA, - HugePages). RigForge leans on XMRig's own cache-aware auto-detection rather than a CPU-model table, + open by default; set `ACCESS_TOKEN` for a bearer token). These are the keys documented in the + [reference table](#configuration-reference). +2. Detected hardware: the per-CPU `cpu`/`randomx` tuning (thread count, `asm`, MSR, NUMA, + HugePages). RigForge uses XMRig's own cache-aware auto-detection rather than a CPU-model table, so it stays correct for CPUs it's never seen. See [Hardware Requirements](hardware.md). -3. **Static defaults** — the fixed knobs every worker shares, emitted directly: `autosave`, +3. Static defaults: the fixed knobs every worker shares, emitted directly: `autosave`, `randomx.mode: fast`, `randomx.init`, `opencl`/`cuda` off, and the `http` port `8080`. -4. **Tuned overrides** *(if present)* — if you've run [`tune`](operations.md#tuning), its winning +4. 
Tuned overrides *(if present)*: if you've run [`tune`](operations.md#tuning), its winning knobs in `tune-overrides.json` are merged on top as the final step, so tuning wins for just the keys it sets and your `config.json` is never edited. Because the config is rebuilt from these sources every time, editing the generated `config.json` by -hand is pointless — change your repo-root `config.json` (or `tune`) and re-run instead. +hand has no effect. Change your repo-root `config.json` (or `tune`) and re-run instead. -> ⚠️ **Don't put a wallet address in the worker `user` when using Pithead.** The stack handles -> payouts centrally; the pool `user` is just a rig **label** (it defaults to the hostname so you can -> tell workers apart on the dashboard). +> Don't put a wallet address in the worker `user` when using Pithead. The stack handles payouts +> centrally; the pool `user` is just a rig label (it defaults to the hostname so you can tell workers +> apart on the dashboard). --- ## Pools (full control) -The pool target is XMRig's native **`pools`** array, passed straight through to XMRig — you can use any -field XMRig supports. Only `url` is **required**; every other field has a default, so you specify only -what you care about: +The pool target is XMRig's native `pools` array, passed straight through to XMRig, so you can use any +field XMRig supports. Only `url` is required; every other field has a default, so you specify only what +you care about: | Field | Default if blank/omitted | |---|---| -| `url` | _(required)_ — `host:port` (e.g. `pool.supportxmr.com:443` or `your-stack:3333`). For an IPv6 literal, use the bracketed `[2001:db8::1]:3333` form. | -| `user` | the machine hostname. For **Pithead** this is just the rig's dashboard **label**; for a **public pool** set it to your **Monero wallet address** (see below). | -| `pass` | `"x"` — the stratum password / worker name. 
For an **open** Pithead stack the default works; if the operator enabled the stack's `p2pool.stratum_password`, set this to that secret or the proxy rejects the rig. See [Pithead Integration › Stratum authentication](pithead-integration.md#stratum-authentication-optional). | +| `url` | *(required)* — `host:port` (e.g. `pool.supportxmr.com:443` or `your-stack:3333`). For an IPv6 literal, use the bracketed `[2001:db8::1]:3333` form. | +| `user` | the machine hostname. For Pithead this is the rig's dashboard label; for a public pool set it to your Monero wallet address (see below). | +| `pass` | `"x"` — the stratum password / worker name. For an open Pithead stack the default works; if the operator enabled the stack's `p2pool.stratum_password`, set this to that secret or the proxy rejects the rig. See [Pithead Integration › Stratum authentication](pithead-integration.md#stratum-authentication-optional). | | `keepalive` | `true` | | `tls` | `false` — set `true` when you connect on the pool's TLS/SSL port. | | `enabled` | `true` | -There are two common setups — pick the one that matches where you're mining. +Two common setups follow; pick the one that matches where you're mining. ### Connecting to a Pithead stack [Pithead](https://github.com/p2pool-starter-stack/pithead) handles pool selection, payouts, and the P2Pool/XvB split centrally, so the worker only needs the stack host and its proxy port (`3333`). The -`user` is just a **label** for the dashboard — **don't put a wallet address here**: +`user` is just a label for the dashboard, so don't put a wallet address here: ```json { @@ -113,14 +112,14 @@ P2Pool/XvB split centrally, so the worker only needs the stack host and its prox } ``` -`user` is optional (it defaults to the hostname); set it to tell workers apart on the dashboard. That's -the whole story — see [Pithead Integration](pithead-integration.md) for discovery and the API token. 
+`user` is optional (it defaults to the hostname); set it to tell workers apart on the dashboard. See +[Pithead Integration](pithead-integration.md) for discovery and the API token. ### Connecting to a public pool (SupportXMR, etc.) -A public pool pays **you**, so it needs your **Monero wallet address** as the login (`user`) and almost -always a **TLS port**. RigForge builds stock upstream XMRig, so it speaks standard Stratum to any -RandomX pool — just fill in the pool's endpoint and your wallet: +A public pool pays you, so it needs your Monero wallet address as the login (`user`) and almost always a +TLS port. RigForge builds stock upstream XMRig, so it speaks standard Stratum to any RandomX pool. Fill +in the pool's endpoint and your wallet: ```json { @@ -135,11 +134,11 @@ RandomX pool — just fill in the pool's endpoint and your wallet: } ``` -- **`user` = your Monero wallet address** — this is who gets paid. Many pools also accept +- `user` is your Monero wallet address. This is who gets paid. Many pools also accept `WALLET.workername` here to label the rig in their dashboard. -- **`pass` = a worker name** (or just `"x"` — most public pools ignore the password). -- **`url` + `tls` = the pool's stratum endpoint.** Use the pool's **TLS/SSL port** (often `:443` or - `:5555`) with `"tls": true`; a plain, unencrypted port needs no `tls`. Your pool's *Getting started* / +- `pass` is a worker name (or just `"x"`; most public pools ignore the password). +- `url` + `tls` is the pool's stratum endpoint. Use the pool's TLS/SSL port (often `:443` or `:5555`) + with `"tls": true`; a plain, unencrypted port needs no `tls`. Your pool's *Getting started* / *Connect* page lists its exact host, ports, and whether it wants `wallet` or `wallet.worker`. Save that as `config.json`, then `sudo ./rigforge.sh apply` (a fresh `setup` picks it up too). 
@@ -148,7 +147,7 @@ The pool host must be an IP or DNS-resolvable hostname; allow its Stratum port t ### Backup pools (failover) -List multiple entries — XMRig tries them **in order** and fails over to the next if one is unreachable, +List multiple entries. XMRig tries them in order and fails over to the next if one is unreachable, handy for a primary stack with a public-pool fallback: ```json @@ -173,22 +172,21 @@ Edit `config.json`, then apply it in one step: sudo ./rigforge.sh apply ``` -`apply` re-reads `config.json`, regenerates the live XMRig config, and **restarts the service** — no -recompile. It's the fast, purpose-built path for a `pools` change, a new rig label, TLS, failover -pools, and the like. (On macOS there's no service, so `apply` regenerates the config and you restart -the miner yourself — see [Operations › Running on macOS](operations.md#running-on-macos).) - -You can also re-run full setup (`sudo ./rigforge.sh`), but that's meant for **re-provisioning** the -whole worker (dependencies, build, kernel tuning, service). To avoid interrupting a running miner, a -setup re-run on an already-built worker regenerates the config **without restarting** — so the new -config only takes effect on the next restart. When you just want to apply an edit, reach for `apply`; -it does the restart for you. Both are idempotent and skip the recompile when the pinned XMRig is -already built. - -> **Note on `DONATION`:** the donate level is also compiled into the XMRig binary at build time, so on -> an already-built worker neither `apply` nor a setup re-run changes it — both update only the runtime -> config. To re-patch the binary, force a rebuild: remove `/xmrig` (or bump the pinned -> XMRig) and run setup, or run [`upgrade`](operations.md#upgrading-xmrig-redeploy-after-a-git-pull) after bumping the pin. +`apply` re-reads `config.json`, regenerates the live XMRig config, and restarts the service, with no +recompile. 
It's the path for a `pools` change, a new rig label, TLS, failover pools, and the like. (On +macOS there's no service, so `apply` regenerates the config and you restart the miner yourself; see +[Operations › Running on macOS](operations.md#running-on-macos).) + +You can also re-run full setup (`sudo ./rigforge.sh`), but that re-provisions the whole worker +(dependencies, build, kernel tuning, service). To avoid interrupting a running miner, a setup re-run on +an already-built worker regenerates the config without restarting, so the new config only takes effect +on the next restart. To apply an edit, use `apply`; it does the restart for you. Both are idempotent and +skip the recompile when the pinned XMRig is already built. + +> NOTE: `DONATION` is also compiled into the XMRig binary at build time, so on an already-built worker +> neither `apply` nor a setup re-run changes it; both update only the runtime config. To re-patch the +> binary, force a rebuild: remove `/xmrig` (or bump the pinned XMRig) and run setup, or run +> [`upgrade`](operations.md#upgrading-xmrig-redeploy-after-a-git-pull) after bumping the pin. --- diff --git a/docs/faq.md b/docs/faq.md index 3416b31..b848a74 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -1,71 +1,66 @@ # FAQ -Common questions about what RigForge is, what it does for you, and how it compares to setting XMRig up -by hand. New here? Start with [Getting Started](getting-started.md); the deeper "how" lives in -[How It Works](how-it-works.md). +Common questions about RigForge and how it compares to setting XMRig up by hand. New here? Start with +[Getting Started](getting-started.md). The mechanics are in [How It Works](how-it-works.md). --- ## Why RigForge vs. doing it by hand? -You can absolutely build, tune, and run XMRig yourself — it's an excellent, well-documented miner. Doing -it by hand means: +You can build, tune, and run XMRig yourself; it's a well-documented miner. 
By hand that means: - Installing the build toolchain and compiling XMRig from source. - Reading the [RandomX optimization guide](https://xmrig.com/docs/miner/randomx-optimization-guide) and - hand-configuring HugePages (1 GB + 2 MB), MSR registers, NUMA, and thread layout **for your specific - CPU**. -- Editing GRUB for persistent HugePages — without clobbering your existing kernel parameters. + hand-configuring HugePages (1 GB + 2 MB), MSR registers, NUMA, and thread layout for your specific CPU. +- Editing GRUB for persistent HugePages, without clobbering your existing kernel parameters. - Wiring up a systemd service, a performance governor, and log rotation. - Redoing the CPU-specific parts every time you deploy a different machine. -RigForge does all of that in one command, with tuning auto-detected from your CPU, idempotent -re-runs, and a build pinned to an audited XMRig version. It's the difference between a one-off -afternoon of tuning and `sudo ./rigforge.sh`. If you enjoy hand-wiring it, the manual route is a great -learning exercise — RigForge just compiles **stock upstream XMRig**, so you're never locked into a -custom fork. +RigForge does that in one command: tuning auto-detected from your CPU, idempotent re-runs, and a build +pinned to an audited XMRig version. The manual route is a fine learning exercise. RigForge compiles stock +upstream XMRig, so you're never locked into a custom fork. --- ## Is RigForge a custom miner? -No. RigForge compiles **stock, upstream [XMRig](https://github.com/xmrig/xmrig)** — it doesn't fork or -modify the miner itself. All it changes at build time is the donate level (so your configured -`DONATION` is honored); everything else is standard XMRig plus the setup/tuning/service wrapping. +No. RigForge compiles stock, upstream [XMRig](https://github.com/xmrig/xmrig); it doesn't fork or modify +the miner. The only build-time change is the donate level, so your configured `DONATION` is honored. 
+Everything else is standard XMRig plus the setup, tuning, and service wrapping. --- ## Do I need a specific XMRig version? -No. RigForge always builds a pinned, recent upstream XMRig, and any RandomX-capable XMRig (5.0+, 2019) -speaks the standard Stratum protocol that pools and Pithead's proxy accept. There's no version coupling -between the miner and the stack. +No. RigForge builds a pinned, recent upstream XMRig. Any RandomX-capable XMRig (5.0+, 2019) speaks the +standard Stratum protocol that pools and Pithead's proxy accept. There's no version coupling between the +miner and the stack. --- ## What hardware do I need? -A 64-bit x86 CPU with **AVX2**, ~2.3 GB of free RAM for RandomX fast mode (4 GB+ recommended), and — -for the HugePages/MSR speedups — a Linux box you can reboot once. Full sizing and how the tuning is chosen -are in [Hardware Requirements](hardware.md). Hashrate scales with cores **and L3 cache** (RandomX wants -~2 MB of L3 per thread). +A 64-bit x86 CPU with AVX2, ~2.3 GB of free RAM for RandomX fast mode (4 GB+ recommended), and, for the +HugePages/MSR speedups, a Linux box you can reboot once. Sizing and how the tuning is chosen are in +[Hardware Requirements](hardware.md). Hashrate scales with cores and L3 cache (RandomX wants ~2 MB of L3 +per thread). --- ## Do I have to use Pithead? -No. RigForge points XMRig at any RandomX Stratum pool — set that pool's endpoint as a `pools[].url`. -Pithead is the **flagship integration** (the API and discovery contract is wired up out of the box), -but it's not required. See [Configuration › Pools](configuration.md#pools-full-control). +No. RigForge points XMRig at any RandomX Stratum pool: set that pool's endpoint as a `pools[].url`. +Pithead is the flagship integration (the API and discovery contract is wired up by default), but it's not +required. See [Configuration › Pools](configuration.md#pools-full-control). --- ## Do I put my wallet address in the worker? 
-**It depends on the pool.** With **Pithead** the stack handles payouts centrally, so you don't — the -worker only needs the stack host and the `user` field is just a rig label. With a **public pool** -(SupportXMR and the like) you do: the pool pays whoever logs in, so set `pools[].user` to your **Monero -wallet address**. There's a copy-paste example in +It depends on the pool. With Pithead the stack handles payouts centrally, so you don't: the worker only +needs the stack host, and the `user` field is just a rig label. With a public pool (SupportXMR and the +like) you do. The pool pays whoever logs in, so set `pools[].user` to your Monero wallet address. There's +a copy-paste example in [Configuration › Connecting to a public pool](configuration.md#connecting-to-a-public-pool-supportxmr-etc). --- @@ -73,31 +68,32 @@ wallet address**. There's a copy-paste example in ## How do I change my pool (or another setting) later? Edit `config.json`, then run `sudo ./rigforge.sh apply`. That regenerates the live XMRig config and -restarts the worker — no rebuild. `apply` is the everyday command for config edits; `setup` is for -(re-)provisioning and `upgrade` is for moving to a newer pinned XMRig. See -[Configuration › Changing settings later](configuration.md#changing-settings-later). (On macOS, `apply` -regenerates the config; run `./rigforge.sh restart` to pick it up — see [Operations › Running on macOS](operations.md#running-on-macos).) +restarts the worker, with no rebuild. `apply` is the everyday command for config edits; `setup` is for +(re-)provisioning, and `upgrade` is for moving to a newer pinned XMRig. See +[Configuration › Changing settings later](configuration.md#changing-settings-later). On macOS, `apply` +regenerates the config; run `./rigforge.sh restart` to pick it up. See +[Operations › Running on macOS](operations.md#running-on-macos). --- ## Why does it need a reboot? 
-On Linux, persistent **HugePages** are configured via GRUB, which only takes effect after a reboot — -that's the single biggest RandomX performance lever. macOS doesn't expose HugePages, so it needs no -reboot. See [How It Works › Kernel tuning](how-it-works.md#kernel--system-tuning-linux-only). +On Linux, persistent HugePages are configured via GRUB, which only takes effect after a reboot. HugePages +are the single biggest RandomX performance lever. macOS doesn't expose HugePages, so it needs no reboot. See +[How It Works › Kernel tuning](how-it-works.md#kernel--system-tuning-linux-only). --- ## I see MSR errors in the log. What's wrong? -Almost always **Secure Boot** blocking the `msr` kernel module. Disable Secure Boot in your BIOS/UEFI -and reboot. See [Operations › Troubleshooting](operations.md#troubleshooting). +Almost always Secure Boot blocking the `msr` kernel module. Disable Secure Boot in your BIOS/UEFI and +reboot. See [Operations › Troubleshooting](operations.md#troubleshooting). --- ## Is it safe to re-run the script? -Yes — `setup` is idempotent. It skips the recompile when the pinned XMRig is already built, never +Yes. `setup` is idempotent. It skips the recompile when the pinned XMRig is already built, never duplicates system-file edits (`fstab`, `limits.conf`, `/etc/modules`), merges (not overwrites) GRUB parameters, and archives a prior install rather than clobbering it. To rebuild only when the pinned version changes, use [`upgrade`](operations.md#upgrading-xmrig-redeploy-after-a-git-pull). @@ -106,31 +102,30 @@ version changes, use [`upgrade`](operations.md#upgrading-xmrig-redeploy-after-a- ## If I lose the disk (or have many machines), do I have to set up and tune each one again? -No — that's what `backup`/`restore` are for. `sudo ./rigforge.sh backup` snapshots the only -expensive-to-recreate state — your `config.json` and the tuning result — into `./backups`. After data -loss, `restore` it and re-run `setup` instead of re-tuning from scratch. +No, that's what `backup`/`restore` are for. `sudo ./rigforge.sh backup` snapshots the only +expensive-to-recreate state (your `config.json` and the tuning result) into `./backups`. After data loss, +`restore` it and re-run `setup` instead of re-tuning from scratch.
For a **fleet**, tune one -machine, back it up, and `restore` the archive on each identical machine so they all share the same -config and tuning. Tuning is CPU-specific, so only reuse it between **identical** CPUs. See +No, that's what `backup`/`restore` are for. `sudo ./rigforge.sh backup` snapshots the only +expensive-to-recreate state (your `config.json` and the tuning result) into `./backups`. After data loss, +`restore` it and re-run `setup` instead of re-tuning from scratch. For a fleet, tune one machine, back it +up, and `restore` the archive on each identical machine so they all share the same config and tuning. +Tuning is CPU-specific, so only reuse it between identical CPUs. See [Operations › Backup & restore](operations.md#backup--restore). --- ## Does the worker need Tor? -No. Workers talk to the pool/stack over plain Stratum on your **local network**. Tor (for privacy and -no port-forwarding) is a stack-host concern, handled by Pithead — not the miner. +No. Workers talk to the pool/stack over plain Stratum on your local network. Tor (for privacy and no +port-forwarding) is a stack-host concern, handled by Pithead, not the miner. --- ## Is macOS supported? -macOS works for development and light use — RigForge builds and configures XMRig there — but **Ubuntu -is the supported deployment target**. The Linux-only tuning (HugePages, MSR, systemd, governor) doesn't -apply on macOS, which the macOS CPU profile accounts for, so the hashrate is lower than a tuned Linux -box. There's no systemd service either, so the miner doesn't auto-start — launch it with -`./rigforge.sh start` (the same `start`/`stop`/`restart`/`status`/`logs` verbs work on macOS). Full -details — what differs, how to run it, and which commands are Linux-only — are in -[Operations › Running on macOS](operations.md#running-on-macos). +macOS works for development and light use (RigForge builds and configures XMRig there), but Ubuntu is the +supported deployment target. 
The Linux-only tuning (HugePages, MSR, systemd, governor) doesn't apply on +macOS, which the macOS CPU profile accounts for, so the hashrate is lower than a tuned Linux box. There's +no systemd service either, so the miner doesn't auto-start; launch it with `./rigforge.sh start` (the same +`start`/`stop`/`restart`/`status`/`logs` verbs work on macOS). For what differs, how to run it, and which +commands are Linux-only, see [Operations › Running on macOS](operations.md#running-on-macos). --- diff --git a/docs/getting-started.md b/docs/getting-started.md index 60997a8..c251d45 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -1,16 +1,18 @@ # Getting Started -This guide takes you from a fresh machine to a tuned, running XMRig worker. The whole process is -driven by a single script — `rigforge.sh` — and most of it is automated. +Install and tune an XMRig worker from a fresh machine. A single script, `rigforge.sh`, drives the +whole process, and most of it is automated. > **TL;DR** +> > ```bash > git clone https://github.com/p2pool-starter-stack/rigforge.git > cd rigforge > chmod +x rigforge.sh > sudo ./rigforge.sh > ``` -> Answer one prompt (your pool URL), let it build, and — on Linux — reboot once to apply the +> +> Answer one prompt (your pool URL), let it build, and on Linux reboot once to apply the > kernel tuning. The `xmrig` service starts automatically after the reboot. --- @@ -21,15 +23,15 @@ driven by a single script — `rigforge.sh` — and most of it is automated. |---|---| | **Operating system** | Ubuntu Server **22.04+** (or Debian 12) is the officially supported target. macOS works for development and light use; other Linux distros are courtesy. | | **CPU** | 64-bit x86 with **AVX2** is strongly recommended for RandomX performance. More and faster cores mean more hashrate. | -| **RAM** | **~2.3 GB free** for RandomX *fast mode* (a 2080 MB dataset + 256 MB cache), plus ~2 MB of L3 cache per mining thread. **4 GB+** recommended.
| -| **Privileges** | `root` (the script installs packages and tunes the kernel — run it with `sudo`). | +| **RAM** | **~2.3 GB free** for RandomX fast mode (a 2080 MB dataset + 256 MB cache), plus ~2 MB of L3 cache per mining thread. **4 GB+** recommended. | +| **Privileges** | `root`. The script installs packages and tunes the kernel, so run it with `sudo`. | | **Network** | The worker must reach your pool / stack host on its Stratum port (Pithead uses **3333**). Workers run on a trusted LAN and do **not** need Tor. | -> 📐 **Full sizing guidance** — minimum vs. recommended specs and the per-CPU tuning profiles — is in -> **[Hardware Requirements](hardware.md)**. The **stack host** these workers connect to is sized +> 📐 Full sizing guidance, including minimum vs. recommended specs and the per-CPU tuning profiles, +> is in **[Hardware Requirements](hardware.md)**. The **stack host** these workers connect to is sized > separately in [Pithead's hardware guide](https://github.com/p2pool-starter-stack/pithead/blob/main/docs/hardware.md). -You don't need to install build dependencies yourself — RigForge installs the toolchain (`cmake`, +You don't need to install build dependencies yourself. RigForge installs the toolchain (`cmake`, `libuv`, `hwloc`, …) for you on first run. You only need `git` to clone the repo. --- @@ -42,16 +44,16 @@ cd rigforge chmod +x rigforge.sh ``` -Have your **pool URL** ready — a `host:port`. For a Pithead stack that's the stack machine's address -and its proxy port `3333` (e.g. `stack.lan:3333`); with Pithead you do **not** need a wallet — the stack -handles payouts centrally. +Have your **pool URL** ready, a `host:port`. For a Pithead stack that's the stack machine's address +and its proxy port `3333` (e.g. `stack.lan:3333`); with Pithead you do **not** need a wallet, since the +stack handles payouts centrally. -> **Mining to a public pool (SupportXMR, etc.) 
instead of Pithead?** Public pools pay **you**, so they -> expect your **Monero wallet address as the login** (and usually a TLS port). The first-run prompt only +> Mining to a public pool (SupportXMR, etc.) instead of Pithead? Public pools pay **you**, so they +> expect your Monero wallet address as the login (and usually a TLS port). The first-run prompt only > asks for the pool URL, so afterwards set `pools[].user` to your wallet (and `tls`) and run -> `sudo ./rigforge.sh apply` — there's a copy-paste example in +> `sudo ./rigforge.sh apply`. There's a copy-paste example in > [Configuration › Connecting to a public pool](configuration.md#connecting-to-a-public-pool-supportxmr-etc). -> Otherwise your hashes credit the rig's hostname, not you. +> Otherwise your hashes credit the rig's hostname instead of you. --- @@ -64,8 +66,8 @@ sudo ./rigforge.sh `setup` is the default command and is safe to re-run. On a fresh machine it walks through: 1. **Dependencies.** Installs the build toolchain and runtime libraries for your OS. -2. **First-run config.** If there's no `config.json`, it asks for the one thing it needs — your - **pool URL** — and writes a minimal config. (You can also pre-create one; see +2. **First-run config.** If there's no `config.json`, it asks for the one thing it needs, your + **pool URL**, and writes a minimal config. (You can also pre-create one; see [Configuration](configuration.md).) 3. **Build.** Clones and compiles XMRig from source, pinned to a known version/commit and patched to your `DONATION` level. Build output is captured to a logfile. @@ -81,16 +83,16 @@ built, and it won't duplicate the kernel/limits edits. Later on, to apply a `con [`apply`](operations.md#applying-configuration-changes); to rebuild only when the pinned version changed, use [`upgrade`](operations.md#upgrading-xmrig-redeploy-after-a-git-pull). 
-**Optional — a `rigforge` command on your PATH.** Set `"add_to_path": true` in `config.json` and setup -installs a `rigforge` symlink in `/usr/local/bin`, so you can run it from any directory — `sudo rigforge -doctor`, `sudo rigforge tune`, … — instead of `./rigforge.sh`. It's **off by default**, so this guide +**Optional: a `rigforge` command on your PATH.** Set `"add_to_path": true` in `config.json` and setup +installs a `rigforge` symlink in `/usr/local/bin`, so you can run it from any directory (`sudo rigforge +doctor`, `sudo rigforge tune`, …) instead of `./rigforge.sh`. It's **off by default**, so this guide uses `./rigforge.sh` throughout. (`uninstall` removes the symlink.) --- ## 4. Reboot (Linux only) -To apply HugePages and the other kernel tuning, a reboot is **required** on Linux — the script tells +To apply HugePages and the other kernel tuning, a reboot is **required** on Linux. The script tells you when: ```bash @@ -117,9 +119,9 @@ grep -i msr data/worker/xmrig.log # MSR mod applied (no errors) ``` `` is `data/worker` inside the repo by default. If you see MSR errors, you may need to -**disable Secure Boot** in your BIOS/UEFI — see [Operations › Troubleshooting](operations.md#troubleshooting). +**disable Secure Boot** in your BIOS/UEFI; see [Operations › Troubleshooting](operations.md#troubleshooting). -> **On macOS** the steps above (the `systemd` service and the HugePages/MSR checks) don't apply — +> **On macOS** the steps above (the `systemd` service and the HugePages/MSR checks) don't apply: > there's no service and no kernel tuning. Start the miner yourself with `./rigforge.sh start` (then > `status` / `logs` / `stop`); see [Operations › Running on macOS](operations.md#running-on-macos). @@ -127,6 +129,6 @@ grep -i msr data/worker/xmrig.log # MSR mod applied (no errors) ## Next steps -- [Configuration](configuration.md) — every config key, and how the XMRig config is generated. 
-- [Operations & Maintenance](operations.md) — the command reference, logs, upgrades, troubleshooting. -- [Pithead Integration](pithead-integration.md) — how the dashboard discovers and reads each worker. +- [Configuration](configuration.md): every config key, and how the XMRig config is generated. +- [Operations & Maintenance](operations.md): the command reference, logs, upgrades, troubleshooting. +- [Pithead Integration](pithead-integration.md): how the dashboard discovers and reads each worker. diff --git a/docs/hardware.md b/docs/hardware.md index 430fb2e..0f4bf39 100644 --- a/docs/hardware.md +++ b/docs/hardware.md @@ -1,11 +1,10 @@ # Hardware Requirements -A worker is where the actual RandomX hashing happens, so its **CPU is what determines your -hashrate**. The requirements themselves are modest — most of the performance comes from tuning, -which RigForge applies for you. +A worker is where the RandomX hashing happens, so its CPU determines your hashrate. The requirements +are modest; most of the performance comes from tuning, which RigForge applies for you. -> This page sizes the **miner**. The **stack host** your workers connect to (Monero node, P2Pool, -> proxy, dashboard) is sized separately — see Pithead's +> This page sizes the miner. The stack host your workers connect to (Monero node, P2Pool, proxy, +> dashboard) is sized separately; see Pithead's > [Hardware Requirements](https://github.com/p2pool-starter-stack/pithead/blob/main/docs/hardware.md). --- @@ -14,35 +13,35 @@ which RigForge applies for you. | Resource | Requirement | Recommended | |---|---|---| -| **CPU** | 64-bit x86 with **AVX2** support | A high-core-count CPU (e.g. AMD Ryzen / EPYC) — more and faster cores mean more hashrate. XMRig auto-detects the CPU and sizes the tuning to it. | -| **RAM** | **~2.3 GB free** for RandomX *fast mode* — a 2080 MB dataset + 256 MB cache — plus **~2 MB of L3 cache per mining thread** | **4 GB+**; budget more on high-core-count CPUs. 
| -| **HugePages** | Optional, but a significant speedup | RigForge configures **2 MB and 1 GB** HugePages (plus MSR access) for you — Linux only, and it needs a **reboot** to take effect. | -| **OS** | Ubuntu 22.04+, Debian 12, or macOS | Ubuntu is the supported target. | -| **Network** | Reach your pool / stack host on its Stratum port (Pithead uses **3333**) | Local network; workers do **not** need Tor. | - -> RandomX *light mode* needs only 256 MB of RAM but is far slower — **fast mode** (the default) is -> what you want for real hashrate. These figures are from XMRig's own +| CPU | 64-bit x86 with AVX2 support | A high-core-count CPU (e.g. AMD Ryzen / EPYC); more and faster cores mean more hashrate. XMRig auto-detects the CPU and sizes the tuning to it. | +| RAM | ~2.3 GB free for RandomX fast mode (a 2080 MB dataset + 256 MB cache), plus ~2 MB of L3 cache per mining thread | 4 GB+; budget more on high-core-count CPUs. | +| HugePages | Optional, but a significant speedup | RigForge configures 2 MB and 1 GB HugePages (plus MSR access) for you. Linux only, and it needs a reboot to take effect. | +| OS | Ubuntu 22.04+, Debian 12, or macOS | Ubuntu is the supported target. | +| Network | Reach your pool / stack host on its Stratum port (Pithead uses 3333) | Local network; workers do not need Tor. | + +> RandomX light mode needs only 256 MB of RAM but is far slower; fast mode (the default) is what you +> want for real hashrate. These figures are from XMRig's own > [RandomX optimization guide](https://xmrig.com/docs/miner/randomx-optimization-guide). ### A note on L3 cache -RandomX is bottlenecked by **L3 cache**, not core count alone: each mining thread wants ~2 MB of L3. -A CPU with lots of cores but little L3 can't feed every core, so the effective thread count is -roughly `L3 size ÷ 2 MB`. This is exactly the math XMRig's auto thread-sizing uses (and the +RandomX is bottlenecked by L3 cache, not core count alone: each mining thread wants ~2 MB of L3. 
A CPU +with lots of cores but little L3 can't feed every core, so the effective thread count is roughly +`L3 size ÷ 2 MB`. This is the math XMRig's auto thread-sizing uses (and the `util/proposed-grub.sh` HugePage sizing helper). --- ## What "tuning" actually does -The bulk of a worker's performance comes from configuration RigForge applies automatically, not from -the raw silicon: +Most of a worker's performance comes from configuration RigForge applies automatically, not from the +raw silicon: -- **HugePages (1 GB + 2 MB)** — reduces TLB misses on the 2 GB RandomX dataset. Biggest single win. -- **MSR registers** — `randomx.wrmsr` tells XMRig to disable the hardware prefetchers (they hurt - RandomX's random access pattern); XMRig auto-detects your CPU family and applies the right preset. -- **Thread count, ASM, NUMA** — XMRig auto-detects these from the CPU topology (see below). -- **Performance governor** — `cpupower` pins the CPU to its performance frequency under load. +- HugePages (1 GB + 2 MB): reduces TLB misses on the 2 GB RandomX dataset. Biggest single win. +- MSR registers: `randomx.wrmsr` tells XMRig to disable the hardware prefetchers (they hurt + RandomX's random access pattern). XMRig auto-detects your CPU family and applies the right preset. +- Thread count, ASM, NUMA: XMRig auto-detects these from the CPU topology (see below). +- Performance governor: `cpupower` pins the CPU to its performance frequency under load. The full mechanics are in [How It Works](how-it-works.md). @@ -50,26 +49,26 @@ The full mechanics are in [How It Works](how-it-works.md). ## How RigForge tunes -RigForge **does not** keep a table of CPU models. Instead it relies on XMRig's own cache-aware -auto-detection and layers on a few defaults that make sense because the box is a **dedicated** miner: +RigForge does not keep a table of CPU models. 
It relies on XMRig's own cache-aware auto-detection and +adds a few defaults that make sense because the box is a dedicated miner: | Setting | Value | Why | |---|---|---| -| `cpu.rx` | `-1` (auto) | XMRig sizes the thread count to L3 cache (~2 MB/thread) from detected topology — correct on EPYC, Ryzen, Intel hybrid, and X3D (incl. dual-CCD parts) alike. | +| `cpu.rx` | `-1` (auto) | XMRig sizes the thread count to L3 cache (~2 MB/thread) from detected topology; correct on EPYC, Ryzen, Intel hybrid, and X3D (incl. dual-CCD parts) alike. | | `cpu.asm` | `auto` | XMRig picks the Ryzen / Intel / Bulldozer assembly path for the detected CPU. | | `randomx.wrmsr` | `true` | Auto-applies the correct per-family MSR preset (needs root + the `msr` module). | -| `randomx.numa` | `true` | A no-op on single-NUMA machines; on multi-NUMA CPUs it gives each node its own dataset copy. Note a single-socket EPYC can still expose several NUMA nodes — so RigForge sizes the 1 GB HugePage reservation per NUMA node, not per socket. | +| `randomx.numa` | `true` | A no-op on single-NUMA machines; on multi-NUMA CPUs it gives each node its own dataset copy. Note a single-socket EPYC can still expose several NUMA nodes, so RigForge sizes the 1 GB HugePage reservation per NUMA node rather than per socket. | | `cpu.yield` | `false` | Busy-wait for maximum hashrate (we own the whole machine). | | `cpu.priority` | `2` | Wins scheduling vs. background daemons (XMRig warns >2 can hang a desktop). | | `cpu.huge-pages` / `randomx.1gb-pages` | `true` (Linux) | The single biggest lever; see below. | -> **Why not a per-model lookup table?** XMRig's auto-config is cache-aware and updated every release, -> so it gets thread placement right for CPUs a static table would miss or mishandle — e.g. **dual-CCD -> X3D** parts (7950X3D/7900X3D), where only one CCD has the V-cache and blindly using *all* cores -> would push threads onto the slow CCD. Letting XMRig decide is both simpler and more correct. 
+> Why not a per-model lookup table? XMRig's auto-config is cache-aware and updated every release, so it +> gets thread placement right for CPUs a static table would miss or mishandle. Take dual-CCD X3D parts +> (7950X3D/7900X3D), where only one CCD has the V-cache and using all cores would push threads onto the +> slow CCD. Letting XMRig decide is both simpler and more correct. -The only branch that remains is **OS-level**: macOS has no HugePages or MSRs, so those are disabled and -the API binds IPv6 `::` instead of `0.0.0.0`. +The only branch that remains is OS-level: macOS has no HugePages or MSRs, so those are disabled and the +API binds IPv6 `::` instead of `0.0.0.0`. The resulting XMRig config (pools, donate level, API, CPU section) lives under your worker root; see [Configuration](configuration.md#how-the-generated-xmrig-config-is-built) for how it's generated. diff --git a/docs/how-it-works.md b/docs/how-it-works.md index 56efdd0..130043e 100644 --- a/docs/how-it-works.md +++ b/docs/how-it-works.md @@ -1,9 +1,8 @@ # How It Works -RigForge is **not a custom miner**. It compiles stock, upstream [XMRig](https://github.com/xmrig/xmrig) -and wraps it in the setup, hardware tuning, and service management that are otherwise fiddly to get -right by hand. This page explains what the script actually does, step by step — the RandomX analogue of -an architecture doc. +RigForge compiles stock, upstream [XMRig](https://github.com/xmrig/xmrig) and wraps it in the setup, +hardware tuning, and service management that are otherwise fiddly to get right by hand. It is not a +custom miner. This page describes what the script does, step by step. --- @@ -12,25 +11,25 @@ an architecture doc. A `setup` run executes these stages in order. Each is idempotent, so re-running skips work that's already done. -1. **Prerequisites** — detects the OS (Linux vs. macOS) and installs `jq` if it's missing. Privileged +1. Prerequisites: detect the OS (Linux vs. 
macOS) and install `jq` if it's missing. Privileged steps use `sudo` as needed, so run the script with `sudo` (or as root). -2. **Config** — creates a minimal `config.json` interactively if none exists, then parses and validates +2. Config: create a minimal `config.json` interactively if none exists, then parse and validate it (see [Configuration](configuration.md)). -3. **Rebuild decision** — figures out whether XMRig actually needs (re)building, based on the pinned +3. Rebuild decision: decide whether XMRig needs (re)building, based on the pinned version/commit vs. what's already compiled. -4. **Workspace** — prepares the worker root; any prior install is **archived, not clobbered**, and old - archives are pruned so re-runs don't leak disk. -5. **Dependencies** — installs the build toolchain and runtime libraries for the OS (`cmake`, `libuv`, +4. Workspace: prepare the worker root. Any prior install is archived rather than clobbered, and + old archives are pruned so re-runs don't leak disk. +5. Dependencies: install the build toolchain and runtime libraries for the OS (`cmake`, `libuv`, `hwloc`, OpenSSL, …). -6. **Compile** — clones XMRig at the pinned commit, patches the donate level, and builds it. Output is +6. Compile: clone XMRig at the pinned commit, patch the donate level, and build it. Output is captured to a logfile; `make -j` is capped by available RAM to avoid OOM on low-memory hosts. -7. **Generate config** — detects the CPU and writes the tuned XMRig `config.json` (pools, donate level, +7. Generate config: detect the CPU and write the tuned XMRig `config.json` (pools, donate level, HTTP API, and the per-CPU `cpu`/`randomx` sections). -8. **Kernel tuning (Linux)** — HugePages, MSR, and module loading. -9. **Limits (Linux)** — `hugetlbfs` mounts, `fstab`, and memlock limits. -10. **Service (Linux)** — installs and enables the `xmrig` systemd unit with a performance governor and +8. Kernel tuning (Linux): HugePages, MSR, and module loading. +9. 
Limits (Linux): `hugetlbfs` mounts, `fstab`, and memlock limits. +10. Service (Linux): install and enable the `xmrig` systemd unit with a performance governor and log rotation. -11. **Finish** — prints next steps (and, if the kernel was tuned, the reboot prompt). +11. Finish: print next steps (and, if the kernel was tuned, the reboot prompt). --- @@ -38,73 +37,73 @@ already done. RigForge builds XMRig from source rather than shipping a binary: -- **Pinned** to a known `XMRIG_VERSION` / `XMRIG_COMMIT`, and the checkout is **verified against the - pinned commit** (`git rev-parse HEAD` must match `XMRIG_COMMIT`, or the build aborts) — so every - worker runs the same audited source, and supply-chain risk is bounded. -- **Donate level patched at build time.** The configured `DONATION` is `sed`'d into `donate.h` so the +- Pinned to a known `XMRIG_VERSION` / `XMRIG_COMMIT`. The checkout is verified against the pinned + commit (`git rev-parse HEAD` must match `XMRIG_COMMIT`, or the build aborts), so every worker runs the + same audited source and supply-chain risk is bounded. +- Donate level patched at build time. The configured `DONATION` is `sed`'d into `donate.h` so the compiled binary honors it (XMRig's floor is otherwise 1%). It's also written into the runtime config. Because this patch happens during the compile, changing `DONATION` after XMRig is already built only - updates the runtime config — re-patching the binary requires a rebuild (see + updates the runtime config; re-patching the binary requires a rebuild (see [Configuration](configuration.md#changing-settings-later)). -- **Memory-guarded parallelism.** `make -j` is capped based on available RAM, so the build doesn't OOM +- Memory-guarded parallelism. `make -j` is capped based on available RAM, so the build doesn't OOM on small machines. -- **Idempotent.** If the pinned build already exists, setup skips the (slow) recompile entirely; the +- Idempotent. 
If the pinned build already exists, setup skips the recompile. The [`upgrade`](operations.md#upgrading-xmrig-redeploy-after-a-git-pull) command rebuilds only when the pin changes. --- ## Hardware tuning -The hashrate win comes from configuration, not the silicon alone. RigForge leans on XMRig's own +Most of the hashrate comes from configuration, not the silicon. RigForge uses XMRig's own auto-detection and adds dedicated-miner defaults: -- **Auto-detected thread count, ASM path, MSR preset and NUMA** — XMRig reads the CPU topology and +- Auto-detected thread count, ASM path, MSR preset and NUMA. XMRig reads the CPU topology and sizes everything to it (`cpu.rx: -1`, `cpu.asm: auto`, `randomx.wrmsr: true`, `randomx.numa: true`), which stays correct for CPUs a model-name table would miss. See [Hardware › How RigForge tunes](hardware.md#how-rigforge-tunes). -- **Dedicated-miner defaults** — `cpu.yield: false` (busy-wait for max hashrate) and `cpu.priority: 2`. -- **RandomX fast mode** — the full 2 GB dataset in memory for maximum hashrate. -- **Thread layout sized to L3** — RandomX wants ~2 MB of L3 per thread; XMRig sizes threads to the - detected L3 rather than blindly using every core. +- Dedicated-miner defaults: `cpu.yield: false` (busy-wait for max hashrate) and `cpu.priority: 2`. +- RandomX fast mode: the full 2 GB dataset in memory for maximum hashrate. +- Thread layout sized to L3. RandomX wants ~2 MB of L3 per thread, so XMRig sizes threads to the + detected L3 rather than using every core. --- ## Measured tuning: the `tune` search -The defaults above are good, but a handful of knobs have a best value that genuinely varies per CPU: the -RandomX prefetch mode, `cpu.yield`, the **thread count and placement** (`cpu.rx`), `1gb-pages`, and — opt -in — `cpu.huge-pages-jit` and `randomx.cache_qos`. The `tune` command **measures** rather than guesses. 
-By default it's an iterative, noise-aware **coordinate hill-climb**: +The defaults above are good, but a handful of knobs have a best value that varies per CPU: the RandomX +prefetch mode, `cpu.yield`, the thread count and placement (`cpu.rx`), `1gb-pages`, and, opt-in, +`cpu.huge-pages-jit` and `randomx.cache_qos`. The `tune` command measures the best value instead of +guessing. By default it's an iterative, noise-aware coordinate hill-climb: -1. **Seed.** Start from two candidate configurations — XMRig's auto baseline and an educated guess — so +1. Seed. Start from two candidate configurations, XMRig's auto baseline and an educated guess, so the search can escape a local optimum one seed happens to land in. -2. **Climb.** Sweep one knob at a time; for each, benchmark its candidate values (holding the others - fixed) and adopt the best — but only if it beats the current best by a minimum relative margin, so +2. Climb. Sweep one knob at a time. For each, benchmark its candidate values (holding the others + fixed) and adopt the best, but only if it beats the current best by a minimum relative margin, so benchmark noise can't masquerade as a win. -3. **Repeat until plateau.** Run passes over all knobs until a full pass yields no improvement, or a +3. Repeat until plateau. Run passes over all knobs until a full pass yields no improvement, or a round cap is hit. For a small knob space where you'd rather not risk a local optimum at all, `TUNE_SEARCH=grid` switches to -an **exhaustive** search of every combination — slower, but guaranteed to find the global best. +an exhaustive search of every combination: slower, but guaranteed to find the global best. -The thread search is **SMT-aware**: rather than only nudging ±1 around the L3 ÷ 2 MB estimate, it also -tries XMRig's own auto value and the **physical-** and **logical-core** counts, because RandomX often -peaks at one thread per physical core (SMT siblings share the L2/L3 each thread needs). 
+The thread search is SMT-aware. Rather than only nudging ±1 around the L3 ÷ 2 MB estimate, it also +tries XMRig's own auto value and the physical- and logical-core counts, because RandomX often peaks at +one thread per physical core (SMT siblings share the L2/L3 each thread needs). -A few design choices keep it honest and cheap on jittery RandomX hardware: +A few design choices keep it reliable and cheap on jittery RandomX hardware: -- **Median, not max.** Each candidate is measured as the median of several `xmrig --bench` runs, so one +- Median over max. Each candidate is measured as the median of several `xmrig --bench` runs, so one lucky spike doesn't crown a worse config. -- **Contention-free.** In `--bench` mode `tune` stops the miner service for the run (restarting it - after, even if interrupted), so the benchmark isn't fighting a live miner for cores and huge pages — - the single biggest source of bogus readings. -- **Memoized.** Because a coordinate climb keeps revisiting the current point, every measured - combination is cached — a combo is never benchmarked twice. +- Contention-free. In `--bench` mode `tune` stops the miner service for the run (restarting it + after, even if interrupted), so the benchmark isn't fighting a live miner for cores and huge pages. + That contention is the biggest source of bogus readings. +- Memoized. Because a coordinate climb keeps revisiting the current point, every measured + combination is cached, so a combo is never benchmarked twice. -Reboot-bound knobs are handled explicitly: `1gb-pages` only matters once 1G HugePages are reserved (a -GRUB change + reboot), so the search sweeps it only when they're actually present and otherwise skips it -with a note. The winning knobs are written to a separate overlay file (`tune-overrides.json`) that's -merged into the generated config — your `config.json` is never edited. +Reboot-bound knobs are handled explicitly. 
`1gb-pages` only matters once 1G HugePages are reserved (a +GRUB change plus reboot), so the search sweeps it only when they're actually present and otherwise skips +it with a note. The winning knobs are written to a separate overlay file (`tune-overrides.json`) that's +merged into the generated config, so your `config.json` is never edited. ### Tuning environment variables @@ -112,7 +111,7 @@ Every part of the search is overridable; the defaults favour a thorough one-time | Env var | Default | Meaning | |---|---|---| -| `TUNE_SEARCH` | `climb` | `climb` (hill-climb, fast) or `grid` (exhaustive over all knob combos, robust but slower). | +| `TUNE_SEARCH` | `climb` | `climb` (hill-climb, fast) or `grid` (exhaustive over all knob combos, thorough but slower). | | `TUNE_ITERS` | `5` | Benchmark runs per candidate; the median is used. | | `TUNE_BENCH` | `10M` | `xmrig --bench` size. Longer = steadier and closer to sustained load; set `1M` for a quick pass. | | `TUNE_MIN_DELTA` | `0.01` | Minimum *relative* gain (1%) needed to adopt a change. | @@ -120,43 +119,43 @@ Every part of the search is overridable; the defaults favour a thorough one-time | `TUNE_SEEDS` | `auto guess` | Starting points to climb from. | | `TUNE_PREFETCH_MODES` | `0 1 2 3` | Prefetch-mode candidates. | | `TUNE_YIELDS` | `true false` | `cpu.yield` candidates. | -| `TUNE_THREADS` | _(auto: SMT-aware set)_ | `cpu.rx` thread-count candidates. Defaults to auto + physical/logical cores + an L3 window; override with an explicit list. | +| `TUNE_THREADS` | *(auto: SMT-aware set)* | `cpu.rx` thread-count candidates. Defaults to auto + physical/logical cores + an L3 window; override with an explicit list. | | `TUNE_PRIORITIES` | `2` | `cpu.priority` candidates (single value ⇒ knob off; set e.g. `1 2 3 4 5` to sweep). | -| `TUNE_HPJIT` | _(off)_ | Set `false true` to sweep `cpu.huge-pages-jit` (XMRig: small Ryzen boost, unstable hashrate). 
| -| `TUNE_CACHEQOS` | _(off)_ | Set `false true` to sweep `randomx.cache_qos` (Intel L3 Cache Allocation Technology). | -| `TUNE_WRMSR` | _(off)_ | Sweep the `randomx.wrmsr` MSR preset, e.g. `true false` (or a preset number). Rarely needed — XMRig auto-picks the right preset; set this only to confirm it on unusual hardware. Applied per-bench, no reboot. | -| `TUNE_POWER_CMD` | _(RAPL)_ | Override the power source with a shell command that echoes **instantaneous watts** (IPMI, a smart plug, wall-AC). Without it, the built-in CPU-package RAPL reader is used on Linux. | -| `TUNE_TARGET` | _(follows `autotune` config)_ | Optimize for `perf` (raw H/s) or `efficiency` (hashrate-per-watt). Defaults to the `autotune` config value (so a manual `tune` matches the scheduled run); `--perf`/`--efficiency` or this env var override. Efficiency needs a power source or falls back to `perf`. | -| `TUNE_TEMP_CMD` | _(Linux thermal zone)_ | Optional shell command that echoes °C; defaults to `/sys/class/thermal/thermal_zone0/temp`. | +| `TUNE_HPJIT` | *(off)* | Set `false true` to sweep `cpu.huge-pages-jit` (XMRig: small Ryzen boost, unstable hashrate). | +| `TUNE_CACHEQOS` | *(off)* | Set `false true` to sweep `randomx.cache_qos` (Intel L3 Cache Allocation Technology). | +| `TUNE_WRMSR` | *(off)* | Sweep the `randomx.wrmsr` MSR preset, e.g. `true false` (or a preset number). Rarely needed, because XMRig auto-picks the right preset; set this only to confirm it on unusual hardware. Applied per-bench, no reboot. | +| `TUNE_POWER_CMD` | *(RAPL)* | Override the power source with a shell command that echoes **instantaneous watts** (IPMI, a smart plug, wall-AC). Without it, the built-in CPU-package RAPL reader is used on Linux. | +| `TUNE_TARGET` | *(follows `autotune` config)* | Optimize for `perf` (raw H/s) or `efficiency` (hashrate-per-watt). Defaults to the `autotune` config value (so a manual `tune` matches the scheduled run); `--perf`/`--efficiency` or this env var override.
Efficiency needs a power source or falls back to `perf`. | +| `TUNE_TEMP_CMD` | *(Linux thermal zone)* | Optional shell command that echoes °C; defaults to `/sys/class/thermal/thermal_zone0/temp`. | ### Power & efficiency -RandomX hashrate isn't free, so `tune` records **watts per candidate** and can rank by **hashrate-per-watt**. -On Linux it reads the CPU-package energy counter (RAPL) automatically — no configuration, run as root. -Watts are sampled **under load and averaged over the measurement window**, so the figure reflects real -mining power. `tune --efficiency` (or `TUNE_TARGET=efficiency`) then picks the most efficient config rather -than the raw-fastest — useful for a power-cost or heat/PSU-constrained rig; without a power source it warns -and falls back to `perf`. To measure whole-system wall power instead of the CPU package alone, point -`TUNE_POWER_CMD` at a source that echoes instantaneous watts. +RandomX hashrate isn't free, so `tune` records watts per candidate and can rank by hashrate-per-watt. +On Linux it reads the CPU-package energy counter (RAPL) automatically when run as root, with no +configuration needed. Watts are sampled under load and averaged over the measurement window, so the figure reflects real +mining power. `tune --efficiency` (or `TUNE_TARGET=efficiency`) then picks the most efficient config +rather than the raw-fastest, useful for a power-cost or heat/PSU-constrained rig; without a power source +it warns and falls back to `perf`. To measure whole-system wall power instead of the CPU package alone, +point `TUNE_POWER_CMD` at a source that echoes instantaneous watts. -The **periodic `autotune`** takes the same target: set `"autotune": "efficiency"` in `config.json` and the +The periodic `autotune` takes the same target: set `"autotune": "efficiency"` in `config.json` and the scheduled run ranks prefetch modes by hashrate-per-watt (sampling watts over the same live window), instead -of `"performance"`'s raw H/s. 
The target is baked into the systemd unit at setup; same RAPL/`TUNE_POWER_CMD` +of `"performance"`'s raw H/s. The target is baked into the systemd unit at setup; the same RAPL/`TUNE_POWER_CMD` sources and the same fall-back-to-`perf`-with-a-warning behavior apply. See [Operations → Live auto-tuning](operations.md#live-auto-tuning-opt-in). -> **`hs_per_watt` is relative, not absolute.** It only compares candidates measured by the **same method on -> the same machine**. Built-in RAPL counts the **CPU package only** (not RAM, board, PSU loss); a smart plug -> counts **whole-wall AC**. Don't compare the number across methods or across rigs. +> NOTE: `hs_per_watt` is relative, not absolute. It only compares candidates measured by the same method on +> the same machine. Built-in RAPL counts the CPU package only (not RAM, board, PSU loss); a smart plug +> counts whole-wall AC. Don't compare the number across methods or across rigs. ### Reservation-aware thread tuning -RandomX wants its scratchpads backed by **HugePages**. `setup` reserves a pool sized for an estimated thread -count; `tune` then benchmarks thread counts within that reservation. A thread count that needs *more* 2 MB -pages than are reserved still runs — but the extra threads fall back to normal pages, so its benchmark is a -**floor, not a fair reading**. `tune` flags each such candidate `hugepages_capped: true` in +RandomX wants its scratchpads backed by HugePages. `setup` reserves a pool sized for an estimated thread +count; `tune` then benchmarks thread counts within that reservation. A thread count that needs more 2 MB +pages than are reserved still runs, but the extra threads fall back to normal pages, so its benchmark is a +floor rather than a fair reading. `tune` flags each such candidate `hugepages_capped: true` in `rigforge-tune.json` and ends with a note listing the capped thread counts. 
To explore a higher count -*properly*, resize the reservation for it and re-tune: +fully, resize the reservation for it and re-tune: ```bash sudo RIGFORGE_THREADS= ./rigforge.sh setup # sizes the HugePages reservation for threads @@ -164,49 +163,49 @@ sudo reboot # the GRUB HugePages change need sudo ./rigforge.sh tune # now threads benchmarks with full backing ``` -`setup` also reads the **tuned** `cpu.rx` from `tune-overrides.json` automatically, so once you've tuned, a +`setup` also reads the tuned `cpu.rx` from `tune-overrides.json` automatically, so once you've tuned, a plain `sudo ./rigforge.sh setup` keeps the reservation matched to your winning thread count. -For how to *run* `tune` — the command, `--live`, `--efficiency`, `--confirm`, `--history`, and `--clear` — +For how to run `tune` (the command, `--live`, `--efficiency`, `--confirm`, `--history`, and `--clear`), see [Operations › Tuning](operations.md#tuning). --- ## Kernel & system tuning (Linux only) -These are why a **reboot** is needed on Linux: +These are why a reboot is needed on Linux: -- **HugePages (1 GB + 2 MB).** Backs the RandomX dataset with huge pages to cut TLB misses — the single - biggest performance lever. Sizing is topology-aware (see `util/proposed-grub.sh`). Making it - persistent edits **GRUB**, which takes effect on reboot. RigForge **merges** its parameters into the - existing `GRUB_CMDLINE_LINUX_DEFAULT` instead of overwriting it, so other kernel params are preserved - (a boot-safety fix). -- **MSR access.** Loads the `msr` module and sets the hardware-prefetcher / cache model-specific - registers XMRig recommends for the CPU. (Blocked by Secure Boot — see +- HugePages (1 GB + 2 MB). Backs the RandomX dataset with huge pages to cut TLB misses, the biggest + performance lever. Sizing is topology-aware (see `util/proposed-grub.sh`). Making it persistent edits + GRUB, which takes effect on reboot. 
RigForge merges its parameters into the existing + `GRUB_CMDLINE_LINUX_DEFAULT` instead of overwriting it, so other kernel params are preserved (a + boot-safety fix). +- MSR access. Loads the `msr` module and sets the hardware-prefetcher / cache model-specific + registers XMRig recommends for the CPU. (Blocked by Secure Boot; see [troubleshooting](operations.md#troubleshooting).) -- **`hugetlbfs` mounts + memlock limits.** Mounts the 1 GB HugePage filesystem and raises `memlock` in - `fstab` and `limits.conf` so XMRig can pin memory. These edits are applied **once** (append-only, +- `hugetlbfs` mounts + memlock limits. Mounts the 1 GB HugePage filesystem and raises `memlock` in + `fstab` and `limits.conf` so XMRig can pin memory. These edits are applied once (append-only, deduplicated) so re-runs don't accumulate duplicate lines. macOS doesn't expose HugePages or MSRs, so those stages are skipped there; the macOS path sets -XMRig accordingly (and there's no systemd service — you run the miner yourself). See +XMRig accordingly (and there's no systemd service, so you run the miner yourself). See [Operations › Running on macOS](operations.md#running-on-macos). --- ## Service management (Linux) -- **systemd unit.** XMRig runs as the `xmrig` service, enabled at boot, restarting on failure. -- **`cpupower` performance governor.** Pins the CPU to its performance frequency so it isn't throttled +- systemd unit. XMRig runs as the `xmrig` service, enabled at boot, restarting on failure. +- `cpupower` performance governor. Pins the CPU to its performance frequency so it isn't throttled down mid-hash. -- **Log rotation.** A `logrotate` policy compresses and archives `xmrig.log`. -- **Hardened unit.** The service runs as root (required for the MSR mod and HugePages) but with +- Log rotation. A `logrotate` policy compresses and archives `xmrig.log`. +- Hardened unit. 
The service runs as root (required for the MSR mod and HugePages) but with defense-in-depth sandboxing: `NoNewPrivileges`, `ProtectSystem=full` (read-only `/usr`,`/etc`,…), `PrivateTmp`, `ProtectControlGroups`, `LockPersonality`, and `ReadWritePaths` limited to the worker - root. Directives that would break RandomX are deliberately **not** set — `PrivateDevices` (hides + root. Directives that would break RandomX are deliberately not set: `PrivateDevices` (hides `/dev/cpu/*/msr`), `MemoryDenyWriteExecute` (blocks the JIT), and `ProtectKernelModules`. -- **Scoped `memlock`.** Unlimited `memlock` is granted to the **service** (`LimitMEMLOCK=infinity`) and, - for manual runs, to the **mining user only** in `limits.conf` — not to every account via `*`. +- Scoped `memlock`. Unlimited `memlock` is granted to the service (`LimitMEMLOCK=infinity`) and, + for manual runs, to the mining user only in `limits.conf`, not to every account via `*`. --- @@ -214,12 +213,12 @@ XMRig accordingly (and there's no systemd service — you run the miner yourself RigForge is built to be re-run: -- **Idempotent edits.** System-file changes (`fstab`, `limits.conf`, `/etc/modules`) are append-only - and deduplicated — running setup twice never doubles a line. -- **Non-destructive workspace.** A prior install is archived, not overwritten. -- **Fail-fast with context.** An `ERR` trap names the step that failed; config input is validated - before the slow build starts. -- **Tested.** A dependency-free suite fakes all hardware and privileged commands so every supported +- Idempotent edits. System-file changes (`fstab`, `limits.conf`, `/etc/modules`) are append-only + and deduplicated, so running setup twice never doubles a line. +- Non-destructive workspace. A prior install is archived, not overwritten. +- Fail-fast with context. An `ERR` trap names the step that failed; config input is validated + before the build starts. +- Tested. 
A dependency-free suite fakes all hardware and privileged commands so every supported platform's config generation and a full deployment (run twice for idempotency) are asserted on any machine; a Docker end-to-end run validates the real Linux path. See the project README's testing section. diff --git a/docs/operations.md b/docs/operations.md index 6d40e36..9f53f23 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -7,45 +7,45 @@ upgrading, and troubleshooting. ## Common tasks -Most days you'll only touch a handful of these. Each is a single command — the full [command +Most days you touch a handful of these. Each is a single command. The full [command reference](#commands) is below. | I want to… | Command | What happens | |---|---|---| -| **Change a setting** — pool, rig name, TLS, failover | edit `config.json`, then `sudo ./rigforge.sh apply` | Regenerates the live config and restarts. No rebuild. | -| **Redeploy after a `git pull`** | `git pull && sudo ./rigforge.sh upgrade` | Rebuilds + restarts (and re-tunes) **if** the XMRig pin moved; otherwise a no-op — see [the note below](#upgrading-xmrig-redeploy-after-a-git-pull). | -| **Run a live tune now** | `sudo ./rigforge.sh tune --now` | One live pass against the running miner; keeps the best prefetch mode if it wins. Linux only. | -| **Check the worker is healthy** | `sudo ./rigforge.sh doctor` | HugePages, MSR, governor, service — with a fix hint for anything off. | -| **Watch it mining** | `sudo ./rigforge.sh logs` | Live logs; `Ctrl-C` stops following (the miner keeps running). | -| **Stop / start / restart** | `sudo ./rigforge.sh stop` · `start` · `restart` | Control the miner service. | -| **Quick speed check** | `sudo ./rigforge.sh bench` | One-off offline benchmark; reports H/s. | -| **Save config + tuning** | `sudo ./rigforge.sh backup` | Snapshots the only hard-to-recreate state to `./backups`. 
| - -> On **macOS**, drop the `sudo` (the privileged steps are Linux-only) and run `./rigforge.sh restart` -> after `apply` to pick up changes. `doctor` and the live re-tunes (`tune --now`, `tune --live`) are -> Linux-only. See [Running on macOS](#running-on-macos). +| Change a setting (pool, rig name, TLS, failover) | edit `config.json`, then `sudo ./rigforge.sh apply` | Regenerates the live config and restarts. No rebuild. | +| Redeploy after a `git pull` | `git pull && sudo ./rigforge.sh upgrade` | Rebuilds + restarts (and re-tunes) if the XMRig pin moved; otherwise a no-op. See [the note below](#upgrading-xmrig-redeploy-after-a-git-pull). | +| Run a live tune now | `sudo ./rigforge.sh tune --now` | One live pass against the running miner; keeps the best prefetch mode if it wins. Linux only. | +| Check the worker is healthy | `sudo ./rigforge.sh doctor` | HugePages, MSR, governor, service, with a fix hint for anything off. | +| Watch it mining | `sudo ./rigforge.sh logs` | Live logs; `Ctrl-C` stops following (the miner keeps running). | +| Stop / start / restart | `sudo ./rigforge.sh stop` · `start` · `restart` | Control the miner service. | +| Quick speed check | `sudo ./rigforge.sh bench` | One-off offline benchmark; reports H/s. | +| Save config + tuning | `sudo ./rigforge.sh backup` | Snapshots the only hard-to-recreate state to `./backups`. | + +> On macOS, drop the `sudo` (the privileged steps are Linux-only) and run `./rigforge.sh restart` after +> `apply` to pick up changes. `doctor` and the live re-tunes (`tune --now`, `tune --live`) are Linux-only. +> See [Running on macOS](#running-on-macos). --- ## Commands -The complete surface — most days you only need the handful in [Common tasks](#common-tasks) above; the +The complete surface. Most days you only need the handful in [Common tasks](#common-tasks) above; the rest are here for completeness. -RigForge is a single script. Run it as `sudo ./rigforge.sh [command]`. 
_(Optional: set +RigForge is a single script. Run it as `sudo ./rigforge.sh [command]`. Optional: set `"add_to_path": true` in `config.json` and setup installs a `rigforge` command on your PATH, so you can -run `sudo rigforge [command]` from any directory; `uninstall` removes it.)_ +run `sudo rigforge [command]` from any directory; `uninstall` removes it. | Command | What it does | |---|---| -| `setup` _(default)_ | Provision the worker: dependencies, build, hardware + kernel tuning, and the service. Idempotent — safe to re-run; skips the recompile when the pinned XMRig is already built. | -| `upgrade` | Rebuild **and** restart **only if** the pinned XMRig version/commit changed. A no-op when you're already on the pinned build. If periodic autotune is enabled, it also **re-tunes the new build** (the fastest knobs can shift between versions). | -| `apply` | Re-read `config.json`, regenerate the live XMRig config, and restart — **without** recompiling. The fast path after editing `config.json`. On Linux it also reconciles the periodic-autotune timer with config (so changing the `autotune` target takes effect) and reports it (efficiency / performance / disabled). | -| `uninstall` | Remove the service and **revert all system changes** (fstab, limits, modules, GRUB) and the worker build/logs. Leaves `config.json`. Prompts first; add `--yes` to skip. | -| `doctor` | Read-only health check (run with `sudo` for the deepest checks). **Critical** findings (counted as issues): the service is active, HugePages are reserved, the `msr` module is loaded, and the **MSR mod actually applied** — confirmed from XMRig's log and, as root, an `rdmsr` register read-back (see [MSR mod verification](#msr-mod-verification)). 
**Advisory** findings (hints, not failures): CPU governor, 1 GB HugePages, HugePages 100%-backed (from the XMRig log), **hashrate-capping hardware** RigForge can't fix but you can — single-channel or slow RAM (via `dmidecode`) and a power/boost-capped CPU clock — and **BIOS/firmware** recommendations (board/BIOS context, plus enable XMP/EXPO/DOCP or SMT when they're off; manual BIOS changes RigForge can't make from the OS). Prints an actionable hint for anything off. | +| `setup` *(default)* | Provision the worker: dependencies, build, hardware + kernel tuning, and the service. Idempotent and safe to re-run; skips the recompile when the pinned XMRig is already built. | +| `upgrade` | Rebuild and restart only if the pinned XMRig version/commit changed. A no-op when you're already on the pinned build. If periodic autotune is enabled, it also re-tunes the new build (the fastest knobs can shift between versions). | +| `apply` | Re-read `config.json`, regenerate the live XMRig config, and restart, without recompiling. The fast path after editing `config.json`. On Linux it also reconciles the periodic-autotune timer with config (so changing the `autotune` target takes effect) and reports it (efficiency / performance / disabled). | +| `uninstall` | Remove the service and revert all system changes (fstab, limits, modules, GRUB) and the worker build/logs. Leaves `config.json`. Prompts first; add `--yes` to skip. | +| `doctor` | Read-only health check (run with `sudo` for the deepest checks). Critical findings (counted as issues): the service is active, HugePages are reserved, the `msr` module is loaded, and the MSR mod actually applied, confirmed from XMRig's log and, as root, an `rdmsr` register read-back (see [MSR mod verification](#msr-mod-verification)). 
Advisory findings (hints, not failures): CPU governor, 1 GB HugePages, HugePages 100%-backed (from the XMRig log), hashrate-capping hardware RigForge can't fix but you can (single-channel or slow RAM via `dmidecode`, and a power/boost-capped CPU clock), and BIOS/firmware recommendations (board/BIOS context, plus enable XMP/EXPO/DOCP or SMT when they're off; manual BIOS changes RigForge can't make from the OS). Prints an actionable hint for anything off. | | `bench` | Run a one-off `xmrig --bench` and report the hashrate (a quick perf/health check; set `BENCH=10M` for a longer run). | -| `tune` | The single command for tuning. A bare `tune` measures the fastest CPU-specific knobs (prefetch, `cpu.yield`, thread count) offline and keeps them — an **optional, one-time** step. Live variants: **`--now`** / **`--short`** (a quick prefetch re-tune against the running miner — *run a live tune now*), **`--now --long`** (a full live search of every knob, = `--live`), `--confirm` (A/B-check the winner live). Plus `--efficiency` / `--perf`, `--history`, `--clear`. See [Tuning](#tuning). | -| `autotune` | The **scheduled** live tuner. You normally don't type it — `tune --now` is the friendlier spelling for an on-demand run, and the periodic schedule is what this verb is really for: set `"autotune": "performance"` (raw H/s) or `"autotune": "efficiency"` (hashrate-per-watt) in `config.json` and setup installs a systemd timer (also re-tuned on `upgrade`). Conservative — keeps a change only if it beats the baseline by a margin, else rolls back. Linux-only. See [Live auto-tuning](#live-auto-tuning-opt-in). | +| `tune` | The single command for tuning. A bare `tune` measures the fastest CPU-specific knobs (prefetch, `cpu.yield`, thread count) offline and keeps them; this is an optional, one-time step. 
Live variants: `--now` / `--short` (a quick prefetch re-tune against the running miner, the *run a live tune now* path), `--now --long` (a full live search of every knob, = `--live`), `--confirm` (A/B-check the winner live). Plus `--efficiency` / `--perf`, `--history`, `--clear`. See [Tuning](#tuning). | +| `autotune` | The scheduled live tuner. You normally don't type it; `tune --now` is the friendlier spelling for an on-demand run, and the periodic schedule is what this verb is really for: set `"autotune": "performance"` (raw H/s) or `"autotune": "efficiency"` (hashrate-per-watt) in `config.json` and setup installs a systemd timer (also re-tuned on `upgrade`). Conservative: it keeps a change only if it beats the baseline by a margin, else rolls back. Linux-only. See [Live auto-tuning](#live-auto-tuning-opt-in). | | `backup` | Snapshot `config.json` + the tuning files into a timestamped `tar.gz` under `./backups`. See [Backup & restore](#backup--restore). | | `restore` | Restore `config.json` + tuning from a backup archive: `restore [-y] `. Prompts before overwriting. | | `status` | Show the systemd service status. | @@ -57,7 +57,7 @@ run `sudo rigforge [command]` from any directory; `uninstall` removes it.)_ `setup` is the default, so `sudo ./rigforge.sh` with no argument provisions (or re-provisions) the worker. The service verbs (`status`/`logs`/`start`/`stop`/`restart`/`enable`/`disable`) work on Linux -and macOS — systemd on Linux, a launchd login agent on macOS (`enable`/`disable`). `doctor`, +and macOS: systemd on Linux, a launchd login agent on macOS (`enable`/`disable`). `doctor`, `tune --live`, and `autotune` are Linux-only. See [Running on macOS](#running-on-macos). ### Health check @@ -68,17 +68,17 @@ After setup (and the reboot), confirm everything took effect: sudo ./rigforge.sh doctor ``` -It's the quickest way to catch the common silent failures — HugePages not reserved (needs a reboot) or -the MSR mod blocked by Secure Boot. 
See [Troubleshooting](#troubleshooting). +This catches the common silent failures: HugePages not reserved (needs a reboot) or the MSR mod blocked +by Secure Boot. See [Troubleshooting](#troubleshooting). -> On a fresh install `setup` **enables** the service but doesn't start it until you reboot (HugePages -> aren't reserved before then), so a `doctor` run between `setup` and the reboot will report "service is -> not active" — that's expected; it starts automatically after you reboot. +> On a fresh install `setup` enables the service but doesn't start it until you reboot (HugePages aren't +> reserved before then), so a `doctor` run between `setup` and the reboot will report "service is not +> active". That's expected; it starts automatically after you reboot. ### Tuning RigForge auto-configures the hashrate-critical settings, so a freshly-deployed worker already runs well. -`tune` is an **optional, one-time** step that *measures* the handful of knobs whose best value is genuinely +`tune` is an optional, one-time step that measures the handful of knobs whose best value is genuinely CPU-specific (the RandomX prefetch mode, `cpu.yield`, the thread count) and keeps the fastest: ```bash @@ -86,29 +86,29 @@ sudo ./rigforge.sh tune # measure the fastest knobs for this CPU — thoro sudo ./rigforge.sh apply # regenerate the config with them and restart ``` -> **Tune once, run for months.** The result is saved to a separate overlay (`tune-overrides.json`), so your +> Tune once, run for months. The result is saved to a separate overlay (`tune-overrides.json`), so your > `config.json` is never touched, and it's kept for the life of the rig. After an `upgrade` bumps XMRig, > RigForge reminds you to re-tune (the fastest knobs can shift between versions). 
-`tune` **optimizes for whatever your [`autotune`](configuration.md#configuration-reference) config is set to** -— so if `autotune` is `"efficiency"`, a plain `tune` measures hashrate-per-watt, matching what the scheduled -run does. Override per-run with `--perf` or `--efficiency`. It announces the target at the start, e.g. -`Optimization target: efficiency (hashrate-per-watt)`. Run it without `sudo` and it re-runs itself with -`sudo` for you. +`tune` optimizes for whatever your [`autotune`](configuration.md#configuration-reference) config is set +to, so if `autotune` is `"efficiency"`, a plain `tune` measures hashrate-per-watt, matching what the +scheduled run does. Override per-run with `--perf` or `--efficiency`. It announces the target at the +start, e.g. `Optimization target: efficiency (hashrate-per-watt)`. Run it without `sudo` and it re-runs +itself with `sudo` for you. -See what's tuned — and what the periodic auto-tuner has been doing — at any time: +See what's tuned, and what the periodic auto-tuner has been doing, at any time: ```bash ./rigforge.sh tune --history # applied knobs + the last full run + recent auto-tune decisions ``` -**Useful variants** (all optional): +Useful variants (all optional): | Command | What it does | |---|---| -| `tune --now` *(or `--short`)* | **Run a live tune now** — a quick convergent pass against the running miner that keeps the best prefetch mode if it wins. The everyday live re-tune; Linux only. | -| `tune --now --long` | A **full** live sweep — every knob (prefetch, `cpu.yield`, thread count, 1G-pages) against the running miner, not just the prefetch mode. Thorough but slower; measures your running pool's real conditions/algorithm. Alias: `tune --live`. Linux only. | -| `tune --efficiency` / `--perf` | Force the optimization target — **hashrate-per-watt** vs **raw speed** — overriding the `autotune` config default for this run (efficiency needs a power source). 
| +| `tune --now` *(or `--short`)* | Run a live tune now: a quick convergent pass against the running miner that keeps the best prefetch mode if it wins. The everyday live re-tune; Linux only. | +| `tune --now --long` | A full live sweep of every knob (prefetch, `cpu.yield`, thread count, 1G-pages) against the running miner, not just the prefetch mode. Thorough but slower; measures your running pool's real conditions/algorithm. Alias: `tune --live`. Linux only. | +| `tune --efficiency` / `--perf` | Force the optimization target, hashrate-per-watt vs raw speed, overriding the `autotune` config default for this run (efficiency needs a power source). | | `tune --confirm` | A/B-check the winner on the live miner and keep it only if it genuinely beats the previous config. Linux only. | | `tune --history` | Show the current tuning, the last full run, and recent auto-tune decisions. | | `tune --clear` | Discard all tuning and return to the auto defaults. | @@ -119,68 +119,67 @@ power/efficiency and reservation-aware details are all in ### Live auto-tuning (opt-in) -**Run one pass on demand** any time with `sudo ./rigforge.sh tune --now` — it sweeps the prefetch modes -against your running miner and keeps the best if it beats the current setting by a margin. Want a -thorough pass that sweeps **every** knob live (threads, yield, 1G-pages — not just prefetch)? Use -`tune --now --long` (the live equivalent of a bare `tune`). No scheduling needed; either is a quick way -to re-tune live after a BIOS, RAM, or cooling change. (`tune --now` is the friendly name for the -`autotune` engine — the standalone `autotune` verb still works and is what the scheduled timer below -runs.) +Run one pass on demand any time with `sudo ./rigforge.sh tune --now`. It sweeps the prefetch modes +against your running miner and keeps the best if it beats the current setting by a margin. 
For a thorough +pass that sweeps every knob live (threads, yield, 1G-pages, not just prefetch), use `tune --now --long` +(the live equivalent of a bare `tune`). No scheduling needed; either is a quick way to re-tune live after +a BIOS, RAM, or cooling change. `tune --now` is the friendly name for the `autotune` engine; the +standalone `autotune` verb still works and is what the scheduled timer below runs. -Prefer it hands-off? Set `autotune` in `config.json` to a target and re-run `setup` — RigForge installs a -**systemd timer** that periodically optimizes the prefetch mode against your *live* miner: +For a hands-off schedule, set `autotune` in `config.json` to a target and re-run `setup`. RigForge +installs a systemd timer that periodically optimizes the prefetch mode against your live miner: | `autotune` | What the scheduled run optimizes for | | --- | --- | -| `"disabled"` _(default)_ | Nothing — no timer is installed. | -| `"performance"` | **Raw hashrate** (H/s). | -| `"efficiency"` | **Hashrate-per-watt** (H/s/W) — for power-cost-, heat-, or PSU-limited rigs. Needs a power source (built-in RAPL, or a `TUNE_POWER_CMD` for a smart plug / IPMI); without one it falls back to `performance` with a warning. | +| `"disabled"` *(default)* | Nothing. No timer is installed. | +| `"performance"` | Raw hashrate (H/s). | +| `"efficiency"` | Hashrate-per-watt (H/s/W), for power-cost-, heat-, or PSU-limited rigs. Needs a power source (built-in RAPL, or a `TUNE_POWER_CMD` for a smart plug / IPMI); without one it falls back to `performance` with a warning. | -(Legacy booleans still work: `true` → `performance`, `false` → `disabled`.) The chosen target is baked -into the systemd unit at setup, so the scheduled run optimizes for what you picked — and `tune --history` +Legacy booleans still work: `true` → `performance`, `false` → `disabled`. 
The chosen target is baked +into the systemd unit at setup, so the scheduled run optimizes for what you picked, and `tune --history` shows it. -**Each run converges in one pass (~minutes).** It reads the current hashrate from the miner's API -(median of a few samples — plus average watts when the target is `efficiency`), then sweeps every prefetch -mode — applying each, restarting, and re-measuring over a warmup window — and adopts the best by the -target's metric, but **only if it beats the baseline by a margin** (else it keeps the current mode). So a -single run settles on the best prefetch mode; you don't wait days. The change is merged on top of any -offline `tune` result, so your tuned thread count and `cpu.yield` are preserved. +Each run converges in one pass (~minutes). It reads the current hashrate from the miner's API (median of +a few samples, plus average watts when the target is `efficiency`), then sweeps every prefetch mode +(applying each, restarting, and re-measuring over a warmup window) and adopts the best by the target's +metric, but only if it beats the baseline by a margin (else it keeps the current mode). A single run +settles on the best prefetch mode; you don't wait days. The change is merged on top of any offline `tune` +result, so your tuned thread count and `cpu.yield` are preserved. -**When it re-tunes.** Once the prefetch mode converges it's stable — so re-tuning is **event-driven**, not -a blind daily loop that churns the miner to re-confirm a result that rarely changes: +When it re-tunes: once the prefetch mode converges it's stable, so re-tuning is event-driven, not a blind +daily loop that churns the miner to re-confirm a result that rarely changes. -- **After an `upgrade`** — the real trigger. The fastest knobs can shift between XMRig versions, so once a +- After an `upgrade`, the real trigger. 
The fastest knobs can shift between XMRig versions, so once a rebuild finishes (and the new build is live) RigForge re-tunes it automatically. -- **A monthly safety-net timer** — the default cadence is **monthly**, to catch slow drift (thermal, - ambient temperature, fan/dust). Change it with `AUTOTUNE_ONCALENDAR` (any [systemd calendar](https://www.freedesktop.org/software/systemd/man/systemd.time.html) - spec) before `setup` — e.g. `AUTOTUNE_ONCALENDAR=weekly sudo ./rigforge.sh setup`. +- A monthly safety-net timer. The default cadence is monthly, to catch slow drift (thermal, ambient + temperature, fan/dust). Change it with `AUTOTUNE_ONCALENDAR` (any [systemd calendar](https://www.freedesktop.org/software/systemd/man/systemd.time.html) + spec) before `setup`, e.g. `AUTOTUNE_ONCALENDAR=weekly sudo ./rigforge.sh setup`. -Review the schedule, the next run, and recent decisions any time with **`rigforge tune --history`** (or +Review the schedule, the next run, and recent decisions any time with `rigforge tune --history` (or `journalctl -u rigforge-autotune`). -Auto-tune only touches the prefetch mode (the knob most worth re-checking live). For a **definitive, -one-time sweep of every knob**, run the offline [`tune`](#tuning) above. Linux only. +Auto-tune only touches the prefetch mode, the knob most worth re-checking live. For a definitive, +one-time sweep of every knob, run the offline [`tune`](#tuning) above. Linux only. 
### MSR mod verification -The MSR "RandomX boost" (writing the CPU's prefetcher MSRs) is one of the biggest levers — ~10–15% — so -`doctor` verifies it actually took effect, not just that the `msr` module loaded: +The MSR "RandomX boost" (writing the CPU's prefetcher MSRs) is one of the biggest levers, worth ~10–15%, +so `doctor` verifies it actually took effect, not just that the `msr` module loaded: -- **From XMRig's log** (always): the `msr register values for "" preset have been set - successfully` line confirms XMRig wrote the per-family preset (e.g. `ryzen_19h_zen4`). A `FAILED` line - is flagged — usually Secure Boot or a missing `msr.allow_writes=on`. -- **Register read-back via `rdmsr`** (run `doctor` as root, with `msr-tools` installed — `setup` - installs it): `doctor` reads the prefetcher registers back and checks they hold the preset's values, - catching a write a hypervisor or kernel lockdown silently dropped even though XMRig reported success. - Run without root, without `rdmsr`, or with the `msr` module unloaded, this step is skipped with an - advisory — never a false alarm; the log check above still confirms the write. +- From XMRig's log (always): the `msr register values for "" preset have been set successfully` + line confirms XMRig wrote the per-family preset (e.g. `ryzen_19h_zen4`). A `FAILED` line is flagged, + usually Secure Boot or a missing `msr.allow_writes=on`. +- Register read-back via `rdmsr` (run `doctor` as root, with `msr-tools` installed; `setup` installs it): + `doctor` reads the prefetcher registers back and checks they hold the preset's values, catching a write + a hypervisor or kernel lockdown silently dropped even though XMRig reported success. Run without root, + without `rdmsr`, or with the `msr` module unloaded, this step is skipped with an advisory, never a false + alarm; the log check above still confirms the write. 
-You almost never need to **tune** the MSR preset — XMRig auto-selects the right per-family preset, and -that's optimal on the vast majority of CPUs. The knob exists for the rare case where a non-default preset -(or disabling the mod) wins on unusual silicon: set `TUNE_WRMSR="true false"` (or a preset number) to -sweep `randomx.wrmsr` alongside the other knobs — it's applied per-bench (no reboot) and pinned only if -it actually wins. +You almost never need to tune the MSR preset. XMRig auto-selects the right per-family preset, and that's +optimal on the vast majority of CPUs. The knob exists for the rare case where a non-default preset (or +disabling the mod) wins on unusual silicon: set `TUNE_WRMSR="true false"` (or a preset number) to sweep +`randomx.wrmsr` alongside the other knobs. It's applied per-bench (no reboot) and pinned only if it +actually wins. --- @@ -195,37 +194,36 @@ sudo systemctl start xmrig # start the miner sudo systemctl restart xmrig # restart (e.g. after a config change) ``` -RigForge also wraps these so you don't have to remember the unit name — +RigForge also wraps these so you don't have to remember the unit name: `sudo ./rigforge.sh status` / `logs` / `start` / `stop` / `restart`. The service is enabled at install, so it starts automatically on boot (and after the post-setup reboot). -> On **macOS** there is no systemd service — RigForge builds and configures XMRig but you run it -> yourself. See [Running on macOS](#running-on-macos) below. +> On macOS there is no systemd service. RigForge builds and configures XMRig but you run it yourself. See +> [Running on macOS](#running-on-macos) below. --- ## Running on macOS -macOS is a **development / light-use** target — Ubuntu is the supported deployment platform. On macOS, -`sudo ./rigforge.sh` still does the core work: it installs dependencies (via **Homebrew**), compiles -XMRig from source, and writes a tuned `config.json`. 
What it **doesn't** do is the Linux-only system -integration: +macOS is a development / light-use target; Ubuntu is the supported deployment platform. On macOS, +`sudo ./rigforge.sh` still does the core work: it installs dependencies (via Homebrew), compiles XMRig +from source, and writes a tuned `config.json`. What it doesn't do is the Linux-only system integration: -- **No kernel tuning, and no reboot.** macOS doesn't expose HugePages or MSRs, so the HugePages, MSR, +- No kernel tuning, and no reboot. macOS doesn't expose HugePages or MSRs, so the HugePages, MSR, `hugetlbfs`, and GRUB steps are skipped. The generated config turns those knobs off accordingly (`huge-pages`, `1gb-pages`, `wrmsr`/`rdmsr` are `false`) and binds the API to IPv6 `::`. Because the - biggest RandomX levers (HugePages + MSR) are Linux-only, **expect a lower hashrate than a tuned Linux - box** — fine for development, not for a production rig. -- **No systemd service / no auto-start on boot.** There's no service to install, and the miner doesn't - start at boot. But `setup` doesn't leave you to hand-roll a launch command — the same `start` / `stop` - / `restart` / `status` / `logs` verbs work on macOS too (see below); on macOS they manage XMRig as a + biggest RandomX levers (HugePages + MSR) are Linux-only, expect a lower hashrate than a tuned Linux box: + fine for development, not for a production rig. +- No systemd service / no auto-start on boot. There's no service to install, and the miner doesn't start + at boot. `setup` doesn't leave you to hand-roll a launch command, though. The same `start` / `stop` / + `restart` / `status` / `logs` verbs work on macOS too (see below); on macOS they manage XMRig as a background process tracked by a PID file under the worker root, instead of via systemd. 
### Run the miner -`setup` doesn't start the miner on macOS, so launch it yourself when ready — with the same command you'd +`setup` doesn't start the miner on macOS, so launch it yourself when ready, with the same command you'd use on Linux: ```bash @@ -242,8 +240,8 @@ binary from the worker build dir with the generated config; the log is at `/xmrig.log` (e.g. `data/worker/xmrig.log`). -- **Rotation:** a `logrotate` policy is installed automatically to compress and archive logs. -- **Build log:** the XMRig compile output is captured to `/build.log` (e.g. +- Log file: `/xmrig.log` (e.g. `data/worker/xmrig.log`). +- Rotation: a `logrotate` policy is installed automatically to compress and archive logs. +- Build log: the XMRig compile output is captured to `/build.log` (e.g. `data/worker/build.log`) during setup, so a failed build is diagnosable after the fact. On any unexpected failure the script also names the step that failed and prints the last lines of the build log. @@ -298,15 +296,15 @@ After editing `config.json`, apply it in one step: sudo ./rigforge.sh apply ``` -`apply` re-reads `config.json`, regenerates the live XMRig config, and restarts the service — no +`apply` re-reads `config.json`, regenerates the live XMRig config, and restarts the service, with no recompile. Use it for a pool change, a new rig label, TLS, or failover pools. Changing `DONATION` is -the exception: it's compiled into the binary and needs a rebuild — see +the exception: it's compiled into the binary and needs a rebuild. See [Configuration › Changing settings later](configuration.md#changing-settings-later). -A full `setup` re-run also regenerates the config, but it's meant for re-provisioning and — so it won't -interrupt a running miner — does **not** restart an already-built worker on its own. 
When you just want +A full `setup` re-run also regenerates the config, but it's meant for re-provisioning and, so that it +won't interrupt a running miner, does not restart an already-built worker on its own. When you want an edit to take effect, use `apply`. (On macOS, `apply` regenerates the config but you restart the -miner yourself — see [Running on macOS](#running-on-macos).) +miner yourself; see [Running on macOS](#running-on-macos).) --- @@ -321,11 +319,11 @@ sudo ./rigforge.sh upgrade # rebuild + restart only if the pin changed `upgrade` is a no-op when the pinned XMRig is already built, so it's cheap to run. A plain `sudo ./rigforge.sh` (setup) also picks up a changed pin, but `upgrade` is the explicit, restart-aware -path. If you've enabled periodic [autotune](#live-auto-tuning-opt-in), `upgrade` **re-tunes the new build -automatically** once it's live — the optimal prefetch mode can change between XMRig versions, so the -upgrade is the moment that actually warrants a re-tune (the monthly timer is just a slow safety net). +path. If you've enabled periodic [autotune](#live-auto-tuning-opt-in), `upgrade` re-tunes the new build +automatically once it's live. The optimal prefetch mode can change between XMRig versions, so the upgrade +is the moment that actually warrants a re-tune (the monthly timer is just a slow safety net). -> **Pulled RigForge changes but not a new XMRig pin?** Then `upgrade` is a no-op — the build is +> Pulled RigForge changes but not a new XMRig pin? Then `upgrade` is a no-op, since the build is > unchanged. To pick up RigForge's own changes, run `sudo ./rigforge.sh apply` (regenerate the live > config + restart); if the pull also changed kernel tuning or the service unit, run a full > `sudo ./rigforge.sh setup`, then `restart`. 
When unsure, `upgrade` followed by `apply` covers the @@ -337,10 +335,10 @@ upgrade is the moment that actually warrants a re-tune (the monthly timer is jus ## Backup & restore -A worker's **expensive, hard-to-recreate state** is small: your `config.json` and the **tuning** result -(`tune-overrides.json` — which can take hours to produce). The XMRig build and the system tuning are -regenerated by `setup`, so they're not worth saving. `backup` snapshots just that valuable state into a -portable archive: +A worker's expensive, hard-to-recreate state is small: your `config.json` and the tuning result +(`tune-overrides.json`, which can take hours to produce). The XMRig build and the system tuning are +regenerated by `setup`, so they're not worth saving. `backup` snapshots that state into a portable +archive: ```bash sudo ./rigforge.sh backup # -> ./backups/rigforge-backup-YYYYMMDD-HHMMSS.tar.gz @@ -349,7 +347,7 @@ sudo ./rigforge.sh backup # -> ./backups/rigforge-backup-YYYYMMDD-HHMM The archive is owner-only (`chmod 600`) and includes `config.json`, the tuning files, and a small manifest (RigForge version + source host). Back up after first-run setup and again after each `tune`. -**Restore** puts it back — point it at an archive: +`restore` puts it back. Point it at an archive: ```bash sudo ./rigforge.sh restore ./backups/rigforge-backup-20260101-120000.tar.gz # prompts; -y to skip @@ -361,23 +359,23 @@ you to run `setup`/`apply` to put the restored config into effect. ### Two reasons to use it -- **Recover from data loss.** A wiped disk would otherwise mean re-doing setup *and* re-tuning. With a - backup it's `restore` + `setup`. -- **Roll a tune across a fleet.** Tune one machine, `backup`, then `restore` on each identical machine — - they all get the same config and the same tuning without re-running the (slow) search. +- Recover from data loss. A wiped disk would otherwise mean re-doing setup and re-tuning. With a backup + it's `restore` + `setup`. 
+- Roll a tune across a fleet. Tune one machine, `backup`, then `restore` on each identical machine; they + all get the same config and the same tuning without re-running the slow search. -> ⚠️ **Tuning is CPU-specific.** Only reuse `tune-overrides.json` between **identical** CPUs. On -> different hardware, restore the config but re-run `tune` (or `tune --clear` to drop the inherited -> tuning). Backups made with the default `HOME_DIR` (`DYNAMIC_HOME`) are fully portable; an absolute -> `HOME_DIR` carries that machine's path. +> NOTE: Tuning is CPU-specific. Only reuse `tune-overrides.json` between identical CPUs. On different +> hardware, restore the config but re-run `tune` (or `tune --clear` to drop the inherited tuning). +> Backups made with the default `HOME_DIR` (`DYNAMIC_HOME`) are fully portable; an absolute `HOME_DIR` +> carries that machine's path. --- ## Verification -After setup (and the reboot, on Linux), confirm the optimizations applied: +After setup (and the reboot, on Linux), confirm the optimizations applied. -**HugePages** +HugePages: ```bash grep Huge /proc/meminfo @@ -386,7 +384,7 @@ grep Huge /proc/meminfo `HugePages_Total`, `HugePages_Free`, and `Hugepagesize` should be non-zero and match what setup configured. -**MSR (Model-Specific Registers)** +MSR (Model-Specific Registers): ```bash grep -i msr /xmrig.log @@ -400,16 +398,16 @@ If you see MSR errors, see Troubleshooting below. | Symptom | Likely cause & fix | |---|---| -| **Setup fails during the build** | The script names the step that failed and tails the build log. Read the full error in `/build.log` (e.g. `data/worker/build.log`). Common causes: a build dependency you declined to install (re-run and accept), or too little RAM during compilation (the build already caps parallelism by RAM — add swap on very low-memory hosts). Re-run `sudo ./rigforge.sh` once resolved; it resumes without redoing finished work. 
| -| **MSR errors in the log** | Secure Boot is blocking the `msr` kernel module. **Disable Secure Boot** in your BIOS/UEFI, then reboot. | -| **`doctor`: "MSR registers don't match the preset"** | XMRig's log says the write succeeded but the read-back disagrees — the kernel or hypervisor silently dropped it. Common on VMs/cloud instances and under kernel lockdown. Run RigForge on **bare metal**, and ensure `msr.allow_writes=on` (RigForge sets this) and that lockdown isn't enforced. | -| **`doctor`: "couldn't read the MSRs via rdmsr"** | The `msr` module isn't loaded (or `doctor` wasn't run as root). Run `sudo ./rigforge.sh doctor`; if it persists, `sudo modprobe msr` (Secure Boot can block it). This is advisory — XMRig's log already confirms the write. | -| **`HugePages_Total` is 0** | The kernel tuning needs a **reboot** to take effect (GRUB change). Reboot, then re-check `grep Huge /proc/meminfo`. | -| **HugePages still 0 after reboot** | Not enough contiguous memory was reservable, or another tool changed GRUB. Re-run `sudo ./rigforge.sh`; RigForge **merges** its kernel parameters into `GRUB_CMDLINE_LINUX_DEFAULT` rather than overwriting, so other params are preserved. | -| **Low hashrate / few threads** | RandomX is L3-bound (~2 MB per thread). A CPU with little L3 runs fewer effective threads — this is expected. See [Hardware › L3 cache](hardware.md#a-note-on-l3-cache). | -| **No AVX2** | RandomX still runs but slower. AVX2 is strongly recommended; there's no fix beyond different hardware. | -| **Dashboard can't read the worker** | The HTTP API token must equal the rig name (or be unset), the API must be on `:8080`, and the worker must be reachable from the stack host. See [Pithead Integration › Troubleshooting](pithead-integration.md#troubleshooting). | -| **Pool unreachable** | Confirm the worker can reach its pool URL (firewall, DHCP/static IP). Workers use plain Stratum on the LAN — no Tor. 
| +| Setup fails during the build | The script names the step that failed and tails the build log. Read the full error in `/build.log` (e.g. `data/worker/build.log`). Common causes: a build dependency you declined to install (re-run and accept), or too little RAM during compilation (the build already caps parallelism by RAM; add swap on very low-memory hosts). Re-run `sudo ./rigforge.sh` once resolved; it resumes without redoing finished work. | +| MSR errors in the log | Secure Boot is blocking the `msr` kernel module. Disable Secure Boot in your BIOS/UEFI, then reboot. | +| `doctor`: "MSR registers don't match the preset" | XMRig's log says the write succeeded but the read-back disagrees: the kernel or hypervisor silently dropped it. Common on VMs/cloud instances and under kernel lockdown. Run RigForge on bare metal, and ensure `msr.allow_writes=on` (RigForge sets this) and that lockdown isn't enforced. | +| `doctor`: "couldn't read the MSRs via rdmsr" | The `msr` module isn't loaded (or `doctor` wasn't run as root). Run `sudo ./rigforge.sh doctor`; if it persists, `sudo modprobe msr` (Secure Boot can block it). This is advisory; XMRig's log already confirms the write. | +| `HugePages_Total` is 0 | The kernel tuning needs a reboot to take effect (GRUB change). Reboot, then re-check `grep Huge /proc/meminfo`. | +| HugePages still 0 after reboot | Not enough contiguous memory was reservable, or another tool changed GRUB. Re-run `sudo ./rigforge.sh`; RigForge merges its kernel parameters into `GRUB_CMDLINE_LINUX_DEFAULT` rather than overwriting, so other params are preserved. | +| Low hashrate / few threads | RandomX is L3-bound (~2 MB per thread). A CPU with little L3 runs fewer effective threads; this is expected. See [Hardware › L3 cache](hardware.md#a-note-on-l3-cache). | +| No AVX2 | RandomX still runs but slower. AVX2 is strongly recommended; there's no fix beyond different hardware. 
| +| Dashboard can't read the worker | The worker API is open by default, so the dashboard needs no token; if you set an `ACCESS_TOKEN`, the dashboard must be configured to match it. The API must also be on `:8080`, and the worker must be reachable from the stack host. See [Pithead Integration › Troubleshooting](pithead-integration.md#troubleshooting). | +| Pool unreachable | Confirm the worker can reach its pool URL (firewall, DHCP/static IP). Workers use plain Stratum on the LAN, no Tor. | --- diff --git a/docs/pithead-integration.md b/docs/pithead-integration.md index dbc674f..d2d18ba 100644 --- a/docs/pithead-integration.md +++ b/docs/pithead-integration.md @@ -1,37 +1,37 @@ # Pithead Integration -RigForge works against any RandomX pool, but it's built as the companion miner for -**[Pithead](https://github.com/p2pool-starter-stack/pithead)** — a self-hosted Monero + P2Pool + Tari -mining stack. This page describes the contract between a RigForge worker and the Pithead dashboard, so -the two read as one product. +The contract between a RigForge worker and the Pithead dashboard. RigForge works against any RandomX +pool, but it's built as the companion miner for +[Pithead](https://github.com/p2pool-starter-stack/pithead), a self-hosted Monero + P2Pool + Tari mining +stack. There are two connections between a worker and the stack: -1. **Mining** — the worker → the stack's stratum proxy on `:3333`. -2. **Stats** — the dashboard → the worker's XMRig HTTP API on `:8080`. +1. Mining: the worker → the stack's stratum proxy on `:3333`. +2. Stats: the dashboard → the worker's XMRig HTTP API on `:8080`. --- ## 1. Mining connection (`:3333`) -Point a pool at the stack — `{ "pools": [{ "url": "your-stack:3333" }] }`, the stack's `xmrig-proxy` +Point a pool at the stack: `{ "pools": [{ "url": "your-stack:3333" }] }`, the stack's `xmrig-proxy` endpoint (its proxy listens on `3333`). The stack handles pool selection, payouts, and the P2Pool/XvB -split centrally, so the worker config stays minimal and you **never put a wallet address in it**. 
+split centrally, so the worker config stays minimal and you never put a wallet address in it. -- The XMRig pool `user` field is just a **label** for the rig — it defaults to the hostname (set - `pools[].user` to name it) so you can tell workers apart on the dashboard. +- The XMRig pool `user` field is just a label for the rig. It defaults to the hostname (set `pools[].user` + to name it) so you can tell workers apart on the dashboard. - Point as many workers as you like at the same stack endpoint; the stack aggregates them. -- Workers talk to the pool over plain Stratum on your local network — they do **not** need Tor. -- The endpoint must be reachable from the worker; if the stack host has a firewall, allow the Stratum - port (3333) on the LAN. +- Workers talk to the pool over plain Stratum on your local network; they do not need Tor. +- The endpoint must be reachable from the worker; if the stack host has a firewall, allow the Stratum port + (3333) on the LAN. ### Stratum authentication (optional) -By default the stack's `:3333` is **open** — any rig that can reach it may mine, and the pool `pass` -is ignored (RigForge defaults it to `"x"`). If the operator turns authentication **on** by setting +By default the stack's `:3333` is open: any rig that can reach it may mine, and the pool `pass` is +ignored (RigForge defaults it to `"x"`). If the operator turns authentication on by setting [`p2pool.stratum_password`](https://github.com/p2pool-starter-stack/pithead/blob/main/docs/workers.md#authentication) -on the stack, the proxy then **rejects any rig whose `pass` doesn't match** — XMRig logs -`Permission denied` and the rig won't mine. Put that same secret in the rig's pool `pass`: +on the stack, the proxy then rejects any rig whose `pass` doesn't match: XMRig logs `Permission denied` +and the rig won't mine. 
Put that same secret in the rig's pool `pass`: ```jsonc // config.json — set "pass" to the stack's p2pool.stratum_password @@ -44,44 +44,49 @@ on the stack, the proxy then **rejects any rig whose `pass` doesn't match** — Then `./rigforge.sh apply` (or `setup`) regenerates the worker config with the new password. -- It's the **same secret on every rig**. The operator finds it on the stack side — it's printed after +- It's the same secret on every rig. The operator finds it on the stack side: it's printed after `pithead apply`/`setup`, stored in the stack's `.env` as `PROXY_STRATUM_PASSWORD`, and shown by `pithead status`. -- The password travels **cleartext** over your LAN's plain Stratum, so this is access control — - *who may mine* — **not** encryption. Keep `:3333` on a trusted LAN (the stack's `p2pool.stratum_bind` - / a firewall do the rest). -- This is unrelated to the `DONATION` knob (that's *this rig's* dev-fee donation) and to the API - `ACCESS_TOKEN` below (that's the read-only stats auth on `:8080`). +- The password travels cleartext over your LAN's plain Stratum, so this is access control (who may mine), + not encryption. Keep `:3333` on a trusted LAN (the stack's `p2pool.stratum_bind` / a firewall do the + rest). +- This is unrelated to the `DONATION` knob (that's this rig's dev-fee donation) and to the optional API + `ACCESS_TOKEN` below (that gates the read-only stats API on `:8080`, which is open by default). --- ## 2. Stats connection — the Worker API (`:8080`) Each worker exposes XMRig's HTTP API so Pithead's dashboard can show per-rig stats (hashrate, shares, -uptime). RigForge configures the API to match Pithead's contract **exactly**, so there's nothing to -set up stack-side: +uptime). RigForge configures the API to match Pithead's contract exactly, so there's nothing to set up +stack-side: | Setting | Value | Why | |---|---|---| -| **Port** | `8080` | Pithead reads `GET http://:8080/1/summary`; the port is fixed dashboard-side. 
| -| **Bind** | `0.0.0.0` (all interfaces) | The dashboard polls each worker from the stack host over the LAN. | -| **Mode** | `restricted: true` (read-only) | The API can be **read** but not used to **control** the miner remotely. | -| **Auth token** | the rig name — the first pool's `user` (default hostname), or an explicit `ACCESS_TOKEN` | Pithead authenticates as `Bearer `, so the token defaults to the rig name and stays in sync even when you set a custom `pools[].user`. | +| Port | `8080` | Pithead reads `GET http://:8080/1/summary`; the port is fixed dashboard-side. | +| Bind | `0.0.0.0` (all interfaces) | The dashboard polls each worker from the stack host over the LAN. | +| Mode | `restricted: true` (read-only) | The API can be read but not used to control the miner remotely. | +| Auth token | none (open) by default; set `ACCESS_TOKEN` to require a Bearer token | Pithead's stock probe is no-auth, so an open, read-only API works without extra config. Setting `ACCESS_TOKEN` turns auth on; see below. | Pithead discovers workers from the stratum proxy's connection list (the pool `user` label, which is the -rig name) — there's **nothing to register** stack-side. Workers run on a trusted LAN and need no Tor. +rig name), so there's nothing to register stack-side. Workers run on a trusted LAN and need no Tor. --- ## The token rule (important) -> ⚠️ **Don't set a random/custom API token for a Pithead-connected worker.** The dashboard -> authenticates as `Bearer `, so a decoupled token means it can't read the worker. Leave -> `ACCESS_TOKEN` unset (it defaults to the rig name) unless you've matched it on both sides. +> NOTE: By default the worker API is open (read-only, no token), which matches Pithead's default probe +> (`workers.api_auth: none`). Nothing to coordinate. Leave `ACCESS_TOKEN` unset and it works. 
-Likewise, **don't** bind the API to localhost only and **don't** change the port: a custom token, a -non-`8080` API port, or a worker reachable at a different host than the one it connects from all require -matching configuration on **both** sides — that cross-side coordination is later Pithead-side work +If you do want a token (e.g. you don't fully trust the LAN), set `ACCESS_TOKEN` here and match it on the +dashboard side: + +- a single shared token → Pithead `workers.api_auth: token` + `workers.api_token: `; +- the rig name as the token (`ACCESS_TOKEN` = the first pool's `user`) → Pithead `workers.api_auth: name`. + +Likewise, don't bind the API to localhost only and don't change the port without matching it on the stack +side (`workers.api_port`): a non-`8080` port, or a worker reachable at a different host than the one it +connects from, also needs matching configuration on both sides (Pithead [#171](https://github.com/p2pool-starter-stack/pithead/issues/171) / [#172](https://github.com/p2pool-starter-stack/pithead/issues/172)). @@ -91,10 +96,10 @@ matching configuration on **both** sides — that cross-side coordination is lat | Symptom | Fix | |---|---| -| **Rig won't mine / XMRig logs `Permission denied` at login** | The stack has stratum authentication on (`p2pool.stratum_password`) — set the pool `pass` to that secret. See [Stratum authentication](#stratum-authentication-optional). | -| **Worker missing from the dashboard** | The dashboard discovers rigs from their stratum `user` label — confirm the worker is actually connected to the pool and mining. | -| **Rig shows as connected but no stats** | The HTTP API token must equal the rig name (or be unset). If you set a custom `ACCESS_TOKEN`, the dashboard can't read it — clear it and re-run setup. | -| **Stats unreachable from the stack host** | Confirm the worker's `:8080` is reachable from the stack host over the LAN (firewall, correct IP). RigForge binds `0.0.0.0` by default. 
| +| Rig won't mine / XMRig logs `Permission denied` at login | The stack has stratum authentication on (`p2pool.stratum_password`); set the pool `pass` to that secret. See [Stratum authentication](#stratum-authentication-optional). | +| Worker missing from the dashboard | The dashboard discovers rigs from their stratum `user` label; confirm the worker is actually connected to the pool and mining. | +| Rig shows as connected but no stats | By default the API is open and the dashboard reads it with no token. If you set an `ACCESS_TOKEN` here, the dashboard must match it (`workers.api_auth: token` + `workers.api_token`, or `name` if the token is the rig name); otherwise clear `ACCESS_TOKEN` and re-run setup. | +| Stats unreachable from the stack host | Confirm the worker's `:8080` is reachable from the stack host over the LAN (firewall, correct IP). RigForge binds `0.0.0.0` by default. | --- diff --git a/rigforge.sh b/rigforge.sh index a58db5d..624f1a1 100755 --- a/rigforge.sh +++ b/rigforge.sh @@ -229,7 +229,7 @@ ensure_config_exists() { log "Starting interactive setup..." # We only need the pool URL — every other key has a sensible default (see - # config.advanced.example.json for the full list). The URL is host:port (Pithead's proxy + # config.reference.json for the full list). The URL is host:port (Pithead's proxy # listens on 3333). read -r -p "Enter your pool URL (host:port, e.g. your-stack:3333): " IN_URL || true @@ -343,17 +343,16 @@ parse_config() { done done < <(jq -c '.[]' <<<"$POOLS_JSON") - # HTTP API token. The rig's label is the pool `user` (#22; defaults to the hostname — see - # generate_xmrig_config). The token defaults to that same rig name, so the Pithead contract - # (the dashboard authenticates as `Bearer `) holds out of the box. An explicit - # ACCESS_TOKEN overrides it. + # HTTP API token (OPTIONAL). By default the rig's read-only xmrig API is left OPEN — no token. 
+ # Pithead's stock contract is a no-auth probe of GET http://:8080/1/summary, so an + # untokened, `restricted` (read-only) API works out of the box. Set ACCESS_TOKEN to require a + # Bearer token instead — then match it on the dashboard side (Pithead `workers.api_auth: token` + # + `workers.api_token`; or `name` if you set ACCESS_TOKEN to the rig name). See + # docs/pithead-integration.md. ACCESS_TOKEN=$(jq -r '.ACCESS_TOKEN // empty' "$CONFIG_JSON") - if [ -z "$ACCESS_TOKEN" ]; then - ACCESS_TOKEN=$(jq -r '.[0].user' <<<"$POOLS_JSON") - [ -n "$ACCESS_TOKEN" ] || ACCESS_TOKEN=$(hostname) - fi - # The token is sent as an HTTP Authorization header, so keep it to safe, header-clean characters. - if ! [[ "$ACCESS_TOKEN" =~ ^[A-Za-z0-9._:@+-]+$ ]]; then + # When set, the token is sent as an HTTP Authorization header, so keep it to safe, header-clean + # characters. Empty is allowed and means "open API" (the default). + if [ -n "$ACCESS_TOKEN" ] && ! [[ "$ACCESS_TOKEN" =~ ^[A-Za-z0-9._:@+-]+$ ]]; then error "ACCESS_TOKEN has invalid characters (allowed: letters, digits, . _ - : @ +): '$ACCESS_TOKEN'." fi @@ -590,27 +589,20 @@ generate_xmrig_config() { INIT_AVX2="-1" # Lock down the HTTP API to READ-ONLY (restricted) so it can't be used to *control* the miner # remotely. Keep it bound to all interfaces, NOT localhost: Pithead reads per-rig stats from the - # stack host via GET http://:8080/1/summary (read-only, authenticated by the per-rig access - # token = rig name). Binding localhost would break that integration — see issue #24. Workers are - # expected to live on a trusted LAN. + # stack host via GET http://:8080/1/summary (read-only; OPEN by default — see ACCESS_TOKEN + # above). Binding localhost would break that integration — see issue #24. Workers are expected to + # live on a trusted LAN, which is why a read-only API with no token is a safe default there. 
HTTP_RESTRICTED="true" HTTP_HOST="0.0.0.0" - # macOS Specific Overrides + # macOS Specific Overrides (only the values that differ from the shared defaults above) if [ "$OS_TYPE" == "Darwin" ]; then - YIELD="false" - # Match the Linux dedicated-miner default (2). XMRig warns a priority above 2 can make the - # machine unresponsive, and macOS is a light-use/dev target — don't pin it to the most - # aggressive level here. - PRIORITY="2" ASM="true" WRMSR="false" RDMSR="false" HUGE_PAGES="false" MEMORY_POOL="false" ONE_GB_PAGES="false" - NUMA="true" - HTTP_RESTRICTED="true" HTTP_HOST="::" # Generate rx array [-1, -1, ...] based on core count @@ -900,7 +892,7 @@ configure_limits() { # removes it regardless, but only while it's still our symlink. link_cli() { [ "${ADD_TO_PATH:-false}" = "true" ] || return 0 - local target="$SCRIPT_DIR/rigforge.sh" link="$BIN_DIR/rigforge" ok=1 + local target="$SCRIPT_DIR/rigforge.sh" link="$BIN_DIR/rigforge" if [ ! -d "$BIN_DIR" ]; then warn "Skipped the 'rigforge' command — $BIN_DIR doesn't exist. Run it as './rigforge.sh' instead." return 0 @@ -917,8 +909,7 @@ link_cli() { local sudo_pfx="" [ -w "$BIN_DIR" ] || sudo_pfx="sudo" # shellcheck disable=SC2086 - $sudo_pfx rm -f "$link" 2>/dev/null && $sudo_pfx ln -s "$target" "$link" 2>/dev/null || ok=0 - if [ "$ok" = 1 ]; then + if $sudo_pfx rm -f "$link" 2>/dev/null && $sudo_pfx ln -s "$target" "$link" 2>/dev/null; then log "Installed the 'rigforge' command -> $link (try: 'sudo rigforge doctor' from anywhere)." else warn "Couldn't add the 'rigforge' command at $link (permissions?). Run it as './rigforge.sh' instead." @@ -2341,8 +2332,14 @@ _read_api_hashrate() { return fi command -v curl >/dev/null 2>&1 || return 0 - curl -fsS --max-time 5 -H "Authorization: Bearer ${ACCESS_TOKEN:-}" "$url" 2>/dev/null | - jq -r '.hashrate.total[0] // empty' 2>/dev/null + # The API is open (read-only) with no token by default; only send a Bearer when ACCESS_TOKEN is set. 
+ # XMRig 401s a token it never asked for, and curl -f (exit 22) would then abort the caller under set -e. + # Branch rather than an empty-array curl arg, which also trips set -u on bash 3.2 (macOS). + if [ -n "${ACCESS_TOKEN:-}" ]; then + curl -fsS --max-time 5 -H "Authorization: Bearer $ACCESS_TOKEN" "$url" 2>/dev/null + else + curl -fsS --max-time 5 "$url" 2>/dev/null + fi | jq -r '.hashrate.total[0] // empty' 2>/dev/null } # Median of N live API hashrate samples, seconds apart. Smooths the jittery live reading so a diff --git a/tests/README.md b/tests/README.md index f59cf1a..b7c11d9 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,10 +1,10 @@ # RigForge tests -RigForge is one self-contained script, so its tests are layered by **how much they exercise for -real** — from a dependency-free suite that runs anywhere, up to a real-hardware gate that actually +RigForge is one self-contained script, so its tests are layered by how much they exercise for +real, from a dependency-free suite that runs anywhere up to a real-hardware gate that actually compiles XMRig and mines. Each layer covers what the one below it has to stub. -> The whole point of the split: CI proves everything it *can* on a GitHub runner; the things a runner +> The split exists so CI proves everything it can on a GitHub runner, while the things a runner > physically can't do (compile XMRig, reserve HugePages, write MSRs, set the governor, hash) are > proven once, by hand, on a real rig before tagging a release. @@ -12,37 +12,37 @@ compiles XMRig and mines. Each layer covers what the one below it has to stub. | Layer | File | Runs | What it proves | How to run | |---|---|---|---|---| -| **Unit + black-box suite** | [`run.sh`](run.sh) | Any host (macOS/Linux), no Docker. 
**In CI.** | Config parsing, the XMRig-config generation matrix (every CPU/OS profile, simulated via PATH stubs), GRUB/HugePages math, the command surface, tune search, doctor — everything that doesn't need a real `/etc` or real hardware. The bulk of coverage. | `make test` (lint + suite) or `bash tests/run.sh` | +| **Unit + black-box suite** | [`run.sh`](run.sh) | Any host (macOS/Linux), no Docker. **In CI.** | Config parsing, the XMRig-config generation matrix (every CPU/OS profile, simulated via PATH stubs), GRUB/HugePages math, the command surface, tune search, doctor: everything that doesn't need a real `/etc` or real hardware. The bulk of coverage. | `make test` (lint + suite) or `bash tests/run.sh` | | **Linux container e2e** | [`e2e/linux.sh`](e2e/linux.sh) → [`e2e/in-container.sh`](e2e/in-container.sh) | Disposable Ubuntu container, **needs Docker**. **In CI.** | The genuine Linux deploy path against a real (throwaway) `/etc` with real GNU tools (`sed -i`, `tee`, `envsubst`) + idempotency on re-run. Only the heavy/privileged bits (compile, package install, `systemctl`/`mount`) are stubbed. | `make test-e2e` | | **Native macOS e2e** | [`e2e/macos.sh`](e2e/macos.sh) | A real Mac, **CI-only** (runs as a step in the macOS job). | The macOS deploy path with genuine BSD tools the Linux CI can only stub: BSD `sed`, the macOS config profile, `mac_*` process control (real `nohup` + PID file), the launchd login agent, `backup`/`restore`. | `make test-e2e-macos` | | **Coverage gate** | [`coverage.sh`](coverage.sh) | kcov in **Docker**. **In CI.** | Line coverage of `rigforge.sh` + `util/` by running `run.sh` under kcov; enforces the committed floor ([`coverage-floor.txt`](coverage-floor.txt)) plus a patch-coverage gate (diff-cover) on changed lines. | `make coverage` | -| **Release smoke (quick)** | [`smoke.sh`](smoke.sh) | Real Linux rig, **manual**. Not in CI. | The *compiled* binary actually starts and hashes (`xmrig --bench`, fully offline). 
Fast pre-tag confidence that the worker we ship runs. | `make smoke` | -| **Release e2e (full)** | [`e2e-real.sh`](e2e-real.sh) | Real Linux rig, **manual, root**. Not in CI. | The real thing end to end: build + tune + kernel tuning + service + a real hash, then a clean uninstall. **The release gate.** | `make e2e-real` — see [`RELEASING.md`](../RELEASING.md) | +| **Release smoke (quick)** | [`smoke.sh`](smoke.sh) | Real Linux rig, **manual**. Not in CI. | The compiled binary actually starts and hashes (`xmrig --bench`, fully offline). Fast pre-tag confidence that the worker we ship runs. | `make smoke` | +| **Release e2e (full)** | [`e2e-real.sh`](e2e-real.sh) | Real Linux rig, **manual, root**. Not in CI. | The real thing end to end: build + tune + kernel tuning + service + a real hash, then a clean uninstall. **The release gate.** | `make e2e-real` (see [`RELEASING.md`](../RELEASING.md)) | The first four run automatically on every push/PR (see [`.github/workflows/ci.yml`](../.github/workflows/ci.yml)). -The last two are deliberately kept **out of CI** — a real build, HugePages, and live mining are -flaky-by-nature and against GitHub Actions' ToS — so they're a manual pre-tag gate the releaser runs. +The last two are deliberately kept out of CI, because a real build, HugePages, and live mining are +flaky by nature and against GitHub Actions' ToS. They're a manual pre-tag gate the releaser runs. ## Where does my test go? -- **New logic, config-gen behaviour, a CPU/OS profile, or command behaviour** → [`run.sh`](run.sh). +- New logic, config-gen behavior, a CPU/OS profile, or command behavior → [`run.sh`](run.sh). It's the default home for almost everything; hardware and OS are simulated with PATH stubs, so it stays hardware-independent and runs the same on any machine.
-- **A new real-`/etc` system effect** (fstab, memlock limits, an MSR/modules edit, a mount) → assert it - in [`e2e/in-container.sh`](e2e/in-container.sh), which runs against a real throwaway filesystem. -- **macOS-specific behaviour** (BSD tools, launchd, the mac process control) → [`e2e/macos.sh`](e2e/macos.sh). -- **Something only provable on real hardware** (it actually hashes, MSRs really applied, HugePages +- A new real-`/etc` system effect (fstab, memlock limits, an MSR/modules edit, a mount) → assert it + in [`e2e/in-container.sh`](e2e/in-container.sh), which runs against a throwaway filesystem. +- macOS-specific behavior (BSD tools, launchd, the mac process control) → [`e2e/macos.sh`](e2e/macos.sh). +- Something only provable on real hardware (it actually hashes, MSRs really applied, HugePages really reserved) → [`e2e-real.sh`](e2e-real.sh). ## Conventions -- `run.sh` is **dependency-free** — no bats, no frameworks. Tiny `assert_*` helpers, and every external +- `run.sh` is dependency-free: no bats, no frameworks. Tiny `assert_*` helpers, and every external or privileged command is faked in a stub dir placed first on `PATH`. Keep it that way: a contributor must be able to run `bash tests/run.sh` on a stock machine. -- It's **hardware-independent on purpose**: all hardware-probe env hooks are pointed at non-existent +- It's hardware-independent on purpose: all hardware-probe env hooks are pointed at non-existent paths up top, so the same run exercises EPYC / Ryzen-X3D / macOS inputs back to back and gives the - *same* result on any host. Don't read real `/sys` or `/proc` — drive behaviour through the stubs. -- The suite must pass under both modern bash and **Apple's bash 3.2** (CI runs `/bin/bash tests/run.sh` + same result on any host. Don't read real `/sys` or `/proc`; drive behavior through the stubs. +- The suite must pass under both modern bash and Apple's bash 3.2 (CI runs `/bin/bash tests/run.sh` on macOS); avoid bash-4-only syntax.
- Lint everything: `make lint` (shellcheck + shfmt). The file list lives in the Makefile's `SHELL_FILES` so CI and local stay in sync — add new `tests/*.sh` there. diff --git a/tests/e2e-real.sh b/tests/e2e-real.sh index fce9616..2f8bb62 100755 --- a/tests/e2e-real.sh +++ b/tests/e2e-real.sh @@ -149,9 +149,14 @@ verify() { # miner's startup logging and spuriously fails the MSR assertions (the mod is applied a beat later). # Best-effort: wait up to ~90s for a live API hashrate, then proceed regardless so a genuinely dead # miner still surfaces as a doctor failure rather than hanging. - local _w _hr + # The worker API is open (read-only) with no token by default now, so only send a Bearer when the + # operator actually set ACCESS_TOKEN — XMRig 401s a token it never asked for, which under set -e + + # pipefail (curl -f → exit 22) would abort verify here before it prints a thing. The probe is `|| true`-guarded for the same reason: a refused connection (curl exit 7) must retry, not abort; the header array is expanded via ${_auth[@]+...} so an empty array stays safe even if set -u is in effect on bash older than 4.4. + local _w _hr _tok _auth=() + _tok=$(jq -r '.ACCESS_TOKEN // empty' "$HERE/config.json" 2>/dev/null || true) + [ -n "$_tok" ] && _auth=(-H "Authorization: Bearer $_tok") for _w in $(seq 1 30); do - _hr=$(curl -fsS --max-time 4 -H "Authorization: Bearer $(hostname)" http://127.0.0.1:8080/2/summary 2>/dev/null | jq -r '.hashrate.total[0] // 0' 2>/dev/null) + _hr=$(curl -fsS --max-time 4 ${_auth[@]+"${_auth[@]}"} http://127.0.0.1:8080/2/summary 2>/dev/null | jq -r '.hashrate.total[0] // 0' 2>/dev/null) || true { [ -n "$_hr" ] && awk "BEGIN{exit !($_hr > 0)}" 2>/dev/null; } && break sleep 3 done diff --git a/tests/e2e/macos.sh b/tests/e2e/macos.sh index fe3b5bf..347f9c1 100755 --- a/tests/e2e/macos.sh +++ b/tests/e2e/macos.sh @@ -46,7 +46,7 @@ trap cleanup EXIT mkdir -p "$HOME" # Writable copy of just the deploy bits (not .git / the suite / build artifacts).
-(cd "$SRC" && cp -a rigforge.sh util systemd config.json.template config.advanced.example.json VERSION "$WORK"/) 2>/dev/null +(cd "$SRC" && cp -a rigforge.sh util systemd config.minimal.json config.reference.json VERSION "$WORK"/) 2>/dev/null cd "$WORK" || { echo "cannot enter $WORK" exit 1 diff --git a/tests/run.sh b/tests/run.sh index de261c8..df0231f 100644 --- a/tests/run.sh +++ b/tests/run.sh @@ -169,6 +169,13 @@ if [ "$1" = list ] && [ -n "$2" ]; then fi fi exit 0 +EOF + # curl stub for the worker-API probe: record the invocation (so a test can assert whether an + # Authorization header was passed) and emit an XMRig-style /2/summary body. Exits 0 like a real 200. + cat >"$bin/curl" <<'EOF' +#!/usr/bin/env bash +echo "[curl] $*" >> "${CURL_LOG:-/dev/null}" +printf '{"hashrate":{"total":[%s,0,0]}}\n' "${STUB_API_HR:-1234.5}" EOF chmod +x "$bin"/* @@ -365,16 +372,16 @@ assert_eq "ACCESS_TOKEN honoured" "$(parse_and_print "$c" "$ROOT" ACCESS_TOKEN)" assert_eq "no bundled XMRig template file" "$([ -e "$ROOT/worker-config" ] && echo present || echo gone)" "gone" # #22: the rig's label is the pool `user` (folded in from the old WORKER_NAME); blank -> hostname (at -# config-gen). The HTTP API token follows the rig name (the first pool's user), so the Pithead -# "Bearer " contract holds out of the box; an explicit ACCESS_TOKEN overrides it. -echo "== unit: rig label = pool user, token follows it (#22) ==" +# config-gen). The HTTP API token is OPTIONAL and defaults to empty (an open, read-only API — the +# stock Pithead no-auth contract); an explicit ACCESS_TOKEN turns auth on. 
+echo "== unit: rig label = pool user; API token off by default (#22) ==" c="$(mkconf userset "{ \"pools\": [{\"url\":\"h:3333\",\"user\":\"rig-07\"}] }")" assert_eq "pool user honoured" "$(parse_and_print "$c" "$ROOT" POOLS_JSON | jq -r '.[0].user')" "rig-07" -assert_eq "token defaults to the pool user" "$(parse_and_print "$c" "$ROOT" ACCESS_TOKEN)" "rig-07" +assert_eq "token empty (open API) by default" "$(parse_and_print "$c" "$ROOT" ACCESS_TOKEN)" "" c="$(mkconf userblank "{ $POOL }")" -assert_eq "token falls back to hostname when user blank" "$(parse_and_print "$c" "$ROOT" ACCESS_TOKEN)" "rigbox" +assert_eq "token stays empty even when user blank" "$(parse_and_print "$c" "$ROOT" ACCESS_TOKEN)" "" c="$(mkconf usertok "{ \"pools\": [{\"url\":\"h:3333\",\"user\":\"rig-07\"}], \"ACCESS_TOKEN\": \"custom\" }")" -assert_eq "explicit token overrides the rig name" "$(parse_and_print "$c" "$ROOT" ACCESS_TOKEN)" "custom" +assert_eq "explicit token turns auth on" "$(parse_and_print "$c" "$ROOT" ACCESS_TOKEN)" "custom" echo "== unit: parse_config — error paths ==" printf '{ not json ' >"$SANDBOX/bad.json" @@ -512,8 +519,9 @@ assert_eq "generic: no dead cpu.hwloc key" "$(J "$cfg" '.cpu.hwloc')" "null" assert_eq "generic: huge-pages-jit off (matches XMRig default)" "$(J "$cfg" '.cpu."huge-pages-jit"')" "false" # HTTP API locked down on Linux (#7 / #17): made READ-ONLY (restricted) so it can't control the # miner remotely. It stays bound to 0.0.0.0 (NOT localhost) on purpose: Pithead reads per-rig stats -# from the stack host at http://:8080 (read-only, token = rig name) — localhost would break that -# integration (issue #24). The access-token assertion below is the auth half of the lockdown. +# from the stack host at http://:8080 (read-only; OPEN by default) — localhost would break that +# integration (issue #24). 
This generic profile sets an explicit ACCESS_TOKEN (tok123), so the +# access-token assertion below covers the opt-in auth path; the open default is asserted separately. assert_eq "generic: http restricted" "$(J "$cfg" '.http.restricted')" "true" assert_eq "generic: http reachable (LAN)" "$(J "$cfg" '.http.host')" "0.0.0.0" assert_eq "contract: http port 8080 (#24)" "$(J "$cfg" '.http.port')" "8080" @@ -526,6 +534,29 @@ assert_eq "access-token applied" "$(J "$cfg" '.http."access-token"')" "tok123" assert_eq "donate-level = DONATION" "$(J "$cfg" '.["donate-level"]')" "5" assert_eq "donate-over-proxy = DONATION" "$(J "$cfg" '.["donate-over-proxy"]')" "5" +echo "== config-gen: open API by default (no ACCESS_TOKEN) ==" +# Default (ACCESS_TOKEN unset/empty): the read-only API is left OPEN — access-token renders as null. +# This is the stock Pithead no-auth contract (the dashboard probes :8080 with no Authorization); +# setting ACCESS_TOKEN turns Bearer auth back on (the explicit-token render is asserted above). +export STUB_CPU_MODEL="Intel(R) Xeon(R) Silver 4310" STUB_NPROC=8 STUB_HOSTNAME=rigbox +d_open="$(mktemp -d "$SANDBOX/open.XXXXXX")" +( + cd "$d_open" || exit 1 + source "$SCRIPT" + OS_TYPE=Linux + WORKER_ROOT="$d_open" + POOL_ADDRESS=myrig.local + POOLS_JSON='[{"url":"myrig.local:3333","user":"","pass":"x","keepalive":true,"tls":false,"enabled":true}]' + ACCESS_TOKEN="" + DONATION=1 + LOGROTATE_DIR="$d_open" + set +e + PATH="$STUBS:$PATH" generate_xmrig_config >/dev/null 2>&1 +) +cfg_open="$d_open/config.json" +assert_eq "open API by default: access-token null" "$(J "$cfg_open" '.http."access-token"')" "null" +assert_eq "open API by default: still restricted (read-only)" "$(J "$cfg_open" '.http.restricted')" "true" + echo "== config-gen: AMD EPYC (server) ==" # Run directly (not via gen_config) so we can also capture the profile log line from stdout. 
export STUB_CPU_MODEL="AMD EPYC 7763 64-Core Processor" STUB_NPROC=8 STUB_HOSTNAME=rigbox @@ -609,6 +640,37 @@ cfg="$d/config.json" unset SIM_POOLS STUB_CPU_MODEL STUB_NPROC STUB_HOSTNAME assert_eq "explicit pool user kept" "$(J "$cfg" '.pools[0].user')" "fancy-rig" +# #21/#24: fields that must survive generate_xmrig_config unmangled. parse_config-side acceptance is +# covered above; here we prove the EMITTED config.json (what XMRig actually loads) preserves them — a jq +# re-emit is exactly where a bracketed IPv6 host or a lone TLS flag could get dropped or reshaped. +echo "== config-gen: IPv6 host / single-pool TLS / empty-token round-trip ==" +export STUB_CPU_MODEL="Intel(R) Xeon" STUB_NPROC=8 STUB_HOSTNAME=rigbox +SIM_OS=Linux SIM_DON=1 +SIM_POOLS='[{"url":"[2001:db8::1]:3333","user":"","pass":"x","keepalive":true,"tls":true,"enabled":true}]' +d="$(gen_config)" +cfg="$d/config.json" +unset SIM_POOLS +assert_eq "bracketed IPv6 pool url round-trips unmangled" "$(J "$cfg" '.pools[0].url')" "[2001:db8::1]:3333" +assert_eq "single-pool tls:true reaches config.json" "$(J "$cfg" '.pools[0].tls')" "true" +# An empty ACCESS_TOKEN must emit JSON `null` (auth-disabled), not "" or the string "null". gen_config's +# `${SIM_TOK:-tok123}` can't express an empty token, so drive generate_xmrig_config directly. 
+dn="$(mktemp -d "$SANDBOX/tok.XXXXXX")" +( + cd "$dn" || exit 1 + source "$SCRIPT" + OS_TYPE=Linux + WORKER_ROOT="$dn" + POOL_ADDRESS=myrig.local + POOLS_JSON='[{"url":"myrig.local:3333","user":"r","pass":"x","keepalive":true,"tls":false,"enabled":true}]' + ACCESS_TOKEN="" + DONATION=1 + LOGROTATE_DIR="$dn" + set +e + PATH="$STUBS:$PATH" generate_xmrig_config >/dev/null 2>&1 +) +assert_eq "empty token emits JSON null (not \"\" or \"null\")" "$(J "$dn/config.json" '.http."access-token" == null')" "true" +unset STUB_CPU_MODEL STUB_NPROC STUB_HOSTNAME + echo "== config-gen: idempotent (same inputs -> identical output) ==" export STUB_CPU_MODEL="Intel(R) Xeon(R)" STUB_NPROC=8 STUB_HOSTNAME=rigbox SIM_OS=Linux SIM_DON=1 @@ -769,6 +831,22 @@ out="$(pin_compile "tamperedsha1111111111111111111111111111")" rc=$? assert_rc "tampered commit fails build" "$rc" "1" assert_contains "tampered commit is reported" "$out" "commit mismatch" +# The mismatch path also drops the clone (rm -rf xmrig) so the NEXT run starts clean instead of tripping +# git's "destination 'xmrig' already exists and is not empty" (#18). Assert the dir is gone — a regression +# that removed the cleanup would still print the mismatch error and pass every assertion above. +pc="$(mktemp -d "$SANDBOX/pinclean.XXXXXX")" +( + cd "$pc" || exit 1 + source "$SCRIPT" + OS_TYPE="$(uname -s)" + DONATION=1 + WORKER_ROOT="$pc" + export XMRIG_COMMIT="pinnedsha000000000000000000000000000000" + export STUB_GIT_HEAD="tamperedsha1111111111111111111111111111" + set +e + PATH="$STUBS:$PATH" compile_xmrig >/dev/null 2>&1 +) +assert_eq "commit mismatch removes the clone so the next run starts clean (#18)" "$([ -e "$pc/xmrig" ] && echo present || echo gone)" "gone" # --------------------------------------------------------------------------- # Build robustness (#9): cap -j by RAM (~1 job / 2 GB) and report the failing step on error. 
@@ -793,6 +871,12 @@ assert_eq "unknown RAM -> all cores" "$( source "$SCRIPT" MEMINFO=/nonexistent compute_build_jobs 6 )" "6" +# The `max < 1 -> 1` floor: a ~1.5 GB host computes max = 1/2 = 0, which must clamp to 1 job (not 0, which +# would make `make -j0` fail). The 2 GB case above lands on max=1 already, so it never exercises this clamp. +assert_eq "sub-2GB host floors to 1 job (never 0)" "$( + source "$SCRIPT" + MEMINFO="$(mk_meminfo 1572864 mi1_5)" compute_build_jobs 8 +)" "1" echo "== unit: on_err reports the failing step (#9) ==" out="$( @@ -1376,6 +1460,31 @@ wdead="$( ( ))" assert_eq "_wait_miner_live: false while the API stays at 0 (#95)" "$wdead" "DEAD" +# The worker API is open (read-only) with no token by default (#125), so _read_api_hashrate must send a +# Bearer ONLY when ACCESS_TOKEN is set — else XMRig 401s a token it never asked for and curl -f (exit 22) +# aborts the caller under set -e, silently breaking live tuning. The rest of the suite stubs this via +# API_CMD, so this is the one place the real curl branch (the header logic) is exercised. 
+echo "== unit: _read_api_hashrate sends a Bearer only when ACCESS_TOKEN is set (#125) ==" +clog="$SANDBOX/curl-calls.log" +: >"$clog" +hr_open="$( ( + source "$SCRIPT" + unset API_CMD + ACCESS_TOKEN="" + PATH="$STUBS:$PATH" CURL_LOG="$clog" STUB_API_HR=1234.5 _read_api_hashrate +))" +assert_eq "_read_api_hashrate returns the hashrate on the open (no-token) API" "$hr_open" "1234.5" +assert_absent "no Authorization header sent when ACCESS_TOKEN is unset" "$(cat "$clog")" "Authorization" +: >"$clog" +hr_auth="$( ( + source "$SCRIPT" + unset API_CMD + ACCESS_TOKEN="miner-0" + PATH="$STUBS:$PATH" CURL_LOG="$clog" STUB_API_HR=987.6 _read_api_hashrate +))" +assert_eq "_read_api_hashrate returns the hashrate when a token is set" "$hr_auth" "987.6" +assert_contains "Bearer sent when ACCESS_TOKEN is set" "$(cat "$clog")" "Authorization: Bearer miner-0" + # #reown: REAL_USER is who root-written files are handed back to. The systemd autotune runs as root with # no SUDO_USER, so its unit's RIGFORGE_OPERATOR must drive the re-own; interactive SUDO_USER still wins. ru_op="$( ( @@ -1430,6 +1539,116 @@ assert_rc "install_dependencies exits 0 on a non-tty stdin (#74)" "$rc" "0" assert_contains "auto-installs the missing dep (#74)" "$(cat "$APT_LOG")" "build-essential" assert_contains "apt waits for the lock, not fail (#74)" "$(cat "$APT_LOG")" "DPkg::Lock::Timeout=300" +# The apt path adds the versioned kernel-tools package ONLY when `apt-cache show` finds it. The #74 test +# stubs apt-cache to exit 1 (absent), so the present-branch (dep list gains linux-tools-) is untested. +echo "== unit: install_dependencies adds versioned linux-tools when apt-cache has it (#74) ==" +LT="$(mktemp -d "$SANDBOX/lt.XXXXXX")" +# Stubs use an ABSOLUTE `#!/bin/sh` shebang, not `#!/usr/bin/env bash`: these scenarios restrict PATH to +# the stub dir alone (so `command -v` picks the intended package manager), which would leave `env` unable +# to find bash on PATH. 
The stub bodies are POSIX-simple, so /bin/sh runs them directly. +printf '#!/bin/sh\nexit 1\n' >"$LT/dpkg" # every dep "missing" -> all go to the install list +printf '#!/bin/sh\nexit 0\n' >"$LT/apt-cache" # linux-tools- IS available +printf '#!/bin/sh\necho "[apt-get] $*" >>"$CALL_LOG"\n' >"$LT/apt-get" +printf '#!/bin/sh\nwhile [ "${1#*=}" != "$1" ]; do export "$1"; shift; done\nexec "$@"\n' >"$LT/sudo" +printf '#!/bin/sh\necho 6.0.0-rig\n' >"$LT/uname" +chmod +x "$LT"/* +: >"$LT/calls.log" +( + source "$SCRIPT" + OS_TYPE=Linux REAL_USER=test + PATH="$LT" CALL_LOG="$LT/calls.log" install_dependencies /dev/null 2>&1 +assert_contains "apt install list includes linux-tools- (#74)" "$(cat "$LT/calls.log")" "linux-tools-6.0.0-rig" + +# check_prerequisites (the jq bootstrap) had NO test. jq is deliberately kept OFF the scenario PATH so the +# install branch runs; each dir holds ONLY the package manager(s) under test, so `command -v` selects the +# intended per-distro branch from any host. sudo is a passthrough so the (stubbed) installer actually runs. +echo "== unit: check_prerequisites installs jq per package manager ==" +mk_pm_bin() { # : a passthrough sudo (strips any VAR=val prefix) + a logging stub per command. + # Absolute /bin/sh shebangs so the stubs run under a PATH restricted to alone (no bash/env lookup). + local d="$1" c + shift + mkdir -p "$d" + printf '#!/bin/sh\nwhile [ "${1#*=}" != "$1" ]; do export "$1"; shift; done\nexec "$@"\n' >"$d/sudo" + for c in "$@"; do printf '#!/bin/sh\necho "[%s] $*" >>"$CALL_LOG"\nexit 0\n' "$c" >"$d/$c"; done + chmod +x "$d"/* +} +prereq_run() { # : echoes the function output, an rc line, then the recorded calls + local d="$1" os="$2" o rc + : >"$d/calls.log" + o="$( + source "$SCRIPT" + OS_TYPE="$os" + set +e + PATH="$d" CALL_LOG="$d/calls.log" check_prerequisites 2>&1 + )" + rc=$? 
+ printf '%s\nrc=%s\n%s\n' "$o" "$rc" "$(cat "$d/calls.log")" +} +PB="$SANDBOX/prereq" +out="$(mk_pm_bin "$PB/apt" apt-get && prereq_run "$PB/apt" Linux)" +assert_contains "apt: installs jq via apt-get" "$out" "[apt-get] install" +assert_contains "apt: the installed package is jq" "$out" "jq" +out="$(mk_pm_bin "$PB/dnf" dnf && prereq_run "$PB/dnf" Linux)" +assert_contains "dnf: installs jq via dnf" "$out" "[dnf] install -y -q jq" +out="$(mk_pm_bin "$PB/pac" pacman && prereq_run "$PB/pac" Linux)" +assert_contains "pacman: installs jq via pacman" "$out" "[pacman] -Sy --noconfirm jq" +out="$(mk_pm_bin "$PB/none" && prereq_run "$PB/none" Linux)" # sudo only, no package manager +assert_contains "no package manager: hard error" "$out" "no supported package manager" +assert_contains "no package manager: exits non-zero" "$out" "rc=1" +out="$(mk_pm_bin "$PB/mac" brew && prereq_run "$PB/mac" Darwin)" +assert_contains "macOS with brew: installs jq via brew" "$out" "[brew] install jq" +out="$(mk_pm_bin "$PB/macnobrew" && prereq_run "$PB/macnobrew" Darwin)" # no brew +assert_contains "macOS without brew: hard error" "$out" "Homebrew is required" +# jq already present -> no install attempted at all. +out="$(mk_pm_bin "$PB/have" jq apt-get && prereq_run "$PB/have" Linux)" +assert_absent "jq present: does not reinstall it" "$out" "Installing prerequisite" +assert_absent "jq present: no package manager touched" "$out" "[apt-get]" + +# install_dependencies only had the apt path tested (#74). The dnf and pacman branches — different package +# sets, different check/install commands — are our dispatch logic and were never run. apt-get is kept OFF +# PATH so `command -v` falls through to the intended manager; the check command reports every dep missing +# so the install command actually runs. (Third-party install internals aren't our concern — we assert only +# that the RIGHT command installs a distro-appropriate package.) 
+echo "== unit: install_dependencies dnf / pacman / no-manager branches ==" +deps_run() { # : echoes the function output, an rc line, then the recorded calls + local d="$1" os="$2" o rc + : >"$d/calls.log" + o="$( + source "$SCRIPT" + OS_TYPE="$os" REAL_USER=test + set +e + PATH="$d" CALL_LOG="$d/calls.log" install_dependencies &1 + )" + rc=$? + printf '%s\nrc=%s\n%s\n' "$o" "$rc" "$(cat "$d/calls.log")" +} +DB="$SANDBOX/deps" +# dnf: rpm is the check command (report missing), dnf the installer. +mkdir -p "$DB/dnf" +printf '#!/bin/sh\nexit 1\n' >"$DB/dnf/rpm" # `rpm -q ` -> missing +printf '#!/bin/sh\necho "[dnf] $*" >>"$CALL_LOG"\n' >"$DB/dnf/dnf" +printf '#!/bin/sh\nexec "$@"\n' >"$DB/dnf/sudo" +chmod +x "$DB/dnf"/* +out="$(deps_run "$DB/dnf" Linux)" +assert_contains "dnf: installs via 'dnf install -y'" "$out" "[dnf] install -y" +assert_contains "dnf: pulls a dnf-flavoured package (gcc-c++)" "$out" "gcc-c++" +# pacman is BOTH the check (`-Qi` -> missing) and the installer (`-Sy` -> log). +mkdir -p "$DB/pac" +cat >"$DB/pac/pacman" <<'EOF' +#!/bin/sh +case "$1" in -Qi) exit 1 ;; *) echo "[pacman] $*" >>"$CALL_LOG" ;; esac +EOF +printf '#!/bin/sh\nexec "$@"\n' >"$DB/pac/sudo" +chmod +x "$DB/pac"/* +out="$(deps_run "$DB/pac" Linux)" +assert_contains "pacman: installs via 'pacman -Sy --noconfirm --needed'" "$out" "[pacman] -Sy --noconfirm --needed" +assert_contains "pacman: pulls base-devel" "$out" "base-devel" +# No supported package manager: warn and return 0 (must NOT abort the whole setup run). 
+out="$(mk_pm_bin "$DB/none" && deps_run "$DB/none" Linux)" +assert_contains "no manager: warns instead of failing" "$out" "No supported package manager" +assert_contains "no manager: returns 0 (setup continues)" "$out" "rc=0" + # --------------------------------------------------------------------------- # When no service was installed (macOS), finish_deployment points the user at 'start' — not a raw # screen/xmrig command (the build-dir config #20 guaranteed is now handled inside mac_start, asserted @@ -1484,6 +1703,17 @@ assert_contains "macOS status shows stopped after stop" "$out" "not running" [ -f "$PIDF" ] && kill "$(cat "$PIDF" 2>/dev/null)" 2>/dev/null rm -f "$PIDF" +# Guard: `start` before `setup` (no built binary) must fail with a clear "run setup first", NOT spawn a +# broken PID. Uses a worker root with no build dir at all. +NOB="$(mktemp -d "$SANDBOX/nobuilt.XXXXXX")" +cp "$ROOT/VERSION" "$NOB/" +cat >"$NOB/config.json" <&1))" +assert_rc "macOS start with no built worker fails" "$?" "1" +assert_contains "macOS start with no worker points at setup" "$out" "Run 'setup' first" + # macOS login auto-start: enable installs a launchd LaunchAgent ($HOME sandboxed to $MC so the plist # never touches the real ~/Library/LaunchAgents). With it installed, launchd owns the miner and # start/stop/status delegate to launchctl (the stub records calls + reports a PID via STUB_LAUNCHD_PID). 
@@ -1503,6 +1733,9 @@ assert_contains "enable loaded the agent" "$(cat "$LCL")" "[launchctl] load" out="$(mac_lr bash "$SCRIPT" start)" assert_contains "start delegates to launchctl when enabled" "$(cat "$LCL")" "[launchctl] start" assert_contains "start reports login-agent control" "$out" "login agent" +: >"$LCL" +out="$(mac_lr bash "$SCRIPT" stop)" +assert_contains "stop delegates to launchctl when enabled" "$(cat "$LCL")" "[launchctl] stop" out="$( (cd "$MC" && PATH="$STUBS:$PATH" STUB_UNAME_S=Darwin HOME="$MC" CALL_LOG="$LCL" STUB_LAUNCHD_PID=4321 RIGFORGE_HOME="$PWD" bash "$SCRIPT" status &1))" assert_contains "status reads the launchd PID" "$out" "pid 4321" out="$(mac_lr bash "$SCRIPT" disable)" @@ -1510,6 +1743,45 @@ assert_rc "macOS disable exits 0" "$?" "0" assert_eq "disable removed the plist" "$([ -f "$PLIST" ] && echo y || echo n)" "n" assert_contains "disable unloaded the agent" "$(cat "$LCL")" "[launchctl] unload" +# #audit A2: when a GRUB change pends a reboot, HugePages aren't reserved yet, so install_service must +# ENABLE the unit but NOT start it — starting now would run the miner degraded (no huge-page backing) and, +# with Restart=always, churn until the reboot. The full-deploy run enters this branch but its systemctl +# stub is a silent no-op, so nothing proved the start was withheld. Drive install_service directly and read +# the recorded systemctl calls for each of the three cases (reboot-pending / rebuilt / steady-state). 
+echo "== unit: install_service reboot-gates the start (#audit A2) ==" +svc_run() { # <dir> <reboot_required> <xmrig_rebuild>: renders into <dir>, echoes the systemctl call log + local d="$1" + mkdir -p "$d/etc/systemd" "$d/xmrig/build" + ( + cd "$d" || exit 1 + source "$SCRIPT" + OS_TYPE=Linux + SCRIPT_DIR="$ROOT" # so envsubst reads the real systemd/xmrig.service.template + WORKER_ROOT="$d" + SYSTEMD_DIR="$d/etc/systemd" + REBOOT_REQUIRED="$2" + XMRIG_REBUILD="$3" + set +e + PATH="$STUBS:$PATH" CALL_LOG="$d/calls.log" install_service >/dev/null 2>&1 + ) + cat "$d/calls.log" +} +SVC_RB="$(mktemp -d "$SANDBOX/svcrb.XXXXXX")" +log_reboot="$(svc_run "$SVC_RB" true false)" +assert_contains "reboot pending: service enabled" "$log_reboot" "[systemctl] enable xmrig.service" +assert_absent "reboot pending: NOT started (would run degraded) (#audit A2)" "$log_reboot" "start xmrig.service" +assert_absent "reboot pending: NOT restarted" "$log_reboot" "restart xmrig.service" +# CPUPOWER_PATH substitution: the ExecStartPre governor set is best-effort (leading `-`); a literal +# unexpanded $CPUPOWER_PATH there would break with Restart=always. Assert it resolved to a real path.
+svc_rendered="$(cat "$SVC_RB/etc/systemd/xmrig.service")" +assert_contains "service: ExecStartPre governor set rendered" "$svc_rendered" "ExecStartPre=-" +assert_absent "service: no unexpanded CPUPOWER_PATH" "$svc_rendered" '$CPUPOWER_PATH' +log_rebuild="$(svc_run "$(mktemp -d "$SANDBOX/svcrbu.XXXXXX")" false true)" +assert_contains "rebuilt binary, no reboot: service restarted" "$log_rebuild" "[systemctl] restart xmrig.service" +log_steady="$(svc_run "$(mktemp -d "$SANDBOX/svcst.XXXXXX")" false false)" +assert_contains "no rebuild, no reboot: service (re)started, not restarted" "$log_steady" "[systemctl] start xmrig.service" +assert_absent "no rebuild: does not needlessly restart a running miner" "$log_steady" "restart xmrig.service" + # --------------------------------------------------------------------------- # Full end-to-end run of the REAL script with everything stubbed, executed TWICE to prove idempotency. # Every /etc target is redirected into the work dir, and passthrough sudo lets the writes land there. @@ -1592,10 +1864,19 @@ if [ "$HOST_OS" = Linux ]; then assert_contains "service: NoNewPrivileges" "$svc" "NoNewPrivileges=true" assert_contains "service: ProtectSystem=full" "$svc" "ProtectSystem=full" assert_contains "service: LimitMEMLOCK=infinity" "$svc" "LimitMEMLOCK=infinity" + # The rest of the defense-in-depth block was unchecked — a dropped line is a silent hardening regression. 
+ assert_contains "service: ProtectControlGroups" "$svc" "ProtectControlGroups=true" + assert_contains "service: ProtectClock" "$svc" "ProtectClock=true" + assert_contains "service: RestrictSUIDSGID" "$svc" "RestrictSUIDSGID=true" + assert_contains "service: LockPersonality" "$svc" "LockPersonality=true" + assert_contains "service: PrivateTmp" "$svc" "PrivateTmp=true" assert_contains "service: ReadWritePaths -> worker root" "$svc" "ReadWritePaths=$W/home/worker" assert_absent "service: no unexpanded WORKER_ROOT" "$svc" 'ReadWritePaths=$WORKER_ROOT' assert_contains "kernel: msr module enabled" "$(cat "$W/etc/modules-load.d/msr.conf")" "msr" assert_contains "limits: fstab 2M mount written" "$(cat "$W/etc/fstab")" "hugetlbfs /dev/hugepages" + # The 1G mount line's content was only asserted in the uninstall pre-seed, never as produced by a fresh + # configure_limits — so a regression in the line it WRITES would go unnoticed. + assert_contains "limits: fstab 1G mount written (pagesize=1G)" "$(cat "$W/etc/fstab")" "pagesize=1G" # #13: memlock scoped to the mining user, NOT granted to every account ("*"). assert_contains "limits: memlock unlimited written" "$(cat "$W/etc/security/limits.conf")" "soft memlock unlimited" assert_absent "limits: not wildcard memlock" "$(cat "$W/etc/security/limits.conf")" "* soft memlock unlimited" @@ -2131,6 +2412,22 @@ out="$(un_run)" assert_rc "second uninstall exits 0" "$?" "0" assert_eq "cli: a non-RigForge 'rigforge' is preserved (#cli)" "$([ -f "$UN/usr-local-bin/rigforge" ] && [ ! -L "$UN/usr-local-bin/rigforge" ] && echo kept || echo removed)" "kept" +# Without --yes, uninstall PROMPTS; answering 'n' must abort cleanly and revert NOTHING (a mistyped +# uninstall shouldn't tear down a working rig). Every other uninstall test passes --yes, so this path was +# never taken. 
+echo "== black-box: uninstall without --yes aborts on 'n' (reverts nothing) ==" +UNN="$(mktemp -d "$SANDBOX/uninstn.XXXXXX")" +cp "$ROOT/VERSION" "$UNN/" +mkdir -p "$UNN/etc/systemd/system" +: >"$UNN/etc/systemd/system/xmrig.service" +cat >"$UNN/config.json" <&1))" +assert_rc "uninstall 'n' exits 0" "$?" "0" +assert_contains "uninstall 'n' reports it aborted" "$out" "Aborted" +assert_eq "uninstall 'n' left the service unit in place" "$([ -f "$UNN/etc/systemd/system/xmrig.service" ] && echo present || echo gone)" "present" + # #54: tune is an iterative, noise-aware, multi-knob hill-climb. It sweeps prefetch_mode, cpu.yield and # the RandomX thread count (cpu.rx, around L3/2 MB), measures each candidate as the MEDIAN of N runs, # memoizes so a combo is never benchmarked twice, climbs from two seeds (auto + educated guess), and @@ -2474,6 +2771,24 @@ assert_eq "watts = energy delta / time (1.00 W) (#81)" "$(wfe 1000000 4000000 18 assert_eq "watts corrects a single counter wrap (#81)" "$(wfe 17000000 1000000 18000000 2)" "1.00" assert_eq "watts empty on elapsed<=0 (no divide-by-zero) (#81)" "$(wfe 1 2 9 0)" "" assert_eq "mean averages the samples (#81)" "$(mean 80 100 120)" "100.00" +# Degenerate inputs, from the missing-sensor / single-read paths that the fakes never reproduce: the stats +# helpers must stay well-defined (no blank garbage, no divide-by-zero) so a candidate with one usable read +# still ranks. med/sd source the same helpers the tune loop uses. +med() { ( + source "$SCRIPT" + _median "$@" +); } +sd() { ( + source "$SCRIPT" + _stddev "$@" +); } +assert_eq "median of a single sample is itself (#81)" "$(med 500)" "500" +assert_eq "median of no samples is empty, not 0 (#81)" "$(med)" "" +assert_eq "stddev needs >=2 samples, else 0 (#81)" "$(sd 500)" "0" +assert_eq "mean of no samples is empty (#81)" "$(mean)" "" +# A backwards energy counter with NO wrap-max (RAPL absent/mispaired) must yield empty, not negative watts. 
+# The existing wrap test always passes mx>0 (the correction branch), so the mx=0 give-up branch was unrun. +assert_eq "watts empty on a backwards counter with no wrap-max (#81)" "$(wfe 5000000 1000000 0 2)" "" # #81: the BUG this fixes — watts must be sampled UNDER LOAD, not at idle after the bench. A fake xmrig # stays alive for the poll window and marks DONE only on exit; TUNE_POWER_CMD returns 200 W while running, @@ -2540,6 +2855,34 @@ acc() { # ; cand=1000H/s/10hpw vs b } assert_eq "perf: a slower candidate is rejected (#79)" "$(acc perf)" "reject" assert_eq "efficiency: a more-efficient candidate is accepted (#79)" "$(acc efficiency)" "accept" +# #79: if EITHER side lacks a power reading, efficiency ranking can't apply — it must fall back to the raw +# H/s comparison so the search still progresses. Here cand has NO hpw entry, so under efficiency the slower +# cand (1000 < 1200) is rejected on raw H/s, exactly as under perf. (The existing gate always has both.) +printf 'best\t8.0\n' >"$AT2/hpw-partial" +assert_eq "efficiency with a missing power reading falls back to raw H/s (#79)" "$( + source "$SCRIPT" + TUNE_TARGET=efficiency + TUNE_MIN_DELTA=0.01 + TUNE_SIGMA=0 + MEMO_SD_FILE="$AT2/sd" + MEMO_THROTTLE_FILE="$AT2/thr" + MEMO_HPW_FILE="$AT2/hpw-partial" + set +e + _accept_better 1000 cand 1200 best && echo accept || echo reject +)" "reject" + +# The scalar scorer used by the autotune log/ranking has the same no-power fallback (#95): efficiency +# ranks hs/W only when watts is present and > 0; otherwise it scores raw H/s. Only ever exercised +# indirectly (full autotune runs always supply watts) — unit-test the branch directly. 
+echo "== unit: _autotune_score efficiency needs watts, else raw H/s (#95) ==" +asc() { ( + source "$SCRIPT" + _autotune_score "$@" +); } +assert_eq "efficiency with watts scores hashrate-per-watt" "$(asc efficiency 1000 8)" "125.0000" +assert_eq "efficiency with zero watts falls back to raw H/s" "$(asc efficiency 1000 0)" "1000" +assert_eq "efficiency with empty watts falls back to raw H/s" "$(asc efficiency 1000 '')" "1000" +assert_eq "perf target always scores raw H/s" "$(asc perf 1000 8)" "1000" # #79: end-to-end — with power that makes prefetch=1 more efficient (1000 H/s @ 100 W = 10 hpw) than # prefetch=2 (1200 H/s @ 200 W = 6 hpw), perf picks the faster prefetch=2 and efficiency picks prefetch=1. @@ -2973,6 +3316,22 @@ assert_rc "restore of a missing archive fails" "$?" "1" out="$(printf 'n\n' | (cd "$FR" && PATH="$STUBS:$PATH" RIGFORGE_HOME="$PWD" bash "$SCRIPT" restore "$ARCHIVE" 2>&1))" assert_rc "restore cancels cleanly on 'n'" "$?" "0" assert_contains "restore cancel message" "$out" "cancelled" +# A bad archive must fail LOUDLY and leave the existing good config.json untouched — a silent clobber here +# would destroy a working config. FR/config.json currently holds DONATION=7 (restored above); assert both +# the error AND that it survives. (1) not a tar/gzip at all: +printf 'this is not a tar archive\n' >"$FR/junk.tar.gz" +out="$(cd "$FR" && PATH="$STUBS:$PATH" RIGFORGE_HOME="$PWD" bash "$SCRIPT" restore -y "$FR/junk.tar.gz" &1)" +assert_rc "restore of a non-tar archive fails" "$?" 
"1" +assert_contains "restore of a non-tar archive is reported" "$out" "Could not extract" +assert_eq "corrupt archive did not clobber the existing config" "$(J "$FR/config.json" '.DONATION')" "7" +# (2) a valid tar that has no config.json (extracts fine, but isn't a RigForge backup): +NOCFG="$(mktemp -d "$SANDBOX/nocfg.XXXXXX")" +printf 'stray\n' >"$NOCFG/not-config.txt" +tar -czf "$FR/nocfg.tar.gz" -C "$NOCFG" not-config.txt +out="$(cd "$FR" && PATH="$STUBS:$PATH" RIGFORGE_HOME="$PWD" bash "$SCRIPT" restore -y "$FR/nocfg.tar.gz" &1)" +assert_rc "restore of a config-less archive fails" "$?" "1" +assert_contains "restore of a config-less archive is reported" "$out" "no config.json" +assert_eq "config-less archive did not clobber the existing config" "$(J "$FR/config.json" '.DONATION')" "7" # backup needs a config to snapshot. NOC="$(mktemp -d "$SANDBOX/noc.XXXXXX")" cp "$ROOT/VERSION" "$NOC/" @@ -2986,8 +3345,8 @@ if [[ "$ver" =~ ^[0-9]+\.[0-9]+\.[0-9]+([-+.].*)?$ ]]; then ok "VERSION is SemVe # #23: the advanced example must be valid JSON and must document every config.json key parse_config # reads — so the reference can't silently drift from the code. -echo "== unit: config.advanced.example.json (#23) ==" -ADV="$ROOT/config.advanced.example.json" +echo "== unit: config.reference.json (#23) ==" +ADV="$ROOT/config.reference.json" if jq -e . "$ADV" >/dev/null 2>&1; then ok "advanced example is valid JSON"; else bad "advanced example is valid JSON" "jq parse failed"; fi # The advanced example documents exactly the user-facing keys. The rig label lives in pools[].user and # the template is internal, so WORKER_NAME / WORKER_CONFIG_FILE / POOL_HOST must NOT appear. @@ -2998,13 +3357,13 @@ for k in POOL_HOST WORKER_NAME WORKER_CONFIG_FILE; do assert_absent "advanced example has no $k key" "$(cat "$ADV")" "\"$k\"" done -# config.json.template is the copy-me starter (referenced by the docs and shipped in the release bundle). 
+# config.minimal.json is the copy-me starter (referenced by the docs and shipped in the release bundle). # It must be valid JSON, carry an obvious unreplaced placeholder, and be REJECTED by parse_config unedited # — so a user can't accidentally deploy the template and mine to a bogus host. (It can drift unnoticed -# otherwise: unlike config.advanced.example.json, nothing else validates it.) -echo "== unit: config.json.template (starter) ==" -TPL="$ROOT/config.json.template" -if jq -e . "$TPL" >/dev/null 2>&1; then ok "config.json.template is valid JSON"; else bad "config.json.template is valid JSON" "jq parse failed"; fi +# otherwise: unlike config.reference.json, nothing else validates it.) +echo "== unit: config.minimal.json (starter) ==" +TPL="$ROOT/config.minimal.json" +if jq -e . "$TPL" >/dev/null 2>&1; then ok "config.minimal.json is valid JSON"; else bad "config.minimal.json is valid JSON" "jq parse failed"; fi assert_contains "template carries an unreplaced pool placeholder" "$(jq -r '.pools[0].url' "$TPL")" "" TT="$(mktemp -d "$SANDBOX/tpl.XXXXXX")" cp "$TPL" "$TT/config.json" diff --git a/util/proposed-grub.sh b/util/proposed-grub.sh index 3a02d6e..e4b12e8 100755 --- a/util/proposed-grub.sh +++ b/util/proposed-grub.sh @@ -28,8 +28,8 @@ HUGEPAGES_1G_NR="${HUGEPAGES_1G_NR:-/sys/kernel/mm/hugepages/hugepages-1048576kB # Extract L3 Cache size and normalize to Megabytes # Output format varies (e.g., "32M", "32768K"), so we strip non-numeric characters. -L3_RAW=$(lscpu | grep "L3 cache" | head -n 1 | awk '{print $3$4}') -L3_MB=$(echo "$L3_RAW" | sed 's/[^0-9]//g') +L3_RAW=$(lscpu | awk '/L3 cache/{print $3$4; exit}') +L3_MB="${L3_RAW//[!0-9]/}" # Convert Kilobytes to Megabytes if necessary if [[ "$L3_RAW" == *K* ]]; then @@ -40,7 +40,7 @@ if [[ -z "$L3_MB" ]]; then fi # Detect Physical CPU Sockets (for display / NUMA fallback). 
-SOCKETS=$(lscpu | grep "Socket(s):" | awk '{print $2}') +SOCKETS=$(lscpu | awk '/Socket\(s\):/{print $2; exit}') if [[ -z "$SOCKETS" ]]; then SOCKETS=1 fi @@ -86,7 +86,7 @@ if [ "$RUNTIME" -eq 1 ]; then # Check if 1GB pages are already allocated PAGES_1GB=0 if [ -f "$HUGEPAGES_1G_NR" ]; then - PAGES_1GB=$(cat "$HUGEPAGES_1G_NR" || echo 0) + PAGES_1GB=$(<"$HUGEPAGES_1G_NR") fi if [ "$PAGES_1GB" -gt 0 ]; then