From 1aec83c2f32a02c6f5e3c010c76ed012d75e8e53 Mon Sep 17 00:00:00 2001
From: Kuba Sunderland-Ober <kuba@mareimbrium.org>
Date: Sun, 24 May 2026 14:52:27 +0200
Subject: [PATCH 1/2] Port link checking to node.js. No more Python dependency.

---
 .github/workflows/checks.yml                  |  41 +-
 .github/workflows/jekyll-gh-pages.yml         |  61 +-
 WIP.md                                        |   4 +-
 .../Documentation Development.md              |   2 +-
 docs/_plugins/offlinify.md                    |  26 +-
 docs/_plugins/offlinify.rb                    |   7 +-
 docs/check.bat                                |  52 +-
 docs/lychee.bat                               |  45 --
 package-lock.json                             | 108 ++++
 package.json                                  |   1 +
 requirements.txt                              |   1 -
 scripts/check_links.mjs                       | 607 ++++++++++++++++++
 scripts/check_links.py                        | 455 -------------
 scripts/check_offline_live_links.py           |  97 ---
 14 files changed, 800 insertions(+), 707 deletions(-)
 delete mode 100644 docs/lychee.bat
 delete mode 100644 requirements.txt
 create mode 100644 scripts/check_links.mjs
 delete mode 100644 scripts/check_links.py
 delete mode 100644 scripts/check_offline_live_links.py
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index a6fca2e8..c246d68d 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -35,41 +35,40 @@ jobs:
       - name: Build with Jekyll
         run: bundle exec jekyll build
         working-directory: ./docs
-      - name: Set up Python for link checks
-        uses: actions/setup-python@v5
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
         with:
-          python-version: '3.14'
-          cache: 'pip'
-      - name: Install Python deps
-        run: pip install -r requirements.txt
-      - name: Check online links (check_links.py)
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: package-lock.json
+      - name: Install Node.js dependencies
+        run: npm ci
+      - name: Check online links (check_links.mjs)
         # `--fallback-extensions html` mirrors what GitHub Pages does at request time:
         # an extensionless URL like `/FAQ` is served as `/FAQ.html`. This workflow's
         # Jekyll build runs without --baseurl (no Pages prefix), so no --base-path is
         # needed -- contrast with jekyll-gh-pages.yml.
         run: >-
-          python scripts/check_links.py
+          node scripts/check_links.mjs
           --offline --include-fragments
           --fallback-extensions html
           --index-files 'index.html,.'
           --root-dir docs/_site
           docs/_site
-      - name: Check offline links (check_links.py)
+      - name: Check offline links and live-link survivors (check_links.mjs)
+        # Strict check on `_site-offline/`: every link must resolve to an actual file
+        # under `file://`, with no extension fallback. `--forbid` also fails the build
+        # if any extracted link still points at https://docs.twinbasic.com/<path> --
+        # i.e. any live-site reference the offlinify rewrite missed. The bare root
+        # URL (https://docs.twinbasic.com[/]) is exempt, since intentional "go to the
+        # live site" links are allowed.
         run: >-
-          python scripts/check_links.py
+          node scripts/check_links.mjs
           --offline --include-fragments
           --index-files index.html
+          --forbid 'https://docs.twinbasic.com'
           --root-dir docs/_site-offline
           docs/_site-offline
-      - name: Check for surviving live-site links in offline tree
-        # Flags any https://docs.twinbasic.com/<path> reference left in
-        # _site-offline/ HTML outside <code>/<pre> blocks. After offlinify
-        # strips the jekyll-seo-tag block, anything surviving is a source
-        # link that points at the live site instead of using a relative or
-        # /tB/... permalink that resolves locally. The bare root URL
-        # (https://docs.twinbasic.com[/]) is exempt -- intentional "go to
-        # the live site" links are allowed.
-        run: python scripts/check_offline_live_links.py
       - name: Check book links (informational)
         # Failures do not block the build. The book still has absolute
         # intra-site URLs that the chapter transform has not yet rewritten
@@ -77,7 +76,7 @@ jobs:
         # for visibility until those are fixed.
         continue-on-error: true
         run: >-
-          python scripts/check_links.py
+          node scripts/check_links.mjs
           --offline --include-fragments
           --root-dir docs/_site-pdf
-          docs/_site-pdf/book.html
\ No newline at end of file
+          docs/_site-pdf/book.html
diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml
index 3ca84c27..e7b5c684 100644
--- a/.github/workflows/jekyll-gh-pages.yml
+++ b/.github/workflows/jekyll-gh-pages.yml
@@ -57,51 +57,48 @@ jobs:
         env:
           JEKYLL_ENV: production
           PAGES_REPO_NWO: "${{ github.repository }}"
-      - name: Set up Python for link checks
-        uses: actions/setup-python@v5
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
         with:
-          python-version: '3.14'
-          cache: 'pip'
-      - name: Install Python deps
-        run: pip install -r requirements.txt
-      - name: Check online links (check_links.py)
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: package-lock.json
+      - name: Install Node.js dependencies and Chromium
+        # Install npm deps first so the link checks (check_links.mjs)
+        # can use htmlparser2. The Chromium download (needed by the later
+        # PDF render) stays in this step, next to the npm install it depends on.
+        run: |
+          npm ci
+          sudo npx puppeteer browsers install chrome --install-deps
+      - name: Check online links (check_links.mjs)
         # `--fallback-extensions html` mirrors what GitHub Pages does at request time:
         # an extensionless URL like `/FAQ` is served as `/FAQ.html`. Without the flag
         # every pretty permalink on the site would look broken.
         #
         # `--base-path` strips the Pages baseurl (e.g. `/twinBASIC-docs`) from absolute
-        # URLs before resolving against `--root-dir`. Equivalent to the `--remap` regex
-        # that lychee used in earlier iterations of this step.
+        # URLs before resolving against `--root-dir`.
         run: >-
-          python scripts/check_links.py
+          node scripts/check_links.mjs
           --offline --include-fragments
           --fallback-extensions html
           --index-files 'index.html,.'
           --base-path '${{ steps.pages.outputs.base_path }}'
           --root-dir docs/_site
           docs/_site
-      - name: Check offline links (check_links.py)
+      - name: Check offline links and live-link survivors (check_links.mjs)
         # Strict check on `_site-offline/`: every link must resolve to an actual file
-        # under `file://`, with no extension fallback. Catches relative links in
-        # markdown sources that point at a permalink that doesn't match the rendered
-        # filename (e.g. `[Foo](Foo/)` when Jekyll wrote `Foo.html`, not
-        # `Foo/index.html`) -- the kind of breakage the online check above hides
-        # behind `--fallback-extensions html`.
+        # under `file://`, with no extension fallback. `--forbid` also fails the build
+        # if any extracted link still points at https://docs.twinbasic.com/<path> --
+        # i.e. any live-site reference the offlinify rewrite missed. The bare root
+        # URL (https://docs.twinbasic.com[/]) is exempt, since intentional "go to the
+        # live site" links are allowed.
         run: >-
-          python scripts/check_links.py
+          node scripts/check_links.mjs
           --offline --include-fragments
           --index-files index.html
+          --forbid 'https://docs.twinbasic.com'
           --root-dir docs/_site-offline
           docs/_site-offline
-      - name: Check for surviving live-site links in offline tree
-        # Flags any https://docs.twinbasic.com/<path> reference left in
-        # _site-offline/ HTML outside <code>/<pre> blocks. After offlinify
-        # strips the jekyll-seo-tag block, anything surviving is a source
-        # link that points at the live site instead of using a relative or
-        # /tB/... permalink that resolves locally. The bare root URL
-        # (https://docs.twinbasic.com[/]) is exempt -- intentional "go to
-        # the live site" links are allowed.
-        run: python scripts/check_offline_live_links.py
       - name: Check book links (informational)
         # Failures do not block the build. The book still has absolute
         # intra-site URLs that the chapter transform has not yet rewritten
@@ -109,20 +106,10 @@ jobs:
         # for visibility until those are fixed.
         continue-on-error: true
         run: >-
-          python scripts/check_links.py
+          node scripts/check_links.mjs
           --offline --include-fragments
           --root-dir docs/_site-pdf
           docs/_site-pdf/book.html
-      - name: Set up Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: '22'
-          cache: 'npm'
-          cache-dependency-path: package-lock.json
-      - name: Install Node.js dependencies and Chromium
-        run: |
-          npm ci
-          sudo npx puppeteer browsers install chrome --install-deps
       - name: Render book PDF
         run: |
           mkdir -p _pdf
diff --git a/WIP.md b/WIP.md
index ef72dfa5..35ecd07e 100644
--- a/WIP.md
+++ b/WIP.md
@@ -430,7 +430,7 @@ From `docs/`:
 
 - `bundle exec jekyll build` (or `build.bat`) — builds three trees in a single Jekyll run: the online copy at `_site/`, a `file://`-browsable copy at `_site-offline/`, and the sparse pagedjs source at `_site-pdf/`. The offline pass (`_plugins/offlinify.rb`, activated by `also_build_offline: true` in `_config.yml`) adds ~3-5s and the PDF pass (`_plugins/pdfify.rb`, activated by `also_build_pdf: true`) adds <1s on top of the normal ~13s build. The PDF plugin captures `book.html`'s rendered output (the concatenated chapter document built via `_layouts/book-combined.html`) at `:pages, :post_render`, drops the page from `site.pages` at `:site, :post_render` so `_site/book.html` is never written, and at `:site, :post_write` writes the captured bytes into `_site-pdf/book.html` along with `assets/css/print.css`, `assets/css/rouge.css`, and every relative `<img src=>` target -- just what pagedjs needs to render the book PDF. The companion `offline_exclude: [..., book.html]` entry in `_config.yml` keeps `offlinify.rb` from copying book.html into `_site-offline/`: offlinify's per-page hook fires before pdfify's `:site, :post_render` (Jekyll fires every per-page hook before any site-level post-render hook), so during offlinify's pass `book.html` is still in `site.pages` and the exclude is what makes it skip writing the offline copy. When `also_build_pdf: false` the exclude does the same job from a different angle -- pdfify never runs, `book.html` renders normally to `_site/`, and the exclude still keeps it out of `_site-offline/`. After Jekyll's WRITE phase, the offline plugin walks `_site/`, copies binary assets verbatim into `_site-offline/`, and for each HTML and CSS file rewrites every root-absolute `href` / `src` / `url()` to a page-relative path with the resolved file extension (`/FAQ` → `../../FAQ.html`, `/Tutorials/CEF/` → `../../Tutorials/CEF/index.html`). 
It also patches the offline copy of `assets/js/just-the-docs.js` in two places — `navLink()` to match the active nav entry by resolved DOM `link.href` rather than `document.location.pathname` (the upstream pathname-vs-attribute compare returns no match under `file://`, leaving the sidebar with no `.active` class so the nav appears collapsed on every navigation), and `initSearch()` to read the lunr index from `window.SEARCH_DATA` rather than fetching `search-data.json` over `XMLHttpRequest` (XHR to `file://` resources is blocked by browsers; classic `<script src=>` is not). To support that, the plugin (a) generates `_site-offline/assets/js/search-data.js` once per build by wrapping the rendered `search-data.json` in `window.SEARCH_DATA = {...};`, and (b) injects two `<script>` tags per page right before `just-the-docs.js`: one that sets `window.OFFLINE_SITE_ROOT` to the per-page relative prefix to the offline site root, and one that loads `search-data.js`. The patched `initSearch()` rewrites every `doc.url` from a root-absolute permalink (`/tB/Core/Const`) to a page-relative path (`<OFFLINE_SITE_ROOT>tB/Core/Const.html`) so search-result clicks land on the actual file regardless of which page the user is on.
 - `bundle exec jekyll serve` (or `serve.bat`) — local server at `localhost:4000`. Note that `_site-offline/` is also produced on the initial build, but live-reload only updates `_site/`; manual rebuild needed for offline updates.
-- `check.bat` — link check (offline Lychee against `_site/`).
+- `check.bat` — link check (offline `scripts/check_links.mjs` against `_site/` and `_site-offline/`; the offline pass also runs `--forbid 'https://docs.twinbasic.com'` to catch surviving live-site links).
 - `book.bat` — renders the PDF from `_site-pdf/book.html` via `pagedjs-cli` into `_pdf/book.pdf`. Run `build.bat` first to populate `_site-pdf/`.
 
 The HTML whitespace compression that wraps every page's render chain is handled by `_plugins/html-compress.rb` rather than the just-the-docs theme's `vendor/compress.html` Liquid layout — see [_plugins/html-compress.md](docs/_plugins/html-compress.md) for the full writeup. The Liquid layout's per-page cost in the profile was ~2.4s of Liquid filter dispatch (a `split: " " | join: " "` over the outside-of-`<pre>` content, lowering to a per-page Array allocation of every whitespace-delimited token across 837 pages — millions of small `String` objects). The layout is short-circuited via `compress_html.ignore.envs: all` in `_config.yml`; it then outputs a bare `{{ content }}` and the plugin takes over at `:pages, :post_render` / `:documents, :post_render` with `priority :normal`, doing the same pre-block-protected whitespace collapse via `content.split(PRE_BLOCK_RE).each { |s| s.split(" ").join(" ") }` in C-implemented Ruby. The `:normal` priority is the *middle* tier of a three-level convention across the site's `:post_render` hooks: mutators (`book-href-rewrite`) run at `:high`, this cleanup pass at `:normal`, readers (`pdfify`, `offlinify`) at `:low`. The invariant "compress runs after every mutator and before every reader" therefore holds by construction; no downstream plugin has to be whitespace-aware. Pages whose layout chain doesn't reach `vendor/compress` are gated out via a `:site, :pre_render` precompute that walks `site.layouts[name].data["layout"]` for every layout key and marks the entire compress-reaching chain (default → table_wrappers → vendor/compress) -- jekyll-redirect-from stubs, the SCSS-derived CSS pages, and `assets/js/zzzz-search-data.json` all stay un-gated and pass through verbatim. 
`book.html` (which uses the minimal `book-combined` layout that has no parent) is *also* outside that chain but is explicitly added to the compress-eligible set at the end of the precompute, so the same whitespace collapse runs on it -- saves paged.js's render-time `WhiteSpaceFilter` ~37k DOM mutations (~28k `textContent` overwrites + ~9k `removeChild` calls) at the cost of ~480 ms once per Jekyll build. Output is byte-identical to the layout-based version: a recursive `diff -rq` of `_site/` against a vendor/compress.html baseline reports zero differences across all ~840 HTML pages, 290 redirect stubs, every CSS / JSON / SVG / image asset. The plugin's correctness depended on two non-obvious details that broke an earlier cut -- the layout-chain walk has to compare against the layout *key* (`"vendor/compress"`) rather than `layout.name` (which carries the `.html` extension), and the per-segment `split(" ").join(" ")` strips trailing whitespace that the Liquid layout's *template* re-adds via its trailing-newline source character, so the plugin captures `content.end_with?("\n")` before the split and re-appends a `\n` after the join. Both regressions surfaced as nonzero `diff -rq` counts during development and are flagged in the plugin's header comment and [_plugins/html-compress.md](docs/_plugins/html-compress.md).
@@ -567,7 +567,7 @@ After a batch of changes, verify the site builds clean and all links resolve. Fr
 build.bat && check.bat
 ```
 
-`check.bat` runs Lychee in offline mode against the built `_site/` tree — it catches broken intra-site links, missing pages, and malformed `redirect_from` entries (the most common breakage when adding new pages or moving content between sections). A clean run is the bar for "ready to commit".
+`check.bat` runs [scripts/check_links.mjs](scripts/check_links.mjs) in offline mode against both `_site/` and `_site-offline/` — it catches broken intra-site links, missing pages, malformed `redirect_from` entries (the most common breakage when adding new pages or moving content between sections), and (via `--forbid 'https://docs.twinbasic.com'` on the offline pass) any extracted link that still points at the live docs site after the offlinify rewrite. A clean run is the bar for "ready to commit".
 
 Requires `build.bat` to have produced an up-to-date `_site/`.
 
diff --git a/docs/Miscellaneous/Documentation Development.md b/docs/Miscellaneous/Documentation Development.md
index 9ea54887..18835654 100644
--- a/docs/Miscellaneous/Documentation Development.md	
+++ b/docs/Miscellaneous/Documentation Development.md	
@@ -201,7 +201,7 @@ To check that none of the internal links in the most recent documentation build
 
     check.bat
 
-This runs three checks: `scripts/check_links.py` against `_site/` (the live tree, in offline mode), the same against `_site-offline/` (the file://-browsable mirror), and `scripts/check_offline_live_links.py` over `_site-offline/` that flags any surviving `https://docs.twinbasic.com/<path>` link --- the offline mirror should not navigate back to the live docs site. The same three checks run in CI on every pull request and on every push to `staging`.
+This runs two checks: `scripts/check_links.mjs` against `_site/` (the live tree, in offline mode), and the same against `_site-offline/` (the file://-browsable mirror) with `--forbid 'https://docs.twinbasic.com'` to also flag any surviving live-site link --- the offline mirror should not navigate back to the live docs site. The same two checks run in CI on every pull request and on every push to `staging`.
 
 ### Building and Local Serving
 
diff --git a/docs/_plugins/offlinify.md b/docs/_plugins/offlinify.md
index c75dd8d6..736a6217 100644
--- a/docs/_plugins/offlinify.md
+++ b/docs/_plugins/offlinify.md
@@ -76,7 +76,7 @@ For each page:
 
 2. **Check `offline_exclude`** (see [Exclude list](#exclude-list)). Matched files increment the `excluded_files` counter and skip the write.
 
-3. **Detect jekyll-redirect-from stubs** by class-name string check (`page.class.name == "JekyllRedirectFrom::RedirectPage"`). The stubs are tiny HTML files whose meta-refresh, canonical link, `<script>location=`, and fallback `<a>` all reference an absolute `https://<site.url>/<path>` URL produced by `absolute_url`. Online these redirect to the canonical page; offline they would require network access and land on the live site rather than the local file — defeating the offline scenario. Rewrite each `<site.url><path>` occurrence to its resolved page-relative form via the same `compute_relative` the main HTML pass uses, then write the stub. Counted under `rewritten_redirects` in the summary log line. Some source pages (notably `Miscellaneous/Documentation Development.md`) intentionally link via `redirect_from` URLs as a stable-URL pattern, so the rewritten stubs let those source links navigate locally instead of failing. The class-name string check is used rather than `is_a?` so the plugin still loads if jekyll-redirect-from is removed. If `site.url` is unset (empty) the stub is written verbatim — the path-portion targets still resolve under lychee's offline check the same way the main HTML pass's link targets do.
+3. **Detect jekyll-redirect-from stubs** by class-name string check (`page.class.name == "JekyllRedirectFrom::RedirectPage"`). The stubs are tiny HTML files whose meta-refresh, canonical link, `<script>location=`, and fallback `<a>` all reference an absolute `https://<site.url>/<path>` URL produced by `absolute_url`. Online these redirect to the canonical page; offline they would require network access and land on the live site rather than the local file — defeating the offline scenario. Rewrite each `<site.url><path>` occurrence to its resolved page-relative form via the same `compute_relative` the main HTML pass uses, then write the stub. Counted under `rewritten_redirects` in the summary log line. Some source pages (notably `Miscellaneous/Documentation Development.md`) intentionally link via `redirect_from` URLs as a stable-URL pattern, so the rewritten stubs let those source links navigate locally instead of failing. The class-name string check is used rather than `is_a?` so the plugin still loads if jekyll-redirect-from is removed. If `site.url` is unset (empty) the stub is written verbatim — the path-portion targets still resolve under the offline link check the same way the main HTML pass's link targets do.
 
 4. **Dispatch on output extension:**
    - `.html`: dup `page.output`, strip the jekyll-seo-tag block (see [SEO block stripping](#seo-block-stripping)), scan for code-block ranges, run the combined HTML URL rewrite (see [HTML URL rewriting](#html-url-rewriting)), inject the search-setup script tags, hand off to `write_or_enqueue!`.
@@ -358,25 +358,23 @@ The offline build touches the following files:
 | `docs/_config.yml` | `also_build_offline: true` (default-on) and `exclude: [_site-offline]` (keeps Jekyll's watcher from rebuilding on the plugin's own output). |
 | `docs/build.bat` | Plain `bundle exec jekyll build` — produces `_site/`, `_site-offline/`, and (via `pdfify.rb`) `_site-pdf/` in one run. |
 | `docs/serve.bat` | `bundle exec jekyll serve` — watcher-friendly thanks to the exclude. |
-| `docs/check.bat` | Local link check (CI runs the same three passes via the workflows). Three steps: `scripts/check_links.py` permissive on `_site/`, `scripts/check_links.py` strict on `_site-offline/`, and `scripts/check_offline_live_links.py` against `_site-offline/`. Exits non-zero on any failure. |
-| `scripts/check_offline_live_links.py` | Flags any `https://docs.twinbasic.com/<path>` reference that survived offlinify in `_site-offline/` HTML, outside `<code>` / `<pre>` blocks. Skips the bare root (`https://docs.twinbasic.com[/]`) since intentional "go to the live site" links are allowed. Run by `check.bat` locally and by both CI workflows after the offline link check. |
+| `docs/check.bat` | Local link check (CI runs the same passes via the workflows). Two steps: `scripts/check_links.mjs` permissive on `_site/`, and `scripts/check_links.mjs` strict on `_site-offline/` with `--forbid 'https://docs.twinbasic.com'` to also catch any surviving live-site link. Exits non-zero on any failure. |
+| `scripts/check_links.mjs` | Node link checker (SAX-based HTML parsing via `htmlparser2`). The `--forbid PREFIX` flag (repeatable) folds in the former separate live-link scan -- any extracted link whose value starts with a forbidden prefix is reported (bare prefix and `prefix/` are exempt, so intentional "go to the live site" links remain allowed). |
 | `docs/.gitignore` | `_site`, `_site-offline`, and `_site-pdf` all excluded from git. |
-| `.github/workflows/jekyll-gh-pages.yml` | Deploy workflow (push to `staging`, manual dispatch). Builds, runs lychee against `_site/`, runs `scripts/check_links.py` against `_site-offline/`, runs `scripts/check_offline_live_links.py` against `_site-offline/`, deploys to Pages, and (on manual dispatch) packages `_site-offline/` as a release artifact. |
-| `.github/workflows/checks.yml` | PR-gating workflow (pull-request to `main`, manual dispatch). Same three link-check steps as the deploy workflow; no deploy or release. |
+| `.github/workflows/jekyll-gh-pages.yml` | Deploy workflow (push to `staging`, manual dispatch). Builds, runs `scripts/check_links.mjs` against `_site/` and `_site-offline/` (the offline pass with `--forbid`), deploys to Pages, and (on manual dispatch) packages `_site-offline/` as a release artifact. |
+| `.github/workflows/checks.yml` | PR-gating workflow (pull-request to `main`, manual dispatch). Same link-check steps as the deploy workflow; no deploy or release. |
 
 ## CI integration
 
 `bundle exec jekyll build` in CI passes `--baseurl "${{ steps.pages.outputs.base_path }}"` from `actions/configure-pages`. For a Pages site with a custom domain (CNAME), base_path is empty. For a project page without a custom domain, it's `/repo-name`. Offlinify handles both cases — `normalize_baseurl` in `setup` produces the right prefix to strip.
 
-The workflow has three link-check steps after the build:
+The workflow has two link-check steps after the build:
 
-1. **Lychee against `_site/`**, with `--fallback-extensions html` and a `--remap` that strips the base_path prefix. This mirrors what GitHub Pages does at request time — extensionless URLs like `/FAQ` get served as `/FAQ.html`. Without `--fallback-extensions html`, every pretty permalink would appear broken in this check. Lychee (not `scripts/check_links.py`) handles the online tree because `--remap` isn't implemented in the Python checker; the offline tree below has all baseurl prefixes already stripped by offlinify and doesn't need it.
+1. **`scripts/check_links.mjs` against `_site/`**, with `--fallback-extensions html` and `--base-path` to strip the Pages `base_path` prefix. This mirrors what GitHub Pages does at request time — extensionless URLs like `/FAQ` get served as `/FAQ.html`. Without `--fallback-extensions html`, every pretty permalink would appear broken in this check.
 
-2. **`scripts/check_links.py` against `_site-offline/`**, strict — no extension fallback (`--index-files index.html` only; the online check also accepts the bare directory via `,.`). Every link must resolve to a real file as written. This catches relative links in markdown sources whose permalink shape doesn't match the rendered filename (e.g. `[Foo](Foo/)` when Jekyll wrote `Foo.html`, not `Foo/index.html`) — the kind of breakage the online check above hides behind both the fallback and the bare-directory acceptance. The Python checker is roughly 25× faster than lychee on this workload and a bit stricter (catches missing `<script src>` targets and trailing slashes on file-shaped URLs).
+2. **`scripts/check_links.mjs` against `_site-offline/`**, strict — no extension fallback (`--index-files index.html` only; the online check also accepts the bare directory via `,.`). Every link must resolve to a real file as written. This catches relative links in markdown sources whose permalink shape doesn't match the rendered filename (e.g. `[Foo](Foo/)` when Jekyll wrote `Foo.html`, not `Foo/index.html`) — the kind of breakage the online check above hides behind both the fallback and the bare-directory acceptance. This step also passes `--forbid 'https://docs.twinbasic.com'`, which flags any extracted link whose value still points at the live docs site (a surviving offlinify miss); the bare root is exempt — see [Failure modes: Surviving live-site links](#failure-modes).
 
-3. **`scripts/check_offline_live_links.py` against `_site-offline/`**, flagging any surviving `https://docs.twinbasic.com/<path>` reference outside `<code>` / `<pre>` blocks (the bare root is exempt — see [Failure modes: Surviving live-site links](#failure-modes)).
-
-All three steps fail the build on the first non-zero exit, blocking the Pages deploy and the release upload. After they succeed and Pages is deployed, the release job (gated to manual dispatch only) downloads the offline-site workflow artifact, computes a tag like `docs-YYYY-MM-DD-HHMM` (UTC), and creates a GitHub release with `twinbasic-docs-offline.zip` attached via `softprops/action-gh-release@v2`.
+A non-zero exit from either step fails the build, blocking the Pages deploy and the release upload. After both succeed and Pages is deployed, the release job (gated to manual dispatch only) downloads the offline-site workflow artifact, computes a tag like `docs-YYYY-MM-DD-HHMM` (UTC), and creates a GitHub release with `twinbasic-docs-offline.zip` attached via `softprops/action-gh-release@v2`.
 
 ## Failure modes
 
@@ -388,11 +386,11 @@ The plugin surfaces several conditions in its summary log lines:
 
 - **Missing `search-data.json`.** Silent — the search-data.js generation step is a no-op. The per-page script tag injection still runs, so each page will request `search-data.js` and the browser will log a 404. The patched `initSearch()` will hit its `window.SEARCH_DATA not found` branch and log a console message.
 
-- **Real broken links in markdown sources.** Caught by the strict lychee step in CI (or by `check.bat` locally). These don't surface in the offlinify summary because the rewrite passes correctly identify them as unresolvable and leave them alone — that's the right behavior, the source markdown needs fixing. Source markdown linking at a `redirect_from` URL is reachable in the offline tree (the redirect stub is rewritten to navigate locally), but a stub that itself references a missing target falls back to the original `https://<site.url>/...` URL and lychee will then surface it as broken — same right-thing-to-do behaviour.
+- **Real broken links in markdown sources.** Caught by the strict offline link-check step in CI (or by `check.bat` locally). These don't surface in the offlinify summary because the rewrite passes correctly identify them as unresolvable and leave them alone — that's the right behavior, the source markdown needs fixing. Source markdown linking at a `redirect_from` URL is reachable in the offline tree (the redirect stub is rewritten to navigate locally), but a stub that itself references a missing target falls back to the original `https://<site.url>/...` URL and the checker will then surface it as broken — same right-thing-to-do behaviour.
 
 - **`_site-offline/` triggering `jekyll serve` rebuilds.** Was a problem; now handled by two things in combination: `exclude: [_site-offline]` in `_config.yml`, and the "clean contents but keep the directory" trick in the wipe step (which keeps all watcher events under `_site-offline/...` where the exclude matches).
 
-- **Surviving live-site links.** The [SEO block stripping](#seo-block-stripping) pass removes the bulk of `https://docs.twinbasic.com` references each page contains (canonical link, OpenGraph URL, JSON-LD `url`). Anything left in `_site-offline/` is a source link that points at the live docs site -- usually a markdown author writing `https://docs.twinbasic.com/<path>` instead of a relative link or `/tB/...` permalink, which would silently navigate the offline reader back online. `scripts/check_offline_live_links.py` flags these; the bare root `https://docs.twinbasic.com[/]` is exempt since intentional "go to the live site" links are allowed. Run locally by `check.bat` and in CI by both workflows after the offline link check.
+- **Surviving live-site links.** The [SEO block stripping](#seo-block-stripping) pass removes the bulk of `https://docs.twinbasic.com` references each page contains (canonical link, OpenGraph URL, JSON-LD `url`). Anything left in `_site-offline/` is a source link that points at the live docs site -- usually a markdown author writing `https://docs.twinbasic.com/<path>` instead of a relative link or `/tB/...` permalink, which would silently navigate the offline reader back online. The offline link-check step passes `--forbid 'https://docs.twinbasic.com'` to `scripts/check_links.mjs`, which flags any extracted link whose value starts with the prefix (in any `href` / `src` / `srcset` / `formaction` / ... attribute); the bare root `https://docs.twinbasic.com[/]` is exempt since intentional "go to the live site" links are allowed. The scan is folded into the offline link check, so a single pass catches both broken-link and forbidden-prefix failures.
 
 ## Performance
 
@@ -465,7 +463,7 @@ After nav-block caching and (on Windows) the async write pool, the picture is ev
 
 ## Known limitations
 
-- **Source-only broken links**, where the markdown points at a permalink shape that doesn't match the rendered filename, can't be fixed by the plugin — `compute_rel_url` correctly identifies the target as nonexistent and leaves the link unchanged. The strict lychee step in CI surfaces these as real errors so they get fixed at the source.
+- **Source-only broken links**, where the markdown points at a permalink shape that doesn't match the rendered filename, can't be fixed by the plugin — `compute_rel_url` correctly identifies the target as nonexistent and leaves the link unchanged. The strict offline link-check step in CI surfaces these as real errors so they get fixed at the source.
 
 - **`<a href>` values inside `<code>` blocks** *were* not distinguishable from real links at the regex level; example URLs in tutorial code samples surfaced as false-positive entries in the unresolved counter. The [code-block skip](#code-block-skip) now suppresses them — both the rewrite and the counter increment. Worth keeping an eye on if the upstream syntax highlighter (Rouge) ever switches away from wrapping highlighted code in `<code>` / `<pre>`.
 
diff --git a/docs/_plugins/offlinify.rb b/docs/_plugins/offlinify.rb
index 782038f5..4022d34d 100644
--- a/docs/_plugins/offlinify.rb
+++ b/docs/_plugins/offlinify.rb
@@ -709,9 +709,10 @@ def self.process_page(page)
       # Rewrite each `<site_url><path>` occurrence to its resolved
       # page-relative form via the same `compute_relative` the main
       # HTML pass uses. Unresolved matches fall back to the original
-      # absolute URL -- lychee will then flag the source as broken,
-      # which is the right behaviour for a real bug. If `site.url`
-      # is unset (empty), write the stub verbatim: lychee against
+      # absolute URL -- the offline link-check (check_links.mjs with
+      # --forbid) will then flag the source as broken, which is the
+      # right behaviour for a real bug. If `site.url` is unset
+      # (empty), write the stub verbatim: the link check against
       # _site-offline/ will still find the path-portion targets in
       # the same way the main HTML pass does, so the stub passes
       # link-check even though it won't navigate locally.
diff --git a/docs/check.bat b/docs/check.bat
index 78e080ef..990a0914 100644
--- a/docs/check.bat
+++ b/docs/check.bat
@@ -1,52 +1,43 @@
-@rem Run the Python-based link checker on both build outputs, then scan
-@rem _site-offline/ for live-site links that survived offlinify.
-@rem
-@rem Same arguments as lychee.bat -- only the executable differs. The Python
-@rem script is faster on this workload (~25x on Windows) and a bit stricter:
-@rem it flags <script src> targets that don't exist and rejects trailing
-@rem slashes on file-shaped URLs (e.g. `foo.html/`), both of which lychee
-@rem silently accepts. lychee.bat remains available as a second opinion.
+@rem Run the Node-based link checker on both build outputs. The offline
+@rem pass additionally uses --forbid to flag any surviving
+@rem https://docs.twinbasic.com/<path> link that the offlinify rewrite
+@rem missed.
 @rem
 @rem _site/        Online tree. `--fallback-extensions html` mirrors what
 @rem               GitHub Pages does at request time: an extensionless
 @rem               URL like /FAQ is served as /FAQ.html. Without the flag
-@rem               every pretty permalink would appear broken.
-@rem _site-offline/ Offline tree. No extension fallback -- every link must
-@rem               resolve to an actual file under file://, since the
-@rem               browser does no rewriting. Catches relative links in
-@rem               markdown sources whose permalink shape doesn't match
-@rem               the rendered filename (e.g. `[Foo](Foo/)` when Jekyll
-@rem               wrote `Foo.html`, not `Foo/index.html`).
-@rem live-links   Greps _site-offline/ HTML for any surviving
-@rem               https://docs.twinbasic.com reference outside <code> /
-@rem               <pre> blocks. After _plugins/offlinify.rb strips the
-@rem               jekyll-seo-tag block from each page, none should
-@rem               remain -- a hit means a source link goes to the live
-@rem               site instead of the canonical /tB/... permalink.
-@rem               See ../scripts/check_offline_live_links.py.
+@rem               every pretty permalink would appear broken. No
+@rem               --forbid here -- the online tree intentionally carries
+@rem               canonical https://docs.twinbasic.com links in the
+@rem               jekyll-seo-tag block.
+@rem _site-offline/ Offline tree. No extension fallback -- every link
+@rem               must resolve to an actual file under file://, since
+@rem               the browser does no rewriting. Catches relative links
+@rem               whose permalink shape doesn't match the rendered
+@rem               filename (e.g. `[Foo](Foo/)` when Jekyll wrote
+@rem               `Foo.html`, not `Foo/index.html`). --forbid on this
+@rem               pass also fails the build if any extracted link
+@rem               still points at https://docs.twinbasic.com/<path>
+@rem               (bare domain and trailing-slash root are exempt).
 @rem
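-@rem All three checks always run so you see all errors in one pass; the
-@rem script exits non-zero if any fails (earlier failures take precedence
-@rem in the reported code).
+@rem Every check always runs so you see all errors in one pass; the
+@rem script exits non-zero if the online or offline check fails (the
+@rem earlier failure takes precedence in the reported code).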
 @setlocal
-@set CHECK=python "%~dp0..\scripts\check_links.py"
+@set CHECK=node "%~dp0..\scripts\check_links.mjs"
 @echo Checking _site/ (online) ...
 @%CHECK% --offline --include-fragments --fallback-extensions html --index-files "index.html,." --root-dir ".\_site" ".\_site" %*
 @set EXIT1=%ERRORLEVEL%
 @echo.
-@echo Checking _site-offline/ (offline) ...
+@echo Checking _site-offline/ (offline, with --forbid) ...
 @rem No `.` in --index-files: under file://, a bare directory URL
 @rem (`Foo/`) requires an actual index.html inside. The online check
 @rem above accepts `.` because GitHub Pages can serve an unstyled
 @rem directory listing or a 404 in that case; offline, there's no
 @rem such fallback, and the link is just broken.
-@%CHECK% --offline --include-fragments --index-files "index.html" --root-dir ".\_site-offline" ".\_site-offline" %*
+@%CHECK% --offline --include-fragments --index-files "index.html" --forbid "https://docs.twinbasic.com" --root-dir ".\_site-offline" ".\_site-offline" %*
 @set EXIT2=%ERRORLEVEL%
 @echo.
-@echo Checking _site-offline/ for live-site links ...
-@python "%~dp0..\scripts\check_offline_live_links.py"
-@set EXIT3=%ERRORLEVEL%
-@echo.
 @echo Checking _site-pdf/book.html (informational -- failures do not block) ...
 @rem Links in the book are not fully resolved (absolute intra-site URLs stay live
 @rem until the book chapter transform rewrites them, and some fragments are still
@@ -54,5 +45,4 @@
 @%CHECK% --offline --include-fragments --root-dir ".\_site-pdf" ".\_site-pdf\book.html" %*
 @echo.
 @if %EXIT1% NEQ 0 exit /b %EXIT1%
-@if %EXIT2% NEQ 0 exit /b %EXIT2%
-@exit /b %EXIT3%
+@exit /b %EXIT2%
diff --git a/docs/lychee.bat b/docs/lychee.bat
deleted file mode 100644
index 6d19e3bb..00000000
--- a/docs/lychee.bat
+++ /dev/null
@@ -1,45 +0,0 @@
-@rem Use lychee to check the links in both build outputs, then scan
-@rem _site-offline/ for live-site links that survived offlinify.
-@rem
-@rem _site/        Online tree. `--fallback-extensions html` mirrors what
-@rem               GitHub Pages does at request time: an extensionless
-@rem               URL like /FAQ is served as /FAQ.html. Without the flag
-@rem               every pretty permalink would appear broken.
-@rem _site-offline/ Offline tree. No extension fallback -- every link must
-@rem               resolve to an actual file under file://, since the
-@rem               browser does no rewriting. Catches relative links in
-@rem               markdown sources whose permalink shape doesn't match
-@rem               the rendered filename (e.g. `[Foo](Foo/)` when Jekyll
-@rem               wrote `Foo.html`, not `Foo/index.html`).
-@rem live-links   Greps _site-offline/ HTML for any surviving
-@rem               https://docs.twinbasic.com reference outside <code> /
-@rem               <pre> blocks. After _plugins/offlinify.rb strips the
-@rem               jekyll-seo-tag block from each page, none should
-@rem               remain -- a hit means a source link goes to the live
-@rem               site instead of the canonical /tB/... permalink.
-@rem               See ../scripts/check_offline_live_links.py.
-@rem
-@rem All three checks always run so you see all errors in one pass; the
-@rem script exits non-zero if any fails (earlier failures take precedence
-@rem in the reported code).
-@setlocal
-@set LYCHEE="%~dp0..\.claude\lychee.exe"
-@echo Checking _site/ (online) ...
-@%LYCHEE% --offline --include-fragments --fallback-extensions html --index-files "index.html,." --root-dir ".\_site" ".\_site" %*
-@set EXIT1=%ERRORLEVEL%
-@echo.
-@echo Checking _site-offline/ (offline) ...
-@rem No `.` in --index-files: under file://, a bare directory URL
-@rem (`Foo/`) requires an actual index.html inside. The online check
-@rem above accepts `.` because GitHub Pages can serve an unstyled
-@rem directory listing or a 404 in that case; offline, there's no
-@rem such fallback, and the link is just broken.
-@%LYCHEE% --offline --include-fragments --index-files "index.html" --root-dir ".\_site-offline" ".\_site-offline" %*
-@set EXIT2=%ERRORLEVEL%
-@echo.
-@echo Checking _site-offline/ for live-site links ...
-@python "%~dp0..\scripts\check_offline_live_links.py"
-@set EXIT3=%ERRORLEVEL%
-@if %EXIT1% NEQ 0 exit /b %EXIT1%
-@if %EXIT2% NEQ 0 exit /b %EXIT2%
-@exit /b %EXIT3%
diff --git a/package-lock.json b/package-lock.json
index 3e17b771..de1ba58c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,6 +9,7 @@
       "version": "0.0.0",
       "devDependencies": {
         "html-entities": "^2.6.0",
+        "htmlparser2": "^12.0.0",
         "pdf-lib": "1.17.1",
         "puppeteer": "25.0.4"
       }
@@ -346,6 +347,77 @@
       "license": "BSD-3-Clause",
       "peer": true
     },
+    "node_modules/dom-serializer": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-3.1.1.tgz",
+      "integrity": "sha512-4MEa38/QexBob6gFNwu+EGdWvhJ1OKuNwdYY3Y3NyeWDQfnGeDYQUDfIRzWu5B5gsv03so2Uxd28YC6zrsx3Lw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "domelementtype": "^3.0.0",
+        "domhandler": "^6.0.0",
+        "entities": "^8.0.0"
+      },
+      "engines": {
+        "node": ">=20.19.0"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
+      }
+    },
+    "node_modules/domelementtype": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-3.0.0.tgz",
+      "integrity": "sha512-umCQid3jKbDmVjx8jGaW7uUykm4DEUeyV21hPxNMo2nV955DhUThwqyOIDtreepP31hl84X7G5U9ZfsWvIB3Pg==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/fb55"
+        }
+      ],
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=20.19.0"
+      }
+    },
+    "node_modules/domhandler": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-6.0.1.tgz",
+      "integrity": "sha512-gYzvtM72ZtxQO0T048kd6HWSbbGCNOUwcnfQ01cqIJ4X2IYKFFHZ5mKvrQETcFXxsRObZulDaKmy//R7TPtsBg==",
+      "dev": true,
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "domelementtype": "^3.0.0"
+      },
+      "engines": {
+        "node": ">=20.19.0"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/fb55/domhandler?sponsor=1"
+      }
+    },
+    "node_modules/domutils": {
+      "version": "4.0.2",
+      "resolved": "https://registry.npmjs.org/domutils/-/domutils-4.0.2.tgz",
+      "integrity": "sha512-qI4JLRKnSzqFqr7hAlS5xQDusBCjKSEG4t4+7aNrIQMHBcsC2TGEhuyABJdYkgSewL57PNLYEiibY2iPKhKpaA==",
+      "dev": true,
+      "license": "BSD-2-Clause",
+      "dependencies": {
+        "dom-serializer": "^3.0.0",
+        "domelementtype": "^3.0.0",
+        "domhandler": "^6.0.0"
+      },
+      "engines": {
+        "node": ">=20.19.0"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/fb55/domutils?sponsor=1"
+      }
+    },
     "node_modules/emoji-regex": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
@@ -363,6 +435,19 @@
         "once": "^1.4.0"
       }
     },
+    "node_modules/entities": {
+      "version": "8.0.0",
+      "resolved": "https://registry.npmjs.org/entities/-/entities-8.0.0.tgz",
+      "integrity": "sha512-zwfzJecQ/Uej6tusMqwAqU/6KL2XaB2VZ2Jg54Je6ahNBGNH6Ek6g3jjNCF0fG9EWQKGZNddNjU5F1ZQn/sBnA==",
+      "dev": true,
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=20.19.0"
+      },
+      "funding": {
+        "url": "https://github.com/fb55/entities?sponsor=1"
+      }
+    },
     "node_modules/env-paths": {
       "version": "2.2.1",
       "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz",
@@ -437,6 +522,29 @@
       ],
       "license": "MIT"
     },
+    "node_modules/htmlparser2": {
+      "version": "12.0.0",
+      "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-12.0.0.tgz",
+      "integrity": "sha512-Tz7u1i95/g2x2jz81+x0FBVhBhY5aRTvD3tXXdFaljuNdzDLJ8UGNRrTcj2cgQvAg3iW/h77Fz15nLW0L0CrZw==",
+      "dev": true,
+      "funding": [
+        "https://github.com/fb55/htmlparser2?sponsor=1",
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/fb55"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "domelementtype": "^3.0.0",
+        "domhandler": "^6.0.0",
+        "domutils": "^4.0.2",
+        "entities": "^8.0.0"
+      },
+      "engines": {
+        "node": ">=20.19.0"
+      }
+    },
     "node_modules/import-fresh": {
       "version": "3.3.1",
       "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz",
diff --git a/package.json b/package.json
index 3dce8713..635a1c0f 100644
--- a/package.json
+++ b/package.json
@@ -5,6 +5,7 @@
   "description": "PDF book pipeline and profiling harness for the twinBASIC documentation",
   "devDependencies": {
     "html-entities": "^2.6.0",
+    "htmlparser2": "^12.0.0",
     "pdf-lib": "1.17.1",
     "puppeteer": "25.0.4"
   }
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 97619517..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-selectolax==0.4.9
diff --git a/scripts/check_links.mjs b/scripts/check_links.mjs
new file mode 100644
index 00000000..620ee95e
--- /dev/null
+++ b/scripts/check_links.mjs
@@ -0,0 +1,607 @@
+// Offline link checker for static sites.
+//
+// Typical invocation, from docs/check.bat:
+//
+//     node scripts/check_links.mjs --offline --include-fragments
+//         --fallback-extensions html --index-files "index.html,."
+//         --root-dir docs/_site docs/_site
+//
+// On this site (~733k link occurrences, ~12k unique targets across
+// 1127 HTML files / 124 MB) the script runs in ~2.2 s single-threaded
+// on the dev box. It dedupes (target, frag) up front so each unique
+// filesystem and fragment check fires exactly once regardless of how
+// many pages link to the same target.
+//
+// Online (network) link checking is not implemented. --offline is
+// therefore required; the script exits non-zero if it is absent.
+//
+// Strictness beyond a typical link checker:
+//   * Trailing slash on a file-shaped URL ('foo.html/') is reported
+//     broken (catches authoring mistakes).
+//   * <script src> URLs are checked.
+//   * The --forbid PREFIX flag (repeatable) fails the run if any
+//     extracted link starts with one of the given URL prefixes
+//     (bare prefix and 'prefix/' exempt), used by the offline pass
+//     to catch live-site links the offlinify rewrite missed.
+//
+// Output limitation: no per-link line numbers in error messages --
+// htmlparser2 SAX doesn't expose source positions.
+
+import * as fs from "node:fs";
+import * as path from "node:path";
+import { performance } from "node:perf_hooks";
+import { Parser } from "htmlparser2";
+
+// tag -> [attr, ...]. SAX walker dispatches on tag name; for each
+// matching attr present on that tag, the value becomes one or more
+// link references. Covers the standard set of HTML link-bearing
+// attributes (href / src / srcset / longdesc / formaction / action /
+// data / cite / poster).
+const LINK_ATTR_TABLE = new Map([
+  ["a",          ["href"]],
+  ["area",       ["href"]],
+  ["base",       ["href"]],
+  ["link",       ["href"]],
+  ["img",        ["src", "longdesc", "srcset"]],
+  ["script",     ["src"]],
+  ["iframe",     ["src"]],
+  ["frame",      ["src"]],
+  ["embed",      ["src"]],
+  ["source",     ["src", "srcset"]],
+  ["audio",      ["src"]],
+  ["video",      ["src", "poster"]],
+  ["track",      ["src"]],
+  ["input",      ["src", "formaction"]],
+  ["button",     ["formaction"]],
+  ["form",       ["action"]],
+  ["object",     ["data"]],
+  ["blockquote", ["cite"]],
+  ["q",          ["cite"]],
+  ["del",        ["cite"]],
+  ["ins",        ["cite"]],
+]);
+
+const SRCSET_ATTRS = new Set(["srcset"]);
+
+function* splitSrcset(value) {
+  // `URL [descriptor], URL [descriptor], ...`. Per the HTML srcset
+  // grammar a URL is a maximal run of non-whitespace characters, so
+  // it may itself contain commas (data: URIs, CDN transform URLs).
+  // Only commas trailing that run, or following a descriptor,
+  // separate candidates; splitting on every comma would chop such
+  // URLs into bogus links.
+  const candidate = /[\s,]*(\S*[^\s,])(?:\s[^,]*)?/g;
+  for (const match of value.matchAll(candidate)) {
+    yield match[1];
+  }
+}
+
+// One pass per file: extract every outgoing link AND every fragment-
+// target id/name in a single parse. The Python original makes two
+// passes (extract_links over all files, then extract_fragment_ids over
+// the dedup'd fragment-target file set), but the per-file re-parse cost
+// for the second pass outweighs the savings from skipping the ~25 % of
+// files that no one links to with a fragment. captureIds=false is kept
+// for completeness; main() always passes true when --include-fragments
+// is on, which it always is in our use.
+//
+// forbidPrefixes is an array of string prefixes; any extracted link
+// whose value starts with one of them, and whose tail past the prefix
+// is non-empty and not just '/', is collected into the returned
+// `forbidden` list. The bare prefix and prefix/ are exempt
+// (intentional "go to live site" links).
+function extractLinksAndIds(htmlPath, captureIds, forbidPrefixes) {
+  const links = [];
+  const ids = captureIds ? new Set() : null;
+  const hasForbid = forbidPrefixes && forbidPrefixes.length > 0;
+  const forbidden = hasForbid ? [] : null;
+  const checkForbid = hasForbid ? (url) => {
+    for (const prefix of forbidPrefixes) {
+      if (!url.startsWith(prefix)) continue;
+      const tail = url.slice(prefix.length);
+      if (tail === "" || tail === "/") return;
+      forbidden.push({ prefix, url });
+      return;
+    }
+  } : null;
+  const parser = new Parser({
+    onopentag(name, attribs) {
+      if (captureIds) {
+        const id = attribs.id;
+        if (id) ids.add(id);
+        if (name === "a") {
+          const nm = attribs.name;
+          if (nm) ids.add(nm);
+        }
+      }
+      const attrs = LINK_ATTR_TABLE.get(name);
+      if (!attrs) return;
+      for (const a of attrs) {
+        const v = attribs[a];
+        if (!v) continue;
+        if (SRCSET_ATTRS.has(a)) {
+          for (const u of splitSrcset(v)) {
+            links.push(u);
+            if (checkForbid) checkForbid(u);
+          }
+        } else {
+          links.push(v);
+          if (checkForbid) checkForbid(v);
+        }
+      }
+    },
+  });
+  parser.write(fs.readFileSync(htmlPath, "utf8"));
+  parser.end();
+  return { links, ids, forbidden };
+}
+
+// Coerce a base-path arg into the canonical '/prefix' form (leading
+// slash, no trailing slash). Empty input maps to empty string.
+function normalizeBasePath(s) {
+  if (!s) return "";
+  let v = s.trim().replace(/\/+$/, "");
+  if (!v) return "";
+  if (!v.startsWith("/")) v = "/" + v;
+  return v;
+}
+
+// Lop a base-path prefix off an absolute URL path, if it matches.
+//
+//   '/twinBASIC-docs/foo'    -> '/foo'     (prefix + /...)
+//   '/twinBASIC-docs'        -> '/'        (bare prefix, treat as root)
+//   '/twinBASIC-docs-other'  -> unchanged  (only strip on '/' or end)
+//   '/foo'                   -> unchanged  (no prefix match)
+function stripBasePath(pathStr, basePath) {
+  if (!basePath) return pathStr;
+  if (pathStr === basePath) return "/";
+  if (pathStr.startsWith(basePath + "/")) return pathStr.slice(basePath.length);
+  return pathStr;
+}
+
+// Resolve href -> [normalizedTargetStr, isDirLink, fragment].
+// Returns null for schemes/netlocs we skip. Uses only string ops (no
+// filesystem syscalls).
+//
+// isDirLink captures whether the URL ended in '/' or '/.' before
+// normalization. Node's path.normalize keeps a trailing separator but
+// folds 'foo/.' into 'foo', so the flag is tracked explicitly: 'foo/'
+// must resolve as a directory (try index files), while 'foo' falls
+// through to fallback extensions ('foo.html') if no file/dir 'foo' exists.
+//
+// basePath is an absolute-URL prefix to strip before resolving against
+// rootStr -- e.g. '/twinBASIC-docs' to handle a Jekyll --baseurl build.
+// Only applied to absolute URLs; relative paths are unaffected.
+const SCHEME_RE = /^[a-zA-Z][a-zA-Z0-9+\-.]*:/;
+
+function resolve(href, sourceDir, sourcePath, rootStr, basePath) {
+  let pathPart, frag;
+  const hashIdx = href.indexOf("#");
+  if (hashIdx >= 0) {
+    pathPart = href.slice(0, hashIdx);
+    frag = href.slice(hashIdx + 1);
+  } else {
+    pathPart = href;
+    frag = null;
+  }
+  if (!pathPart) {
+    return [sourcePath, false, frag];
+  }
+
+  // Cheap scheme/netloc check. Matches Python's urlparse heuristic:
+  // a colon in the first 16 chars OR a leading "//" triggers the URL
+  // path; if there is a real scheme or netloc, the link is skipped.
+  const colon = pathPart.indexOf(":");
+  if (pathPart.startsWith("//")) return null;
+  if (colon >= 0 && colon < 16 && SCHEME_RE.test(pathPart)) return null;
+  let pathStr = pathPart;
+
+  if (pathStr.indexOf("%") >= 0) {
+    try { pathStr = decodeURIComponent(pathStr); } catch { /* keep raw */ }
+  }
+
+  const isDirLink = pathStr.endsWith("/") || pathStr.endsWith("/.");
+
+  let target;
+  if (pathStr.startsWith("/")) {
+    const stripped = stripBasePath(pathStr, basePath);
+    target = path.normalize(path.join(rootStr, stripped.replace(/^\/+/, "")));
+  } else {
+    target = path.normalize(path.join(sourceDir, pathStr));
+  }
+  return [target, isDirLink, frag];
+}
+
+function statSafe(p) {
+  try { return fs.statSync(p); } catch { return null; }
+}
+
+// Resolve a URL path string to an on-disk file by the same rules
+// GitHub Pages applies at request time.
+//
+// A trailing-slash URL ('foo/') must resolve as a directory: try each
+// indexFile in order, with '.' meaning 'accept the directory itself'.
+// Fallback extensions never apply to dir-shaped links.
+//
+// A non-slash URL ('foo') tries the path as a file first, then as a dir
+// (same index-file logic), then falls back to fallback extensions.
+function checkPath(targetStr, isDirLink, fallbackExts, indexFiles) {
+  const stat = statSafe(targetStr);
+  if (isDirLink) {
+    if (!stat || !stat.isDirectory()) return null;
+    for (const idx of indexFiles) {
+      if (idx === ".") return targetStr;
+      const cand = path.join(targetStr, idx);
+      const s = statSafe(cand);
+      if (s && s.isFile()) return cand;
+    }
+    return null;
+  }
+  if (stat && stat.isFile()) return targetStr;
+  if (stat && stat.isDirectory()) {
+    for (const idx of indexFiles) {
+      if (idx === ".") return targetStr;
+      const cand = path.join(targetStr, idx);
+      const s = statSafe(cand);
+      if (s && s.isFile()) return cand;
+    }
+    return null;
+  }
+  for (const ext of fallbackExts) {
+    const cand = targetStr + "." + ext;
+    const s = statSafe(cand);
+    if (s && s.isFile()) return cand;
+  }
+  return null;
+}
+
+function printHelp() {
+  process.stdout.write(`Usage: node check_links.mjs [options] <inputs...>
+
+Offline link checker for static sites. Only offline checking is
+implemented; --offline is required.
+
+Options:
+  --offline                  REQUIRED. Skip network checks.
+  --include-fragments        Verify URL fragments against id/name attrs.
+  --fallback-extensions EXTS Comma-separated extensions to try if a path
+                             does not resolve as-is (e.g. 'html').
+  --index-files FILES        Comma-separated index file names to try when
+                             a path resolves to a directory. '.' means
+                             accept the directory itself.
+  --root-dir DIR             Root directory for absolute URL paths.
+  --base-path PREFIX         URL-path prefix to strip from absolute URLs
+                             before resolving against --root-dir
+                             (e.g. '/twinBASIC-docs').
+  --forbid PREFIX            Fail if any extracted link starts with this
+                             URL prefix. The bare prefix and 'prefix/'
+                             are exempt (intentional "go to live site"
+                             links). Repeatable. Used by the offline
+                             pass to catch live-site references the
+                             offlinify rewrite missed.
+  --threads N                Accepted for CLI compatibility; the Node
+                             port is currently single-threaded.
+  -v, --verbose              Print per-stage timing breakdown.
+  -h, --help                 Show this help and exit.
+
+Inputs are files or directories; directories are searched recursively
+for *.html.
+`);
+}
+
+function parseArgs(argv) {
+  const opts = {
+    offline: false,
+    includeFragments: false,
+    fallbackExtensions: "",
+    indexFiles: "",
+    rootDir: null,
+    basePath: "",
+    forbid: [],
+    verbose: false,
+  };
+  const inputs = [];
+  const unknown = [];
+  const need = (flag, i) => {
+    if (i >= argv.length) {
+      process.stderr.write(`error: ${flag} requires a value\n`);
+      process.exit(2);
+    }
+    return argv[i];
+  };
+
+  let i = 0;
+  while (i < argv.length) {
+    const a = argv[i++];
+    if (a === "--offline") opts.offline = true;
+    else if (a === "--include-fragments") opts.includeFragments = true;
+    else if (a === "--fallback-extensions") opts.fallbackExtensions = need(a, i++);
+    else if (a === "--index-files") opts.indexFiles = need(a, i++);
+    else if (a === "--root-dir") opts.rootDir = need(a, i++);
+    else if (a === "--base-path") opts.basePath = need(a, i++);
+    else if (a === "--forbid") opts.forbid.push(need(a, i++));
+    else if (a === "--threads") { need(a, i++); /* accepted, ignored */ }
+    else if (a === "-v" || a === "--verbose") opts.verbose = true;
+    else if (a === "-h" || a === "--help") { printHelp(); process.exit(0); }
+    else if (a.startsWith("--")) {
+      // Tolerate unknown flags passed through via check.bat's %*.
+      // Consume an attached value if present.
+      if (!a.includes("=") && i < argv.length && !argv[i].startsWith("-")) {
+        unknown.push(a, argv[i++]);
+      } else {
+        unknown.push(a);
+      }
+    } else if (a.startsWith("-") && a.length > 1) {
+      unknown.push(a);
+    } else {
+      inputs.push(a);
+    }
+  }
+  return { opts, inputs, unknown };
+}
+
+function collectHtmlFiles(inputs) {
+  const out = [];
+  for (const inp of inputs) {
+    const s = statSafe(inp);
+    if (!s) {
+      process.stderr.write(`warning: input not found: ${inp}\n`);
+      continue;
+    }
+    if (s.isFile()) {
+      out.push(inp);
+    } else if (s.isDirectory()) {
+      const entries = fs.readdirSync(inp, { recursive: true, withFileTypes: true });
+      for (const e of entries) {
+        if (e.isFile() && e.name.endsWith(".html")) {
+          out.push(path.join(e.parentPath || inp, e.name));
+        }
+      }
+    }
+  }
+  return out;
+}
+
+function main() {
+  const { opts, inputs, unknown } = parseArgs(process.argv.slice(2));
+
+  if (unknown.length) {
+    process.stderr.write(
+      `warning: ignoring unrecognised arguments: ${unknown.join(" ")}\n`
+    );
+  }
+
+  if (!opts.offline) {
+    process.stderr.write(
+      "error: --offline is required. Online (network) checking is not " +
+      "implemented by this tool.\n"
+    );
+    process.exit(2);
+  }
+  if (!inputs.length) {
+    process.stderr.write("error: at least one input file or directory is required\n");
+    process.exit(2);
+  }
+
+  // Keep --root-dir in its caller-supplied shape (no path.resolve) so
+  // resolver-built target strings have the same relative-vs-absolute
+  // shape as walk paths -- otherwise the idsByFile lookup below would
+  // miss for absolute-URL hrefs, which produce absolute targets when
+  // root-dir is absolute but relative walk-path entries when not.
+  // check.bat / CI both pass the same string for --root-dir and the
+  // positional input, so the two sides always agree.
+  const rootStr = opts.rootDir ?? "";
+  const fallbackExts = opts.fallbackExtensions.split(",").filter(Boolean);
+  const indexFiles = opts.indexFiles.split(",").filter(Boolean);
+  const basePath = normalizeBasePath(opts.basePath);
+
+  const t0 = performance.now();
+  const htmlFiles = collectHtmlFiles(inputs);
+  const tWalk = performance.now();
+
+  // Per-file: extract once, then group hrefs by (source_dir, href) so we
+  // resolve each unique combination exactly once. The same nav/footer
+  // links repeat across hundreds of pages from the same directory. Also
+  // capture the per-file id/name set if fragment checking is on, so the
+  // later fragment check is a Map lookup instead of a second SAX pass.
+  // idsByFile key matches the walk-path shape and (because rootStr is
+  // kept relative -- see above) the resolver-built target shape too,
+  // so a later `idsByFile.get(entry.resolved)` lands without
+  // canonicalisation.
+  const occurrences = []; // [srcPath, srcDir, href]
+  const idsByFile = opts.includeFragments ? new Map() : null;
+  const forbidPrefixes = opts.forbid.length ? opts.forbid : null;
+  const forbiddenBySource = forbidPrefixes ? new Map() : null;
+  for (const src of htmlFiles) {
+    const srcDir = path.dirname(src);
+    const { links, ids, forbidden } = extractLinksAndIds(
+      src, opts.includeFragments, forbidPrefixes
+    );
+    for (const h of links) occurrences.push([src, srcDir, h]);
+    if (idsByFile) idsByFile.set(src, ids);
+    if (forbidden && forbidden.length) forbiddenBySource.set(src, forbidden);
+  }
+  const tExtract = performance.now();
+
+  // Memoize resolution by (sourceDir, href). Nested Map<srcDir,
+  // Map<href, resolved>> avoids the per-occurrence composite-key
+  // string allocation that a flat Map<srcDir+sep+href, _> would cost
+  // (~733k of them on this site).
+  const resolutionCache = new Map();
+  // Same trick on the dedup side: Map<target, Map<isDirFrag, entry>>.
+  // The inner key is a short string built from (isDir + (frag || ""))
+  // -- no fresh allocation per occurrence beyond what JS would have
+  // done anyway.
+  const uniqueByTarget = new Map();
+  const uniqueEntries = []; // flat list in insertion order for later loops
+  for (let oi = 0; oi < occurrences.length; oi++) {
+    const occ = occurrences[oi];
+    const src = occ[0], srcDir = occ[1], href = occ[2];
+    let dirCache = resolutionCache.get(srcDir);
+    if (!dirCache) { dirCache = new Map(); resolutionCache.set(srcDir, dirCache); }
+    let r;
+    if (dirCache.has(href)) {
+      r = dirCache.get(href);
+    } else {
+      r = resolve(href, srcDir, src, rootStr, basePath);
+      dirCache.set(href, r);
+    }
+    if (r === null) continue;
+    const target = r[0], isDir = r[1], frag = r[2];
+    let inner = uniqueByTarget.get(target);
+    if (!inner) { inner = new Map(); uniqueByTarget.set(target, inner); }
+    const innerKey = (isDir ? "1" : "0") + (frag === null ? "" : frag);
+    let entry = inner.get(innerKey);
+    if (!entry) {
+      entry = { target, isDir, frag, resolved: undefined, sources: [] };
+      inner.set(innerKey, entry);
+      uniqueEntries.push(entry);
+    }
+    entry.sources.push(src, href);
+  }
+  const tResolve = performance.now();
+
+  // De-dup (target, isDir) for filesystem checks: 'foo' and 'foo#bar'
+  // share the same path lookup. The inner-Map structure already groups
+  // by target, so the per-target dir-flag check is at most two stats.
+  for (const inner of uniqueByTarget.values()) {
+    let resolvedFile;     // undefined = not yet computed
+    let resolvedDir;
+    let computedFile = false, computedDir = false;
+    for (const entry of inner.values()) {
+      if (entry.isDir) {
+        if (!computedDir) {
+          resolvedDir = checkPath(entry.target, true, fallbackExts, indexFiles);
+          computedDir = true;
+        }
+        entry.resolved = resolvedDir;
+      } else {
+        if (!computedFile) {
+          resolvedFile = checkPath(entry.target, false, fallbackExts, indexFiles);
+          computedFile = true;
+        }
+        entry.resolved = resolvedFile;
+      }
+    }
+  }
+  const tCheckPaths = performance.now();
+
+  // Fragment IDs were captured during the link-extraction pass; no
+  // second SAX walk needed. Just expose a Map<file, Set<id>> for the
+  // checking loop, restricted to actual fragment targets so the verbose
+  // breakdown still reports a useful count.
+  const fragmentCache = new Map();
+  let filesForFragments = [];
+  if (opts.includeFragments) {
+    const setFor = new Set();
+    for (const entry of uniqueEntries) {
+      if (entry.frag && entry.resolved) setFor.add(entry.resolved);
+    }
+    filesForFragments = [...setFor].sort();
+    for (const f of filesForFragments) {
+      // A resolved target may be a file we never scanned (e.g. directly
+      // referenced asset that isn't *.html), in which case it has no
+      // captured id set; treat as empty so the fragment check fails.
+      fragmentCache.set(f, idsByFile.get(f) || new Set());
+    }
+  }
+  const tFragments = performance.now();
+
+  const broken = []; // (src, href, reason) triples flattened
+  let brokenUniqueCount = 0;
+  for (const entry of uniqueEntries) {
+    if (entry.resolved === null) {
+      brokenUniqueCount++;
+      const srcs = entry.sources;
+      for (let i = 0; i < srcs.length; i += 2) {
+        broken.push(srcs[i], srcs[i + 1], "target not found");
+      }
+      continue;
+    }
+    if (entry.frag && opts.includeFragments) {
+      const ids = fragmentCache.get(entry.resolved);
+      if (!ids || !ids.has(entry.frag)) {
+        brokenUniqueCount++;
+        const reason = `fragment #${entry.frag} not found`;
+        const srcs = entry.sources;
+        for (let i = 0; i < srcs.length; i += 2) {
+          broken.push(srcs[i], srcs[i + 1], reason);
+        }
+      }
+    }
+  }
+  const tDone = performance.now();
+
+  // Merge broken + forbidden into a single per-source report so a file
+  // with both kinds of issue appears in one block, with the BROKEN /
+  // FORBIDDEN labels distinguishing them. Labels are padded to the
+  // wider of the two so href columns line up.
+  if (broken.length || (forbiddenBySource && forbiddenBySource.size)) {
+    const bySource = new Map();
+    for (let i = 0; i < broken.length; i += 3) {
+      const src = broken[i], href = broken[i + 1], reason = broken[i + 2];
+      let set = bySource.get(src);
+      if (!set) { set = new Set(); bySource.set(src, set); }
+      set.add("E\0" + href + "\0" + reason);
+    }
+    if (forbiddenBySource) {
+      for (const [src, fhits] of forbiddenBySource) {
+        let set = bySource.get(src);
+        if (!set) { set = new Set(); bySource.set(src, set); }
+        for (const fh of fhits) {
+          set.add(`F\0${fh.url}\0forbidden prefix '${fh.prefix}'`);
+        }
+      }
+    }
+    const sortedSources = [...bySource.keys()].sort();
+    const lines = [];
+    for (const src of sortedSources) {
+      lines.push("");
+      lines.push(`${src}:`);
+      const items = [...bySource.get(src)].sort();
+      for (const item of items) {
+        const j1 = item.indexOf("\0");
+        const j2 = item.indexOf("\0", j1 + 1);
+        const kind = item.slice(0, j1);
+        const href = item.slice(j1 + 1, j2);
+        const reason = item.slice(j2 + 1);
+        const label = kind === "F" ? "FORBIDDEN" : "BROKEN   ";
+        lines.push(`  ${label}  ${href} -- ${reason}`);
+      }
+    }
+    lines.push("");
+    process.stdout.write(lines.join("\n") + "\n");
+  }
+
+  let forbiddenCount = 0;
+  if (forbiddenBySource) {
+    for (const fhits of forbiddenBySource.values()) forbiddenCount += fhits.length;
+  }
+  const total = occurrences.length;
+  const unique = uniqueEntries.length;
+  const errorsUnique = brokenUniqueCount;
+  const okUnique = unique - errorsUnique;
+  const elapsed = (tDone - t0) / 1000;
+  const forbidNote = forbidPrefixes ? `, ${forbiddenCount} forbidden` : "";
+  process.stdout.write(
+    `Checked ${total} occurrences (${unique} unique) in ${elapsed.toFixed(3)}s ` +
+    `-- ${okUnique} OK, ${errorsUnique} broken${forbidNote}\n`
+  );
+
+  if (opts.verbose) {
+    const fmt = (a, b) => `${((b - a) / 1000).toFixed(3)}s`;
+    process.stdout.write("\n");
+    process.stdout.write(`  Files scanned:        ${htmlFiles.length}\n`);
+    process.stdout.write(`  Fragment targets:     ${filesForFragments.length}\n`);
+    process.stdout.write(`  Walk:        ${fmt(t0, tWalk)}\n`);
+    process.stdout.write(`  Extract:     ${fmt(tWalk, tExtract)}\n`);
+    process.stdout.write(`  Resolve:     ${fmt(tExtract, tResolve)}\n`);
+    process.stdout.write(`  Check paths: ${fmt(tResolve, tCheckPaths)}\n`);
+    process.stdout.write(`  Fragments:   ${fmt(tCheckPaths, tFragments)}\n`);
+    process.stdout.write(`  Report:      ${fmt(tFragments, tDone)}\n`);
+  }
+
+  if (broken.length || forbiddenCount) process.exit(1);
+}
+
+main();
diff --git a/scripts/check_links.py b/scripts/check_links.py
deleted file mode 100644
index 49e473f3..00000000
--- a/scripts/check_links.py
+++ /dev/null
@@ -1,455 +0,0 @@
-"""
-Offline link checker for static sites.
-
-CLI mirrors the subset of lychee flags used by docs/check.bat, so that an
-invocation like
-
-    python scripts/check_links.py --offline --include-fragments
-        --fallback-extensions html --index-files "index.html,."
-        --root-dir docs/_site docs/_site
-
-produces the same correctness verdict as the equivalent lychee call (only
-faster and a bit stricter -- see "Differences from lychee" below).
-
-Why this exists: lychee's offline pipeline funnels every link occurrence
-through an async channel before its dedup cache short-circuits the work.
-On this site (~733k occurrences, ~12k unique targets) that fixed-per-
-occurrence overhead is ~50s on Windows. This script dedupes (target, frag)
-up front, so the filesystem and fragment checks run once per unique target.
-
-Online (network) link checking is not implemented. --offline is therefore
-required; the script exits non-zero if it is absent.
-
-Differences from lychee (correctness):
-  * Trailing slash on a file-shaped URL ('foo.html/') is reported broken,
-    where lychee normalises and accepts. Catches authoring mistakes.
-  * <script src> URLs are checked. Lychee 0.24.1 silently skips them.
-
-Differences from lychee (output): no per-link line numbers in error
-messages -- selectolax doesn't expose source positions.
-"""
-
-import argparse
-import os
-import sys
-import time
-from concurrent.futures import ThreadPoolExecutor
-from pathlib import Path
-from urllib.parse import unquote, urlparse
-
-from selectolax.parser import HTMLParser
-
-# (selector, attribute_name) pairs. We can't use a single multi-selector
-# query because some elements expose links under non-href/src attributes
-# (cite, action, data, srcset, ...). Lychee covers a similar set in
-# lychee-lib/src/extract/html/html5gum.rs.
-LINK_ATTRS = [
-    ("a[href]", "href"),
-    ("area[href]", "href"),
-    ("base[href]", "href"),
-    ("link[href]", "href"),
-    ("img[src]", "src"),
-    ("img[longdesc]", "longdesc"),
-    ("img[srcset]", "srcset"),
-    ("script[src]", "src"),
-    ("iframe[src]", "src"),
-    ("frame[src]", "src"),
-    ("embed[src]", "src"),
-    ("source[src]", "src"),
-    ("source[srcset]", "srcset"),
-    ("audio[src]", "src"),
-    ("video[src]", "src"),
-    ("video[poster]", "poster"),
-    ("track[src]", "src"),
-    ("input[src]", "src"),
-    ("input[formaction]", "formaction"),
-    ("button[formaction]", "formaction"),
-    ("form[action]", "action"),
-    ("object[data]", "data"),
-    ("blockquote[cite]", "cite"),
-    ("q[cite]", "cite"),
-    ("del[cite]", "cite"),
-    ("ins[cite]", "cite"),
-]
-
-SRCSET_ATTRS = {"srcset"}
-
-
-def _split_srcset(value):
-    # srcset is `URL [descriptor], URL [descriptor], ...`. Descriptors
-    # cannot contain commas, so a comma split is safe; each part's first
-    # whitespace-separated token is the URL.
-    for part in value.split(","):
-        part = part.strip()
-        if not part:
-            continue
-        url = part.split(None, 1)[0]
-        if url:
-            yield url
-
-
-def extract_links(html_path):
-    data = html_path.read_bytes()
-    tree = HTMLParser(data)
-    out = []
-    for selector, attr in LINK_ATTRS:
-        for node in tree.css(selector):
-            v = node.attributes.get(attr)
-            if not v:
-                continue
-            if attr in SRCSET_ATTRS:
-                out.extend(_split_srcset(v))
-            else:
-                out.append(v)
-    return out
-
-
-def extract_fragment_ids(html_path):
-    data = html_path.read_bytes()
-    tree = HTMLParser(data)
-    ids = set()
-    for node in tree.css("[id]"):
-        v = node.attributes.get("id")
-        if v:
-            ids.add(v)
-    for node in tree.css("a[name]"):
-        v = node.attributes.get("name")
-        if v:
-            ids.add(v)
-    return ids
-
-
-def _normalize_base_path(s):
-    """Coerce a base-path arg into the canonical '/prefix' form (leading
-    slash, no trailing slash). Empty input maps to empty string."""
-    if not s:
-        return ""
-    s = s.strip().rstrip("/")
-    if not s:
-        return ""
-    if not s.startswith("/"):
-        s = "/" + s
-    return s
-
-
-def _strip_base_path(path_str, base_path):
-    """Lop a base-path prefix off an absolute URL path, if it matches.
-
-    A Jekyll build with `--baseurl /twinBASIC-docs` produces hrefs like
-    '/twinBASIC-docs/foo' that resolve, in the deployed site, to '/foo'
-    under the actual root. This mirrors lychee's `--remap` regex but as
-    a clean prefix strip:
-
-      '/twinBASIC-docs/foo' -> '/foo'      (prefix + /...)
-      '/twinBASIC-docs'     -> '/'          (bare prefix, treat as root)
-      '/twinBASIC-docs-other' -> unchanged  (only strip on '/' or end-of-string)
-      '/foo'                -> unchanged    (no prefix match)
-    """
-    if not base_path:
-        return path_str
-    if path_str == base_path:
-        return "/"
-    if path_str.startswith(base_path + "/"):
-        return path_str[len(base_path):]
-    return path_str
-
-
-def resolve(href, source_dir_str, source_str, root_str, base_path=""):
-    """Lexically resolve href -> (normalized_target_str, is_dir_link, fragment).
-    Returns None for schemes/netlocs we skip. Uses only string ops — no
-    filesystem syscalls (Path.resolve is ~110us per call on Windows).
-
-    is_dir_link captures whether the URL ended in '/' before normalization.
-    os.path.normpath strips trailing slashes, but the distinction matters
-    for resolution: 'foo/' must resolve as a directory (try index files),
-    while 'foo' falls through to fallback extensions ('foo.html') if no
-    file/dir 'foo' exists.
-
-    base_path is an absolute-URL prefix to strip before resolving against
-    root_str -- e.g. '/twinBASIC-docs' to handle a Jekyll --baseurl build.
-    Only applied to absolute URLs; relative paths are unaffected.
-    """
-    if "#" in href:
-        path_part, frag = href.split("#", 1)
-    else:
-        path_part, frag = href, None
-    if not path_part:
-        return source_str, False, frag
-
-    if ":" in path_part[:16] or path_part.startswith("//"):
-        parsed = urlparse(path_part)
-        if parsed.scheme or parsed.netloc:
-            return None
-        path_str = parsed.path
-    else:
-        path_str = path_part
-
-    if "%" in path_str:
-        path_str = unquote(path_str)
-
-    is_dir_link = path_str.endswith("/") or path_str.endswith("/.")
-
-    if path_str.startswith("/"):
-        path_str = _strip_base_path(path_str, base_path)
-        target = os.path.normpath(os.path.join(root_str, path_str.lstrip("/")))
-    else:
-        target = os.path.normpath(os.path.join(source_dir_str, path_str))
-    return target, is_dir_link, frag
-
-
-def check_path(target_str, is_dir_link, fallback_exts, index_files):
-    """Mirror lychee --fallback-extensions / --index-files semantics.
-
-    A trailing-slash URL ('foo/') must resolve as a directory: try each
-    index_file in order, with '.' meaning 'accept the directory itself'.
-    Fallback extensions never apply to dir-shaped links.
-
-    A non-slash URL ('foo') tries the path as a file first, then as a dir
-    (same index-file logic), then falls back to fallback extensions.
-    """
-    target = Path(target_str)
-    if is_dir_link:
-        if not target.is_dir():
-            return None
-        for idx in index_files:
-            if idx == ".":
-                return target
-            cand = target / idx
-            if cand.is_file():
-                return cand
-        return None
-    if target.is_file():
-        return target
-    if target.is_dir():
-        for idx in index_files:
-            if idx == ".":
-                return target
-            cand = target / idx
-            if cand.is_file():
-                return cand
-        return None
-    for ext in fallback_exts:
-        cand = Path(target_str + "." + ext)
-        if cand.is_file():
-            return cand
-    return None
-
-
-def _build_parser():
-    ap = argparse.ArgumentParser(
-        prog="check_links.py",
-        description=(
-            "Offline link checker. CLI mirrors the subset of lychee flags "
-            "used by check.bat. Only offline checking is implemented; "
-            "--offline is required."
-        ),
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    ap.add_argument(
-        "--offline", action="store_true",
-        help=(
-            "REQUIRED. Skip network checks (the only mode supported). "
-            "This script exits non-zero if the flag is absent."
-        ),
-    )
-    ap.add_argument(
-        "--include-fragments", action="store_true",
-        help=(
-            "Verify URL fragments (#anchor) against id/name attributes in "
-            "the target HTML. Off by default to match lychee."
-        ),
-    )
-    ap.add_argument(
-        "--fallback-extensions", default="", metavar="EXTS",
-        help=(
-            "Comma-separated extensions to try if a path does not resolve "
-            "as-is (e.g. 'html'). Empty by default."
-        ),
-    )
-    ap.add_argument(
-        "--index-files", default="", metavar="FILES",
-        help=(
-            "Comma-separated index file names to try when a path resolves "
-            "to a directory. Use '.' to accept the directory itself when "
-            "no index file matches. Empty by default."
-        ),
-    )
-    ap.add_argument(
-        "--root-dir", type=Path, metavar="DIR",
-        help=(
-            "Root directory for absolute URL paths (e.g. '/foo'). "
-            "If absent, absolute URLs cannot be resolved and are reported "
-            "as broken."
-        ),
-    )
-    ap.add_argument(
-        "--base-path", default="", metavar="PREFIX",
-        help=(
-            "URL-path prefix to strip from absolute URLs before resolving "
-            "against --root-dir. Matches a Jekyll build's --baseurl, e.g. "
-            "'/twinBASIC-docs'. Equivalent to a constrained form of "
-            "lychee's --remap. Empty by default (no stripping)."
-        ),
-    )
-    ap.add_argument(
-        "--threads", type=int, default=os.cpu_count() or 4, metavar="N",
-        help="Worker threads for HTML parsing. Default: CPU count.",
-    )
-    ap.add_argument(
-        "-v", "--verbose", action="store_true",
-        help="Print per-stage timing breakdown.",
-    )
-    ap.add_argument(
-        "inputs", nargs="+", type=Path,
-        help=(
-            "Files or directories to scan. Directories are searched "
-            "recursively for *.html."
-        ),
-    )
-    return ap
-
-
-def _collect_html_files(inputs):
-    out = []
-    for inp in inputs:
-        if inp.is_file():
-            out.append(inp)
-        elif inp.is_dir():
-            out.extend(inp.rglob("*.html"))
-        else:
-            print(f"warning: input not found: {inp}", file=sys.stderr)
-    return out
-
-
-def main():
-    ap = _build_parser()
-    # parse_known_args so extra lychee flags passed via check.bat's %*
-    # don't break us. Unknown flags are surfaced as a warning.
-    args, extra = ap.parse_known_args()
-    if extra:
-        print(
-            f"warning: ignoring unrecognised arguments: {' '.join(extra)}",
-            file=sys.stderr,
-        )
-
-    if not args.offline:
-        ap.error(
-            "--offline is required. Online (network) checking is not "
-            "implemented by this tool; use lychee for that."
-        )
-
-    root_str = str(args.root_dir.resolve()) if args.root_dir else ""
-    fallback_exts = [e for e in args.fallback_extensions.split(",") if e]
-    index_files = [e for e in args.index_files.split(",") if e]
-    base_path = _normalize_base_path(args.base_path)
-
-    t0 = time.perf_counter()
-    html_files = _collect_html_files(args.inputs)
-    t_walk = time.perf_counter()
-
-    # Per-file: extract once, then group hrefs by (source_dir, href) so we
-    # resolve each unique combination exactly once. The same nav/footer
-    # links repeat across hundreds of pages from the same directory.
-    occurrences = []  # (source_path, source_dir_str, href)
-    with ThreadPoolExecutor(max_workers=args.threads) as ex:
-        for src, hrefs in zip(html_files, ex.map(extract_links, html_files)):
-            src_dir = str(src.parent)
-            src_str = str(src)
-            for h in hrefs:
-                occurrences.append((src_str, src_dir, h))
-    t_extract = time.perf_counter()
-
-    # Memoize resolution by (source_dir, href). Each unique (dir, href)
-    # resolves identically regardless of which file in that dir found it.
-    resolution_cache = {}
-    unique_checks = {}
-    for src_str, src_dir, href in occurrences:
-        rk = (src_dir, href)
-        r = resolution_cache.get(rk, ...)
-        if r is ...:
-            r = resolve(href, src_dir, src_str, root_str, base_path)
-            resolution_cache[rk] = r
-        if r is None:
-            continue
-        target, is_dir, frag = r
-        # Include is_dir in the key: 'foo' and 'foo/' resolve via
-        # different rules even after normpath collapses them.
-        key = (target, is_dir, frag)
-        unique_checks.setdefault(key, []).append((src_str, href))
-    t_resolve = time.perf_counter()
-
-    path_keys = sorted({(t, d) for (t, d, _) in unique_checks})
-    target_resolution = {}
-    for (t, d) in path_keys:
-        target_resolution[(t, d)] = check_path(t, d, fallback_exts, index_files)
-    t_check_paths = time.perf_counter()
-
-    files_for_fragments = sorted({
-        target_resolution[(t, d)] for (t, d, f) in unique_checks
-        if f and target_resolution.get((t, d))
-    })
-    fragment_cache = {}
-    if args.include_fragments and files_for_fragments:
-        with ThreadPoolExecutor(max_workers=args.threads) as ex:
-            for f, ids in zip(files_for_fragments,
-                              ex.map(extract_fragment_ids, files_for_fragments)):
-                fragment_cache[f] = ids
-    t_fragments = time.perf_counter()
-
-    broken = []  # one entry per occurrence; for human-readable report
-    broken_keys = set()  # unique broken (target, is_dir, frag) keys
-    for key, sources in unique_checks.items():
-        target_str, is_dir, frag = key
-        resolved = target_resolution.get((target_str, is_dir))
-        if resolved is None:
-            broken_keys.add(key)
-            for src_str, href in sources:
-                broken.append((src_str, href, "target not found"))
-            continue
-        if frag and args.include_fragments:
-            ids = fragment_cache.get(resolved, set())
-            if frag not in ids:
-                broken_keys.add(key)
-                for src_str, href in sources:
-                    broken.append((src_str, href, f"fragment #{frag} not found"))
-    t_done = time.perf_counter()
-
-    total = len(occurrences)
-    unique = len(unique_checks)
-    errors_unique = len(broken_keys)
-    ok_unique = unique - errors_unique
-
-    if broken:
-        # Group by source file, lychee-style.
-        by_source = {}
-        for src_str, href, reason in broken:
-            by_source.setdefault(src_str, set()).add((href, reason))
-        for src_str in sorted(by_source):
-            print(f"\n[{src_str}]:")
-            for href, reason in sorted(by_source[src_str]):
-                print(f"  ERROR  {href} -- {reason}")
-        print()
-
-    elapsed = t_done - t0
-    print(
-        f"Checked {total} occurrences ({unique} unique) in {elapsed:.3f}s "
-        f"-- {ok_unique} OK, {errors_unique} broken"
-    )
-
-    if args.verbose:
-        print()
-        print(f"  Files scanned:        {len(html_files)}")
-        print(f"  Fragment targets:     {len(files_for_fragments)}")
-        print(f"  Walk:        {t_walk - t0:.3f}s")
-        print(f"  Extract:     {t_extract - t_walk:.3f}s")
-        print(f"  Resolve:     {t_resolve - t_extract:.3f}s")
-        print(f"  Check paths: {t_check_paths - t_resolve:.3f}s")
-        print(f"  Fragments:   {t_fragments - t_check_paths:.3f}s")
-        print(f"  Report:      {t_done - t_fragments:.3f}s")
-
-    if broken:
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/check_offline_live_links.py b/scripts/check_offline_live_links.py
deleted file mode 100644
index c9fb0309..00000000
--- a/scripts/check_offline_live_links.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""
-Scan docs/_site-offline/ for any https://docs.twinbasic.com/<path>
-reference outside of <code> / <pre> blocks. Exit 1 if any found,
-0 otherwise.
-
-Run by docs/check.bat after the offline lychee pass. After
-_plugins/offlinify.rb's SEO-block strip, no live-site references
-should remain except:
-
-  * Sample URLs inside <code> / <pre> blocks (tutorial code that
-    legitimately shows live URLs as data, e.g. the VBRUN.Hyperlink
-    `NavigateTo "https://docs.twinbasic.com/"` example). Skipped
-    via the same code-block shape offlinify uses for its URL
-    rewrite.
-  * The bare root URL `https://docs.twinbasic.com` or
-    `https://docs.twinbasic.com/` -- intentional "go to the live
-    docs site" links (e.g. the Documentation entry in the FAQ
-    resource list). Skipped via the tail check below.
-
-Anything deeper (`https://docs.twinbasic.com/tB/Core/Const`,
-`https://docs.twinbasic.comi`, ...) is flagged: in the offline
-copy those navigate back to the live site, undermining the local
-read; in source they should be a relative link or a /tB/...
-permalink that resolves locally.
-
-Run from anywhere:
-    python scripts/check_offline_live_links.py
-"""
-
-import re
-import sys
-from pathlib import Path
-
-SCRIPT_DIR = Path(__file__).resolve().parent
-REPO_ROOT = SCRIPT_DIR.parent
-OFFLINE_TREE = REPO_ROOT / "docs" / "_site-offline"
-
-# Matches a <code>...</code> or <pre>...</pre> block. Same shape as
-# _plugins/offlinify.rb CODE_BLOCK_RE so sample URLs in tutorial
-# code are skipped here too.
-CODE_BLOCK_RE = re.compile(r"<(code|pre)\b[^>]*>.*?</\1>", re.DOTALL)
-
-# Captures the trailing path/typo characters after the domain. An
-# empty tail or `/` means the bare root URL (intentional). Anything
-# else is a deep link or a typo (`.comi`, `.com/tB/...`).
-LIVE_LINK_RE = re.compile(r"https://docs\.twinbasic\.com(?P<tail>[^\s\"'<>]*)")
-
-
-def main() -> int:
-    if not OFFLINE_TREE.is_dir():
-        print(
-            f"_site-offline/ not found at {OFFLINE_TREE} -- run docs/build.bat first."
-        )
-        return 2
-
-    hits = []
-    for html in sorted(OFFLINE_TREE.rglob("*.html")):
-        content = html.read_text(encoding="utf-8")
-        link_matches = list(LIVE_LINK_RE.finditer(content))
-        if not link_matches:
-            continue
-        code_ranges = [(m.start(), m.end()) for m in CODE_BLOCK_RE.finditer(content)]
-        for m in link_matches:
-            tail = m.group("tail")
-            if tail == "" or tail == "/":
-                continue
-            if any(s <= m.start() < e for s, e in code_ranges):
-                continue
-            line_num = content.count("\n", 0, m.start()) + 1
-            start = max(0, m.start() - 60)
-            end = min(len(content), m.start() + 80)
-            snippet = re.sub(r"[\r\n]+", " ", content[start:end])
-            hits.append((html, line_num, snippet))
-
-    if hits:
-        print(
-            f"FAIL: {len(hits)} reference(s) to docs.twinbasic.com in "
-            f"_site-offline/ outside code blocks:"
-        )
-        for path, line_num, snippet in hits:
-            try:
-                rel = path.relative_to(REPO_ROOT)
-            except ValueError:
-                rel = path
-            print(f"  {rel}:{line_num}: ...{snippet}...")
-        print()
-        print(
-            "Update the source markdown to use a relative link or /tB/... "
-            "permalink instead."
-        )
-        return 1
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())

From c6886a40f94c79dc7e16bd23642ec1cb576373bd Mon Sep 17 00:00:00 2001
From: Kuba Sunderland-Ober <kuba@mareimbrium.org>
Date: Sun, 24 May 2026 15:37:33 +0200
Subject: [PATCH 2/2] Parallelize link checking of all 3 sites.

---
 .github/workflows/checks.yml          |  36 ++---
 .github/workflows/jekyll-gh-pages.yml |  38 ++---
 docs/check.bat                        |  61 ++------
 scripts/check_links.mjs               | 198 ++++++++++++++++++++------
 4 files changed, 198 insertions(+), 135 deletions(-)

diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index c246d68d..101ab0bf 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -43,11 +43,16 @@ jobs:
           cache-dependency-path: package-lock.json
       - name: Install Node.js dependencies
         run: npm ci
-      - name: Check online links (check_links.mjs)
-        # `--fallback-extensions html` mirrors what GitHub Pages does at request time:
-        # an extensionless URL like `/FAQ` is served as `/FAQ.html`. This workflow's
-        # Jekyll build runs without --baseurl (no Pages prefix), so no --base-path is
-        # needed -- contrast with jekyll-gh-pages.yml.
+      - name: Check links (check_links.mjs)
+        # Three passes run in parallel via /sep/:
+        #   1. Online (_site/): --fallback-extensions html mirrors GitHub Pages'
+        #      extensionless-URL behaviour.  No --base-path needed -- this
+        #      workflow builds without --baseurl (contrast jekyll-gh-pages.yml).
+        #   2. Offline (_site-offline/): strict, no extension fallback.  --forbid
+        #      catches any surviving https://docs.twinbasic.com/<path> link the
+        #      offlinify rewrite missed (bare root URL is exempt).
+        #   3. Book (_site-pdf/book.html): --no-fail makes failures informational
+        #      (book still has unrewritten absolute URLs and missing anchors).
         run: >-
           node scripts/check_links.mjs
           --offline --include-fragments
@@ -55,28 +60,13 @@ jobs:
           --index-files 'index.html,.'
           --root-dir docs/_site
           docs/_site
-      - name: Check offline links and live-link survivors (check_links.mjs)
-        # Strict check on `_site-offline/`: every link must resolve to an actual file
-        # under `file://`, with no extension fallback. `--forbid` also fails the build
-        # if any extracted link still points at https://docs.twinbasic.com/<path> --
-        # i.e. any live-site reference the offlinify rewrite missed. The bare root
-        # URL (https://docs.twinbasic.com[/]) is exempt, since intentional "go to the
-        # live site" links are allowed.
-        run: >-
-          node scripts/check_links.mjs
+          /sep/
           --offline --include-fragments
           --index-files index.html
           --forbid 'https://docs.twinbasic.com'
           --root-dir docs/_site-offline
           docs/_site-offline
-      - name: Check book links (informational)
-        # Failures do not block the build. The book still has absolute
-        # intra-site URLs that the chapter transform has not yet rewritten
-        # and some fragment anchors that are not yet generated. Tracked here
-        # for visibility until those are fixed.
-        continue-on-error: true
-        run: >-
-          node scripts/check_links.mjs
-          --offline --include-fragments
+          /sep/
+          --offline --no-fail --include-fragments
           --root-dir docs/_site-pdf
           docs/_site-pdf/book.html
diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml
index e7b5c684..0fd63003 100644
--- a/.github/workflows/jekyll-gh-pages.yml
+++ b/.github/workflows/jekyll-gh-pages.yml
@@ -70,13 +70,16 @@ jobs:
         run: |
           npm ci
           sudo npx puppeteer browsers install chrome --install-deps
-      - name: Check online links (check_links.mjs)
-        # `--fallback-extensions html` mirrors what GitHub Pages does at request time:
-        # an extensionless URL like `/FAQ` is served as `/FAQ.html`. Without the flag
-        # every pretty permalink on the site would look broken.
-        #
-        # `--base-path` strips the Pages baseurl (e.g. `/twinBASIC-docs`) from absolute
-        # URLs before resolving against `--root-dir`.
+      - name: Check links (check_links.mjs)
+        # Three passes run in parallel via /sep/:
+        #   1. Online (_site/): --fallback-extensions html mirrors GitHub Pages'
+        #      extensionless-URL behaviour.  --base-path strips the Pages baseurl
+        #      (e.g. `/twinBASIC-docs`) from absolute URLs before resolving.
+        #   2. Offline (_site-offline/): strict, no extension fallback.  --forbid
+        #      catches any surviving https://docs.twinbasic.com/<path> link the
+        #      offlinify rewrite missed (bare root URL is exempt).
+        #   3. Book (_site-pdf/book.html): --no-fail makes failures informational
+        #      (book still has unrewritten absolute URLs and missing anchors).
         run: >-
           node scripts/check_links.mjs
           --offline --include-fragments
@@ -85,29 +88,14 @@ jobs:
           --base-path '${{ steps.pages.outputs.base_path }}'
           --root-dir docs/_site
           docs/_site
-      - name: Check offline links and live-link survivors (check_links.mjs)
-        # Strict check on `_site-offline/`: every link must resolve to an actual file
-        # under `file://`, with no extension fallback. `--forbid` also fails the build
-        # if any extracted link still points at https://docs.twinbasic.com/<path> --
-        # i.e. any live-site reference the offlinify rewrite missed. The bare root
-        # URL (https://docs.twinbasic.com[/]) is exempt, since intentional "go to the
-        # live site" links are allowed.
-        run: >-
-          node scripts/check_links.mjs
+          /sep/
           --offline --include-fragments
           --index-files index.html
           --forbid 'https://docs.twinbasic.com'
           --root-dir docs/_site-offline
           docs/_site-offline
-      - name: Check book links (informational)
-        # Failures do not block the build. The book still has absolute
-        # intra-site URLs that the chapter transform has not yet rewritten
-        # and some fragment anchors that are not yet generated. Tracked here
-        # for visibility until those are fixed.
-        continue-on-error: true
-        run: >-
-          node scripts/check_links.mjs
-          --offline --include-fragments
+          /sep/
+          --offline --no-fail --include-fragments
           --root-dir docs/_site-pdf
           docs/_site-pdf/book.html
       - name: Render book PDF
diff --git a/docs/check.bat b/docs/check.bat
index 990a0914..7465f65d 100644
--- a/docs/check.bat
+++ b/docs/check.bat
@@ -1,48 +1,17 @@
-@rem Run the Node-based link checker on both build outputs. The offline
-@rem pass additionally uses --forbid to flag any surviving
-@rem https://docs.twinbasic.com/<path> link that the offlinify rewrite
-@rem missed.
+@rem Run the Node-based link checker on all three build outputs in parallel.
+@rem /sep/ dispatches each segment to a worker thread; results are printed
+@rem in order with headers.
 @rem
-@rem _site/        Online tree. `--fallback-extensions html` mirrors what
-@rem               GitHub Pages does at request time: an extensionless
-@rem               URL like /FAQ is served as /FAQ.html. Without the flag
-@rem               every pretty permalink would appear broken. No
-@rem               --forbid here -- the online tree intentionally carries
-@rem               canonical https://docs.twinbasic.com links in the
-@rem               jekyll-seo-tag block.
-@rem _site-offline/ Offline tree. No extension fallback -- every link
-@rem               must resolve to an actual file under file://, since
-@rem               the browser does no rewriting. Catches relative links
-@rem               whose permalink shape doesn't match the rendered
-@rem               filename (e.g. `[Foo](Foo/)` when Jekyll wrote
-@rem               `Foo.html`, not `Foo/index.html`). --forbid on this
-@rem               pass also fails the build if any extracted link
-@rem               still points at https://docs.twinbasic.com/<path>
-@rem               (bare domain and trailing-slash root are exempt).
-@rem
-@rem All three checks always run so you see all errors in one pass; the
-@rem script exits non-zero if any fails (earlier failures take precedence
-@rem in the reported code).
+@rem _site/         Online tree.  --fallback-extensions html mirrors what
+@rem                GitHub Pages does at request time.
+@rem _site-offline/  Offline tree.  --forbid catches surviving live-site
+@rem                links the offlinify rewrite missed.
+@rem _site-pdf/      PDF source.  --no-fail makes failures informational
+@rem                (links in the book are not fully resolved).
 @setlocal
-@set CHECK=node "%~dp0..\scripts\check_links.mjs"
-@echo Checking _site/ (online) ...
-@%CHECK% --offline --include-fragments --fallback-extensions html --index-files "index.html,." --root-dir ".\_site" ".\_site" %*
-@set EXIT1=%ERRORLEVEL%
-@echo.
-@echo Checking _site-offline/ (offline, with --forbid) ...
-@rem No `.` in --index-files: under file://, a bare directory URL
-@rem (`Foo/`) requires an actual index.html inside. The online check
-@rem above accepts `.` because GitHub Pages can serve an unstyled
-@rem directory listing or a 404 in that case; offline, there's no
-@rem such fallback, and the link is just broken.
-@%CHECK% --offline --include-fragments --index-files "index.html" --forbid "https://docs.twinbasic.com" --root-dir ".\_site-offline" ".\_site-offline" %*
-@set EXIT2=%ERRORLEVEL%
-@echo.
-@echo Checking _site-pdf/book.html (informational -- failures do not block) ...
-@rem Links in the book are not fully resolved (absolute intra-site URLs stay live
-@rem until the book chapter transform rewrites them, and some fragments are still
-@rem missing). Run for visibility; exit code is intentionally not propagated.
-@%CHECK% --offline --include-fragments --root-dir ".\_site-pdf" ".\_site-pdf\book.html" %*
-@echo.
-@if %EXIT1% NEQ 0 exit /b %EXIT1%
-@exit /b %EXIT2%
+@node "%~dp0..\scripts\check_links.mjs"^
+ --offline --include-fragments --fallback-extensions html --index-files "index.html,." --root-dir ".\_site" ".\_site" %*^
+ /sep/^
+ --offline --include-fragments --index-files "index.html" --forbid "https://docs.twinbasic.com" --root-dir ".\_site-offline" ".\_site-offline" %*^
+ /sep/^
+ --offline --no-fail --include-fragments --root-dir ".\_site-pdf" ".\_site-pdf\book.html" %*
diff --git a/scripts/check_links.mjs b/scripts/check_links.mjs
index 620ee95e..67f8e9cf 100644
--- a/scripts/check_links.mjs
+++ b/scripts/check_links.mjs
@@ -1,16 +1,24 @@
 // Offline link checker for static sites.
 //
-// Typical invocation, from docs/check.bat:
+// Typical single-pass invocation (one segment of what docs/check.bat runs):
 //
 //     node scripts/check_links.mjs --offline --include-fragments
 //         --fallback-extensions html --index-files "index.html,."
 //         --root-dir docs/_site docs/_site
 //
+// Multiple passes can run in parallel by separating them with /sep/:
+//
+//     node scripts/check_links.mjs <args1...> /sep/ <args2...>
+//
+// Each /sep/-separated segment runs on its own worker_threads
+// Worker (a dedicated thread).  Results are collected and printed in
+// order with headers.  A single segment (no /sep/) runs inline.
+//
 // On this site (~733k link occurrences, ~12k unique targets across
-// 1127 HTML files / 124 MB) the script runs in ~2.2 s single-threaded
-// on the dev box. It dedupes (target, frag) up front so each unique
-// filesystem and fragment check fires exactly once regardless of how
-// many pages link to the same target.
+// 1127 HTML files / 124 MB) each pass runs in ~2.2 s on the dev box.
+// It dedupes (target, frag) up front so each unique filesystem and
+// fragment check fires exactly once regardless of how many pages
+// link to the same target.
 //
 // Online (network) link checking is not implemented. --offline is
 // therefore required; the script exits non-zero if it is absent.
@@ -31,6 +39,7 @@ import * as fs from "node:fs";
 import * as path from "node:path";
 import { performance } from "node:perf_hooks";
 import { Parser } from "htmlparser2";
+import { isMainThread, parentPort, workerData, Worker } from "node:worker_threads";
 
 // tag -> [attr, ...]. SAX walker dispatches on tag name; for each
 // matching attr present on that tag, the value becomes one or more
@@ -257,10 +266,15 @@ function checkPath(targetStr, isDirLink, fallbackExts, indexFiles) {
 
 function printHelp() {
   process.stdout.write(`Usage: node check_links.mjs [options] <inputs...>
+       node check_links.mjs <args1...> /sep/ <args2...> [/sep/ ...]
 
 Offline link checker for static sites. Only offline checking is
 implemented; --offline is required.
 
+Multiple check passes can be combined in one invocation by separating
+them with /sep/.  Each segment runs on its own worker thread; results
+are printed in order with headers.
+
 Options:
   --offline                  REQUIRED. Skip network checks.
   --include-fragments        Verify URL fragments against id/name attrs.
@@ -276,11 +290,11 @@ Options:
   --forbid PREFIX            Fail if any extracted link starts with this
                              URL prefix. The bare prefix and 'prefix/'
                              are exempt (intentional "go to live site"
-                             links). Repeatable. Used by the offline
-                             pass to catch live-site references the
-                             offlinify rewrite missed.
-  --threads N                Accepted for CLI compatibility; the Node
-                             port is currently single-threaded.
+                             links). Repeatable.
+  --no-fail                  Always exit 0, even if errors are found.
+                             Errors are still printed. Useful for
+                             informational checks that should not block.
+  --threads N                Accepted for CLI compatibility; ignored.
   -v, --verbose              Print per-stage timing breakdown.
   -h, --help                 Show this help and exit.
 
@@ -298,15 +312,13 @@ function parseArgs(argv) {
     rootDir: null,
     basePath: "",
     forbid: [],
+    noFail: false,
     verbose: false,
   };
   const inputs = [];
   const unknown = [];
   const need = (flag, i) => {
-    if (i >= argv.length) {
-      process.stderr.write(`error: ${flag} requires a value\n`);
-      process.exit(2);
-    }
+    if (i >= argv.length) throw new Error(`${flag} requires a value`);
     return argv[i];
   };
 
@@ -320,9 +332,10 @@ function parseArgs(argv) {
     else if (a === "--root-dir") opts.rootDir = need(a, i++);
     else if (a === "--base-path") opts.basePath = need(a, i++);
     else if (a === "--forbid") opts.forbid.push(need(a, i++));
+    else if (a === "--no-fail") opts.noFail = true;
     else if (a === "--threads") { need(a, i++); /* accepted, ignored */ }
     else if (a === "-v" || a === "--verbose") opts.verbose = true;
-    else if (a === "-h" || a === "--help") { printHelp(); process.exit(0); }
+    else if (a === "-h" || a === "--help") { /* handled before dispatch */ }
     else if (a.startsWith("--")) {
       // Tolerate unknown flags passed through via check.bat's %*.
       // Consume an attached value if present.
@@ -341,46 +354,59 @@ function parseArgs(argv) {
 }
 
 function collectHtmlFiles(inputs) {
-  const out = [];
+  const files = [];
+  const warnings = [];
   for (const inp of inputs) {
     const s = statSafe(inp);
     if (!s) {
-      process.stderr.write(`warning: input not found: ${inp}\n`);
+      warnings.push(`warning: input not found: ${inp}\n`);
       continue;
     }
     if (s.isFile()) {
-      out.push(inp);
+      files.push(inp);
     } else if (s.isDirectory()) {
       const entries = fs.readdirSync(inp, { recursive: true, withFileTypes: true });
       for (const e of entries) {
         if (e.isFile() && e.name.endsWith(".html")) {
-          out.push(path.join(e.parentPath || inp, e.name));
+          files.push(path.join(e.parentPath || inp, e.name));
         }
       }
     }
   }
-  return out;
+  return { files, warnings };
 }
 
-function main() {
-  const { opts, inputs, unknown } = parseArgs(process.argv.slice(2));
+// Run a single check pass.  All output is collected into a buffer;
+// nothing is written to stdout/stderr.  Returns { output, exitCode }.
+function runCheck(argv) {
+  const buf = [];
+  const write = (s) => buf.push(s);
+
+  let parsed;
+  try {
+    parsed = parseArgs(argv);
+  } catch (e) {
+    write(`error: ${e.message}\n`);
+    return { output: buf.join(""), exitCode: 2 };
+  }
+  const { opts, inputs, unknown } = parsed;
 
   if (unknown.length) {
-    process.stderr.write(
+    write(
       `warning: ignoring unrecognised arguments: ${unknown.join(" ")}\n`
     );
   }
 
   if (!opts.offline) {
-    process.stderr.write(
+    write(
       "error: --offline is required. Online (network) checking is not " +
       "implemented by this tool.\n"
     );
-    process.exit(2);
+    return { output: buf.join(""), exitCode: 2 };
   }
   if (!inputs.length) {
-    process.stderr.write("error: at least one input file or directory is required\n");
-    process.exit(2);
+    write("error: at least one input file or directory is required\n");
+    return { output: buf.join(""), exitCode: 2 };
   }
 
   // Keep --root-dir in its caller-supplied shape (no path.resolve) so
@@ -396,7 +422,8 @@ function main() {
   const basePath = normalizeBasePath(opts.basePath);
 
   const t0 = performance.now();
-  const htmlFiles = collectHtmlFiles(inputs);
+  const { files: htmlFiles, warnings: walkWarnings } = collectHtmlFiles(inputs);
+  for (const w of walkWarnings) write(w);
   const tWalk = performance.now();
 
   // Per-file: extract once, then group hrefs by (source_dir, href) so we
@@ -570,7 +597,7 @@ function main() {
       }
     }
     lines.push("");
-    process.stdout.write(lines.join("\n") + "\n");
+    write(lines.join("\n") + "\n");
   }
 
   let forbiddenCount = 0;
@@ -583,25 +610,114 @@ function main() {
   const okUnique = unique - errorsUnique;
   const elapsed = (tDone - t0) / 1000;
   const forbidNote = forbidPrefixes ? `, ${forbiddenCount} forbidden` : "";
-  process.stdout.write(
+  write(
     `Checked ${total} occurrences (${unique} unique) in ${elapsed.toFixed(3)}s ` +
     `-- ${okUnique} OK, ${errorsUnique} broken${forbidNote}\n`
   );
 
   if (opts.verbose) {
     const fmt = (a, b) => `${((b - a) / 1000).toFixed(3)}s`;
-    process.stdout.write("\n");
-    process.stdout.write(`  Files scanned:        ${htmlFiles.length}\n`);
-    process.stdout.write(`  Fragment targets:     ${filesForFragments.length}\n`);
-    process.stdout.write(`  Walk:        ${fmt(t0, tWalk)}\n`);
-    process.stdout.write(`  Extract:     ${fmt(tWalk, tExtract)}\n`);
-    process.stdout.write(`  Resolve:     ${fmt(tExtract, tResolve)}\n`);
-    process.stdout.write(`  Check paths: ${fmt(tResolve, tCheckPaths)}\n`);
-    process.stdout.write(`  Fragments:   ${fmt(tCheckPaths, tFragments)}\n`);
-    process.stdout.write(`  Report:      ${fmt(tFragments, tDone)}\n`);
+    write("\n");
+    write(`  Files scanned:        ${htmlFiles.length}\n`);
+    write(`  Fragment targets:     ${filesForFragments.length}\n`);
+    write(`  Walk:        ${fmt(t0, tWalk)}\n`);
+    write(`  Extract:     ${fmt(tWalk, tExtract)}\n`);
+    write(`  Resolve:     ${fmt(tExtract, tResolve)}\n`);
+    write(`  Check paths: ${fmt(tResolve, tCheckPaths)}\n`);
+    write(`  Fragments:   ${fmt(tCheckPaths, tFragments)}\n`);
+    write(`  Report:      ${fmt(tFragments, tDone)}\n`);
   }
 
-  if (broken.length || forbiddenCount) process.exit(1);
+  let exitCode = (broken.length || forbiddenCount) ? 1 : 0;
+  if (opts.noFail) exitCode = 0;
+  return { output: buf.join(""), exitCode };
 }
 
-main();
+// ── Module entry ────────────────────────────────────────────────
+
+if (!isMainThread) {
+  const result = runCheck(workerData.argv);
+  parentPort.postMessage(result);
+} else {
+  const rawArgv = process.argv.slice(2);
+
+  if (rawArgv.includes("-h") || rawArgv.includes("--help")) {
+    printHelp();
+    process.exit(0);
+  }
+
+  // Split on /sep/ into separate command lines.
+  const commands = [];
+  let current = [];
+  for (const arg of rawArgv) {
+    if (arg === "/sep/") {
+      commands.push(current);
+      current = [];
+    } else {
+      current.push(arg);
+    }
+  }
+  commands.push(current);
+  const segments = commands.filter(c => c.length > 0);
+
+  if (segments.length === 0) {
+    printHelp();
+    process.exit(2);
+  }
+
+  if (segments.length === 1) {
+    // Single command -- run inline, no worker overhead.
+    const { output, exitCode } = runCheck(segments[0]);
+    process.stdout.write(output);
+    process.exit(exitCode);
+  }
+
+  // Multiple commands -- dispatch to worker threads.
+  const t0 = performance.now();
+  const n = segments.length;
+  process.stdout.write(`Running ${n} checks in parallel...\n`);
+
+  const promises = segments.map((cmd) =>
+    new Promise((resolve, reject) => {
+      const w = new Worker(new URL(import.meta.url), {
+        workerData: { argv: cmd },
+      });
+      let result;
+      w.on("message", (msg) => { result = msg; });
+      w.on("error", reject);
+      w.on("exit", () => {
+        if (result) resolve(result);
+        else reject(new Error("worker exited without posting a result"));
+      });
+    })
+  );
+
+  const settled = await Promise.allSettled(promises);
+  const elapsed = ((performance.now() - t0) / 1000).toFixed(3);
+
+  const HEADER_WIDTH = 78;
+  let exitCode = 0;
+  for (let i = 0; i < settled.length; i++) {
+    const tag = `[${i + 1}/${n}]`;
+    const prefix = `== ${tag} `;
+    const header = prefix + "=".repeat(Math.max(3, HEADER_WIDTH - prefix.length));
+    const cmdLine = segments[i].join(" ");
+
+    process.stdout.write(`\n${header}\n${cmdLine}\n\n`);
+
+    if (settled[i].status === "fulfilled") {
+      const r = settled[i].value;
+      process.stdout.write(r.output);
+      if (r.exitCode !== 0 && exitCode === 0) exitCode = r.exitCode;
+    } else {
+      process.stdout.write(`INTERNAL ERROR: ${settled[i].reason}\n`);
+      if (exitCode === 0) exitCode = 1;
+    }
+  }
+
+  const summaryPrefix = `== ${n} checks completed in ${elapsed}s `;
+  const summary = summaryPrefix + "=".repeat(Math.max(3, HEADER_WIDTH - summaryPrefix.length));
+  process.stdout.write(`\n${summary}\n`);
+
+  process.exit(exitCode);
+}