From c25ffc702ef0c7a1c78d4e9409841df7c2f94e45 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Rajak Date: Fri, 5 Jun 2026 14:46:32 +0000 Subject: [PATCH 1/4] feat: automate fetching and sanitization of webpack ecosystem readmes --- .gitignore | 2 + package.json | 5 +- scripts/fetch-readmes.mjs | 208 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 scripts/fetch-readmes.mjs diff --git a/.gitignore b/.gitignore index cf61f01..483d879 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ out *.generated.* /.cache /pages/api +/pages/loaders/ +/pages/plugins/ diff --git a/package.json b/package.json index 3101f51..28401a1 100644 --- a/package.json +++ b/package.json @@ -2,8 +2,11 @@ "scripts": { "prep": "node scripts/prepare/index.mjs", "build:md": "node scripts/markdown/index.mjs", + "build:md:loaders": "node scripts/fetch-readmes.mjs --loaders", + "build:md:plugins": "node scripts/fetch-readmes.mjs --plugins", + "build:md:readmes": "node scripts/fetch-readmes.mjs", "build:html": "node scripts/html/index.mjs", - "build": "npm run prep && npm run build:md && npm run build:html", + "build": "npm run prep && npm run build:md && npm run build:md:readmes && npm run build:html", "lint": "eslint .", "lint:fix": "eslint --fix .", "format": "prettier --write .", diff --git a/scripts/fetch-readmes.mjs b/scripts/fetch-readmes.mjs new file mode 100644 index 0000000..e5b48fc --- /dev/null +++ b/scripts/fetch-readmes.mjs @@ -0,0 +1,208 @@ +import { mkdirSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; + +const { GH_TOKEN } = process.env; +if (!GH_TOKEN) { + throw new Error('GH_TOKEN environment variable is not set'); +} + +const BASE_HEADERS = { + Authorization: `Bearer ${GH_TOKEN}`, + 'X-GitHub-Api-Version': '2022-11-28', +}; + +const parseNextLink = linkHeader => { + if (!linkHeader) return null; + const match = linkHeader.match(/<([^>]+)>;\s*rel="next"/); + return match ? match[1] : null; +}; + +const discoverRepos = async () => { + const loaders = []; + const plugins = []; + let url = + 'https://api.github.com/orgs/webpack/repos?per_page=100&type=public'; + + while (url) { + const res = await fetch(url, { headers: BASE_HEADERS }); + if (!res.ok) + throw new Error( + `Failed to list org repos: ${res.status} ${res.statusText}` + ); + + const repos = await res.json(); + for (const repo of repos) { + if (repo.archived) continue; + if (repo.name.endsWith('-loader')) { + loaders.push(repo.full_name); + } else if ( + repo.name.endsWith('-webpack-plugin') || + repo.name.endsWith('-plugin') + ) { + plugins.push(repo.full_name); + } + } + + url = parseNextLink(res.headers.get('link')); + } + + return { loaders, plugins }; +}; + +const stripLeadingDiv = content => + content.replace(/^\s*\n*/i, ''); + +// Remove badge lines - lines consisting only of [![...][ref]][ref] or [![...](url)](url) links +const stripBadges = content => + content + .replace( + /^(\[!\[[^\]]*\](?:\[[^\]]*\]|\([^)]*\))\]\s*(?:\[[^\]]*\]|\([^)]*\))\s*)+$/gm, + '' + ) + .replace(/\n{3,}/g, '\n\n'); + +const SUPPORTED_LANGS = new Set([ + 'bash', + 'c', + 'c++', + 'cjs', + 'coffee', + 'coffeescript', + 'console', + 'cpp', + 'diff', + 'docker', + 'dockerfile', + 'glsl', + 'gql', + 'graphql', + 'http', + 'ini', + 'java', + 'javascript', + 'js', + 'json', + 'jsx', + 'mjs', + 'powershell', + 'ps', + 'ps1', + 'regex', + 'regexp', + 'sh', + 'shell', + 'shellscript', + 'shellsession', + 'sql', + 'ts', + 'tsx', + 'typescript', + 'xml', + 'yaml', + 'yml', + 'zsh', +]); + +const sanitizeCodeFences = content => + content.replace(/^```([a-zA-Z0-9_+-]+)\b/gm, (match, lang) => + SUPPORTED_LANGS.has(lang.toLowerCase()) ? match : '```' + ); + +// remark-gfm does not support GitHub alert syntax (> [!TYPE]); rewrite to bold label inside the blockquote. +const GFM_ALERT_LABELS = { + NOTE: 'Note', + TIP: 'Tip', + IMPORTANT: 'Important', + WARNING: 'Warning', + CAUTION: 'Caution', +}; +const GFM_ALERT_RE = + /^([ \t]*>[ \t]*)\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\][ \t]*$/gim; + +const transformGfmAlerts = content => + content.replace( + GFM_ALERT_RE, + (_, prefix, type) => `${prefix}**${GFM_ALERT_LABELS[type]}:**` + ); + +const processContent = content => + transformGfmAlerts(sanitizeCodeFences(stripBadges(stripLeadingDiv(content)))); + +const fetchReadme = async fullName => { + const url = `https://api.github.com/repos/${fullName}/readme`; + const res = await fetch(url, { + headers: { ...BASE_HEADERS, Accept: 'application/vnd.github.raw' }, + }); + return res.ok + ? { ok: true, text: await res.text() } + : { ok: false, status: res.status }; +}; + +const processRepos = async ( + repos, + { layout, groupName, basePath, outputDir } +) => { + mkdirSync(outputDir, { recursive: true }); + const repoName = r => r.split('/')[1]; + console.log( + `Discovered ${groupName.toLowerCase()}: ${repos.map(repoName).join(', ')}` + ); + + const fetched = []; + for (const fullName of repos) { + const name = repoName(fullName); + const result = await fetchReadme(fullName); + if (!result.ok) { + console.log(`Failed: ${name} — ${result.status}`); + continue; + } + const content = `---\nlayout: ${layout}\n---\n\n${processContent(result.text)}`; + writeFileSync(join(outputDir, `${name}.md`), content, 'utf8'); + fetched.push(name); + console.log(`Fetched: ${name}`); + } + + const siteJson = { + sidebar: [ + { + groupName, + items: fetched + .sort() + .map(name => ({ link: `${basePath}/${name}`, label: name })), + }, + ], + }; + writeFileSync( + join(outputDir, 'site.json'), + JSON.stringify(siteJson, null, 2) + '\n', + 'utf8' + ); + console.log( + `Written: ${outputDir}/site.json (${fetched.length} ${groupName.toLowerCase()})` + ); +}; + +const args = process.argv.slice(2); +const runLoaders = args.includes('--loaders') || args.length === 0; +const runPlugins = args.includes('--plugins') || args.length === 0; + +const root = new URL('..', import.meta.url).pathname; +const { loaders, plugins } = await discoverRepos(); + +if (runLoaders) { + await processRepos(loaders, { + layout: 'loader', + groupName: 'Loaders', + basePath: '/loaders', + outputDir: join(root, 'pages/loaders'), + }); +} + +if (runPlugins) { + await processRepos(plugins, { + layout: 'plugin', + groupName: 'Plugins', + basePath: '/plugins', + outputDir: join(root, 'pages/plugins'), + }); +} From cabe77f8ee69a565c420758e893b3ec84a09d4a6 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Rajak Date: Sat, 6 Jun 2026 16:34:12 +0000 Subject: [PATCH 2/4] Fixup! --- components/Footer/index.jsx | 2 +- components/NavBar.jsx | 2 +- components/SideBar.jsx | 29 +++++++++++++++++++---------- pages/site.mjs | 7 +++++++ scripts/fetch-readmes.mjs | 23 +++++++---------------- scripts/html/doc-kit.config.mjs | 6 ++++-- 6 files changed, 39 insertions(+), 30 deletions(-) create mode 100644 pages/site.mjs diff --git a/components/Footer/index.jsx b/components/Footer/index.jsx index 16f9ace..7f6d787 100644 --- a/components/Footer/index.jsx +++ b/components/Footer/index.jsx @@ -2,7 +2,7 @@ import GitHubIcon from '@node-core/ui-components/Icons/Social/GitHub'; import LinkedInIcon from '@node-core/ui-components/Icons/Social/LinkedIn'; import DiscordIcon from '@node-core/ui-components/Icons/Social/Discord'; import XIcon from '@node-core/ui-components/Icons/Social/X'; -import { footer } from '#theme/site' with { type: 'json' }; +import { footer } from '#theme/site'; import Logo from '#theme/Logo'; import styles from './index.module.css'; diff --git a/components/NavBar.jsx b/components/NavBar.jsx index fd75c3e..ab10da8 100644 --- a/components/NavBar.jsx +++ b/components/NavBar.jsx @@ -5,7 +5,7 @@ import GitHubIcon from '@node-core/ui-components/Icons/Social/GitHub'; import SearchBox from '@node-core/doc-kit/src/generators/web/ui/components/SearchBox'; import { useTheme } from '@node-core/doc-kit/src/generators/web/ui/hooks/useTheme.mjs'; -import { navbar } from '#theme/site' with { type: 'json' }; +import { navbar } from '#theme/site'; import Logo from '#theme/Logo'; /** diff --git a/components/SideBar.jsx b/components/SideBar.jsx index 2b5b181..b33cd05 100644 --- a/components/SideBar.jsx +++ b/components/SideBar.jsx @@ -1,5 +1,5 @@ import SideBar from '@node-core/ui-components/Containers/Sidebar'; -import { sidebar } from '#theme/local/site' with { type: 'json' }; +import { sidebar } from '#theme/local/site'; /** @param {string} url */ const redirect = url => (window.location.href = url); @@ -8,15 +8,24 @@ const PrefetchLink = props => ; const pathnameFor = path => path.replace(/\/index$/, '') || '/'; +const groupsFor = path => { + const segment = path.split('/').filter(Boolean)[0]; + const matched = sidebar.filter(g => g.groupName.toLowerCase() === segment); + return matched.length > 0 ? matched : sidebar; +}; + /** * Sidebar component for MDX documentation with page navigation. */ -export default ({ metadata }) => ( - -); +export default ({ metadata }) => { + const path = pathnameFor(metadata.path); + return ( + + ); +}; diff --git a/pages/site.mjs b/pages/site.mjs new file mode 100644 index 0000000..22c8075 --- /dev/null +++ b/pages/site.mjs @@ -0,0 +1,7 @@ +import base from './site.json' with { type: 'json' }; +import loadersSite from './loaders/site.json' with { type: 'json' }; +import pluginsSite from './plugins/site.json' with { type: 'json' }; + +export const { navbar, footer } = base; + +export const sidebar = [...loadersSite.sidebar, ...pluginsSite.sidebar]; diff --git a/scripts/fetch-readmes.mjs b/scripts/fetch-readmes.mjs index e5b48fc..7c9fe9e 100644 --- a/scripts/fetch-readmes.mjs +++ b/scripts/fetch-readmes.mjs @@ -2,12 +2,9 @@ import { mkdirSync, writeFileSync } from 'node:fs'; import { join } from 'node:path'; const { GH_TOKEN } = process.env; -if (!GH_TOKEN) { - throw new Error('GH_TOKEN environment variable is not set'); -} const BASE_HEADERS = { - Authorization: `Bearer ${GH_TOKEN}`, + ...(GH_TOKEN && { Authorization: `Bearer ${GH_TOKEN}` }), 'X-GitHub-Api-Version': '2022-11-28', }; @@ -61,6 +58,7 @@ const stripBadges = content => ) .replace(/\n{3,}/g, '\n\n'); +// TODO: remove this allowlist once Shiki silently skips unknown languages instead of build errors. const SUPPORTED_LANGS = new Set([ 'bash', 'c', @@ -129,19 +127,14 @@ const processContent = content => transformGfmAlerts(sanitizeCodeFences(stripBadges(stripLeadingDiv(content)))); const fetchReadme = async fullName => { - const url = `https://api.github.com/repos/${fullName}/readme`; - const res = await fetch(url, { - headers: { ...BASE_HEADERS, Accept: 'application/vnd.github.raw' }, - }); + const url = `https://raw.githubusercontent.com/${fullName}/HEAD/README.md`; + const res = await fetch(url); return res.ok ? { ok: true, text: await res.text() } : { ok: false, status: res.status }; }; -const processRepos = async ( - repos, - { layout, groupName, basePath, outputDir } -) => { +const processRepos = async (repos, { groupName, basePath, outputDir }) => { mkdirSync(outputDir, { recursive: true }); const repoName = r => r.split('/')[1]; console.log( @@ -156,7 +149,7 @@ const processRepos = async ( console.log(`Failed: ${name} — ${result.status}`); continue; } - const content = `---\nlayout: ${layout}\n---\n\n${processContent(result.text)}`; + const content = processContent(result.text); writeFileSync(join(outputDir, `${name}.md`), content, 'utf8'); fetched.push(name); console.log(`Fetched: ${name}`); @@ -186,12 +179,11 @@ const args = process.argv.slice(2); const runLoaders = args.includes('--loaders') || args.length === 0; const runPlugins = args.includes('--plugins') || args.length === 0; -const root = new URL('..', import.meta.url).pathname; +const root = join(import.meta.dirname, '..'); const { loaders, plugins } = await discoverRepos(); if (runLoaders) { await processRepos(loaders, { - layout: 'loader', groupName: 'Loaders', basePath: '/loaders', outputDir: join(root, 'pages/loaders'), @@ -200,7 +192,6 @@ if (runLoaders) { if (runPlugins) { await processRepos(plugins, { - layout: 'plugin', groupName: 'Plugins', basePath: '/plugins', outputDir: join(root, 'pages/plugins'), diff --git a/scripts/html/doc-kit.config.mjs b/scripts/html/doc-kit.config.mjs index 2cea088..c588486 100644 --- a/scripts/html/doc-kit.config.mjs +++ b/scripts/html/doc-kit.config.mjs @@ -40,10 +40,12 @@ export default { useAbsoluteURLs: true, remoteConfigUrl: null, imports: { - '#theme/local/site': join(inputDir, 'site.json'), + '#theme/local/site': VERSION + ? join(inputDir, 'site.json') + : join(ROOT, 'pages/site.mjs'), '#theme/Sidebar': join(ROOT, 'components/SideBar.jsx'), - '#theme/site': join(ROOT, 'pages/site.json'), + '#theme/site': join(ROOT, 'pages/site.mjs'), '#theme/Layout': join(ROOT, 'components/Layout.jsx'), '#theme/Navigation': join(ROOT, 'components/NavBar.jsx'), '#theme/Footer': join(ROOT, 'components/Footer/index.jsx'), From 19f2714df500e0694ea62875450e98bf998ef833 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Rajak <145889151+ryzrr@users.noreply.github.com> Date: Sun, 7 Jun 2026 18:53:39 +0530 Subject: [PATCH 3/4] Update scripts/fetch-readmes.mjs Co-authored-by: Aviv Keller --- scripts/fetch-readmes.mjs | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/fetch-readmes.mjs b/scripts/fetch-readmes.mjs index 7c9fe9e..7462c19 100644 --- a/scripts/fetch-readmes.mjs +++ b/scripts/fetch-readmes.mjs @@ -33,7 +33,6 @@ const discoverRepos = async () => { if (repo.name.endsWith('-loader')) { loaders.push(repo.full_name); } else if ( - repo.name.endsWith('-webpack-plugin') || repo.name.endsWith('-plugin') ) { plugins.push(repo.full_name); From 445f65007d68344f6c885f6fb135ea8dece14754 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Rajak Date: Sun, 7 Jun 2026 13:50:30 +0000 Subject: [PATCH 4/4] style: format fetch-readmes.mjs --- scripts/fetch-readmes.mjs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/fetch-readmes.mjs b/scripts/fetch-readmes.mjs index 7462c19..ac424c8 100644 --- a/scripts/fetch-readmes.mjs +++ b/scripts/fetch-readmes.mjs @@ -32,9 +32,7 @@ const discoverRepos = async () => { if (repo.archived) continue; if (repo.name.endsWith('-loader')) { loaders.push(repo.full_name); - } else if ( - repo.name.endsWith('-plugin') - ) { + } else if (repo.name.endsWith('-plugin')) { plugins.push(repo.full_name); } }