diff --git a/.gitignore b/.gitignore
index b651ebe92b..5a321307d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ plugins/*/compiled
.netlify
.cache-loader
static/llms.txt
+static/llms-full.txt
static/reference-full.md
static/web-console/*.json
diff --git a/package.json b/package.json
index 5975e39a97..cf25b452de 100644
--- a/package.json
+++ b/package.json
@@ -5,7 +5,7 @@
"license": "Apache-2.0",
"scripts": {
"start": "cross-env docusaurus start --port 3001",
- "prebuild": "docusaurus clear && node ./scripts/generate-llms-files.js && node ./scripts/generate-reference-full.js && node ./scripts/generate-web-console-json.js",
+ "prebuild": "docusaurus clear && node ./scripts/generate-llms-files.js && node ./scripts/generate-llms-full.js && node ./scripts/generate-reference-full.js && node ./scripts/generate-web-console-json.js",
"build": "cross-env NO_UPDATE_NOTIFIER=true USE_SIMPLE_CSS_MINIFIER=true PWA_SW_CUSTOM= docusaurus build",
"deploy": "docusaurus deploy",
"serve": "docusaurus serve",
diff --git a/scripts/generate-llms-files.js b/scripts/generate-llms-files.js
index a88380f0d3..6cad75d131 100644
--- a/scripts/generate-llms-files.js
+++ b/scripts/generate-llms-files.js
@@ -3,7 +3,8 @@ const path = require('path')
const yaml = require('js-yaml')
const sidebarConfig = require('../documentation/sidebars.js')
-const BASE_URL = 'https://questdb.com/docs/'
+const { generateUrl: buildDocUrl } = require('./lib/docs-urls')
+const { subtreeContainsDoc } = require('./lib/sidebar-utils')
const processedFiles = new Map()
@@ -53,43 +54,10 @@ function extractFrontmatter(filePath) {
}
}
-function normalizeUrl(url) {
- const clean = url.endsWith("/") ? url.slice(0, -1) : url
- return clean + ".md"
-}
-
function generateUrl(docId, docPath) {
// Extract frontmatter to check for custom slug
const { slug } = extractFrontmatter(docPath)
-
- if (slug) {
- let urlPath = slug
-
- // Absolute slug (starts with /)
- if (urlPath.startsWith('/')) {
- urlPath = urlPath.substring(1)
- if (urlPath === '') {
- return BASE_URL + "index.md"
- }
- return normalizeUrl(BASE_URL + urlPath)
- }
-
- // Relative slug - resolve it relative to the document's directory
- const docDir = path.dirname(docId)
- if (docDir && docDir !== '.') {
- urlPath = path.join(docDir, urlPath)
- }
-
- return normalizeUrl(BASE_URL + urlPath)
- }
-
- // Default behavior: use docId
- if (docId === 'introduction') {
- return BASE_URL + "index.md"
- }
- // Strip /index suffix to match raw-markdown plugin output (e.g. cookbook/index -> cookbook.md)
- let urlDocId = docId.endsWith('/index') ? docId.slice(0, -'/index'.length) : docId
- return normalizeUrl(BASE_URL + urlDocId)
+ return buildDocUrl(docId, slug)
}
function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
@@ -123,13 +91,25 @@ function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
result += '\n'
} else if (item.type === 'category') {
+ // A category's own link page (link: {type: 'doc'}) is a real doc too,
+ // unless the same doc is already listed among the category's items
+ const linkDoc = item.link && item.link.type === 'doc' && item.link.id &&
+ !subtreeContainsDoc(item.items, item.link.id)
+ ? [{ type: 'doc', id: item.link.id }]
+ : []
if (isTopLevel) {
result += `\n## ${item.label}\n`
+ if (linkDoc.length > 0) {
+ result += processForLlmsTxt(linkDoc, 0, false)
+ }
if (item.items && item.items.length > 0) {
result += processForLlmsTxt(item.items, 0, false)
}
} else {
result += `${indentStr}${item.label}\n`
+ if (linkDoc.length > 0) {
+ result += processForLlmsTxt(linkDoc, indent + 1, false)
+ }
if (item.items && item.items.length > 0) {
result += processForLlmsTxt(item.items, indent + 1, false)
}
diff --git a/scripts/generate-llms-full.js b/scripts/generate-llms-full.js
new file mode 100644
index 0000000000..ceb02a041f
--- /dev/null
+++ b/scripts/generate-llms-full.js
@@ -0,0 +1,241 @@
+const fs = require('fs')
+const path = require('path')
+const matter = require('gray-matter')
+const {
+ convertAllComponents,
+ bumpHeadings,
+ normalizeNewLines,
+ removeImports,
+ processPartialImports,
+} = require('../plugins/raw-markdown/convert-components')
+const remoteRepoExamplePlugin = require('../plugins/remote-repo-example/index')
+
+const sidebarConfig = require('../documentation/sidebars.js')
+const { BASE_URL, generateUrl } = require('./lib/docs-urls')
+const { subtreeContainsDoc } = require('./lib/sidebar-utils')
+
+// Absolute paths used by this script, all derived from this file's location:
+// ROOT_DIR is the parent of this script's directory, DOCS_DIR is where doc
+// ids are resolved, and OUTPUT_DIR is where llms-full.txt is written.
+const ROOT_DIR = path.resolve(__dirname, '..')
+const DOCS_DIR = path.join(ROOT_DIR, 'documentation')
+const OUTPUT_DIR = path.join(ROOT_DIR, 'static')
+
+// Load the raw source of a doc from its id (a path relative to DOCS_DIR,
+// without extension). The `.md` file is preferred; `.mdx` is the fallback.
+// Returns { raw, filePath } for the first file that exists, or null (after
+// logging a warning that names both candidates) when neither exists.
+function readDocFile(docId) {
+  const candidates = ['.md', '.mdx'].map(ext => path.join(DOCS_DIR, docId + ext))
+  // find() stops at the first hit, so .mdx is only probed when .md is absent
+  const filePath = candidates.find(candidate => fs.existsSync(candidate))
+
+  if (filePath === undefined) {
+    const [mdPath, mdxPath] = candidates
+    console.warn(`[generate-llms-full] Warning: File not found: ${mdPath} or ${mdxPath}`)
+    return null
+  }
+
+  return { raw: fs.readFileSync(filePath, 'utf8'), filePath }
+}
+
+// Partial bodies keyed by absolute path. Shared across all docs so a partial
+// imported by several docs is read and parsed only once per run.
+const partialCache = new Map()
+
+// Resolve a partial import against the importing doc's directory and return
+// the partial's content with its frontmatter stripped.
+//   partialPath    - path as written in the import (may contain \_ escapes)
+//   currentFileDir - importing doc's directory, relative to DOCS_DIR
+// Returns '' and logs a warning when the partial does not exist; that miss is
+// not cached, so every later reference warns again.
+function loadPartial(partialPath, currentFileDir) {
+  // Markdown-escaped underscores (\_) map back to plain underscores
+  const cleanedPath = partialPath.replace(/\\_/g, '_')
+  const baseDir = path.join(DOCS_DIR, currentFileDir)
+  const absolutePath = path.resolve(baseDir, cleanedPath)
+
+  if (partialCache.has(absolutePath)) {
+    return partialCache.get(absolutePath)
+  }
+
+  if (!fs.existsSync(absolutePath)) {
+    console.warn(`[generate-llms-full] Warning: Partial not found: ${absolutePath}`)
+    return ''
+  }
+
+  const { content } = matter(fs.readFileSync(absolutePath, 'utf8'))
+  partialCache.set(absolutePath, content)
+  return content
+}
+
+// Render one doc as a self-contained markdown entry for llms-full.txt.
+//   docId        - doc id, resolved to a .md/.mdx file by readDocFile
+//   repoExamples - remote example data, passed through to convertAllComponents
+// Resolves to '' when the doc file is missing. Otherwise resolves to an
+// "## title" heading, a "Source: url" line, the frontmatter description
+// (if set) and the converted body, ending with a blank line.
+async function renderDoc(docId, repoExamples) {
+ const doc = readDocFile(docId)
+ if (!doc) return ''
+
+ // Split frontmatter (title, slug, description) from the markdown body
+ const { data: frontmatter, content: mainContent } = matter(doc.raw)
+
+ // Process partial component imports
+ // loadPartial expects the doc's directory relative to DOCS_DIR
+ const relativeDir = path.relative(DOCS_DIR, path.dirname(doc.filePath))
+ let processedContent = processPartialImports(mainContent, loadPartial, relativeDir)
+
+ // Convert MDX components to markdown
+ processedContent = await convertAllComponents(
+ processedContent,
+ path.dirname(doc.filePath),
+ DOCS_DIR,
+ repoExamples,
+ )
+
+ processedContent = removeImports(processedContent)
+ processedContent = normalizeNewLines(processedContent)
+
+ // Bump body headings by 2 (H1 -> H3, H2 -> H4, …) so nothing in a doc body
+ // can collide with the H1 section headers or the H2 per-doc title below —
+ // some docs (introduction, changelog) legitimately contain body H1s
+ processedContent = bumpHeadings(processedContent, 2)
+
+ // Fall back to the doc id when the frontmatter has no title; a missing slug
+ // is passed as null so generateUrl derives the URL from the doc id
+ const title = frontmatter.title || docId
+ const url = generateUrl(docId, frontmatter.slug || null)
+
+ let out = `## ${title}\n\n`
+ out += `Source: ${url}\n\n`
+ if (frontmatter.description) {
+ out += `${frontmatter.description}\n\n`
+ }
+ // Trim the body so every entry ends with exactly one blank line
+ out += processedContent.trim() + '\n\n'
+ return out
+}
+
+// Label for a doc that forms its own section: the frontmatter `title`, or
+// the doc id itself when the file is missing or the title is empty/absent.
+function docTitle(docId) {
+  const doc = readDocFile(docId)
+  const title = doc ? matter(doc.raw).data.title : undefined
+  return title || docId
+}
+
+// Walk the sidebar in order, collecting doc ids grouped into sections.
+// Top-level categories become sections labeled by the category. Loose
+// top-level docs before the first category form an "Overview" section;
+// loose docs appearing after a category (e.g. changelog) each get their own
+// section labeled by the doc's title, so no doc is misattributed to a
+// neighboring category. A category's own `link: {type: 'doc'}` page is
+// included before its items unless the items already list it — the same
+// rule (and therefore the same order) as the llms.txt generator.
+//
+// items: top-level sidebar entries (doc-id strings, or objects with a `type`).
+// Returns an array of { label, docIds } in sidebar order. Doc ids are NOT
+// de-duplicated here; a doc listed twice in the sidebar appears twice.
+function collectSections(items) {
+ const sections = []
+ // Holds loose docs seen before the first category; pushed at most once
+ const leading = { label: 'Overview', docIds: [] }
+ let seenCategory = false
+
+ // [link doc id] when the category links to a doc that none of its items
+ // (at any depth) already list, otherwise []
+ function categoryLinkDocIds(item) {
+ return item.link && item.link.type === 'doc' && item.link.id &&
+ !subtreeContainsDoc(item.items, item.link.id)
+ ? [item.link.id]
+ : []
+ }
+
+ // Depth-first flattening of a category's items into `into`; nested
+ // categories do not start new sections
+ function collectDocIds(subItems, into) {
+ for (const item of subItems) {
+ if (typeof item === 'string') {
+ into.push(item)
+ } else if (item.type === 'doc') {
+ into.push(item.id)
+ } else if (item.type === 'category') {
+ into.push(...categoryLinkDocIds(item))
+ if (item.items) {
+ collectDocIds(item.items, into)
+ }
+ }
+ // item.type === 'link' is external; skip
+ }
+ }
+
+ for (const item of items) {
+ if (typeof item === 'string' || item.type === 'doc') {
+ const docId = typeof item === 'string' ? item : item.id
+ if (seenCategory) {
+ // Loose doc after a category: its own section, labeled by its title
+ sections.push({ label: docTitle(docId), docIds: [docId] })
+ } else {
+ leading.docIds.push(docId)
+ }
+ } else if (item.type === 'category') {
+ // Emit the Overview section just before the first category so the
+ // sidebar order is preserved
+ if (!seenCategory && leading.docIds.length > 0) {
+ sections.push(leading)
+ }
+ seenCategory = true
+ const section = { label: item.label, docIds: [] }
+ section.docIds.push(...categoryLinkDocIds(item))
+ if (item.items) {
+ collectDocIds(item.items, section.docIds)
+ }
+ sections.push(section)
+ }
+ }
+
+ // Sidebar without any category: the loose docs were never flushed above
+ if (!seenCategory && leading.docIds.length > 0) {
+ sections.push(leading)
+ }
+
+ return sections
+}
+
+// Load remote example data through the remote-repo-example plugin's
+// loadContent(). Per the original note this is the same data the
+// raw-markdown plugin receives at build time, so remote example blocks
+// render real code instead of their fallback.
+// Never fails the build: the data is only used for llms-full.txt, so after
+// two failed attempts we warn and return {} (placeholder examples for this
+// build) rather than blocking the whole docs deploy on a GitHub flake.
+async function loadRepoExamples() {
+  const maxAttempts = 2
+  let attempt = 0
+
+  while (attempt < maxAttempts) {
+    attempt += 1
+    try {
+      // A fresh plugin instance is created for every attempt
+      return await remoteRepoExamplePlugin().loadContent()
+    } catch (error) {
+      console.warn(`[generate-llms-full] Warning: could not load remote repo examples (attempt ${attempt}/2): ${error.message}`)
+    }
+  }
+
+  console.warn('[generate-llms-full] Proceeding without remote examples; blocks will render placeholders until the next successful build.')
+  return {}
+}
+
+// Entry point: builds llms-full.txt in OUTPUT_DIR.
+// Loads the remote examples, groups sidebar docs into sections, renders each
+// doc once (its first sidebar position wins), then writes the file and logs
+// its path, doc count and size. Rejects if rendering or the write throws.
+async function generateLlmsFull() {
+ console.log('Generating llms-full.txt from QuestDB documentation...')
+
+ const repoExamples = await loadRepoExamples()
+
+ const sections = collectSections(sidebarConfig.docs)
+
+ // File preamble; everything after it is appended section by section
+ let output = `# QuestDB Documentation — Full Content
+
+Complete text of the QuestDB documentation as a single document, in the same
+order as the index at ${BASE_URL}llms.txt. Each entry links its canonical
+markdown source.
+
+`
+
+ // Docs can appear in several sidebar positions; render each only once
+ const renderedDocIds = new Set()
+ let docCount = 0
+ let duplicateCount = 0
+
+ for (const section of sections) {
+ let body = ''
+ for (const docId of section.docIds) {
+ if (renderedDocIds.has(docId)) {
+ duplicateCount++
+ continue
+ }
+ // Marked as seen before rendering, so a missing doc is attempted (and
+ // warned about) only once
+ renderedDocIds.add(docId)
+ const rendered = await renderDoc(docId, repoExamples)
+ if (rendered) {
+ body += rendered
+ docCount++
+ }
+ }
+ // Skip the header if every doc in this section was a duplicate or missing
+ if (body) {
+ output += `# ${section.label}\n\n` + body
+ }
+ }
+
+ if (!fs.existsSync(OUTPUT_DIR)) {
+ fs.mkdirSync(OUTPUT_DIR, { recursive: true })
+ }
+
+ const targetPath = path.join(OUTPUT_DIR, 'llms-full.txt')
+ fs.writeFileSync(targetPath, output)
+
+ // Size is measured in UTF-8 bytes, not string length
+ const sizeMB = (Buffer.byteLength(output, 'utf8') / 1024 / 1024).toFixed(2)
+ console.log('✅ llms-full.txt generated successfully!')
+ console.log(` - Path: ${targetPath}`)
+ console.log(` - Docs: ${docCount} (${duplicateCount} duplicate sidebar entries skipped)`)
+ console.log(` - Size: ${sizeMB} MB`)
+}
+
+// Run the generator. On failure, log the error and set exit code 1 through
+// process.exitCode (rather than calling process.exit()).
+generateLlmsFull().catch(error => {
+ console.error('Error generating llms-full.txt:', error)
+ process.exitCode = 1
+})
diff --git a/scripts/lib/docs-urls.js b/scripts/lib/docs-urls.js
new file mode 100644
index 0000000000..a4146adacd
--- /dev/null
+++ b/scripts/lib/docs-urls.js
@@ -0,0 +1,45 @@
+const path = require('path')
+
+const BASE_URL = 'https://questdb.com/docs/'
+
+// Canonical raw-markdown URL for a doc, shared by the llms.txt and
+// llms-full.txt generators. Mirrors plugins/raw-markdown/index.js exactly —
+// that plugin decides where the .md files are actually written, so any
+// divergence here produces dead Source links.
+//
+//   docId - sidebar doc id such as 'guides/foo' (assumed '/'-separated)
+//   slug  - frontmatter slug, or a falsy value when the doc has none
+// Returns the absolute URL of the doc's published .md file.
+//
+// Path handling uses path.posix on purpose: the result is a URL, so it must
+// contain '/' separators even when the build runs on Windows, where the
+// platform path.join() would produce '\'.
+function generateUrl(docId, slug) {
+  let urlPath
+
+  if (slug) {
+    // A leading '/' marks an absolute slug; drop it before building the URL
+    urlPath = slug.startsWith('/') ? slug.substring(1) : slug
+
+    // Only prepend the doc's directory if the slug doesn't already include
+    // path segments (same rule as the raw-markdown plugin).
+    // NOTE(review): a single-segment absolute slug such as '/bar' is therefore
+    // still prefixed with the doc's directory — confirm the plugin agrees.
+    const fileDir = path.posix.dirname(docId)
+    if (!urlPath.includes('/') && fileDir !== '.') {
+      urlPath = path.posix.join(fileDir, urlPath)
+    }
+  } else {
+    // Safety net: introduction carries `slug: /`; if slug extraction ever
+    // fails (parse error, unreadable file) fall back to the URL the plugin
+    // publishes for it rather than emitting a dead introduction.md link.
+    if (docId === 'introduction') {
+      return BASE_URL + 'index.md'
+    }
+    // 'cookbook/index' is published as 'cookbook.md'
+    urlPath = docId.replace(/\/index$/, '')
+  }
+  // Note: a trailing '/' in a slug is deliberately NOT stripped — '.md' is
+  // appended to the slug as-is, which per the original note is what the
+  // raw-markdown plugin writes, so stripping here would link a path the
+  // plugin never publishes.
+
+  if (urlPath === '' || urlPath === '.') {
+    return BASE_URL + 'index.md'
+  }
+  return BASE_URL + urlPath + '.md'
+}
+
+module.exports = { BASE_URL, generateUrl }
diff --git a/scripts/lib/sidebar-utils.js b/scripts/lib/sidebar-utils.js
new file mode 100644
index 0000000000..47bd06c38f
--- /dev/null
+++ b/scripts/lib/sidebar-utils.js
@@ -0,0 +1,16 @@
+// Shared sidebar helpers for the llms.txt / llms-full.txt generators.
+
+// Report whether docId occurs anywhere below the given sidebar items: as a
+// bare string entry, as a `doc` entry, as a category's own `link` doc, or
+// inside a nested category at any depth. A missing items list yields false.
+function subtreeContainsDoc(items, docId) {
+  if (!items) return false
+
+  const entryMatches = (entry) => {
+    if (typeof entry === 'string') return entry === docId
+    if (entry.type === 'doc') return entry.id === docId
+    // Anything that is not a category (e.g. an external link) cannot match
+    if (entry.type !== 'category') return false
+
+    const { link } = entry
+    const isOwnLinkDoc = Boolean(link) && link.type === 'doc' && link.id === docId
+    return isOwnLinkDoc || subtreeContainsDoc(entry.items, docId)
+  }
+
+  return items.some(entryMatches)
+}
+
+module.exports = { subtreeContainsDoc }