questdb · sandroqdb · Jul 3, 2026 · Jul 3, 2026 · Jul 3, 2026
diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,7 @@ plugins/*/compiled
 .netlify
 .cache-loader
 static/llms.txt
+static/llms-full.txt
 static/reference-full.md
 static/web-console/*.json
 

diff --git a/package.json b/package.json
@@ -5,7 +5,7 @@
   "license": "Apache-2.0",
   "scripts": {
     "start": "cross-env docusaurus start --port 3001",
-    "prebuild": "docusaurus clear && node ./scripts/generate-llms-files.js && node ./scripts/generate-reference-full.js && node ./scripts/generate-web-console-json.js",
+    "prebuild": "docusaurus clear && node ./scripts/generate-llms-files.js && node ./scripts/generate-llms-full.js && node ./scripts/generate-reference-full.js && node ./scripts/generate-web-console-json.js",
     "build": "cross-env NO_UPDATE_NOTIFIER=true USE_SIMPLE_CSS_MINIFIER=true PWA_SW_CUSTOM= docusaurus build",
     "deploy": "docusaurus deploy",
     "serve": "docusaurus serve",

diff --git a/scripts/generate-llms-files.js b/scripts/generate-llms-files.js
@@ -3,7 +3,8 @@ const path = require('path')
 const yaml = require('js-yaml')
 
 const sidebarConfig = require('../documentation/sidebars.js')
-const BASE_URL = 'https://questdb.com/docs/'
+const { generateUrl: buildDocUrl } = require('./lib/docs-urls')
+const { subtreeContainsDoc } = require('./lib/sidebar-utils')
 
 const processedFiles = new Map()
 
@@ -53,43 +54,10 @@ function extractFrontmatter(filePath) {
   }
 }
 
-function normalizeUrl(url) {
-  const clean = url.endsWith("/") ? url.slice(0, -1) : url
-  return clean + ".md"
-}
-
 function generateUrl(docId, docPath) {
   // Extract frontmatter to check for custom slug
   const { slug } = extractFrontmatter(docPath)
-
-  if (slug) {
-    let urlPath = slug
-
-    // Absolute slug (starts with /)
-    if (urlPath.startsWith('/')) {
-      urlPath = urlPath.substring(1)
-      if (urlPath === '') {
-        return BASE_URL + "index.md"
-      }
-      return normalizeUrl(BASE_URL + urlPath)
-    }
-
-    // Relative slug - resolve it relative to the document's directory
-    const docDir = path.dirname(docId)
-    if (docDir && docDir !== '.') {
-      urlPath = path.join(docDir, urlPath)
-    }
-
-    return normalizeUrl(BASE_URL + urlPath)
-  }
-
-  // Default behavior: use docId
-  if (docId === 'introduction') {
-    return BASE_URL + "index.md"
-  }
-  // Strip /index suffix to match raw-markdown plugin output (e.g. cookbook/index -> cookbook.md)
-  let urlDocId = docId.endsWith('/index') ? docId.slice(0, -'/index'.length) : docId
-  return normalizeUrl(BASE_URL + urlDocId)
+  return buildDocUrl(docId, slug)
 }
 
 function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
@@ -123,13 +91,25 @@ function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
       result += '\n'
 
     } else if (item.type === 'category') {
+      // A category's own link page (link: {type: 'doc'}) is a real doc too,
+      // unless the same doc is already listed among the category's items
+      const linkDoc = item.link && item.link.type === 'doc' && item.link.id &&
+        !subtreeContainsDoc(item.items, item.link.id)
+        ? [{ type: 'doc', id: item.link.id }]
+        : []
       if (isTopLevel) {
         result += `\n## ${item.label}\n`
+        if (linkDoc.length > 0) {
+          result += processForLlmsTxt(linkDoc, 0, false)
+        }
         if (item.items && item.items.length > 0) {
           result += processForLlmsTxt(item.items, 0, false)
         }
       } else {
         result += `${indentStr}${item.label}\n`
+        if (linkDoc.length > 0) {
+          result += processForLlmsTxt(linkDoc, indent + 1, false)
+        }
         if (item.items && item.items.length > 0) {
           result += processForLlmsTxt(item.items, indent + 1, false)
         }

diff --git a/scripts/generate-llms-full.js b/scripts/generate-llms-full.js
@@ -0,0 +1,241 @@
+const fs = require('fs')
+const path = require('path')
+const matter = require('gray-matter')
+const {
+  convertAllComponents,
+  bumpHeadings,
+  normalizeNewLines,
+  removeImports,
+  processPartialImports,
+} = require('../plugins/raw-markdown/convert-components')
+const remoteRepoExamplePlugin = require('../plugins/remote-repo-example/index')
+
+const sidebarConfig = require('../documentation/sidebars.js')
+const { BASE_URL, generateUrl } = require('./lib/docs-urls')
+const { subtreeContainsDoc } = require('./lib/sidebar-utils')
+
+const ROOT_DIR = path.resolve(__dirname, '..')
+const DOCS_DIR = path.join(ROOT_DIR, 'documentation')
+const OUTPUT_DIR = path.join(ROOT_DIR, 'static')
+
+// Read a doc's source by id: `<id>.md` wins over `<id>.mdx`; null (plus a warning) if neither exists
+function readDocFile(docId) {
+  const candidates = ['.md', '.mdx'].map(ext => path.join(DOCS_DIR, docId + ext))
+  for (const filePath of candidates) {
+    if (fs.existsSync(filePath)) {
+      return { raw: fs.readFileSync(filePath, 'utf8'), filePath }
+    }
+  }
+  const [mdPath, mdxPath] = candidates
+  console.warn(`[generate-llms-full] Warning: File not found: ${mdPath} or ${mdxPath}`)
+  return null
+}
+
+// Partial bodies (frontmatter stripped) keyed by absolute path, shared by every doc
+const partialCache = new Map()
+
+// Resolve a partial relative to the importing doc's directory (given relative
+// to DOCS_DIR) and return its body without frontmatter. Hits are memoized;
+// a missing partial logs a warning and yields an HTML comment placeholder.
+function loadPartial(partialPath, currentFileDir) {
+  // Markdown-escaped underscores (\_) must become plain ones before resolving
+  const cleanedPath = partialPath.replace(/\\_/g, '_')
+  const importerDir = path.join(DOCS_DIR, currentFileDir)
+  const resolved = path.resolve(importerDir, cleanedPath)
+
+  if (!partialCache.has(resolved)) {
+    // Only successful reads are cached; a miss is re-checked on the next import
+    if (!fs.existsSync(resolved)) {
+      console.warn(`[generate-llms-full] Warning: Partial not found: ${resolved}`)
+      return `<!-- Partial not found: ${partialPath} -->`
+    }
+    partialCache.set(resolved, matter(fs.readFileSync(resolved, 'utf8')).content)
+  }
+  return partialCache.get(resolved)
+}
+
+// Render one doc as an llms-full.txt entry: an H2 title, a `Source:` link to
+// its canonical markdown URL, the frontmatter description when present, and
+// the processed body (partials, MDX components, imports, headings).
+// Resolves to '' when the doc file cannot be found.
+async function renderDoc(docId, repoExamples) {
+  const source = readDocFile(docId)
+  if (!source) return ''
+
+  const parsed = matter(source.raw)
+  const meta = parsed.data
+  const docDir = path.dirname(source.filePath)
+
+  // Process partial component imports; pass the doc's directory relative to
+  // DOCS_DIR, which is the form loadPartial expects
+  const withPartials = processPartialImports(
+    parsed.content,
+    loadPartial,
+    path.relative(DOCS_DIR, docDir),
+  )
+
+  // Convert MDX components to markdown, then run removeImports / normalizeNewLines
+  const converted = await convertAllComponents(withPartials, docDir, DOCS_DIR, repoExamples)
+  const tidied = normalizeNewLines(removeImports(converted))
+
+  // Bump body headings by 2 (H1 -> H3, H2 -> H4, …) so nothing in a doc body
+  // can collide with the H1 section headers or the H2 per-doc title below —
+  // some docs (introduction, changelog) legitimately contain body H1s
+  const body = bumpHeadings(tidied, 2)
+
+  const heading = meta.title || docId
+  const sourceUrl = generateUrl(docId, meta.slug || null)
+  const descriptionBlock = meta.description ? `${meta.description}\n\n` : ''
+
+  // Entry layout: title, source link, optional description, trimmed body
+  const header = `## ${heading}\n\nSource: ${sourceUrl}\n\n${descriptionBlock}`
+  return header + body.trim() + '\n\n'
+}
+
+// Frontmatter title of a doc, or the doc id itself when the file is missing
+// or declares no (truthy) title
+function docTitle(docId) {
+  const source = readDocFile(docId)
+  return (source && matter(source.raw).data.title) || docId
+}
+
+// Walk the sidebar in order and return an array of { label, docIds } sections.
+// Each top-level category becomes a section labeled by the category. Loose
+// top-level docs before the first category form an "Overview" section; loose
+// docs after a category (e.g. changelog) each get their own section labeled
+// by the doc's title, so no doc is misattributed to a neighboring category.
+// A category's own `link: {type: 'doc'}` page is listed before its items
+// unless the items already contain it — the same rule (and therefore the
+// same order) as the llms.txt generator.
+function collectSections(items) {
+  const sections = []
+  const leading = { label: 'Overview', docIds: [] } // loose docs seen before the first category
+  let seenCategory = false // true once any top-level category has been reached
+
+  function categoryLinkDocIds(item) { // [link doc id], or [] when absent or already among the items
+    return item.link && item.link.type === 'doc' && item.link.id &&
+      !subtreeContainsDoc(item.items, item.link.id)
+      ? [item.link.id]
+      : []
+  }
+
+  function collectDocIds(subItems, into) { // depth-first, in sidebar order; appends to `into`
+    for (const item of subItems) {
+      if (typeof item === 'string') { // shorthand entry: the string itself is the doc id
+        into.push(item)
+      } else if (item.type === 'doc') {
+        into.push(item.id)
+      } else if (item.type === 'category') {
+        into.push(...categoryLinkDocIds(item))
+        if (item.items) {
+          collectDocIds(item.items, into)
+        }
+      }
+      // any other type (e.g. 'link', which is external) contributes no doc id
+    }
+  }
+
+  for (const item of items) {
+    if (typeof item === 'string' || item.type === 'doc') {
+      const docId = typeof item === 'string' ? item : item.id
+      if (seenCategory) { // loose doc after a category: its own single-doc section
+        sections.push({ label: docTitle(docId), docIds: [docId] })
+      } else {
+        leading.docIds.push(docId)
+      }
+    } else if (item.type === 'category') {
+      if (!seenCategory && leading.docIds.length > 0) { // flush Overview ahead of the first category
+        sections.push(leading)
+      }
+      seenCategory = true
+      const section = { label: item.label, docIds: [] }
+      section.docIds.push(...categoryLinkDocIds(item))
+      if (item.items) {
+        collectDocIds(item.items, section.docIds)
+      }
+      sections.push(section)
+    }
+  }
+
+  if (!seenCategory && leading.docIds.length > 0) { // sidebar had no categories: Overview is the only section
+    sections.push(leading)
+  }
+
+  return sections
+}
+
+// Load the same remote example data the raw-markdown plugin receives at
+// build time, so <RemoteRepoExample /> renders real code instead of its
+// fallback. Tries twice and never fails the build: the data only feeds
+// llms-full.txt, so on persistent fetch errors we return {} and accept
+// placeholder examples for one build rather than blocking the docs deploy.
+async function loadRepoExamples() {
+  for (const attempt of [1, 2]) {
+    try {
+      return await remoteRepoExamplePlugin().loadContent()
+    } catch (err) {
+      console.warn(`[generate-llms-full] Warning: could not load remote repo examples (attempt ${attempt}/2): ${err.message}`)
+    }
+  }
+  console.warn('[generate-llms-full] Proceeding without remote examples; <RemoteRepoExample /> blocks will render placeholders until the next successful build.')
+  return {}
+}
+
+async function generateLlmsFull() { // writes static/llms-full.txt: a preamble, then one H1 block per sidebar section
+  console.log('Generating llms-full.txt from QuestDB documentation...')
+
+  const repoExamples = await loadRepoExamples() // {} when both fetch attempts failed
+
+  const sections = collectSections(sidebarConfig.docs) // the `docs` sidebar from documentation/sidebars.js
+
+  let output = `# QuestDB Documentation — Full Content
+
+Complete text of the QuestDB documentation as a single document, in the same
+order as the index at ${BASE_URL}llms.txt. Each entry links its canonical
+markdown source.
+
+`
+
+  // Docs can appear in several sidebar positions; render each only once (the first occurrence wins)
+  const renderedDocIds = new Set()
+  let docCount = 0
+  let duplicateCount = 0
+
+  for (const section of sections) {
+    let body = ''
+    for (const docId of section.docIds) {
+      if (renderedDocIds.has(docId)) {
+        duplicateCount++
+        continue
+      }
+      renderedDocIds.add(docId) // recorded before rendering, so a missing doc is attempted only once
+      const rendered = await renderDoc(docId, repoExamples)
+      if (rendered) { // '' means the doc file was not found; it is not counted
+        body += rendered
+        docCount++
+      }
+    }
+    // Skip the header if every doc in this section was a duplicate or missing
+    if (body) {
+      output += `# ${section.label}\n\n` + body
+    }
+  }
+
+  if (!fs.existsSync(OUTPUT_DIR)) {
+    fs.mkdirSync(OUTPUT_DIR, { recursive: true })
+  }
+
+  const targetPath = path.join(OUTPUT_DIR, 'llms-full.txt')
+  fs.writeFileSync(targetPath, output)
+
+  const sizeMB = (Buffer.byteLength(output, 'utf8') / 1024 / 1024).toFixed(2) // UTF-8 byte size in MiB, two decimals
+  console.log('✅ llms-full.txt generated successfully!')
+  console.log(`   - Path: ${targetPath}`)
+  console.log(`   - Docs: ${docCount} (${duplicateCount} duplicate sidebar entries skipped)`)
+  console.log(`   - Size: ${sizeMB} MB`)
+}
+
+generateLlmsFull().catch(error => { // script entry point: any rejection is logged and reported via the exit code
+  console.error('Error generating llms-full.txt:', error)
+  process.exitCode = 1 // non-zero exit so the `&&`-chained prebuild script stops here
+})
diff --git a/scripts/lib/docs-urls.js b/scripts/lib/docs-urls.js
@@ -0,0 +1,45 @@
+const path = require('path')
+
+const BASE_URL = 'https://questdb.com/docs/'
+
+// Canonical raw-markdown URL for a doc, shared by the llms.txt and
+// llms-full.txt generators. Mirrors plugins/raw-markdown/index.js exactly —
+// that plugin decides where the .md files are actually written, so any
+// divergence here produces dead Source links.
+// `docId` is the '/'-separated sidebar id; `slug` is the frontmatter slug
+// (or a falsy value). Returns an absolute URL under BASE_URL ending in `.md`.
+function generateUrl(docId, slug) {
+  // Ids and slugs are URL paths, not filesystem paths: use the POSIX helpers
+  // so a Windows build cannot leak '\' separators into the generated links
+  const { dirname, join } = path.posix
+  let urlPath
+
+  if (slug) {
+    urlPath = slug.startsWith('/') ? slug.substring(1) : slug
+    // Only prepend the doc's directory if the slug doesn't already include
+    // path segments (same rule as the raw-markdown plugin)
+    const fileDir = dirname(docId)
+    if (!urlPath.includes('/') && fileDir !== '.') {
+      urlPath = join(fileDir, urlPath)
+    }
+  } else {
+    // Safety net: introduction carries `slug: /`; if slug extraction ever
+    // fails (parse error, unreadable file) fall back to the URL the plugin
+    // publishes for it rather than emitting a dead introduction.md link.
+    if (docId === 'introduction') {
+      return BASE_URL + 'index.md'
+    }
+    // `<dir>/index` docs are published as `<dir>.md` (e.g. cookbook/index -> cookbook.md)
+    urlPath = docId.replace(/\/index$/, '')
+  }
+  // Note: a trailing '/' in a slug is deliberately NOT stripped — the
+  // raw-markdown plugin writes `<slug>.md` verbatim, so stripping here
+  // would link a path the plugin never publishes.
+
+  if (urlPath === '' || urlPath === '.') {
+    return BASE_URL + 'index.md'
+  }
+  return BASE_URL + urlPath + '.md'
+}
+
+module.exports = { BASE_URL, generateUrl }