Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ plugins/*/compiled
.netlify
.cache-loader
static/llms.txt
static/llms-full.txt
static/reference-full.md
static/web-console/*.json

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"license": "Apache-2.0",
"scripts": {
"start": "cross-env docusaurus start --port 3001",
"prebuild": "docusaurus clear && node ./scripts/generate-llms-files.js && node ./scripts/generate-reference-full.js && node ./scripts/generate-web-console-json.js",
"prebuild": "docusaurus clear && node ./scripts/generate-llms-files.js && node ./scripts/generate-llms-full.js && node ./scripts/generate-reference-full.js && node ./scripts/generate-web-console-json.js",
"build": "cross-env NO_UPDATE_NOTIFIER=true USE_SIMPLE_CSS_MINIFIER=true PWA_SW_CUSTOM= docusaurus build",
"deploy": "docusaurus deploy",
"serve": "docusaurus serve",
Expand Down
50 changes: 15 additions & 35 deletions scripts/generate-llms-files.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ const path = require('path')
const yaml = require('js-yaml')

const sidebarConfig = require('../documentation/sidebars.js')
const BASE_URL = 'https://questdb.com/docs/'
const { generateUrl: buildDocUrl } = require('./lib/docs-urls')
const { subtreeContainsDoc } = require('./lib/sidebar-utils')

const processedFiles = new Map()

Expand Down Expand Up @@ -53,43 +54,10 @@ function extractFrontmatter(filePath) {
}
}

function normalizeUrl(url) {
const clean = url.endsWith("/") ? url.slice(0, -1) : url
return clean + ".md"
}

function generateUrl(docId, docPath) {
// Extract frontmatter to check for custom slug
const { slug } = extractFrontmatter(docPath)

if (slug) {
let urlPath = slug

// Absolute slug (starts with /)
if (urlPath.startsWith('/')) {
urlPath = urlPath.substring(1)
if (urlPath === '') {
return BASE_URL + "index.md"
}
return normalizeUrl(BASE_URL + urlPath)
}

// Relative slug - resolve it relative to the document's directory
const docDir = path.dirname(docId)
if (docDir && docDir !== '.') {
urlPath = path.join(docDir, urlPath)
}

return normalizeUrl(BASE_URL + urlPath)
}

// Default behavior: use docId
if (docId === 'introduction') {
return BASE_URL + "index.md"
}
// Strip /index suffix to match raw-markdown plugin output (e.g. cookbook/index -> cookbook.md)
let urlDocId = docId.endsWith('/index') ? docId.slice(0, -'/index'.length) : docId
return normalizeUrl(BASE_URL + urlDocId)
return buildDocUrl(docId, slug)
}

function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
Expand Down Expand Up @@ -123,13 +91,25 @@ function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
result += '\n'

} else if (item.type === 'category') {
// A category's own link page (link: {type: 'doc'}) is a real doc too,
// unless the same doc is already listed among the category's items
const linkDoc = item.link && item.link.type === 'doc' && item.link.id &&
!subtreeContainsDoc(item.items, item.link.id)
? [{ type: 'doc', id: item.link.id }]
: []
if (isTopLevel) {
result += `\n## ${item.label}\n`
if (linkDoc.length > 0) {
result += processForLlmsTxt(linkDoc, 0, false)
}
if (item.items && item.items.length > 0) {
result += processForLlmsTxt(item.items, 0, false)
}
} else {
result += `${indentStr}${item.label}\n`
if (linkDoc.length > 0) {
result += processForLlmsTxt(linkDoc, indent + 1, false)
}
if (item.items && item.items.length > 0) {
result += processForLlmsTxt(item.items, indent + 1, false)
}
Expand Down
241 changes: 241 additions & 0 deletions scripts/generate-llms-full.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
const fs = require('fs')
const path = require('path')
const matter = require('gray-matter')
const {
convertAllComponents,
bumpHeadings,
normalizeNewLines,
removeImports,
processPartialImports,
} = require('../plugins/raw-markdown/convert-components')
const remoteRepoExamplePlugin = require('../plugins/remote-repo-example/index')

const sidebarConfig = require('../documentation/sidebars.js')
const { BASE_URL, generateUrl } = require('./lib/docs-urls')
const { subtreeContainsDoc } = require('./lib/sidebar-utils')

// Repository root: the parent of the directory that contains this script
const ROOT_DIR = path.resolve(__dirname, '..')
// Documentation sources; doc ids are resolved as file paths below this directory
const DOCS_DIR = path.join(ROOT_DIR, 'documentation')
// Directory the generated llms-full.txt is written into
const OUTPUT_DIR = path.join(ROOT_DIR, 'static')

// Read a doc's source by id. `<docId>.md` is preferred; `<docId>.mdx` is the
// fallback. Returns `{ raw, filePath }` for the first file that exists, or
// `null` (after logging a warning that names both candidates) when neither does.
function readDocFile(docId) {
  const candidates = ['.md', '.mdx'].map((extension) => path.join(DOCS_DIR, docId + extension))
  const filePath = candidates.find((candidate) => fs.existsSync(candidate))

  if (filePath === undefined) {
    console.warn(`[generate-llms-full] Warning: File not found: ${candidates[0]} or ${candidates[1]}`)
    return null
  }

  return { raw: fs.readFileSync(filePath, 'utf8'), filePath }
}

// Parsed partial bodies keyed by absolute path; one cache for the whole run
const partialCache = new Map()

// Resolve a partial import relative to the importing doc's directory (itself
// relative to DOCS_DIR) and return the partial's body with frontmatter
// stripped. Successful loads are cached. A missing partial logs a warning and
// yields an HTML comment placeholder instead (placeholders are not cached).
function loadPartial(partialPath, currentFileDir) {
  // Import paths can carry markdown-escaped underscores (\_); restore them
  const cleanedPath = partialPath.replace(/\\_/g, '_')
  const importerDir = path.join(DOCS_DIR, currentFileDir)
  const absolutePath = path.resolve(importerDir, cleanedPath)

  if (partialCache.has(absolutePath)) {
    return partialCache.get(absolutePath)
  }

  if (!fs.existsSync(absolutePath)) {
    console.warn(`[generate-llms-full] Warning: Partial not found: ${absolutePath}`)
    return `<!-- Partial not found: ${partialPath} -->`
  }

  const parsed = matter(fs.readFileSync(absolutePath, 'utf8'))
  partialCache.set(absolutePath, parsed.content)
  return parsed.content
}

// Render one doc as an llms-full.txt entry: an H2 title, a `Source:` line with
// the doc's canonical markdown URL, the frontmatter description (if any), and
// the converted body. Resolves to '' when the doc's source file is missing.
async function renderDoc(docId, repoExamples) {
  const source = readDocFile(docId)
  if (!source) {
    return ''
  }

  const parsed = matter(source.raw)
  const meta = parsed.data
  const docDir = path.dirname(source.filePath)

  // Partial imports are handled first; loadPartial receives the doc's
  // directory relative to DOCS_DIR
  const withPartials = processPartialImports(
    parsed.content,
    loadPartial,
    path.relative(DOCS_DIR, docDir),
  )

  // Convert MDX components to markdown
  const converted = await convertAllComponents(withPartials, docDir, DOCS_DIR, repoExamples)

  const cleaned = normalizeNewLines(removeImports(converted))

  // Body headings are bumped by 2 (H1 -> H3, H2 -> H4, …) so a doc body can
  // never collide with the H1 section headers or the H2 per-doc title emitted
  // here — some docs (introduction, changelog) legitimately contain body H1s
  const body = bumpHeadings(cleaned, 2)

  const title = meta.title || docId
  const url = generateUrl(docId, meta.slug || null)

  const parts = [`## ${title}\n\n`, `Source: ${url}\n\n`]
  if (meta.description) {
    parts.push(`${meta.description}\n\n`)
  }
  parts.push(body.trim() + '\n\n')
  return parts.join('')
}

// Title of a doc as declared in its frontmatter; falls back to the doc id when
// the source file cannot be read or the frontmatter has no (truthy) title.
function docTitle(docId) {
  const source = readDocFile(docId)
  if (source) {
    const { title } = matter(source.raw).data
    return title || docId
  }
  return docId
}

// Turn the sidebar into an ordered list of `{ label, docIds }` sections.
//  - Each top-level category is one section, labeled with the category label
//    and holding every doc id found anywhere beneath it, in sidebar order.
//  - Loose top-level docs that come before the first category are gathered
//    into a single "Overview" section.
//  - A loose top-level doc that comes after a category (e.g. changelog) gets a
//    section of its own, labeled with the doc's title, so it is never
//    misattributed to a neighboring category.
//  - A category's own `link: {type: 'doc'}` page goes before its items unless
//    the items already list it — the same rule (and therefore the same order)
//    as the llms.txt generator.
function collectSections(items) {
  // Doc id of the category's link page, or nothing if absent / already listed
  const ownLinkDocIds = (category) => {
    const link = category.link
    const hasDocLink = link && link.type === 'doc' && link.id
    if (!hasDocLink) {
      return []
    }
    return subtreeContainsDoc(category.items, link.id) ? [] : [link.id]
  }

  // All doc ids of a category: its link page first, then its items, depth-first
  const categoryDocIds = (category) => {
    const own = ownLinkDocIds(category)
    const nested = category.items ? flattenDocIds(category.items) : []
    return [...own, ...nested]
  }

  const flattenDocIds = (entries) =>
    entries.flatMap((entry) => {
      if (typeof entry === 'string') {
        return [entry]
      }
      if (entry.type === 'doc') {
        return [entry.id]
      }
      if (entry.type === 'category') {
        return categoryDocIds(entry)
      }
      // entry.type === 'link' is external; contributes nothing
      return []
    })

  const sections = []
  const overviewDocIds = []
  let categorySeen = false

  const flushOverview = () => {
    if (!categorySeen && overviewDocIds.length > 0) {
      sections.push({ label: 'Overview', docIds: overviewDocIds })
    }
  }

  for (const entry of items) {
    const isShorthand = typeof entry === 'string'

    if (isShorthand || entry.type === 'doc') {
      const docId = isShorthand ? entry : entry.id
      if (categorySeen) {
        sections.push({ label: docTitle(docId), docIds: [docId] })
      } else {
        overviewDocIds.push(docId)
      }
      continue
    }

    if (entry.type !== 'category') {
      continue
    }

    // The first category closes the Overview section
    flushOverview()
    categorySeen = true
    sections.push({ label: entry.label, docIds: categoryDocIds(entry) })
  }

  // Sidebar without any category: everything is Overview
  flushOverview()

  return sections
}

// Fetch the same remote example data the raw-markdown plugin receives at build
// time, so <RemoteRepoExample /> renders real code instead of its fallback.
// Two attempts are made. This never fails the build: the data only feeds
// llms-full.txt, so if both attempts fail we resolve to an empty object and
// accept placeholder examples for one build rather than blocking the whole
// docs deploy on a GitHub flake.
async function loadRepoExamples() {
  let attempt = 0
  while (attempt < 2) {
    attempt += 1
    try {
      const examples = await remoteRepoExamplePlugin().loadContent()
      return examples
    } catch (error) {
      console.warn(`[generate-llms-full] Warning: could not load remote repo examples (attempt ${attempt}/2): ${error.message}`)
    }
  }

  console.warn('[generate-llms-full] Proceeding without remote examples; <RemoteRepoExample /> blocks will render placeholders until the next successful build.')
  return {}
}

// Build static/llms-full.txt: a fixed preamble followed by one H1 section per
// sidebar section, each containing its docs rendered in sidebar order. Every
// doc is rendered at most once, at its first sidebar position. Logs a summary
// (path, doc count, skipped duplicates, size) when done.
async function generateLlmsFull() {
  console.log('Generating llms-full.txt from QuestDB documentation...')

  const repoExamples = await loadRepoExamples()
  const sections = collectSections(sidebarConfig.docs)

  const preamble = [
    '# QuestDB Documentation — Full Content',
    '',
    'Complete text of the QuestDB documentation as a single document, in the same',
    `order as the index at ${BASE_URL}llms.txt. Each entry links its canonical`,
    'markdown source.',
    '',
    '',
  ].join('\n')

  // Docs can appear in several sidebar positions; only the first one renders
  const seenDocIds = new Set()
  const chunks = [preamble]
  let docCount = 0
  let duplicateCount = 0

  for (const { label, docIds } of sections) {
    const renderedDocs = []

    for (const docId of docIds) {
      if (seenDocIds.has(docId)) {
        duplicateCount += 1
        continue
      }
      seenDocIds.add(docId)

      // Rendered one at a time, in order, so output order follows the sidebar
      const rendered = await renderDoc(docId, repoExamples)
      if (!rendered) {
        continue
      }
      renderedDocs.push(rendered)
      docCount += 1
    }

    // No header for a section whose docs were all duplicates or missing
    if (renderedDocs.length > 0) {
      chunks.push(`# ${label}\n\n`, ...renderedDocs)
    }
  }

  const output = chunks.join('')

  if (!fs.existsSync(OUTPUT_DIR)) {
    fs.mkdirSync(OUTPUT_DIR, { recursive: true })
  }

  const targetPath = path.join(OUTPUT_DIR, 'llms-full.txt')
  fs.writeFileSync(targetPath, output)

  const sizeBytes = Buffer.byteLength(output, 'utf8')
  const sizeMB = (sizeBytes / 1024 / 1024).toFixed(2)
  console.log('✅ llms-full.txt generated successfully!')
  console.log(` - Path: ${targetPath}`)
  console.log(` - Docs: ${docCount} (${duplicateCount} duplicate sidebar entries skipped)`)
  console.log(` - Size: ${sizeMB} MB`)
}

// Script entry point. A failed run is reported and flagged through
// process.exitCode (rather than an immediate exit) so the process ends with a
// non-zero status once pending work has drained.
function reportGenerationFailure(error) {
  console.error('Error generating llms-full.txt:', error)
  process.exitCode = 1
}

generateLlmsFull().catch(reportGenerationFailure)
45 changes: 45 additions & 0 deletions scripts/lib/docs-urls.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
const path = require('path')

const BASE_URL = 'https://questdb.com/docs/'

// Canonical raw-markdown URL for a doc, shared by the llms.txt and
// llms-full.txt generators. Mirrors plugins/raw-markdown/index.js — that
// plugin decides where the .md files are actually written, so any divergence
// here produces dead Source links.
//
// docId: sidebar doc id, '/'-separated (e.g. 'guides/foo', 'cookbook/index')
// slug:  frontmatter slug, or a falsy value when the doc declares none
// Returns the absolute URL of the doc's published `.md` file.
//
// URL paths are assembled with path.posix so the separator is always '/',
// including when the build runs on a host whose native separator is '\'.
function generateUrl(docId, slug) {
  let urlPath

  if (slug) {
    urlPath = slug.startsWith('/') ? slug.substring(1) : slug
    // Only prepend the doc's directory if the slug doesn't already include
    // path segments (same rule as the raw-markdown plugin)
    const fileDir = path.posix.dirname(docId)
    if (!urlPath.includes('/') && fileDir !== '.') {
      urlPath = path.posix.join(fileDir, urlPath)
    }
  } else {
    // Safety net: introduction carries `slug: /`; if slug extraction ever
    // fails (parse error, unreadable file) fall back to the URL the plugin
    // publishes for it rather than emitting a dead introduction.md link.
    if (docId === 'introduction') {
      return BASE_URL + 'index.md'
    }
    // A trailing /index maps to the directory's own page (cookbook/index -> cookbook)
    urlPath = docId.endsWith('/index') ? docId.slice(0, -'/index'.length) : docId
  }
  // Note: a trailing '/' in a slug is deliberately NOT stripped — the
  // raw-markdown plugin writes `<slug>.md` verbatim, so stripping here
  // would link a path the plugin never publishes.

  // An empty path (e.g. `slug: /` on a root-level doc) is the docs index
  if (urlPath === '' || urlPath === '.') {
    return BASE_URL + 'index.md'
  }
  return BASE_URL + urlPath + '.md'
}

module.exports = { BASE_URL, generateUrl }
Loading
Loading