From 4be5b804cc2de17f4a23c4d048fe0cfd0c278861 Mon Sep 17 00:00:00 2001 From: djliden <7102904+djliden@users.noreply.github.com> Date: Thu, 2 Apr 2026 13:09:56 -0500 Subject: [PATCH 1/5] Add template content block system for rich cookbook narratives Introduces a typed block system (markdown, recipe, code) that lets templates interleave prose and code between recipe steps instead of just concatenating recipes. Refactors all template pages and the markdown export API to use the new system, with a fallback to legacy concatenation for templates without custom blocks. Co-authored-by: Isaac --- api/content-markdown.ts | 52 +++------ src/components/templates/template-blocks.tsx | 80 +++++++++++++ src/lib/template-content.ts | 110 ++++++++++++++++++ src/pages/resources/ai-chat-app-template.tsx | 6 +- .../resources/ai-data-explorer-template.tsx | 6 +- .../analytics-dashboard-app-template.tsx | 6 +- src/pages/resources/base-app-template.tsx | 6 +- src/pages/resources/data-app-template.tsx | 6 +- tests/markdown.test.ts | 6 + 9 files changed, 220 insertions(+), 58 deletions(-) create mode 100644 src/components/templates/template-blocks.tsx create mode 100644 src/lib/template-content.ts diff --git a/api/content-markdown.ts b/api/content-markdown.ts index c88c125..49ba440 100644 --- a/api/content-markdown.ts +++ b/api/content-markdown.ts @@ -2,13 +2,13 @@ import { existsSync, readFileSync } from "fs"; import { resolve } from "path"; import { hasMarkdownSlug } from "../src/lib/content-markdown"; import { recipes, templates } from "../src/lib/recipes/recipes"; +import { + buildTemplateMarkdownDocument, + collectTemplateRecipeIds, +} from "../src/lib/template-content"; export type MarkdownSection = "docs" | "recipes" | "solutions" | "templates"; -function recipeMarkdownPath(recipeId: string): string { - return `content/recipes/${recipeId}.md`; -} - function validateSlug(slug: string): void { if (!slug || slug.trim() === "") { throw new Error("Missing slug"); @@ -92,42 +92,18 @@ 
function readTemplateMarkdown(rootDir: string, slug: string): string { throw new Error(`Template page not found: "${slug}"`); } - const lines: string[] = [ - "---", - `title: "${template.name.replace(/"/g, '\\"')}"`, - `url: /resources/${template.id}`, - `summary: "${template.description.replace(/"/g, '\\"')}"`, - "---", - "", - `# ${template.name}`, - "", - template.description, - "", - ]; - - for (const recipeId of template.recipeIds) { - const recipe = recipes.find((entry) => entry.id === recipeId); - if (!recipe) { - throw new Error(`Recipe not found: "${recipeId}"`); - } - if (!hasMarkdownSlug(rootDir, "recipes", recipeId)) { - throw new Error(`Recipe page not found: "${recipeId}"`); - } + const rawBySlug = Object.fromEntries( + collectTemplateRecipeIds(template).map((recipeId) => { + const recipe = recipes.find((entry) => entry.id === recipeId); + if (!recipe) { + throw new Error(`Recipe not found: "${recipeId}"`); + } - const recipePath = recipeMarkdownPath(recipeId); - const absoluteRecipePath = resolve(rootDir, recipePath); - const recipeContent = readIfExists(absoluteRecipePath); - if (!recipeContent) { - throw new Error( - `Recipe markdown missing for "${recipeId}" at ${recipePath}`, - ); - } - - lines.push(recipeContent.trim()); - lines.push(""); - } + return [recipeId, readRecipeMarkdown(rootDir, recipeId)]; + }), + ); - return lines.join("\n"); + return buildTemplateMarkdownDocument(template, rawBySlug); } export function getDetailMarkdown( diff --git a/src/components/templates/template-blocks.tsx b/src/components/templates/template-blocks.tsx new file mode 100644 index 0000000..0538597 --- /dev/null +++ b/src/components/templates/template-blocks.tsx @@ -0,0 +1,80 @@ +import CodeBlock from "@theme/CodeBlock"; +import { evaluateSync } from "@mdx-js/mdx"; +import { useMDXComponents } from "@mdx-js/react"; +import { type ComponentType, type ReactNode, useMemo } from "react"; +import * as jsxRuntime from "react/jsx-runtime"; +import type { 
TemplateContentBlock } from "@/lib/template-content"; + +type TemplateRecipeComponentMap = Record; + +type TemplateBlockRendererProps = { + blocks: TemplateContentBlock[]; + recipeComponents: TemplateRecipeComponentMap; +}; + +type MarkdownBlockProps = { + content: string; +}; + +function TemplateMarkdownBlock({ content }: MarkdownBlockProps): ReactNode { + const components = useMDXComponents(); + + const Content = useMemo(() => { + return evaluateSync(content, { + ...jsxRuntime, + useMDXComponents: () => components, + }).default; + }, [components, content]); + + return ; +} + +type CodeBlockProps = { + language: string; + content: string; +}; + +function TemplateCodeBlock({ language, content }: CodeBlockProps): ReactNode { + return ( + + {content.replace(/\n$/, "")} + + ); +} + +export function TemplateBlockRenderer({ + blocks, + recipeComponents, +}: TemplateBlockRendererProps): ReactNode { + return ( + <> + {blocks.map((block, index) => { + const key = `${block.type}-${index}`; + + switch (block.type) { + case "markdown": + return ; + case "code": + return ( + + ); + case "recipe": { + const RecipeComponent = recipeComponents[block.recipeId]; + if (!RecipeComponent) { + throw new Error( + `Missing recipe component for template block: ${block.recipeId}`, + ); + } + return ; + } + default: + return null; + } + })} + + ); +} diff --git a/src/lib/template-content.ts b/src/lib/template-content.ts new file mode 100644 index 0000000..ff69662 --- /dev/null +++ b/src/lib/template-content.ts @@ -0,0 +1,110 @@ +import type { Template } from "./recipes/recipes"; + +export type TemplateContentBlock = + | { type: "markdown"; content: string } + | { type: "recipe"; recipeId: string } + | { type: "code"; language: string; content: string }; + +type RawRecipeMarkdownById = Record; + +const templateContentById: Record = {}; + +export function getTemplateContentBlocks( + templateId: string, +): TemplateContentBlock[] | undefined { + return templateContentById[templateId]; +} + 
+export function collectTemplateRecipeIds(template: Template): string[] { + const blocks = getTemplateContentBlocks(template.id); + if (!blocks) { + return template.recipeIds; + } + + return [ + ...new Set( + blocks.flatMap((block) => + block.type === "recipe" ? [block.recipeId] : [], + ), + ), + ]; +} + +function getRecipeMarkdown( + recipeId: string, + rawBySlug: RawRecipeMarkdownById, +): string { + const markdown = rawBySlug[recipeId]; + if (!markdown) { + throw new Error(`Recipe markdown not found: ${recipeId}`); + } + return markdown.trim(); +} + +export function buildLegacyTemplateRawMarkdown( + template: Template, + rawBySlug: RawRecipeMarkdownById, +): string { + return template.recipeIds + .map((id) => rawBySlug[id]) + .filter(Boolean) + .join("\n\n---\n\n"); +} + +export function serializeTemplateContentBlocks( + blocks: TemplateContentBlock[], + rawBySlug: RawRecipeMarkdownById, +): string { + return blocks + .map((block) => { + switch (block.type) { + case "markdown": + return block.content.trim(); + case "recipe": + return getRecipeMarkdown(block.recipeId, rawBySlug); + case "code": + return `\`\`\`${block.language}\n${block.content.trimEnd()}\n\`\`\``; + default: + return ""; + } + }) + .filter(Boolean) + .join("\n\n"); +} + +export function buildTemplateRawMarkdown( + template: Template, + rawBySlug: RawRecipeMarkdownById, +): string { + const blocks = getTemplateContentBlocks(template.id); + if (!blocks) { + return buildLegacyTemplateRawMarkdown(template, rawBySlug); + } + + return serializeTemplateContentBlocks(blocks, rawBySlug); +} + +function escapeFrontmatter(value: string): string { + return value.replace(/"/g, '\\"'); +} + +export function buildTemplateMarkdownDocument( + template: Template, + rawBySlug: RawRecipeMarkdownById, +): string { + const body = buildTemplateRawMarkdown(template, rawBySlug); + + return [ + "---", + `title: "${escapeFrontmatter(template.name)}"`, + `url: /resources/${template.id}`, + `summary: 
"${escapeFrontmatter(template.description)}"`, + "---", + "", + `# ${template.name}`, + "", + template.description, + "", + body, + ].join("\n"); +} diff --git a/src/pages/resources/ai-chat-app-template.tsx b/src/pages/resources/ai-chat-app-template.tsx index 350a7d1..d672d1d 100644 --- a/src/pages/resources/ai-chat-app-template.tsx +++ b/src/pages/resources/ai-chat-app-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import FoundationModelsApi from "@site/content/recipes/foundation-models-api.md"; @@ -15,10 +16,7 @@ export default function AiChatAppTemplatePage(): ReactNode { if (!template) { throw new Error("Template ai-chat-app-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/ai-data-explorer-template.tsx b/src/pages/resources/ai-data-explorer-template.tsx index c2c3a6e..c0d6c0c 100644 --- a/src/pages/resources/ai-data-explorer-template.tsx +++ b/src/pages/resources/ai-data-explorer-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; @@ 
-16,10 +17,7 @@ export default function AiDataExplorerTemplatePage(): ReactNode { if (!template) { throw new Error("Template ai-data-explorer-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/analytics-dashboard-app-template.tsx b/src/pages/resources/analytics-dashboard-app-template.tsx index f9a2847..631209d 100644 --- a/src/pages/resources/analytics-dashboard-app-template.tsx +++ b/src/pages/resources/analytics-dashboard-app-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; @@ -15,10 +16,7 @@ export default function AnalyticsDashboardAppTemplatePage(): ReactNode { if (!template) { throw new Error("Template analytics-dashboard-app-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/base-app-template.tsx b/src/pages/resources/base-app-template.tsx index eb0d14e..385e6b9 100644 --- a/src/pages/resources/base-app-template.tsx +++ b/src/pages/resources/base-app-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; 
import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; @@ -11,10 +12,7 @@ export default function BaseAppTemplatePage(): ReactNode { if (!template) { throw new Error("Template base-app-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/data-app-template.tsx b/src/pages/resources/data-app-template.tsx index bbd19fe..5fd8a6c 100644 --- a/src/pages/resources/data-app-template.tsx +++ b/src/pages/resources/data-app-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; @@ -12,10 +13,7 @@ export default function DataAppTemplatePage(): ReactNode { if (!template) { throw new Error("Template data-app-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/tests/markdown.test.ts b/tests/markdown.test.ts index a8814ce..122bd45 100644 --- a/tests/markdown.test.ts +++ b/tests/markdown.test.ts @@ -19,6 +19,12 @@ describe("detail markdown resolver", () => { expect(markdown).toContain("## Databricks Local Bootstrap"); }); + test("does not duplicate recipe headings in legacy template export", () => { + const markdown = getDetailMarkdown("templates", 
"ai-chat-app-template"); + const matches = markdown.match(/## Databricks Local Bootstrap/g) ?? []; + expect(matches).toHaveLength(1); + }); + test("rejects path traversal", () => { expect(() => getDetailMarkdown("docs", "../package.json")).toThrow( "path traversal", From 83ab55f080092a1ec3135b1562a677372a02c712 Mon Sep 17 00:00:00 2001 From: djliden <7102904+djliden@users.noreply.github.com> Date: Mon, 6 Apr 2026 08:58:48 -0500 Subject: [PATCH 2/5] Add RAG chat recipes and update recipe registry Add three new recipes for RAG chat application development: - embeddings-generation.md: Guide for generating embeddings from documents - lakebase-pgvector.md: PostgreSQL vector storage with pgvector extension - scaffold-rag-chat.md: Scaffolding a RAG chat application Update recipe registry (recipes.ts) to include new RAG chat recipes with proper metadata, prerequisites, and categorization. Update content-entries plugin to support new recipe structure. Fix tsconfig path mapping for improved type resolution. Co-authored-by: Isaac --- content/recipes/embeddings-generation.md | 61 +++++++++++ content/recipes/lakebase-pgvector.md | 127 +++++++++++++++++++++++ content/recipes/scaffold-rag-chat.md | 69 ++++++++++++ plugins/content-entries.ts | 4 +- src/lib/recipes/recipes.ts | 50 +++++++++ tsconfig.json | 2 +- 6 files changed, 311 insertions(+), 2 deletions(-) create mode 100644 content/recipes/embeddings-generation.md create mode 100644 content/recipes/lakebase-pgvector.md create mode 100644 content/recipes/scaffold-rag-chat.md diff --git a/content/recipes/embeddings-generation.md b/content/recipes/embeddings-generation.md new file mode 100644 index 0000000..9d7d97f --- /dev/null +++ b/content/recipes/embeddings-generation.md @@ -0,0 +1,61 @@ +## Generate Embeddings with AI Gateway + +Generate text embeddings from a Databricks AI Gateway endpoint using the Databricks SDK. + +### 1. 
Find an embedding endpoint + +```bash +databricks serving-endpoints list --profile <PROFILE> +``` + +Common embedding endpoints: `databricks-gte-large-en` (1024d), `databricks-bge-large-en` (1024d). + +### 2. Configure environment + +`.env`: + +```bash +DATABRICKS_EMBEDDING_ENDPOINT=databricks-gte-large-en +``` + +`app.yaml`: + +```yaml +env: + - name: DATABRICKS_EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" +``` + +### 3. Embedding helper + +```typescript +import { getWorkspaceClient } from "@databricks/appkit"; + +const workspaceClient = getWorkspaceClient({}); + +async function generateEmbedding(text: string): Promise<number[]> { + const endpoint = + process.env.DATABRICKS_EMBEDDING_ENDPOINT || "databricks-gte-large-en"; + const result = await workspaceClient.servingEndpoints.query({ + name: endpoint, + input: text, + }); + return result.data![0].embedding!; +} +``` + +No additional dependencies — uses `@databricks/appkit` already in your project. + +### 4. Verify + +```bash +databricks serving-endpoints query <endpoint-name> \ + --json '{"input": "Hello, world!"}' \ + --profile <PROFILE> +``` + +Response includes a `data` array with `embedding` (float array). + +#### References + +- [Query embedding models](https://docs.databricks.com/aws/en/machine-learning/model-serving/query-embedding-models) diff --git a/content/recipes/lakebase-pgvector.md b/content/recipes/lakebase-pgvector.md new file mode 100644 index 0000000..f4c183a --- /dev/null +++ b/content/recipes/lakebase-pgvector.md @@ -0,0 +1,127 @@ +## Lakebase pgvector + +Enable vector similarity search in Lakebase using the pgvector extension. + +### 1. Enable pgvector + +```bash +databricks psql --project <project-id> --profile <PROFILE> -- -c " + CREATE EXTENSION IF NOT EXISTS vector; +" +``` + +### 2. 
Create embedding table + +```sql +CREATE SCHEMA IF NOT EXISTS rag; + +CREATE TABLE IF NOT EXISTS rag.documents ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + content TEXT NOT NULL, + embedding VECTOR(1024), + metadata JSONB NOT NULL DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); +``` + +> **Vector dimensions**: `VECTOR(1024)` must match your embedding model output. `databricks-gte-large-en` produces 1024 dimensions. + +### 3. Server-side setup + +Run DDL on boot before `appkit.server.start()`: + +```typescript +async function setupRagTables(appkit: AppKitWithLakebase) { + try { + await appkit.lakebase.query("CREATE EXTENSION IF NOT EXISTS vector"); + } catch (err: unknown) { + const code = (err as { code?: string }).code; + if (code === "42501") { + console.log( + "[rag] Skipping extension creation — insufficient privileges (likely already exists)", + ); + } else { + throw err; + } + } + const { rows } = await appkit.lakebase.query( + `SELECT 1 FROM information_schema.tables + WHERE table_schema = 'rag' AND table_name = 'documents'`, + ); + if (rows.length > 0) return; + await appkit.lakebase.query(`CREATE SCHEMA IF NOT EXISTS rag`); + await appkit.lakebase.query(` + CREATE TABLE IF NOT EXISTS rag.documents ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + content TEXT NOT NULL, + embedding VECTOR(1024), + metadata JSONB NOT NULL DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + `); +} +``` + +### 4. Insert documents + +```typescript +async function insertDocument( + appkit: AppKitWithLakebase, + input: { + content: string; + embedding: number[]; + metadata?: Record<string, unknown>; + }, +) { + const result = await appkit.lakebase.query( + `INSERT INTO rag.documents (content, embedding, metadata) + VALUES ($1, $2::vector, $3) + RETURNING id, content, metadata, created_at`, + [ + input.content, + JSON.stringify(input.embedding), + JSON.stringify(input.metadata ?? {}), + ], + ); + return result.rows[0]; +} +``` + +### 5. 
Query by cosine similarity + +```typescript +async function retrieveSimilar( + appkit: AppKitWithLakebase, + queryEmbedding: number[], + limit = 5, +) { + const result = await appkit.lakebase.query( + `SELECT id, content, metadata, 1 - (embedding <=> $1::vector) AS similarity + FROM rag.documents + WHERE embedding IS NOT NULL + ORDER BY embedding <=> $1::vector + LIMIT $2`, + [JSON.stringify(queryEmbedding), limit], + ); + return result.rows; +} +``` + +> **Distance operators**: `<=>` cosine (default for text), `<->` L2, `<#>` inner product. + +### 6. Create index + +Add after inserting initial data (IVFFlat needs representative data to build): + +```sql +CREATE INDEX IF NOT EXISTS idx_documents_embedding + ON rag.documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); +ANALYZE rag.documents; +``` + +> For higher recall without tuning, use `USING hnsw (embedding vector_cosine_ops)` instead. + +#### References + +- [pgvector](https://github.com/pgvector/pgvector) +- [Lakebase extensions](https://docs.databricks.com/aws/en/oltp/projects/extensions) diff --git a/content/recipes/scaffold-rag-chat.md b/content/recipes/scaffold-rag-chat.md new file mode 100644 index 0000000..1339a50 --- /dev/null +++ b/content/recipes/scaffold-rag-chat.md @@ -0,0 +1,69 @@ +## Scaffold the RAG Chat App + +Scaffold a complete RAG chat app with pgvector retrieval, AI Gateway embeddings, and streaming chat using the AppKit CLI. + +### 1. Scaffold with Lakebase feature + +```bash +databricks apps init \ + --name <app-name> \ + --description "RAG Chat AppKit app with pgvector retrieval" \ + --version latest \ + --features=lakebase \ + --run none \ + --profile <PROFILE> +``` + +The `--features=lakebase` flag adds the Lakebase plugin with Postgres connection fields. The CLI will prompt for resource paths — use full Lakebase resource names from `databricks postgres list-branches` and `databricks postgres list-databases`. + +### 2. 
Enter the project and install dependencies + +```bash +cd <app-name> +npm install +``` + +### 3. Install AI SDK packages + +```bash +npm install ai@6 @ai-sdk/react@3 @ai-sdk/openai +``` + +### 4. Configure environment + +Add to `.env`: + +```bash +DATABRICKS_CONFIG_PROFILE=<PROFILE> +DATABRICKS_ENDPOINT=<chat-endpoint-name> +DATABRICKS_EMBEDDING_ENDPOINT=<embedding-endpoint-name> +DATABRICKS_WORKSPACE_ID=<workspace-id> +RAG_RESEED=false +``` + +The Lakebase connection variables (`PGHOST`, `PGDATABASE`, `LAKEBASE_ENDPOINT`, `PGPORT`, `PGSSLMODE`) are populated by `appkit plugin sync` during scaffold. + +### 5. Verify databricks.yml resource paths + +Open `databricks.yml` and confirm the `postgres_branch` and `postgres_database` variables use full resource paths: + +```yaml +variables: + postgres_branch: projects/<project-id>/branches/<branch-id> + postgres_database: projects/<project-id>/branches/<branch-id>/databases/<database-id> +``` + +Short defaults like `main` will fail on deploy — always use full resource names from `databricks postgres list-branches` and `databricks postgres list-databases`. + +### 6. Verify local dev server + +```bash +npm run dev +``` + +The dev server starts on `http://localhost:8000`. After the first run, check `.env` for duplicate blank entries added by `appkit plugin sync` and remove them. + +#### References + +- [Databricks Apps init](https://docs.databricks.com/aws/en/dev-tools/cli/app-commands#init) +- [AppKit Lakebase plugin](https://databricks.github.io/appkit/docs/plugins/lakebase) diff --git a/plugins/content-entries.ts b/plugins/content-entries.ts index 2f89887..23d0abb 100644 --- a/plugins/content-entries.ts +++ b/plugins/content-entries.ts @@ -76,7 +76,9 @@ function assertSlugParity( } throw new Error( - `Slug mismatch for ${entryType} entries (${sections.join(" | ")}). Keep content markdown and registry metadata in sync.`, + `Slug mismatch for ${entryType} entries (${sections.join( + " | ", + )}). 
Keep content markdown and registry metadata in sync.`, ); } diff --git a/src/lib/recipes/recipes.ts b/src/lib/recipes/recipes.ts index f3dd590..f8ce01f 100644 --- a/src/lib/recipes/recipes.ts +++ b/src/lib/recipes/recipes.ts @@ -113,6 +113,37 @@ export const recipes: Recipe[] = [ tags: ["Analytics", "SQL", "Charts", "Dashboard"], prerequisites: ["databricks-local-bootstrap"], }, + { + id: "embeddings-generation", + name: "Generate Embeddings with AI Gateway", + description: + "Generate text embeddings from a Databricks AI Gateway endpoint using the Databricks SDK for vector similarity search and RAG applications.", + tags: ["AI", "Embeddings", "AI Gateway", "RAG"], + prerequisites: ["databricks-local-bootstrap", "foundation-models-api"], + }, + { + id: "lakebase-pgvector", + name: "Lakebase pgvector", + description: + "Enable vector similarity search in Lakebase using the pgvector extension for storing and querying embeddings.", + tags: ["Lakebase", "Postgres", "pgvector", "Embeddings", "RAG"], + prerequisites: ["lakebase-data-persistence", "embeddings-generation"], + }, + { + id: "scaffold-rag-chat", + name: "Scaffold RAG Chat App", + description: + "Scaffold a complete RAG chat app with pgvector retrieval, AI Gateway embeddings, and streaming chat using the AppKit CLI.", + tags: ["RAG", "Chat", "AI", "AppKit", "pgvector"], + prerequisites: [ + "databricks-local-bootstrap", + "lakebase-data-persistence", + "foundation-models-api", + "embeddings-generation", + "lakebase-pgvector", + "ai-chat-model-serving", + ], + }, ]; const recipeIndex: Record = Object.fromEntries( @@ -124,8 +155,11 @@ export const recipesInOrder: Recipe[] = [ "lakebase-data-persistence", "foundation-models-api", "model-serving-endpoint-creation", + "embeddings-generation", + "lakebase-pgvector", "ai-chat-model-serving", "lakebase-chat-persistence", + "scaffold-rag-chat", "etl-lakehouse-sync-autoscaling", "reverse-etl-synced-tables-autoscaling", "genie-conversational-analytics", @@ -218,6 +252,22 
@@ export const templates: Template[] = [ "genie-conversational-analytics", ], }), + createTemplate({ + id: "rag-chat-app-template", + name: "RAG Chat App Template", + description: + "Build a production-ready RAG chat application with pgvector similarity search, AI Gateway embeddings, streaming responses, and chat persistence.", + recipeIds: [ + "databricks-local-bootstrap", + "lakebase-data-persistence", + "foundation-models-api", + "embeddings-generation", + "lakebase-pgvector", + "ai-chat-model-serving", + "lakebase-chat-persistence", + "scaffold-rag-chat", + ], + }), ]; export const templatePreviewItems: TemplatePreviewItem[] = templates.map( diff --git a/tsconfig.json b/tsconfig.json index 4c4bea3..7f4f690 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -16,5 +16,5 @@ "@/*": ["src/*"] } }, - "exclude": [".docusaurus", "build", "tests"] + "exclude": [".docusaurus", "build", "tests", "templates"] } From 41c4a79cd0bedeb674215c34a219f3dd159a01cd Mon Sep 17 00:00:00 2001 From: djliden <7102904+djliden@users.noreply.github.com> Date: Mon, 6 Apr 2026 16:23:12 -0500 Subject: [PATCH 3/5] Replace scaffold-rag-chat with rag-chat-integration capstone recipe - Add rag-chat-integration.md: complete capstone recipe covering document seeding, RAG-augmented chat route, sources API, chat persistence routes, server bootstrap, client sources display, and App.tsx root route setup - Delete scaffold-rag-chat.md (was just CLI scaffolding with no RAG logic) - Create rag-chat-app-template.tsx page component (fixes broken link) - Update recipes.ts registry (swap slug, update recipeIds) - Enhance template-content.ts with connecting text between recipe phases - Fix @ai-sdk/openai@3 version pin in ai-chat-model-serving recipe - Improve embeddings-generation, lakebase-pgvector, lakebase-chat-persistence recipes with fuller code blocks and setupChatTables/listChats/getChatMessages Verified: an agent can one-shot build a working RAG chat app from the cookbook markdown alone (tested via dv2 
dispatch). Deploy to Databricks Apps blocked by npm proxy issues (package-lock.json resolved URLs point to npm-proxy.dev.databricks.com which is unreachable from app containers). Co-authored-by: Isaac --- content/recipes/ai-chat-model-serving.md | 2 +- content/recipes/embeddings-generation.md | 6 +- content/recipes/lakebase-chat-persistence.md | 86 +- content/recipes/lakebase-pgvector.md | 34 +- content/recipes/rag-chat-integration.md | 821 ++++++++++++++++++ content/recipes/scaffold-rag-chat.md | 69 -- src/lib/recipes/recipes.ts | 16 +- src/lib/template-content.ts | 158 +++- src/pages/resources/rag-chat-app-template.tsx | 67 ++ 9 files changed, 1164 insertions(+), 95 deletions(-) create mode 100644 content/recipes/rag-chat-integration.md delete mode 100644 content/recipes/scaffold-rag-chat.md create mode 100644 src/pages/resources/rag-chat-app-template.tsx diff --git a/content/recipes/ai-chat-model-serving.md b/content/recipes/ai-chat-model-serving.md index 7cabc97..2bc03ca 100644 --- a/content/recipes/ai-chat-model-serving.md +++ b/content/recipes/ai-chat-model-serving.md @@ -28,7 +28,7 @@ If you run `npm run dev` before deploying, your user creates schemas that the de ### 2. Install AI SDK packages ```bash -npm install ai@6 @ai-sdk/react@3 @ai-sdk/openai @databricks/sdk-experimental +npm install ai@6 @ai-sdk/react@3 @ai-sdk/openai@3 @databricks/sdk-experimental ``` > **Version note**: This recipe uses AI SDK v6 APIs (`TextStreamChatTransport`, `sendMessage({ text })`, transport-based `useChat`). Tested with `ai@6.1`, `@ai-sdk/react@3.1`, and `@ai-sdk/openai@3.x`. diff --git a/content/recipes/embeddings-generation.md b/content/recipes/embeddings-generation.md index 9d7d97f..12c7c62 100644 --- a/content/recipes/embeddings-generation.md +++ b/content/recipes/embeddings-generation.md @@ -28,12 +28,16 @@ env: ### 3. 
Embedding helper +Create `server/lib/embeddings.ts`: + +`server/lib/embeddings.ts`: + ```typescript import { getWorkspaceClient } from "@databricks/appkit"; const workspaceClient = getWorkspaceClient({}); -async function generateEmbedding(text: string): Promise { +export async function generateEmbedding(text: string): Promise { const endpoint = process.env.DATABRICKS_EMBEDDING_ENDPOINT || "databricks-gte-large-en"; const result = await workspaceClient.servingEndpoints.query({ diff --git a/content/recipes/lakebase-chat-persistence.md b/content/recipes/lakebase-chat-persistence.md index de4a6d0..75f03eb 100644 --- a/content/recipes/lakebase-chat-persistence.md +++ b/content/recipes/lakebase-chat-persistence.md @@ -42,11 +42,75 @@ In `server/server.ts`, keep `autoStart: false` and run schema setup before `appk ### 3. Add persistence helpers -Create `server/lib/chat-store.ts` and use parameterized queries: +Create `server/lib/chat-store.ts` with table setup and parameterized query helpers: > **Getting userId**: In deployed Databricks Apps, use `req.header("x-forwarded-email")` from the request headers. For local development, use a hardcoded test user ID. 
+`server/lib/chat-store.ts`: + ```typescript +import type { Application } from "express"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { + extend(fn: (app: Application) => void): void; + }; +} + +export async function setupChatTables(appkit: AppKitWithLakebase) { + await appkit.lakebase.query("CREATE SCHEMA IF NOT EXISTS chat"); + await appkit.lakebase.query(` + CREATE TABLE IF NOT EXISTS chat.chats ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id TEXT NOT NULL, + title TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + `); + await appkit.lakebase.query(` + CREATE TABLE IF NOT EXISTS chat.messages ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + chat_id UUID NOT NULL REFERENCES chat.chats(id) ON DELETE CASCADE, + role TEXT NOT NULL CHECK (role IN ('system', 'user', 'assistant', 'tool')), + content TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + `); + try { + await appkit.lakebase.query(` + CREATE INDEX IF NOT EXISTS idx_messages_chat_id_created_at + ON chat.messages(chat_id, created_at) + `); + } catch (err: unknown) { + const code = (err as { code?: string }).code; + if (code === "42501") { + console.log( + "[chat] Skipping index creation — table owned by another identity", + ); + } else { + throw err; + } + } +} + +export async function listChats(appkit: AppKitWithLakebase, userId: string) { + const result = await appkit.lakebase.query( + `SELECT id, user_id, title, created_at, updated_at + FROM chat.chats + WHERE user_id = $1 + ORDER BY updated_at DESC`, + [userId], + ); + return result.rows; +} + export async function createChat( appkit: AppKitWithLakebase, input: { userId: string; title: string }, @@ -60,6 +124,20 @@ export async function createChat( return result.rows[0]; } +export async function getChatMessages( + appkit: AppKitWithLakebase, + chatId: string, 
+) { + const result = await appkit.lakebase.query( + `SELECT id, chat_id, role, content, created_at + FROM chat.messages + WHERE chat_id = $1 + ORDER BY created_at ASC`, + [chatId], + ); + return result.rows; +} + export async function appendMessage( appkit: AppKitWithLakebase, input: { chatId: string; role: string; content: string }, @@ -70,10 +148,16 @@ export async function appendMessage( RETURNING id, chat_id, role, content, created_at`, [input.chatId, input.role, input.content], ); + await appkit.lakebase.query( + `UPDATE chat.chats SET updated_at = NOW() WHERE id = $1`, + [input.chatId], + ); return result.rows[0]; } ``` +Call `setupChatTables(appkit)` from `server.ts` before starting the server. The `listChats`, `createChat`, `getChatMessages`, and `appendMessage` functions are imported by the chat persistence routes and chat routes in later recipes. + ### 4. Persist in the `/api/chat` flow In your chat route: diff --git a/content/recipes/lakebase-pgvector.md b/content/recipes/lakebase-pgvector.md index f4c183a..a4fcc1c 100644 --- a/content/recipes/lakebase-pgvector.md +++ b/content/recipes/lakebase-pgvector.md @@ -26,12 +26,28 @@ CREATE TABLE IF NOT EXISTS rag.documents ( > **Vector dimensions**: `VECTOR(1024)` must match your embedding model output. `databricks-gte-large-en` produces 1024 dimensions. -### 3. Server-side setup +### 3. Server-side RAG store module -Run DDL on boot before `appkit.server.start()`: +Create `server/lib/rag-store.ts` with table setup, insert, and similarity search. Call `setupRagTables(appkit)` from `server.ts` before starting the server.
+ +`server/lib/rag-store.ts`: ```typescript -async function setupRagTables(appkit: AppKitWithLakebase) { +import type { Application } from "express"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { + extend(fn: (app: Application) => void): void; + }; +} + +export async function setupRagTables(appkit: AppKitWithLakebase) { try { await appkit.lakebase.query("CREATE EXTENSION IF NOT EXISTS vector"); } catch (err: unknown) { @@ -60,12 +76,8 @@ async function setupRagTables(appkit: AppKitWithLakebase) { ) `); } -``` - -### 4. Insert documents -```typescript -async function insertDocument( +export async function insertDocument( appkit: AppKitWithLakebase, input: { content: string; @@ -85,12 +97,8 @@ async function insertDocument( ); return result.rows[0]; } -``` -### 5. Query by cosine similarity - -```typescript -async function retrieveSimilar( +export async function retrieveSimilar( appkit: AppKitWithLakebase, queryEmbedding: number[], limit = 5, diff --git a/content/recipes/rag-chat-integration.md b/content/recipes/rag-chat-integration.md new file mode 100644 index 0000000..24b1ad2 --- /dev/null +++ b/content/recipes/rag-chat-integration.md @@ -0,0 +1,821 @@ +## RAG Chat Integration + +Wire pgvector retrieval, document seeding, and a sources API into a streaming chat app to build a complete RAG experience. This recipe builds on the embeddings, pgvector, chat, and persistence recipes to produce a fully working RAG chat app. + +### 1. 
Follow the prerequisite recipes first + +Complete these recipes before continuing: + +- [`Generate Embeddings with AI Gateway`](/resources/rag-chat-app-template#generate-embeddings-with-ai-gateway) +- [`Lakebase pgvector`](/resources/rag-chat-app-template#lakebase-pgvector) +- [`Streaming AI Chat with Model Serving`](/resources/rag-chat-app-template#streaming-ai-chat-with-model-serving) +- [`Lakebase Chat Persistence`](/resources/rag-chat-app-template#lakebase-chat-persistence) + +### 2. Add seed environment variable + +Add `RAG_RESEED` to `.env` so you can control whether seeding re-runs on restart: + +`.env`: + +```bash +RAG_RESEED=false +``` + +`app.yaml` (add under `env`): + +```yaml +env: + - name: RAG_RESEED + value: "false" +``` + +### 3. Create the document seeding module + +This module fetches Wikipedia articles, chunks them at paragraph boundaries, and seeds the pgvector table. Adapt `fetchWikipediaArticle()` for your own data sources. + +`server/lib/seed-data.ts`: + +```typescript +import type { Application } from "express"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { + extend(fn: (app: Application) => void): void; + }; +} + +const WIKIPEDIA_ARTICLES = [ + "Databricks", + "Apache_Spark", + "Delta_Lake_(software)", + "Apache_Iceberg", + "Data_lakehouse", + "Apache_Parquet", + "Extract,_transform,_load", + "Retrieval-augmented_generation", + "Data_lake", +]; + +const SHOULD_RESEED = process.env.RAG_RESEED === "true"; + +async function fetchWikipediaArticle(title: string): Promise { + const url = + "https://en.wikipedia.org/w/api.php?" + + new URLSearchParams({ + action: "query", + prop: "extracts", + explaintext: "1", + format: "json", + titles: title, + }); + const res = await fetch(url); + const data = (await res.json()) as { + query: { pages: Record }; + }; + const page = Object.values(data.query.pages)[0]; + return page.extract ?? 
""; +} + +function chunkText(text: string, maxLen = 1000): string[] { + const paragraphs = text.split(/\n\n+/).filter((p) => p.trim().length > 50); + const chunks: string[] = []; + let cur = ""; + for (const p of paragraphs) { + if (cur.length + p.length > maxLen && cur) { + chunks.push(cur.trim()); + cur = ""; + } + cur += p + "\n\n"; + } + if (cur.trim()) chunks.push(cur.trim()); + return chunks; +} + +export async function seedFromWikipedia( + appkit: AppKitWithLakebase, + generateEmbedding: (text: string) => Promise, + insertDocument: ( + appkit: AppKitWithLakebase, + input: { + content: string; + embedding: number[]; + metadata?: Record; + }, + ) => Promise>, +) { + const { rows } = await appkit.lakebase.query( + "SELECT COUNT(*) as count FROM rag.documents", + ); + const existingCount = parseInt(String(rows[0].count), 10); + if (existingCount > 0 && !SHOULD_RESEED) return; + if (existingCount > 0 && SHOULD_RESEED) { + await appkit.lakebase.query("DELETE FROM rag.documents"); + } + for (const title of WIKIPEDIA_ARTICLES) { + try { + const chunks = chunkText(await fetchWikipediaArticle(title)); + for (const [index, chunk] of chunks.entries()) { + await insertDocument(appkit, { + content: chunk, + embedding: await generateEmbedding(chunk), + metadata: { source: "wikipedia", article: title, chunkIndex: index }, + }); + } + } catch (err) { + console.warn(`[seed] ${title} failed:`, (err as Error).message); + } + } +} +``` + +Key points: + +- `chunkText()` splits on paragraph boundaries (double newlines), drops paragraphs of 50 characters or fewer, and merges consecutive paragraphs until adding the next one would exceed `maxLen`; a single paragraph longer than `maxLen` is kept whole as its own chunk. This keeps semantic units together. +- `seedFromWikipedia()` is idempotent: it skips seeding if documents exist unless `RAG_RESEED=true`. +- `generateEmbedding` and `insertDocument` are injected as parameters from earlier recipes. + +### 4. Create RAG-augmented chat routes + +This is the core RAG flow.
The `/api/chat` route embeds the user query, retrieves similar documents, injects them as system context, and streams the response. The `/api/chat/sources` endpoint lets the client fetch sources in parallel. + +`server/routes/chat-routes.ts`: + +```typescript +import { createOpenAI } from "@ai-sdk/openai"; +import { streamText, type UIMessage } from "ai"; +import { Config } from "@databricks/sdk-experimental"; +import type { Application } from "express"; +import { generateEmbedding } from "../lib/embeddings"; +import { retrieveSimilar } from "../lib/rag-store"; +import { appendMessage } from "../lib/chat-store"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { extend(fn: (app: Application) => void): void }; +} + +async function getDatabricksToken() { + if (process.env.DATABRICKS_TOKEN) return process.env.DATABRICKS_TOKEN; + const config = new Config({ + profile: process.env.DATABRICKS_CONFIG_PROFILE || "DEFAULT", + }); + await config.ensureResolved(); + const headers = new Headers(); + await config.authenticate(headers); + const authHeader = headers.get("Authorization"); + if (!authHeader) + throw new Error( + "Failed to get Databricks token. 
Check your CLI profile or set DATABRICKS_TOKEN.", + ); + return authHeader.replace("Bearer ", ""); +} + +export function setupChatRoutes(appkit: AppKitWithLakebase) { + appkit.server.extend((app) => { + // Retrieve RAG sources for a query (called by client before/alongside chat) + app.get("/api/chat/sources", async (req, res) => { + const query = req.query.q as string | undefined; + if (!query) { + res.json([]); + return; + } + try { + const embedding = await generateEmbedding(query); + const similar = await retrieveSimilar(appkit, embedding, 5); + const sources = similar.map( + (d: Record, i: number) => ({ + index: i + 1, + content: d.content as string, + similarity: d.similarity as number, + metadata: d.metadata as Record, + }), + ); + res.json(sources); + } catch (err) { + console.error("[chat:sources]", (err as Error).message); + res.json([]); + } + }); + + app.post("/api/chat", async (req, res) => { + const { messages, chatId } = req.body as { + messages: UIMessage[]; + chatId: string; + }; + const coreMessages = messages.map((m) => ({ + role: m.role as "user" | "assistant" | "system", + content: + m.parts + ?.filter( + (p): p is Extract => + p.type === "text", + ) + .map((p) => p.text) + .join("") ?? "", + })); + + try { + const lastUserMsg = coreMessages.filter((m) => m.role === "user").pop(); + + // Save the user message + if (lastUserMsg && chatId) { + await appendMessage(appkit, { + chatId, + role: "user", + content: lastUserMsg.content, + }); + } + + const token = await getDatabricksToken(); + const endpoint = + process.env.DATABRICKS_ENDPOINT || "databricks-gpt-5-4-mini"; + + let contextPrefix = ""; + if (lastUserMsg) { + const similar = await retrieveSimilar( + appkit, + await generateEmbedding(lastUserMsg.content), + 5, + ); + if (similar.length > 0) { + contextPrefix = + "Use the following context to inform your answer. 
If not relevant, say so.\n\n" + + similar + .map( + (d: Record, i: number) => + `[${i + 1}] ${d.content}`, + ) + .join("\n\n"); + } + } + + const augmented = [ + ...(contextPrefix + ? [{ role: "system" as const, content: contextPrefix }] + : []), + ...coreMessages, + ]; + + const databricks = createOpenAI({ + baseURL: `https://${process.env.DATABRICKS_WORKSPACE_ID}.ai-gateway.cloud.databricks.com/mlflow/v1`, + apiKey: token, + }); + const result = streamText({ + model: databricks.chat(endpoint), + messages: augmented, + maxOutputTokens: 1000, + onFinish: async ({ text }) => { + if (chatId) { + await appendMessage(appkit, { + chatId, + role: "assistant", + content: text, + }); + } + }, + }); + result.pipeTextStreamToResponse(res); + } catch (err) { + console.error("[chat]", (err as Error).message); + res.status(502).json({ error: "Chat request failed" }); + } + }); + }); +} +``` + +The RAG flow in `/api/chat`: + +1. Extract the last user message +2. Save it to the chat session via `appendMessage()` +3. Embed the query with `generateEmbedding()` +4. Retrieve the top 5 similar documents with `retrieveSimilar()` +5. Build a context prefix with numbered sources +6. Prepend it as a system message +7. Stream the response and persist the assistant reply on finish + +### 5. Create chat persistence routes + +These REST endpoints let the client list and create chat sessions, load a session's messages, and append a message to a session. The `getUserId()` function reads the `x-forwarded-email` header set by Databricks Apps in production and falls back to `local-dev-user` when the header is absent.
+ +`server/routes/chat-persistence-routes.ts`: + +```typescript +import type { Application } from "express"; +import { + listChats, + createChat, + getChatMessages, + appendMessage, +} from "../lib/chat-store"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { + extend(fn: (app: Application) => void): void; + }; +} + +function getUserId(req: { header(name: string): string | undefined }): string { + return req.header("x-forwarded-email") || "local-dev-user"; +} + +export function setupChatPersistenceRoutes(appkit: AppKitWithLakebase) { + appkit.server.extend((app) => { + // List all chat sessions for the current user + app.get("/api/chats", async (req, res) => { + try { + const chats = await listChats(appkit, getUserId(req)); + res.json(chats); + } catch (err) { + console.error("[chats:list]", (err as Error).message); + res.status(500).json({ error: "Failed to list chats" }); + } + }); + + // Create a new chat session + app.post("/api/chats", async (req, res) => { + try { + const { title } = req.body as { title?: string }; + const chat = await createChat(appkit, { + userId: getUserId(req), + title: title || "New Chat", + }); + res.status(201).json(chat); + } catch (err) { + console.error("[chats:create]", (err as Error).message); + res.status(500).json({ error: "Failed to create chat" }); + } + }); + + // Load messages for a chat session + app.get("/api/chats/:id/messages", async (req, res) => { + try { + const messages = await getChatMessages(appkit, req.params.id); + res.json(messages); + } catch (err) { + console.error("[chats:messages]", (err as Error).message); + res.status(500).json({ error: "Failed to load messages" }); + } + }); + + // Save a message to a chat session + app.post("/api/chats/:id/messages", async (req, res) => { + try { + const { role, content } = req.body as { role: string; content: string }; + const message = await appendMessage(appkit, { + 
chatId: req.params.id, + role, + content, + }); + res.status(201).json(message); + } catch (err) { + console.error("[chats:save-message]", (err as Error).message); + res.status(500).json({ error: "Failed to save message" }); + } + }); + }); +} +``` + +### 6. Wire everything together in the server entry point + +The server bootstrap creates the AppKit instance, runs table setup and seeding, registers all routes, and starts the server. + +`server/server.ts`: + +```typescript +import { createApp, server, lakebase } from "@databricks/appkit"; +import { setupRagTables, insertDocument } from "./lib/rag-store"; +import { setupChatRoutes } from "./routes/chat-routes"; +import { setupChatPersistenceRoutes } from "./routes/chat-persistence-routes"; +import { setupChatTables } from "./lib/chat-store"; +import { seedFromWikipedia } from "./lib/seed-data"; +import { generateEmbedding } from "./lib/embeddings"; + +const appkit = await createApp({ + plugins: [server({ autoStart: false }), lakebase()], +}); + +await setupRagTables(appkit); +await setupChatTables(appkit); +await seedFromWikipedia(appkit, generateEmbedding, insertDocument); +setupChatRoutes(appkit); +setupChatPersistenceRoutes(appkit); +await appkit.server.start(); +``` + +The order matters: tables must exist before seeding, and seeding must complete before routes start handling requests. + +### 7. Create the RAG chat page + +This replaces the basic `ChatPage` from the streaming chat recipe with a full RAG-enabled version. It includes a chat sidebar, auto-creation of chat sessions, parallel source fetching, and an expandable sources display beneath each assistant response. 
+ +`client/src/pages/ChatPage.tsx`: + +```tsx +import { useChat } from "@ai-sdk/react"; +import { TextStreamChatTransport } from "ai"; +import { useState, useEffect, useCallback, useRef } from "react"; +import { + MessageSquarePlus, + MessageSquare, + ChevronDown, + ChevronRight, +} from "lucide-react"; +import { + Button, + Input, + ScrollArea, + Separator, +} from "@databricks/appkit-ui/react"; + +interface ChatSession { + id: string; + title: string; + created_at: string; + updated_at: string; +} + +interface ChatMessage { + id: string; + chat_id: string; + role: string; + content: string; + created_at: string; +} + +interface RagSource { + index: number; + content: string; + similarity: number; + metadata: Record; +} + +function createTransport(chatIdRef: React.RefObject) { + return new TextStreamChatTransport({ + api: "/api/chat", + body: () => (chatIdRef.current ? { chatId: chatIdRef.current } : {}), + headers: { "Content-Type": "application/json" }, + }); +} + +function SourcesDisplay({ sources }: { sources: RagSource[] }) { + const [expanded, setExpanded] = useState(false); + + if (sources.length === 0) return null; + + return ( +
+ + {expanded && ( +
+ {sources.map((source) => ( +
+
+ Source {source.index} + + similarity: {(Number(source.similarity) * 100).toFixed(1)}% + +
+

+ {source.content} +

+
+ ))} +
+ )} +
+ ); +} + +export function ChatPage() { + const [chatId, setChatId] = useState(null); + const chatIdRef = useRef(null); + const chatLoadTokenRef = useRef(0); + const [chats, setChats] = useState([]); + const [sidebarOpen, setSidebarOpen] = useState(true); + const transportRef = useRef(createTransport(chatIdRef)); + const [sourcesMap, setSourcesMap] = useState>({}); + + const [input, setInput] = useState(""); + const { messages, setMessages, sendMessage, status } = useChat({ + transport: transportRef.current, + }); + + const loadChats = useCallback(async () => { + const res = await fetch("/api/chats"); + if (res.ok) setChats(await res.json()); + }, []); + + useEffect(() => { + void loadChats(); + }, [loadChats]); + + useEffect(() => { + chatIdRef.current = chatId; + }, [chatId]); + + const selectChat = useCallback( + async (id: string) => { + const loadToken = ++chatLoadTokenRef.current; + setChatId(id); + chatIdRef.current = id; + setSourcesMap({}); + setMessages([]); + const res = await fetch(`/api/chats/${id}/messages`); + if (!res.ok) return; + const saved: ChatMessage[] = await res.json(); + if (loadToken !== chatLoadTokenRef.current) return; + const restored = saved.map((m, i) => ({ + id: m.id || String(i), + role: m.role as "user" | "assistant", + content: m.content, + parts: [{ type: "text" as const, text: m.content }], + createdAt: new Date(m.created_at), + })); + setMessages(restored); + }, + [setMessages], + ); + + const startNewChat = useCallback(() => { + chatLoadTokenRef.current += 1; + setChatId(null); + chatIdRef.current = null; + setMessages([]); + setSourcesMap({}); + }, [setMessages]); + + const handleSubmit = useCallback( + async (e: React.FormEvent) => { + e.preventDefault(); + const text = input.trim(); + if (!text) return; + + let activeChatId = chatId; + + if (!activeChatId) { + const title = text.slice(0, 80); + const res = await fetch("/api/chats", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: 
JSON.stringify({ title }), + }); + if (!res.ok) return; + const chat: ChatSession = await res.json(); + activeChatId = chat.id; + setChatId(activeChatId); + chatIdRef.current = activeChatId; + } + + const sourcesPromise = fetch( + `/api/chat/sources?q=${encodeURIComponent(text)}`, + ) + .then((res) => (res.ok ? res.json() : [])) + .catch(() => [] as RagSource[]); + + await sendMessage({ text }); + setInput(""); + + const sources: RagSource[] = await sourcesPromise; + if (sources.length > 0) { + setSourcesMap((prev) => ({ ...prev, [text]: sources })); + } + + void loadChats(); + }, + [input, chatId, sendMessage, setInput, loadChats], + ); + + function getSourcesForAssistantMessage(index: number): RagSource[] { + if (index === 0) return []; + const prevMessage = messages[index - 1]; + if (!prevMessage || prevMessage.role !== "user") return []; + const userText = + prevMessage.parts + ?.filter( + (p): p is Extract => p.type === "text", + ) + .map((p) => p.text) + .join("") ?? ""; + return sourcesMap[userText] || []; + } + + return ( +
+ {sidebarOpen && ( +
+
+

+ Lakehouse Knowledge Assistant +

+

+ Ask questions about Databricks, Spark, Delta Lake, and the + lakehouse. +

+
+
+ +
+ + +
+ {chats.map((chat) => ( + + ))} + {chats.length === 0 && ( +

+ No previous chats +

+ )} +
+
+
+ )} + +
+
+ +

RAG Chat

+
+ + +
+ {messages.length === 0 && ( +
+

+ Lakehouse Knowledge Assistant +

+

+ Ask questions about Databricks, Apache Spark, Delta Lake, and + the data lakehouse. Answers are grounded in a curated + knowledge base. +

+
+ )} + {messages.map((message, msgIndex) => ( +
+

+ {message.role === "user" ? "You" : "Assistant"} +

+ {message.parts.map((part, index) => + part.type === "text" ? ( +

+ {part.text} +

+ ) : null, + )} + {message.role === "assistant" && ( + + )} +
+ ))} +
+
+ +
+
+ setInput(e.target.value)} + placeholder="Ask a question..." + disabled={status !== "ready"} + /> + +
+
+
+
+ ); +} +``` + +Key patterns in this component: + +- **`createTransport`** passes `chatId` via the `body` callback so the server knows which session to persist to +- **`SourcesDisplay`** is an expandable panel showing retrieved context with similarity scores +- **Auto-create chat**: on first message, `handleSubmit` creates a chat session via `POST /api/chats` before sending +- **Parallel source fetch**: sources are fetched from `/api/chat/sources` in parallel with the chat request +- **`sourcesMap`** keys sources by user message text so each assistant response maps to its retrieval context +- **`selectChat`** loads persisted messages and restores them into the `useChat` state + +### 8. Make ChatPage the root route + +Update `client/src/App.tsx` so the chat is the entire app — remove the scaffold's Home page and Lakebase demo, and make `ChatPage` the root route: + +`client/src/App.tsx`: + +```tsx +import { createBrowserRouter, RouterProvider } from "react-router"; +import { ChatPage } from "./pages/ChatPage"; + +const router = createBrowserRouter([ + { + path: "/", + element: , + }, +]); + +export default function App() { + return ; +} +``` + +You can also delete the scaffold pages you no longer need: +- `client/src/pages/lakebase/LakebasePage.tsx` +- Any `HomePage` or welcome page from the scaffold + +### 9. Deploy and verify + +Deploy the app and verify the full RAG flow: + +```bash +databricks apps deploy --profile +``` + +Verification checklist: + +- Send "What is Apache Spark?" 
and confirm the answer is grounded in retrieved context +- Expand the sources panel beneath the response and verify similarity scores appear +- Refresh the page and confirm the chat session persists in the sidebar +- Start a new chat and verify it creates a separate session + +#### References + +- [Vercel AI SDK](https://ai-sdk.dev/docs/getting-started/overview) +- [pgvector](https://github.com/pgvector/pgvector) +- [Databricks Apps](https://docs.databricks.com/en/dev-tools/databricks-apps/index.html) +- [AppKit](https://databricks.github.io/appkit/) diff --git a/content/recipes/scaffold-rag-chat.md b/content/recipes/scaffold-rag-chat.md deleted file mode 100644 index 1339a50..0000000 --- a/content/recipes/scaffold-rag-chat.md +++ /dev/null @@ -1,69 +0,0 @@ -## Scaffold the RAG Chat App - -Scaffold a complete RAG chat app with pgvector retrieval, AI Gateway embeddings, and streaming chat using AppKit CLI. - -### 1. Scaffold with Lakebase feature - -```bash -databricks apps init \ - --name \ - --description "RAG Chat AppKit app with pgvector retrieval" \ - --version latest \ - --features=lakebase \ - --run none \ - --profile -``` - -The `--features=lakebase` flag adds the Lakebase plugin with Postgres connection fields. The CLI will prompt for resource paths — use full Lakebase resource names from `databricks postgres list-branches` and `databricks postgres list-databases`. - -### 2. Enter the project and install dependencies - -```bash -cd -npm install -``` - -### 3. Install AI SDK packages - -```bash -npm install ai@6 @ai-sdk/react@3 @ai-sdk/openai -``` - -### 4. Configure environment - -Add to `.env`: - -```bash -DATABRICKS_CONFIG_PROFILE= -DATABRICKS_ENDPOINT= -DATABRICKS_EMBEDDING_ENDPOINT= -DATABRICKS_WORKSPACE_ID= -RAG_RESEED=false -``` - -The Lakebase connection variables (`PGHOST`, `PGDATABASE`, `LAKEBASE_ENDPOINT`, `PGPORT`, `PGSSLMODE`) are populated by `appkit plugin sync` during scaffold. - -### 5. 
Verify databricks.yml resource paths - -Open `databricks.yml` and confirm the `postgres_branch` and `postgres_database` variables use full resource paths: - -```yaml -variables: - postgres_branch: projects//branches/ - postgres_database: projects//branches//databases/ -``` - -Short defaults like `main` will fail on deploy — always use full resource names from `databricks postgres list-branches` and `databricks postgres list-databases`. - -### 6. Verify local dev server - -```bash -npm run dev -``` - -The dev server starts on `http://localhost:8000`. After first run, check `.env` for duplicate blank entries added by `appkit plugin sync` and remove them. - -#### References - -- [Databricks Apps init](https://docs.databricks.com/aws/en/dev-tools/cli/app-commands#init) -- [AppKit Lakebase plugin](https://databricks.github.io/appkit/docs/plugins/lakebase) diff --git a/src/lib/recipes/recipes.ts b/src/lib/recipes/recipes.ts index f8ce01f..6a5b825 100644 --- a/src/lib/recipes/recipes.ts +++ b/src/lib/recipes/recipes.ts @@ -130,18 +130,16 @@ export const recipes: Recipe[] = [ prerequisites: ["lakebase-data-persistence", "embeddings-generation"], }, { - id: "scaffold-rag-chat", - name: "Scaffold RAG Chat App", + id: "rag-chat-integration", + name: "RAG Chat Integration", description: - "Scaffold a complete RAG chat app with pgvector retrieval, AI Gateway embeddings, and streaming chat using the AppKit CLI.", - tags: ["RAG", "Chat", "AI", "AppKit", "pgvector"], + "Wire pgvector retrieval, document seeding, and a sources API into a streaming chat app to build a complete RAG experience.", + tags: ["RAG", "Chat", "AI", "pgvector", "Embeddings"], prerequisites: [ - "databricks-local-bootstrap", - "lakebase-data-persistence", - "foundation-models-api", "embeddings-generation", "lakebase-pgvector", "ai-chat-model-serving", + "lakebase-chat-persistence", ], }, ]; @@ -159,7 +157,7 @@ export const recipesInOrder: Recipe[] = [ "lakebase-pgvector", "ai-chat-model-serving", 
"lakebase-chat-persistence", - "scaffold-rag-chat", + "rag-chat-integration", "etl-lakehouse-sync-autoscaling", "reverse-etl-synced-tables-autoscaling", "genie-conversational-analytics", @@ -265,7 +263,7 @@ export const templates: Template[] = [ "lakebase-pgvector", "ai-chat-model-serving", "lakebase-chat-persistence", - "scaffold-rag-chat", + "rag-chat-integration", ], }), ]; diff --git a/src/lib/template-content.ts b/src/lib/template-content.ts index ff69662..f6c9949 100644 --- a/src/lib/template-content.ts +++ b/src/lib/template-content.ts @@ -7,7 +7,163 @@ export type TemplateContentBlock = type RawRecipeMarkdownById = Record; -const templateContentById: Record = {}; +const templateContentById: Record = { + "rag-chat-app-template": [ + { + type: "markdown", + content: `## What you are building + +A RAG (retrieval-augmented generation) chat app on Databricks. When a user sends a message, the server embeds the query, retrieves relevant documents from a pgvector table, injects them as context into the prompt, and streams a grounded response. The app persists chat sessions so users can resume conversations after refresh. 
+ +### Architecture + +\`\`\` +Client (React) Server (Express + AppKit) +-------------- ------------------------- +ChatPage POST /api/chat + useChat (AI SDK v6) embed query -> retrieveSimilar -> build context + SourcesDisplay prepend system message -> streamText -> persist + chat sidebar GET /api/chat/sources + auto-create chat on first msg embed query -> retrieveSimilar -> return sources + GET /api/chats (list sessions) + POST /api/chats (create session) + GET /api/chats/:id/messages (load history) +\`\`\` + +### Final project structure + +After completing all recipes, your project will have these files: + +\`\`\` +server/ + server.ts # Entry point: setup tables, seed, register routes, start + lib/ + embeddings.ts # generateEmbedding() via AI Gateway + rag-store.ts # setupRagTables(), insertDocument(), retrieveSimilar() + chat-store.ts # setupChatTables(), createChat(), appendMessage(), listChats(), getChatMessages() + seed-data.ts # seedFromWikipedia() with paragraph chunking + routes/ + chat-routes.ts # POST /api/chat (RAG flow), GET /api/chat/sources + chat-persistence-routes.ts # CRUD endpoints for chat sessions +client/ + src/pages/ + ChatPage.tsx # Chat UI with sources display and sidebar +\`\`\` + +### Prerequisites + +- A Databricks workspace with a CLI profile configured +- A Lakebase Postgres project with pgvector extension enabled +- AI Gateway endpoints for chat (e.g. \`databricks-meta-llama-3-3-70b-instruct\`) and embeddings (e.g. \`databricks-gte-large-en\`) + +Work through the recipes below in order. Each one builds on the previous.`, + }, + { + type: "markdown", + content: `--- + +## Phase 1: Project setup + +Scaffold the app, authenticate, and install agent skills. This creates the base AppKit project structure.`, + }, + { type: "recipe", recipeId: "databricks-local-bootstrap" }, + { + type: "markdown", + content: `--- + +## Phase 2: Database layer + +Add Lakebase (managed Postgres) to your app. 
This recipe creates the \`lakebase()\` plugin connection and a sample CRUD app. You will not use the sample CRUD routes in the final app, but you need the Lakebase plugin wiring, environment variables, and \`databricks.yml\` resource configuration it sets up. + +**Important:** When scaffolding with \`--features=lakebase\`, use the full resource paths from \`databricks postgres list-branches\` and \`databricks postgres list-databases\`. Short names like \`main\` will fail on deploy.`, + }, + { type: "recipe", recipeId: "lakebase-data-persistence" }, + { + type: "markdown", + content: `--- + +## Phase 3: AI Gateway + +Configure your chat and embedding model endpoints. The key outputs from this recipe are the \`DATABRICKS_ENDPOINT\` and \`DATABRICKS_WORKSPACE_ID\` environment variables, and understanding how to use \`createOpenAI()\` with the AI Gateway URL pattern.`, + }, + { type: "recipe", recipeId: "foundation-models-api" }, + { + type: "markdown", + content: `--- + +## Phase 4: Embeddings + +Create the \`generateEmbedding()\` function that calls a Databricks embedding endpoint. This function is used in three places later: +1. **Document seeding** — embed each text chunk before inserting into pgvector +2. **RAG retrieval** — embed the user's query to find similar documents +3. **Sources endpoint** — embed the query to return sources to the client + +Create this as \`server/lib/embeddings.ts\` and export the \`generateEmbedding\` function.`, + }, + { type: "recipe", recipeId: "embeddings-generation" }, + { + type: "markdown", + content: `--- + +## Phase 5: Vector store + +Set up pgvector tables and the insert/query functions. 
This recipe produces three exports from \`server/lib/rag-store.ts\`: +- \`setupRagTables(appkit)\` — creates the \`rag.documents\` table with a \`VECTOR(1024)\` column on startup +- \`insertDocument(appkit, { content, embedding, metadata })\` — inserts a document with its embedding +- \`retrieveSimilar(appkit, queryEmbedding, limit)\` — cosine similarity search + +These are used by the seeding module and chat routes in later recipes.`, + }, + { type: "recipe", recipeId: "lakebase-pgvector" }, + { + type: "markdown", + content: `--- + +## Phase 6: Streaming chat + +Build the base chat experience with AI SDK v6 streaming. This recipe gives you: +- The \`getDatabricksToken()\` auth helper +- A basic \`POST /api/chat\` route with \`streamText()\` +- A \`ChatPage\` component with \`useChat()\` and \`TextStreamChatTransport\` + +**Important:** Phase 8 replaces \`server/server.ts\`, \`server/routes/chat-routes.ts\`, and \`ChatPage.tsx\` with RAG-augmented versions. If you are building the full RAG app in one pass, **skip creating those files now** — just install the packages and read this recipe for context, then use the Phase 8 versions. + +Install the required packages: + +\`\`\`bash +npm install ai@6 @ai-sdk/react@3 @ai-sdk/openai@3 @databricks/sdk-experimental +\`\`\``, + }, + { type: "recipe", recipeId: "ai-chat-model-serving" }, + { + type: "markdown", + content: `--- + +## Phase 7: Chat persistence + +Add chat session storage so conversations survive page refreshes. 
This recipe provides the complete \`server/lib/chat-store.ts\` module with five exports: +- \`setupChatTables(appkit)\` — creates \`chat.chats\` and \`chat.messages\` tables on startup +- \`createChat(appkit, { userId, title })\` — creates a new chat session +- \`listChats(appkit, userId)\` — lists all chats for a user +- \`getChatMessages(appkit, chatId)\` — loads message history in chronological order +- \`appendMessage(appkit, { chatId, role, content })\` — saves a message and updates the chat's \`updated_at\` timestamp`, + }, + { type: "recipe", recipeId: "lakebase-chat-persistence" }, + { + type: "markdown", + content: `--- + +## Phase 8: RAG integration + +This is the capstone recipe. It ties everything together: document seeding, RAG-augmented chat routes (replacing the basic ones from Phase 6), sources API, chat persistence REST endpoints, the server bootstrap, and client-side sources display. + +**This recipe's \`server/server.ts\`, \`server/routes/chat-routes.ts\`, and \`ChatPage\` replace the versions from earlier recipes.** The earlier recipes taught the individual patterns; this recipe provides the final integrated versions. 
+ +This recipe also replaces the scaffold's \`App.tsx\` — the chat becomes the entire app (root route), removing the scaffold's Home page and Lakebase demo.`, + }, + { type: "recipe", recipeId: "rag-chat-integration" }, + ], +}; export function getTemplateContentBlocks( templateId: string, diff --git a/src/pages/resources/rag-chat-app-template.tsx b/src/pages/resources/rag-chat-app-template.tsx new file mode 100644 index 0000000..53d48e9 --- /dev/null +++ b/src/pages/resources/rag-chat-app-template.tsx @@ -0,0 +1,67 @@ +import type { ReactNode } from "react"; +import { TemplateDetail } from "@/components/templates/template-detail"; +import { templates } from "@/lib/recipes/recipes"; +import { + buildTemplateRawMarkdown, + getTemplateContentBlocks, +} from "@/lib/template-content"; +import { TemplateBlockRenderer } from "@/components/templates/template-blocks"; +import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; +import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; +import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; +import FoundationModelsApi from "@site/content/recipes/foundation-models-api.md"; +import EmbeddingsGeneration from "@site/content/recipes/embeddings-generation.md"; +import LakebasePgvector from "@site/content/recipes/lakebase-pgvector.md"; +import AiChatModelServing from "@site/content/recipes/ai-chat-model-serving.md"; +import LakebaseChatPersistence from "@site/content/recipes/lakebase-chat-persistence.md"; +import RagChatIntegration from "@site/content/recipes/rag-chat-integration.md"; + +const template = templates.find((t) => t.id === "rag-chat-app-template"); +const blocks = getTemplateContentBlocks("rag-chat-app-template"); + +const recipeComponents: Record = { + "databricks-local-bootstrap": DatabricksLocalBootstrap, + "lakebase-data-persistence": LakebaseDataPersistence, + "foundation-models-api": FoundationModelsApi, + "embeddings-generation": 
EmbeddingsGeneration, + "lakebase-pgvector": LakebasePgvector, + "ai-chat-model-serving": AiChatModelServing, + "lakebase-chat-persistence": LakebaseChatPersistence, + "rag-chat-integration": RagChatIntegration, +}; + +export default function RagChatAppTemplatePage(): ReactNode { + const rawBySlug = useAllRawRecipeMarkdown(); + if (!template) { + throw new Error("Template rag-chat-app-template not found"); + } + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); + return ( + + {blocks ? ( + + ) : ( + <> + +
+ +
+ +
+ +
+ +
+ +
+ +
+ + + )} +
+ ); +} From e499032e58e652774697be72d1ab6cdeaf6b5c3d Mon Sep 17 00:00:00 2001 From: djliden <7102904+djliden@users.noreply.github.com> Date: Mon, 6 Apr 2026 16:38:47 -0500 Subject: [PATCH 4/5] Remove stale templates exclude from tsconfig.json No templates/ directory exists in the repo. Co-authored-by: Isaac --- tsconfig.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsconfig.json b/tsconfig.json index 7f4f690..4c4bea3 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -16,5 +16,5 @@ "@/*": ["src/*"] } }, - "exclude": [".docusaurus", "build", "tests", "templates"] + "exclude": [".docusaurus", "build", "tests"] } From e53f5224ce22dfa24496173e9fba828337e16358 Mon Sep 17 00:00:00 2001 From: djliden <7102904+djliden@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:18:30 -0500 Subject: [PATCH 5/5] Replace raw-loader import with content-entries plugin for cookbooks Extend the content-entries plugin to support a data-only mode (no routes) for loading cookbook markdown at build time, eliminating the fragile raw-loader webpack hack. 
Co-authored-by: Isaac --- api/content-markdown.ts | 33 ++++- docusaurus.config.ts | 7 ++ plugins/content-entries.ts | 41 ++++--- src/lib/content-markdown.ts | 2 +- src/lib/use-raw-content-markdown.ts | 8 ++ src/pages/resources/rag-chat-app-template.tsx | 113 +++++++++++++----- 6 files changed, 150 insertions(+), 54 deletions(-) diff --git a/api/content-markdown.ts b/api/content-markdown.ts index 49ba440..2fa9188 100644 --- a/api/content-markdown.ts +++ b/api/content-markdown.ts @@ -5,7 +5,8 @@ import { recipes, templates } from "../src/lib/recipes/recipes"; import { buildTemplateMarkdownDocument, collectTemplateRecipeIds, -} from "../src/lib/template-content"; + parseTemplateMarkdown, +} from "../src/lib/template-markdown"; export type MarkdownSection = "docs" | "recipes" | "solutions" | "templates"; @@ -92,8 +93,36 @@ function readTemplateMarkdown(rootDir: string, slug: string): string { throw new Error(`Template page not found: "${slug}"`); } + if (template.id === "rag-chat-app-template") { + const cookbookPath = resolve( + rootDir, + "content", + "cookbooks", + "rag-chat-app-template.md", + ); + const cookbookContent = readIfExists(cookbookPath); + if (!cookbookContent) { + throw new Error( + `Cookbook markdown not found at content/cookbooks/rag-chat-app-template.md`, + ); + } + const blocks = parseTemplateMarkdown(cookbookContent); + const rawBySlug = Object.fromEntries( + collectTemplateRecipeIds(blocks).map((recipeId) => { + const recipe = recipes.find((entry) => entry.id === recipeId); + if (!recipe) { + throw new Error(`Recipe not found: "${recipeId}"`); + } + + return [recipeId, readRecipeMarkdown(rootDir, recipeId)]; + }), + ); + + return buildTemplateMarkdownDocument(template, rawBySlug, blocks); + } + const rawBySlug = Object.fromEntries( - collectTemplateRecipeIds(template).map((recipeId) => { + template.recipeIds.map((recipeId) => { const recipe = recipes.find((entry) => entry.id === recipeId); if (!recipe) { throw new Error(`Recipe not found: 
"${recipeId}"`); diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 064419d..a6f8e70 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -95,6 +95,13 @@ const config: Config = { contentSection: "solutions", }, ], + [ + contentEntriesPlugin, + { + id: "cookbooks", + contentSection: "cookbooks", + }, + ], llmsTxtPlugin, ], diff --git a/plugins/content-entries.ts b/plugins/content-entries.ts index 23d0abb..30bc9de 100644 --- a/plugins/content-entries.ts +++ b/plugins/content-entries.ts @@ -8,10 +8,10 @@ import { solutions } from "../src/lib/solutions/solutions"; type EntryType = "recipe" | "solution"; type ContentEntriesPluginOptions = { - id: "recipes" | "solutions"; - entryType: EntryType; - routeBasePath: string; - contentSection: "recipes" | "solutions"; + id: string; + entryType?: EntryType; + routeBasePath?: string; + contentSection: "recipes" | "solutions" | "cookbooks"; }; function createRouteModuleSource(entryType: EntryType, slug: string): string { @@ -94,8 +94,11 @@ export default function contentEntriesPlugin( context.siteDir, options.contentSection, ); - const registrySlugs = getRegistrySlugs(options.entryType); - assertSlugParity(options.entryType, contentSlugs, registrySlugs); + + if (options.entryType && options.routeBasePath) { + const registrySlugs = getRegistrySlugs(options.entryType); + assertSlugParity(options.entryType, contentSlugs, registrySlugs); + } const rawMarkdownBySlug: Record = {}; for (const slug of contentSlugs) { @@ -109,23 +112,23 @@ export default function contentEntriesPlugin( } setGlobalData({ - entryType: options.entryType, - routeBasePath: options.routeBasePath, slugs: contentSlugs, rawMarkdownBySlug, }); - for (const slug of contentSlugs) { - const modulePath = await createData( - `${options.id}-${slug}-route.tsx`, - createRouteModuleSource(options.entryType, slug), - ); - - addRoute({ - path: `${options.routeBasePath}/${slug}`, - component: modulePath, - exact: true, - }); + if (options.entryType && 
options.routeBasePath) { + for (const slug of contentSlugs) { + const modulePath = await createData( + `${options.id}-${slug}-route.tsx`, + createRouteModuleSource(options.entryType, slug), + ); + + addRoute({ + path: `${options.routeBasePath}/${slug}`, + component: modulePath, + exact: true, + }); + } } }, }; diff --git a/src/lib/content-markdown.ts b/src/lib/content-markdown.ts index 3139f20..a8e6cef 100644 --- a/src/lib/content-markdown.ts +++ b/src/lib/content-markdown.ts @@ -1,7 +1,7 @@ import { readdirSync } from "fs"; import { resolve } from "path"; -export type ContentMarkdownSection = "recipes" | "solutions"; +export type ContentMarkdownSection = "recipes" | "solutions" | "cookbooks"; function markdownDirectory( rootDir: string, diff --git a/src/lib/use-raw-content-markdown.ts b/src/lib/use-raw-content-markdown.ts index 9b2ef0c..4d786af 100644 --- a/src/lib/use-raw-content-markdown.ts +++ b/src/lib/use-raw-content-markdown.ts @@ -30,3 +30,11 @@ export function useRawSolutionMarkdown(slug: string): string | undefined { ) as ContentEntriesGlobalData; return data.rawMarkdownBySlug[slug]; } + +export function useRawCookbookMarkdown(slug: string): string | undefined { + const data = usePluginData( + "docusaurus-plugin-content-entries", + "cookbooks", + ) as ContentEntriesGlobalData; + return data.rawMarkdownBySlug[slug]; +} diff --git a/src/pages/resources/rag-chat-app-template.tsx b/src/pages/resources/rag-chat-app-template.tsx index 53d48e9..3d3a811 100644 --- a/src/pages/resources/rag-chat-app-template.tsx +++ b/src/pages/resources/rag-chat-app-template.tsx @@ -1,12 +1,19 @@ -import type { ReactNode } from "react"; +import CodeBlock from "@theme/CodeBlock"; +import { evaluateSync } from "@mdx-js/mdx"; +import { useMDXComponents } from "@mdx-js/react"; +import * as jsxRuntime from "react/jsx-runtime"; +import { type ComponentType, type ReactNode, useMemo } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { 
templates } from "@/lib/recipes/recipes"; import { buildTemplateRawMarkdown, - getTemplateContentBlocks, -} from "@/lib/template-content"; -import { TemplateBlockRenderer } from "@/components/templates/template-blocks"; -import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; + parseTemplateMarkdown, + type TemplateContentBlock, +} from "@/lib/template-markdown"; +import { + useAllRawRecipeMarkdown, + useRawCookbookMarkdown, +} from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; import FoundationModelsApi from "@site/content/recipes/foundation-models-api.md"; @@ -17,9 +24,8 @@ import LakebaseChatPersistence from "@site/content/recipes/lakebase-chat-persist import RagChatIntegration from "@site/content/recipes/rag-chat-integration.md"; const template = templates.find((t) => t.id === "rag-chat-app-template"); -const blocks = getTemplateContentBlocks("rag-chat-app-template"); -const recipeComponents: Record = { +const recipeComponents: Record = { "databricks-local-bootstrap": DatabricksLocalBootstrap, "lakebase-data-persistence": LakebaseDataPersistence, "foundation-models-api": FoundationModelsApi, @@ -30,38 +36,81 @@ const recipeComponents: Record = { "rag-chat-integration": RagChatIntegration, }; +function TemplateMarkdownBlock({ content }: { content: string }): ReactNode { + const components = useMDXComponents(); + + const Content = useMemo(() => { + return evaluateSync(content, { + ...jsxRuntime, + useMDXComponents: () => components, + }).default; + }, [components, content]); + + return ; +} + +type TemplateRecipeComponentMap = Record; + +function TemplateBlockRenderer({ + blocks, + recipeComponents: recipeComponentMap, +}: { + blocks: TemplateContentBlock[]; + recipeComponents: TemplateRecipeComponentMap; +}): ReactNode { + return ( + <> + {blocks.map((block, index) => { + const 
key = `${block.type}-${index}`; + + switch (block.type) { + case "markdown": + return ; + case "code": + return ( + + {block.content.replace(/\n$/, "")} + + ); + case "recipe": { + const RecipeComponent = recipeComponentMap[block.recipeId]; + if (!RecipeComponent) { + throw new Error( + `Missing recipe component for template block: ${block.recipeId}`, + ); + } + return ; + } + default: + return null; + } + })} + + ); +} + export default function RagChatAppTemplatePage(): ReactNode { const rawBySlug = useAllRawRecipeMarkdown(); + const cookbookMarkdown = useRawCookbookMarkdown("rag-chat-app-template"); + const blocks = useMemo( + () => (cookbookMarkdown ? parseTemplateMarkdown(cookbookMarkdown) : []), + [cookbookMarkdown], + ); + if (!template) { throw new Error("Template rag-chat-app-template not found"); } - const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug, blocks); return ( - {blocks ? ( - - ) : ( - <> - -
- -
- -
- -
- -
- -
- -
- - - )} +
); }