diff --git a/api/content-markdown.ts b/api/content-markdown.ts index c88c125..2fa9188 100644 --- a/api/content-markdown.ts +++ b/api/content-markdown.ts @@ -2,13 +2,14 @@ import { existsSync, readFileSync } from "fs"; import { resolve } from "path"; import { hasMarkdownSlug } from "../src/lib/content-markdown"; import { recipes, templates } from "../src/lib/recipes/recipes"; +import { + buildTemplateMarkdownDocument, + collectTemplateRecipeIds, + parseTemplateMarkdown, +} from "../src/lib/template-markdown"; export type MarkdownSection = "docs" | "recipes" | "solutions" | "templates"; -function recipeMarkdownPath(recipeId: string): string { - return `content/recipes/${recipeId}.md`; -} - function validateSlug(slug: string): void { if (!slug || slug.trim() === "") { throw new Error("Missing slug"); @@ -92,42 +93,46 @@ function readTemplateMarkdown(rootDir: string, slug: string): string { throw new Error(`Template page not found: "${slug}"`); } - const lines: string[] = [ - "---", - `title: "${template.name.replace(/"/g, '\\"')}"`, - `url: /resources/${template.id}`, - `summary: "${template.description.replace(/"/g, '\\"')}"`, - "---", - "", - `# ${template.name}`, - "", - template.description, - "", - ]; - - for (const recipeId of template.recipeIds) { - const recipe = recipes.find((entry) => entry.id === recipeId); - if (!recipe) { - throw new Error(`Recipe not found: "${recipeId}"`); - } - if (!hasMarkdownSlug(rootDir, "recipes", recipeId)) { - throw new Error(`Recipe page not found: "${recipeId}"`); - } - - const recipePath = recipeMarkdownPath(recipeId); - const absoluteRecipePath = resolve(rootDir, recipePath); - const recipeContent = readIfExists(absoluteRecipePath); - if (!recipeContent) { + if (template.id === "rag-chat-app-template") { + const cookbookPath = resolve( + rootDir, + "content", + "cookbooks", + "rag-chat-app-template.md", + ); + const cookbookContent = readIfExists(cookbookPath); + if (!cookbookContent) { throw new Error( - `Recipe markdown 
missing for "${recipeId}" at ${recipePath}`, + `Cookbook markdown not found at content/cookbooks/rag-chat-app-template.md`, ); } + const blocks = parseTemplateMarkdown(cookbookContent); + const rawBySlug = Object.fromEntries( + collectTemplateRecipeIds(blocks).map((recipeId) => { + const recipe = recipes.find((entry) => entry.id === recipeId); + if (!recipe) { + throw new Error(`Recipe not found: "${recipeId}"`); + } + + return [recipeId, readRecipeMarkdown(rootDir, recipeId)]; + }), + ); - lines.push(recipeContent.trim()); - lines.push(""); + return buildTemplateMarkdownDocument(template, rawBySlug, blocks); } - return lines.join("\n"); + const rawBySlug = Object.fromEntries( + template.recipeIds.map((recipeId) => { + const recipe = recipes.find((entry) => entry.id === recipeId); + if (!recipe) { + throw new Error(`Recipe not found: "${recipeId}"`); + } + + return [recipeId, readRecipeMarkdown(rootDir, recipeId)]; + }), + ); + + return buildTemplateMarkdownDocument(template, rawBySlug); } export function getDetailMarkdown( diff --git a/content/recipes/ai-chat-model-serving.md b/content/recipes/ai-chat-model-serving.md index 7cabc97..2bc03ca 100644 --- a/content/recipes/ai-chat-model-serving.md +++ b/content/recipes/ai-chat-model-serving.md @@ -28,7 +28,7 @@ If you run `npm run dev` before deploying, your user creates schemas that the de ### 2. Install AI SDK packages ```bash -npm install ai@6 @ai-sdk/react@3 @ai-sdk/openai @databricks/sdk-experimental +npm install ai@6 @ai-sdk/react@3 @ai-sdk/openai@3 @databricks/sdk-experimental ``` > **Version note**: This recipe uses AI SDK v6 APIs (`TextStreamChatTransport`, `sendMessage({ text })`, transport-based `useChat`). Tested with `ai@6.1`, `@ai-sdk/react@3.1`, and `@ai-sdk/openai@3.x`. 
diff --git a/content/recipes/embeddings-generation.md b/content/recipes/embeddings-generation.md new file mode 100644 index 0000000..12c7c62 --- /dev/null +++ b/content/recipes/embeddings-generation.md @@ -0,0 +1,65 @@ +## Generate Embeddings with AI Gateway + +Generate text embeddings from a Databricks AI Gateway endpoint using the Databricks SDK. + +### 1. Find an embedding endpoint + +```bash +databricks serving-endpoints list --profile <profile> +``` + +Common embedding endpoints: `databricks-gte-large-en` (1024d), `databricks-bge-large-en` (1024d). + +### 2. Configure environment + +`.env`: + +```bash +DATABRICKS_EMBEDDING_ENDPOINT=databricks-gte-large-en +``` + +`app.yaml`: + +```yaml +env: + - name: DATABRICKS_EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" +``` + +### 3. Embedding helper + +Create `server/lib/embeddings.ts`: + +`server/lib/embeddings.ts`: + +```typescript +import { getWorkspaceClient } from "@databricks/appkit"; + +const workspaceClient = getWorkspaceClient({}); + +export async function generateEmbedding(text: string): Promise<number[]> { + const endpoint = + process.env.DATABRICKS_EMBEDDING_ENDPOINT || "databricks-gte-large-en"; + const result = await workspaceClient.servingEndpoints.query({ + name: endpoint, + input: text, + }); + return result.data![0].embedding!; +} +``` + +No additional dependencies — uses `@databricks/appkit` already in your project. + +### 4. Verify + +```bash +databricks serving-endpoints query <endpoint-name> \ + --json '{"input": "Hello, world!"}' \ + --profile <profile> +``` + +Response includes a `data` array with `embedding` (float array). 
+ +#### References + +- [Query embedding models](https://docs.databricks.com/aws/en/machine-learning/model-serving/query-embedding-models) diff --git a/content/recipes/lakebase-chat-persistence.md b/content/recipes/lakebase-chat-persistence.md index de4a6d0..75f03eb 100644 --- a/content/recipes/lakebase-chat-persistence.md +++ b/content/recipes/lakebase-chat-persistence.md @@ -42,11 +42,75 @@ In `server/server.ts`, keep `autoStart: false` and run schema setup before `appk ### 3. Add persistence helpers -Create `server/lib/chat-store.ts` and use parameterized queries: +Create `server/lib/chat-store.ts` with table setup and parameterized query helpers: > **Getting userId**: In deployed Databricks Apps, use `req.header("x-forwarded-email")` from the request headers. For local development, use a hardcoded test user ID. +`server/lib/chat-store.ts`: + ```typescript +import type { Application } from "express"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { + extend(fn: (app: Application) => void): void; + }; +} + +export async function setupChatTables(appkit: AppKitWithLakebase) { + await appkit.lakebase.query("CREATE SCHEMA IF NOT EXISTS chat"); + await appkit.lakebase.query(` + CREATE TABLE IF NOT EXISTS chat.chats ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id TEXT NOT NULL, + title TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + `); + await appkit.lakebase.query(` + CREATE TABLE IF NOT EXISTS chat.messages ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + chat_id UUID NOT NULL REFERENCES chat.chats(id) ON DELETE CASCADE, + role TEXT NOT NULL CHECK (role IN ('system', 'user', 'assistant', 'tool')), + content TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + `); + try { + await appkit.lakebase.query(` + CREATE INDEX IF NOT EXISTS idx_messages_chat_id_created_at + ON 
chat.messages(chat_id, created_at) + `); + } catch (err: unknown) { + const code = (err as { code?: string }).code; + if (code === "42501") { + console.log( + "[chat] Skipping index creation — table owned by another identity", + ); + } else { + throw err; + } + } +} + +export async function listChats(appkit: AppKitWithLakebase, userId: string) { + const result = await appkit.lakebase.query( + `SELECT id, user_id, title, created_at, updated_at + FROM chat.chats + WHERE user_id = $1 + ORDER BY updated_at DESC`, + [userId], + ); + return result.rows; +} + export async function createChat( appkit: AppKitWithLakebase, input: { userId: string; title: string }, @@ -60,6 +124,20 @@ export async function createChat( return result.rows[0]; } +export async function getChatMessages( + appkit: AppKitWithLakebase, + chatId: string, +) { + const result = await appkit.lakebase.query( + `SELECT id, chat_id, role, content, created_at + FROM chat.messages + WHERE chat_id = $1 + ORDER BY created_at ASC`, + [chatId], + ); + return result.rows; +} + export async function appendMessage( appkit: AppKitWithLakebase, input: { chatId: string; role: string; content: string }, @@ -70,10 +148,16 @@ export async function appendMessage( RETURNING id, chat_id, role, content, created_at`, [input.chatId, input.role, input.content], ); + await appkit.lakebase.query( + `UPDATE chat.chats SET updated_at = NOW() WHERE id = $1`, + [input.chatId], + ); return result.rows[0]; } ``` +Call `setupChatTables(appkit)` from `server.ts` before starting the server. The `listChats`, `getChatMessages`, and `appendMessage` functions are imported by the chat persistence routes and chat routes in later recipes. + ### 4. 
Persist in the `/api/chat` flow In your chat route: diff --git a/content/recipes/lakebase-pgvector.md b/content/recipes/lakebase-pgvector.md new file mode 100644 index 0000000..a4fcc1c --- /dev/null +++ b/content/recipes/lakebase-pgvector.md @@ -0,0 +1,135 @@ +## Lakebase pgvector + +Enable vector similarity search in Lakebase using the pgvector extension. + +### 1. Enable pgvector + +```bash +databricks psql --project --profile -- -c " + CREATE EXTENSION IF NOT EXISTS vector; +" +``` + +### 2. Create embedding table + +```sql +CREATE SCHEMA IF NOT EXISTS rag; + +CREATE TABLE IF NOT EXISTS rag.documents ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + content TEXT NOT NULL, + embedding VECTOR(1024), + metadata JSONB NOT NULL DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); +``` + +> **Vector dimensions**: `VECTOR(1024)` must match your embedding model output. `databricks-gte-large-en` produces 1024 dimensions. + +### 3. Server-side RAG store module + +Create `server/lib/rag-store.ts` with table setup, insert, and similarity search. Call `setupRagTables(appkit)` from `server.ts` before starting the server. 
+ +`server/lib/rag-store.ts`: + +```typescript +import type { Application } from "express"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { + extend(fn: (app: Application) => void): void; + }; +} + +export async function setupRagTables(appkit: AppKitWithLakebase) { + try { + await appkit.lakebase.query("CREATE EXTENSION IF NOT EXISTS vector"); + } catch (err: unknown) { + const code = (err as { code?: string }).code; + if (code === "42501") { + console.log( + "[rag] Skipping extension creation — insufficient privileges (likely already exists)", + ); + } else { + throw err; + } + } + const { rows } = await appkit.lakebase.query( + `SELECT 1 FROM information_schema.tables + WHERE table_schema = 'rag' AND table_name = 'documents'`, + ); + if (rows.length > 0) return; + await appkit.lakebase.query(`CREATE SCHEMA IF NOT EXISTS rag`); + await appkit.lakebase.query(` + CREATE TABLE IF NOT EXISTS rag.documents ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + content TEXT NOT NULL, + embedding VECTOR(1024), + metadata JSONB NOT NULL DEFAULT '{}', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + `); +} + +export async function insertDocument( + appkit: AppKitWithLakebase, + input: { + content: string; + embedding: number[]; + metadata?: Record; + }, +) { + const result = await appkit.lakebase.query( + `INSERT INTO rag.documents (content, embedding, metadata) + VALUES ($1, $2::vector, $3) + RETURNING id, content, metadata, created_at`, + [ + input.content, + JSON.stringify(input.embedding), + JSON.stringify(input.metadata ?? 
{}), + ], + ); + return result.rows[0]; +} + +export async function retrieveSimilar( + appkit: AppKitWithLakebase, + queryEmbedding: number[], + limit = 5, +) { + const result = await appkit.lakebase.query( + `SELECT id, content, metadata, 1 - (embedding <=> $1::vector) AS similarity + FROM rag.documents + WHERE embedding IS NOT NULL + ORDER BY embedding <=> $1::vector + LIMIT $2`, + [JSON.stringify(queryEmbedding), limit], + ); + return result.rows; +} +``` + +> **Distance operators**: `<=>` cosine (default for text), `<->` L2, `<#>` inner product. + +### 4. Create index + +Add after inserting initial data (IVFFlat needs representative data to build): + +```sql +CREATE INDEX IF NOT EXISTS idx_documents_embedding + ON rag.documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); +ANALYZE rag.documents; +``` + +> For higher recall without tuning, use `USING hnsw (embedding vector_cosine_ops)` instead. + +#### References + +- [pgvector](https://github.com/pgvector/pgvector) +- [Lakebase extensions](https://docs.databricks.com/aws/en/oltp/projects/extensions) diff --git a/content/recipes/rag-chat-integration.md b/content/recipes/rag-chat-integration.md new file mode 100644 index 0000000..24b1ad2 --- /dev/null +++ b/content/recipes/rag-chat-integration.md @@ -0,0 +1,821 @@ +## RAG Chat Integration + +Wire pgvector retrieval, document seeding, and a sources API into a streaming chat app to build a complete RAG experience. This recipe builds on the embeddings, pgvector, chat, and persistence recipes to produce a fully working RAG chat app. + +### 1. 
Follow the prerequisite recipes first + +Complete these recipes before continuing: + +- [`Generate Embeddings with AI Gateway`](/resources/rag-chat-app-template#generate-embeddings-with-ai-gateway) +- [`Lakebase pgvector`](/resources/rag-chat-app-template#lakebase-pgvector) +- [`Streaming AI Chat with Model Serving`](/resources/rag-chat-app-template#streaming-ai-chat-with-model-serving) +- [`Lakebase Chat Persistence`](/resources/rag-chat-app-template#lakebase-chat-persistence) + +### 2. Add seed environment variable + +Add `RAG_RESEED` to `.env` so you can control whether seeding re-runs on restart: + +`.env`: + +```bash +RAG_RESEED=false +``` + +`app.yaml` (add under `env`): + +```yaml +env: + - name: RAG_RESEED + value: "false" +``` + +### 3. Create the document seeding module + +This module fetches Wikipedia articles, chunks them at paragraph boundaries, and seeds the pgvector table. Adapt `fetchWikipediaArticle()` for your own data sources. + +`server/lib/seed-data.ts`: + +```typescript +import type { Application } from "express"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { + extend(fn: (app: Application) => void): void; + }; +} + +const WIKIPEDIA_ARTICLES = [ + "Databricks", + "Apache_Spark", + "Delta_Lake_(software)", + "Apache_Iceberg", + "Data_lakehouse", + "Apache_Parquet", + "Extract,_transform,_load", + "Retrieval-augmented_generation", + "Data_lake", +]; + +const SHOULD_RESEED = process.env.RAG_RESEED === "true"; + +async function fetchWikipediaArticle(title: string): Promise { + const url = + "https://en.wikipedia.org/w/api.php?" + + new URLSearchParams({ + action: "query", + prop: "extracts", + explaintext: "1", + format: "json", + titles: title, + }); + const res = await fetch(url); + const data = (await res.json()) as { + query: { pages: Record }; + }; + const page = Object.values(data.query.pages)[0]; + return page.extract ?? 
""; +} + +function chunkText(text: string, maxLen = 1000): string[] { + const paragraphs = text.split(/\n\n+/).filter((p) => p.trim().length > 50); + const chunks: string[] = []; + let cur = ""; + for (const p of paragraphs) { + if (cur.length + p.length > maxLen && cur) { + chunks.push(cur.trim()); + cur = ""; + } + cur += p + "\n\n"; + } + if (cur.trim()) chunks.push(cur.trim()); + return chunks; +} + +export async function seedFromWikipedia( + appkit: AppKitWithLakebase, + generateEmbedding: (text: string) => Promise, + insertDocument: ( + appkit: AppKitWithLakebase, + input: { + content: string; + embedding: number[]; + metadata?: Record; + }, + ) => Promise>, +) { + const { rows } = await appkit.lakebase.query( + "SELECT COUNT(*) as count FROM rag.documents", + ); + const existingCount = parseInt(String(rows[0].count), 10); + if (existingCount > 0 && !SHOULD_RESEED) return; + if (existingCount > 0 && SHOULD_RESEED) { + await appkit.lakebase.query("DELETE FROM rag.documents"); + } + for (const title of WIKIPEDIA_ARTICLES) { + try { + const chunks = chunkText(await fetchWikipediaArticle(title)); + for (const [index, chunk] of chunks.entries()) { + await insertDocument(appkit, { + content: chunk, + embedding: await generateEmbedding(chunk), + metadata: { source: "wikipedia", article: title, chunkIndex: index }, + }); + } + } catch (err) { + console.warn(`[seed] ${title} failed:`, (err as Error).message); + } + } +} +``` + +Key points: + +- `chunkText()` splits on paragraph boundaries (double newlines) and merges short paragraphs up to `maxLen`. This keeps semantic units together. +- `seedFromWikipedia()` is idempotent: it skips seeding if documents exist unless `RAG_RESEED=true`. +- `generateEmbedding` and `insertDocument` are injected as parameters from earlier recipes. + +### 4. Create RAG-augmented chat routes + +This is the core RAG flow. 
The `/api/chat` route embeds the user query, retrieves similar documents, injects them as system context, and streams the response. The `/api/chat/sources` endpoint lets the client fetch sources in parallel. + +`server/routes/chat-routes.ts`: + +```typescript +import { createOpenAI } from "@ai-sdk/openai"; +import { streamText, type UIMessage } from "ai"; +import { Config } from "@databricks/sdk-experimental"; +import type { Application } from "express"; +import { generateEmbedding } from "../lib/embeddings"; +import { retrieveSimilar } from "../lib/rag-store"; +import { appendMessage } from "../lib/chat-store"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { extend(fn: (app: Application) => void): void }; +} + +async function getDatabricksToken() { + if (process.env.DATABRICKS_TOKEN) return process.env.DATABRICKS_TOKEN; + const config = new Config({ + profile: process.env.DATABRICKS_CONFIG_PROFILE || "DEFAULT", + }); + await config.ensureResolved(); + const headers = new Headers(); + await config.authenticate(headers); + const authHeader = headers.get("Authorization"); + if (!authHeader) + throw new Error( + "Failed to get Databricks token. 
Check your CLI profile or set DATABRICKS_TOKEN.", + ); + return authHeader.replace("Bearer ", ""); +} + +export function setupChatRoutes(appkit: AppKitWithLakebase) { + appkit.server.extend((app) => { + // Retrieve RAG sources for a query (called by client before/alongside chat) + app.get("/api/chat/sources", async (req, res) => { + const query = req.query.q as string | undefined; + if (!query) { + res.json([]); + return; + } + try { + const embedding = await generateEmbedding(query); + const similar = await retrieveSimilar(appkit, embedding, 5); + const sources = similar.map( + (d: Record, i: number) => ({ + index: i + 1, + content: d.content as string, + similarity: d.similarity as number, + metadata: d.metadata as Record, + }), + ); + res.json(sources); + } catch (err) { + console.error("[chat:sources]", (err as Error).message); + res.json([]); + } + }); + + app.post("/api/chat", async (req, res) => { + const { messages, chatId } = req.body as { + messages: UIMessage[]; + chatId: string; + }; + const coreMessages = messages.map((m) => ({ + role: m.role as "user" | "assistant" | "system", + content: + m.parts + ?.filter( + (p): p is Extract => + p.type === "text", + ) + .map((p) => p.text) + .join("") ?? "", + })); + + try { + const lastUserMsg = coreMessages.filter((m) => m.role === "user").pop(); + + // Save the user message + if (lastUserMsg && chatId) { + await appendMessage(appkit, { + chatId, + role: "user", + content: lastUserMsg.content, + }); + } + + const token = await getDatabricksToken(); + const endpoint = + process.env.DATABRICKS_ENDPOINT || "databricks-gpt-5-4-mini"; + + let contextPrefix = ""; + if (lastUserMsg) { + const similar = await retrieveSimilar( + appkit, + await generateEmbedding(lastUserMsg.content), + 5, + ); + if (similar.length > 0) { + contextPrefix = + "Use the following context to inform your answer. 
If not relevant, say so.\n\n" + + similar + .map( + (d: Record, i: number) => + `[${i + 1}] ${d.content}`, + ) + .join("\n\n"); + } + } + + const augmented = [ + ...(contextPrefix + ? [{ role: "system" as const, content: contextPrefix }] + : []), + ...coreMessages, + ]; + + const databricks = createOpenAI({ + baseURL: `https://${process.env.DATABRICKS_WORKSPACE_ID}.ai-gateway.cloud.databricks.com/mlflow/v1`, + apiKey: token, + }); + const result = streamText({ + model: databricks.chat(endpoint), + messages: augmented, + maxOutputTokens: 1000, + onFinish: async ({ text }) => { + if (chatId) { + await appendMessage(appkit, { + chatId, + role: "assistant", + content: text, + }); + } + }, + }); + result.pipeTextStreamToResponse(res); + } catch (err) { + console.error("[chat]", (err as Error).message); + res.status(502).json({ error: "Chat request failed" }); + } + }); + }); +} +``` + +The RAG flow in `/api/chat`: + +1. Extract the last user message +2. Save it to the chat session via `appendMessage()` +3. Embed the query with `generateEmbedding()` +4. Retrieve the top 5 similar documents with `retrieveSimilar()` +5. Build a context prefix with numbered sources +6. Prepend it as a system message +7. Stream the response and persist the assistant reply on finish + +### 5. Create chat persistence routes + +These REST endpoints let the client list, create, and load chat sessions. The `getUserId()` function reads the `x-forwarded-email` header set by Databricks Apps in production. 
+ +`server/routes/chat-persistence-routes.ts`: + +```typescript +import type { Application } from "express"; +import { + listChats, + createChat, + getChatMessages, + appendMessage, +} from "../lib/chat-store"; + +interface AppKitWithLakebase { + lakebase: { + query( + text: string, + params?: unknown[], + ): Promise<{ rows: Record[] }>; + }; + server: { + extend(fn: (app: Application) => void): void; + }; +} + +function getUserId(req: { header(name: string): string | undefined }): string { + return req.header("x-forwarded-email") || "local-dev-user"; +} + +export function setupChatPersistenceRoutes(appkit: AppKitWithLakebase) { + appkit.server.extend((app) => { + // List all chat sessions for the current user + app.get("/api/chats", async (req, res) => { + try { + const chats = await listChats(appkit, getUserId(req)); + res.json(chats); + } catch (err) { + console.error("[chats:list]", (err as Error).message); + res.status(500).json({ error: "Failed to list chats" }); + } + }); + + // Create a new chat session + app.post("/api/chats", async (req, res) => { + try { + const { title } = req.body as { title?: string }; + const chat = await createChat(appkit, { + userId: getUserId(req), + title: title || "New Chat", + }); + res.status(201).json(chat); + } catch (err) { + console.error("[chats:create]", (err as Error).message); + res.status(500).json({ error: "Failed to create chat" }); + } + }); + + // Load messages for a chat session + app.get("/api/chats/:id/messages", async (req, res) => { + try { + const messages = await getChatMessages(appkit, req.params.id); + res.json(messages); + } catch (err) { + console.error("[chats:messages]", (err as Error).message); + res.status(500).json({ error: "Failed to load messages" }); + } + }); + + // Save a message to a chat session + app.post("/api/chats/:id/messages", async (req, res) => { + try { + const { role, content } = req.body as { role: string; content: string }; + const message = await appendMessage(appkit, { + 
chatId: req.params.id, + role, + content, + }); + res.status(201).json(message); + } catch (err) { + console.error("[chats:save-message]", (err as Error).message); + res.status(500).json({ error: "Failed to save message" }); + } + }); + }); +} +``` + +### 6. Wire everything together in the server entry point + +The server bootstrap creates the AppKit instance, runs table setup and seeding, registers all routes, and starts the server. + +`server/server.ts`: + +```typescript +import { createApp, server, lakebase } from "@databricks/appkit"; +import { setupRagTables, insertDocument } from "./lib/rag-store"; +import { setupChatRoutes } from "./routes/chat-routes"; +import { setupChatPersistenceRoutes } from "./routes/chat-persistence-routes"; +import { setupChatTables } from "./lib/chat-store"; +import { seedFromWikipedia } from "./lib/seed-data"; +import { generateEmbedding } from "./lib/embeddings"; + +const appkit = await createApp({ + plugins: [server({ autoStart: false }), lakebase()], +}); + +await setupRagTables(appkit); +await setupChatTables(appkit); +await seedFromWikipedia(appkit, generateEmbedding, insertDocument); +setupChatRoutes(appkit); +setupChatPersistenceRoutes(appkit); +await appkit.server.start(); +``` + +The order matters: tables must exist before seeding, and seeding must complete before routes start handling requests. + +### 7. Create the RAG chat page + +This replaces the basic `ChatPage` from the streaming chat recipe with a full RAG-enabled version. It includes a chat sidebar, auto-creation of chat sessions, parallel source fetching, and an expandable sources display beneath each assistant response. 
+ +`client/src/pages/ChatPage.tsx`: + +```tsx +import { useChat } from "@ai-sdk/react"; +import { TextStreamChatTransport } from "ai"; +import { useState, useEffect, useCallback, useRef } from "react"; +import { + MessageSquarePlus, + MessageSquare, + ChevronDown, + ChevronRight, +} from "lucide-react"; +import { + Button, + Input, + ScrollArea, + Separator, +} from "@databricks/appkit-ui/react"; + +interface ChatSession { + id: string; + title: string; + created_at: string; + updated_at: string; +} + +interface ChatMessage { + id: string; + chat_id: string; + role: string; + content: string; + created_at: string; +} + +interface RagSource { + index: number; + content: string; + similarity: number; + metadata: Record; +} + +function createTransport(chatIdRef: React.RefObject) { + return new TextStreamChatTransport({ + api: "/api/chat", + body: () => (chatIdRef.current ? { chatId: chatIdRef.current } : {}), + headers: { "Content-Type": "application/json" }, + }); +} + +function SourcesDisplay({ sources }: { sources: RagSource[] }) { + const [expanded, setExpanded] = useState(false); + + if (sources.length === 0) return null; + + return ( +
+ + {expanded && ( +
+ {sources.map((source) => ( +
+
+ Source {source.index} + + similarity: {(Number(source.similarity) * 100).toFixed(1)}% + +
+

+ {source.content} +

+
+ ))} +
+ )} +
+ ); +} + +export function ChatPage() { + const [chatId, setChatId] = useState(null); + const chatIdRef = useRef(null); + const chatLoadTokenRef = useRef(0); + const [chats, setChats] = useState([]); + const [sidebarOpen, setSidebarOpen] = useState(true); + const transportRef = useRef(createTransport(chatIdRef)); + const [sourcesMap, setSourcesMap] = useState>({}); + + const [input, setInput] = useState(""); + const { messages, setMessages, sendMessage, status } = useChat({ + transport: transportRef.current, + }); + + const loadChats = useCallback(async () => { + const res = await fetch("/api/chats"); + if (res.ok) setChats(await res.json()); + }, []); + + useEffect(() => { + void loadChats(); + }, [loadChats]); + + useEffect(() => { + chatIdRef.current = chatId; + }, [chatId]); + + const selectChat = useCallback( + async (id: string) => { + const loadToken = ++chatLoadTokenRef.current; + setChatId(id); + chatIdRef.current = id; + setSourcesMap({}); + setMessages([]); + const res = await fetch(`/api/chats/${id}/messages`); + if (!res.ok) return; + const saved: ChatMessage[] = await res.json(); + if (loadToken !== chatLoadTokenRef.current) return; + const restored = saved.map((m, i) => ({ + id: m.id || String(i), + role: m.role as "user" | "assistant", + content: m.content, + parts: [{ type: "text" as const, text: m.content }], + createdAt: new Date(m.created_at), + })); + setMessages(restored); + }, + [setMessages], + ); + + const startNewChat = useCallback(() => { + chatLoadTokenRef.current += 1; + setChatId(null); + chatIdRef.current = null; + setMessages([]); + setSourcesMap({}); + }, [setMessages]); + + const handleSubmit = useCallback( + async (e: React.FormEvent) => { + e.preventDefault(); + const text = input.trim(); + if (!text) return; + + let activeChatId = chatId; + + if (!activeChatId) { + const title = text.slice(0, 80); + const res = await fetch("/api/chats", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: 
JSON.stringify({ title }), + }); + if (!res.ok) return; + const chat: ChatSession = await res.json(); + activeChatId = chat.id; + setChatId(activeChatId); + chatIdRef.current = activeChatId; + } + + const sourcesPromise = fetch( + `/api/chat/sources?q=${encodeURIComponent(text)}`, + ) + .then((res) => (res.ok ? res.json() : [])) + .catch(() => [] as RagSource[]); + + await sendMessage({ text }); + setInput(""); + + const sources: RagSource[] = await sourcesPromise; + if (sources.length > 0) { + setSourcesMap((prev) => ({ ...prev, [text]: sources })); + } + + void loadChats(); + }, + [input, chatId, sendMessage, setInput, loadChats], + ); + + function getSourcesForAssistantMessage(index: number): RagSource[] { + if (index === 0) return []; + const prevMessage = messages[index - 1]; + if (!prevMessage || prevMessage.role !== "user") return []; + const userText = + prevMessage.parts + ?.filter( + (p): p is Extract => p.type === "text", + ) + .map((p) => p.text) + .join("") ?? ""; + return sourcesMap[userText] || []; + } + + return ( +
+ {sidebarOpen && ( +
+
+

+ Lakehouse Knowledge Assistant +

+

+ Ask questions about Databricks, Spark, Delta Lake, and the + lakehouse. +

+
+
+ +
+ + +
+ {chats.map((chat) => ( + + ))} + {chats.length === 0 && ( +

+ No previous chats +

+ )} +
+
+
+ )} + +
+
+ +

RAG Chat

+
+ + +
+ {messages.length === 0 && ( +
+

+ Lakehouse Knowledge Assistant +

+

+ Ask questions about Databricks, Apache Spark, Delta Lake, and + the data lakehouse. Answers are grounded in a curated + knowledge base. +

+
+ )} + {messages.map((message, msgIndex) => ( +
+

+ {message.role === "user" ? "You" : "Assistant"} +

+ {message.parts.map((part, index) => + part.type === "text" ? ( +

+ {part.text} +

+ ) : null, + )} + {message.role === "assistant" && ( + + )} +
+ ))} +
+
+ +
+
+ setInput(e.target.value)} + placeholder="Ask a question..." + disabled={status !== "ready"} + /> + +
+
+
+
+ ); +} +``` + +Key patterns in this component: + +- **`createTransport`** passes `chatId` via the `body` callback so the server knows which session to persist to +- **`SourcesDisplay`** is an expandable panel showing retrieved context with similarity scores +- **Auto-create chat**: on first message, `handleSubmit` creates a chat session via `POST /api/chats` before sending +- **Parallel source fetch**: sources are fetched from `/api/chat/sources` in parallel with the chat request +- **`sourcesMap`** keys sources by user message text so each assistant response maps to its retrieval context +- **`selectChat`** loads persisted messages and restores them into the `useChat` state + +### 8. Make ChatPage the root route + +Update `client/src/App.tsx` so the chat is the entire app — remove the scaffold's Home page and Lakebase demo, and make `ChatPage` the root route: + +`client/src/App.tsx`: + +```tsx +import { createBrowserRouter, RouterProvider } from "react-router"; +import { ChatPage } from "./pages/ChatPage"; + +const router = createBrowserRouter([ + { + path: "/", + element: , + }, +]); + +export default function App() { + return ; +} +``` + +You can also delete the scaffold pages you no longer need: +- `client/src/pages/lakebase/LakebasePage.tsx` +- Any `HomePage` or welcome page from the scaffold + +### 9. Deploy and verify + +Deploy the app and verify the full RAG flow: + +```bash +databricks apps deploy --profile +``` + +Verification checklist: + +- Send "What is Apache Spark?" 
and confirm the answer is grounded in retrieved context +- Expand the sources panel beneath the response and verify similarity scores appear +- Refresh the page and confirm the chat session persists in the sidebar +- Start a new chat and verify it creates a separate session + +#### References + +- [Vercel AI SDK](https://ai-sdk.dev/docs/getting-started/overview) +- [pgvector](https://github.com/pgvector/pgvector) +- [Databricks Apps](https://docs.databricks.com/en/dev-tools/databricks-apps/index.html) +- [AppKit](https://databricks.github.io/appkit/) diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 064419d..a6f8e70 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -95,6 +95,13 @@ const config: Config = { contentSection: "solutions", }, ], + [ + contentEntriesPlugin, + { + id: "cookbooks", + contentSection: "cookbooks", + }, + ], llmsTxtPlugin, ], diff --git a/plugins/content-entries.ts b/plugins/content-entries.ts index 2f89887..30bc9de 100644 --- a/plugins/content-entries.ts +++ b/plugins/content-entries.ts @@ -8,10 +8,10 @@ import { solutions } from "../src/lib/solutions/solutions"; type EntryType = "recipe" | "solution"; type ContentEntriesPluginOptions = { - id: "recipes" | "solutions"; - entryType: EntryType; - routeBasePath: string; - contentSection: "recipes" | "solutions"; + id: string; + entryType?: EntryType; + routeBasePath?: string; + contentSection: "recipes" | "solutions" | "cookbooks"; }; function createRouteModuleSource(entryType: EntryType, slug: string): string { @@ -76,7 +76,9 @@ function assertSlugParity( } throw new Error( - `Slug mismatch for ${entryType} entries (${sections.join(" | ")}). Keep content markdown and registry metadata in sync.`, + `Slug mismatch for ${entryType} entries (${sections.join( + " | ", + )}). 
Keep content markdown and registry metadata in sync.`, ); } @@ -92,8 +94,11 @@ export default function contentEntriesPlugin( context.siteDir, options.contentSection, ); - const registrySlugs = getRegistrySlugs(options.entryType); - assertSlugParity(options.entryType, contentSlugs, registrySlugs); + + if (options.entryType && options.routeBasePath) { + const registrySlugs = getRegistrySlugs(options.entryType); + assertSlugParity(options.entryType, contentSlugs, registrySlugs); + } const rawMarkdownBySlug: Record = {}; for (const slug of contentSlugs) { @@ -107,23 +112,23 @@ export default function contentEntriesPlugin( } setGlobalData({ - entryType: options.entryType, - routeBasePath: options.routeBasePath, slugs: contentSlugs, rawMarkdownBySlug, }); - for (const slug of contentSlugs) { - const modulePath = await createData( - `${options.id}-${slug}-route.tsx`, - createRouteModuleSource(options.entryType, slug), - ); - - addRoute({ - path: `${options.routeBasePath}/${slug}`, - component: modulePath, - exact: true, - }); + if (options.entryType && options.routeBasePath) { + for (const slug of contentSlugs) { + const modulePath = await createData( + `${options.id}-${slug}-route.tsx`, + createRouteModuleSource(options.entryType, slug), + ); + + addRoute({ + path: `${options.routeBasePath}/${slug}`, + component: modulePath, + exact: true, + }); + } } }, }; diff --git a/src/components/templates/template-blocks.tsx b/src/components/templates/template-blocks.tsx new file mode 100644 index 0000000..0538597 --- /dev/null +++ b/src/components/templates/template-blocks.tsx @@ -0,0 +1,80 @@ +import CodeBlock from "@theme/CodeBlock"; +import { evaluateSync } from "@mdx-js/mdx"; +import { useMDXComponents } from "@mdx-js/react"; +import { type ComponentType, type ReactNode, useMemo } from "react"; +import * as jsxRuntime from "react/jsx-runtime"; +import type { TemplateContentBlock } from "@/lib/template-content"; + +type TemplateRecipeComponentMap = Record; + +type 
TemplateBlockRendererProps = { + blocks: TemplateContentBlock[]; + recipeComponents: TemplateRecipeComponentMap; +}; + +type MarkdownBlockProps = { + content: string; +}; + +function TemplateMarkdownBlock({ content }: MarkdownBlockProps): ReactNode { + const components = useMDXComponents(); + + const Content = useMemo(() => { + return evaluateSync(content, { + ...jsxRuntime, + useMDXComponents: () => components, + }).default; + }, [components, content]); + + return ; +} + +type CodeBlockProps = { + language: string; + content: string; +}; + +function TemplateCodeBlock({ language, content }: CodeBlockProps): ReactNode { + return ( + + {content.replace(/\n$/, "")} + + ); +} + +export function TemplateBlockRenderer({ + blocks, + recipeComponents, +}: TemplateBlockRendererProps): ReactNode { + return ( + <> + {blocks.map((block, index) => { + const key = `${block.type}-${index}`; + + switch (block.type) { + case "markdown": + return ; + case "code": + return ( + + ); + case "recipe": { + const RecipeComponent = recipeComponents[block.recipeId]; + if (!RecipeComponent) { + throw new Error( + `Missing recipe component for template block: ${block.recipeId}`, + ); + } + return ; + } + default: + return null; + } + })} + + ); +} diff --git a/src/lib/content-markdown.ts b/src/lib/content-markdown.ts index 3139f20..a8e6cef 100644 --- a/src/lib/content-markdown.ts +++ b/src/lib/content-markdown.ts @@ -1,7 +1,7 @@ import { readdirSync } from "fs"; import { resolve } from "path"; -export type ContentMarkdownSection = "recipes" | "solutions"; +export type ContentMarkdownSection = "recipes" | "solutions" | "cookbooks"; function markdownDirectory( rootDir: string, diff --git a/src/lib/recipes/recipes.ts b/src/lib/recipes/recipes.ts index f3dd590..6a5b825 100644 --- a/src/lib/recipes/recipes.ts +++ b/src/lib/recipes/recipes.ts @@ -113,6 +113,35 @@ export const recipes: Recipe[] = [ tags: ["Analytics", "SQL", "Charts", "Dashboard"], prerequisites: ["databricks-local-bootstrap"], }, + 
{ + id: "embeddings-generation", + name: "Generate Embeddings with AI Gateway", + description: + "Generate text embeddings from a Databricks AI Gateway endpoint using the Databricks SDK for vector similarity search and RAG applications.", + tags: ["AI", "Embeddings", "AI Gateway", "RAG"], + prerequisites: ["databricks-local-bootstrap", "foundation-models-api"], + }, + { + id: "lakebase-pgvector", + name: "Lakebase pgvector", + description: + "Enable vector similarity search in Lakebase using the pgvector extension for storing and querying embeddings.", + tags: ["Lakebase", "Postgres", "pgvector", "Embeddings", "RAG"], + prerequisites: ["lakebase-data-persistence", "embeddings-generation"], + }, + { + id: "rag-chat-integration", + name: "RAG Chat Integration", + description: + "Wire pgvector retrieval, document seeding, and a sources API into a streaming chat app to build a complete RAG experience.", + tags: ["RAG", "Chat", "AI", "pgvector", "Embeddings"], + prerequisites: [ + "embeddings-generation", + "lakebase-pgvector", + "ai-chat-model-serving", + "lakebase-chat-persistence", + ], + }, ]; const recipeIndex: Record = Object.fromEntries( @@ -124,8 +153,11 @@ export const recipesInOrder: Recipe[] = [ "lakebase-data-persistence", "foundation-models-api", "model-serving-endpoint-creation", + "embeddings-generation", + "lakebase-pgvector", "ai-chat-model-serving", "lakebase-chat-persistence", + "rag-chat-integration", "etl-lakehouse-sync-autoscaling", "reverse-etl-synced-tables-autoscaling", "genie-conversational-analytics", @@ -218,6 +250,22 @@ export const templates: Template[] = [ "genie-conversational-analytics", ], }), + createTemplate({ + id: "rag-chat-app-template", + name: "RAG Chat App Template", + description: + "Build a production-ready RAG chat application with pgvector similarity search, AI Gateway embeddings, streaming responses, and chat persistence.", + recipeIds: [ + "databricks-local-bootstrap", + "lakebase-data-persistence", + 
"foundation-models-api", + "embeddings-generation", + "lakebase-pgvector", + "ai-chat-model-serving", + "lakebase-chat-persistence", + "rag-chat-integration", + ], + }), ]; export const templatePreviewItems: TemplatePreviewItem[] = templates.map( diff --git a/src/lib/template-content.ts b/src/lib/template-content.ts new file mode 100644 index 0000000..f6c9949 --- /dev/null +++ b/src/lib/template-content.ts @@ -0,0 +1,266 @@ +import type { Template } from "./recipes/recipes"; + +export type TemplateContentBlock = + | { type: "markdown"; content: string } + | { type: "recipe"; recipeId: string } + | { type: "code"; language: string; content: string }; + +type RawRecipeMarkdownById = Record; + +const templateContentById: Record = { + "rag-chat-app-template": [ + { + type: "markdown", + content: `## What you are building + +A RAG (retrieval-augmented generation) chat app on Databricks. When a user sends a message, the server embeds the query, retrieves relevant documents from a pgvector table, injects them as context into the prompt, and streams a grounded response. The app persists chat sessions so users can resume conversations after refresh. 
+ +### Architecture + +\`\`\` +Client (React) Server (Express + AppKit) +-------------- ------------------------- +ChatPage POST /api/chat + useChat (AI SDK v6) embed query -> retrieveSimilar -> build context + SourcesDisplay prepend system message -> streamText -> persist + chat sidebar GET /api/chat/sources + auto-create chat on first msg embed query -> retrieveSimilar -> return sources + GET /api/chats (list sessions) + POST /api/chats (create session) + GET /api/chats/:id/messages (load history) +\`\`\` + +### Final project structure + +After completing all recipes, your project will have these files: + +\`\`\` +server/ + server.ts # Entry point: setup tables, seed, register routes, start + lib/ + embeddings.ts # generateEmbedding() via AI Gateway + rag-store.ts # setupRagTables(), insertDocument(), retrieveSimilar() + chat-store.ts # setupChatTables(), createChat(), appendMessage(), listChats(), getChatMessages() + seed-data.ts # seedFromWikipedia() with paragraph chunking + routes/ + chat-routes.ts # POST /api/chat (RAG flow), GET /api/chat/sources + chat-persistence-routes.ts # CRUD endpoints for chat sessions +client/ + src/pages/ + ChatPage.tsx # Chat UI with sources display and sidebar +\`\`\` + +### Prerequisites + +- A Databricks workspace with a CLI profile configured +- A Lakebase Postgres project with pgvector extension enabled +- AI Gateway endpoints for chat (e.g. \`databricks-meta-llama-3-3-70b-instruct\`) and embeddings (e.g. \`databricks-gte-large-en\`) + +Work through the recipes below in order. Each one builds on the previous.`, + }, + { + type: "markdown", + content: `--- + +## Phase 1: Project setup + +Scaffold the app, authenticate, and install agent skills. This creates the base AppKit project structure.`, + }, + { type: "recipe", recipeId: "databricks-local-bootstrap" }, + { + type: "markdown", + content: `--- + +## Phase 2: Database layer + +Add Lakebase (managed Postgres) to your app. 
This recipe creates the \`lakebase()\` plugin connection and a sample CRUD app. You will not use the sample CRUD routes in the final app, but you need the Lakebase plugin wiring, environment variables, and \`databricks.yml\` resource configuration it sets up. + +**Important:** When scaffolding with \`--features=lakebase\`, use the full resource paths from \`databricks postgres list-branches\` and \`databricks postgres list-databases\`. Short names like \`main\` will fail on deploy.`, + }, + { type: "recipe", recipeId: "lakebase-data-persistence" }, + { + type: "markdown", + content: `--- + +## Phase 3: AI Gateway + +Configure your chat and embedding model endpoints. The key outputs from this recipe are the \`DATABRICKS_ENDPOINT\` and \`DATABRICKS_WORKSPACE_ID\` environment variables, and understanding how to use \`createOpenAI()\` with the AI Gateway URL pattern.`, + }, + { type: "recipe", recipeId: "foundation-models-api" }, + { + type: "markdown", + content: `--- + +## Phase 4: Embeddings + +Create the \`generateEmbedding()\` function that calls a Databricks embedding endpoint. This function is used in three places later: +1. **Document seeding** — embed each text chunk before inserting into pgvector +2. **RAG retrieval** — embed the user's query to find similar documents +3. **Sources endpoint** — embed the query to return sources to the client + +Create this as \`server/lib/embeddings.ts\` and export the \`generateEmbedding\` function.`, + }, + { type: "recipe", recipeId: "embeddings-generation" }, + { + type: "markdown", + content: `--- + +## Phase 5: Vector store + +Set up pgvector tables and the insert/query functions. 
This recipe produces three exports from \`server/lib/rag-store.ts\`: +- \`setupRagTables(appkit)\` — creates the \`rag.documents\` table with a \`VECTOR(1024)\` column on startup +- \`insertDocument(appkit, { content, embedding, metadata })\` — inserts a document with its embedding +- \`retrieveSimilar(appkit, queryEmbedding, limit)\` — cosine similarity search + +These are used by the seeding module and chat routes in later recipes.`, + }, + { type: "recipe", recipeId: "lakebase-pgvector" }, + { + type: "markdown", + content: `--- + +## Phase 6: Streaming chat + +Build the base chat experience with AI SDK v6 streaming. This recipe gives you: +- The \`getDatabricksToken()\` auth helper +- A basic \`POST /api/chat\` route with \`streamText()\` +- A \`ChatPage\` component with \`useChat()\` and \`TextStreamChatTransport\` + +**Important:** Phase 8 replaces \`server/server.ts\`, \`server/routes/chat-routes.ts\`, and \`ChatPage.tsx\` with RAG-augmented versions. If you are building the full RAG app in one pass, **skip creating those files now** — just install the packages and read this recipe for context, then use the Phase 8 versions. + +Install the required packages: + +\`\`\`bash +npm install ai@6 @ai-sdk/react@3 @ai-sdk/openai@3 @databricks/sdk-experimental +\`\`\``, + }, + { type: "recipe", recipeId: "ai-chat-model-serving" }, + { + type: "markdown", + content: `--- + +## Phase 7: Chat persistence + +Add chat session storage so conversations survive page refreshes. 
This recipe provides the complete \`server/lib/chat-store.ts\` module with five exports: +- \`setupChatTables(appkit)\` — creates \`chat.chats\` and \`chat.messages\` tables on startup +- \`createChat(appkit, { userId, title })\` — creates a new chat session +- \`listChats(appkit, userId)\` — lists all chats for a user +- \`getChatMessages(appkit, chatId)\` — loads message history in chronological order +- \`appendMessage(appkit, { chatId, role, content })\` — saves a message and updates the chat's \`updated_at\` timestamp`, + }, + { type: "recipe", recipeId: "lakebase-chat-persistence" }, + { + type: "markdown", + content: `--- + +## Phase 8: RAG integration + +This is the capstone recipe. It ties everything together: document seeding, RAG-augmented chat routes (replacing the basic ones from Phase 6), sources API, chat persistence REST endpoints, the server bootstrap, and client-side sources display. + +**This recipe's \`server/server.ts\`, \`server/routes/chat-routes.ts\`, and \`ChatPage\` replace the versions from earlier recipes.** The earlier recipes taught the individual patterns; this recipe provides the final integrated versions. + +This recipe also replaces the scaffold's \`App.tsx\` — the chat becomes the entire app (root route), removing the scaffold's Home page and Lakebase demo.`, + }, + { type: "recipe", recipeId: "rag-chat-integration" }, + ], +}; + +export function getTemplateContentBlocks( + templateId: string, +): TemplateContentBlock[] | undefined { + return templateContentById[templateId]; +} + +export function collectTemplateRecipeIds(template: Template): string[] { + const blocks = getTemplateContentBlocks(template.id); + if (!blocks) { + return template.recipeIds; + } + + return [ + ...new Set( + blocks.flatMap((block) => + block.type === "recipe" ? 
[block.recipeId] : [], + ), + ), + ]; +} + +function getRecipeMarkdown( + recipeId: string, + rawBySlug: RawRecipeMarkdownById, +): string { + const markdown = rawBySlug[recipeId]; + if (!markdown) { + throw new Error(`Recipe markdown not found: ${recipeId}`); + } + return markdown.trim(); +} + +export function buildLegacyTemplateRawMarkdown( + template: Template, + rawBySlug: RawRecipeMarkdownById, +): string { + return template.recipeIds + .map((id) => rawBySlug[id]) + .filter(Boolean) + .join("\n\n---\n\n"); +} + +export function serializeTemplateContentBlocks( + blocks: TemplateContentBlock[], + rawBySlug: RawRecipeMarkdownById, +): string { + return blocks + .map((block) => { + switch (block.type) { + case "markdown": + return block.content.trim(); + case "recipe": + return getRecipeMarkdown(block.recipeId, rawBySlug); + case "code": + return `\`\`\`${block.language}\n${block.content.trimEnd()}\n\`\`\``; + default: + return ""; + } + }) + .filter(Boolean) + .join("\n\n"); +} + +export function buildTemplateRawMarkdown( + template: Template, + rawBySlug: RawRecipeMarkdownById, +): string { + const blocks = getTemplateContentBlocks(template.id); + if (!blocks) { + return buildLegacyTemplateRawMarkdown(template, rawBySlug); + } + + return serializeTemplateContentBlocks(blocks, rawBySlug); +} + +function escapeFrontmatter(value: string): string { + return value.replace(/"/g, '\\"'); +} + +export function buildTemplateMarkdownDocument( + template: Template, + rawBySlug: RawRecipeMarkdownById, +): string { + const body = buildTemplateRawMarkdown(template, rawBySlug); + + return [ + "---", + `title: "${escapeFrontmatter(template.name)}"`, + `url: /resources/${template.id}`, + `summary: "${escapeFrontmatter(template.description)}"`, + "---", + "", + `# ${template.name}`, + "", + template.description, + "", + body, + ].join("\n"); +} diff --git a/src/lib/use-raw-content-markdown.ts b/src/lib/use-raw-content-markdown.ts index 9b2ef0c..4d786af 100644 --- 
a/src/lib/use-raw-content-markdown.ts +++ b/src/lib/use-raw-content-markdown.ts @@ -30,3 +30,11 @@ export function useRawSolutionMarkdown(slug: string): string | undefined { ) as ContentEntriesGlobalData; return data.rawMarkdownBySlug[slug]; } + +export function useRawCookbookMarkdown(slug: string): string | undefined { + const data = usePluginData( + "docusaurus-plugin-content-entries", + "cookbooks", + ) as ContentEntriesGlobalData; + return data.rawMarkdownBySlug[slug]; +} diff --git a/src/pages/resources/ai-chat-app-template.tsx b/src/pages/resources/ai-chat-app-template.tsx index 350a7d1..d672d1d 100644 --- a/src/pages/resources/ai-chat-app-template.tsx +++ b/src/pages/resources/ai-chat-app-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import FoundationModelsApi from "@site/content/recipes/foundation-models-api.md"; @@ -15,10 +16,7 @@ export default function AiChatAppTemplatePage(): ReactNode { if (!template) { throw new Error("Template ai-chat-app-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/ai-data-explorer-template.tsx b/src/pages/resources/ai-data-explorer-template.tsx index c2c3a6e..c0d6c0c 100644 --- a/src/pages/resources/ai-data-explorer-template.tsx +++ b/src/pages/resources/ai-data-explorer-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from 
"@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; @@ -16,10 +17,7 @@ export default function AiDataExplorerTemplatePage(): ReactNode { if (!template) { throw new Error("Template ai-data-explorer-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/analytics-dashboard-app-template.tsx b/src/pages/resources/analytics-dashboard-app-template.tsx index f9a2847..631209d 100644 --- a/src/pages/resources/analytics-dashboard-app-template.tsx +++ b/src/pages/resources/analytics-dashboard-app-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; @@ -15,10 +16,7 @@ export default function AnalyticsDashboardAppTemplatePage(): ReactNode { if (!template) { throw new Error("Template analytics-dashboard-app-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/base-app-template.tsx b/src/pages/resources/base-app-template.tsx index eb0d14e..385e6b9 
100644 --- a/src/pages/resources/base-app-template.tsx +++ b/src/pages/resources/base-app-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; @@ -11,10 +12,7 @@ export default function BaseAppTemplatePage(): ReactNode { if (!template) { throw new Error("Template base-app-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/data-app-template.tsx b/src/pages/resources/data-app-template.tsx index bbd19fe..5fd8a6c 100644 --- a/src/pages/resources/data-app-template.tsx +++ b/src/pages/resources/data-app-template.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { TemplateDetail } from "@/components/templates/template-detail"; import { templates } from "@/lib/recipes/recipes"; +import { buildTemplateRawMarkdown } from "@/lib/template-content"; import { useAllRawRecipeMarkdown } from "@/lib/use-raw-content-markdown"; import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; @@ -12,10 +13,7 @@ export default function DataAppTemplatePage(): ReactNode { if (!template) { throw new Error("Template data-app-template not found"); } - const rawMarkdown = template.recipeIds - .map((id) => rawBySlug[id]) - .filter(Boolean) - .join("\n\n---\n\n"); + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug); return ( diff --git a/src/pages/resources/rag-chat-app-template.tsx 
b/src/pages/resources/rag-chat-app-template.tsx new file mode 100644 index 0000000..3d3a811 --- /dev/null +++ b/src/pages/resources/rag-chat-app-template.tsx @@ -0,0 +1,116 @@ +import CodeBlock from "@theme/CodeBlock"; +import { evaluateSync } from "@mdx-js/mdx"; +import { useMDXComponents } from "@mdx-js/react"; +import * as jsxRuntime from "react/jsx-runtime"; +import { type ComponentType, type ReactNode, useMemo } from "react"; +import { TemplateDetail } from "@/components/templates/template-detail"; +import { templates } from "@/lib/recipes/recipes"; +import { + buildTemplateRawMarkdown, + parseTemplateMarkdown, + type TemplateContentBlock, +} from "@/lib/template-markdown"; +import { + useAllRawRecipeMarkdown, + useRawCookbookMarkdown, +} from "@/lib/use-raw-content-markdown"; +import DatabricksLocalBootstrap from "@site/content/recipes/databricks-local-bootstrap.md"; +import LakebaseDataPersistence from "@site/content/recipes/lakebase-data-persistence.md"; +import FoundationModelsApi from "@site/content/recipes/foundation-models-api.md"; +import EmbeddingsGeneration from "@site/content/recipes/embeddings-generation.md"; +import LakebasePgvector from "@site/content/recipes/lakebase-pgvector.md"; +import AiChatModelServing from "@site/content/recipes/ai-chat-model-serving.md"; +import LakebaseChatPersistence from "@site/content/recipes/lakebase-chat-persistence.md"; +import RagChatIntegration from "@site/content/recipes/rag-chat-integration.md"; + +const template = templates.find((t) => t.id === "rag-chat-app-template"); + +const recipeComponents: Record = { + "databricks-local-bootstrap": DatabricksLocalBootstrap, + "lakebase-data-persistence": LakebaseDataPersistence, + "foundation-models-api": FoundationModelsApi, + "embeddings-generation": EmbeddingsGeneration, + "lakebase-pgvector": LakebasePgvector, + "ai-chat-model-serving": AiChatModelServing, + "lakebase-chat-persistence": LakebaseChatPersistence, + "rag-chat-integration": RagChatIntegration, +}; + 
+function TemplateMarkdownBlock({ content }: { content: string }): ReactNode { + const components = useMDXComponents(); + + const Content = useMemo(() => { + return evaluateSync(content, { + ...jsxRuntime, + useMDXComponents: () => components, + }).default; + }, [components, content]); + + return ; +} + +type TemplateRecipeComponentMap = Record; + +function TemplateBlockRenderer({ + blocks, + recipeComponents: recipeComponentMap, +}: { + blocks: TemplateContentBlock[]; + recipeComponents: TemplateRecipeComponentMap; +}): ReactNode { + return ( + <> + {blocks.map((block, index) => { + const key = `${block.type}-${index}`; + + switch (block.type) { + case "markdown": + return ; + case "code": + return ( + + {block.content.replace(/\n$/, "")} + + ); + case "recipe": { + const RecipeComponent = recipeComponentMap[block.recipeId]; + if (!RecipeComponent) { + throw new Error( + `Missing recipe component for template block: ${block.recipeId}`, + ); + } + return ; + } + default: + return null; + } + })} + + ); +} + +export default function RagChatAppTemplatePage(): ReactNode { + const rawBySlug = useAllRawRecipeMarkdown(); + const cookbookMarkdown = useRawCookbookMarkdown("rag-chat-app-template"); + const blocks = useMemo( + () => (cookbookMarkdown ? 
parseTemplateMarkdown(cookbookMarkdown) : []), + [cookbookMarkdown], + ); + + if (!template) { + throw new Error("Template rag-chat-app-template not found"); + } + const rawMarkdown = buildTemplateRawMarkdown(template, rawBySlug, blocks); + return ( + + + + ); +} diff --git a/tests/markdown.test.ts b/tests/markdown.test.ts index a8814ce..122bd45 100644 --- a/tests/markdown.test.ts +++ b/tests/markdown.test.ts @@ -19,6 +19,12 @@ describe("detail markdown resolver", () => { expect(markdown).toContain("## Databricks Local Bootstrap"); }); + test("does not duplicate recipe headings in legacy template export", () => { + const markdown = getDetailMarkdown("templates", "ai-chat-app-template"); + const matches = markdown.match(/## Databricks Local Bootstrap/g) ?? []; + expect(matches).toHaveLength(1); + }); + test("rejects path traversal", () => { expect(() => getDetailMarkdown("docs", "../package.json")).toThrow( "path traversal",