diff --git a/public/images/docs/observe/llm-tracing-overview.webp b/public/images/docs/observe/llm-tracing-overview.webp new file mode 100644 index 00000000..81015fc8 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-overview.webp differ diff --git a/src/components/CodeCopyButtons.astro b/src/components/CodeCopyButtons.astro index b9d9ab3f..26635599 100644 --- a/src/components/CodeCopyButtons.astro +++ b/src/components/CodeCopyButtons.astro @@ -17,6 +17,7 @@ document.querySelectorAll('.docs-content pre').forEach((pre) => { if (pre.hasAttribute('data-copy-ready')) return; if (pre.closest('.code-wrapper')) return; // CodeBlock provides its own button + if (pre.classList.contains('mermaid')) return; // Mermaid diagrams are not code blocks pre.setAttribute('data-copy-ready', ''); const btn = document.createElement('button'); diff --git a/src/components/docs/Mermaid.astro b/src/components/docs/Mermaid.astro new file mode 100644 index 00000000..54899a23 --- /dev/null +++ b/src/components/docs/Mermaid.astro @@ -0,0 +1,60 @@ +--- +/** + * Mermaid diagram, rendered client-side from CDN. + * Usage in MDX: <Mermaid chart={`graph TD; A --> B`} /> + * + * The script is `is:inline` so Astro does not bundle it — the CDN module import + * then runs natively in the browser (matching how the rest of this repo loads + * third-party client scripts). The diagram source lives in the page, per the + * docs playbook. In the full monorepo, `pnpm add mermaid` and swap the CDN + * import for `import mermaid from 'mermaid'`. + */ +interface Props { + chart: string; +} +const { chart } = Astro.props; +--- + +
{chart}
+ + + + diff --git a/src/components/docs/Steps.astro b/src/components/docs/Steps.astro index 9fbfbb4a..05dc7810 100644 --- a/src/components/docs/Steps.astro +++ b/src/components/docs/Steps.astro @@ -5,50 +5,10 @@ - + diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts index 059c31d7..8f8646a3 100644 --- a/src/lib/navigation.ts +++ b/src/lib/navigation.ts @@ -69,6 +69,12 @@ export const tabNavigation: NavTab[] = [ { title: 'Troubleshooting and FAQs', href: '/docs/self-hosting/troubleshooting' }, ] }, + { + title: 'Release notes', + items: [ + { title: "What's new", href: '/docs/release-notes' }, + ] + }, ] }, { @@ -347,21 +353,21 @@ export const tabNavigation: NavTab[] = [ group: 'Observability', icon: 'eye', items: [ - { title: 'Overview', href: '/docs/observe' }, + { title: 'Get Started', href: '/docs/observe' }, + { title: 'Quickstart', href: '/docs/observe/quickstart' }, { title: 'Concepts', items: [ { title: 'Understanding Observability', href: '/docs/tracing/concepts' }, - { title: 'What are Traces?', href: '/docs/tracing/concepts/traces' }, - { title: 'What are Spans?', href: '/docs/tracing/concepts/spans' }, - { title: 'What is OpenTelemetry?', href: '/docs/tracing/concepts/otel' }, - { title: 'What is traceAI?', href: '/docs/tracing/concepts/traceai' }, + { title: 'Spans', href: '/docs/observe/concepts/spans' }, + { title: 'Traces', href: '/docs/observe/concepts/traces' }, + { title: 'OpenTelemetry', href: '/docs/tracing/concepts/otel' }, + { title: 'traceAI', href: '/docs/tracing/concepts/traceai' }, ] }, { title: 'Features', items: [ - { title: 'Set Up Observability', href: '/docs/observe/features/quickstart' }, { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' }, { title: 'Sessions', href: '/docs/observe/features/session' }, { title: 'Users', href: '/docs/observe/features/users' }, diff --git a/src/pages/docs/observe/concepts/spans.mdx b/src/pages/docs/observe/concepts/spans.mdx new file mode 100644 index 00000000..cfbb4920 --- 
/dev/null +++ b/src/pages/docs/observe/concepts/spans.mdx @@ -0,0 +1,43 @@ +--- +title: "Spans" +description: "Pinpointing the single step behind a slow or wrong answer." +--- + +## A span is one step + +A span is one operation inside a [trace](/docs/observe/concepts/traces): a single model call, tool call, retrieval, agent step, guardrail check, or evaluator run. It records its own input and output, when it started and finished, whether it succeeded, and, for model calls, the tokens and cost it ran up. Where a trace is the whole request, a span is one step inside it. + +Under the hood, a span is an [OpenTelemetry](/docs/tracing/concepts/otel) span. OpenTelemetry defines the shape (a named, timed unit of work with a status, a parent, and key-value attributes), and traceAI fills those attributes with LLM-specific keys: the span `kind` that says what ran, the prompt and completion, the token counts. So every span you see in Observe is a standard OTel span carrying traceAI's LLM attributes. + + +A parent span, say an agent, holds the child spans it set off, and each of those can have children of its own. That nesting is how Future AGI works out which step triggered which, and it is what lets a trace draw itself as a tree. + +<Mermaid chart={`graph TD + A["agent span"] --> B["chain span"] + A --> C["tool span"] + B --> D["retriever span"] + B --> E["llm span"]`} /> + +Each box is a span with its own timing and attributes. The edges come straight from OpenTelemetry: every span carries the trace ID and its parent span's ID, and OTel propagates that context down your call stack, so a span created deep inside nests under the one above it without you wiring it up. Those same links are what a [trace](/docs/observe/concepts/traces) is rebuilt from. + +## What a span isn't + +- **Not a log line.** A log is a flat text event. A span is a timed unit with structured input, output, status, and attributes, linked to a parent +- **Not an event.** An event is a point-in-time marker inside a span, like an exception. 
The span is the operation that holds it + +## Why it matters + +A response is only as strong as its weakest step, and that step is usually where things break. The trace tells you a request was slow or wrong; the span tells you which step did it and hands you the evidence: the exact prompt sent to the model, the arguments a tool received, the chunks a retriever pulled back, or the score an evaluator gave. traceAI captures these spans for you on supported frameworks, and where it can't reach, you can [add your own](/docs/observe/features/manual-tracing/create-tool-spans) so no part of your pipeline stays a black box. + +## Keep exploring + + + + The full request that spans are grouped into + + + Add custom spans where auto-instrumentation stops + + diff --git a/src/pages/docs/observe/concepts/traces.mdx b/src/pages/docs/observe/concepts/traces.mdx new file mode 100644 index 00000000..251aa12e --- /dev/null +++ b/src/pages/docs/observe/concepts/traces.mdx @@ -0,0 +1,44 @@ +--- +title: "Traces" +description: "Helping you debug an AI response, step by step." +--- + +## A trace is a tree + +A trace is a tree of [spans](/docs/observe/concepts/spans). The root span is the operation that kicked off the request, and every other span nests under the step that triggered it. They all share one trace ID, so the whole request stitches back together top to bottom, even when steps run across async tasks or services. + +<Mermaid chart={`graph TD + T["support_agent.run"] --> S1["llm.intent_classification"] + T --> S2["tool.check_order_status"] + T --> S3["chain.generate_reply"] + S3 --> S4["retriever.knowledge_base"] + S3 --> S5["llm.response_generation"]`} /> + +The tree above is one support-agent request. The root `support_agent.run` is the whole request. Under it, `llm.intent_classification` reads the question, `tool.check_order_status` looks up the order, and `chain.generate_reply` writes the answer, which itself calls `retriever.knowledge_base` for the refund policy and `llm.response_generation` for the wording. 
+ +Read top to bottom, the tree is the exact path the request took, so when an answer comes out wrong you can see which step caused it. + +## What a trace isn't + +- **Not a session.** A session bundles many traces from one conversation or user. A trace is just one request inside it. See [Sessions and users](/docs/observe/features/session) +- **Not a log line.** Logs are flat text events. A trace is a timed, structured tree with inputs, outputs, and cost at every step + +## Why it matters + +Without traces, a wrong or slow answer is a dead end. You see the output but not the steps behind it, so debugging turns into guesswork over flat logs. A trace turns that into a readable path you can walk: you spot that the retriever pulled the wrong policy chunk, that one tool call dragged on for four seconds, or that an eval flagged the answer as unsupported. Latency, cost, errors, and quality all hang off the same request, so you debug from one place instead of stitching logs together by hand. + +## Keep exploring + + + + The individual operations a trace is built from + + + Group multiple traces into one conversation or customer + + + Instrument your app so it emits traces + + diff --git a/src/pages/docs/observe/index.mdx b/src/pages/docs/observe/index.mdx index 478e3c5f..4dd5a957 100644 --- a/src/pages/docs/observe/index.mdx +++ b/src/pages/docs/observe/index.mdx @@ -1,45 +1,51 @@ --- -title: "Future AGI Observe: Monitor LLM Apps in Production" -description: "Monitor and evaluate LLM applications in production with real-time tracing, session analysis, cost tracking, and alerting." +title: "Get started with Observe" +description: "Observe records every request your AI app makes as a trace you can open, search, and score. Send your first trace, then go deeper" --- -## About +Observe shows you what your AI app actually did in production. Every request becomes a **trace**: the step-by-step record of the model calls, tool calls, and retrievals behind one response. 
When an answer is wrong, slow, or expensive, you open the trace and read what happened instead of guessing. -Observability is how you monitor your AI application after it goes live. Once your app is in production, things change: user inputs vary, model behavior shifts, and issues come up that testing never caught. Observability gives you a continuous view of how your application is performing so you can stay on top of it. +You only need one trace to begin. Everything else here builds on it. -It tracks every response your application generates, groups them by session and user, scores them for quality, and alerts you when something goes wrong. Instead of finding out about problems from users, you see them in the dashboard first. +Observe trace explorer listing production traces with status, latency, and token columns +*Every production request, captured as a trace and ready to inspect* -Sessions Overview +## Start here ---- - -## How Observability Connects to Other Features - -- **Prototype**: After you promote a winning version in Prototype, its traces continue flowing into Observe so you can monitor production performance against the same quality criteria. [Learn more](/docs/prototype) -- **Evaluation**: Observability uses the same built-in eval templates to score production traces automatically. Any eval you configured in Prototype or Datasets runs the same way here. [Learn more](/docs/evaluation) -- **Alerts**: Observability feeds into the alerting system so you are notified when quality, cost, or latency crosses a threshold in production. [Learn more](/docs/observe/features/alerts) + + + Instrument one call and watch it land in Observe, in about five minutes + + + One line to trace OpenAI, Anthropic, LangChain, and 30+ more + + ---- +## Understand the model -## Getting Started with Observability +A few short pages give you the whole mental model behind Observe. 
Read these and the rest of the product explains itself: - - Connect the SDK and start capturing traces in minutes. - - - Run evaluations on observed traces and sessions. + + What gets recorded for each request, and how the steps nest - - Group and analyze multi-turn interactions. + + Follow a full conversation, or one customer across sessions - - Track and analyze activity by user. + + The open library that sends your traces to Observe - - Configure alerts for real-time issue detection. + + +## Once your traces are flowing + +Every other feature in Observe is just a different lens on the traces you capture: + + + + Attach quality scores to whole traces or single spans - - Monitor voice agent interactions and call quality. + + Get told the moment a metric slips diff --git a/src/pages/docs/observe/quickstart.mdx b/src/pages/docs/observe/quickstart.mdx new file mode 100644 index 00000000..d3a95900 --- /dev/null +++ b/src/pages/docs/observe/quickstart.mdx @@ -0,0 +1,147 @@ +--- +title: "Quickstart" +description: "Send your first LLM trace to Future AGI Observe in about five minutes" +--- + +Get your first trace into Observe in about five minutes, without changing your app's logic. + +## In this page + +You will install the traceAI instrumentor, register an Observe project, run a single OpenAI call, and confirm the trace in the dashboard with its model, latency, and token cost. The same four steps work for 30+ frameworks, so once OpenAI is traced you have the pattern for the rest of your stack. 
+ +## Prerequisites + +- A Future AGI account and your **`FI_API_KEY`** and **`FI_SECRET_KEY`** (Dashboard → Build → Keys) +- Python 3.11 (or Node 18+ for the TypeScript path) +- An OpenAI API key + + +Pin the packages to the version you test against, so a later release cannot change behavior under you + + +## Steps + + + + Install the core instrumentation package and the OpenAI instrumentor + + + ```bash Python + pip install fi-instrumentation-otel traceAI-openai + ``` + ```bash JS/TS + npm install @traceai/fi-core @traceai/openai + ``` + + + + + Read keys from the environment, never hardcode them in source + + ```bash + export FI_API_KEY="your-futureagi-api-key" + export FI_SECRET_KEY="your-futureagi-secret-key" + export OPENAI_API_KEY="your-openai-api-key" + ``` + + + + `register` returns a tracer provider. Set `project_type` to `OBSERVE`, attach the OpenAI instrumentor, then call OpenAI exactly as you normally would + + + ```python Python + from fi_instrumentation import register, Transport + from fi_instrumentation.fi_types import ProjectType + from traceai_openai import OpenAIInstrumentor + from openai import OpenAI + + # Connect to Future AGI and create (or reuse) an Observe project + trace_provider = register( + project_type=ProjectType.OBSERVE, + project_name="my-first-project", + transport=Transport.GRPC, + ) + + # Auto-instrument OpenAI: every call is now traced + OpenAIInstrumentor().instrument(tracer_provider=trace_provider) + + # Use OpenAI exactly as you normally would + client = OpenAI() + completion = client.chat.completions.create( + model="gpt-4o", + messages=[{"role": "user", "content": "Write a one-sentence bedtime story about a unicorn."}], + ) + print(completion.choices[0].message.content) + ``` + ```typescript JS/TS + import { register, ProjectType } from "@traceai/fi-core"; + import { OpenAIInstrumentation } from "@traceai/openai"; + import { registerInstrumentations } from "@opentelemetry/instrumentation"; + import OpenAI from "openai"; + 
+ // Connect to Future AGI and create (or reuse) an Observe project + const traceProvider = register({ + project_type: ProjectType.OBSERVE, + project_name: "my-first-project", + }); + + // Auto-instrument OpenAI: every call is now traced + registerInstrumentations({ + instrumentations: [new OpenAIInstrumentation({})], + tracerProvider: traceProvider, + }); + + // Use OpenAI exactly as you normally would + const client = new OpenAI(); + const completion = await client.chat.completions.create({ + model: "gpt-4o", + messages: [{ role: "user", content: "Write a one-sentence bedtime story about a unicorn." }], + }); + console.log(completion.choices[0].message.content); + ``` + + + Expected terminal output (the wording varies): + + ```text + Under a sky of silver stars, a gentle unicorn dipped its horn into a + moonlit pool and wished every sleeping child sweet dreams. + ``` + + + + Open **Observe → my-first-project → Tracing**. Within a few seconds you will see one trace row with **status OK**, the **model**, the **latency**, and the **token count**. Click it to read the prompt, the completion, and the span timing + + Observe trace explorer with one new OpenAI trace showing OK status, model, latency, and token columns + *Your request, now a trace. If the row is here with an OK status, instrumentation is working end to end* + + + +## What you just captured + +That row is a [trace](/docs/observe/concepts/traces), the full record of one request. Because this example made a single OpenAI call, the trace holds one [span](/docs/observe/concepts/spans): the `llm` operation, carrying the model, the prompt and completion, the token counts, and the cost. + +The same four steps instrument 30+ frameworks. Swap the instrumentor for your stack and the flow is identical; see [all framework integrations](/docs/tracing/auto). + +## Not seeing your trace? + +- **No trace appears**: a short script can exit before the exporter flushes. 
Call `trace_provider.force_flush()` before the process ends +- **Wrong or empty project**: confirm `project_name` matches the project you are viewing, and that `FI_API_KEY` and `FI_SECRET_KEY` belong to this workspace +- **Still nothing**: widen the date picker (it defaults to the last 7 days) and turn on **Auto refresh** + +## Dive deeper + + + + The mental model the rest of Observe is built on + + + One step inside a trace: a model call, a tool call, a retrieval + + + Attach quality scores to your production traces + + + Get told the moment a metric slips + + diff --git a/src/pages/docs/tracing/concepts/index.mdx b/src/pages/docs/tracing/concepts/index.mdx index 59d88314..7250aed2 100644 --- a/src/pages/docs/tracing/concepts/index.mdx +++ b/src/pages/docs/tracing/concepts/index.mdx @@ -45,10 +45,10 @@ Each **trace** is one request or execution. Each **span** is one operation (LLM, ## Next Steps - + The top-level unit: one request = one trace. - + The building blocks inside every trace. diff --git a/src/plugins/vite-docs-transform.mjs b/src/plugins/vite-docs-transform.mjs index dab3550b..2dffacfc 100644 --- a/src/plugins/vite-docs-transform.mjs +++ b/src/plugins/vite-docs-transform.mjs @@ -22,6 +22,7 @@ const COMPONENT_MAP = { CopyButton: '@docs/CopyButton.astro', Expandable: '@docs/Expandable.astro', Icon: '@docs/Icon.astro', + Mermaid: '@docs/Mermaid.astro', Note: '@docs/Note.astro', ParamField: '@docs/ParamField.astro', Prerequisites: '@docs/Prerequisites.astro', diff --git a/src/styles/global.css b/src/styles/global.css index fa266fda..1872c939 100644 --- a/src/styles/global.css +++ b/src/styles/global.css @@ -565,3 +565,70 @@ kbd { stroke-dasharray: 4 4; fill: none; } + +/* Steps component: number badges + connector line (the "step chain"). + Lives here, not in Steps.astro, so it is loaded on every page. The custom + FastNav swaps body content but not page-specific