From 62db624a1a8de2efa55929ac0566d41068495af4 Mon Sep 17 00:00:00 2001 From: khushalsonawat Date: Wed, 1 Jul 2026 16:22:10 +0530 Subject: [PATCH 01/11] docs(observe): add Observe concept pages (spans, traces, sessions, users, voice) First chunk of the Observe revamp, stacked on the product dropdown reorder. Adds the Observe > Concepts pages as Explanation-mode docs, plus the Mermaid rendering support they depend on, and repoints the Observe > Concepts sidebar to them. - New concept pages: spans, traces, sessions, users, voice-observability, observability-model, otel, traceai - Mermaid support: add Mermaid.astro, register it in vite-docs-transform, skip .mermaid in copy buttons, and add diagram-centering CSS in global.css - Nav: Observe > Concepts now points at /docs/observe/concepts/* Links to not-yet-migrated pages (traceai/*, observe/quickstart, observe/features/tags) are intentionally broken and land in later chunks. --- src/components/CodeCopyButtons.astro | 1 + src/components/docs/Mermaid.astro | 60 +++++++ src/lib/navigation.ts | 11 +- .../observe/concepts/observability-model.mdx | 148 ++++++++++++++++ src/pages/docs/observe/concepts/otel.mdx | 160 ++++++++++++++++++ src/pages/docs/observe/concepts/sessions.mdx | 48 ++++++ src/pages/docs/observe/concepts/spans.mdx | 49 ++++++ src/pages/docs/observe/concepts/traceai.mdx | 158 +++++++++++++++++ src/pages/docs/observe/concepts/traces.mdx | 39 +++++ src/pages/docs/observe/concepts/users.mdx | 51 ++++++ .../observe/concepts/voice-observability.mdx | 51 ++++++ src/plugins/vite-docs-transform.mjs | 1 + src/styles/global.css | 67 ++++++++ 13 files changed, 839 insertions(+), 5 deletions(-) create mode 100644 src/components/docs/Mermaid.astro create mode 100644 src/pages/docs/observe/concepts/observability-model.mdx create mode 100644 src/pages/docs/observe/concepts/otel.mdx create mode 100644 src/pages/docs/observe/concepts/sessions.mdx create mode 100644 src/pages/docs/observe/concepts/spans.mdx create mode 100644 
src/pages/docs/observe/concepts/traceai.mdx create mode 100644 src/pages/docs/observe/concepts/traces.mdx create mode 100644 src/pages/docs/observe/concepts/users.mdx create mode 100644 src/pages/docs/observe/concepts/voice-observability.mdx diff --git a/src/components/CodeCopyButtons.astro b/src/components/CodeCopyButtons.astro index b9d9ab3f..26635599 100644 --- a/src/components/CodeCopyButtons.astro +++ b/src/components/CodeCopyButtons.astro @@ -17,6 +17,7 @@ document.querySelectorAll('.docs-content pre').forEach((pre) => { if (pre.hasAttribute('data-copy-ready')) return; if (pre.closest('.code-wrapper')) return; // CodeBlock provides its own button + if (pre.classList.contains('mermaid')) return; // Mermaid diagrams are not code blocks pre.setAttribute('data-copy-ready', ''); const btn = document.createElement('button'); diff --git a/src/components/docs/Mermaid.astro b/src/components/docs/Mermaid.astro new file mode 100644 index 00000000..54899a23 --- /dev/null +++ b/src/components/docs/Mermaid.astro @@ -0,0 +1,60 @@ +--- +/** + * Mermaid diagram, rendered client-side from CDN. + * Usage in MDX: B`} /> + * + * The script is `is:inline` so Astro does not bundle it — the CDN module import + * then runs natively in the browser (matching how the rest of this repo loads + * third-party client scripts). The diagram source lives in the page, per the + * docs playbook. In the full monorepo, `pnpm add mermaid` and swap the CDN + * import for `import mermaid from 'mermaid'`. + */ +interface Props { + chart: string; +} +const { chart } = Astro.props; +--- + +
{chart}
+ + + + diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts index 059c31d7..252422ea 100644 --- a/src/lib/navigation.ts +++ b/src/lib/navigation.ts @@ -351,11 +351,12 @@ export const tabNavigation: NavTab[] = [ { title: 'Concepts', items: [ - { title: 'Understanding Observability', href: '/docs/tracing/concepts' }, - { title: 'What are Traces?', href: '/docs/tracing/concepts/traces' }, - { title: 'What are Spans?', href: '/docs/tracing/concepts/spans' }, - { title: 'What is OpenTelemetry?', href: '/docs/tracing/concepts/otel' }, - { title: 'What is traceAI?', href: '/docs/tracing/concepts/traceai' }, + { title: 'Spans', href: '/docs/observe/concepts/spans' }, + { title: 'Traces', href: '/docs/observe/concepts/traces' }, + { title: 'Sessions', href: '/docs/observe/concepts/sessions' }, + { title: 'Users', href: '/docs/observe/concepts/users' }, + { title: 'Voice observability', href: '/docs/observe/concepts/voice-observability' }, + { title: 'Observability model', href: '/docs/observe/concepts/observability-model' }, ] }, { diff --git a/src/pages/docs/observe/concepts/observability-model.mdx b/src/pages/docs/observe/concepts/observability-model.mdx new file mode 100644 index 00000000..7e534d82 --- /dev/null +++ b/src/pages/docs/observe/concepts/observability-model.mdx @@ -0,0 +1,148 @@ +--- +title: "Observability model" +description: "How the pieces of Observe fit together: spans nest into traces, traces group into sessions and users, and eval scores attach on top — all collected as OpenTelemetry data through the traceAI SDK." 
+slug: "observability-model" +page_type: "concept" +diataxis: "explanation" +products: ["Observe"] +concept_family: "observability" +concept_level: "foundational" +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-06-18" +last_diagram_reviewed: "2026-06-17" +schema_type: "TechArticle" +primary_question: "How do traces, spans, sessions, users, and evals relate in FutureAGI Observe?" +direct_answer: "Observe is built on a hierarchy: a span is one operation, spans sharing a trace ID form a trace (one request), traces sharing a session ID form a session, and sessions belong to a user. Eval scores attach to spans or traces. All of it is captured as OpenTelemetry data via the traceAI SDK." +seo: + title: "The FutureAGI Observe observability model" + description: "The entity hierarchy behind Observe — spans, traces, sessions, users, and eval scores — and how traceAI and OpenTelemetry collect it." + primary_keyword: "llm observability data model" + direct_answer: true +geo: + answer_target: "How do traces, spans, sessions, and users relate in FutureAGI Observe?" + llm_summary: "Spans nest into traces by trace ID, traces group into sessions by session ID, sessions belong to users, and eval scores attach to spans or traces — all collected via traceAI over OpenTelemetry." +canonical: "/docs/observe/concepts/observability-model" +related: + - "/docs/observe/concepts/traces" + - "/docs/observe/concepts/spans" + - "/docs/observe/concepts/sessions" + - "/docs/observe/concepts/otel" +--- + +## About + +Observe doesn't have one data type — it has a small set of objects that nest. Understanding how they relate is the difference between knowing *where* to look and guessing. A **span** is a single operation. Spans that share a trace ID make up a **trace** — one full request. Traces that share a session ID make up a **session** — one multi-turn conversation. Sessions belong to a **user**. 
And **eval scores** attach on top, to a span or a whole trace. Every view in Observe — the trace list, the span detail drawer, sessions, dashboards, alerts — is a different lens on this one hierarchy. + +All of it is collected the same way: your app emits spans through the [traceAI](/docs/observe/concepts/traceai) SDK, which is built on [OpenTelemetry](/docs/observe/concepts/otel), and Observe reads them. + +--- + +## Mental model + +The objects form a strict containment hierarchy. The ID on each span is what reconstructs it — you don't assemble traces or sessions by hand; shared IDs do that automatically. + + S["Session · session.id"] + S --> T1["Trace · one request"] + S --> T2["Trace · one request"] + T1 --> SP1["Span · llm call"] + T1 --> SP2["Span · tool call"] + SP1 --> EV["Eval score"] + T1 --> EV2["Eval score"]`} /> + +Read it bottom-up when debugging (a bad span → which trace → which session → which user) and top-down when analyzing (a user's sessions → their traces → the spans inside). + +--- + +## Key terms + +| Object | What it is | Identified by | Learn more | +|---|---|---|---| +| **Span** | One operation — an LLM call, tool call, retrieval, or agent step — with input, output, timing, and cost. | Span ID (+ parent span ID) | [Spans](/docs/observe/concepts/spans) | +| **Trace** | One complete request, made of all the spans that share its trace ID. | Trace ID | [Traces](/docs/observe/concepts/traces) | +| **Session** | A multi-turn conversation — the traces that share a session ID. | `session.id` | [Sessions](/docs/observe/concepts/sessions) | +| **User** | One end user, across all their sessions and traces. | `user.id` | [Users](/docs/observe/concepts/users) | +| **Eval score** | A quality score attached to a span or trace. | Attached to span/trace | [Trace evals](/docs/observe/features/evals) | +| **OpenTelemetry** | The open standard the spans are emitted in. 
| — | [OpenTelemetry](/docs/observe/concepts/otel) | +| **traceAI** | The SDK that produces the spans. | — | [traceAI SDK](/docs/observe/concepts/traceai) | + +--- + +## How it works in FutureAGI + +Your app emits **spans** through traceAI (or OpenTelemetry directly). Each span carries a trace ID, so all spans from one request form a single **trace**. The backend receives them over OTLP (HTTP or gRPC) and stores them by project — and from there every Observe view runs on the same data. + +|traceAI / OTel SDK| B["OTLP: HTTP or gRPC"] + B --> C["FutureAGI backend"] + C --> D["Observe dashboard"]`} /> + +To enrich the hierarchy — set a `session.id`, a `user.id`, or [tags](/docs/observe/features/tags) — you attach those attributes in code. Wrap the traced work in the context managers and every span inside picks them up: + + +```python +from fi_instrumentation import using_session, using_user + +with using_session("session_123"), using_user("user_456"): + response = run_agent(prompt) +``` + + +For the full set of attributes, see [add attributes and metadata](/docs/traceai/manual-instrumentation/add-attributes-metadata-tags). + +--- + +## Walking the hierarchy when debugging + +The hierarchy is most useful read bottom-up. A typical investigation starts at the smallest object and climbs: + +1. **Start at the span.** A user complained the assistant gave a wrong answer. You open the span that produced it and read its real input and output — the exact prompt and the exact completion, not a paraphrase. +2. **Climb to the trace.** The span alone rarely explains the failure. You move up to its trace and read the other spans in order — the retrieval that fed bad context, the tool call that returned stale data, the agent step that chose the wrong path. The trace is where the *request* becomes legible. +3. **Climb to the session.** If the request looked fine in isolation but the conversation still went wrong, you open its session and read the earlier turns. 
Multi-turn problems — the assistant losing track, contradicting itself — only show up here. +4. **Climb to the user.** Finally, if the pattern repeats, you pivot to the user to see whether it's one customer's data or a systemic issue across everyone. + +Because the IDs link each level to the next, every climb is one click — you never reassemble context by hand. The same path runs top-down for analysis: start at a user, expand their sessions, then traces, then the spans inside. + +--- + +## When to use this model + +- **Debugging a bad answer** — start at the span that produced it, then read the rest of the trace for context. +- **Analyzing a conversation** — open the session to see every turn in order. +- **Investigating a user** — pivot from a user to all their sessions and traces. +- **Measuring quality** — read eval scores attached at the span or trace level. + +--- + +## Common mistakes + +- **Confusing a span with a trace** — a slow *trace* tells you a request was slow; the *span* tells you which step was slow. They are different levels. See [Spans](/docs/observe/concepts/spans). +- **Expecting sessions without a session ID** — traces only group into a session if they share a `session.id`. Set it in code. +- **Looking for users you never tagged** — per-user views need `user.id` on the spans. + +--- + +## Next steps + + + + The top-level unit: one request, one trace. + + + The operations that make up every trace. + + + Grouping traces by conversation and end user. + + + Send a trace and watch the model fill in. + + diff --git a/src/pages/docs/observe/concepts/otel.mdx b/src/pages/docs/observe/concepts/otel.mdx new file mode 100644 index 00000000..e97129e1 --- /dev/null +++ b/src/pages/docs/observe/concepts/otel.mdx @@ -0,0 +1,160 @@ +--- +title: "OpenTelemetry" +description: "OpenTelemetry is the open, vendor-neutral standard Observe uses to collect and export traces — your app emits OTel spans that the traceAI SDK sends to Observe." 
+slug: "otel" +page_type: "concept" +diataxis: "explanation" +products: ["Observe"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What is OpenTelemetry and how does FutureAGI use it?" +direct_answer: "OpenTelemetry (OTel) is the open standard for collecting traces, metrics, and logs. FutureAGI builds on it: your app emits OTel spans that the traceAI SDK exports to Observe." +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-06-18" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + title: "OpenTelemetry in Observe" + description: "How Observe builds on OpenTelemetry — your app emits OTel spans, the traceAI SDK exports them over OTLP, and Observe reads them. Vendor-neutral instrumentation." + primary_keyword: "what is opentelemetry llm tracing" + direct_answer: true +geo: + answer_target: "What is OpenTelemetry and how does FutureAGI Observe use it?" + llm_summary: "OpenTelemetry (OTel) is the open, vendor-neutral standard for collecting traces, metrics, and logs. Observe builds on it: your app emits OTel spans and the traceAI SDK exports them over OTLP to Observe, so the same instrumentation works across languages and backends." +canonical: "/docs/observe/concepts/otel" +related: + - "/docs/observe/concepts/traceai" + - "/docs/observe/concepts/spans" + - "/docs/traceai/manual-instrumentation/set-up-tracing" + - "/docs/observe/features/llm-tracing" +--- + +## About + +[OpenTelemetry](https://opentelemetry.io/) (OTel) is the open, vendor-neutral standard for collecting traces, metrics, and logs from software. It defines how a [span](/docs/observe/concepts/spans) is structured and how spans are exported, so any tool that speaks OTel can read them. FutureAGI is built on it: your app emits OTel spans, and [traceAI](/docs/observe/concepts/traceai) exports them over OTLP to Observe. 
Because it's a standard, the same instrumentation works across languages, frameworks, and backends. + +--- + +## Why it matters + +Building on a standard is what keeps you free of vendor lock-in. Your instrumentation isn't proprietary to FutureAGI — the same OTel spans can go to any OTel-compatible backend, and FutureAGI can ingest spans from anything that emits them. OTel is also built for scale: batch export handles high-volume tracing without blocking your app. So the choice to standardize on OTel is why "set up tracing once" works regardless of your stack. + +--- + +## Mental model + +OTel sits between your app and FutureAGI: your code (or an instrumentor) creates spans, a processor batches them, and an exporter ships them over OTLP. + + B["OTel SDK: create spans"] + B --> C["Span processor: batch"] + C -->|"OTLP HTTP/gRPC"| D["FutureAGI backend"] + D --> E["Observe"]`} /> + +traceAI plugs into this pipeline — it's the layer that creates *LLM-shaped* spans and configures the exporter to point at FutureAGI. + +--- + +## What OTel gives traceAI + +traceAI doesn't reinvent any of the tracing machinery — it inherits three things from OpenTelemetry and adds LLM meaning on top: + +- **The span model.** OTel defines what a [span](/docs/observe/concepts/spans) is: a named, timed unit of work with a start, an end, attributes, status, and a parent. traceAI uses that exact shape and fills the attributes with LLM-specific keys (prompt, completion, token counts, span kind). Because the shape is standard, every span traceAI produces is a valid OTel span. +- **Context propagation.** OTel carries a trace ID and the current parent through an in-process context, so a span created deeper in your call stack automatically nests under the one above it. This is what lets spans from a single request reconstruct into one trace, and what lets `session.id` and `user.id` flow down to child spans without being passed manually. 
+- **OTLP export.** Finished spans are batched and shipped over OTLP — the OpenTelemetry wire protocol — to a backend. traceAI's job here is just to point that exporter at FutureAGI; the transport itself is plain OTel. + +The split is deliberate: OTel owns *how spans are structured, linked, and shipped*, and traceAI owns *what an LLM span should say*. + +--- + +## Key terms + +| Term | What it is | +|---|---| +| **OTLP** | The OpenTelemetry Protocol — the wire format spans are exported in, over HTTP or gRPC. | +| **Span processor** | The stage that collects finished spans and hands them to the exporter; batched by default. | +| **Exporter** | The component that ships spans over OTLP to a backend such as FutureAGI. | +| **TracerProvider** | The root object that holds the processor and exporter and produces tracers that create spans. | + +--- + +## When to use + +- You want instrumentation that isn't locked to one vendor. +- You already emit OTel spans and want them in Observe. +- You're tracing across multiple languages or services and need one standard. +- You need high-volume tracing with batched export. + +--- + +## When not to use raw OpenTelemetry directly + +You build on OTel either way — Observe only reads OTel spans. The question is whether to write OTel calls yourself or let [traceAI](/docs/observe/concepts/traceai) do it for you. + +- **For LLM and agent calls, don't hand-write OTel spans.** Raw OTel has no notion of a prompt, a completion, or token cost, so you'd be re-deriving traceAI's LLM conventions by hand and risking inconsistent keys. Use a traceAI instrumentor instead — it produces the standard LLM-shaped spans Observe's filters, evals, and dashboards expect. +- **Don't reach for OTel when you just need one custom step traced.** Wrapping your own function is what [tool spans](/docs/traceai/manual-instrumentation/create-tool-spans) are for; they ride the same provider traceAI already configured. 
+- **Don't treat OTel as a destination.** It collects and ships spans; it does not store, index, or display them. If you need to read traces, that's Observe's job, not OTel's. + +Raw OTel is the right tool only for non-LLM work — a database call, an HTTP handler — that you want in the same trace tree. + +--- + +## What it isn't + +- **OTel is not a backend.** It collects and exports spans; storing and displaying them is FutureAGI's job. +- **OTel is not traceAI.** traceAI is the FutureAGI layer *on top of* OTel that adds LLM-specific spans and conventions. See [traceAI](/docs/observe/concepts/traceai). +- **OTel is not logging.** It's structured, timed traces — not flat log lines. + +--- + +## How FutureAGI uses OpenTelemetry + +`register()` configures an OTel `TracerProvider` with an OTLP exporter pointed at FutureAGI, and a span processor (batched by default). Auto-instrumentors and manual spans both feed that provider. The `transport` argument selects the OTLP transport — `Transport.HTTP` for OTLP/HTTP or `Transport.GRPC` for OTLP/gRPC: + + +```python +from fi_instrumentation import register, Transport +from fi_instrumentation.fi_types import ProjectType + +# OTLP/HTTP +tp = register(project_type=ProjectType.OBSERVE, project_name="my-app", transport=Transport.HTTP) + +# OTLP/gRPC +tp = register(project_type=ProjectType.OBSERVE, project_name="my-app", transport=Transport.GRPC) +``` + + +Self-hosted deployments point the exporter at their own collector URL. See [Set up tracing](/docs/traceai/manual-instrumentation/set-up-tracing). + +--- + +## Common mistakes + +- **Picking a transport your network blocks.** OTLP/gRPC and OTLP/HTTP use different ports and protocols. If gRPC is blocked by a proxy or firewall, switch to `Transport.HTTP`. +- **Expecting spans without calling `register()`.** Nothing reaches Observe until a `TracerProvider` with the FutureAGI exporter exists. `register()` builds it. 
+- **Treating OTel as the backend.** OTel only collects and exports spans; storing, indexing, and displaying them is Observe's job. + +--- + +## Next steps + + + + The FutureAGI layer built on OpenTelemetry. + + + The unit OpenTelemetry collects. + + + Configure the OTel exporter to FutureAGI. + + + Read the spans once they arrive. + + diff --git a/src/pages/docs/observe/concepts/sessions.mdx b/src/pages/docs/observe/concepts/sessions.mdx new file mode 100644 index 00000000..35bc7387 --- /dev/null +++ b/src/pages/docs/observe/concepts/sessions.mdx @@ -0,0 +1,48 @@ +--- +title: "Sessions" +description: "Reading a whole multi-turn conversation as one unit." +--- + +## A session is one conversation + +A **session** is one multi-turn conversation, reassembled from its [traces](/docs/observe/concepts/traces). When a chatbot answers five messages, that is five separate traces, one per turn. Give them all the same `session.id` and Observe ties them back into one conversation that sits one level above the trace: the session holds the ordered traces, and each trace holds its [spans](/docs/observe/concepts/spans). The name is [OpenTelemetry](/docs/observe/concepts/otel)'s own, and setting it once around a turn's work carries it to every span inside, just like the trace ID. + + T1["Trace (turn 1)"] + S --> T2["Trace (turn 2)"] + S --> T3["Trace (turn 3)"] + T1 --> P1["spans"] + T2 --> P2["spans"] + T3 --> P3["spans"]`} /> + +Take a three-turn support chat. Each turn is its own request, so each is its own trace, but all three share `session.id="chat_abc"`. Observe rolls them into one row you read top to bottom, from the opening question to the resolution, with the whole conversation's duration, cost, and token count in one place. Reuse that same ID across turns and the conversation grows; a fresh ID each turn would leave you with sessions of one trace each. 
+ +## When to use + +Reach for a session when the problem runs across turns, not a single request: the assistant that kept losing track across a conversation, someone who drops off or escalates halfway through a flow, or any time you want the whole chat's duration, cost, and tokens at once instead of per request. + +When the grain is wrong, reach elsewhere: + +- Debugging a single request: open its [trace](/docs/observe/concepts/traces); a session is too coarse +- Rolling up by person, not conversation: use [Users](/docs/observe/concepts/users), which gathers every conversation one person had +- Aggregate trends across many sessions: build a dashboard + +## Why it matters + +A conversation's problems are invisible one request at a time. Whether the assistant stayed coherent across turns, where someone gave up, what a whole support chat cost — none of it shows on a single trace. Grouping the turns into a session puts the conversation back together, so you debug and measure the thing your user actually lived through, not the fragments of it. + +## Keep exploring + + + + Roll every conversation up by the person who had it + + + Read, filter, and sort the Sessions view + + + Attach session.id in traceAI + + diff --git a/src/pages/docs/observe/concepts/spans.mdx b/src/pages/docs/observe/concepts/spans.mdx new file mode 100644 index 00000000..e5a782e7 --- /dev/null +++ b/src/pages/docs/observe/concepts/spans.mdx @@ -0,0 +1,49 @@ +--- +title: "Spans" +description: "Pinpointing the single step behind a slow or wrong answer." +--- + +## A span is one step + +A **span** is one operation inside a [trace](/docs/observe/concepts/traces): a single model call, tool call, retrieval, agent step, guardrail check, or evaluator run. It records its own input and output, when it started and finished, whether it succeeded, and, for model calls, the tokens and cost it ran up. Where a trace is the whole request, a span is one step inside it. 
+ +Under the hood, a span is an [OpenTelemetry](/docs/observe/concepts/otel) span. OpenTelemetry defines the shape — a named, timed unit of work with a status, a parent, and key-value attributes — and traceAI fills those attributes with LLM-specific keys: the span `kind` that says what ran, the prompt and completion, and the token counts. So every span you see in Observe is a standard OTel span carrying traceAI's LLM attributes. + + +A parent span, say an agent, holds the child spans it set off, and each of those can have children of its own. That nesting is how Future AGI works out which step triggered which, and it is what lets a trace draw itself as a tree. + + B["chain span"] + A --> C["tool span"] + B --> D["retriever span"] + B --> E["llm span"]`} /> + +Each box is a span with its own timing and attributes. The edges come straight from OpenTelemetry: every span carries the trace ID and its parent span's ID, and OTel propagates that context down your call stack, so a span created deep inside nests under the one above it without you wiring it up. Those same links are what a [trace](/docs/observe/concepts/traces) is rebuilt from. + +## What a span isn't + +- **Not a log line.** A log is a flat text event. A span is a timed unit with structured input, output, status, and attributes, linked to a parent +- **Not an event.** An event is a point-in-time marker inside a span, like an exception. The span is the operation that holds it + +## Why it matters + +A response is only as strong as its weakest step, and that step is usually where things break. The trace tells you a request was slow or wrong; the span tells you which step did it and hands you the evidence: the exact prompt sent to the model, the arguments a tool received, the chunks a retriever pulled back, or the score an evaluator gave. 
traceAI captures these spans for you on supported frameworks, and where it can't reach, you can [add your own](/docs/traceai/manual-instrumentation/create-tool-spans) so no part of your pipeline stays a black box. + +## Keep exploring + + + + The full request that spans are grouped into + + + The kinds a span can be: LLM, tool, retriever, and the rest + + + Add custom spans where auto-instrumentation stops + + + How spans, traces, sessions, and users fit together + + diff --git a/src/pages/docs/observe/concepts/traceai.mdx b/src/pages/docs/observe/concepts/traceai.mdx new file mode 100644 index 00000000..1ee8ea48 --- /dev/null +++ b/src/pages/docs/observe/concepts/traceai.mdx @@ -0,0 +1,158 @@ +--- +title: "traceAI SDK" +description: "traceAI is FutureAGI's open-source instrumentation SDK on OpenTelemetry — it captures LLM, tool, and retrieval calls as standardized spans that Observe reads as traces." +slug: "traceai" +page_type: "concept" +diataxis: "explanation" +products: ["Observe"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What is the traceAI SDK?" +direct_answer: "traceAI is FutureAGI's open-source instrumentation SDK on OpenTelemetry. It captures model, tool, and retrieval calls as standardized spans, and Observe is the product that reads them as traces." +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-06-18" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + title: "The traceAI SDK" + description: "traceAI is FutureAGI's open-source instrumentation SDK on OpenTelemetry. It captures LLM, tool, and retrieval calls as standardized spans that Observe reads as traces." + primary_keyword: "what is traceai instrumentation" + direct_answer: true +geo: + answer_target: "What is the traceAI SDK and how does it relate to Observe?" 
+ llm_summary: "traceAI is FutureAGI's open-source instrumentation SDK, built on OpenTelemetry. It captures model, tool, and retrieval calls as standardized spans across frameworks; Observe is the product that reads those spans as traces." +canonical: "/docs/observe/concepts/traceai" +related: + - "/docs/observe/concepts/otel" + - "/docs/observe/concepts/spans" + - "/docs/traceai/manual-instrumentation/set-up-tracing" + - "/docs/traceai/auto" +--- + +## About + +traceAI is the instrumentation SDK; Observe is the product that reads its traces. traceAI is FutureAGI's open-source instrumentation SDK, built on [OpenTelemetry](/docs/observe/concepts/otel). It's a set of conventions and per-framework instrumentors that capture what your AI app does — model calls, tool calls, retrievals, agent steps — and map them to standardized [span](/docs/observe/concepts/spans) attributes. Add the instrumentor for your framework, and those calls become traces in Observe without hand-writing spans. traceAI is natively supported by FutureAGI but emits standard OTel, so it works with any OTel-compatible backend too. + +--- + +## Why it matters + +Raw OpenTelemetry knows nothing about LLMs — it has no concept of a prompt, a completion, token cost, or a tool call. traceAI fills that gap: it turns framework calls into *LLM-shaped* spans with consistent keys, so a LangChain trace and an OpenAI trace look the same in Observe and are queryable the same way. That standardization is what makes filtering, evals, and dashboards work across different stacks. + +--- + +## Mental model + +traceAI is the adapter between your framework and OpenTelemetry: the instrumentor wraps the framework, produces standardized spans, and hands them to the OTel pipeline that exports to FutureAGI. + + B["traceAI instrumentor"] + B --> C["Standardized OTel spans"] + C --> D["FutureAGI Observe"]`} /> + +You pick the instrumentor that matches your framework; the rest of the pipeline is the same OTel flow for everyone. 
+ +--- + +## SDK vs product, auto vs manual + +Two distinctions explain most of how traceAI is used. + +**traceAI is the SDK; Observe is the product.** traceAI runs inside your application and produces spans. [Observe](/docs/observe) runs in FutureAGI and reads them — searching, replaying, scoring, and alerting on the traces those spans form. One emits the data; the other consumes it. They meet only at the span: a standard [OpenTelemetry](/docs/observe/concepts/otel) span on the wire. + +**Auto and manual instrumentation are two ways to produce those spans, and they coexist.** Auto-instrumentation is a per-framework instrumentor that wraps a library — install `traceAI-openai`, call `.instrument()`, and every OpenAI call becomes a span with no span code in your app. Manual instrumentation is for the parts no instrumentor reaches: your own business functions, custom retrieval, glue logic. You wrap those as [tool spans](/docs/traceai/manual-instrumentation/create-tool-spans) yourself. Most real apps use both — auto for the framework calls, manual for the code between them — and both feed the same provider, so they nest into one trace. + +That shared provider is what `register()` sets up. Conceptually it does one thing: it builds the OpenTelemetry `TracerProvider` — the exporter pointed at FutureAGI plus a batched span processor — and makes it the active provider. After that call, both the auto-instrumentors and any manual spans attach to it automatically. Nothing reaches Observe until `register()` has run, because before it there is no exporter to ship spans to. + +--- + +## When to use + +- You want LLM/agent calls traced without writing spans by hand. +- You use a supported framework (OpenAI, LangChain, LlamaIndex, CrewAI, …). +- You want consistent, queryable span attributes across different SDKs. +- You want instrumentation that stays portable across OTel backends. 
+ +--- + +## When not to use traceAI + +- **For non-LLM work that has no LLM meaning to capture.** A plain database query or HTTP handler doesn't benefit from traceAI's LLM conventions. Trace it with raw [OpenTelemetry](/docs/observe/concepts/otel) instead; it still lands in the same trace tree. +- **As a stand-in for the backend.** traceAI only produces spans — it doesn't store, search, or display them. If you need to read traces, that's [Observe](/docs/observe), not the SDK. +- **For a framework with no instrumentor, expecting auto-capture.** Each instrumentor wraps one specific framework. If yours isn't in the [catalog](/docs/traceai/auto), auto-instrumentation won't see it — reach for [manual spans](/docs/traceai/manual-instrumentation/create-tool-spans) rather than the wrong instrumentor. + +--- + +## What it isn't + +- **traceAI is not a backend.** It produces spans; FutureAGI stores and displays them. +- **traceAI is not a replacement for OpenTelemetry.** It's complementary — conventions and instrumentors *on top of* OTel. See [OpenTelemetry](/docs/observe/concepts/otel). +- **traceAI is not only for FutureAGI.** It emits standard OTel and works with any compatible backend. + +--- + +## How FutureAGI represents traceAI + +traceAI ships as the core `fi-instrumentation-otel` package plus a per-framework instrumentor you install alongside it. A sample of the Python instrumentors: + +| Package | Instruments | +|---|---| +| `traceAI-openai` | OpenAI | +| `traceAI-anthropic` | Anthropic | +| `traceAI-langchain` | LangChain | +| `traceAI-llamaindex` | LlamaIndex | +| `traceAI-crewai` | CrewAI | +| `traceAI-bedrock` | AWS Bedrock | +| `traceAI-litellm` | LiteLLM | +| `traceAI-google-adk` | Google ADK | +| `traceAI-dspy` | DSPy | +| `traceAI-haystack` | Haystack | + +See the full, current list in the [Auto Instrumentation catalog](/docs/traceai/auto). To wire one up, see [Set up tracing](/docs/traceai/manual-instrumentation/set-up-tracing). 
+ +Install the core package and an instrumentor, register a tracer provider, then instrument the framework with it: + + +```python +# pip install fi-instrumentation-otel traceAI-openai +from fi_instrumentation import register, Transport +from fi_instrumentation.fi_types import ProjectType +from traceai_openai import OpenAIInstrumentor + +tp = register(project_type=ProjectType.OBSERVE, project_name="my-app", transport=Transport.GRPC) +OpenAIInstrumentor().instrument(tracer_provider=tp) +``` + + +--- + +## Common mistakes + +- **Calling `.instrument()` after the client is created → no spans.** Run `OpenAIInstrumentor().instrument(...)` before you construct the framework client, or its calls aren't wrapped and nothing is traced. +- **Registering with the wrong `project_type`.** Use `ProjectType.OBSERVE` for production tracing; a mismatched project type sends spans somewhere you won't find them in Observe. +- **Installing the wrong instrumentor for your framework.** Each instrumentor wraps one framework. A LangChain app needs `traceAI-langchain`; `traceAI-openai` won't capture it. + +--- + +## Next steps + + + + The standard traceAI is built on. + + + What traceAI produces. + + + Install an instrumentor and start capturing. + + + Every supported framework. + + diff --git a/src/pages/docs/observe/concepts/traces.mdx b/src/pages/docs/observe/concepts/traces.mdx new file mode 100644 index 00000000..7e1da630 --- /dev/null +++ b/src/pages/docs/observe/concepts/traces.mdx @@ -0,0 +1,39 @@ +--- +title: "Traces" +description: "Helping you debug an AI response, step by step." +--- + +## A trace is a tree + +A **trace** is a tree of [spans](/docs/observe/concepts/spans). The root span is the operation that kicked off the request, and every other span nests under the step that triggered it. They all share one trace ID, so the whole request stitches back together top to bottom, even when steps run across async tasks or services. 
+<Mermaid chart={`graph TD + T["support_agent.run"] --> S1["llm.intent_classification"] + T --> S2["tool.check_order_status"] + T --> S3["chain.generate_reply"] + S3 --> S4["retriever.knowledge_base"] + S3 --> S5["llm.response_generation"]`} /> + +The tree above is one support-agent request. The root `support_agent.run` is the whole request. Under it, `llm.intent_classification` reads the question, `tool.check_order_status` looks up the order, and `chain.generate_reply` writes the answer, which itself calls `retriever.knowledge_base` for the refund policy and `llm.response_generation` for the wording. + +Read top to bottom, the tree is the exact path the request took, so when an answer comes out wrong you can see which step caused it. + +## Why it matters + +Without traces, a wrong or slow answer is a dead end. You see the output but not the steps behind it, so debugging turns into guesswork over flat logs. A trace turns that into a readable path you can walk: you spot that the retriever pulled the wrong policy chunk, that one tool call dragged on for four seconds, or that an eval flagged the answer as unsupported. Latency, cost, errors, and quality all hang off the same request, so you debug from one place instead of stitching logs together by hand. + +## Keep exploring + + + + Group multiple traces into one conversation or customer + + + Instrument your app so it emits traces + + + How spans, traces, sessions, and users fit together + + diff --git a/src/pages/docs/observe/concepts/users.mdx b/src/pages/docs/observe/concepts/users.mdx new file mode 100644 index 00000000..383f5fff --- /dev/null +++ b/src/pages/docs/observe/concepts/users.mdx @@ -0,0 +1,51 @@ +--- +title: "Users" +description: "Following one customer across every conversation they've had." +--- + +## A user is a person across conversations + +You will often want to know *who* a conversation belonged to. A **user** is that person, the end user behind the requests. 
Setting a `user.id` on your spans rolls every [trace](/docs/observe/concepts/traces) and [session](/docs/observe/concepts/sessions) they generate up under one row: where a session is one conversation, a user is all of their conversations put together, so you can answer "what happened to this customer?" without writing a query. The `user.id` attribute name is [OpenTelemetry](/docs/observe/concepts/otel)'s own, and it flows to every span in a block the same way the trace ID does. + + S1["Session"] + U --> S2["Session"] + U --> T0["Trace (no session)"] + S1 --> T1["Trace"] + S1 --> T2["Trace"] + S2 --> T3["Trace"]`} /> + +Say one customer is `user.id="cust_42"`. Every request they make carries that ID, whether it is part of a support chat or a one-off question, so Observe gathers all of it under a single row: their sessions and traces, their total cost and token use, when they first showed up and when they were last active, and how their answers scored. Open the row and you have that customer's whole history in one place. + +## When to use + +Reach for a user when the unit is a person, not a chat: a customer reports a bug and you want their entire history; a cost spike traces back to one heavy customer; you are tracking who stuck around and who dropped off (first seen, last seen, session counts); or quality is slipping for a segment and you want [eval](/docs/observe/features/evals) pass-rate per user. + +When the grain is wrong, reach elsewhere: + +- One conversation, not a person: use [Sessions](/docs/observe/concepts/sessions) +- A single request: open its [trace](/docs/observe/concepts/traces) + + +`user.id` is a grouping key, not an auth identity, and Observe never verifies it. It is exported in span data, so use a stable but non-sensitive value like a hashed customer ID, never a raw email or phone number. + + +## Why it matters + +A single trace or session shows one moment; many of the questions that matter are about the person across all their moments. 
Who is driving cost, who churned after a bad week, whether one segment gets worse answers than another: none of it surfaces until every request a person made rolls up together. The user is that rollup: the customer, not the request. + +## Keep exploring + + + + Group a person's traces into individual conversations + + + Read, filter, and sort the Users view + + + Attach user.id in traceAI + + diff --git a/src/pages/docs/observe/concepts/voice-observability.mdx b/src/pages/docs/observe/concepts/voice-observability.mdx new file mode 100644 index 00000000..747272cb --- /dev/null +++ b/src/pages/docs/observe/concepts/voice-observability.mdx @@ -0,0 +1,51 @@ +--- +title: "Voice observability" +description: "Turning every voice call into a trace you can debug and score." +--- + +## A voice call is a trace + +**Voice observability** captures each voice call as a [trace](/docs/observe/concepts/traces), the same tree of [spans](/docs/observe/concepts/spans) you get from a text app. One call becomes one trace: each back-and-forth turn is a span inside it, and the whole thing carries the transcript, the recording, and the call's duration, turn count, and cost. A spoken conversation lands in the same place as every other request, ready for the same evals, alerts, and filters. + +A voice call reaches Observe by one of two paths: + +- **Managed ingestion**, for hosted providers like Vapi and Retell. Connect the provider once with its API key and assistant ID and switch observability on. Observe pulls the provider's call logs and writes each finished call in as a trace, with no SDK and no code on your side. +- **Auto-instrumentation**, for voice apps you build on LiveKit or Pipecat. Your app emits spans through [traceAI](/docs/observe/concepts/traceai), exactly like any other instrumented service. + +Either way you land on the same thing: one voice call you can open as a trace. 
+ +|"Observe pulls call logs"| T["One voice call, as a trace"] + B["LiveKit / Pipecat app"] -->|"traceAI emits spans"| T + T --> D["Transcript · recording · turns · cost"]`} /> + +Take a support line running on a Vapi assistant. Observe pulls each finished call in as its own trace: read it top to bottom and you follow the conversation turn by turn, with the transcript and recording sitting right on the call and its duration, turn count, and cost totalled up. When a caller reports the agent misheard their order number, you open that one call, jump to the turn where it happened, and play the audio back, instead of guessing from a dashboard. + +## When to use + +Reach for voice observability when what you are debugging is a spoken conversation: a caller who got the wrong answer, an agent that ran long, a call that cost more than it should. It earns its place when you want those calls sitting alongside the rest of your traces, ready to score and monitor. + +When the grain is wrong, reach elsewhere: + +- A text or SDK app, not a voice one: instrument it directly and start at the [quickstart](/docs/observe/quickstart) +- Trends across many calls, not one: build a dashboard + +## Why it matters + +Voice failures are the ones you hear about from a customer, not a log: the agent talked over the caller, misheard a number, or trailed off. A spoken call leaves nothing behind to inspect. Capturing it as a trace changes that. You get the transcript to read, the recording to play, and per-turn timing to see where it dragged, all on one call. And because it is just a trace, everything else works on it too: run [evals](/docs/observe/features/evals) on the conversation, set alerts when calls start failing, filter and export like anything else. 
+ +## Keep exploring + + + + Auto-instrument a LiveKit voice agent through traceAI + + + Score voice conversations for quality and safety + + + How spans, traces, sessions, and users fit together + + diff --git a/src/plugins/vite-docs-transform.mjs b/src/plugins/vite-docs-transform.mjs index dab3550b..2dffacfc 100644 --- a/src/plugins/vite-docs-transform.mjs +++ b/src/plugins/vite-docs-transform.mjs @@ -22,6 +22,7 @@ const COMPONENT_MAP = { CopyButton: '@docs/CopyButton.astro', Expandable: '@docs/Expandable.astro', Icon: '@docs/Icon.astro', + Mermaid: '@docs/Mermaid.astro', Note: '@docs/Note.astro', ParamField: '@docs/ParamField.astro', Prerequisites: '@docs/Prerequisites.astro', diff --git a/src/styles/global.css b/src/styles/global.css index fa266fda..1872c939 100644 --- a/src/styles/global.css +++ b/src/styles/global.css @@ -565,3 +565,70 @@ kbd { stroke-dasharray: 4 4; fill: none; } + +/* Steps component: number badges + connector line (the "step chain"). + Lives here, not in Steps.astro, so it is loaded on every page. The custom + FastNav swaps body content but not page-specific