future-agi · khushalsonawat · Jul 1, 2026 · Jun 30, 2026 · Jun 30, 2026 · Jul 1, 2026
diff --git a/public/images/docs/observe/llm-tracing-overview.webp b/public/images/docs/observe/llm-tracing-overview.webp
diff --git a/src/components/CodeCopyButtons.astro b/src/components/CodeCopyButtons.astro
@@ -17,6 +17,7 @@
     document.querySelectorAll('.docs-content pre').forEach((pre) => {
       if (pre.hasAttribute('data-copy-ready')) return;
       if (pre.closest('.code-wrapper')) return; // CodeBlock provides its own button
+      if (pre.classList.contains('mermaid')) return; // Mermaid diagrams are not code blocks
       pre.setAttribute('data-copy-ready', '');
 
       const btn = document.createElement('button');

diff --git a/src/components/docs/Mermaid.astro b/src/components/docs/Mermaid.astro
@@ -0,0 +1,60 @@
+---
+/**
+ * Mermaid diagram, rendered client-side from CDN.
+ * Usage in MDX:  <Mermaid chart={`flowchart LR\n  A --> B`} />
+ *
+ * The script is `is:inline` so Astro does not bundle it — the CDN module import
+ * then runs natively in the browser (matching how the rest of this repo loads
+ * third-party client scripts). The diagram source lives in the page, per the
+ * docs playbook. In the full monorepo, `pnpm add mermaid` and swap the CDN
+ * import for `import mermaid from 'mermaid'`.
+ */
+interface Props { // props accepted by <Mermaid />
+  chart: string; // Mermaid diagram source text, e.g. `flowchart LR\n  A --> B`
+}
+const { chart } = Astro.props; // emitted below as both the <pre> text and its data-mermaid-src attribute
+---
+
+<pre class="mermaid not-prose" data-mermaid-src={chart} style="background: transparent; text-align: center;">{chart}</pre>
+
+<!--
+  Centering CSS for the rendered <svg> lives in src/styles/global.css, not here.
+  A component is:global style is only emitted on pages that use this component,
+  and the custom FastNav does not carry page-specific <style> tags across a
+  client-side navigation, so the diagram would sit flush left when reached via
+  SPA from a page with no diagram. global.css is loaded on every page.
+-->
+
+<script is:inline type="module">
+  /*
+   * Render from the pristine `data-mermaid-src` rather than the element's
+   * textContent. Other client scripts (e.g. the global code-copy buttons) can
+   * inject children into a <pre>, and reading textContent would feed that markup
+   * to the parser and throw a syntax error. The data attribute is set once in the template and only read here.
+   * We also re-render on every navigation path this site uses: Astro view
+   * transitions (astro:after-swap), the page-load event, and the custom FastNav.
+   */
+  import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
+  mermaid.initialize({ startOnLoad: false, theme: 'dark', securityLevel: 'loose' }); // startOnLoad off: renderMermaid() renders explicitly. NOTE(review): confirm 'loose' is actually required
+
+  async function renderMermaid() {
+    const blocks = document.querySelectorAll('pre.mermaid[data-mermaid-src]:not([data-mermaid-done])');
+    for (const el of blocks) {
+      el.setAttribute('data-mermaid-done', '');
+      const src = el.getAttribute('data-mermaid-src') || '';
+      try {
+        const id = 'mmd-' + Math.random().toString(36).slice(2);
+        const { svg } = await mermaid.render(id, src);
+        el.innerHTML = svg;
+      } catch (err) {
+        console.error('Mermaid render failed:', err);
+        el.removeAttribute('data-mermaid-done'); // let a later navigation retry
+      }
+    }
+  }
+
+  renderMermaid(); // first pass: diagrams already in the DOM when this module runs
+  document.addEventListener('astro:page-load', renderMermaid); // repeat calls are safe: rendered blocks carry data-mermaid-done
+  document.addEventListener('astro:after-swap', renderMermaid); // after an Astro view-transition swap
+  window.addEventListener('fastnav', renderMermaid); // presumably dispatched by the custom FastNav; verify the event name there
+</script>
diff --git a/src/components/docs/Steps.astro b/src/components/docs/Steps.astro
@@ -5,50 +5,10 @@
   <slot />
 </div>
 
-<style>
-  .steps-container :global(.step-item) {
-    position: relative;
-    padding-left: 2.5rem;
-    padding-bottom: 1.5rem;
-  }
-
-  .steps-container :global(.step-item)::before {
-    content: counter(step);
-    counter-increment: step;
-    position: absolute;
-    left: 0;
-    top: 0;
-    width: 1.75rem;
-    height: 1.75rem;
-    border-radius: 9999px;
-    background: var(--color-text-primary);
-    color: var(--color-bg-primary);
-    font-size: 0.75rem;
-    font-weight: 600;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-  }
-
-  .steps-container :global(.step-item)::after {
-    content: '';
-    position: absolute;
-    left: 0.8125rem;
-    top: 1.75rem;
-    bottom: 0;
-    width: 2px;
-    background: var(--color-border-default);
-  }
-
-  .steps-container :global(.step-item:last-child)::after {
-    display: none;
-  }
-
-  .steps-container :global(.step-item:last-child) {
-    padding-bottom: 0;
-  }
-
-  .steps-container {
-    counter-reset: step;
-  }
-</style>
+<!--
+  The step-chain styles (number badges + connector line) live in
+  src/styles/global.css, not here. A component-scoped style is emitted as a
+  page-specific <style> tag, and the custom FastNav swaps body content without
+  carrying those tags over, so the chain would vanish when navigating from a
+  page without Steps to one with Steps. global.css is loaded on every page.
+-->
diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts
@@ -69,6 +69,12 @@ export const tabNavigation: NavTab[] = [
               { title: 'Troubleshooting and FAQs', href: '/docs/self-hosting/troubleshooting' },
             ]
           },
+          {
+            title: 'Release notes',
+            items: [
+              { title: "What's new", href: '/docs/release-notes' },
+            ]
+          },
         ]
       },
       {
@@ -347,21 +353,21 @@ export const tabNavigation: NavTab[] = [
         group: 'Observability',
         icon: 'eye',
         items: [
-          { title: 'Overview', href: '/docs/observe' },
+          { title: 'Get Started', href: '/docs/observe' },
+          { title: 'Quickstart', href: '/docs/observe/quickstart' },
           {
             title: 'Concepts',
             items: [
               { title: 'Understanding Observability', href: '/docs/tracing/concepts' },
-              { title: 'What are Traces?', href: '/docs/tracing/concepts/traces' },
-              { title: 'What are Spans?', href: '/docs/tracing/concepts/spans' },
-              { title: 'What is OpenTelemetry?', href: '/docs/tracing/concepts/otel' },
-              { title: 'What is traceAI?', href: '/docs/tracing/concepts/traceai' },
+              { title: 'Spans', href: '/docs/observe/concepts/spans' },
+              { title: 'Traces', href: '/docs/observe/concepts/traces' },
+              { title: 'OpenTelemetry', href: '/docs/tracing/concepts/otel' },
+              { title: 'traceAI', href: '/docs/tracing/concepts/traceai' },
             ]
           },
           {
             title: 'Features',
             items: [
-              { title: 'Set Up Observability', href: '/docs/observe/features/quickstart' },
               { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' },
               { title: 'Sessions', href: '/docs/observe/features/session' },
               { title: 'Users', href: '/docs/observe/features/users' },

diff --git a/src/pages/docs/observe/concepts/spans.mdx b/src/pages/docs/observe/concepts/spans.mdx
@@ -0,0 +1,43 @@
+---
+title: "Spans"
+description: "Pinpointing the single step behind a slow or wrong answer."
+---
+
+## A span is one step
+
+A span is one operation inside a [trace](/docs/observe/concepts/traces): a single model call, tool call, retrieval, agent step, guardrail check, or evaluator run. It records its own input and output, when it started and finished, whether it succeeded, and, for model calls, the tokens and cost it ran up. Where a trace is the whole request, a span is one step inside it.
+
+Under the hood, a span is an [OpenTelemetry](/docs/tracing/concepts/otel) span. OpenTelemetry defines the shape (a named, timed unit of work with a status, a parent, and key-value attributes), and traceAI fills those attributes with LLM-specific keys: the span `kind` that says what ran, the prompt and completion, and the token counts. So every span you see in Observe is a standard OTel span carrying traceAI's LLM attributes.
+
+
+A parent span, say an agent, holds the child spans it set off, and each of those can have children of its own. That nesting is how Future AGI works out which step triggered which, and it is what lets a trace draw itself as a tree.
+
+<Mermaid chart={`flowchart TD
+  accTitle: Spans nest inside a trace
+  accDescr: An agent span contains a chain span and a tool span. The chain span contains a retriever span and an LLM span.
+  A["agent span"] --> B["chain span"]
+  A --> C["tool span"]
+  B --> D["retriever span"]
+  B --> E["llm span"]`} />
+
+Each box is a span with its own timing and attributes. The edges come straight from OpenTelemetry: every span carries the trace ID and its parent span's ID, and OTel propagates that context down your call stack, so a span created deep in the call stack nests under the span above it without you wiring it up. Those same links are what a [trace](/docs/observe/concepts/traces) is rebuilt from.
+
+## What a span isn't
+
+- **Not a log line.** A log is a flat text event. A span is a timed unit with structured input, output, status, and attributes, linked to a parent
+- **Not an event.** An event is a point-in-time marker inside a span, like an exception. The span is the operation that holds it
+
+## Why it matters
+
+A response is only as strong as its weakest step, and that step is usually where things break. The trace tells you a request was slow or wrong; the span tells you which step did it and hands you the evidence: the exact prompt sent to the model, the arguments a tool received, the chunks a retriever pulled back, or the score an evaluator gave. traceAI captures these spans for you on supported frameworks, and where it can't reach, you can [add your own](/docs/observe/features/manual-tracing/create-tool-spans) so no part of your pipeline stays a black box.
+
+## Keep exploring
+
+<CardGroup cols={2}>
+  <Card title="Traces" icon="list" href="/docs/observe/concepts/traces">
+    The full request that spans are grouped into
+  </Card>
+  <Card title="Create tool spans" icon="gear" href="/docs/observe/features/manual-tracing/create-tool-spans">
+    Add custom spans where auto-instrumentation stops
+  </Card>
+</CardGroup>
diff --git a/src/pages/docs/observe/concepts/traces.mdx b/src/pages/docs/observe/concepts/traces.mdx
@@ -0,0 +1,44 @@
+---
+title: "Traces"
+description: "Helping you debug an AI response, step by step."
+---
+
+## A trace is a tree
+
+A trace is a tree of [spans](/docs/observe/concepts/spans). The root span is the operation that kicked off the request, and every other span nests under the step that triggered it. They all share one trace ID, so the whole request stitches back together top to bottom, even when steps run across async tasks or services.
+
+<Mermaid chart={`flowchart TD
+  accTitle: A trace is a tree of spans
+  accDescr: The support agent request is the root span with three child spans for intent classification, order lookup, and reply, and the reply span has its own retriever and response children
+  T["support_agent.run"] --> S1["llm.intent_classification"]
+  T --> S2["tool.check_order_status"]
+  T --> S3["chain.generate_reply"]
+  S3 --> S4["retriever.knowledge_base"]
+  S3 --> S5["llm.response_generation"]`} />
+
+The tree above is one support-agent request. The root `support_agent.run` is the whole request. Under it, `llm.intent_classification` reads the question, `tool.check_order_status` looks up the order, and `chain.generate_reply` writes the answer by calling `retriever.knowledge_base` for the refund policy and `llm.response_generation` for the wording.
+
+Read top to bottom, the tree is the exact path the request took, so when an answer comes out wrong you can see which step caused it.
+
+## What a trace isn't
+
+- **Not a session.** A session bundles many traces from one conversation or user. A trace is just one request inside it. See [Sessions and users](/docs/observe/features/session)
+- **Not a log line.** Logs are flat text events. A trace is a timed, structured tree with inputs, outputs, and cost at every step
+
+## Why it matters
+
+Without traces, a wrong or slow answer is a dead end. You see the output but not the steps behind it, so debugging turns into guesswork over flat logs. A trace turns that into a readable path you can walk: you spot that the retriever pulled the wrong policy chunk, that one tool call dragged on for four seconds, or that an eval flagged the answer as unsupported. Latency, cost, errors, and quality all hang off the same request, so you debug from one place instead of stitching logs together by hand.
+
+## Keep exploring
+
+<CardGroup cols={3}>
+  <Card title="Spans" icon="layer-group" href="/docs/observe/concepts/spans">
+    The individual operations a trace is built from
+  </Card>
+  <Card title="Sessions and users" icon="users" href="/docs/observe/features/session">
+    Group multiple traces into one conversation or customer
+  </Card>
+  <Card title="Set up tracing" icon="gear" href="/docs/observe/features/manual-tracing/set-up-tracing">
+    Instrument your app so it emits traces
+  </Card>
+</CardGroup>
diff --git a/src/pages/docs/observe/index.mdx b/src/pages/docs/observe/index.mdx
@@ -1,45 +1,51 @@
 ---
-title: "Future AGI Observe: Monitor LLM Apps in Production"
-description: "Monitor and evaluate LLM applications in production with real-time tracing, session analysis, cost tracking, and alerting."
+title: "Get started with Observe"
+description: "Observe records every request your AI app makes as a trace you can open, search, and score. Send your first trace, then go deeper."
 ---
 
-## About
+Observe shows you what your AI app actually did in production. Every request becomes a **trace**: the step-by-step record of the model calls, tool calls, and retrievals behind one response. When an answer is wrong, slow, or expensive, you open the trace and read what happened instead of guessing.
 
-Observability is how you monitor your AI application after it goes live. Once your app is in production, things change: user inputs vary, model behavior shifts, and issues come up that testing never caught. Observability gives you a continuous view of how your application is performing so you can stay on top of it.
+You only need one trace to begin. Everything else here builds on it.
 
-It tracks every response your application generates, groups them by session and user, scores them for quality, and alerts you when something goes wrong. Instead of finding out about problems from users, you see them in the dashboard first.
+<img src="/images/docs/observe/llm-tracing-overview.webp" alt="Observe trace explorer listing production traces with status, latency, and token columns" style={{ borderRadius: '5px' }} />
+*Every production request, captured as a trace and ready to inspect*
 
-<img src="/images/observe_dashboard.png" alt="Sessions Overview" style={{ borderRadius: '5px'}} />
+## Start here
 
----
-
-## How Observability Connects to Other Features
-
-- **Prototype**: After you promote a winning version in Prototype, its traces continue flowing into Observe so you can monitor production performance against the same quality criteria. [Learn more](/docs/prototype)
-- **Evaluation**: Observability uses the same built-in eval templates to score production traces automatically. Any eval you configured in Prototype or Datasets runs the same way here. [Learn more](/docs/evaluation)
-- **Alerts**: Observability feeds into the alerting system so you are notified when quality, cost, or latency crosses a threshold in production. [Learn more](/docs/observe/features/alerts)
+<CardGroup cols={2}>
+  <Card title="Send your first trace" icon="play" href="/docs/observe/quickstart">
+    Instrument one call and watch it land in Observe, in about five minutes
+  </Card>
+  <Card title="Trace your framework" icon="plug" href="/docs/tracing/auto">
+    One line to trace OpenAI, Anthropic, LangChain, and 30+ more
+  </Card>
+</CardGroup>
 
----
+## Understand the model
 
-## Getting Started with Observability
+A few short pages give you the whole mental model behind Observe. Read these and the rest of the product explains itself:
 
 <CardGroup cols={2}>
-  <Card title="Set Up Observability" icon="play" href="/docs/observe/features/quickstart">
-    Connect the SDK and start capturing traces in minutes.
-  </Card>
-  <Card title="Evals" icon="chart-line" href="/docs/observe/features/evals">
-    Run evaluations on observed traces and sessions.
+  <Card title="Traces" icon="layer-group" href="/docs/observe/concepts/traces">
+    What gets recorded for each request, and how the steps nest
   </Card>
-  <Card title="Sessions" icon="table-rows" href="/docs/observe/features/session">
-    Group and analyze multi-turn interactions.
+  <Card title="Sessions and users" icon="users" href="/docs/observe/features/session">
+    Follow a full conversation, or one customer across sessions
   </Card>
-  <Card title="Users" icon="tags" href="/docs/observe/features/users">
-    Track and analyze activity by user.
+  <Card title="traceAI SDK" icon="code" href="/docs/tracing/concepts/traceai">
+    The open library that sends your traces to Observe
   </Card>
-  <Card title="Alerts & Monitors" icon="zap" href="/docs/observe/features/alerts">
-    Configure alerts for real-time issue detection.
+</CardGroup>
+
+## Once your traces are flowing
+
+Every other feature in Observe is just a different lens on the traces you capture:
+
+<CardGroup cols={2}>
+  <Card title="Score with evals" icon="chart-mixed" href="/docs/observe/features/evals">
+    Attach quality scores to whole traces or single spans
   </Card>
-  <Card title="Voice Observability" icon="plug" href="/docs/observe/features/voice">
-    Monitor voice agent interactions and call quality.
+  <Card title="Alerts and monitors" icon="zap" href="/docs/observe/features/alerts">
+    Get told the moment a metric slips
   </Card>
 </CardGroup>