diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts index 8f8646a3..f0713b50 100644 --- a/src/lib/navigation.ts +++ b/src/lib/navigation.ts @@ -353,118 +353,50 @@ export const tabNavigation: NavTab[] = [ group: 'Observability', icon: 'eye', items: [ - { title: 'Get Started', href: '/docs/observe' }, + { title: 'Overview', href: '/docs/observe' }, { title: 'Quickstart', href: '/docs/observe/quickstart' }, { title: 'Concepts', items: [ - { title: 'Understanding Observability', href: '/docs/tracing/concepts' }, { title: 'Spans', href: '/docs/observe/concepts/spans' }, { title: 'Traces', href: '/docs/observe/concepts/traces' }, - { title: 'OpenTelemetry', href: '/docs/tracing/concepts/otel' }, - { title: 'traceAI', href: '/docs/tracing/concepts/traceai' }, + { title: 'Sessions', href: '/docs/observe/concepts/sessions' }, + { title: 'Users', href: '/docs/observe/concepts/users' }, + { title: 'Voice observability', href: '/docs/observe/concepts/voice-observability' }, + { title: 'Observability model', href: '/docs/observe/concepts/observability-model' }, ] }, { - title: 'Features', + title: 'Guides', items: [ - { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' }, - { title: 'Sessions', href: '/docs/observe/features/session' }, - { title: 'Users', href: '/docs/observe/features/users' }, - { title: 'Alerts & Monitors', href: '/docs/observe/features/alerts' }, - { title: 'Voice Observability', href: '/docs/observe/features/voice' }, - { title: 'Dashboards', href: '/docs/observe/features/dashboard' }, { - title: 'Manual Tracing', + title: 'Explore dashboard', items: [ - { title: 'Set Up Tracing', href: '/docs/observe/features/manual-tracing/set-up-tracing' }, - { title: 'Instrument with traceAI Helpers', href: '/docs/observe/features/manual-tracing/instrument-with-traceai-helpers' }, - { title: 'Get Current Tracer and Span', href: '/docs/observe/features/manual-tracing/get-current-span-context' }, - { title: 'Enriching Spans with Attributes, 
Metadata, and Tags', href: '/docs/observe/features/manual-tracing/add-attributes-metadata-tags' }, - { title: 'Logging Prompt Templates & Variables', href: '/docs/observe/features/manual-tracing/log-prompt-templates' }, - { title: 'Events, Exceptions, and Status', href: '/docs/observe/features/manual-tracing/add-events-exceptions-status' }, - { title: 'Set Session ID and User ID', href: '/docs/observe/features/manual-tracing/set-session-user-id' }, - { title: 'Tool Spans Creation', href: '/docs/observe/features/manual-tracing/create-tool-spans' }, - { title: 'Mask Span Attributes', href: '/docs/observe/features/manual-tracing/mask-span-attributes' }, - { title: 'Advanced Tracing (OTEL)', href: '/docs/observe/features/manual-tracing/advanced-tracing-examples' }, - { title: 'FI Semantic Conventions', href: '/docs/observe/features/manual-tracing/semantic-conventions' }, - { title: 'In-line Evaluations', href: '/docs/observe/features/manual-tracing/in-line-evals' }, - { title: 'Adding Annotations to your Spans', href: '/docs/observe/features/manual-tracing/annotating-using-api' }, - { title: 'Langfuse Integration', href: '/docs/observe/features/manual-tracing/langfuse-integration' }, + { title: 'Overview', href: '/docs/observe/guides/explore-dashboard' }, + { title: 'Filters', href: '/docs/observe/guides/explore-dashboard/filters' }, + { title: 'Views', href: '/docs/observe/guides/explore-dashboard/views' }, + { title: 'Display options', href: '/docs/observe/guides/explore-dashboard/display-options' }, ] }, + { title: 'Setup alerts', href: '/docs/observe/guides/setup-alerts' }, + { title: 'Setup evals', href: '/docs/observe/guides/setup-evals' }, + { title: 'Explore sessions & users', href: '/docs/observe/features/session' }, ] }, { - title: 'Integration', + title: 'Reference', items: [ - { title: 'Overview', href: '/docs/tracing/auto' }, - { - title: 'LLM Providers', - items: [ - { title: 'OpenAI', href: '/docs/tracing/auto/openai' }, - { title: 'Anthropic', href: 
'/docs/tracing/auto/anthropic' }, - { title: 'AWS Bedrock', href: '/docs/tracing/auto/bedrock' }, - { title: 'Vertex AI', href: '/docs/tracing/auto/vertexai' }, - { title: 'Google GenAI', href: '/docs/tracing/auto/google_genai' }, - { title: 'Google ADK', href: '/docs/tracing/auto/google_adk' }, - { title: 'Groq', href: '/docs/tracing/auto/groq' }, - { title: 'MistralAI', href: '/docs/tracing/auto/mistralai' }, - { title: 'Together AI', href: '/docs/tracing/auto/togetherai' }, - { title: 'Ollama', href: '/docs/tracing/auto/ollama' }, - { title: 'Portkey', href: '/docs/tracing/auto/portkey' }, - ] - }, - { - title: 'Frameworks & Agents', - items: [ - { title: 'LangChain', href: '/docs/tracing/auto/langchain' }, - { title: 'LangGraph', href: '/docs/tracing/auto/langgraph' }, - { title: 'LlamaIndex', href: '/docs/tracing/auto/llamaindex' }, - { title: 'LlamaIndex Workflows', href: '/docs/tracing/auto/llamaindex-workflows' }, - { title: 'LiteLLM', href: '/docs/tracing/auto/litellm' }, - { title: 'CrewAI', href: '/docs/tracing/auto/crewai' }, - { title: 'AutoGen', href: '/docs/tracing/auto/autogen' }, - { title: 'Haystack', href: '/docs/tracing/auto/haystack' }, - { title: 'DSPy', href: '/docs/tracing/auto/dspy' }, - { title: 'OpenAI Agents', href: '/docs/tracing/auto/openai_agents' }, - { title: 'Smol Agents', href: '/docs/tracing/auto/smol_agents' }, - { title: 'Instructor', href: '/docs/tracing/auto/instructor' }, - { title: 'PromptFlow', href: '/docs/tracing/auto/promptflow' }, - { title: 'Guardrails', href: '/docs/tracing/auto/guardrails' }, - { title: 'MCP', href: '/docs/tracing/auto/mcp' }, - { title: 'Mastra', href: '/docs/tracing/auto/mastra' }, - { title: 'Vercel AI SDK', href: '/docs/tracing/auto/vercel' }, - ] - }, - { - title: 'Voice & Realtime', - items: [ - { title: 'LiveKit', href: '/docs/tracing/auto/livekit' }, - { title: 'Pipecat', href: '/docs/tracing/auto/pipecat' }, - ] - }, - { - title: 'Java', - items: [ - { title: 'Overview', href: 
'/docs/tracing/auto/java' }, - { title: 'Spring Boot', href: '/docs/tracing/auto/spring-boot' }, - { title: 'OpenAI', href: '/docs/tracing/auto/java/openai' }, - { title: 'Anthropic', href: '/docs/tracing/auto/java/anthropic' }, - { title: 'AWS Bedrock', href: '/docs/tracing/auto/java/bedrock' }, - { title: 'Cohere', href: '/docs/tracing/auto/java/cohere' }, - { title: 'Pinecone', href: '/docs/tracing/auto/java/pinecone' }, - { title: 'LLM Providers', href: '/docs/tracing/auto/java/llm-providers' }, - { title: 'Vector Databases', href: '/docs/tracing/auto/java/vector-databases' }, - { title: 'Frameworks', href: '/docs/tracing/auto/java/frameworks' }, - ] - }, - { - title: 'Other', - items: [ - { title: 'n8n', href: '/docs/integrations/traceai/n8n' }, - ] - }, + { title: 'Filters', href: '/docs/observe/reference/filters' }, + { title: 'traceAI', href: '/docs/observe/concepts/traceai' }, + ] + }, + { + title: 'Troubleshooting', + items: [ + { title: 'No traces appear', href: '/docs/observe/troubleshooting/no-traces-appearing' }, + { title: 'Spans and attributes', href: '/docs/observe/troubleshooting/missing-attributes' }, + { title: 'Dashboard numbers', href: '/docs/observe/troubleshooting/dashboard-numbers-look-wrong' }, + { title: 'Alerts not firing', href: '/docs/observe/troubleshooting/alerts-did-not-fire' }, ] }, ] @@ -892,7 +824,7 @@ export const tabNavigation: NavTab[] = [ title: 'Observability', icon: 'eye', items: [ - { title: 'Implement Observability', href: '/docs/cookbook/observability' }, + { title: 'Improving a LangGraph agent via observability', href: '/docs/cookbook/improve-langgraph-agent-with-observability' }, { title: 'Text-to-SQL Evaluation', href: '/docs/cookbook/text-to-sql' }, ] }, diff --git a/src/lib/redirects.ts b/src/lib/redirects.ts index 93b7acbb..430aee47 100644 --- a/src/lib/redirects.ts +++ b/src/lib/redirects.ts @@ -1,6 +1,7 @@ // Auto-generated redirect map: old Mintlify URLs → new docs URLs // 275 redirects from 
futureagi.mintlify.app export const redirectMap: Record = { + '/docs/cookbook/observability': '/docs/cookbook/improve-langgraph-agent-with-observability', '/docs/observe/features/annotation-queue-using-sdk': '/docs/annotations/sdk/annotation-queue-using-sdk', '/docs/observe/voice/set-up': '/docs/observe/features/voice', '/docs/quickstart/installation': '/docs/installation', @@ -77,7 +78,7 @@ export const redirectMap: Record = { '/cookbook/cookbook5/How-to-build-and-incrementally-improve-RAG-applications-in-Langchain': '/docs/cookbook/rag-langchain', '/cookbook/cookbook6/How-to-evaluate-RAG-Applications': '/docs/cookbook/evaluate-rag', '/cookbook/cookbook7/Creating-Trustworthy-RAGs-for-Chatbots': '/docs/cookbook/trustworthy-rag', - '/cookbook/cookbook8/How-To-Implement-Observability': '/docs/cookbook/observability', + '/cookbook/cookbook8/How-To-Implement-Observability': '/docs/cookbook/improve-langgraph-agent-with-observability', '/cookbook/cookbook9/How-To-Decrease-RAG-Hallucination': '/docs/cookbook/decrease-hallucination', '/cookbook/integrations/mongodb': '/docs/cookbook/mongodb', '/cookbook/optimization/basic-prompt-optimization': '/docs/cookbook/basic-optimization', diff --git a/src/pages/docs/cookbook/improve-langgraph-agent-with-observability.mdx b/src/pages/docs/cookbook/improve-langgraph-agent-with-observability.mdx new file mode 100644 index 00000000..5b18e132 --- /dev/null +++ b/src/pages/docs/cookbook/improve-langgraph-agent-with-observability.mdx @@ -0,0 +1,251 @@ +--- +title: "Improving a LangGraph agent via observability" +description: "Instrument a LangGraph support agent with Future AGI, score it with an Observe Eval Task, find the turn that fails, fix it from the eval signal, and prove the gain." 
+--- + + +Build a multi-turn LangGraph support agent, instrument it with [traceAI](/docs/observe/concepts/traceai) so every conversation lands in [Observe](/docs/observe) grouped by [session](/docs/observe/concepts/sessions) and [user](/docs/observe/concepts/users), score it with an Observe [Eval Task](/docs/observe/features/evals), then read the scores to find *where* it fails, fix that one thing in your code, and watch the number move. + + +| Time | Difficulty | Cost | +|------|------------|------| +| 30-40 min | Intermediate | ~$1 in model calls | + +The loop you'll run: + + B["Instrument with traceAI"] + B --> C["Score with an Eval Task"] + C --> D["Diagnose the failing turn"] + D --> E["Fix the prompt, re-run"] + E --> F["Confirm the gain, then alert"]`} /> + + +- FutureAGI account → [app.futureagi.com](https://app.futureagi.com) +- API keys: `FI_API_KEY` and `FI_SECRET_KEY` (see [Get your API keys](/docs/admin-settings)) +- An `OPENAI_API_KEY` +- Python 3.11 + + +## Install + +```bash +pip install fi-instrumentation-otel traceAI-langchain langgraph langchain-openai langchain-core +``` + +```bash +export FI_API_KEY="your-api-key" +export FI_SECRET_KEY="your-secret-key" +export OPENAI_API_KEY="your-openai-api-key" +``` + +## Tutorial + + + + + +Real support answers are only as good as the policy behind them. Keep the knowledge base inline so the recipe runs with no external data, then wire a prebuilt LangGraph ReAct agent over it with two tools. The `search_help_center` tool stashes what it retrieved so you can score grounding later. + +```python +from langchain_openai import ChatOpenAI, OpenAIEmbeddings +from langchain_core.vectorstores import InMemoryVectorStore +from langchain_core.tools import tool +from langgraph.prebuilt import create_react_agent + +POLICIES = [ + "Refunds: Orders can be refunded within 30 days of delivery. Items marked 'final sale' " + "are not refundable. 
Refunds return to the original payment method in 5-7 business days.",
+    "Shipping: Standard shipping takes 3-5 business days. Express takes 1-2. We ship to the US and Canada only.",
+    "Account: Customers can reset a password from the login page. Support cannot see or change a password.",
+    "Returns: To return an item, start a return from the Orders page to get a prepaid label. "
+    "Returns must be shipped within 14 days of approval.",
+]
+
+# Index the inline policies in memory; each search returns the top 2 matches (k=2).
+store = InMemoryVectorStore.from_texts(POLICIES, OpenAIEmbeddings())
+retriever = store.as_retriever(search_kwargs={"k": 2})
+
+# Module-level stash written by search_help_center, so the code that runs a turn can
+# read back the retrieved policy text for scoring.
+last_context = {"text": ""}
+
+# @tool exposes the function to the agent; LangChain uses the docstring as the tool
+# description, so the docstrings below are part of what the model sees.
+@tool
+def search_help_center(query: str) -> str:
+    """Search the help center for relevant policy text."""
+    docs = retriever.invoke(query)
+    # Side effect: remember what was retrieved (chunks joined by blank lines) before returning it.
+    last_context["text"] = "\n\n".join(d.page_content for d in docs)
+    return last_context["text"]
+
+@tool
+def lookup_order(order_id: str) -> str:
+    """Look up the status of an order by its ID."""
+    # Stub: every order ID gets the same canned status, so the recipe needs no order system.
+    return f"Order {order_id}: delivered on 2026-06-20, standard shipping."
+
+# Deliberately vague starting prompt; it says nothing about staying within the retrieved policy.
+SYSTEM_PROMPT = "You are a helpful customer-support agent. Use the tools to answer."
+
+llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+agent = create_react_agent(llm, tools=[search_help_center, lookup_order], prompt=SYSTEM_PROMPT)
+```
+
+
+The `SYSTEM_PROMPT` is deliberately weak. It is the thing you'll fix later, once the evals tell you to.
+
+
+
+
+
+
+Register tracing against an **Observe** project and turn on auto-instrumentation. The same `LangChainInstrumentor` captures LangGraph, there is no separate instrumentor. Wrap each turn in `using_session` (the conversation) and `using_user` (the customer), and in one span record the three fields an eval will need: the question, the answer, and the retrieved policy.
+
+```python
+from fi_instrumentation import register, using_session, using_user, FITracer
+from fi_instrumentation.fi_types import ProjectType
+from traceai_langchain import LangChainInstrumentor
+
+trace_provider = register(
+    project_type=ProjectType.OBSERVE,
+    project_name="support-agent",
+    set_global_tracer_provider=True,
+)
+LangChainInstrumentor().instrument(tracer_provider=trace_provider)
+tracer = FITracer(trace_provider.get_tracer(__name__))
+
+def support_turn(session_id: str, user_id: str, question: str) -> str:
+    """Run one support turn and record its question, answer, and retrieved policy on one span.
+
+    session_id groups the turn into a conversation and user_id ties it to a customer;
+    the agent's final message is returned as the answer.
+    """
+    with using_session(session_id), using_user(user_id):
+        with tracer.start_as_current_span("support_turn") as span:
+            # last_context is module-level and only search_help_center writes it. Clear it
+            # so a turn that never searches (e.g. an order lookup) records no context
+            # instead of the policy left over from an earlier turn.
+            last_context["text"] = ""
+            result = agent.invoke({"messages": [{"role": "user", "content": question}]})
+            answer = result["messages"][-1].content
+            span.set_attribute("raw.input", question)
+            span.set_attribute("raw.output", answer)
+            span.set_attribute("raw.context", last_context["text"])  # policy retrieved this turn, "" if none
+            return answer
+
+# two customers, five turns between them
+CONVERSATIONS = [
+    ("chat_1001", "cust_42", [
+        "How long do refunds take?",
+        "My order was a final sale, can I still get a refund?",
+        "Can you look up order A-3391?",
+    ]),
+    ("chat_1002", "cust_77", [
+        "What shipping options do you have?",
+        "Can you change my password for me?",
+    ]),
+]
+
+for sid, uid, questions in CONVERSATIONS:
+    for q in questions:
+        print(support_turn(sid, uid, q))
+
+trace_provider.force_flush()  # BatchSpanProcessor buffers up to 5s; flush before a short script exits
+```
+
+**You should see** five answers in the terminal, and within a few seconds, in **Observe > Traces > `support-agent`**, two sessions: `chat_1001` holding three traces for user `cust_42`, and `chat_1002` holding two for `cust_77`. The final-sale refund turn is the one to watch.
+
+
+No trace? It is almost always order or flush. `register()` and `.instrument()` must run **before** the agent call, and `force_flush()` must run before the process exits.
+ + + + + + +You won't score answers from the SDK. You configure evals **in the platform** as an Eval Task, so they run on your traces and the whole team sees the same scores. The turn span already carries `raw.input`, `raw.output`, and `raw.context`, so the task has clean attributes to map to. + +In Observe, create an **Eval Task** on the `support-agent` project and pick three evals, chosen so a low score points at a *specific* component: + +- **Context Relevance** — did retrieval fetch the right policy? · map `input` → `raw.input`, `context` → `raw.context` +- **Context Adherence** — did the answer stay grounded in it? · map `output` → `raw.output`, `context` → `raw.context` +- **Completeness** — did the answer fully address the question? · map `input` → `raw.input`, `output` → `raw.output` + +Run it as **Historical** over the five turns you just sent. + + +This step only covers what to pick for this agent. The click-by-click, creating the task, choosing Historical or Continuous, setting sampling, and mapping eval inputs to span attributes, lives in the [Setup evals](/docs/observe/guides/setup-evals) how-to guide in the Observe docs. + + +**You should see** three scores per trace. Read them as a baseline (illustrative shape, your numbers will differ): + +| Turn | Context Relevance | Context Adherence | Completeness | +|---|---|---|---| +| "How long do refunds take?" | 0.91 | 0.88 | 0.86 | +| "Final sale, still refundable?" | 0.87 | **0.41** | 0.55 | +| "Look up order A-3391" | 0.93 | 0.90 | 0.84 | +| "What shipping options do you have?" | 0.90 | 0.85 | 0.83 | +| "Can you change my password for me?" | 0.88 | 0.82 | 0.80 | + + + + + +Don't guess, read the pattern. Filter the trace explorer to the low-scoring slice: + +```text +scores.context_adherence < 0.8 AND fi.span.kind = LLM +``` + +The final-sale turn surfaces, and its scores are the whole story: retrieval is fine (Context Relevance 0.87), grounding is not (Context Adherence 0.41). 
The right policy was in front of the model and it answered against it anyway. That points at one layer, and only one. To replay the customer's whole conversation around the bad turn, scope the table with `user.id = 'cust_42'` and open the session.
+
+| Eval signal | Deduction | Where the fix lives |
+|---|---|---|
+| Context Relevance low | wrong policy retrieved | retrieval: chunking, query, `k`, reranker, filters |
+| Relevance ok, **Adherence low** | model ignores good context | **the prompt** |
+| Completeness low | partial answer | prompt, or retrieve more context |
+
+
+
+
+
+Relevance is fine and Adherence is low, so the fix is the **prompt**, not the retriever. Constrain the agent to the retrieved policy and give it an out:
+
+```python
+# Tightened prompt: answer only from retrieved policy, quote the rule, escalate when unsure.
+SYSTEM_PROMPT = (
+    "You are a customer-support agent. Answer ONLY from the policy text returned by "
+    "search_help_center. Quote the specific rule you used. If the retrieved policy does "
+    "not cover the question, say you are not certain and offer to escalate, rather than guessing."
+)
+
+# Rebuild the agent: the earlier create_react_agent call was given the old prompt string,
+# so reassigning SYSTEM_PROMPT alone does not change the agent that was already built.
+agent = create_react_agent(llm, tools=[search_help_center, lookup_order], prompt=SYSTEM_PROMPT)
+```
+
+Re-run the same five turns to send fresh traces, then run the Eval Task again. This is the whole point: a code change you deduced from a score, verified by the same eval, with nothing exported and no separate tool.
+
+**You should see** the final-sale turn move (illustrative):
+
+| Turn | Context Adherence | Completeness |
+|---|---|---|
+| "Final sale, still refundable?" | 0.41 → **0.86** | 0.55 → **0.82** |
+
+The agent now says final-sale items are not refundable and cites the refund policy, instead of inventing a refund path.
+
+
+
+
+
+You're done when all three are true: the final-sale answer refuses a refund and cites the policy; the `support-agent` project shows the moved Context Adherence score; and you can say *why* it moved (the retriever was never the problem, the prompt was).
+ +A fix you can't monitor will silently regress on the next prompt tweak. Lock it in: + +- **Alert:** an [Evaluation-metric alert](/docs/observe/guides/setup-alerts) on `context_adherence`, operator *Less than*, static `0.8`. Observe pings you before a customer does. +- **Saved view:** save the `scores.context_adherence < 0.8` filter as a reusable "Ungrounded answers" view. See the [Views](/docs/observe/guides/explore-dashboard/views) guide. +- **Regression set:** the turns that scored low are your best test cases, harvest them so the next change has to clear them. + + + + + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| No trace in Observe | `register()`/`.instrument()` ran after the agent call, or the script exited before flush | Register and instrument first; call `trace_provider.force_flush()` before exit | +| Session or user filters return nothing | The turn ran outside the `using_session` / `using_user` context | Keep the agent call inside `with using_session(...), using_user(...)` | +| Eval Task finished but no scores | The task ran Historical before the traces existed, or sampling or filters excluded them | Re-run after sending traces; widen the date range; raise the sampling rate | +| Eval reports a missing input | The turn span didn't set `raw.input` / `raw.output` / `raw.context`, or the mapping points elsewhere | Set the three attributes; map each eval input to them (Context Adherence needs `context` + `output`) | +| Alert never fires | Wrong metric or project type | Use an Evaluation-metric alert on `context_adherence`; monitors work only on `observe` projects | +| Context Relevance is the low one, not Adherence | The failure is retrieval, not the prompt | Fix the retriever (chunking, `k`, filters), not the prompt | + +## Where to go next + +You improved this agent by hand: you read the eval and changed the prompt. 
To close the loop *automatically*, so the agent's prompt improves without you touching it, feed the same scores into optimization in [Improve a prompt automatically](/docs/cookbook/quickstart/prompt-optimization). diff --git a/src/pages/docs/cookbook/index.mdx b/src/pages/docs/cookbook/index.mdx index ba001f1b..d07f039a 100644 --- a/src/pages/docs/cookbook/index.mdx +++ b/src/pages/docs/cookbook/index.mdx @@ -111,11 +111,11 @@ description: "Practical step-by-step guides for evaluation, optimization, simula - Add monitoring and observability to your AI applications + Score a support agent with an Eval Task, find the failing turn, and fix it from the signal Please export your OpenAI and FutureAGI api keys before proceeding to run the code -### 1. Basic Setup - -```python - -# export FI_API_KEY="xxxasxas" -# export FI_SECRET_KEY="hasdaxxasa21" -# export OPENAI_API_KEY="jasfapsd" - -import os -import gradio as gr - -from langchain_openai import ChatOpenAI -from fi_instrumentation import register -from fi_instrumentation.fi_types import ( - EvalName, - EvalSpanKind, - EvalTag, - EvalTagType, - ProjectType -) - -# Initialize tracing -trace_provider = register( -project_type=ProjectType.OBSERVE, - project_name="Your-Project-Name" -) - -``` - -## Real-World Application Example - -Let's consider a simplified example of a chat application that uses observability. This example illustrates a chatbot application that has Observability in place. 
- -### Application Overview - -This Gradio-based chat app includes: - -- Integration of OpenAI's GPT model -- Monitoring of real-time responses -- Easy-to-use interface -- Full observability metrics - -### Code Implementation - -```python -import os -import gradio as gr - -from langchain_openai import ChatOpenAI -from fi_instrumentation import register -from traceai_langchain import LangChainInstrumentor -from fi_instrumentation.fi_types import ( - EvalName, - EvalSpanKind, - EvalTag, - EvalTagType, - ProjectType -) - -# Set up tracing with FutureAGI -trace_provider = register( - project_type=ProjectType.OBSERVE, - project_name="Simple-Chat-App" -) - -LangChainInstrumentor().instrument(tracer_provider=trace_provider) - -# Set up the LLM -llm = ChatOpenAI(temperature=0, model="gpt-4o-mini") - -def process_message(message, history): - """Process user message and generate response with observability""" - try: - # Generate response using LLM - response = llm.invoke(message) - - # Return formatted response - return history + [(message, response.content)] - except Exception as e: - error_message = f"Sorry, I encountered an error: {str(e)}" - return history + [(message, error_message)] - -def main(): - with gr.Blocks(theme=gr.themes.Soft()) as demo: - # Create chat interface - chatbot = gr.Chatbot( - label="Simple Chat Assistant", - height=400, - value=[], - type="chat", - autoscroll=True - ) - - with gr.Row(): - msg = gr.Textbox( - label="Message", - placeholder="Type your message here.", - scale=4, - container=False, - autofocus=True, - show_label=False - ) - submit_button = gr.Button( - "Send", - variant="primary", - scale=1, - size="sm" - ) - - # Example queries - gr.Examples( - examples=[ - "What is artificial intelligence?", - "Describe quantum computing in everyday language", - "What are the advantages of observability?", - ], - inputs=msg - ) - - # Handle message submission - msg.submit( - fn=process_message, - inputs=[msg, chatbot], - outputs=[chatbot], - 
queue=False - ).then( - lambda: "", - None, - msg, - queue=False - ) - - # Also trigger on button click - submit_button.click( - fn=process_message, - inputs=[msg, chatbot], - outputs=[chatbot], - queue=False - ).then( - lambda: "", - None, - msg, - queue=False - ) - - # Launch the demo - demo.launch( - share=True, - show_error=True - ) - -if __name__ == "__main__": - main() -``` - -After this application is installed we can then monitor and configure different features offered by FutureAGI in the dashboard. We can create an Eval Task to evaluate our data generated by the app. - -![FutureAGI Dashboard](/images/docs/cookbook-observability/c81.png)
Dashboard from FutureAGI platform showcasing our deployed application in OBSERVE.
- -To check a specific event for a trace of an application, we can click on one of the traces and check out the flow of our application and its individual events (spans). -![FutureAGI Trace](/images/docs/cookbook-observability/c82.png)
Trace Tree that shows the detailed overview of application session
-### Key Features Explained - -1. **Observability Setup** - - Integration of FutureAGI's instrumentation framework - - Monitoring response quality - - Tracking automatic LLM interaction -2. **Gradio Interface** - - Responsive, modern design - - Live chat functionality - - Integrated error handling -- Example queries for testing -3. **Monitoring Capabilities** - - Response quality metrics - - Error rate monitoring - - Performance monitoring - -## Best Practices for Implementation - -1. **Performance Optimization** - - Employ suitable sampling rates - - Instrumentation overhead monitoring - - Cache strategies implementation -2. **Error Handling** - - Comprehensive error logging -- Friendly error messages -- Gracious degradation -3. **Security Considerations** - - Secure API credentials - - Protection of data privacy - - Implementing access control - -## Common Challenges and Solutions - -| Challenge | Solution | Impact | -| --- | --- | --- | -| High Overhead | Adopt sampling | Lowered resource consumption -| Data Privacy | Utilize data masking | Secure user data | -| Complexity | Utilize auto-instrumentation setup | Simplified implementation | - -## FAQs - -### 1. What is the lowest supported Python version? - -Python 3.10 or later is recommended for best compatibility with FutureAGI's instrumentation framework. - -### 2. How does observability affect application performance? - -The impact on performance becomes negligible when properly used (usually <1% overhead), providing immense value in terms of insights. - -### 3. Can I add observability to current applications? - -Yes, observability can be incorporated into current applications with limited code modification. - -### 4. What kind of metrics can I monitor? - -You can monitor various metrics such as: - -- Latency -- Error rates -- Resource consumption -- Tokens Used -- Cost of workflow -- Evaluation Metrics - -## Next Steps - -Ready to add observability to your app? Here are the steps: - -1. 
Create an account on FutureAGI -2. Install the necessary packages -3. Add basic instrumentation -4. Monitor and optimize - -## Additional Resources - -- [FutureAGI Documentation](https://docs.futureagi.com/) -- [Gradio Documentation](https://gradio.app/docs) - -Begin implementing observability in your Python AI applications today! Sign up for a free FutureAGI account and start monitoring your application's performance and reliability. - -📩 Subscribe to our [newsletter](https://futureagi.com/blogs) for weekly AI development tips and best practices! \ No newline at end of file diff --git a/src/pages/docs/observe/concepts/observability-model.mdx b/src/pages/docs/observe/concepts/observability-model.mdx new file mode 100644 index 00000000..0bc7665d --- /dev/null +++ b/src/pages/docs/observe/concepts/observability-model.mdx @@ -0,0 +1,112 @@ +--- +title: "Observability model" +description: "How spans, traces, sessions, and users fit together into one hierarchy." +slug: "observability-model" +page_type: "concept" +diataxis: "explanation" +products: ["Observe"] +concept_family: "observability" +concept_level: "foundational" +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-06-18" +last_diagram_reviewed: "2026-06-17" +schema_type: "TechArticle" +primary_question: "How do traces, spans, sessions, users, and evals relate in FutureAGI Observe?" +direct_answer: "Observe is built on a hierarchy: a span is one operation, spans sharing a trace ID form a trace (one request), traces sharing a session ID form a session, and sessions belong to a user. Eval scores attach to spans or traces. All of it is captured as OpenTelemetry data via the traceAI SDK." +seo: + title: "The FutureAGI Observe observability model" + description: "The entity hierarchy behind Observe — spans, traces, sessions, users, and eval scores — and how traceAI and OpenTelemetry collect it." 
+ primary_keyword: "llm observability data model" + direct_answer: true +geo: + answer_target: "How do traces, spans, sessions, and users relate in FutureAGI Observe?" + llm_summary: "Spans nest into traces by trace ID, traces group into sessions by session ID, sessions belong to users, and eval scores attach to spans or traces — all collected via traceAI over OpenTelemetry." +canonical: "/docs/observe/concepts/observability-model" +related: + - "/docs/observe/concepts/traces" + - "/docs/observe/concepts/spans" + - "/docs/observe/concepts/sessions" + - "https://opentelemetry.io/docs/" +--- + +## A few objects that nest + +Observe is built on a small set of objects that nest: a [span](/docs/observe/concepts/spans) sits inside a [trace](/docs/observe/concepts/traces), a trace inside a [session](/docs/observe/concepts/sessions), and a session belongs to a [user](/docs/observe/concepts/users). Eval scores attach on top, to a span or a whole trace. Knowing how these relate is the difference between knowing where to look and guessing: every view in Observe, from the trace list to dashboards and alerts, is a different lens on this one **hierarchy**. + +All of it is collected the same way: your app emits spans through the [traceAI](/docs/observe/concepts/traceai) SDK, which is built on [OpenTelemetry](https://opentelemetry.io/docs/), and Observe reads them. + +--- + +## Mental model + +The objects form a strict containment hierarchy, and the ID on each span is what reconstructs it. You don't assemble traces or sessions by hand; shared IDs do that automatically. + + S["Session · session.id"] + S --> T1["Trace · one request"] + S --> T2["Trace · one request"] + T1 --> SP1["Span · llm call"] + T1 --> SP2["Span · tool call"] + SP1 --> EV["Eval score"] + T1 --> EV2["Eval score"]`} /> + +Read it bottom-up when debugging (a bad span, up to its trace, its session, its user) and top-down when analyzing (a user's sessions, down to their traces and the spans inside). 
+ +## Key terms + +| Object | What it is | Identified by | Learn more | +|---|---|---|---| +| **Span** | One operation — an LLM call, tool call, retrieval, or agent step — with input, output, timing, and cost. | Span ID (+ parent span ID) | [Spans](/docs/observe/concepts/spans) | +| **Trace** | One complete request, made of all the spans that share its trace ID. | Trace ID | [Traces](/docs/observe/concepts/traces) | +| **Session** | A multi-turn conversation — the traces that share a session ID. | `session.id` | [Sessions](/docs/observe/concepts/sessions) | +| **User** | One end user, across all their sessions and traces. | `user.id` | [Users](/docs/observe/concepts/users) | +| **Eval score** | A quality score attached to a span or trace. | Attached to span/trace | [Trace evals](/docs/observe/features/evals) | +| **OpenTelemetry** | The open standard the spans are emitted in. | — | [OpenTelemetry](https://opentelemetry.io/docs/) | +| **traceAI** | The SDK that produces the spans. | — | [traceAI SDK](/docs/observe/concepts/traceai) | + +## How it works + +Your app emits spans through traceAI (or OpenTelemetry directly). Each span carries a trace ID, so all spans from one request form a single trace. The backend receives them over OTLP (HTTP or gRPC) and stores them by project, and from there every Observe view runs on the same data. + +To enrich the hierarchy, attach a `session.id` and a `user.id` in code, and every span inside picks them up. See [set session and user IDs](/docs/observe/features/manual-tracing/set-session-user-id) and [add attributes and metadata](/docs/observe/features/manual-tracing/add-attributes-metadata-tags). + +## Walking the hierarchy when debugging + +The hierarchy is most useful read bottom-up. A typical investigation starts at the smallest object and climbs: + +1. **Start at the span.** A customer complained the assistant gave a wrong answer. 
You open the span that produced it and read its real input and output: the exact prompt and completion, not a paraphrase. +2. **Climb to the trace.** The span alone rarely explains the failure. You move up to its trace and read the other spans in order: the retrieval that fed bad context, the tool call that returned stale data, the agent step that chose the wrong path. The trace is where the request becomes legible. +3. **Climb to the session.** If the request looked fine in isolation but the conversation still went wrong, you open its session and read the earlier turns. Multi-turn problems, like the assistant losing track or contradicting itself, only show up here. +4. **Climb to the user.** If the pattern repeats, you pivot to the user to see whether it is one customer's data or a systemic issue across everyone. + +Because the IDs link each level to the next, every climb is one click, and you never reassemble context by hand. The same path runs top-down for analysis: start at a user, expand their sessions, then traces, then the spans inside. + +## When to use this model + +- **Debugging a bad answer**: start at the span that produced it, then read the rest of the trace for context +- **Analyzing a conversation**: open the session to see every turn in order +- **Investigating a customer**: pivot from a user to all their sessions and traces +- **Measuring quality**: read eval scores attached at the span or trace level + +## Common mistakes + +- **Confusing a span with a trace**: a slow trace tells you a request was slow; the span tells you which step was slow. See [Spans](/docs/observe/concepts/spans) +- **Expecting sessions without a session ID**: traces only group into a session if they share a `session.id`. 
Set it in code +- **Looking for customers you never tagged**: per-user views need `user.id` on the spans + +## Keep exploring + + + + Send a trace and watch the model fill in + + + Score spans and traces for quality and safety + + diff --git a/src/pages/docs/observe/concepts/sessions.mdx b/src/pages/docs/observe/concepts/sessions.mdx new file mode 100644 index 00000000..884459a7 --- /dev/null +++ b/src/pages/docs/observe/concepts/sessions.mdx @@ -0,0 +1,48 @@ +--- +title: "Sessions" +description: "Reading a whole multi-turn conversation as one unit." +--- + +## A session is one conversation + +A **session** is one multi-turn conversation, reassembled from its [traces](/docs/observe/concepts/traces). When a chatbot answers five messages, that is five separate traces, one per turn. Give them all the same `session.id` and Observe ties them back into one conversation that sits one level above the trace: the session holds the ordered traces, and each trace holds its [spans](/docs/observe/concepts/spans). The attribute name `session.id` is [OpenTelemetry](https://opentelemetry.io/docs/)'s own, and setting it once around a turn's work carries it to every span inside, just like the trace ID. + + T1["Trace (turn 1)"] + S --> T2["Trace (turn 2)"] + S --> T3["Trace (turn 3)"] + T1 --> P1["spans"] + T2 --> P2["spans"] + T3 --> P3["spans"]`} /> + +Take a three-turn support chat. Each turn is its own request, so each is its own trace, but all three share `session.id="chat_abc"`. Observe rolls them into one row you read top to bottom, from the opening question to the resolution, with the whole conversation's duration, cost, and token count in one place. Reuse that same ID across turns and the conversation grows; a fresh ID each turn would leave you with sessions of one trace each. 
+ +## When to use + +Reach for a session when the problem runs across turns, not a single request: the assistant that kept losing track across a conversation, someone who drops off or escalates halfway through a flow, or any time you want the whole chat's duration, cost, and tokens at once instead of per request. + +When the grain is wrong, reach elsewhere: + +- Debugging a single request: open its [trace](/docs/observe/concepts/traces); a session is too coarse +- Rolling up by person, not conversation: use [Users](/docs/observe/concepts/users), which gathers every conversation one person had +- Aggregate trends across many sessions: build a dashboard + +## Why it matters + +A conversation's problems are invisible one request at a time. Whether the assistant stayed coherent across turns, where someone gave up, what a whole support chat cost: none of it shows on a single trace. Grouping the turns into a session puts the conversation back together, so you debug and measure the thing your user actually lived through, not the fragments of it. + +## Keep exploring + + + + Roll every conversation up by the person who had it + + + Read, filter, and sort the Sessions view + + + Attach session.id in traceAI + + diff --git a/src/pages/docs/observe/concepts/spans.mdx b/src/pages/docs/observe/concepts/spans.mdx index cfbb4920..1c3eb015 100644 --- a/src/pages/docs/observe/concepts/spans.mdx +++ b/src/pages/docs/observe/concepts/spans.mdx @@ -5,9 +5,9 @@ description: "Pinpointing the single step behind a slow or wrong answer." ## A span is one step -A span is one operation inside a [trace](/docs/observe/concepts/traces): a single model call, tool call, retrieval, agent step, guardrail check, or evaluator run. It records its own input and output, when it started and finished, whether it succeeded, and, for model calls, the tokens and cost it ran up. Where a trace is the whole request, a span is one step inside it. 
+A **span** is one operation inside a [trace](/docs/observe/concepts/traces): a single model call, tool call, retrieval, agent step, guardrail check, or evaluator run. It records its own input and output, when it started and finished, whether it succeeded, and, for model calls, the tokens and cost it ran up. Where a trace is the whole request, a span is one step inside it. -Under the hood, a span is an [OpenTelemetry](/docs/tracing/concepts/otel) span. OpenTelemetry defines the shape, a named, timed unit of work with a status, a parent, and key-value attributes, and traceAI fills those attributes with LLM-specific keys: the span `kind` that says what ran, the prompt and completion, the token counts. So every span you see in Observe is a standard OTel span carrying traceAI's LLM attributes. +Under the hood, a span is an [OpenTelemetry](https://opentelemetry.io/docs/) span. OpenTelemetry defines the shape, a named, timed unit of work with a status, a parent, and key-value attributes, and traceAI fills those attributes with LLM-specific keys: the span `kind` that says what ran, the prompt and completion, the token counts. So every span you see in Observe is a standard OTel span carrying traceAI's LLM attributes. A parent span, say an agent, holds the child spans it set off, and each of those can have children of its own. That nesting is how Future AGI works out which step triggered which, and it is what lets a trace draw itself as a tree. @@ -29,15 +29,43 @@ Each box is a span with its own timing and attributes. The edges come straight f ## Why it matters -A response is only as strong as its weakest step, and that step is usually where things break. The trace tells you a request was slow or wrong; the span tells you which step did it and hands you the evidence: the exact prompt sent to the model, the arguments a tool received, the chunks a retriever pulled back, or the score an evaluator gave. 
traceAI captures these spans for you on supported frameworks, and where it can't reach, you can [add your own](/docs/observe/features/manual-tracing/create-tool-spans) so no part of your pipeline stays a black box. +A response is only as strong as its weakest step, and that step is usually where things break. The trace tells you a request was slow or wrong; the span tells you which step did it and hands you the evidence: the exact prompt sent to the model, the arguments a tool received, the chunks a retriever pulled back, or the score an evaluator gave. traceAI captures these spans for you on supported frameworks, and where it can't reach, you can [add your own](/docs/traceai/manual-instrumentation/create-tool-spans) so no part of your pipeline stays a black box. + +## Span types + +Every span carries a `kind` that says what the operation was. Observe uses it to label the span, pick its icon, and surface the fields that matter. These are the kinds traceAI emits: + +| Type | What it represents | What it captures | +|---|---|---| +| LLM | A single model call | Model, prompt messages, completion, token counts, cost | +| Tool | A function or tool the model invoked | Tool name, arguments, and the result returned | +| Retriever | A lookup against a vector store or index | The query and the documents it returned | +| Embedding | Text turned into vectors | The input text and the embedding model | +| Reranker | Retrieved documents reordered by relevance | The documents and their new order | +| Agent | A top-level step that coordinates others | The child spans (tool calls, retrievals, model calls) it set off | +| Chain | A group of steps run as one unit | The ordered child spans it runs | +| Guardrail | A safety or policy check on an input or output | What was checked and the verdict | +| Evaluator | An eval that scores a span or trace | The metric and the score produced | + +## Span attributes + +Every span carries key-value attributes. 
Some come straight from [OpenTelemetry](https://opentelemetry.io/docs/) (name, status, timing, parent), and traceAI adds LLM-specific keys on top: the span kind, the prompt and completion, the model, token counts, and cost. + +- **Core (OpenTelemetry)**: name, status, start and end time, parent span ID, trace ID +- **LLM (traceAI)**: span kind, model, input messages, output messages, prompt and completion tokens, cost +- **Retrieval**: query, retrieved documents and their scores +- **Grouping**: `session.id`, `user.id`, tags ## Keep exploring - + The full request that spans are grouped into - + Add custom spans where auto-instrumentation stops + + How spans, traces, sessions, and users fit together + diff --git a/src/pages/docs/observe/concepts/traceai.mdx b/src/pages/docs/observe/concepts/traceai.mdx new file mode 100644 index 00000000..edf6ac68 --- /dev/null +++ b/src/pages/docs/observe/concepts/traceai.mdx @@ -0,0 +1,158 @@ +--- +title: "traceAI SDK" +description: "traceAI is FutureAGI's open-source instrumentation SDK on OpenTelemetry — it captures LLM, tool, and retrieval calls as standardized spans that Observe reads as traces." +slug: "traceai" +page_type: "concept" +diataxis: "explanation" +products: ["Observe"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What is the traceAI SDK?" +direct_answer: "traceAI is FutureAGI's open-source instrumentation SDK on OpenTelemetry. It captures model, tool, and retrieval calls as standardized spans, and Observe is the product that reads them as traces." +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-06-18" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + title: "The traceAI SDK" + description: "traceAI is FutureAGI's open-source instrumentation SDK on OpenTelemetry. It captures LLM, tool, and retrieval calls as standardized spans that Observe reads as traces." 
+ primary_keyword: "what is traceai instrumentation" + direct_answer: true +geo: + answer_target: "What is the traceAI SDK and how does it relate to Observe?" + llm_summary: "traceAI is FutureAGI's open-source instrumentation SDK, built on OpenTelemetry. It captures model, tool, and retrieval calls as standardized spans across frameworks; Observe is the product that reads those spans as traces." +canonical: "/docs/observe/concepts/traceai" +related: + - "https://opentelemetry.io/docs/" + - "/docs/observe/concepts/spans" + - "/docs/traceai/manual-instrumentation/set-up-tracing" + - "/docs/traceai/auto" +--- + +## About + +traceAI is the instrumentation SDK; Observe is the product that reads its traces. traceAI is FutureAGI's open-source instrumentation SDK, built on [OpenTelemetry](https://opentelemetry.io/docs/). It's a set of conventions and per-framework instrumentors that capture what your AI app does — model calls, tool calls, retrievals, agent steps — and map them to standardized [span](/docs/observe/concepts/spans) attributes. Add the instrumentor for your framework, and those calls become traces in Observe without hand-writing spans. traceAI is natively supported by FutureAGI but emits standard OTel, so it works with any OTel-compatible backend too. + +--- + +## Why it matters + +Raw OpenTelemetry knows nothing about LLMs — it has no concept of a prompt, a completion, token cost, or a tool call. traceAI fills that gap: it turns framework calls into *LLM-shaped* spans with consistent keys, so a LangChain trace and an OpenAI trace look the same in Observe and are queryable the same way. That standardization is what makes filtering, evals, and dashboards work across different stacks. + +--- + +## Mental model + +traceAI is the adapter between your framework and OpenTelemetry: the instrumentor wraps the framework, produces standardized spans, and hands them to the OTel pipeline that exports to FutureAGI. 
+ + B["traceAI instrumentor"] + B --> C["Standardized OTel spans"] + C --> D["FutureAGI Observe"]`} /> + +You pick the instrumentor that matches your framework; the rest of the pipeline is the same OTel flow for everyone. + +--- + +## SDK vs product, auto vs manual + +Two distinctions explain most of how traceAI is used. + +**traceAI is the SDK; Observe is the product.** traceAI runs inside your application and produces spans. [Observe](/docs/observe) runs in FutureAGI and reads them — searching, replaying, scoring, and alerting on the traces those spans form. One emits the data; the other consumes it. They meet only at the span: a standard [OpenTelemetry](https://opentelemetry.io/docs/) span on the wire. + +**Auto and manual instrumentation are two ways to produce those spans, and they coexist.** Auto-instrumentation is a per-framework instrumentor that wraps a library — install `traceAI-openai`, call `.instrument()`, and every OpenAI call becomes a span with no span code in your app. Manual instrumentation is for the parts no instrumentor reaches: your own business functions, custom retrieval, glue logic. You wrap those as [tool spans](/docs/traceai/manual-instrumentation/create-tool-spans) yourself. Most real apps use both — auto for the framework calls, manual for the code between them — and both feed the same provider, so they nest into one trace. + +That shared provider is what `register()` sets up. Conceptually it does one thing: it builds the OpenTelemetry `TracerProvider` — the exporter pointed at FutureAGI plus a batched span processor — and makes it the active provider. After that call, both the auto-instrumentors and any manual spans attach to it automatically. Nothing reaches Observe until `register()` has run, because before it there is no exporter to ship spans to. + +--- + +## When to use + +- You want LLM/agent calls traced without writing spans by hand. +- You use a supported framework (OpenAI, LangChain, LlamaIndex, CrewAI, …). 
+- You want consistent, queryable span attributes across different SDKs. +- You want instrumentation that stays portable across OTel backends. + +--- + +## When not to use traceAI + +- **For non-LLM work that has no LLM meaning to capture.** A plain database query or HTTP handler doesn't benefit from traceAI's LLM conventions. Trace it with raw [OpenTelemetry](https://opentelemetry.io/docs/) instead; it still lands in the same trace tree. +- **As a stand-in for the backend.** traceAI only produces spans — it doesn't store, search, or display them. If you need to read traces, that's [Observe](/docs/observe), not the SDK. +- **For a framework with no instrumentor, expecting auto-capture.** Each instrumentor wraps one specific framework. If yours isn't in the [catalog](/docs/traceai/auto), auto-instrumentation won't see it — reach for [manual spans](/docs/traceai/manual-instrumentation/create-tool-spans) rather than the wrong instrumentor. + +--- + +## What it isn't + +- **traceAI is not a backend.** It produces spans; FutureAGI stores and displays them. +- **traceAI is not a replacement for OpenTelemetry.** It's complementary — conventions and instrumentors *on top of* OTel. See [OpenTelemetry](https://opentelemetry.io/docs/). +- **traceAI is not only for FutureAGI.** It emits standard OTel and works with any compatible backend. + +--- + +## How FutureAGI represents traceAI + +traceAI ships as the core `fi-instrumentation-otel` package plus a per-framework instrumentor you install alongside it. 
A sample of the Python instrumentors: + +| Package | Instruments | +|---|---| +| `traceAI-openai` | OpenAI | +| `traceAI-anthropic` | Anthropic | +| `traceAI-langchain` | LangChain | +| `traceAI-llamaindex` | LlamaIndex | +| `traceAI-crewai` | CrewAI | +| `traceAI-bedrock` | AWS Bedrock | +| `traceAI-litellm` | LiteLLM | +| `traceAI-google-adk` | Google ADK | +| `traceAI-dspy` | DSPy | +| `traceAI-haystack` | Haystack | + +See the full, current list in the [Auto Instrumentation catalog](/docs/traceai/auto). To wire one up, see [Set up tracing](/docs/traceai/manual-instrumentation/set-up-tracing). + +Install the core package and an instrumentor, register a tracer provider, then instrument the framework with it: + + +```python +# pip install fi-instrumentation-otel traceAI-openai +from fi_instrumentation import register, Transport +from fi_instrumentation.fi_types import ProjectType +from traceai_openai import OpenAIInstrumentor + +tp = register(project_type=ProjectType.OBSERVE, project_name="my-app", transport=Transport.GRPC) +OpenAIInstrumentor().instrument(tracer_provider=tp) +``` + + +--- + +## Common mistakes + +- **Calling `.instrument()` after the client is created → no spans.** Run `OpenAIInstrumentor().instrument(...)` before you construct the framework client, or its calls aren't wrapped and nothing is traced. +- **Registering with the wrong `project_type`.** Use `ProjectType.OBSERVE` for production tracing; a mismatched project type sends spans somewhere you won't find them in Observe. +- **Installing the wrong instrumentor for your framework.** Each instrumentor wraps one framework. A LangChain app needs `traceAI-langchain`; `traceAI-openai` won't capture it. + +--- + +## Next steps + + + + The standard traceAI is built on. + + + What traceAI produces. + + + Install an instrumentor and start capturing. + + + Every supported framework. 
+ + diff --git a/src/pages/docs/observe/concepts/traces.mdx b/src/pages/docs/observe/concepts/traces.mdx index 251aa12e..d5cd1cfc 100644 --- a/src/pages/docs/observe/concepts/traces.mdx +++ b/src/pages/docs/observe/concepts/traces.mdx @@ -5,7 +5,7 @@ description: "Helping you debug an AI response, step by step." ## A trace is a tree -A trace is a tree of [spans](/docs/observe/concepts/spans). The root span is the operation that kicked off the request, and every other span nests under the step that triggered it. They all share one trace ID, so the whole request stitches back together top to bottom, even when steps run across async tasks or services. +A **trace** is a tree of [spans](/docs/observe/concepts/spans). The root span is the operation that kicked off the request, and every other span nests under the step that triggered it. They all share one trace ID, so the whole request stitches back together top to bottom, even when steps run across async tasks or services. - - The individual operations a trace is built from - - + Group multiple traces into one conversation or customer - + Instrument your app so it emits traces + + How spans, traces, sessions, and users fit together + diff --git a/src/pages/docs/observe/concepts/users.mdx b/src/pages/docs/observe/concepts/users.mdx new file mode 100644 index 00000000..ccb862c1 --- /dev/null +++ b/src/pages/docs/observe/concepts/users.mdx @@ -0,0 +1,51 @@ +--- +title: "Users" +description: "Following one customer across every conversation they've had." +--- + +## A user is a person across conversations + +You will often want to know *who* a conversation belonged to. A **user** is that person, the end user behind the requests. 
Setting a `user.id` on your spans rolls every [trace](/docs/observe/concepts/traces) and [session](/docs/observe/concepts/sessions) they generate up under one row: where a session is one conversation, a user is all of their conversations put together, so you can answer "what happened to this customer?" without writing a query. The name is [OpenTelemetry](https://opentelemetry.io/docs/)'s own, and it flows to every span in a block the same way the trace ID does. + + S1["Session"] + U --> S2["Session"] + U --> T0["Trace (no session)"] + S1 --> T1["Trace"] + S1 --> T2["Trace"] + S2 --> T3["Trace"]`} /> + +Say one customer is `user.id="cust_42"`. Every request they make carries that ID, whether it is part of a support chat or a one-off question, so Observe gathers all of it under a single row: their sessions and traces, their total cost and token use, when they first showed up and when they were last active, and how their answers scored. Open the row and you have that customer's whole history in one place. + +## When to use + +Reach for a user when the unit is a person, not a chat: a customer reports a bug and you want their entire history, a cost spike traces back to one heavy customer, you are tracking who stuck around and who dropped off (first seen, last seen, session counts), or quality is slipping for a segment and you want [eval](/docs/observe/features/evals) pass-rate per user. + +When the grain is wrong, reach elsewhere: + +- One conversation, not a person: use [Sessions](/docs/observe/concepts/sessions) +- A single request: open its [trace](/docs/observe/concepts/traces) + + +`user.id` is a grouping key, not an auth identity, and Observe never verifies it. It is exported in span data, so use a stable but non-sensitive value like a hashed customer ID, never a raw email or phone number. + + +## Why it matters + +A single trace or session shows one moment; many of the questions that matter are about the person across all their moments. 
Who is driving cost, who churned after a bad week, whether one segment gets worse answers than another, none of it surfaces until every request a person made rolls up together. The user is that rollup: the customer, not the request. + +## Keep exploring + + + + Group a person's traces into individual conversations + + + Read, filter, and sort the Users view + + + Attach user.id in traceAI + + diff --git a/src/pages/docs/observe/concepts/voice-observability.mdx b/src/pages/docs/observe/concepts/voice-observability.mdx new file mode 100644 index 00000000..a2f2571e --- /dev/null +++ b/src/pages/docs/observe/concepts/voice-observability.mdx @@ -0,0 +1,63 @@ +--- +title: "Voice observability" +description: "Turning every voice call into a trace you can debug and score." +--- + +## A voice call is a trace + +**Voice observability** captures each voice call as a [trace](/docs/observe/concepts/traces), the same tree of [spans](/docs/observe/concepts/spans) you get from a text app. One call becomes one trace, and each back-and-forth turn is a span inside it. The call carries the transcript, the recording, and its duration, turn count, and cost. A spoken conversation lands in the same place as every other request, ready for the same [evals](/docs/observe/features/evals), alerts, and filters. + +## Inside a voice call + +A turn is more than a single step. In an app you instrument on LiveKit or Pipecat, a turn's span breaks down into speech-to-text, the model call, and text-to-speech, so you can see exactly where a turn went wrong, not just that it did. Managed calls arrive at the turn level, and the transcript and recording sit on the call either way. 
+ + T1["Turn · span"] + C --> T2["Turn · span"] + T1 --> STT["speech to text"] + T1 --> LLM["model call"] + T1 --> TTS["text to speech"] + C --> M["Transcript · recording · turns · cost"]`} /> + +Because it is an ordinary trace, a voice call fits the same [observability model](/docs/observe/concepts/observability-model) as your text traces. + +## How a call reaches Observe + +A voice call reaches Observe by one of two paths. Whichever it takes, it lands as the same trace; what differs is who produces the spans. + +| Path | For | How spans are produced | What you write | +|---|---|---|---| +| **Managed ingestion** | Hosted agents on Vapi or Retell | Observe pulls the provider's call logs | No code: connect the provider and turn observability on | +| **Auto-instrumentation** | Apps built on LiveKit or Pipecat | Your app emits a span per turn through [traceAI](/docs/observe/concepts/traceai) | A few lines of traceAI setup | + +For the managed-ingestion setup, see [Voice observability](/docs/observe/features/voice). + +## Debugging a call + +Take a support line running on a Vapi assistant. Observe pulls each finished call in as its own trace, so you read it top to bottom and follow the conversation turn by turn. When a caller reports the agent misheard their order number, you open that one call, jump to the turn where it happened, and play the audio back, instead of guessing from a dashboard. + +## When to use + +Reach for voice observability when what you are debugging is a spoken conversation: a caller who got the wrong answer, an agent that ran long, a call that cost more than it should. It also fits when you want those calls sitting alongside the rest of your traces, ready to score and monitor. 
+ +When the grain is wrong, reach elsewhere: + +- A text or SDK app, not a voice one: instrument it directly and start at the [quickstart](/docs/observe/quickstart) +- Trends across many calls, not one: build a dashboard + +## Why it matters + +Voice failures are the ones you hear about from a customer, not a log. A spoken call normally leaves nothing behind to inspect; capturing it as a trace changes that, so a complaint becomes a call you can open, read, and replay instead of a guess. + +## Keep exploring + + + + Score voice conversations for quality and safety + + + How spans, traces, sessions, and users fit together + + diff --git a/src/pages/docs/observe/features/session.mdx b/src/pages/docs/observe/features/session.mdx index 7644ef9a..f7b8edb7 100644 --- a/src/pages/docs/observe/features/session.mdx +++ b/src/pages/docs/observe/features/session.mdx @@ -1,123 +1,83 @@ --- -title: "Group Traces by Session: Multi-turn Conversation Analysis" -description: "Group traces into sessions so you can view and analyze multi-turn conversations, chatbot flows, and per-session metrics in Observe." +title: "Explore sessions & users" +description: "Read, filter, and sort the Sessions and Users views in Observe." --- -## About +Once your spans carry `session.id` and `user.id`, Observe groups them into two views: **Sessions**, one row per conversation, and **Users**, one row per end user. This page is how to read, filter, and sort them. To attach the IDs in the first place, see [Set session and user IDs](/docs/traceai/manual-instrumentation/set-session-user-id); for the concepts, see [Sessions](/docs/observe/concepts/sessions) and [Users](/docs/observe/concepts/users). -Sessions group related traces together under a single identifier. A chatbot conversation, a multi-step user journey, or any sequence of LLM calls that belong to the same flow can be tracked as one session. 
The Observe dashboard shows sessions with their duration, cost, and token usage so you can review the full flow, drill into individual traces, and spot where things went wrong. +## The Sessions view ---- +Open the project and switch to the **Sessions** tab. Each row is one conversation. -## When to use +Observe Sessions tab listing conversations with first and last message, duration, total cost, and trace count +*One row per conversation. Sort by total cost or total traces to find the longest or most expensive sessions.* -- **Chatbot and multi-turn flows**: Group all traces for a single conversation so you can review the full exchange and debug a specific turn. -- **User journey analysis**: Treat one user's sequence of requests as a session to understand behavior and find drop-off points. -- **Session-level metrics**: See total duration, cost, and tokens for an entire session instead of checking each trace individually. -- **Filtering and drill-down**: Filter sessions by time range, open a session to see its traces, then open a trace to see spans and eval results. +The columns roll each conversation up at a glance: ---- +| Column | What it shows | +|---|---| +| **Session Id** | The shared identifier for the conversation | +| **First Message** | The opening message | +| **Last Message** | The most recent message | +| **Duration** | How long the conversation lasted | +| **Total Cost** | Combined cost of all calls in the session | +| **Total Traces** | How many requests were part of it | -## How to - - - - For a trace to appear in a session, the span must carry a **session identifier** via the `session.id` attribute. All traces with the same session name in the same project form one session. The backend creates the session automatically when the first trace with that identifier arrives. 
- - - - When creating a span manually, set the attribute so the trace is attached to the session: - - - ```python Python - from fi_instrumentation import register, FITracer - - trace_provider = register( - project_type=ProjectType.OBSERVE, - project_name="PROJECT_NAME", - ) - - tracer = FITracer(trace_provider.get_tracer(__name__)) - - with tracer.start_as_current_span( - f"SPAN_NAME", - ) as span: - span.set_status(Status(StatusCode.OK)) - span.set_attribute("session.id", "session123") - span.set_attribute("input.value", "input") - span.set_attribute("output.value", "output") - ``` - ```javascript JS/TS - const { register, ProjectType } = require("@traceai/fi-core"); - - const traceProvider = register({ - projectType: ProjectType.OBSERVE, - projectName: "FUTURE_AGI" - }); - - const tracer = traceProvider.getTracer("manual-instrumentation-example"); - - tracer.startActiveSpan("HandleFunctionCall", {}, (span) => { - span.setAttribute("session.id", "my-session-id"); - span.end(); - }); - ``` - - - - - To tag all spans in a block with the same session, use context so every span gets `session.id` automatically: - - - ```python Python - from fi_instrumentation import using_session - - with using_session(session_id="my-session-id"): - # All spans created within this block get session.id = "my-session-id" - ... - ``` - ```javascript JS/TS - import { context, propagation } from "@opentelemetry/api"; - - const sessionId = "my-js-session-id"; - - const activeContext = context.active(); - const baggageWithSession = propagation.createBaggage({ - "session.id": { value: sessionId } - }); - const newContext = propagation.setBaggage(activeContext, baggageWithSession); - - context.with(newContext, () => { - // All spans created within this block get session.id = "my-js-session-id" - }); - ``` - - - - - In the Observe UI, open the project and go to the Sessions view. 
You can filter by time range, see a list of sessions with duration and metrics, open a session to see its traces, and click **View Trace** for span-level detail and [eval](/docs/observe/features/evals) results. - - - - - For more on setting `session.id` with Trace AI helpers and context, see the [manual tracing guide](/docs/observe/features/manual-tracing/set-session-user-id). - +Open a session for its detail view, the traces in order, each with its eval scores and annotations. From there you open any trace for the full span tree. ---- +Session detail view showing the traces of one conversation in order with per-turn timing and cost +*A session opened: its traces in order, with per-turn timing and cost.* + +Narrow the list with the filter bar, by `session.id`, metadata, or any span attribute (see the [filter syntax reference](/docs/observe/reference/filters)), and scope it to a time window with the date-range picker, which recomputes the column metrics for the window. For voice and other replayable sessions, configure session replay to step back through a conversation as it happened. + +## The Users view + +Switch to the **Users** view. Each row is one end user. + +Observe Users view listing end users with User ID, First Active, Last Active, number of traces, and number of sessions columns +*One row per user, with trace and session counts rolled up. Sort by trace or session count to find your most active users.* + +The columns roll each user up: + +| Column | What it shows | +|---|---| +| **User ID** | The `user.id` value you set in code | +| **First Active** | When the user's first trace arrived | +| **Last Active** | When their most recent trace arrived | +| **No. of Traces** | How many traces are attributed to the user | +| **No. of Sessions** | How many conversations they had | + +Open a user for their detail view, where cost, evals, and guardrail results break down per session and trace, across a Traces tab and a Sessions tab. 
+ +User detail view with summary metrics, a Traces tab, and a Sessions tab for one end user +*User detail: a Traces tab and a Sessions tab for one end user.* + +Filter and scope the same way as sessions, by `user.id`, metadata, or any span attribute, and by date range. + +## Not seeing your groupings? + +| Symptom | Cause | Fix | +|---|---|---| +| Traces not grouping | The call ran outside the `using_session` / `using_user` block, so spans never got the ID | Make the call inside the block (or a decorated function) | +| One conversation split across sessions | A different `session.id` was used on some turns | Reuse one stable string for the whole conversation | +| One person split across users | A different `user.id` was used on some requests | Reuse one stable string for that person | +| Row exists but no metrics | Spans carried the ID but no cost or token attributes | Confirm the LLM spans are auto-instrumented | + +For every way to attach the IDs, see [Set session and user IDs](/docs/traceai/manual-instrumentation/set-session-user-id). -## Next Steps +## Related - - Connect the SDK and start capturing traces. + + What a session is and when to use one - - Run evaluations on your traced spans to score quality. + + What a user is and when to use one - - View activity and metrics per end user. + + Attach session.id and user.id in traceAI - - Get notified when metrics cross a threshold. + + Operators and fields for the filter bar diff --git a/src/pages/docs/observe/guides/explore-dashboard/display-options.mdx b/src/pages/docs/observe/guides/explore-dashboard/display-options.mdx new file mode 100644 index 00000000..95bbda19 --- /dev/null +++ b/src/pages/docs/observe/guides/explore-dashboard/display-options.mdx @@ -0,0 +1,12 @@ +--- +title: "Display options" +description: "Choose which columns show, how rows are grouped, and how the trace table is laid out." 
+--- + +Display options control how the trace table looks: which columns are visible, how they are ordered, and how rows are grouped. Tune them on the `self-improving-agent` project from the [quickstart](/docs/observe/quickstart) so the table shows what you care about. + +## In this guide + +- Show, hide, and reorder columns +- Group rows and adjust density +- Save your layout as part of a view diff --git a/src/pages/docs/observe/guides/explore-dashboard/filters.mdx b/src/pages/docs/observe/guides/explore-dashboard/filters.mdx new file mode 100644 index 00000000..731d936a --- /dev/null +++ b/src/pages/docs/observe/guides/explore-dashboard/filters.mdx @@ -0,0 +1,14 @@ +--- +title: "Filters" +description: "Narrow the trace table to exactly the traces you want by status, model, time, and more." +--- + +Filters cut a busy trace table down to the traces you actually care about, whether that is failed requests, one model, a time window, or a single conversation. This guide filters the `self-improving-agent` project from the [quickstart](/docs/observe/quickstart). + +## In this guide + +- Filter the trace table by status, model, latency, cost, and time +- Combine conditions to narrow the results further +- Filter by session and user to follow one conversation or customer + +For the full set of fields and operators, see [Filter syntax](/docs/observe/reference/filters). diff --git a/src/pages/docs/observe/guides/explore-dashboard/index.mdx b/src/pages/docs/observe/guides/explore-dashboard/index.mdx new file mode 100644 index 00000000..31ef3564 --- /dev/null +++ b/src/pages/docs/observe/guides/explore-dashboard/index.mdx @@ -0,0 +1,17 @@ +--- +title: "Explore dashboard" +description: "Find your way around the Observe dashboard: projects, the trace table, and where each metric lives." +--- + +Once traces start flowing in, everything you do in Observe happens in the dashboard. 
This guide walks the layout using the `self-improving-agent` project from the [quickstart](/docs/observe/quickstart), so you know where projects, traces, and metrics live before you dive into evals, alerts, and filters. + + +Throughout these guides we follow one end-to-end example, the [Improving a LangGraph agent via observability](/docs/cookbook/improve-langgraph-agent-with-observability) cookbook. It instruments a real app and exercises every Observe feature, so you can read a guide here and watch it applied start to finish there. + + +## In this guide + +- Switch between projects and open the `self-improving-agent` project +- Read the trace table: status, model, latency, tokens, and cost per row +- Open a single trace to inspect its spans +- Find where evals, sessions, users, alerts, and dashboards sit in the navigation diff --git a/src/pages/docs/observe/guides/explore-dashboard/trace-drawers.mdx b/src/pages/docs/observe/guides/explore-dashboard/trace-drawers.mdx new file mode 100644 index 00000000..cdac4b41 --- /dev/null +++ b/src/pages/docs/observe/guides/explore-dashboard/trace-drawers.mdx @@ -0,0 +1,12 @@ +--- +title: "Trace drawers" +description: "Open a trace to inspect its spans, inputs, outputs, and timing in the detail drawer." +--- + +Click a row in the trace table and the trace drawer opens: the span tree on one side, and each span's input, output, timing, and attributes on the other. This is where you read what actually happened in a request, using the `self-improving-agent` project from the [quickstart](/docs/observe/quickstart). 
+ +## In this guide + +- Open a trace and read its span tree +- Inspect a span's input, output, and attributes +- Follow timing and status down the tree diff --git a/src/pages/docs/observe/guides/explore-dashboard/views.mdx b/src/pages/docs/observe/guides/explore-dashboard/views.mdx new file mode 100644 index 00000000..a17967ce --- /dev/null +++ b/src/pages/docs/observe/guides/explore-dashboard/views.mdx @@ -0,0 +1,13 @@ +--- +title: "Views" +description: "Save a filtered trace table as a view so you can return to it in one click." +--- + +Once you have filtered the trace table down to something useful, save it as a view so you and your team can jump back to it without rebuilding the filters each time. This guide saves a view on the `self-improving-agent` project from the [quickstart](/docs/observe/quickstart). + +## In this guide + +- Filter the trace table to the traces you want (see [Filters](/docs/observe/guides/explore-dashboard/filters)) +- Save the current filters as a named view +- Switch between saved views +- Share a view with your team diff --git a/src/pages/docs/observe/guides/setup-alerts.mdx b/src/pages/docs/observe/guides/setup-alerts.mdx new file mode 100644 index 00000000..36469a54 --- /dev/null +++ b/src/pages/docs/observe/guides/setup-alerts.mdx @@ -0,0 +1,13 @@ +--- +title: "Setup alerts" +description: "Get notified when your project degrades, so you hear about problems before your users do." +--- + +Alerts watch your traces and metrics and notify you when something crosses a threshold you set, like a spike in errors, a jump in latency, or a failing eval. This guide adds alerts to the `self-improving-agent` project from the [quickstart](/docs/observe/quickstart).
+ +## In this guide + +- Choose what to watch: error rate, latency, cost, or an eval score +- Set the threshold and the time window that trigger the alert +- Pick where the alert is delivered +- Confirm the alert fires, and review its history diff --git a/src/pages/docs/observe/guides/setup-evals.mdx b/src/pages/docs/observe/guides/setup-evals.mdx new file mode 100644 index 00000000..5dc06e6b --- /dev/null +++ b/src/pages/docs/observe/guides/setup-evals.mdx @@ -0,0 +1,15 @@ +--- +title: "Setup evals" +description: "Attach evaluations to your traces so every response is scored on the metrics you care about." +--- + +Evals score the responses your app produces, so you measure quality, safety, or accuracy on real production traffic instead of eyeballing traces one by one. This guide sets up evals on the `self-improving-agent` project from the [quickstart](/docs/observe/quickstart). + +## In this guide + +- Pick the metric you want to score: task completion, factual accuracy, safety, tone, and more +- Attach an eval to the project's traces +- Read the eval column in the trace table, and open a trace to see per-span scores +- Filter and alert on eval results + +To understand what evals measure before you wire them up, see [Run evals on traces](/docs/observe/features/evals). diff --git a/src/pages/docs/observe/index.mdx b/src/pages/docs/observe/index.mdx index 4dd5a957..a82e1d18 100644 --- a/src/pages/docs/observe/index.mdx +++ b/src/pages/docs/observe/index.mdx @@ -1,5 +1,5 @@ --- -title: "Get started with Observe" +title: "Overview" description: "Observe records every request your AI app makes as a trace you can open, search, and score. Send your first trace, then go deeper" --- @@ -16,7 +16,7 @@ You only need one trace to begin. Everything else here builds on it. Instrument one call and watch it land in Observe, in about five minutes - + One line to trace OpenAI, Anthropic, LangChain, and 30+ more
@@ -26,13 +26,16 @@ You only need one trace to begin. Everything else here builds on it. A few short pages give you the whole mental model behind Observe. Read these and the rest of the product explains itself: + + How traces, spans, sessions, and scores fit together + What gets recorded for each request, and how the steps nest - + Follow a full conversation, or one customer across sessions - + The open library that sends your traces to Observe diff --git a/src/pages/docs/observe/quickstart.mdx b/src/pages/docs/observe/quickstart.mdx index d3a95900..9c83118d 100644 --- a/src/pages/docs/observe/quickstart.mdx +++ b/src/pages/docs/observe/quickstart.mdx @@ -7,7 +7,7 @@ Get your first trace into Observe in about five minutes, without changing your a ## In this page -You will install the traceAI instrumentor, register an Observe project, run a single OpenAI call, and confirm the trace in the dashboard with its model, latency, and token cost. The same four steps work for 30+ frameworks, so once OpenAI is traced you have the pattern for the rest of your stack. +You will install the traceAI instrumentor, register an Observe project, run a single OpenAI call, and confirm the trace in the dashboard with its model, latency, and token cost. The same four steps work for 30+ frameworks, so once OpenAI is traced you have the pattern for the rest of your stack. New to tracing? Read the [observability model](/docs/observe/concepts/observability-model) first. 
## Prerequisites @@ -58,7 +58,7 @@ Pin the packages to the version you test against, so a later release cannot chan # Connect to Future AGI and create (or reuse) an Observe project trace_provider = register( project_type=ProjectType.OBSERVE, - project_name="my-first-project", + project_name="self-improving-agent", transport=Transport.GRPC, ) @@ -69,7 +69,7 @@ Pin the packages to the version you test against, so a later release cannot chan client = OpenAI() completion = client.chat.completions.create( model="gpt-4o", - messages=[{"role": "user", "content": "Write a one-sentence bedtime story about a unicorn."}], + messages=[{"role": "user", "content": "Suggest one habit I could build to keep improving every day."}], ) print(completion.choices[0].message.content) ``` @@ -82,7 +82,7 @@ Pin the packages to the version you test against, so a later release cannot chan // Connect to Future AGI and create (or reuse) an Observe project const traceProvider = register({ project_type: ProjectType.OBSERVE, - project_name: "my-first-project", + project_name: "self-improving-agent", }); // Auto-instrument OpenAI: every call is now traced @@ -95,7 +95,7 @@ Pin the packages to the version you test against, so a later release cannot chan const client = new OpenAI(); const completion = await client.chat.completions.create({ model: "gpt-4o", - messages: [{ role: "user", content: "Write a one-sentence bedtime story about a unicorn." }], + messages: [{ role: "user", content: "Suggest one habit I could build to keep improving every day." }], }); console.log(completion.choices[0].message.content); ``` @@ -104,13 +104,13 @@ Pin the packages to the version you test against, so a later release cannot chan Expected terminal output (the wording varies): ```text - Under a sky of silver stars, a gentle unicorn dipped its horn into a - moonlit pool and wished every sleeping child sweet dreams. + Spend five minutes each evening noting one thing you learned that day. 
+ Small daily reflections compound into steady improvement over time. ``` - Open **Observe → my-first-project → Tracing**. Within a few seconds you will see one trace row with **status OK**, the **model**, the **latency**, and the **token count**. Click it to read the prompt, the completion, and the span timing + Open **Observe → self-improving-agent → Tracing**. Within a few seconds you will see one trace row with **status OK**, the **model**, the **latency**, and the **token count**. Click it to read the prompt, the completion, and the span timing Observe trace explorer with one new OpenAI trace showing OK status, model, latency, and token columns *Your request, now a trace. If the row is here with an OK status, instrumentation is working end to end* @@ -121,13 +121,13 @@ Pin the packages to the version you test against, so a later release cannot chan That row is a [trace](/docs/observe/concepts/traces), the full record of one request. Because this example made a single OpenAI call, the trace holds one [span](/docs/observe/concepts/spans): the `llm` operation, carrying the model, the prompt and completion, the token counts, and the cost. -The same four steps instrument 30+ frameworks. Swap the instrumentor for your stack and the flow is identical, see [all framework integrations](/docs/tracing/auto). +The same four steps instrument 30+ frameworks. Swap the instrumentor for your stack and the flow is identical; see [all framework integrations](/docs/traceai/auto). ## Not seeing your trace? - **No trace appears**: a short script can exit before the exporter flushes.
Call `trace_provider.force_flush()` before the process ends - **Wrong or empty project**: confirm `project_name` matches the project you are viewing, and that `FI_API_KEY` and `FI_SECRET_KEY` belong to this workspace -- **Still nothing**: widen the date picker (it defaults to the last 7 days) and turn on **Auto refresh** +- **Still nothing**: widen the date picker (it defaults to the last 7 days) and turn on **Auto refresh**. For deeper help see [No traces appear](/docs/observe/troubleshooting/no-traces-appearing) ## Dive deeper diff --git a/src/pages/docs/observe/reference/export-formats.mdx b/src/pages/docs/observe/reference/export-formats.mdx new file mode 100644 index 00000000..b281742d --- /dev/null +++ b/src/pages/docs/observe/reference/export-formats.mdx @@ -0,0 +1,92 @@ +--- +title: "Export and endpoints" +description: "Reference for getting trace data out of Observe — exporting the current view, the available formats, and the OTLP endpoints traces are sent to (cloud and self-hosted)." +slug: "export-formats" +page_type: "reference" +products: ["Observe"] +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + title: "Trace export and endpoints in Observe" + description: "Reference for getting trace data out of Observe — exporting the current view, the available formats, and the OTLP endpoints traces are sent to (cloud and self-hosted)." + primary_keyword: "export traces endpoints futureagi" + direct_answer: true +geo: + answer_target: "How do I export traces from Observe, and what endpoints does traceAI send to?" + llm_summary: "Observe exports the current trace-explorer view (filters and time range) to CSV from the download icon. Inbound, the traceAI SDK sends spans over OTLP to FutureAGI — set FI_BASE_URL (HTTP) or FI_GRPC_URL (gRPC), defaulting to FutureAGI cloud, or point them at your own collector when self-hosted." 
+canonical: "/docs/observe/reference/export-formats" +related: + - "/docs/observe/features/llm-tracing" + - "/docs/traceai/manual-instrumentation/set-up-tracing" + - "https://opentelemetry.io/docs/" +--- + +## About + +There are two directions for trace data: **out of** Observe (exporting what you're viewing) and **into** Observe (the OTLP endpoints traceAI sends to). This page covers both. + +## Export from the trace explorer + +The download icon in the [trace explorer](/docs/observe/features/llm-tracing) header exports the **current view** — the traces that match your active filters and time range. + +**CSV is the only export format.** There is no JSON or Parquet download from the trace explorer. + +| Format | Use for | +|---|---| +| CSV | Spreadsheet analysis, sharing, importing elsewhere. | + +The CSV holds one row per span in the current view, with the columns the explorer shows: trace ID, span name, status, model, provider, latency, token counts, cost, timestamp, and any eval scores present on the span. + + + The export reflects the current view, so a very large view may be truncated. Narrow the filters or time range to export a complete slice. + + +## Ingestion endpoints + +traceAI exports spans over OTLP to FutureAGI. The transport and target are environment-driven: + +| Variable | Transport | Default | +|---|---|---| +| `FI_BASE_URL` | HTTP collector | FutureAGI cloud collector | +| `FI_GRPC_URL` | gRPC collector | FutureAGI cloud collector | + +When unset, both variables default to the FutureAGI cloud collector, so a cloud project needs no endpoint configuration — only `FI_API_KEY` and `FI_SECRET_KEY`. + +- **Cloud:** leave the defaults; set `FI_API_KEY` and `FI_SECRET_KEY`. +- **Self-hosted:** point `FI_BASE_URL` / `FI_GRPC_URL` at your own collector host so spans stay in your network. + +Choose the transport with `transport=Transport.HTTP` (default) or `Transport.GRPC` in `register()`. 
See [Set up tracing](/docs/traceai/manual-instrumentation/set-up-tracing). + +### Endpoint contract + +The ingestion endpoint is an **OTLP/traces** receiver — you don't call it directly; the traceAI SDK's exporter does. The contract: + +| Aspect | Detail | +|---|---| +| Protocol | OTLP over HTTP (protobuf) or gRPC — the [OpenTelemetry](https://opentelemetry.io/docs/) standard, not a proprietary API. | +| Operation | Export spans (write-only). There is no read/query endpoint; reading happens in the trace explorer. | +| Auth | `FI_API_KEY` + `FI_SECRET_KEY` from the [keys page](https://app.futureagi.com/dashboard/keys), sent by the SDK on every export. Keys are workspace-scoped. | +| Success | The exporter batches spans and sends them in the background; a successful export returns no payload. Spans appear in Observe within seconds. | +| Errors | A `401` means the keys are wrong for this workspace; any other `4xx` means a malformed or oversized batch; transient `5xx`/network errors are retried by the batch exporter. | +| Limits | Spans are sent by the **batch** span processor on an interval, so a short-lived process must call `trace_provider.force_flush()` before exit or the last batch is lost. Very large payloads (huge prompts/outputs) can be dropped — [mask or trim](/docs/traceai/manual-instrumentation/mask-span-attributes) them at the SDK. | +| Versioning | Pin `fi-instrumentation-otel` and each instrumentor to a tested version so a release can't change span shape under you; the wire format follows the OTLP version the SDK ships. | + + + Span input and output can carry customer data before they leave your process. Redact at the SDK with `TraceConfig` or the `FI_HIDE_*` variables — see [Mask span attributes](/docs/traceai/manual-instrumentation/mask-span-attributes). + + +## Related + + + + Filter, then export the current view. + + + Configure the OTLP endpoint and transport.
+ + diff --git a/src/pages/docs/observe/reference/filters.mdx b/src/pages/docs/observe/reference/filters.mdx new file mode 100644 index 00000000..b5a5e678 --- /dev/null +++ b/src/pages/docs/observe/reference/filters.mdx @@ -0,0 +1,92 @@ +--- +title: "Filters" +description: "How to filter traces in Observe: the filter modes, every filterable property, the metrics you can filter and aggregate on, and ready-to-paste queries." +--- + +The **Filter** panel in the [trace explorer](/docs/observe/features/llm-tracing) narrows which traces are shown. It offers three modes: plain-language AI search, a Basic property/condition/value builder, and a Query expression for power users. This page lists the modes, every property you can filter on, the metrics you can filter and aggregate by, and a set of ready-to-paste queries. + +## Filter modes + +| Mode | Use it for | +|---|---| +| AI search | Describe what you want in plain English (e.g. *"errors on gpt-4o today"*) and the filter is built for you | +| Basic | Pick a property, a condition, and a value. Add several; they apply together (AND) | +| Query | Write a filter expression directly, for conditions the Basic builder can't express | + + + **Query mode uses symbolic operators** (`=`, `!=`, `contains`, `>`, `<`), while **Basic mode uses the word equivalents** (`is`, `is not`, `contains`, `greater than`, `less than`). They mean the same thing: the property table below lists the word forms, and the ready-to-use queries use the symbols. + + +## Filterable properties + +Each property maps to a [span](/docs/observe/concepts/spans) attribute key. Use the property name in the Basic builder, or the attribute key in a Query expression. 
+ +| Property | Attribute key | Example value | Operators | +|---|---|---|---| +| Trace ID | `trace.id` | `7f3c1a9b…` | `is`, `is not` | +| Trace Name | `trace.name` | `support_agent.run` | `is`, `is not`, `contains` | +| Span Name | `span.name` | `tool.check_order_status` | `is`, `is not`, `contains` | +| Status | `status` | `OK`, `ERROR` | `is`, `is not` | +| Model | `llm.model_name` | `gpt-4o` | `is`, `is not`, `contains` | +| Node Type | `node.type` | `llm`, `chain`, `tool` | `is`, `is not` | +| Span Kind | `fi.span.kind` | `LLM`, `RETRIEVER`, `TOOL` | `is`, `is not` | +| User ID | `user.id` | `user_8821` | `is`, `is not`, `contains` | +| Provider | `llm.provider` | `openai`, `anthropic` | `is`, `is not` | +| Service / Trace Name | `service.name` | `checkout-service` | `is`, `is not`, `contains` | +| Latency | `latency` | `5000` (ms, numeric) | `greater than`, `less than` | +| Eval score | `eval.score` / `scores.` | `0.5` (numeric) | `greater than`, `less than` | +| Tag | `tag.tags` | `needs-review` (list of strings) | `contains` | + +In addition, **annotation values** attached to a span are filterable, using `is` / `is not` on an annotation value. + + + Property names and attribute keys are case-sensitive in Query mode. Status values are upper-case (`OK`, `ERROR`); model and provider names match what the SDK reported. + + +## Ready-to-use filters + +Paste any of these into the **Query** tab. Each line finds a common class of trace. + +```text +status = ERROR AND llm.model_name = gpt-4o # errors on a specific model +latency > 5000 # slow spans, over 5 seconds +user.id = user_8821 # every trace for one end user +eval.score < 0.5 # low-scoring responses +fi.span.kind = RETRIEVER # retriever spans only +tag.tags contains needs-review # traces carrying a tag +``` + + + Combine conditions with `AND` to narrow, and reuse the same expression as a saved view in the trace explorer so the whole team sees the same slice. 
+ + +## Basic operators + +| Operator | Applies to | +|---|---| +| `is` / `is not` | Exact match (status, model, provider, enums) | +| `contains` | Substring match (names, inputs, user ID) | +| `greater than` / `less than` | Numeric values (latency, tokens, eval score) | + +## Metrics + +Alongside the properties above, these are the metrics Observe computes from your spans: the values you sort the trace table by and aggregate on a [dashboard](/docs/observe/features/dashboard) widget. Nothing is precomputed; each is derived from the spans that match your filters and time window. + +| Metric | Unit | What it measures | +|---|---|---| +| Span count | count | Number of spans matching the filters | +| Error count | count | Number of spans or traces with `ERROR` status | +| Span response time | ms | Latency of a span | +| LLM response time | ms | Latency of LLM spans specifically | +| Token usage | tokens | Tokens consumed (prompt + completion) | +| Cost | USD | Computed cost of model calls | +| Eval pass-rate | % | Share of evaluated spans that passed their eval | + +- **Latency is per span, not per trace.** Span response time measures one operation; a whole request's wall-clock time is the root span's duration, so filter to root spans to compare requests. +- **Cost and token usage only populate on LLM spans.** A tool or retrieval span adds to span count but contributes zero tokens and zero cost. + +## Aggregations and granularity + +Aggregate a metric with **Sum, Average, Median, Count, Distinct count,** or **Min / Max**, over a time bucket of **minute, hour, day, week,** or **month**. The available granularities adjust to the selected time range: a 12-month range, for example, will not offer minute granularity. + +If a dashboard number looks wrong, see [Dashboard numbers look wrong](/docs/observe/troubleshooting/dashboard-numbers-look-wrong).
diff --git a/src/pages/docs/observe/troubleshooting/alerts-did-not-fire.mdx b/src/pages/docs/observe/troubleshooting/alerts-did-not-fire.mdx new file mode 100644 index 00000000..f111020f --- /dev/null +++ b/src/pages/docs/observe/troubleshooting/alerts-did-not-fire.mdx @@ -0,0 +1,93 @@ +--- +title: "Alerts not firing" +description: "A metric crossed your threshold but no alert arrived. Usual causes: the monitor's frequency hasn't elapsed, it's muted, the threshold direction is wrong, or notifications are misconfigured." +slug: "alerts-did-not-fire" +page_type: "troubleshooting" +products: ["Observe"] +failure_surface: "dashboard" +symptom: "alert did not fire" +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-05-25" +last_screenshotted: "2026-06-19" +support_escalation: "support@futureagi.com" +schema_type: "TechArticle" +seo: + title: "Alerts not firing in Observe" + description: "A metric crossed your threshold but no alert arrived. Usual causes: the monitor's frequency hasn't elapsed, it's muted, the threshold direction is wrong, or notifications are misconfigured." + primary_keyword: "alert did not fire monitor" + direct_answer: true +geo: + answer_target: "Why did my Observe monitor not fire an alert?" + llm_summary: "When a metric crosses a threshold but no alert arrives, check timing first — a monitor only evaluates on its alert_frequency (min 5, default 60 min) — then whether it's muted, whether the threshold operator and value match the breach, whether a percentage-change baseline has enough history, and whether the email or Slack notification channel is configured correctly." 
+canonical: "/docs/observe/troubleshooting/alerts-did-not-fire" +related: + - "/docs/observe/features/alerts" + - "/docs/observe/features/dashboard" + - "/docs/observe/reference/filters" +--- + + +## Symptom + +A metric crossed what you thought was the threshold, but no email or Slack arrived. The usual causes are timing (the monitor only evaluates on its schedule), the monitor being muted, the threshold direction or value being set differently than you remember, or the notification channel itself failing. Check the schedule and mute state first — those explain most "missing" alerts. + +- A metric clearly breached the limit but no notification came. +- Alerts used to arrive and stopped. +- The alert log shows nothing for the period you expected. + +Observe alerts list showing an Active 'High latency (p95 > 2s)' monitor with Last Triggered '-' and 0 triggers +*An alert configured but never fired: Status is Active, yet Last Triggered shows '-' and the trigger count is 0. That points to schedule timing, a mute, or a threshold set differently than expected — not a broken metric.* + +--- + +## Quick checks + +- One `alert_frequency` cycle has elapsed since the breach (minimum 5, default 60 minutes). +- The monitor is **not** muted (`is_mute`). +- `threshold_operator` and the critical value match the direction of the breach. +- `notification_emails` and/or `slack_webhook_url` are set and valid. + +## Causes and fixes + +| Cause | What you see | Fix | +|---|---|---| +| Frequency hasn't elapsed | A brief breach between evaluation runs left no alert | A monitor evaluates on `alert_frequency` (minimum 5, default 60 minutes). Lower the frequency if you need faster detection. | +| Monitor is muted | The monitor keeps evaluating but no notification arrives | `is_mute` stops notifications while evaluation continues; unmute it. 
| +| Threshold direction or value | A spike didn't fire a "less than" monitor (or vice versa) | `threshold_operator` (`Greater than` / `Less than`) and the critical value must match the breach you expect. | +| Percentage-change baseline | A new project never alerts on a percentage-change monitor | A percentage-change monitor needs enough history in its `auto_threshold_time_window` to compute a baseline. | +| Notification channel | The alert log shows a fire, but no email/Slack arrives | Verify `notification_emails` (up to 5) and/or `slack_webhook_url`; a bad webhook silently drops the message. | + +## Diagnostic checks + +Open the monitor and read its **frequency, mute state, operator, threshold value, and notification channels**, then check the alert log: + +- A log entry with no email/Slack points at the notification channel (`notification_emails` / `slack_webhook_url`). +- No log entry at all points at timing (`alert_frequency`), mute (`is_mute`), or the threshold direction. + +## Minimal smoke test + +Set a deliberately easy threshold, wait one `alert_frequency` cycle, and confirm an alert log entry plus the email/Slack message arrive. Then restore the real threshold. + +## Escalate + +If the monitor still won't fire on a confirmed breach, contact support@futureagi.com with the monitor name, its config, and the breach timestamp. + +## Prevent recurrence + +- Match `alert_frequency` to how fast you need to know — don't leave it at 60 if minutes matter. +- Test each notification channel once when you create the monitor. + +## Next steps + + + + How monitors and thresholds are configured. + + + Confirm the metric trend the alert watches. 
+ + diff --git a/src/pages/docs/observe/troubleshooting/dashboard-numbers-look-wrong.mdx b/src/pages/docs/observe/troubleshooting/dashboard-numbers-look-wrong.mdx new file mode 100644 index 00000000..4aba70f9 --- /dev/null +++ b/src/pages/docs/observe/troubleshooting/dashboard-numbers-look-wrong.mdx @@ -0,0 +1,94 @@ +--- +title: "Dashboard numbers" +description: "A dashboard widget shows a number you didn't expect. Almost always it's the time range, granularity, aggregation, filters, or sampling — not bad data." +slug: "dashboard-numbers-look-wrong" +page_type: "troubleshooting" +products: ["Observe"] +failure_surface: "dashboard" +symptom: "dashboard numbers look wrong" +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-05-25" +last_screenshotted: "2026-06-19" +support_escalation: "support@futureagi.com" +schema_type: "TechArticle" +seo: + title: "Dashboard numbers look wrong in Observe" + description: "A dashboard widget shows a number you didn't expect. Almost always it's the time range, granularity, aggregation, filters, or sampling — not bad data." + primary_keyword: "dashboard numbers wrong observability" + direct_answer: true +geo: + answer_target: "Why does an Observe dashboard widget show a number I didn't expect?" + llm_summary: "When a dashboard widget shows an unexpected number, the data is almost always right and the query is reading it differently than assumed. Check the time range, granularity, aggregation, and filters first — plus eval sampling and timezone — and cross-check one value against the trace explorer for the same window." 
+canonical: "/docs/observe/troubleshooting/dashboard-numbers-look-wrong" +related: + - "/docs/observe/features/dashboard" + - "/docs/observe/features/llm-tracing" + - "/docs/observe/reference/filters" +--- + + +## Symptom + +A widget shows a number that doesn't match what you expected — cost too low, latency too high, a count that seems off. Almost always the data is right and the *query* is reading it differently than you assumed: the time range, granularity, aggregation, or filters change what a widget reports. Check those four before suspecting the underlying traces. + +- A metric looks far higher or lower than reality. +- Two widgets that "should" match don't. +- A number changed when you only changed the time range or granularity. + +Observe System Metrics dashboard with latency, tokens, traffic, and cost charts over a 30-day range +*The System Metrics view. When a number here looks off, the data is usually right and the query is reading it differently — check the time range, granularity, aggregation, and filters before suspecting the traces.* + +--- + +## Quick checks + +- The widget's **time range** matches the window you have in mind. +- The **granularity** (bucket size) is what you expect — per-hour and per-day give different numbers. +- The **aggregation** (sum / average / median) answers the question you're asking. +- No stray **filter** (model, status, attribute) is silently narrowing the data. + +## Causes and fixes + +| Cause | What you see | Fix | +|---|---|---| +| Time range / granularity | A number changed when you only changed the window or bucket size | A chart reflects the selected window and bucket. Set both to match your expectation — *average latency per hour* and *per day* differ from the same traces. | +| Aggregation mismatch | Two widgets that "should" match don't | Sum vs. average vs. median answer different questions — confirm the widget uses the one you mean. 
| +| Filters narrowing the data | A metric looks far lower than reality | A widget filter (model, status, attribute) silently excludes traces; clear it to compare against the full set. | +| Eval sampling | An eval-based metric covers fewer spans than total traffic | If a metric is built on evals run at a sampling rate, it covers a *subset* of spans, not all of them — compare it against the sampled span count rather than total traffic. | +| Timezone | An apparent gap or spike at a day boundary | Day boundaries follow the dashboard timezone, so a gap or spike at a day boundary is a boundary effect, not missing data. | + +## Diagnostic checks + +Open the widget editor and read its **time range, granularity, aggregation, group-by, and filters**. Then cross-check one value against the [trace explorer](/docs/observe/features/llm-tracing) for the exact same window: + +- Apply the same time range and filters in the trace explorer. +- Count the matching traces (or read the latency/cost column) and compare to the widget. +- If the two agree, the widget config — not the data — explains the number. + +## Minimal smoke test + +Set the widget's time range and granularity to match your expectation, clear extra filters, and confirm the value lines up with a trace-explorer count for the same window. They should reconcile within the rounding of the chosen aggregation. + +## Escalate + +If a value still can't be reconciled with the trace list for the same window, contact support@futureagi.com with the dashboard, the widget config, and the window. + +## Prevent recurrence + +- Label widgets with their aggregation and window so readers don't misread them. +- Keep one "all traffic, no filters" reference widget to sanity-check the others. + +## Next steps + + + + How widgets are configured. + + + Cross-check a number against the raw traces. 
+ + diff --git a/src/pages/docs/observe/troubleshooting/missing-attributes.mdx b/src/pages/docs/observe/troubleshooting/missing-attributes.mdx new file mode 100644 index 00000000..f565423c --- /dev/null +++ b/src/pages/docs/observe/troubleshooting/missing-attributes.mdx @@ -0,0 +1,104 @@ +--- +title: "Spans and attributes" +description: "The trace appears but spans are missing or fields like input/output are blank. Usual causes: masking is on, the instrumentor isn't attached, or a custom key isn't indexed." +slug: "missing-attributes" +page_type: "troubleshooting" +products: ["Observe"] +failure_surface: "sdk" +symptom: "spans or attributes missing from trace" +audience: ["engineer"] +difficulty: "intermediate" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-05-25" +last_screenshotted: "2026-06-19" +support_escalation: "support@futureagi.com" +schema_type: "TechArticle" +seo: + title: "Spans or attributes missing from a trace" + description: "The trace appears but spans are missing or fields like input/output are blank. Usual causes: masking is on, the instrumentor isn't attached, or a custom key isn't indexed." + primary_keyword: "missing spans attributes trace" + direct_answer: true +geo: + answer_target: "Why are spans or attributes missing from a trace in Observe?" + llm_summary: "If a trace shows but spans or fields like input/output are missing, check redaction first (FI_HIDE_INPUTS/OUTPUTS or TraceConfig masking makes blanks expected), then confirm the framework's instrumentor is attached, that custom attributes were set on the active span before it closed, and that you're filtering on indexed semantic-convention keys with supported value types." 
+canonical: "/docs/observe/troubleshooting/missing-attributes" +related: + - "/docs/observe/features/llm-tracing" + - "/docs/traceai/manual-instrumentation/add-attributes-metadata-tags" + - "/docs/traceai/manual-instrumentation/mask-span-attributes" + - "/docs/observe/concepts/spans" +--- + + +## Symptom + +The trace shows up, but it's incomplete — a nested span is missing, or fields like input and output are blank, or a custom attribute you set isn't there. The usual causes are redaction being switched on (in which case blank is *expected*), the framework's instrumentor not being attached, or an attribute set on the wrong span or after it closed. Check redaction first, because a hidden field is working as designed, not a bug. + +- A span's input/output show as hidden or blank. +- A framework's child spans (e.g. nested LangGraph nodes) don't appear. +- A custom attribute you set isn't on the span, or you can't filter by it. + +Observe trace explorer showing an empty trace list — 'No traces found' — with the attribute columns present but unpopulated +*When spans or attributes aren't arriving, the trace list reads empty like this — the columns exist but nothing fills them. Confirm the instrumentor is attached and that redaction isn't hiding fields before assuming data was dropped.* + +--- + +## Quick checks + +- Redaction is **off** for the fields you expect to see (`FI_HIDE_INPUTS` / `FI_HIDE_OUTPUTS` and `TraceConfig` masking). +- The framework's instrumentor is installed and `instrument()` ran against the same tracer provider. +- Custom attributes are set while the span is **still active**, before its `with` block closes. +- Anything you filter on uses a [semantic-convention](/docs/traceai/manual-instrumentation/semantic-conventions) key with a supported value type. 
+ +## Causes and fixes + +| Cause | What you see | Fix | +|---|---|---| +| Redaction is on (check first) | Input/output render as hidden or blank, but the span is otherwise complete | Confirm whether `FI_HIDE_INPUTS` / `FI_HIDE_OUTPUTS` or `TraceConfig` masking is set — if so, the blank is expected. See [Mask span attributes](/docs/traceai/manual-instrumentation/mask-span-attributes). | +| Instrumentor not attached for that framework | A framework's child spans (e.g. nested LangGraph nodes) never appear | Install the instrumentor for the missing framework and call its `instrument()` against the same tracer provider. | +| Attribute set on the wrong span / after close | A custom attribute you set isn't on the span | Set attributes while the span is active; a value set after the `with` block closes is dropped. | +| Custom key isn't indexed for filtering | The attribute is on the span but you can't filter by it | Use a [semantic-convention](/docs/traceai/manual-instrumentation/semantic-conventions) key where one exists — the UI filters on standard keys. | +| Unsupported value type | The attribute is silently dropped | Attribute values must be string, bool, int, float, or an array of those. | + +## Diagnostic commands + +Print one span's attributes from a span exporter to confirm what actually reached the SDK, so you can tell a redacted field from a missing one: + +```python +from opentelemetry.sdk.trace.export import SimpleSpanProcessor, SpanExporter + +class PrintAttributes(SpanExporter): + def export(self, spans): + for span in spans: + print(span.name, dict(span.attributes)) + +trace_provider.add_span_processor(SimpleSpanProcessor(PrintAttributes())) +``` + +If the attribute prints here but isn't filterable in the UI, the key isn't indexed; if it doesn't print at all, it was set on the wrong span, set after the span closed, or dropped because its value type is unsupported. + +## Minimal smoke test + +Re-run one request, then open the trace, click the span, and check the **attributes** list. 
The previously-missing span or attribute should now show in the span detail, and you should be able to filter by it for standard keys. + +## Escalate + +If you're still stuck, contact support@futureagi.com with your `project_name`, the trace ID, the framework + instrumentor versions, and the attribute you expected. + +## Prevent recurrence + +- Decide masking deliberately and document it, so blank fields aren't mistaken for bugs. +- Prefer semantic-convention keys for anything you'll filter or evaluate on. + +## Next steps + + + + How attributes get onto spans. + + + Why a field might be intentionally hidden. + + diff --git a/src/pages/docs/observe/troubleshooting/no-traces-appearing.mdx b/src/pages/docs/observe/troubleshooting/no-traces-appearing.mdx new file mode 100644 index 00000000..64482f67 --- /dev/null +++ b/src/pages/docs/observe/troubleshooting/no-traces-appearing.mdx @@ -0,0 +1,115 @@ +--- +title: "No traces appear" +description: "Your app ran but no trace shows up in FutureAGI Observe. The usual causes are an unflushed short-lived process, the wrong project_type, missing FI_API_KEY or FI_SECRET_KEY, or a date-picker window that is too narrow." +slug: "no-traces-appearing" +page_type: "troubleshooting" +products: ["Observe"] +failure_surface: "sdk" +symptom: "no traces appear in observe" +audience: ["engineer"] +difficulty: "beginner" +status: "review" +owner: "observability" +reviewers: ["observability-eng"] +last_tested: "2026-06-17" +support_escalation: "support@futureagi.com" +schema_type: "TechArticle" +seo: + title: "No traces appear in FutureAGI Observe — fixes" + description: "Fix an app that runs but sends no traces to Observe: flush a short-lived process, correct the project_type, set FI_API_KEY and FI_SECRET_KEY, and widen the date picker." + primary_keyword: "no traces appearing futureagi" + direct_answer: true +geo: + answer_target: "Why are no traces appearing in FutureAGI Observe?" 
+ llm_summary: "Traces usually fail to appear because a short-lived process exited before force_flush(), the wrong project_type was set, FI_API_KEY or FI_SECRET_KEY is missing, or the date-picker window is too narrow. Flush the process, fix the keys and project_type, and widen the date range, then resend a request." +canonical: "/docs/observe/troubleshooting/no-traces-appearing" +related: + - "/docs/observe/quickstart" + - "/docs/observe/features/llm-tracing" + - "/docs/traceai/troubleshooting/spans-not-exported" + - "/docs/traceai/manual-instrumentation/set-up-tracing" +--- + +## Symptom + +You instrumented your app with traceAI and ran it, but no trace shows up in the [trace explorer](/docs/observe/features/llm-tracing). Typically: + +- A request ran with no error, but no new row appears in the trace list. +- A short script (a one-off `python app.py`) never produces a trace. +- Traces appeared before but stopped after a code change. + +The most common cause is a short-lived process that exited before its spans flushed; the next most common are the wrong `project_type`, missing keys, or a date window that hides the trace. Work the checks below in order — the first one fixes the large majority of cases. + +## Quick checks + +- The process **stays alive** long enough to export, or calls `force_flush()` before exiting. +- `FI_API_KEY` and `FI_SECRET_KEY` are set to this workspace's keys. +- `register()` is called with the correct `project_type` and `project_name`, **before** the framework client is created. +- The date picker is widened to **Today** (not the default 7-day window) and **Auto refresh** is on. + +## Causes and fixes + +| Cause | What you see | Fix | +|---|---|---| +| Short-lived process not flushed (most common) | A one-off script runs clean but no trace appears; long-running services are fine | Call `trace_provider.force_flush()` before the process ends, or pass `batch=False` to `register()`. 
| +| Wrong `project_type` | App runs, keys are valid, but traces never land in the project you expect | Set `project_type=ProjectType.OBSERVE` (and the matching `project_name`) in `register()`. | +| Missing `FI_API_KEY` / `FI_SECRET_KEY` | Export fails or is silently dropped; nothing reaches Observe | Set both env vars to this workspace's keys before the app starts. | +| Instrumented after the client was created | Some or all spans never emit because the client wasn't wrapped | Call `register()` and the instrumentor **before** constructing the framework client. | +| Date-picker window too narrow | The trace exists but is filtered out of the view | Widen the date range to **Today** and enable **Auto refresh**. | + +The Observe trace explorer date-range picker, widened so a recent trace falls inside the selected window + +## Diagnostic commands + +Confirm the keys are present in the environment the app actually runs in: + +```bash +env | grep -E "FI_API_KEY|FI_SECRET_KEY" +``` + +Force a flush in a short script so spans are exported before the process exits: + +```python +from fi_instrumentation import register +from fi_instrumentation.fi_types import ProjectType + +trace_provider = register( + project_type=ProjectType.OBSERVE, + project_name="my-project", +) + +# ... run one request ... + +trace_provider.force_flush() +``` + +If spans still never leave the SDK, work through [Spans not exported](/docs/traceai/troubleshooting/spans-not-exported). + +## Minimal smoke test + +Send one request, then open **Observe → your project → Tracing** with **Auto refresh** on and the date range widened to **Today**. A new trace should appear within seconds, **Status OK**, with input, output, latency, and model populated. If it doesn't, recheck the causes above in order. + +## Prevent recurrence + +- Add `trace_provider.force_flush()` to short scripts and job runners. 
+- Call `register()` + `instrument()` once at startup, before any client is built — see [Set up tracing](/docs/traceai/manual-instrumentation/set-up-tracing). +- Keep `FI_API_KEY`, `FI_SECRET_KEY`, and `project_type` in your startup config so they can't drift per environment. + +If you're still stuck, collect your `project_name`, a request timestamp, your installed `fi-instrumentation-otel` and instrumentor versions, and any stderr, and contact support@futureagi.com. + +## Next steps + + + + Get a first trace flowing end to end. + + + The setup this page diagnoses. + + + When spans never leave the SDK at all. + + + Where traces should appear. + +