Skip to content

Commit 9d371fa

Browse files
feat: add agent graph tracker (#89)
**Requirements** - [ ] I have added test coverage for new or changed functionality - [x] I have followed the repository's [pull request submission guidelines](../blob/main/CONTRIBUTING.md#submitting-pull-requests) - [ ] I have validated my changes against all supported platform versions **Describe the solution you've provided** Implements the `AIGraphTracker` class to handle tracking events for agent graphs in the SDK. **Describe alternatives you've considered** This is the primary implementation of the tracker. It's broken into its own tracker rather than polluting the interface of the BaseTracker or AiConfigTracker since this is a separate entity that has its own interaction & metric patterns. **Additional context** This implements the following events to be tracked: ``` Edge-level metrics $ld:ai:graph:redirect<{ sourceKey, redirectedTarget }> $ld:ai:graph:handoff_success<{ sourceKey, targetKey }> $ld:ai:graph:handoff_failure<{ sourceKey, targetKey }> Node level metrics $ld:ai:graph:node_invocation<{ graphKey, configKey }> $ld:ai:graph:tool_call<{ graphKey, configKey, toolKey }> <judge_metrics><{ graphKey, configKey }> (judge on a specific node) Graph metrics $ld:ai:graph:invocation_success $ld:ai:graph:invocation_failure $ld:ai:graph:latency (total latency of the entire graph invocation) $ld:ai:graph:total_tokens (total token usage of the graph invocation) $ld:ai:graph:path<{ graphKey, ...configKey }> <judge_metrics><{ graphKey }> (judges on final output) ``` <!-- CURSOR_SUMMARY --> --- > [!NOTE] > **Medium Risk** > Adds new LaunchDarkly `track()` event emission for agent graph executions and wires it into `LDAIClient.agent_graph`, increasing telemetry volume and coupling to variation metadata; incorrect usage could affect metrics quality but not core evaluation behavior. 
> > **Overview** > Adds a new `AIGraphTracker` to emit LaunchDarkly tracking events for agent graph executions (graph invocation success/failure, latency, total tokens, execution path, node invocations/tool calls, redirects/handoffs, and judge metric events). > > `LDAIClient.agent_graph()` now constructs an `AIGraphTracker` from flag `_ldMeta` (variation key/version) and returns it on the `AgentGraphDefinition` (including disabled/invalid graph returns), which also gains a `tracker` field plus `get_tracker()` accessor. `AIGraphTracker` is exported from `ldai.__init__` for SDK consumers. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit e6c1ff3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY -->
2 parents 3b485fc + e6c1ff3 commit 9d371fa

4 files changed

Lines changed: 273 additions & 3 deletions

File tree

packages/sdk/server-ai/src/ldai/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
Edge, JudgeConfiguration, LDAIAgent, LDAIAgentConfig, LDAIAgentDefaults,
1414
LDMessage, ModelConfig, ProviderConfig)
1515
from ldai.providers.types import EvalScore, JudgeResponse
16+
from ldai.tracker import AIGraphTracker
1617

1718
__all__ = [
1819
'LDAIClient',
@@ -21,6 +22,7 @@
2122
'AIAgentConfigRequest',
2223
'AIAgents',
2324
'AIAgentGraphConfig',
25+
'AIGraphTracker',
2426
'Edge',
2527
'AICompletionConfig',
2628
'AICompletionConfigDefault',

packages/sdk/server-ai/src/ldai/agent_graph/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
"""Graph implementation for managing AI agent graphs."""
22

3-
from dataclasses import dataclass
43
from typing import Any, Callable, Dict, List, Optional, Set
54

65
from ldclient import Context
76

87
from ldai.models import AIAgentConfig, AIAgentGraphConfig, Edge
8+
from ldai.tracker import AIGraphTracker
99

1010
DEFAULT_FALSE = AIAgentConfig(key="", enabled=False)
1111

@@ -54,11 +54,21 @@ def __init__(
5454
nodes: Dict[str, AgentGraphNode],
5555
context: Context,
5656
enabled: bool,
57+
tracker: Optional[AIGraphTracker] = None,
5758
):
5859
self._agent_graph = agent_graph
5960
self._context = context
6061
self._nodes = nodes
6162
self.enabled = enabled
63+
self._tracker = tracker
64+
65+
def get_tracker(self) -> Optional[AIGraphTracker]:
    """
    Get the graph tracker for this graph definition.

    :return: The AIGraphTracker instance, or None if no tracker was
        supplied when this definition was constructed.
    """
    return self._tracker
6272

6373
def is_enabled(self) -> bool:
6474
"""Check if the graph is enabled."""

packages/sdk/server-ai/src/ldai/client.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
ProviderConfig)
1717
from ldai.providers.ai_provider_factory import AIProviderFactory
1818
from ldai.sdk_info import AI_SDK_LANGUAGE, AI_SDK_NAME, AI_SDK_VERSION
19-
from ldai.tracker import LDAIConfigTracker
19+
from ldai.tracker import AIGraphTracker, LDAIConfigTracker
2020

2121
_TRACK_SDK_INFO = '$ld:ai:sdk:info'
2222
_TRACK_USAGE_COMPLETION_CONFIG = '$ld:ai:usage:completion-config'
@@ -474,6 +474,19 @@ def agent_graph(
474474
"""
475475
variation = self._client.variation(key, context, {})
476476

477+
# Extract variation metadata for tracker
478+
variation_key = variation.get("_ldMeta", {}).get("variationKey", "")
479+
version = int(variation.get("_ldMeta", {}).get("version", 1))
480+
481+
# Create graph tracker
482+
tracker = AIGraphTracker(
483+
self._client,
484+
variation_key,
485+
key,
486+
version,
487+
context,
488+
)
489+
477490
if not variation.get("root"):
478491
log.debug(f"Agent graph {key} is disabled, no root config key found")
479492
return AgentGraphDefinition(
@@ -486,6 +499,7 @@ def agent_graph(
486499
nodes={},
487500
context=context,
488501
enabled=False,
502+
tracker=tracker,
489503
)
490504

491505
edge_keys = list[str](variation.get("edges", {}).keys())
@@ -513,6 +527,7 @@ def agent_graph(
513527
nodes={},
514528
context=context,
515529
enabled=False,
530+
tracker=tracker,
516531
)
517532

518533
try:
@@ -543,6 +558,7 @@ def agent_graph(
543558
nodes={},
544559
context=context,
545560
enabled=False,
561+
tracker=tracker,
546562
)
547563

548564
nodes = AgentGraphDefinition.build_nodes(
@@ -555,6 +571,7 @@ def agent_graph(
555571
nodes=nodes,
556572
context=context,
557573
enabled=agent_graph_config.enabled,
574+
tracker=tracker,
558575
)
559576

560577
def agents(

packages/sdk/server-ai/src/ldai/tracker.py

Lines changed: 242 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import time
22
from dataclasses import dataclass
33
from enum import Enum
4-
from typing import Any, Dict, Optional
4+
from typing import Any, Dict, List, Optional
55

66
from ldclient import Context, LDClient
77

@@ -407,3 +407,244 @@ def _openai_to_token_usage(data: dict) -> TokenUsage:
407407
input=data.get("prompt_tokens", 0),
408408
output=data.get("completion_tokens", 0),
409409
)
410+
411+
412+
class AIGraphTracker:
    """
    Tracks graph-level, node-level, and edge-level metrics for AI agent graph operations.

    Every event emitted carries the variation key, graph key, and version
    captured at construction time, so one tracker instance corresponds to one
    evaluated graph configuration.
    """

    def __init__(
        self,
        ld_client: LDClient,
        variation_key: str,
        graph_key: str,
        version: int,
        context: Context,
    ):
        """
        Initialize an AI Graph tracker.

        :param ld_client: LaunchDarkly client instance used to emit events.
        :param variation_key: Variation key for tracking.
        :param graph_key: Graph configuration key for tracking.
        :param version: Version of the variation.
        :param context: Context for evaluation.
        """
        self._ld_client = ld_client
        self._variation_key = variation_key
        self._graph_key = graph_key
        self._version = version
        self._context = context

    def __get_track_data(self) -> Dict[str, Any]:
        """
        Get the base tracking data attached to every event.

        :return: Dictionary containing variation key, graph key, and version.
        """
        return {
            "variationKey": self._variation_key,
            "graphKey": self._graph_key,
            "version": self._version,
        }

    def __track(self, event_key: str, data: Dict[str, Any], value: Any) -> None:
        """Emit one track event against the stored evaluation context."""
        self._ld_client.track(event_key, self._context, data, value)

    def __track_judge(self, response: Any, extra: Dict[str, Any]) -> None:
        """
        Shared implementation for graph-level and node-level judge tracking.

        Emits one event per eval metric, using the metric key as the event key
        and the judge's score as the metric value. Anything that is not a
        JudgeResponse with non-empty evals is silently ignored.

        :param response: Expected to be a JudgeResponse; other values are a no-op.
        :param extra: Additional fields merged into the base track data.
        """
        # Local import kept from the original implementation — presumably to
        # avoid a circular import with the providers package; confirm.
        from ldai.providers.types import EvalScore, JudgeResponse

        if not isinstance(response, JudgeResponse) or not response.evals:
            return

        track_data = {**self.__get_track_data(), **extra}
        if response.judge_config_key:
            track_data["judgeConfigKey"] = response.judge_config_key

        for metric_key, eval_score in response.evals.items():
            if isinstance(eval_score, EvalScore):
                self.__track(metric_key, track_data, eval_score.score)

    def __track_handoff(self, event_key: str, source_key: str, target_key: str) -> None:
        """Shared implementation for handoff success/failure events."""
        track_data = {
            **self.__get_track_data(),
            "sourceKey": source_key,
            "targetKey": target_key,
        }
        self.__track(event_key, track_data, 1)

    def track_invocation_success(self) -> None:
        """Track a successful graph invocation."""
        self.__track("$ld:ai:graph:invocation_success", self.__get_track_data(), 1)

    def track_invocation_failure(self) -> None:
        """Track an unsuccessful graph invocation."""
        self.__track("$ld:ai:graph:invocation_failure", self.__get_track_data(), 1)

    def track_latency(self, duration: int) -> None:
        """
        Track the total latency of graph execution.

        :param duration: Duration in milliseconds.
        """
        self.__track("$ld:ai:graph:latency", self.__get_track_data(), duration)

    def track_total_tokens(self, tokens: TokenUsage) -> None:
        """
        Track aggregated token usage across the entire graph invocation.

        :param tokens: Token usage data; only the combined total is reported.
        """
        self.__track("$ld:ai:graph:total_tokens", self.__get_track_data(), tokens.total)

    def track_path(self, path: List[str]) -> None:
        """
        Track the execution path through the graph.

        :param path: Configuration keys representing the sequence of nodes
            executed during graph traversal.
        """
        self.__track("$ld:ai:graph:path", {**self.__get_track_data(), "path": path}, 1)

    def track_judge_response(self, response: Any) -> None:
        """
        Track judge responses for the final graph output.

        :param response: JudgeResponse object containing evals and success status.
        """
        self.__track_judge(response, {})

    def track_node_invocation(self, config_key: str) -> None:
        """
        Track when a node is invoked during graph execution.

        :param config_key: The configuration key of the node being invoked.
        """
        track_data = {**self.__get_track_data(), "configKey": config_key}
        self.__track("$ld:ai:graph:node_invocation", track_data, 1)

    def track_tool_call(self, config_key: str, tool_key: str) -> None:
        """
        Track tool calls made by nodes during graph execution.

        :param config_key: The configuration key of the node making the tool call.
        :param tool_key: The key of the tool being called.
        """
        track_data = {
            **self.__get_track_data(),
            "configKey": config_key,
            "toolKey": tool_key,
        }
        self.__track("$ld:ai:graph:tool_call", track_data, 1)

    def track_node_judge_response(self, config_key: str, response: Any) -> None:
        """
        Track judge responses for a specific node.

        :param config_key: The configuration key of the node being evaluated.
        :param response: JudgeResponse object containing evals and success status.
        """
        self.__track_judge(response, {"configKey": config_key})

    def track_redirect(self, source_key: str, redirected_target: str) -> None:
        """
        Track when a node redirects to a different target than originally specified.

        :param source_key: The configuration key of the source node.
        :param redirected_target: The configuration key of the target node that
            was redirected to.
        """
        track_data = {
            **self.__get_track_data(),
            "sourceKey": source_key,
            "redirectedTarget": redirected_target,
        }
        self.__track("$ld:ai:graph:redirect", track_data, 1)

    def track_handoff_success(self, source_key: str, target_key: str) -> None:
        """
        Track successful handoffs between nodes.

        :param source_key: The configuration key of the source node.
        :param target_key: The configuration key of the target node.
        """
        self.__track_handoff("$ld:ai:graph:handoff_success", source_key, target_key)

    def track_handoff_failure(self, source_key: str, target_key: str) -> None:
        """
        Track failed handoffs between nodes.

        :param source_key: The configuration key of the source node.
        :param target_key: The configuration key of the target node.
        """
        self.__track_handoff("$ld:ai:graph:handoff_failure", source_key, target_key)

0 commit comments

Comments
 (0)