Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 34 additions & 11 deletions pkg/llmclient/agentic_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) {
}, nil
}

// getFreshContext builds a brand-new conversation history containing only
// the configured system prompt. Used both to seed the first question and to
// flush accumulated context once the token budget is exceeded.
func (c *agenticClientImpl) getFreshContext() []llmprovider.Message {
	systemMsg := llmprovider.TextMessage(llmprovider.RoleSystem, c.systemPrompt)
	return []llmprovider.Message{systemMsg}
}

// CallLLM executes an agentic loop with tools to answer questions about code.
// Each question is processed sequentially, with follow-up questions benefiting
// from the context accumulated by earlier questions.
Expand Down Expand Up @@ -97,9 +103,7 @@ func (c *agenticClientImpl) CallLLM(
c.executor = newToolExecutor(repositoryPath)

// Build initial messages with system prompt only (no user message yet)
messages := []llmprovider.Message{
llmprovider.TextMessage(llmprovider.RoleSystem, c.systemPrompt),
}
messages := c.getFreshContext()

// Print debug log file path before starting the loop
printDebugLogPath()
Expand Down Expand Up @@ -135,7 +139,7 @@ func (c *agenticClientImpl) CallLLM(
messages = append(messages, llmprovider.TextMessage(llmprovider.RoleHuman, question))

// Run the question loop
updatedMessages, answer, err := c.runQuestionLoop(
updatedMessages, answer, usage, err := c.runQuestionLoop(
ctx,
provider,
messages,
Expand Down Expand Up @@ -168,6 +172,19 @@ func (c *agenticClientImpl) CallLLM(
}
return nil, fmt.Errorf("question %d exhausted budget without providing answer", questionIndex+1)
}

debugLog(
"AgenticClient: accumulated context tokens after question %d: %d",
questionIndex+1,
usage.TotalTokens,
)
if usage.TotalTokens > 100000 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: maybe extract the 100000 in a const so it's easier to change if necessary?

debugLog(
"AgenticClient: context reached %d tokens (>100k). Flushing context to start fresh.",
usage.TotalTokens,
)
messages = c.getFreshContext()
}
}

debugLog("AgenticClient: successfully answered all %d questions", len(questions))
Expand All @@ -182,10 +199,11 @@ func (c *agenticClientImpl) runQuestionLoop(
messages []llmprovider.Message,
toolsBudget int,
questionIndex int,
) ([]llmprovider.Message, *AnswerSchema, error) {
) ([]llmprovider.Message, *AnswerSchema, llmprovider.Usage, error) {
toolCallsRemaining := toolsBudget
consecutiveNoTools := 0
iteration := 0
var lastUsage llmprovider.Usage

budgetNudged := false

Expand All @@ -208,16 +226,18 @@ func (c *agenticClientImpl) runQuestionLoop(
resp, err := c.callLLMWithRetry(ctx, provider, messages)
if err != nil {
debugLog("AgenticClient: LLM call failed: %v", err)
return messages, nil, fmt.Errorf(
return messages, nil, llmprovider.Usage{}, fmt.Errorf(
"LLM call failed after %d retries: %w",
maxLLMRetries,
err,
)
}

lastUsage = resp.Usage

if len(resp.Choices) == 0 {
debugLog("AgenticClient: no choices in response")
return messages, nil, fmt.Errorf("no response from LLM")
return messages, nil, llmprovider.Usage{}, fmt.Errorf("no response from LLM")
}

choice := resp.Choices[0]
Expand All @@ -239,15 +259,18 @@ func (c *agenticClientImpl) runQuestionLoop(
maxConsecutiveNoTools,
)
if consecutiveNoTools >= maxConsecutiveNoTools {
return messages, nil, fmt.Errorf(
return messages, nil, resp.Usage, fmt.Errorf(
"agent failed to use tools after %d consecutive attempts",
maxConsecutiveNoTools,
)
}

// Add the AI response and remind to use tools
if choice.Content != "" {
messages = append(messages, llmprovider.TextMessage(llmprovider.RoleAI, choice.Content))
messages = append(
messages,
llmprovider.TextMessage(llmprovider.RoleAI, choice.Content),
)
}
debugLog("AgenticClient: reminding agent to use tools")
messages = append(messages, llmprovider.TextMessage(
Expand Down Expand Up @@ -321,13 +344,13 @@ func (c *agenticClientImpl) runQuestionLoop(
})
if answer != nil {
debugLog("AgenticClient: received answer for question %d", questionIndex+1)
return messages, answer, nil
return messages, answer, resp.Usage, nil
}
}

// Budget exhausted without answer
debugLog("AgenticClient: question %d exhausted budget", questionIndex+1)
return messages, nil, nil
return messages, nil, lastUsage, nil
}

// processToolCall processes a single tool call and returns the response message and optional answer
Expand Down
20 changes: 13 additions & 7 deletions pkg/llmclient/agentic_client_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
require.NoError(t, err)

prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
prompt := "Does this application access the filesystem (read or write files)?"

answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath)

Expand All @@ -99,7 +99,7 @@ func TestAgenticClient_FilesystemAccess(t *testing.T) {
testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
require.NoError(t, err)

prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
prompt := "Does this application access the filesystem (read or write files)?"

answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath)

Expand All @@ -126,8 +126,8 @@ func TestAgenticClient_TwoQuestions(t *testing.T) {
require.NoError(t, err)

questions := []string{
"Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.",
"Which specific files contain the filesystem operations and what operations do they perform?",
"Does this application access the filesystem (read or write files)?",
"Does this application make any external HTTP requests to a remote server?",
}

answers, err := client.CallLLM(context.Background(), questions, testDataPath)
Expand All @@ -152,6 +152,8 @@ func TestAgenticClient_TwoQuestions(t *testing.T) {
"Second answer's question should match",
)
require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
require.Equal(t, false, answers[1].ShortAnswer,
"Second answer should be false - app does not make HTTP requests")
})
}
}
Expand All @@ -167,9 +169,9 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) {
require.NoError(t, err)

questions := []string{
"Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.",
"Which specific files contain the filesystem operations and what operations do they perform?",
"Does this application use any caching mechanisms? If so, describe how the cache works.",
"Does this application access the filesystem (read or write files)?",
"Does this application make any external HTTP requests to a remote server?",
"Does this application use any caching mechanisms?",
}

answers, err := client.CallLLM(context.Background(), questions, testDataPath)
Expand All @@ -194,6 +196,8 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) {
"Second answer's question should match",
)
require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
require.Equal(t, false, answers[1].ShortAnswer,
"Second answer should be false - app does not make HTTP requests")

require.Equal(
t,
Expand All @@ -202,6 +206,8 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) {
"Third answer's question should match",
)
require.NotEmpty(t, answers[2].Answer, "Third answer should be populated")
require.Equal(t, true, answers[2].ShortAnswer,
"Third answer should be true - app uses caching")
})
}
}
5 changes: 5 additions & 0 deletions pkg/llmprovider/anthropicprovider/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,11 @@ func fromAnthropicResponse(resp *anthropic.Message) *llmprovider.Response {

return &llmprovider.Response{
Choices: []*llmprovider.Choice{choice},
Usage: llmprovider.Usage{
InputTokens: int(resp.Usage.InputTokens),
OutputTokens: int(resp.Usage.OutputTokens),
TotalTokens: int(resp.Usage.InputTokens + resp.Usage.OutputTokens),
},
}
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/llmprovider/geminiprovider/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,12 @@ func fromGenAIResponse(resp *genai.GenerateContentResponse) (*llmprovider.Respon
result.Choices = append(result.Choices, choice)
}

if resp.UsageMetadata != nil {
result.Usage.InputTokens = int(resp.UsageMetadata.PromptTokenCount)
result.Usage.OutputTokens = int(resp.UsageMetadata.CandidatesTokenCount)
result.Usage.TotalTokens = int(resp.UsageMetadata.TotalTokenCount)
}

return result, nil
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/llmprovider/openaiprovider/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,5 +217,9 @@ func fromOpenAIResponse(resp *openai.ChatCompletion) *llmprovider.Response {
result.Choices = append(result.Choices, choice)
}

result.Usage.InputTokens = int(resp.Usage.PromptTokens)
result.Usage.OutputTokens = int(resp.Usage.CompletionTokens)
result.Usage.TotalTokens = int(resp.Usage.TotalTokens)

return result
}
8 changes: 8 additions & 0 deletions pkg/llmprovider/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,17 @@ type ThinkingPart struct {

func (ThinkingPart) partMarker() {}

// Usage tracks token usage metrics.
type Usage struct {
	// InputTokens is the number of prompt/context tokens sent to the model.
	InputTokens int
	// OutputTokens is the number of tokens generated by the model.
	OutputTokens int
	// TotalTokens is the combined input and output token count.
	// NOTE(review): providers may report this directly (OpenAI, Gemini) or it
	// may be derived as InputTokens+OutputTokens (Anthropic) — see the
	// per-provider conversion code.
	TotalTokens int
}

// Response is the result of a GenerateContent call.
type Response struct {
	// Choices holds the response candidates; callers typically use Choices[0].
	Choices []*Choice
	// Usage reports the token accounting for this call, populated by each
	// provider's response-conversion function.
	Usage Usage
}

// Choice is a single response candidate.
Expand Down
Loading