diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index 9a6cd7a4..ba8e0b73 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -70,6 +70,12 @@ func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) { }, nil } +func (c *agenticClientImpl) getFreshContext() []llmprovider.Message { + return []llmprovider.Message{ + llmprovider.TextMessage(llmprovider.RoleSystem, c.systemPrompt), + } +} + // CallLLM executes an agentic loop with tools to answer questions about code. // Each question is processed sequentially, with follow-up questions benefiting // from the context accumulated by earlier questions. @@ -97,9 +103,7 @@ func (c *agenticClientImpl) CallLLM( c.executor = newToolExecutor(repositoryPath) // Build initial messages with system prompt only (no user message yet) - messages := []llmprovider.Message{ - llmprovider.TextMessage(llmprovider.RoleSystem, c.systemPrompt), - } + messages := c.getFreshContext() // Print debug log file path before starting the loop printDebugLogPath() @@ -135,7 +139,7 @@ func (c *agenticClientImpl) CallLLM( messages = append(messages, llmprovider.TextMessage(llmprovider.RoleHuman, question)) // Run the question loop - updatedMessages, answer, err := c.runQuestionLoop( + updatedMessages, answer, usage, err := c.runQuestionLoop( ctx, provider, messages, @@ -168,6 +172,19 @@ func (c *agenticClientImpl) CallLLM( } return nil, fmt.Errorf("question %d exhausted budget without providing answer", questionIndex+1) } + + debugLog( + "AgenticClient: accumulated context tokens after question %d: %d", + questionIndex+1, + usage.TotalTokens, + ) + if usage.TotalTokens > 100000 { + debugLog(
+ "AgenticClient: context reached %d tokens (>100k). Flushing context to start fresh.", + usage.TotalTokens, + ) + messages = c.getFreshContext() + } } debugLog("AgenticClient: successfully answered all %d questions", len(questions)) @@ -182,10 +199,11 @@ func (c *agenticClientImpl) runQuestionLoop( messages []llmprovider.Message, toolsBudget int, questionIndex int, -) ([]llmprovider.Message, *AnswerSchema, error) { +) ([]llmprovider.Message, *AnswerSchema, llmprovider.Usage, error) { toolCallsRemaining := toolsBudget consecutiveNoTools := 0 iteration := 0 + var lastUsage llmprovider.Usage budgetNudged := false @@ -208,16 +226,18 @@ func (c *agenticClientImpl) runQuestionLoop( resp, err := c.callLLMWithRetry(ctx, provider, messages) if err != nil { debugLog("AgenticClient: LLM call failed: %v", err) - return messages, nil, fmt.Errorf( + return messages, nil, llmprovider.Usage{}, fmt.Errorf( "LLM call failed after %d retries: %w", maxLLMRetries, err, ) } + lastUsage = resp.Usage + if len(resp.Choices) == 0 { debugLog("AgenticClient: no choices in response") - return messages, nil, fmt.Errorf("no response from LLM") + return messages, nil, llmprovider.Usage{}, fmt.Errorf("no response from LLM") } choice := resp.Choices[0] @@ -239,7 +259,7 @@ func (c *agenticClientImpl) runQuestionLoop( maxConsecutiveNoTools, ) if consecutiveNoTools >= maxConsecutiveNoTools { - return messages, nil, fmt.Errorf( + return messages, nil, resp.Usage, fmt.Errorf( "agent failed to use tools after %d consecutive attempts", maxConsecutiveNoTools, ) @@ -247,7 +267,10 @@ func (c *agenticClientImpl) runQuestionLoop( // Add the AI response and remind to use tools if choice.Content != "" { - messages = append(messages, llmprovider.TextMessage(llmprovider.RoleAI, choice.Content)) + messages = append( + messages, + llmprovider.TextMessage(llmprovider.RoleAI, choice.Content), + ) } debugLog("AgenticClient: reminding agent to use tools") messages = append(messages, llmprovider.TextMessage(
@@ -321,13 +344,13 @@ func (c *agenticClientImpl) runQuestionLoop( }) if answer != nil { debugLog("AgenticClient: received answer for question %d", questionIndex+1) - return messages, answer, nil + return messages, answer, resp.Usage, nil } } // Budget exhausted without answer debugLog("AgenticClient: question %d exhausted budget", questionIndex+1) - return messages, nil, nil + return messages, nil, lastUsage, nil } // processToolCall processes a single tool call and returns the response message and optional answer diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go index abf72676..71cd22ab 100644 --- a/pkg/llmclient/agentic_client_integration_test.go +++ b/pkg/llmclient/agentic_client_integration_test.go @@ -73,7 +73,7 @@ func TestAgenticClient_NoFilesystemAccess(t *testing.T) { testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) require.NoError(t, err) - prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + prompt := "Does this application access the filesystem (read or write files)?" answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath) @@ -99,7 +99,7 @@ func TestAgenticClient_FilesystemAccess(t *testing.T) { testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) require.NoError(t, err) - prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + prompt := "Does this application access the filesystem (read or write files)?" answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath) @@ -126,8 +126,8 @@ func TestAgenticClient_TwoQuestions(t *testing.T) { require.NoError(t, err) questions := []string{
- "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.", - "Which specific files contain the filesystem operations and what operations do they perform?", + "Does this application access the filesystem (read or write files)?", + "Does this application make any external HTTP requests to a remote server?", } answers, err := client.CallLLM(context.Background(), questions, testDataPath) @@ -152,6 +152,8 @@ func TestAgenticClient_TwoQuestions(t *testing.T) { "Second answer's question should match", ) require.NotEmpty(t, answers[1].Answer, "Second answer should be populated") + require.Equal(t, false, answers[1].ShortAnswer, + "Second answer should be false - app does not make HTTP requests") }) } } @@ -167,9 +169,9 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) { require.NoError(t, err) questions := []string{ - "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.", - "Which specific files contain the filesystem operations and what operations do they perform?",
- "Does this application use any caching mechanisms? If so, describe how the cache works.", + "Does this application access the filesystem (read or write files)?", + "Does this application make any external HTTP requests to a remote server?", + "Does this application use any caching mechanisms?", } answers, err := client.CallLLM(context.Background(), questions, testDataPath) @@ -194,6 +196,8 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) { "Second answer's question should match", ) require.NotEmpty(t, answers[1].Answer, "Second answer should be populated") + require.Equal(t, false, answers[1].ShortAnswer, + "Second answer should be false - app does not make HTTP requests") require.Equal( t, @@ -202,6 +206,8 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) { "Third answer's question should match", ) require.NotEmpty(t, answers[2].Answer, "Third answer should be populated") + require.Equal(t, true, answers[2].ShortAnswer, + "Third answer should be true - app uses caching") }) } } diff --git a/pkg/llmprovider/anthropicprovider/client.go b/pkg/llmprovider/anthropicprovider/client.go index 5eeed004..f6a089fb 100644 --- a/pkg/llmprovider/anthropicprovider/client.go +++ b/pkg/llmprovider/anthropicprovider/client.go @@ -286,6 +286,11 @@ func fromAnthropicResponse(resp *anthropic.Message) *llmprovider.Response { return &llmprovider.Response{ Choices: []*llmprovider.Choice{choice}, + Usage: llmprovider.Usage{ + InputTokens: int(resp.Usage.InputTokens), + OutputTokens: int(resp.Usage.OutputTokens), + TotalTokens: int(resp.Usage.InputTokens + resp.Usage.OutputTokens), + }, } } diff --git a/pkg/llmprovider/geminiprovider/client.go b/pkg/llmprovider/geminiprovider/client.go index f7ad596a..d660e7cb 100644 --- a/pkg/llmprovider/geminiprovider/client.go +++ b/pkg/llmprovider/geminiprovider/client.go @@ -351,6 +351,12 @@ func fromGenAIResponse(resp *genai.GenerateContentResponse) (*llmprovider.Respon result.Choices = append(result.Choices, choice) } + if resp.UsageMetadata != nil {
+ result.Usage.InputTokens = int(resp.UsageMetadata.PromptTokenCount) + result.Usage.OutputTokens = int(resp.UsageMetadata.CandidatesTokenCount) + result.Usage.TotalTokens = int(resp.UsageMetadata.TotalTokenCount) + } + return result, nil } diff --git a/pkg/llmprovider/openaiprovider/client.go b/pkg/llmprovider/openaiprovider/client.go index 4dd782cc..56266d93 100644 --- a/pkg/llmprovider/openaiprovider/client.go +++ b/pkg/llmprovider/openaiprovider/client.go @@ -217,5 +217,9 @@ func fromOpenAIResponse(resp *openai.ChatCompletion) *llmprovider.Response { result.Choices = append(result.Choices, choice) } + result.Usage.InputTokens = int(resp.Usage.PromptTokens) + result.Usage.OutputTokens = int(resp.Usage.CompletionTokens) + result.Usage.TotalTokens = int(resp.Usage.TotalTokens) + return result } diff --git a/pkg/llmprovider/types.go b/pkg/llmprovider/types.go index d6a592e8..840dce59 100644 --- a/pkg/llmprovider/types.go +++ b/pkg/llmprovider/types.go @@ -81,9 +81,17 @@ type ThinkingPart struct { func (ThinkingPart) partMarker() {} +// Usage tracks token usage metrics. +type Usage struct { + InputTokens int + OutputTokens int + TotalTokens int +} + // Response is the result of a GenerateContent call. type Response struct { Choices []*Choice + Usage Usage } // Choice is a single response candidate.