From 587ef808ad34393914c6e37f163aa98e6186d548 Mon Sep 17 00:00:00 2001 From: Jehoon Shin Date: Mon, 18 May 2026 18:28:46 +0900 Subject: [PATCH] fix(anthropic): surface cache_read/write tokens in metadata chunk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When prompt caching is in use, Anthropic returns `input_tokens` as the NON-CACHED portion only. `cache_read_input_tokens` and `cache_creation_input_tokens` were dropped during metadata chunk formatting, so downstream consumers (`Agent.metrics.accumulated_usage` and anything that derives cost from it) saw only the uncached delta and under-reported real usage / cost — sometimes by an order of magnitude on image+text workloads where the image dominates the cached prefix. This change: - Maps `cache_read_input_tokens` → `cacheReadInputTokens` and `cache_creation_input_tokens` → `cacheWriteInputTokens` on the metadata chunk, both already defined as optional members of `types.event_loop.Usage`. - Recomputes `totalTokens` as `uncached_input + cache_read + cache_write + output_tokens` so the total includes the cached input that is actually billed, not just the uncached delta. - Omits each cache field independently when it is zero/absent, so non-cached responses keep the existing chunk shape (no consumer change required). Added tests covering the cached and non-cached metadata shapes. 
--- src/strands/models/anthropic.py | 23 ++++++++--- tests/strands/models/test_anthropic.py | 54 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py index 812171a0c..ae028064d 100644 --- a/src/strands/models/anthropic.py +++ b/src/strands/models/anthropic.py @@ -359,14 +359,27 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent: case "metadata": usage = event["usage"] + input_tokens = usage["input_tokens"] + output_tokens = usage["output_tokens"] + cache_read = usage.get("cache_read_input_tokens") or 0 + cache_write = usage.get("cache_creation_input_tokens") or 0 + # Anthropic reports `input_tokens` as the NON-CACHED portion only. + # `totalTokens` should reflect everything billed on the input side: + # uncached + cache reads + cache writes. + total_input = input_tokens + cache_read + cache_write + usage_chunk: dict[str, int] = { + "inputTokens": input_tokens, + "outputTokens": output_tokens, + "totalTokens": total_input + output_tokens, + } + if cache_read: + usage_chunk["cacheReadInputTokens"] = cache_read + if cache_write: + usage_chunk["cacheWriteInputTokens"] = cache_write return { "metadata": { - "usage": { - "inputTokens": usage["input_tokens"], - "outputTokens": usage["output_tokens"], - "totalTokens": usage["input_tokens"] + usage["output_tokens"], - }, + "usage": usage_chunk, "metrics": { "latencyMs": 0, # TODO }, diff --git a/tests/strands/models/test_anthropic.py b/tests/strands/models/test_anthropic.py index 0ebdb161c..357d58266 100644 --- a/tests/strands/models/test_anthropic.py +++ b/tests/strands/models/test_anthropic.py @@ -727,6 +727,60 @@ def test_format_chunk_metadata(model): assert tru_chunk == exp_chunk +def test_format_chunk_metadata_with_cache_tokens(model): + """When prompt caching is active, Anthropic returns cache_read_input_tokens + and cache_creation_input_tokens alongside input_tokens; surface them so + downstream cost 
accounting reflects what the user is billed for.""" + event = { + "type": "metadata", + "usage": { + "input_tokens": 5, + "output_tokens": 7, + "cache_read_input_tokens": 100, + "cache_creation_input_tokens": 50, + }, + } + + tru_chunk = model.format_chunk(event) + exp_chunk = { + "metadata": { + "usage": { + "inputTokens": 5, + "outputTokens": 7, + # 5 (uncached) + 100 (cache read) + 50 (cache write) + 7 (output) + "totalTokens": 162, + "cacheReadInputTokens": 100, + "cacheWriteInputTokens": 50, + }, + "metrics": { + "latencyMs": 0, + }, + }, + } + + assert tru_chunk == exp_chunk + + +def test_format_chunk_metadata_omits_zero_cache_tokens(model): + """When cache fields are absent or zero, keep the legacy chunk shape so + consumers expecting only inputTokens/outputTokens keep working.""" + event = { + "type": "metadata", + "usage": { + "input_tokens": 5, + "output_tokens": 7, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0, + }, + } + + tru_chunk = model.format_chunk(event) + + assert "cacheReadInputTokens" not in tru_chunk["metadata"]["usage"] + assert "cacheWriteInputTokens" not in tru_chunk["metadata"]["usage"] + assert tru_chunk["metadata"]["usage"]["totalTokens"] == 12 + + def test_format_chunk_unknown(model): event = {"type": "unknown"}