From 8da7b82e21e34285d069f74aeb8cae387d54bf23 Mon Sep 17 00:00:00 2001
From: Anass <anass@getlago.com>
Date: Wed, 20 May 2026 16:11:34 +0200
Subject: [PATCH 1/5] Add native Anthropic SDK support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- src/lago_agent_sdk/adapters/anthropic_native.py — extract_anthropic_native
- src/lago_agent_sdk/wrappers/anthropic.py — wraps messages.create (sync + async,
  streaming and non-streaming) and messages.stream context manager
- Wired into sdk.wrap() dispatch and adapters/__init__.py exports
- anthropic = ["anthropic>=0.30"] optional-dep group
- 19 new unit tests + 3 live integration tests; 256 unit tests pass
- Coverage 80.71% — gate maintained
- 9 captured response fixtures from real Anthropic API
- README + CHANGELOG updated
---
 CHANGELOG.md                                  |   8 +
 README.md                                     |  42 +++-
 pyproject.toml                                |   8 +
 src/lago_agent_sdk/adapters/__init__.py       |   2 +
 .../adapters/anthropic_native.py              |  91 +++++++
 src/lago_agent_sdk/sdk.py                     |   8 +-
 src/lago_agent_sdk/wrappers/anthropic.py      | 231 ++++++++++++++++++
 tests/integration/test_live_anthropic.py      | 114 +++++++++
 .../anthropic_native/01_plain_haiku.json      |  33 +++
 .../anthropic_native/02_plain_sonnet.json     |  33 +++
 .../anthropic_native/03_tool_use.json         |  39 +++
 .../anthropic_native/04_cache_create_5m.json  |  33 +++
 .../anthropic_native/05_cache_read.json       |  33 +++
 .../anthropic_native/06_cache_create_1h.json  |  33 +++
 .../07_extended_thinking.json                 |  38 +++
 .../fixtures/anthropic_native/08_stream.json  | 142 +++++++++++
 .../anthropic_native/09_multi_turn.json       |  33 +++
 .../adapters/fixtures/capture_anthropic.py    | 179 ++++++++++++++
 tests/unit/adapters/test_anthropic_native.py  | 152 ++++++++++++
 tests/unit/test_wrapper_anthropic.py          | 223 +++++++++++++++++
 uv.lock                                       | 163 +++++++++++-
 21 files changed, 1624 insertions(+), 14 deletions(-)
 create mode 100644 src/lago_agent_sdk/adapters/anthropic_native.py
 create mode 100644 src/lago_agent_sdk/wrappers/anthropic.py
 create mode 100644 tests/integration/test_live_anthropic.py
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/01_plain_haiku.json
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/02_plain_sonnet.json
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/03_tool_use.json
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/04_cache_create_5m.json
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/05_cache_read.json
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/06_cache_create_1h.json
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/07_extended_thinking.json
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/08_stream.json
 create mode 100644 tests/unit/adapters/fixtures/anthropic_native/09_multi_turn.json
 create mode 100644 tests/unit/adapters/fixtures/capture_anthropic.py
 create mode 100644 tests/unit/adapters/test_anthropic_native.py
 create mode 100644 tests/unit/test_wrapper_anthropic.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4cd6396..fa696ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,14 @@ All notable changes to this project will be documented here. Format follows [Kee
 
 ## [Unreleased]
 
+### Added
+- Native `anthropic` SDK support. Wraps `Anthropic.messages.create` (including `stream=True`) and `Anthropic.messages.stream(...)` context manager. Same coverage on `AsyncAnthropic` (sync + async variants).
+- `extract_anthropic_native` adapter with the full Anthropic field map: `input_tokens`, `output_tokens`, `cache_creation_input_tokens`, `cache_read_input_tokens`, `cache_creation.ephemeral_5m_input_tokens`, `cache_creation.ephemeral_1h_input_tokens`, `content[].type == "tool_use"`.
+- `anthropic` optional dependency group: `pip install 'lago-agent-sdk[anthropic]'`.
+- 19 new unit tests (adapter + wrapper) and 3 live integration tests (gated on `ANTHROPIC_API_KEY`). Total: 256 unit tests, ≥80% coverage maintained.
+- 9 captured response fixtures from the real Anthropic API (plain, tool use, 5m + 1h prompt caching, extended thinking, streaming, multi-turn).
+
+
 ## [0.1.0] — initial release
 
 ### Added
diff --git a/README.md b/README.md
index 89ad3f9..b8855c7 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ pip install lago-agent-sdk
 
 For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`).
 For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`).
+For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`).
 
 ## Quickstart — Bedrock
 
@@ -52,6 +53,25 @@ sdk.flush()
 
 The wrapped client behaves identically to the original — same arguments, same return shape, same exceptions. The SDK adds an in-memory queue that batches events to Lago in the background.
 
+## Quickstart — Anthropic
+
+```python
+from anthropic import Anthropic
+from lago_agent_sdk import LagoSDK
+
+sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
+client = sdk.wrap(Anthropic(api_key="..."))
+
+resp = client.messages.create(
+    model="claude-sonnet-4-6",
+    max_tokens=200,
+    messages=[{"role": "user", "content": "Hello"}],
+)
+sdk.flush()
+```
+
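+Works with `Anthropic` and `AsyncAnthropic`. Both `messages.create(..., stream=True)` and the `messages.stream(...)` context manager are instrumented: with `stream=True`, usage is taken from the stream's events and emitted once the iterator finishes or is closed; with `messages.stream(...)`, it is taken from the final message when the `with` block exits.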
+
 ## Quickstart — Mistral
 
 ```python
@@ -92,9 +112,9 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks.
 |---|---|---|
 | AWS Bedrock | `Converse` (sync + stream) | ✓ |
 | AWS Bedrock | `InvokeModel` (sync + stream), 7 model families | ✓ |
+| Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ |
 | Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ |
 | OpenAI | native SDK | Phase 2 |
-| Anthropic | native SDK | Phase 2 |
 | Google Gemini | native SDK | Phase 2 |
 | LiteLLM | callback bridge | Phase 4 |
 
@@ -102,16 +122,16 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks.
 
 `CanonicalUsage` carries 10 numeric fields. Which ones populate depends on the provider:
 
-| Field | Lago metric code | Bedrock | Mistral native |
-|---|---|---|---|
-| input | `llm_input_tokens` | ✓ | ✓ |
-| output | `llm_output_tokens` | ✓ | ✓ |
-| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ (when cache hits) |
-| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✗ |
-| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✗ |
-| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output) |
-| tool_calls | `llm_tool_calls` | ✓ | ✓ |
-| image_input / audio_input | `llm_image/audio_input_tokens` | ✗ | ✗ |
+| Field | Lago metric code | Bedrock | Anthropic native | Mistral native |
+|---|---|---|---|---|
+| input | `llm_input_tokens` | ✓ | ✓ | ✓ |
+| output | `llm_output_tokens` | ✓ | ✓ | ✓ |
+| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) |
+| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ |
+| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ |
+| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) |
+| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ |
+| image_input / audio_input | `llm_image/audio_input_tokens` | ✗ | ✗ | ✗ |
 
 Reasoning, image, and audio fields will populate when Phase 2 native OpenAI ships.
 
diff --git a/pyproject.toml b/pyproject.toml
index 8c23f42..4044de0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,9 @@ dev = [
     "mypy>=1.10",
     "types-requests>=2.31",
 ]
+anthropic = [
+    "anthropic>=0.30",
+]
 
 [project.urls]
 Homepage = "https://www.getlago.com"
@@ -81,3 +84,8 @@ files = ["src/lago_agent_sdk"]
 [[tool.mypy.overrides]]
 module = ["boto3.*", "botocore.*", "mistralai.*"]
 ignore_missing_imports = true
+
+[dependency-groups]
+dev = [
+    "anthropic>=0.30",
+]
diff --git a/src/lago_agent_sdk/adapters/__init__.py b/src/lago_agent_sdk/adapters/__init__.py
index 31f94e8..217ed3d 100644
--- a/src/lago_agent_sdk/adapters/__init__.py
+++ b/src/lago_agent_sdk/adapters/__init__.py
@@ -1,8 +1,10 @@
+from .anthropic_native import extract_anthropic_native
 from .bedrock_converse import extract_bedrock_converse
 from .bedrock_invoke import extract_bedrock_invoke, pick_invoke_adapter
 from .mistral_native import extract_mistral_native
 
 __all__ = [
+    "extract_anthropic_native",
     "extract_bedrock_converse",
     "extract_bedrock_invoke",
     "pick_invoke_adapter",
diff --git a/src/lago_agent_sdk/adapters/anthropic_native.py b/src/lago_agent_sdk/adapters/anthropic_native.py
new file mode 100644
index 0000000..5943676
--- /dev/null
+++ b/src/lago_agent_sdk/adapters/anthropic_native.py
@@ -0,0 +1,91 @@
+"""Anthropic native adapter — verified against real fixtures.
+
+Field mapping:
+  usage.input_tokens                                 → input
+  usage.output_tokens                                → output
+  usage.cache_read_input_tokens                      → cache_read
+  usage.cache_creation_input_tokens                  → cache_write
+  usage.cache_creation.ephemeral_5m_input_tokens     → cache_write_5m
+  usage.cache_creation.ephemeral_1h_input_tokens     → cache_write_1h
+  count of content[].type == "tool_use"              → tool_calls
+
+Not exposed by Anthropic (folded into output_tokens):
+  reasoning_tokens — even with extended thinking enabled
+
+Unknown usage fields (service_tier, inference_geo, server_tool_use, …) land in extras.
+"""
+
+from __future__ import annotations
+
+from typing import Any, cast
+
+from ..canonical import CanonicalUsage
+
+_KNOWN_USAGE_FIELDS = {
+    "input_tokens",
+    "output_tokens",
+    "cache_read_input_tokens",
+    "cache_creation_input_tokens",
+    "cache_creation",
+}
+
+
+def _safe_dict(v: Any) -> dict[str, Any]:
+    return {} if not isinstance(v, dict) else v  # non-dict input degrades to {}
+
+
+def _safe_int(v: Any) -> int:
+    try:
+        return max(0, int(v or 0))  # None/""/0 -> 0; negatives clamp to 0
+    except (TypeError, ValueError, OverflowError):  # OverflowError: int(float("inf"))
+        return 0
+
+
+def _to_dict(obj: Any) -> dict[str, Any]:
+    """Return `obj` if it is a dict, else its `model_dump()` result; `{}` when neither works."""
+    if isinstance(obj, dict):
+        return obj
+    if not hasattr(obj, "model_dump"):
+        return {}
+    try:
+        return cast(dict[str, Any], obj.model_dump())
+    except Exception:  # noqa: BLE001
+        return {}
+
+
+def extract_anthropic_native(response: Any, model_id: str = "") -> CanonicalUsage:
+    """Map an Anthropic native response onto `CanonicalUsage`.
+
+    `response` may be the SDK's pydantic Message, a plain dict (e.g. a captured
+    fixture), or the synthetic `{"usage": {...}}` blob built by the streaming wrapper.
+    A non-empty `model_id` wins over the response's own `model` field.
+    """
+    resp = _to_dict(response)  # dicts pass through untouched
+    usage = _safe_dict(resp.get("usage"))
+    cache_split = _safe_dict(usage.get("cache_creation"))
+
+    blocks = resp.get("content")
+    tool_calls = 0
+    if isinstance(blocks, list):
+        tool_calls = sum(isinstance(b, dict) and b.get("type") == "tool_use" for b in blocks)
+
+    reported_model = resp.get("model")
+    if not isinstance(reported_model, str):
+        reported_model = ""
+
+    # Usage keys without a canonical slot are passed through verbatim.
+    extras = {k: v for k, v in usage.items() if k not in _KNOWN_USAGE_FIELDS}
+
+    return CanonicalUsage(
+        input=_safe_int(usage.get("input_tokens")),
+        output=_safe_int(usage.get("output_tokens")),
+        cache_read=_safe_int(usage.get("cache_read_input_tokens")),
+        cache_write=_safe_int(usage.get("cache_creation_input_tokens")),
+        cache_write_5m=_safe_int(cache_split.get("ephemeral_5m_input_tokens")),
+        cache_write_1h=_safe_int(cache_split.get("ephemeral_1h_input_tokens")),
+        tool_calls=tool_calls,
+        model=model_id or reported_model,
+        provider="anthropic",
+        api="native",
+        extras=extras,
+    )
diff --git a/src/lago_agent_sdk/sdk.py b/src/lago_agent_sdk/sdk.py
index c401df5..c303e03 100644
--- a/src/lago_agent_sdk/sdk.py
+++ b/src/lago_agent_sdk/sdk.py
@@ -83,13 +83,17 @@ def wrap(
             from .wrappers.mistral import wrap_mistral_client
 
             return wrap_mistral_client(self, client, dimensions=dimensions, subscription=subscription)
+        if kind == "anthropic":
+            from .wrappers.anthropic import wrap_anthropic_client
+
+            return wrap_anthropic_client(self, client, dimensions=dimensions, subscription=subscription)
         if kind == "unknown":
             raise UnknownClientError(
                 f"Unknown client passed to wrap(): {type(client).__module__}.{type(client).__name__}. "
-                "Supported: boto3 bedrock-runtime, mistralai.client.Mistral."
+                "Supported: boto3 bedrock-runtime, mistralai.client.Mistral, anthropic.Anthropic / AsyncAnthropic."
             )
         raise UnknownClientError(
-            f"Client kind '{kind}' is not yet supported. Implemented: 'bedrock', 'mistral'."
+            f"Client kind '{kind}' is not yet supported. Implemented: 'bedrock', 'mistral', 'anthropic'."
         )
 
     # ------------------------------------------------------------------
diff --git a/src/lago_agent_sdk/wrappers/anthropic.py b/src/lago_agent_sdk/wrappers/anthropic.py
new file mode 100644
index 0000000..50da482
--- /dev/null
+++ b/src/lago_agent_sdk/wrappers/anthropic.py
@@ -0,0 +1,231 @@
+"""anthropic SDK wrapper.
+
+Wraps the public methods of `Anthropic.messages` (and `AsyncAnthropic.messages`)
+in place — instrumentation never breaks the customer's call.
+
+Methods wrapped:
+  - .create(...)                   — non-streaming and stream=True both supported
+  - .stream(...)                   — sync context-manager helper
+  - AsyncMessages.create(...)      — async non-streaming and stream=True
+  - AsyncMessages.stream(...)      — async context-manager helper
+
+Per-call override: pop `extra_lago={"subscription": ..., "dimensions": ...}` from kwargs
+before forwarding so Anthropic's strict validation doesn't reject it.
+"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import AsyncIterator, Iterator
+from typing import Any
+
+from ..adapters import extract_anthropic_native
+
+logger = logging.getLogger("lago_agent_sdk.wrappers.anthropic")
+
+_INSTRUMENTED_ATTR = "_lago_instrumented"
+_LAGO_KWARG = "extra_lago"
+
+
+def _pop_lago_kwarg(kwargs: dict[str, Any]) -> dict[str, Any]:
+    return kwargs.pop(_LAGO_KWARG, None) or {}  # strip it before forwarding to Anthropic
+
+
+def _is_message_like(obj: Any) -> bool:
+    """Tell a complete Message (has `usage`) apart from a stream object (does not).
+
+    Dicts are checked by key, everything else by attribute. Any exception raised
+    while probing is swallowed and reported as False.
+    """
+    try:
+        # hasattr() only absorbs AttributeError, hence the broad guard below.
+        found = "usage" in obj if isinstance(obj, dict) else hasattr(obj, "usage")
+    except Exception:  # noqa: BLE001
+        return False
+    else:
+        return found
+
+
+def wrap_anthropic_client(
+    sdk: Any,
+    client: Any,
+    dimensions: dict[str, Any] | None = None,
+    subscription: str | None = None,
+) -> Any:
+    """In-place wrap of an `anthropic.Anthropic` or `anthropic.AsyncAnthropic` client. Idempotent."""
+    if getattr(client, _INSTRUMENTED_ATTR, False):
+        logger.info("lago: anthropic client already wrapped — skipping")
+        return client
+
+    base_dims = dict(dimensions or {})
+    base_sub = subscription
+
+    messages = getattr(client, "messages", None)
+    if messages is None:
+        logger.warning("lago: anthropic client has no .messages — skipping wrap")
+        return client
+
+    original_create = getattr(messages, "create", None)
+    original_stream = getattr(messages, "stream", None)
+    is_async = type(client).__name__.startswith("Async")
+
+    def _resolve_opts(lago_opts: dict[str, Any]) -> tuple[str | None, dict[str, Any]]:
+        sub = lago_opts.get("subscription") or base_sub
+        dims = {**base_dims, **(lago_opts.get("dimensions") or {})}
+        return sub, dims
+
+    def _emit_from(payload: Any, model_id: str, sub: str | None, dims: dict[str, Any]) -> None:
+        try:
+            usage = extract_anthropic_native(payload, model_id=model_id)
+            sdk.emit(usage, subscription=sub, dimensions=dims)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("lago: anthropic emit failed: %s", exc)
+
+    def _track(event: Any, state: dict[str, Any]) -> None:
+        """Fold one raw stream event into `state`, a synthetic `{"usage", "content"}` response."""
+        payload = event.model_dump() if hasattr(event, "model_dump") else event
+        if not isinstance(payload, dict):
+            return
+        # message_start nests usage under `message`; message_delta carries it top-level.
+        start = payload.get("message")
+        sources = (start.get("usage") if isinstance(start, dict) else None, payload.get("usage"))
+        for usage in sources:
+            if isinstance(usage, dict):
+                merged = state.setdefault("usage", {})
+                # Never let a None in a later event erase a count reported earlier.
+                merged.update({k: v for k, v in usage.items() if v is not None or k not in merged})
+        block = payload.get("content_block")
+        if payload.get("type") == "content_block_start" and isinstance(block, dict):
+            state.setdefault("content", []).append(block)  # adapter counts tool_use blocks
+
+    # Sync messages.create — auto-detects streaming via response shape
+    def _create(*args: Any, **kwargs: Any) -> Any:
+        assert original_create is not None
+        lago_opts = _pop_lago_kwarg(kwargs)
+        model_id = kwargs.get("model", "")
+        sub, dims = _resolve_opts(lago_opts)
+        response = original_create(*args, **kwargs)
+
+        if _is_message_like(response):
+            _emit_from(response, model_id, sub, dims)
+            return response
+
+        # Streaming — wrap the iterator and emit the accumulated usage on close.
+        def _wrap_stream(src: Iterator[Any]) -> Iterator[Any]:
+            state: dict[str, Any] = {}
+            try:
+                for event in src:
+                    _track(event, state)
+                    yield event
+            finally:
+                if "usage" in state:
+                    _emit_from(state, model_id, sub, dims)
+
+        return _wrap_stream(response)
+
+    # Async messages.create — same as sync, awaited
+    async def _create_async(*args: Any, **kwargs: Any) -> Any:
+        assert original_create is not None
+        lago_opts = _pop_lago_kwarg(kwargs)
+        model_id = kwargs.get("model", "")
+        sub, dims = _resolve_opts(lago_opts)
+        response = await original_create(*args, **kwargs)
+
+        if _is_message_like(response):
+            _emit_from(response, model_id, sub, dims)
+            return response
+
+        async def _wrap_async_stream(src: AsyncIterator[Any]) -> AsyncIterator[Any]:
+            state: dict[str, Any] = {}
+            try:
+                async for event in src:
+                    _track(event, state)
+                    yield event
+            finally:
+                if "usage" in state:
+                    _emit_from(state, model_id, sub, dims)
+
+        return _wrap_async_stream(response)
+
+    # messages.stream context manager (sync + async): the SDK call returns a stream
+    # manager object, which _LagoStreamManager proxies so usage is emitted on exit.
+    def _wrap_stream_manager(*args: Any, **kwargs: Any) -> Any:
+        assert original_stream is not None
+        lago_opts = _pop_lago_kwarg(kwargs)
+        model_id = kwargs.get("model", "")
+        sub, dims = _resolve_opts(lago_opts)
+        inner = original_stream(*args, **kwargs)
+        return _LagoStreamManager(inner, sdk, model_id, sub, dims, is_async=is_async)
+
+    if original_create is not None:
+        messages.create = _create_async if is_async else _create
+    if original_stream is not None:
+        messages.stream = _wrap_stream_manager
+
+    setattr(client, _INSTRUMENTED_ATTR, True)
+    return client
+
+
+class _LagoStreamManager:
+    """Proxy for Anthropic's stream managers (`with` / `async with`) that emits usage on exit."""
+
+    def __init__(
+        self,
+        inner: Any,
+        sdk: Any,
+        model_id: str,
+        sub: str | None,
+        dims: dict[str, Any],
+        *,
+        is_async: bool,
+    ) -> None:
+        self._inner = inner
+        self._sdk = sdk
+        self._model_id = model_id
+        self._sub = sub
+        self._dims = dims
+        self._stream: Any = None
+        self._is_async = is_async
+
+    # ----- sync -----
+    def __enter__(self) -> Any:
+        self._stream = self._inner.__enter__()
+        return self._stream
+
+    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> Any:
+        try:
+            return self._inner.__exit__(exc_type, exc, tb)
+        finally:
+            self._emit_final()
+
+    # ----- async -----
+    async def __aenter__(self) -> Any:
+        self._stream = await self._inner.__aenter__()
+        return self._stream
+
+    async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> Any:
+        try:
+            return await self._inner.__aexit__(exc_type, exc, tb)
+        finally:
+            await self._emit_final_async()
+
+    def _emit_final(self) -> None:
+        try:
+            getter = getattr(self._stream, "get_final_message", None)
+            self._emit(getter() if callable(getter) else None)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("lago: anthropic stream-manager emit failed: %s", exc)
+
+    async def _emit_final_async(self) -> None:
+        try:
+            getter = getattr(self._stream, "get_final_message", None)
+            final = getter() if callable(getter) else None
+            # AsyncMessageStream.get_final_message() is a coroutine: await it before extracting.
+            self._emit(await final if hasattr(final, "__await__") else final)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("lago: anthropic stream-manager emit failed: %s", exc)
+
+    def _emit(self, final: Any) -> None:
+        if final is not None:
+            usage = extract_anthropic_native(final, model_id=self._model_id)
+            self._sdk.emit(usage, subscription=self._sub, dimensions=self._dims)
diff --git a/tests/integration/test_live_anthropic.py b/tests/integration/test_live_anthropic.py
new file mode 100644
index 0000000..1b60e67
--- /dev/null
+++ b/tests/integration/test_live_anthropic.py
@@ -0,0 +1,114 @@
+"""End-to-end Anthropic integration test — live API + mocked Lago.
+
+Skipped unless ANTHROPIC_API_KEY is set.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import threading
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+import pytest
+
+from lago_agent_sdk import LagoSDK
+
+pytestmark = pytest.mark.skipif(
+    not os.environ.get("ANTHROPIC_API_KEY"),
+    reason="ANTHROPIC_API_KEY not set",
+)
+
+
+class _MockLago(BaseHTTPRequestHandler):  # records each POSTed JSON body on the server
+    def do_POST(self):  # noqa: N802
+        n = int(self.headers.get("Content-Length", 0))
+        body = self.rfile.read(n)
+        self.server.received.append(json.loads(body))  # type: ignore[attr-defined]
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(b'{"ok": true}')
+
+    def log_message(self, *_args, **_kwargs):  # no-op override: suppress request logging
+        return
+
+
+def _spawn_lago() -> tuple[HTTPServer, str]:
+    s = HTTPServer(("127.0.0.1", 0), _MockLago)  # port 0: let the OS pick a free port
+    s.received = []  # type: ignore[attr-defined]
+    threading.Thread(target=s.serve_forever, daemon=True).start()
+    return s, f"http://127.0.0.1:{s.server_port}"
+
+
+def test_live_anthropic_messages_create_emits_to_lago() -> None:
+    from anthropic import Anthropic
+
+    server, url = _spawn_lago()
+    try:
+        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
+        client = sdk.wrap(Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]))
+        client.messages.create(
+            model="claude-haiku-4-5-20251001",
+            max_tokens=20,
+            messages=[{"role": "user", "content": "Say hi"}],
+        )
+        assert sdk.flush(timeout=10.0)
+        sdk.shutdown(timeout=2.0)
+        events = [e for p in server.received for e in p["events"]]  # type: ignore[attr-defined]
+        codes = {e["code"] for e in events}
+        assert {"llm_input_tokens", "llm_output_tokens"} <= codes
+        for e in events:
+            assert e["properties"]["api"] == "native"
+            assert e["properties"]["provider"] == "anthropic"
+    finally:
+        server.shutdown()
+        server.server_close()  # shutdown() only stops serve_forever; this frees the socket
+
+
+def test_live_anthropic_streaming_emits_from_final_delta() -> None:
+    from anthropic import Anthropic
+
+    server, url = _spawn_lago()
+    try:
+        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
+        client = sdk.wrap(Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]))
+        for _ in client.messages.create(
+            model="claude-haiku-4-5-20251001",
+            max_tokens=20,
+            messages=[{"role": "user", "content": "Say hi"}],
+            stream=True,
+        ):
+            pass
+        assert sdk.flush(timeout=10.0)
+        sdk.shutdown(timeout=2.0)
+        events = [e for p in server.received for e in p["events"]]  # type: ignore[attr-defined]
+        codes = {e["code"] for e in events}
+        assert {"llm_input_tokens", "llm_output_tokens"} <= codes
+    finally:
+        server.shutdown()
+        server.server_close()  # shutdown() only stops serve_forever; this frees the socket
+
+
+def test_live_anthropic_messages_stream_context_manager() -> None:
+    from anthropic import Anthropic
+
+    server, url = _spawn_lago()
+    try:
+        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
+        client = sdk.wrap(Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]))
+        with client.messages.stream(
+            model="claude-haiku-4-5-20251001",
+            max_tokens=20,
+            messages=[{"role": "user", "content": "Say hi"}],
+        ) as stream:
+            for _ in stream.text_stream:
+                pass
+        assert sdk.flush(timeout=10.0)
+        sdk.shutdown(timeout=2.0)
+        events = [e for p in server.received for e in p["events"]]  # type: ignore[attr-defined]
+        codes = {e["code"] for e in events}
+        assert {"llm_input_tokens", "llm_output_tokens"} <= codes
+    finally:
+        server.shutdown()
+        server.server_close()  # shutdown() only stops serve_forever; this frees the socket
diff --git a/tests/unit/adapters/fixtures/anthropic_native/01_plain_haiku.json b/tests/unit/adapters/fixtures/anthropic_native/01_plain_haiku.json
new file mode 100644
index 0000000..ebe80c3
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/01_plain_haiku.json
@@ -0,0 +1,33 @@
+{
+  "_model_id": "claude-haiku-4-5-20251001",
+  "_response": {
+    "id": "msg_014oRrBt8p4HqV5k5eS1RyKN",
+    "container": null,
+    "content": [
+      {
+        "citations": null,
+        "text": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks and whistles.",
+        "type": "text"
+      }
+    ],
+    "model": "claude-haiku-4-5-20251001",
+    "role": "assistant",
+    "stop_details": null,
+    "stop_reason": "end_turn",
+    "stop_sequence": null,
+    "type": "message",
+    "usage": {
+      "cache_creation": {
+        "ephemeral_1h_input_tokens": 0,
+        "ephemeral_5m_input_tokens": 0
+      },
+      "cache_creation_input_tokens": 0,
+      "cache_read_input_tokens": 0,
+      "inference_geo": "not_available",
+      "input_tokens": 13,
+      "output_tokens": 35,
+      "server_tool_use": null,
+      "service_tier": "standard"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/anthropic_native/02_plain_sonnet.json b/tests/unit/adapters/fixtures/anthropic_native/02_plain_sonnet.json
new file mode 100644
index 0000000..36071ff
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/02_plain_sonnet.json
@@ -0,0 +1,33 @@
+{
+  "_model_id": "claude-sonnet-4-6",
+  "_response": {
+    "id": "msg_01Y3fakcdpcj6tk6FJrxV5GJ",
+    "container": null,
+    "content": [
+      {
+        "citations": null,
+        "text": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social bonds, and remarkable ability to communicate using a variety of clicks, whistles, and other sounds.",
+        "type": "text"
+      }
+    ],
+    "model": "claude-sonnet-4-6",
+    "role": "assistant",
+    "stop_details": null,
+    "stop_reason": "end_turn",
+    "stop_sequence": null,
+    "type": "message",
+    "usage": {
+      "cache_creation": {
+        "ephemeral_1h_input_tokens": 0,
+        "ephemeral_5m_input_tokens": 0
+      },
+      "cache_creation_input_tokens": 0,
+      "cache_read_input_tokens": 0,
+      "inference_geo": "global",
+      "input_tokens": 13,
+      "output_tokens": 39,
+      "server_tool_use": null,
+      "service_tier": "standard"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/anthropic_native/03_tool_use.json b/tests/unit/adapters/fixtures/anthropic_native/03_tool_use.json
new file mode 100644
index 0000000..5dc4e83
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/03_tool_use.json
@@ -0,0 +1,39 @@
+{
+  "_model_id": "claude-sonnet-4-6",
+  "_response": {
+    "id": "msg_01FBZLSB8UduV9akqUkEtyEW",
+    "container": null,
+    "content": [
+      {
+        "id": "toolu_01AMkoyrfvRgYBCA21zpGXNi",
+        "caller": {
+          "type": "direct"
+        },
+        "input": {
+          "city": "Tokyo"
+        },
+        "name": "get_weather",
+        "type": "tool_use"
+      }
+    ],
+    "model": "claude-sonnet-4-6",
+    "role": "assistant",
+    "stop_details": null,
+    "stop_reason": "tool_use",
+    "stop_sequence": null,
+    "type": "message",
+    "usage": {
+      "cache_creation": {
+        "ephemeral_1h_input_tokens": 0,
+        "ephemeral_5m_input_tokens": 0
+      },
+      "cache_creation_input_tokens": 0,
+      "cache_read_input_tokens": 0,
+      "inference_geo": "global",
+      "input_tokens": 658,
+      "output_tokens": 38,
+      "server_tool_use": null,
+      "service_tier": "standard"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/anthropic_native/04_cache_create_5m.json b/tests/unit/adapters/fixtures/anthropic_native/04_cache_create_5m.json
new file mode 100644
index 0000000..2fdaf1f
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/04_cache_create_5m.json
@@ -0,0 +1,33 @@
+{
+  "_model_id": "claude-sonnet-4-6",
+  "_response": {
+    "id": "msg_01VtgdQSaox4WMdGvHipchpn",
+    "container": null,
+    "content": [
+      {
+        "citations": null,
+        "text": "**4**\n\nSteps:\n1. Start with 2\n2. Add 2\n3. Result = **4**",
+        "type": "text"
+      }
+    ],
+    "model": "claude-sonnet-4-6",
+    "role": "assistant",
+    "stop_details": null,
+    "stop_reason": "end_turn",
+    "stop_sequence": null,
+    "type": "message",
+    "usage": {
+      "cache_creation": {
+        "ephemeral_1h_input_tokens": 0,
+        "ephemeral_5m_input_tokens": 2803
+      },
+      "cache_creation_input_tokens": 2803,
+      "cache_read_input_tokens": 0,
+      "inference_geo": "global",
+      "input_tokens": 13,
+      "output_tokens": 30,
+      "server_tool_use": null,
+      "service_tier": "standard"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/anthropic_native/05_cache_read.json b/tests/unit/adapters/fixtures/anthropic_native/05_cache_read.json
new file mode 100644
index 0000000..c7fc4bb
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/05_cache_read.json
@@ -0,0 +1,33 @@
+{
+  "_model_id": "claude-sonnet-4-6",
+  "_response": {
+    "id": "msg_011bpj3E8EKkiNzynaPfBuJu",
+    "container": null,
+    "content": [
+      {
+        "citations": null,
+        "text": "**3 + 3 = 6**\n\n**Step 1:** Start with 3.\n**Step 2:** Add ",
+        "type": "text"
+      }
+    ],
+    "model": "claude-sonnet-4-6",
+    "role": "assistant",
+    "stop_details": null,
+    "stop_reason": "max_tokens",
+    "stop_sequence": null,
+    "type": "message",
+    "usage": {
+      "cache_creation": {
+        "ephemeral_1h_input_tokens": 0,
+        "ephemeral_5m_input_tokens": 0
+      },
+      "cache_creation_input_tokens": 0,
+      "cache_read_input_tokens": 2803,
+      "inference_geo": "global",
+      "input_tokens": 13,
+      "output_tokens": 30,
+      "server_tool_use": null,
+      "service_tier": "standard"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/anthropic_native/06_cache_create_1h.json b/tests/unit/adapters/fixtures/anthropic_native/06_cache_create_1h.json
new file mode 100644
index 0000000..8624bc4
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/06_cache_create_1h.json
@@ -0,0 +1,33 @@
+{
+  "_model_id": "claude-sonnet-4-6",
+  "_response": {
+    "id": "msg_01CnMDjYpfBiy7JWKyMeNWsL",
+    "container": null,
+    "content": [
+      {
+        "citations": null,
+        "text": "Hi! How can I help you today?",
+        "type": "text"
+      }
+    ],
+    "model": "claude-sonnet-4-6",
+    "role": "assistant",
+    "stop_details": null,
+    "stop_reason": "end_turn",
+    "stop_sequence": null,
+    "type": "message",
+    "usage": {
+      "cache_creation": {
+        "ephemeral_1h_input_tokens": 2808,
+        "ephemeral_5m_input_tokens": 0
+      },
+      "cache_creation_input_tokens": 2808,
+      "cache_read_input_tokens": 0,
+      "inference_geo": "global",
+      "input_tokens": 7,
+      "output_tokens": 12,
+      "server_tool_use": null,
+      "service_tier": "standard"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/anthropic_native/07_extended_thinking.json b/tests/unit/adapters/fixtures/anthropic_native/07_extended_thinking.json
new file mode 100644
index 0000000..4d0002d
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/07_extended_thinking.json
@@ -0,0 +1,38 @@
+{
+  "_model_id": "claude-sonnet-4-6",
+  "_response": {
+    "id": "msg_01EAk8qJYaKDY7YNx6hiKBta",
+    "container": null,
+    "content": [
+      {
+        "signature": "ErQCCmUIDRgCKkCFY8OE3VsRVnV6nXU/77sHcbai8ab7vVMPJKrFLAG73brAKyvMuOUHY3F3hARXd9ZCb++Z1sJ6emEjEdif2OwRMhFjbGF1ZGUtc29ubmV0LTQtNjgAQgh0aGlua2luZxIM6cZezoae3YMBKeUHGgw/iy0PoRrY0tY3z48iML7CqI0ZluxPJPXTAI3QxkdVG3dzXfxNpDhRWvOHvrURM5KpKm8gasgo5LLIJfQa0ip9EvggIrnDJKCXnOk5BFOsoNUe+2QdTzF7tGepNuhmB9R10uJzdEft/L/pw847GzejnrRYrahkVoNAv8hx/tiQABnoEbPqLixGimf2FhSMEUzzaHHoYiNlIdaR9cCZOXQ3hgyKrfkjg004wt6SaCLbZxm5SLZ8Yi5gsgVC0kAYAQ==",
+        "thinking": "We need to prove that 1\u00b3 + 2\u00b3 + 3\u00b3 + ... + n\u00b3 = (1 + 2 + 3 + ... + n)\u00b2\n\nI'll use mathematical induction.",
+        "type": "thinking"
+      },
+      {
+        "citations": null,
+        "text": "# Proof: Sum of First n Cubes\n\n## Statement to Prove\n\n$$\\sum_{k=1}^{n} k^3 = \\left(\\sum_{k=1}^{n} k\\right)^2$$\n\nUsing the known closed form for the linear sum, this is equivalent to:\n\n$$1^3 + 2^3 + 3^3 + \\cdots + n^3 = \\left(\\frac{n(n+1)}{2}\\right)^2$$\n\n## Proof by Mathematical Induction\n\n### Base Case (n = 1)\n\n**Left side:** $1^3 = 1$\n\n**Right side:** $\\left(\\dfrac{1 \\cdot 2}{2}\\right)^2 = (1)^2 = 1$ \u2713\n\n### Inductive Step\n\n**Inductive Hypothesis:** Assume the statement holds for some $n = m \\geq 1$:\n\n$$1^3 + 2^3 + \\cdots + m^3 = \\left(\\frac{m(m+1)}{2}\\right)^2$$\n\n**Goal:** Prove the statement holds for $n = m + 1$:\n\n$$1^3 + 2^3 + \\cdots + m^3 + (m+1)^3 = \\left(\\frac{(m+1)(m+2)}{2}\\right)^2$$\n\n**Derivation:**\n\nStarting with the left side of the goal:\n\n$$\\underbrace{1^3 + 2^3 + \\cdots + m^3}_{\\text{apply hypothesis}} + (m+1)^3$$\n\n$$= \\left(\\frac{m(m+1)}{2}\\right)^2 + (m+1)^3$$\n\n$$= \\frac{m^2(m+1)^2}{4} + (m+1)^3$$\n\nFactor out $(m+1)^2$:\n\n$$= (m+1)^2\\left(\\frac{m^2}{4} + (m+1)\\right)$$\n\nCombine terms inside the parentheses over a common denominator of 4:\n\n$$= (m+1)^2\\left(\\frac{m^2 + 4(m+1)}{4}\\right)$$\n\n$$= (m+1)^2\\left(\\frac{m^2 + 4m + 4}{4}\\right)$$\n\nFactor the numerator as a perfect square:\n\n$$= (m+1)^2\\left(\\frac{(m+2)^2}{4}\\right)$$\n\nRearrange:\n\n$$= \\frac{(m+1)^2(m+2)^2}{4}$$\n\n$$= \\left(\\frac{(m+1)(m+2)}{2}\\right)^2$$\n\nThis is exactly the right side of the goal. $\\blacksquare$\n\n## Conclusion\n\nBy the principle of mathematical induction, for all positive integers $n$:\n\n$$\\boxed{1^3 + 2^3 + 3^3 + \\cdots + n^3 = \\left(\\frac{n(n+1)}{2}\\right)^2 = \\left(1 + 2 + 3 + \\cdots + n\\right)^2}$$",
+        "type": "text"
+      }
+    ],
+    "model": "claude-sonnet-4-6",
+    "role": "assistant",
+    "stop_details": null,
+    "stop_reason": "end_turn",
+    "stop_sequence": null,
+    "type": "message",
+    "usage": {
+      "cache_creation": {
+        "ephemeral_1h_input_tokens": 0,
+        "ephemeral_5m_input_tokens": 0
+      },
+      "cache_creation_input_tokens": 0,
+      "cache_read_input_tokens": 0,
+      "inference_geo": "global",
+      "input_tokens": 66,
+      "output_tokens": 862,
+      "server_tool_use": null,
+      "service_tier": "standard"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/anthropic_native/08_stream.json b/tests/unit/adapters/fixtures/anthropic_native/08_stream.json
new file mode 100644
index 0000000..6b55d43
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/08_stream.json
@@ -0,0 +1,142 @@
+{
+  "_model_id": "claude-haiku-4-5-20251001",
+  "_response": {
+    "events": [
+      {
+        "message": {
+          "id": "msg_01A9ASsVixqCzpb4qwbcCkd2",
+          "container": null,
+          "content": [],
+          "model": "claude-haiku-4-5-20251001",
+          "role": "assistant",
+          "stop_details": null,
+          "stop_reason": null,
+          "stop_sequence": null,
+          "type": "message",
+          "usage": {
+            "cache_creation": {
+              "ephemeral_1h_input_tokens": 0,
+              "ephemeral_5m_input_tokens": 0
+            },
+            "cache_creation_input_tokens": 0,
+            "cache_read_input_tokens": 0,
+            "inference_geo": "not_available",
+            "input_tokens": 13,
+            "output_tokens": 8,
+            "server_tool_use": null,
+            "service_tier": "standard"
+          }
+        },
+        "type": "message_start"
+      },
+      {
+        "content_block": {
+          "citations": null,
+          "text": "",
+          "type": "text"
+        },
+        "index": 0,
+        "type": "content_block_start"
+      },
+      {
+        "delta": {
+          "text": "Dolphins are highly intelligent marine mammals known",
+          "type": "text_delta"
+        },
+        "index": 0,
+        "type": "content_block_delta"
+      },
+      {
+        "type": "text",
+        "text": "Dolphins are highly intelligent marine mammals known",
+        "snapshot": "Dolphins are highly intelligent marine mammals known"
+      },
+      {
+        "delta": {
+          "text": " for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks",
+          "type": "text_delta"
+        },
+        "index": 0,
+        "type": "content_block_delta"
+      },
+      {
+        "type": "text",
+        "text": " for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks",
+        "snapshot": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks"
+      },
+      {
+        "delta": {
+          "text": " and whistles.",
+          "type": "text_delta"
+        },
+        "index": 0,
+        "type": "content_block_delta"
+      },
+      {
+        "type": "text",
+        "text": " and whistles.",
+        "snapshot": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks and whistles."
+      },
+      {
+        "index": 0,
+        "type": "content_block_stop",
+        "content_block": {
+          "citations": null,
+          "text": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks and whistles.",
+          "type": "text",
+          "parsed_output": null
+        }
+      },
+      {
+        "delta": {
+          "container": null,
+          "stop_details": null,
+          "stop_reason": "end_turn",
+          "stop_sequence": null
+        },
+        "type": "message_delta",
+        "usage": {
+          "cache_creation_input_tokens": 0,
+          "cache_read_input_tokens": 0,
+          "input_tokens": 13,
+          "output_tokens": 35,
+          "server_tool_use": null
+        }
+      },
+      {
+        "type": "message_stop",
+        "message": {
+          "id": "msg_01A9ASsVixqCzpb4qwbcCkd2",
+          "container": null,
+          "content": [
+            {
+              "citations": null,
+              "text": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks and whistles.",
+              "type": "text",
+              "parsed_output": null
+            }
+          ],
+          "model": "claude-haiku-4-5-20251001",
+          "role": "assistant",
+          "stop_details": null,
+          "stop_reason": "end_turn",
+          "stop_sequence": null,
+          "type": "message",
+          "usage": {
+            "cache_creation": {
+              "ephemeral_1h_input_tokens": 0,
+              "ephemeral_5m_input_tokens": 0
+            },
+            "cache_creation_input_tokens": 0,
+            "cache_read_input_tokens": 0,
+            "inference_geo": "not_available",
+            "input_tokens": 13,
+            "output_tokens": 35,
+            "server_tool_use": null,
+            "service_tier": "standard"
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/anthropic_native/09_multi_turn.json b/tests/unit/adapters/fixtures/anthropic_native/09_multi_turn.json
new file mode 100644
index 0000000..eb3ff9c
--- /dev/null
+++ b/tests/unit/adapters/fixtures/anthropic_native/09_multi_turn.json
@@ -0,0 +1,33 @@
+{
+  "_model_id": "claude-haiku-4-5-20251001",
+  "_response": {
+    "id": "msg_01Tg1sNzgCXenQnUBLwQ8Ycr",
+    "container": null,
+    "content": [
+      {
+        "citations": null,
+        "text": "4 times 3 equals 12.",
+        "type": "text"
+      }
+    ],
+    "model": "claude-haiku-4-5-20251001",
+    "role": "assistant",
+    "stop_details": null,
+    "stop_reason": "end_turn",
+    "stop_sequence": null,
+    "type": "message",
+    "usage": {
+      "cache_creation": {
+        "ephemeral_1h_input_tokens": 0,
+        "ephemeral_5m_input_tokens": 0
+      },
+      "cache_creation_input_tokens": 0,
+      "cache_read_input_tokens": 0,
+      "inference_geo": "not_available",
+      "input_tokens": 34,
+      "output_tokens": 14,
+      "server_tool_use": null,
+      "service_tier": "standard"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/capture_anthropic.py b/tests/unit/adapters/fixtures/capture_anthropic.py
new file mode 100644
index 0000000..73893a2
--- /dev/null
+++ b/tests/unit/adapters/fixtures/capture_anthropic.py
@@ -0,0 +1,179 @@
+"""Capture real Anthropic native API responses for adapter design.
+
+Saves raw responses to tests/unit/adapters/fixtures/anthropic_native/<scenario>.json
+so we can verify mappings against reality before writing the adapter.
+
+Reads ANTHROPIC_API_KEY from env.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import pathlib
+import sys
+
+from anthropic import Anthropic
+
+OUT = pathlib.Path(__file__).parent / "anthropic_native"  # fixture dir next to this script
+OUT.mkdir(parents=True, exist_ok=True)  # runs at import time, before any save() call
+
+
+def to_dict(response) -> dict:
+    """Convert an SDK response to a plain dict via model_dump(), dict(), json() or dict(obj)."""
+    for dumper in ("model_dump", "dict"):
+        if hasattr(response, dumper):
+            return getattr(response, dumper)()
+    has_json = hasattr(response, "json")
+    return json.loads(response.json()) if has_json else dict(response)
+
+
+def save(name: str, model: str, payload: dict) -> None:  # one scenario -> OUT/<name>.json
+    dest = OUT / f"{name}.json"
+    dest.write_text(json.dumps({"_model_id": model, "_response": payload}, indent=2, default=str))
+    print(f"  ✓ saved {dest.name}")
+
+
+def main() -> int:  # exit code: 2 when ANTHROPIC_API_KEY is unset, otherwise 0
+    key = os.environ.get("ANTHROPIC_API_KEY")
+    if not key:
+        print("error: set ANTHROPIC_API_KEY", file=sys.stderr)
+        return 2
+
+    client = Anthropic(api_key=key)
+    PROMPT = "Write one sentence about dolphins."
+
+    # NOTE: "Sonnet 4.5" labels are stale; the API only exposes 4-6+, so calls use claude-sonnet-4-6.
+    # ----- 1. Plain call (small model) -----
+    print("\n[1] plain — claude-haiku-4-5-20251001")
+    r = client.messages.create(
+        model="claude-haiku-4-5-20251001",
+        max_tokens=80,
+        messages=[{"role": "user", "content": PROMPT}],
+    )
+    save("01_plain_haiku", "claude-haiku-4-5-20251001", to_dict(r))
+
+    # ----- 2. Plain call (Sonnet, larger) -----
+    print("\n[2] plain — claude-sonnet-4-6")
+    r = client.messages.create(
+        model="claude-sonnet-4-6",
+        max_tokens=80,
+        messages=[{"role": "user", "content": PROMPT}],
+    )
+    save("02_plain_sonnet", "claude-sonnet-4-6", to_dict(r))
+
+    # ----- 3. Tool use -----
+    print("\n[3] tool use — claude-sonnet-4-6 with weather tool")
+    tools = [
+        {
+            "name": "get_weather",
+            "description": "Get the current weather for a city.",
+            "input_schema": {
+                "type": "object",
+                "properties": {"city": {"type": "string"}},
+                "required": ["city"],
+            },
+        }
+    ]
+    r = client.messages.create(
+        model="claude-sonnet-4-6",
+        max_tokens=200,
+        tools=tools,
+        tool_choice={"type": "any"},
+        messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
+    )
+    save("03_tool_use", "claude-sonnet-4-6", to_dict(r))
+
+    # ----- 4. Cache create (5m default TTL) — long system prompt -----
+    print("\n[4] cache create — long system + cache_control 5m default")
+    LONG_TEXT = ("You are a helpful assistant. Answer concisely. " * 200) + (
+        "Always cite step by step. " * 100
+    )
+    cached_body = {
+        "model": "claude-sonnet-4-6",
+        "max_tokens": 30,
+        "system": [{"type": "text", "text": LONG_TEXT, "cache_control": {"type": "ephemeral"}}],
+        "messages": [{"role": "user", "content": "What's 2+2?"}],
+    }
+    r = client.messages.create(**cached_body)
+    save("04_cache_create_5m", "claude-sonnet-4-6", to_dict(r))
+
+    # ----- 5. Cache read (same long system, different user question) -----
+    print("\n[5] cache read — same cached_control content, second call")
+    cached_body["messages"] = [{"role": "user", "content": "What's 3+3?"}]
+    r = client.messages.create(**cached_body)
+    save("05_cache_read", "claude-sonnet-4-6", to_dict(r))
+
+    # ----- 6. Cache 1h TTL -----
+    print("\n[6] cache 1h — explicit ttl")
+    cached_1h = {
+        "model": "claude-sonnet-4-6",
+        "max_tokens": 30,
+        "system": [
+            {
+                "type": "text",
+                "text": LONG_TEXT + " (1h variant)",
+                "cache_control": {"type": "ephemeral", "ttl": "1h"},
+            }
+        ],
+        "messages": [{"role": "user", "content": "Hi"}],
+    }
+    try:
+        r = client.messages.create(**cached_1h)
+        save("06_cache_create_1h", "claude-sonnet-4-6", to_dict(r))
+    except Exception as exc:  # noqa: BLE001
+        print(f"  1h TTL not available on this account/region: {str(exc)[:160]}")
+
+    # ----- 7. Extended thinking (reasoning) -----
+    print("\n[7] extended thinking — claude-sonnet-4-6")
+    try:
+        r = client.messages.create(
+            model="claude-sonnet-4-6",
+            max_tokens=2048,
+            thinking={"type": "enabled", "budget_tokens": 1024},
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        "Prove that the sum of the first n cubes equals the square of the sum of "
+                        "the first n positive integers. Show each algebraic step."
+                    ),
+                }
+            ],
+        )
+        save("07_extended_thinking", "claude-sonnet-4-6", to_dict(r))
+    except Exception as exc:  # noqa: BLE001
+        print(f"  extended thinking error: {str(exc)[:160]}")
+
+    # ----- 8. Streaming -----
+    print("\n[8] streaming — claude-haiku-4-5-20251001")
+    events: list[dict] = []
+    with client.messages.stream(
+        model="claude-haiku-4-5-20251001",
+        max_tokens=60,
+        messages=[{"role": "user", "content": PROMPT}],
+    ) as stream:
+        for event in stream:
+            events.append(to_dict(event))
+    save("08_stream", "claude-haiku-4-5-20251001", {"events": events})
+
+    # ----- 9. Multi-turn -----
+    print("\n[9] multi-turn — claude-haiku-4-5-20251001")
+    convo = [
+        {"role": "user", "content": "What is 2+2?"},
+        {"role": "assistant", "content": "2+2 equals 4."},
+        {"role": "user", "content": "And times 3?"},
+    ]
+    r = client.messages.create(
+        model="claude-haiku-4-5-20251001",
+        max_tokens=40,
+        messages=convo,
+    )
+    save("09_multi_turn", "claude-haiku-4-5-20251001", to_dict(r))
+
+    print("\nDone. Inspect tests/unit/adapters/fixtures/anthropic_native/*.json")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/tests/unit/adapters/test_anthropic_native.py b/tests/unit/adapters/test_anthropic_native.py
new file mode 100644
index 0000000..13bff69
--- /dev/null
+++ b/tests/unit/adapters/test_anthropic_native.py
@@ -0,0 +1,152 @@
+"""Anthropic native adapter — verified against real fixtures."""
+
+from __future__ import annotations
+
+import json
+import pathlib
+
+from lago_agent_sdk.adapters import extract_anthropic_native
+
+FIX = pathlib.Path(__file__).parent / "fixtures" / "anthropic_native"  # fixture JSON directory
+
+
+def _load(name: str) -> tuple[str, dict]:  # -> (model id, raw response) of one fixture file
+    fixture = json.loads(FIX.joinpath(name).read_text())
+    return fixture["_model_id"], fixture["_response"]
+
+
+# --------------------------------------------------------------------------
+# Real fixtures
+# --------------------------------------------------------------------------
+def test_plain_haiku() -> None:
+    """Plain Haiku fixture: token counts, zeroed cache/tool fields and identity metadata."""
+    model, payload = _load("01_plain_haiku.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    assert (usage.input, usage.output) == (13, 35)
+    assert usage.cache_read == 0
+    assert usage.cache_write == 0
+    assert usage.tool_calls == 0
+    assert usage.api == "native"
+    assert usage.provider == "anthropic"
+    assert usage.model == "claude-haiku-4-5-20251001"
+
+
+def test_plain_sonnet() -> None:
+    """Plain Sonnet fixture: input/output token counts are extracted as-is."""
+    model, payload = _load("02_plain_sonnet.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    assert (usage.input, usage.output) == (13, 39)
+
+
+def test_tool_use_counts_tool_calls() -> None:
+    """Tool-use fixture: one tool call is counted alongside the token counts."""
+    model, payload = _load("03_tool_use.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    assert (usage.input, usage.output) == (658, 38)
+    assert usage.tool_calls == 1
+
+
+def test_cache_create_5m() -> None:
+    """5m cache-creation fixture: the whole write lands in the 5m bucket, nothing is read."""
+    model, payload = _load("04_cache_create_5m.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    assert (usage.cache_write, usage.cache_write_5m) == (2803, 2803)
+    assert usage.cache_write_1h == 0
+    assert usage.cache_read == 0
+
+
+def test_cache_read_after_create() -> None:
+    """Cache-read fixture: tokens are reported as cache_read with no cache writes."""
+    model, payload = _load("05_cache_read.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    assert usage.cache_read == 2803
+    assert (usage.cache_write, usage.cache_write_5m) == (0, 0)
+
+
+def test_cache_create_1h() -> None:
+    """1h cache-creation fixture: the whole write lands in the 1h bucket, none in 5m."""
+    model, payload = _load("06_cache_create_1h.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    assert (usage.cache_write, usage.cache_write_1h) == (2808, 2808)
+    assert usage.cache_write_5m == 0
+
+
+def test_extended_thinking_bundles_into_output_tokens() -> None:
+    """Extended thinking exposes no separate reasoning_tokens; they are part of output_tokens."""
+    model, payload = _load("07_extended_thinking.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    assert usage.input == 66
+    assert usage.output == 862  # the 862 covers both the thinking and the final answer
+    assert usage.reasoning == 0  # confirmed: Anthropic doesn't separate it
+    # the content holds a 'thinking' and a 'text' block; neither is a tool call
+    assert usage.tool_calls == 0
+
+
+def test_multi_turn() -> None:
+    """Multi-turn fixture: input/output token counts are extracted as-is."""
+    model, payload = _load("09_multi_turn.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    assert (usage.input, usage.output) == (34, 14)
+
+
+def test_unknown_top_usage_field_lands_in_extras() -> None:
+    """Drift detection: service_tier, inference_geo and server_tool_use must surface in extras."""
+    model, payload = _load("01_plain_haiku.json")
+    usage = extract_anthropic_native(payload, model_id=model)
+    expected = ("service_tier", "inference_geo", "server_tool_use")
+    for field in expected:
+        assert field in usage.extras
+
+
+# --------------------------------------------------------------------------
+# Synthetic
+# --------------------------------------------------------------------------
+def test_handles_pydantic_via_model_dump() -> None:  # objects are read via model_dump()
+    class FakePydantic:
+        def model_dump(self) -> dict:
+            usage_block = {
+                "input_tokens": 5,
+                "output_tokens": 7,
+                "cache_creation_input_tokens": 0,
+                "cache_read_input_tokens": 0,
+                "cache_creation": {
+                    "ephemeral_5m_input_tokens": 0,
+                    "ephemeral_1h_input_tokens": 0,
+                },
+            }
+            return {
+                "model": "claude-sonnet-4-6",
+                "content": [{"type": "text", "text": "hi"}],
+                "usage": usage_block,
+            }
+
+    usage = extract_anthropic_native(FakePydantic(), model_id="claude-sonnet-4-6")
+    assert (usage.input, usage.output) == (5, 7)
+    assert usage.api == "native"
+
+
+def test_multiple_tool_use_blocks_counted() -> None:
+    """Three tool_use blocks next to a text block must be counted as three tool calls."""
+    tool_blocks = [{"type": "tool_use", "id": f"t{i}"} for i in (1, 2, 3)]
+    payload = {
+        "usage": {"input_tokens": 10, "output_tokens": 20},
+        "content": [
+            {"type": "text", "text": "..."},
+            *tool_blocks,
+        ],
+    }
+    usage = extract_anthropic_native(payload, model_id="claude-sonnet-4-6")
+    assert usage.tool_calls == 3
+
+
+def test_no_usage_returns_zeros() -> None:
+    """An empty response yields zero input/output and no non-zero numeric fields."""
+    usage = extract_anthropic_native({}, model_id="claude-sonnet-4-6")
+    assert (usage.input, usage.output) == (0, 0)
+    assert not usage.nonzero_numeric()
+
+
+def test_survives_non_dict_usage() -> None:  # malformed usage or a None response yields zeros
+    bad_inputs = (({"usage": True}, "input"), ({"usage": "bogus"}, "output"), (None, "input"))
+    for raw, field in bad_inputs:
+        assert getattr(extract_anthropic_native(raw, model_id="x"), field) == 0
diff --git a/tests/unit/test_wrapper_anthropic.py b/tests/unit/test_wrapper_anthropic.py
new file mode 100644
index 0000000..08c7a70
--- /dev/null
+++ b/tests/unit/test_wrapper_anthropic.py
@@ -0,0 +1,223 @@
+"""Anthropic wrapper tests — fake client, no live API."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from lago_agent_sdk import LagoSDK
+
+
+class FakeMessage:
+    """Stand-in for Anthropic's pydantic Message: attribute access plus model_dump()."""
+
+    def __init__(self, payload: dict[str, Any]) -> None:
+        # .usage / .content are plain attributes so the _is_message_like check can see them
+        self.usage, self.content = payload.get("usage"), payload.get("content", [])
+        self._payload = payload
+
+    def model_dump(self) -> dict[str, Any]:
+        """Return the very dict this fake was built from (no copy)."""
+        return self._payload
+
+
+class FakeStreamEvent:
+    """Stand-in for one Anthropic MessageStreamEvent (MessageDelta/Start/etc.); dump-only."""
+
+    def __init__(self, payload: dict[str, Any]) -> None:
+        self._payload = payload  # handed back unchanged by model_dump()
+
+    def model_dump(self) -> dict[str, Any]:
+        return self._payload
+
+
+class FakeMessages:  # stand-in for client.messages; counts create()/stream() calls
+    def __init__(self) -> None:
+        self.create_calls = 0
+        self.stream_calls = 0
+
+    def create(self, **kwargs: Any) -> Any:
+        self.create_calls += 1
+        assert "extra_lago" not in kwargs  # fails if extra_lago is forwarded to the client
+        if kwargs.get("stream") is True:  # streaming path: plain iterator of three events
+            events = [
+                FakeStreamEvent({"type": "message_start", "message": {"usage": {"input_tokens": 12}}}),
+                FakeStreamEvent(
+                    {
+                        "type": "message_delta",
+                        "delta": {"stop_reason": "end_turn"},
+                        "usage": {"input_tokens": 12, "output_tokens": 22},
+                    }
+                ),
+                FakeStreamEvent({"type": "message_stop"}),
+            ]
+            return iter(events)
+        return FakeMessage(  # non-streaming path: one complete message with 8/16 usage
+            {
+                "model": kwargs.get("model", "claude-sonnet-4-6"),
+                "content": [{"type": "text", "text": "hi"}],
+                "usage": {
+                    "input_tokens": 8,
+                    "output_tokens": 16,
+                    "cache_creation_input_tokens": 0,
+                    "cache_read_input_tokens": 0,
+                    "cache_creation": {
+                        "ephemeral_5m_input_tokens": 0,
+                        "ephemeral_1h_input_tokens": 0,
+                    },
+                },
+            }
+        )
+
+    def stream(self, **kwargs: Any) -> Any:
+        self.stream_calls += 1
+        assert "extra_lago" not in kwargs  # same check as in create()
+        outer = self  # captured by the nested manager, whose own self is self_inner
+
+        class _FakeStreamManager:
+            def __enter__(self_inner) -> Any:
+                outer._final = FakeMessage(  # final message is built on entry, usage 5/11
+                    {
+                        "model": kwargs.get("model", "claude-sonnet-4-6"),
+                        "content": [{"type": "text", "text": "hi"}],
+                        "usage": {
+                            "input_tokens": 5,
+                            "output_tokens": 11,
+                        },
+                    }
+                )
+                return _FakeStreamHandle(outer._final)
+
+            def __exit__(self_inner, exc_type, exc, tb) -> Any:  # noqa: D401
+                return False  # falsy: exceptions from the with-block propagate
+
+        return _FakeStreamManager()
+
+
+class _FakeStreamHandle:  # what the fake stream manager's __enter__ returns
+    def __init__(self, final: FakeMessage) -> None:
+        self._final = final
+        self.text_stream = iter(["hi"])  # single-chunk text stream
+
+    def get_final_message(self) -> FakeMessage:
+        return self._final
+
+
+class FakeAnthropic:
+    """Mimics `from anthropic import Anthropic; Anthropic(api_key=...)`; only `.messages` exists."""
+
+    def __init__(self) -> None:
+        self.messages = FakeMessages()  # fresh call counters per fake client
+
+
+# The module path must contain 'anthropic' so that detector.py routes to the anthropic wrapper.
+FakeAnthropic.__module__ = "anthropic.fake"
+
+
+def _new_sdk(default_sub: str = "sub_test") -> tuple[LagoSDK, list[dict]]:
+    """Return ``(sdk, sink)``: a LagoSDK whose queue sender appends each batch to ``sink``."""
+    sink: list[dict] = []
+    sdk = LagoSDK(api_key="dummy", default_subscription_id=default_sub)
+
+    # Replace the queue's private sender with an in-memory one so that tests can
+    # inspect the emitted events directly.
+    sdk._queue._sender = lambda batch: sink.extend(batch)  # type: ignore[attr-defined]
+    return sdk, sink
+
+
+def test_wrap_messages_create_emits_input_and_output() -> None:
+    """Non-streaming create(): the response passes through and both token events are emitted."""
+    sdk, emitted = _new_sdk()
+    client = sdk.wrap(FakeAnthropic())
+    message = client.messages.create(model="claude-sonnet-4-6", messages=[])
+    assert message.usage["input_tokens"] == 8
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    totals = {ev["code"]: int(float(ev["properties"]["value"])) for ev in emitted}
+    assert totals["llm_input_tokens"] == 8
+    assert totals["llm_output_tokens"] == 16
+
+
+def test_wrap_strips_extra_lago_and_uses_per_call_sub() -> None:
+    """extra_lago never reaches the client; its subscription and dimensions tag the events."""
+    sdk, emitted = _new_sdk("sub_default")
+    client = sdk.wrap(FakeAnthropic())
+    client.messages.create(
+        model="claude-sonnet-4-6",
+        messages=[],
+        extra_lago={"subscription": "sub_per_call", "dimensions": {"feature": "X"}},
+    )
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    assert all(ev["external_subscription_id"] == "sub_per_call" for ev in emitted)
+    assert emitted[0]["properties"]["feature"] == "X"
+
+
+def test_wrap_double_wrap_is_idempotent() -> None:
+    """Wrapping the same client three times must still emit each event exactly once."""
+    sdk, emitted = _new_sdk()
+    fake = FakeAnthropic()
+    for _ in range(3):
+        sdk.wrap(fake)
+    fake.messages.create(model="claude-sonnet-4-6", messages=[])
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    assert len(emitted) == 2  # one input + one output event, not 6
+    assert fake.messages.create_calls == 1
+
+
+def test_wrap_create_with_stream_captures_usage_from_message_delta() -> None:
+    """create(stream=True): all three events pass through and 12/22 usage is emitted."""
+    sdk, emitted = _new_sdk()
+    client = sdk.wrap(FakeAnthropic())
+    chunks = list(client.messages.create(model="claude-sonnet-4-6", messages=[], stream=True))
+    assert len(chunks) == 3
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    totals = {ev["code"]: int(float(ev["properties"]["value"])) for ev in emitted}
+    assert totals["llm_input_tokens"] == 12
+    assert totals["llm_output_tokens"] == 22
+
+
+def test_wrap_messages_stream_context_manager_emits_on_close() -> None:
+    """messages.stream() as a context manager: the final message's 5/11 usage is emitted."""
+    sdk, emitted = _new_sdk()
+    client = sdk.wrap(FakeAnthropic())
+    with client.messages.stream(model="claude-sonnet-4-6", messages=[]) as handle:
+        list(handle.text_stream)
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    totals = {ev["code"]: int(float(ev["properties"]["value"])) for ev in emitted}
+    assert totals["llm_input_tokens"] == 5
+    assert totals["llm_output_tokens"] == 11
+
+
+def test_instrumentation_failure_does_not_break_call() -> None:
+    """A response whose usage/model_dump raise must still be returned by the wrapped create()."""
+    sdk, _ = _new_sdk()
+
+    class BadMessage:
+        @property
+        def usage(self):
+            raise RuntimeError("boom")
+
+        @property
+        def content(self):
+            return []
+
+        def model_dump(self):
+            raise RuntimeError("boom")
+
+    class BadMessages:
+        def create(self, **_ignored):
+            return BadMessage()
+
+    class BadAnthropic:
+        def __init__(self):
+            self.messages = BadMessages()
+
+    BadAnthropic.__module__ = "anthropic.fake"
+
+    wrapped = sdk.wrap(BadAnthropic())
+    result = wrapped.messages.create(model="x", messages=[])
+    assert result is not None
+    sdk.shutdown(timeout=1.0)
diff --git a/uv.lock b/uv.lock
index f1fc5c8..a40fdc7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -15,6 +15,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
 ]
 
+[[package]]
+name = "anthropic"
+version = "0.103.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "docstring-parser" },
+    { name = "httpx" },
+    { name = "jiter" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fb/57/0b758b08cf4606c94d63a997d67a0063f7438efbaf81cfedd0d7c0c69d67/anthropic-0.103.1.tar.gz", hash = "sha256:21c12f4fc0fdd87a2e80d58479cd0af640062b3cfb82bbfa01c7977acd4defeb", size = 848877, upload-time = "2026-05-19T15:43:27.698Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ad/ec/cf357cf571377a39552c1530390a9b79bbdb6ea463f48fbe4e3624141e3b/anthropic-0.103.1-py3-none-any.whl", hash = "sha256:b9a523fac34e64caf6ee55fdbda213950e6a744b906fce100d34909aad2cd8f4", size = 832551, upload-time = "2026-05-19T15:43:29.663Z" },
+]
+
 [[package]]
 name = "anyio"
 version = "4.13.0"
@@ -345,6 +364,24 @@ toml = [
     { name = "tomli", marker = "python_full_version <= '3.11'" },
 ]
 
+[[package]]
+name = "distro"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
+]
+
+[[package]]
+name = "docstring-parser"
+version = "0.18.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/4d/f332313098c1de1b2d2ff91cf2674415cc7cddab2ca1b01ae29774bd5fdf/docstring_parser-0.18.0.tar.gz", hash = "sha256:292510982205c12b1248696f44959db3cdd1740237a968ea1e2e7a900eeb2015", size = 29341, upload-time = "2026-04-14T04:09:19.867Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/5f/ed01f9a3cdffbd5a008556fc7b2a08ddb1cc6ace7effa7340604b1d16699/docstring_parser-0.18.0-py3-none-any.whl", hash = "sha256:b3fcbed555c47d8479be0796ef7e19c2670d428d72e96da63f3a40122860374b", size = 22484, upload-time = "2026-04-14T04:09:18.638Z" },
+]
+
 [[package]]
 name = "eval-type-backport"
 version = "0.3.1"
@@ -446,6 +483,109 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
 ]
 
+[[package]]
+name = "jiter"
+version = "0.15.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/b5/55f06bb281d92fb3cc86d14e1def2bd908bb77693183e7cb1f5a3c388b0c/jiter-0.15.0.tar.gz", hash = "sha256:4251acc80e2b7c9b7b8823456ea0fceeb0734dac2df7636d3c711b38476b5a76", size = 166640, upload-time = "2026-05-19T10:09:48.361Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1d/da/76a2c7e510ba15fe323d9509c223ab272da79ea59f54488f4a78da6426db/jiter-0.15.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:edebcf7d1f601199084bb6e844d7dc67e03e04f6ac786b0332d616635c4ff7a4", size = 310849, upload-time = "2026-05-19T10:06:51.944Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/8e/827be942883a4dc0862c48626ff41af3320b1902d136a0bf4b9041f2c567/jiter-0.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9f924585cdacf631cd382b657966847bb537bf9ed0a6f9b991da5f05a631480f", size = 314991, upload-time = "2026-05-19T10:06:53.522Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/38/be2832be361ba1b9517c76f46d30b64e985be1dd43c974f4c3a4b1844436/jiter-0.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abbf258599526ad0326fe51e252e24f2bd6f24f1852681b4b78feda3808f1d18", size = 340843, upload-time = "2026-05-19T10:06:55.071Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/d8/90f01fb83c0c7ba509303ec93e32a308fbfa167d264860b01c0fd0dbbd06/jiter-0.15.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c468136b8bd6bb18c8786e4236a1fa27362f24cb23450ba0cb204ab379b8e6f", size = 365116, upload-time = "2026-05-19T10:06:56.893Z" },
+    { url = "https://files.pythonhosted.org/packages/91/38/94593d34f8c67a0b6f6cbc027f016ffa9780b3a858a7a86f6fd7a15bcc1e/jiter-0.15.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05906b93d72f03339e6bb7cf8dc10ebda64a0266126eed6beba79e20abcf5fd4", size = 457970, upload-time = "2026-05-19T10:06:58.707Z" },
+    { url = "https://files.pythonhosted.org/packages/df/04/d79962dd49d00c97e2a9b4cacea1947904d02135936960351f9a96d4c1a6/jiter-0.15.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:30ce785d2adb8e32c3f7741442370a74834ec4c01f3c48f0750227a0b4ef27d6", size = 375744, upload-time = "2026-05-19T10:07:00.471Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/2e/5d37abe2be0e819c21e2338bebd410e481763ce526a9138c8c3652fa0123/jiter-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fd73e3da91a0a722d67165e849ce2cdc10de0e0d48738c142be8c6c5f310f4c", size = 349609, upload-time = "2026-05-19T10:07:01.829Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/90/98768ad2ed90c1fda15d64157de2dfbf73c1c074d4b1bfaca915480bc7cf/jiter-0.15.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:ceb8fc27d38793f9c97149be8302720c5b22e5c195a37bf2c45dc36c4600a512", size = 354366, upload-time = "2026-05-19T10:07:03.587Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/c4/fbfb806209f1fe4b7dccdfb07bc62bb044300734a945b06fd64db446ef6a/jiter-0.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d726e3ceeb337191324b49de298142f27c3ad10886341555d1d5315b5f252c6a", size = 393519, upload-time = "2026-05-19T10:07:05.08Z" },
+    { url = "https://files.pythonhosted.org/packages/37/1c/b9c257cd70cb453b6d10f3ebf0402cdb11669ab455389096f09839670290/jiter-0.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2c8aea7781d2a372227871de4e1a1332aa96f5a89fd76c5e835dafdbad102887", size = 519952, upload-time = "2026-05-19T10:07:06.589Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/1a/aa85027db7ab15829c12feebbc33b404f53fc399bd559d85fd0d6365ff0d/jiter-0.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cf4bd113a69c0a740e27cb962ce10630c36d2b8f59d759a651b955ee9d18a823", size = 550770, upload-time = "2026-05-19T10:07:08.228Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/54/8c3f65c8a5687925e84708f19d63f7f37d28e2b86a48d951702ad94424d8/jiter-0.15.0-cp310-cp310-win32.whl", hash = "sha256:d92a5cd21fdb083931d546c207aa29633787c5dc5b02daab2d32b843f88a2c53", size = 209303, upload-time = "2026-05-19T10:07:10.006Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/72/0528a1eb9f42dd2d8228a0711458628f35924d131f623eaebc35fd23d3d4/jiter-0.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:e58585a58209d72691ce2d62a9147445f5a87beb0bde97fde284c96ae392a3d1", size = 200404, upload-time = "2026-05-19T10:07:11.426Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/13/daa722f5765c393576f466378f9dfd29d77c9bed939e0688f96afa3601ea/jiter-0.15.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0f862193b8696249d22ec433e85fd2ab0ad9596bc3e45e6c0bc55e8aeba97be2", size = 310899, upload-time = "2026-05-19T10:07:12.89Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/82/2d2551829b082f4b6d82b9f939b031fb808a10aab1ec0664f82e150bb9a2/jiter-0.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1303d4d68a9b051ea90502402063ecf3807da00ad2affa19ca1ae3b90b3c5f67", size = 314963, upload-time = "2026-05-19T10:07:14.539Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/0a/8b1a51466f7fe9f31dbe4bc7e0ca848674f9825e0f737b929b97e8c60aa7/jiter-0.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:392b8ab019e5502d08aff85c6272209c24bc2cbe706ea82a56368f524236614a", size = 341730, upload-time = "2026-05-19T10:07:15.869Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/2a/e71dea19822e2e404e83992a08c1d6b9b617bb944f28c9c2fbd85d02c91e/jiter-0.15.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:773b6eb282ce11ee19f05f6b2d4404fa308e5bbd353b0b80a0262caad6db2cd7", size = 366214, upload-time = "2026-05-19T10:07:17.259Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/59/97e1fa539d124a509a00ab7f669289d1c1d236ecabf12948a18f16c91082/jiter-0.15.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d2c0c44d569ce0f2850f5c926f8caeb5f245fbc84475aeb36efccc2103e6dbd", size = 459527, upload-time = "2026-05-19T10:07:18.741Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/7a/4a68d331aef8cf2e2393c14a3aacb635c62aa86071b0229899fb5baaa907/jiter-0.15.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:032396229564bca02440396bd327710719f724f5e7b7e9f7a8eb3faa4a2c2281", size = 375451, upload-time = "2026-05-19T10:07:20.208Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/7e/1c445c2b6f0e30a274dc8082e0c3c7825411cce80d726bccd697c98cc8d3/jiter-0.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d37768fce7f88dd2a8c6091f2325dea27d30d30d5c6e7a1c0f0af77723b708", size = 349428, upload-time = "2026-05-19T10:07:22.372Z" },
+    { url = "https://files.pythonhosted.org/packages/00/94/e20d38984fc17a636371bffd2ae0f698124fdc8e75ef969cd2da6ba7cea7/jiter-0.15.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:2c9cb907439d20bd0c7d7565ca01ee52234203208433749bae5b516907526928", size = 355405, upload-time = "2026-05-19T10:07:23.916Z" },
+    { url = "https://files.pythonhosted.org/packages/94/fa/4d09f814779d0ea80a28ed8e4c6662ec9a4a8ecef0ac52190ebac6262d14/jiter-0.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9100ddbec09741cc66feb0fc6773f8bdbd0e3c345689368f260082ff85dcc0cd", size = 393688, upload-time = "2026-05-19T10:07:25.854Z" },
+    { url = "https://files.pythonhosted.org/packages/54/9d/8eb5d4fb8bf7e93a75964a5da71a75c67c864baf7fa3f98598187b3c7e57/jiter-0.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ae1b0d82ac2d987f9ea512b1c9adfcc71a28de3dea3a6039b54d76cffda9901e", size = 520853, upload-time = "2026-05-19T10:07:27.303Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/2c/5e07874e59e623a943a0acf1552a80d05b70f31b402287a8fc6d7ec634c7/jiter-0.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8020c99ec13a7db2b6f96cbe82ef4721c88b426a4892f27478044af0284615ef", size = 551016, upload-time = "2026-05-19T10:07:28.846Z" },
+    { url = "https://files.pythonhosted.org/packages/22/ed/d2d34422143474cadc15b60d482b1c35683dbc5c63c24346ddd0df09bcaf/jiter-0.15.0-cp311-cp311-win32.whl", hash = "sha256:42bfb257930800cf43e7c62c832402c704ab60797c992faf88d20e903eac8f32", size = 209518, upload-time = "2026-05-19T10:07:30.431Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/7d/52778b930e5cc3e52a37d950b1c10494244308b4329b25a0ff0d88303a81/jiter-0.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:860a74063284a2ae9bfedd694f299cc2c68e2696c5f3d440cc9d18bb81b9dd04", size = 200565, upload-time = "2026-05-19T10:07:32.125Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/4f/d9b4067feb69b3fa6eb0488e1b59e2ad5b463fe39f59e527eab2aca00bb0/jiter-0.15.0-cp311-cp311-win_arm64.whl", hash = "sha256:37a10c377ce3a4a85f4a67f28b7afe093154cde77eaf248a72e856aa08b4d865", size = 195488, upload-time = "2026-05-19T10:07:33.846Z" },
+    { url = "https://files.pythonhosted.org/packages/44/53/4f6bddbcde3c71e56d0aa1337ec95950f3d27dd4153e25aadf0feac71751/jiter-0.15.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0e90a1c315a0226ec822d973817967f9223b7701546c8c2a7913e7ab0926294d", size = 308793, upload-time = "2026-05-19T10:07:35.25Z" },
+    { url = "https://files.pythonhosted.org/packages/01/84/c01099b59a285a1ebba64ae93f62bfa036675340fd1b0045ae65890a0442/jiter-0.15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c9004af7c8d67cce7f1aae1026fb55607f4aa600710d08ede3a3ce4aeefe7e0", size = 309570, upload-time = "2026-05-19T10:07:36.919Z" },
+    { url = "https://files.pythonhosted.org/packages/58/64/8fb7f9d45bb98190355454cd04dad8d8f27223d6bd52f83af07f637168a6/jiter-0.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c210f8b35dc6f30aafd4b4365ca89b9d1189f21ab49b8e68fa6322a847aef138", size = 336783, upload-time = "2026-05-19T10:07:38.694Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/b6/f5739011d009b3a30f6a53c5240979030ba29ae46a8c67e3a15759f7c37d/jiter-0.15.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f30bae8bc1c2d613e28e5af3e8cceb09b742f1c8a8a5f839fb67afaffc03b61", size = 363555, upload-time = "2026-05-19T10:07:40.832Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/12/98a9d9f766665e8a3b6252454e17cb0c464606a28cf2fa09399b003345fa/jiter-0.15.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c60e71b6d10cfc284c9bf36bd885e8d44c46f688ce50aa91b5edd90181dea687", size = 452255, upload-time = "2026-05-19T10:07:42.62Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/d5/60f972840f79c5e7544fce567c56f1e4e50468f996baba3e78d823dd62a6/jiter-0.15.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ab068bce62a45aa3e7367eceaffb5dde60b7eb853be8dece45132e3d0ff4879", size = 373559, upload-time = "2026-05-19T10:07:44.201Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/cf/d46ef1234ba335aabc2f013210db8e0821a22f5e644a2e9449df199ecc23/jiter-0.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa248c9eb220197d363f688818dac2fd4b2f0cd7d843ca7105d652034823427d", size = 346055, upload-time = "2026-05-19T10:07:46.005Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/63/4d2749d8d54d230bad9b3a6b0d00cc28c6ff6b2fdffc26a8ccf76cc5a974/jiter-0.15.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2a77aadd57cac1682e4401a72724d2796d89a4ba129b1a5812aa94ee480826eb", size = 351406, upload-time = "2026-05-19T10:07:47.855Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/b9/9965b990035d8773328e0a8c8b457a87bf2b19f6c4126d9d99296be5d16a/jiter-0.15.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ae901f3a55bfafdde31d289590fa25e3245735a2b1e8c7cc15871710a002871", size = 389357, upload-time = "2026-05-19T10:07:49.665Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/55/9ddf903deda1413e87fed792f416b7123daee5b8efbad6a202a7421c36a5/jiter-0.15.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f0b271b462769543716f92d3a4f90527df6ef5ed05ee95ec4137f513e21e1b77", size = 517263, upload-time = "2026-05-19T10:07:51.537Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/76/a0c40ad064d3a20a4fde231e35d56e9a01ce82164278180e82d5daf85469/jiter-0.15.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2fb6a5d26af81fc0f00f9360a891e05cf755e149bba391c4d563adc54812973d", size = 548646, upload-time = "2026-05-19T10:07:53.196Z" },
+    { url = "https://files.pythonhosted.org/packages/23/4f/eca9b954942916ba2f453891b8593ab444cd872396fe66a3936616f236f3/jiter-0.15.0-cp312-cp312-win32.whl", hash = "sha256:c2f6bb8b5216ab9e7873bc08b5d7bef2b8abbb578a3069bf1cd14a45d71d771d", size = 206427, upload-time = "2026-05-19T10:07:55.307Z" },
+    { url = "https://files.pythonhosted.org/packages/95/bf/8ead82a87495149542748e828d153fd232a512a22c83b02c4815c1a9c7d8/jiter-0.15.0-cp312-cp312-win_amd64.whl", hash = "sha256:40b2c7e92c44a84d748d21706c68dc6ff8161d80b59c99d774721a0d2317d7c7", size = 197300, upload-time = "2026-05-19T10:07:56.651Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/e4/9b8a78fb2d894471bc344e37f1949bdd784bd914d031dba0ba3a40c71dd7/jiter-0.15.0-cp312-cp312-win_arm64.whl", hash = "sha256:cc0bc345cf2df9d1c00ac443f50d543c1ccfa8b0422cb85b1ab70d681c0b255b", size = 192702, upload-time = "2026-05-19T10:07:58.307Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/f4/f708c900ecee41b2025ef8413d5351e5649eb2125c506f6720cc69b06f5c/jiter-0.15.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1c11465f97e2abf45a014b83b730222f8f1c5335e802c7055a67d50de6f1f4e3", size = 307829, upload-time = "2026-05-19T10:07:59.704Z" },
+    { url = "https://files.pythonhosted.org/packages/86/59/db537c0949e83668c38481d426b9f2fd5ab758c4ee53a811dd0a510626a0/jiter-0.15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d1e7b1776f0797956c509e123d0952d10d293a9492dea9f288ab9570ec01d1a5", size = 308445, upload-time = "2026-05-19T10:08:01.184Z" },
+    { url = "https://files.pythonhosted.org/packages/37/38/ea0e13b18c30ef951da0d47d39e7fa9edb82a93a62990ffbd7cea9b622d4/jiter-0.15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:351a341c2105aa430b7047e30f1bf7975f6313b00165d3fc07be2edaf741f279", size = 336181, upload-time = "2026-05-19T10:08:02.688Z" },
+    { url = "https://files.pythonhosted.org/packages/58/fc/2303901b16c4ba05865588990a420c0b4156270b44379c20931544a1d962/jiter-0.15.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4ab395feec8d249ec4044e228e98a7033f043426a265df439dc3698823f0a4e4", size = 362985, upload-time = "2026-05-19T10:08:04.394Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/6f/11bace093c52e7d4d26c8e606ccd7ae8c972189622469ec0d9e28161e28b/jiter-0.15.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2a438005b6f22d0273413484d6094d7c2c5d10ec1b3a3bf128e0d1d3ba53258", size = 453292, upload-time = "2026-05-19T10:08:05.967Z" },
+    { url = "https://files.pythonhosted.org/packages/22/db/987f2f086ca4d7a6582eb4ccd513f9b26b42d9e4243a087609a3137a8fc7/jiter-0.15.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f18f85e4218d1b40f000f42a92239a7a61a902cd42c65e6c360dbd17dcb20894", size = 373501, upload-time = "2026-05-19T10:08:07.857Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/7c/89fbcabb2739b7a5b8dc959a1b6c5761f6484f5fed3486854b3c789bb1de/jiter-0.15.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1aa62e277fc1cbd80e6deacae6f4d983b41b3d7728e0645c5d741a6149bba45", size = 344683, upload-time = "2026-05-19T10:08:09.431Z" },
+    { url = "https://files.pythonhosted.org/packages/30/6f/6cca7692e7dddfec6d8d76c54dc97f2af2a41df4ac0674b999df1f09a5f3/jiter-0.15.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:6550fa135c7deb8ead6af49ed7ff648532ea8334a1447fe34a36315ef79c5c29", size = 350892, upload-time = "2026-05-19T10:08:11.352Z" },
+    { url = "https://files.pythonhosted.org/packages/39/14/0338d6190cb8e6d22e677ab1d4eabd4117f67cca70c54cd04b82ff64e068/jiter-0.15.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:066f8f33f18b2419cd8213b2436fa7fbc9c499f315971cfa3ce1f9820c001b1b", size = 388723, upload-time = "2026-05-19T10:08:12.912Z" },
+    { url = "https://files.pythonhosted.org/packages/90/31/cc19f4a1bdb6afb09ce6a2f2615aa8d44d994eba0d8e6105ed1af920e736/jiter-0.15.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:75e8a04e91432dde9f1838373cf93d23726c79d3e908d319acf0e796f85592e7", size = 516648, upload-time = "2026-05-19T10:08:14.808Z" },
+    { url = "https://files.pythonhosted.org/packages/49/9f/833c541512cd091b63c10c0381973dfe11bc7a503a818c16384417e0c81e/jiter-0.15.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a97261f1fccb8e50ecd2890a96e46efdc3f57c80a197324c6777827231eca712", size = 547382, upload-time = "2026-05-19T10:08:16.927Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/11/e7b70e91f90bc4477e8eee9e8a5f7cf3cb41b4525d6394dc98a714eb8f7f/jiter-0.15.0-cp313-cp313-win32.whl", hash = "sha256:c77496cb10bd7549690fbbab3e5ec05857b83e49276f4a9423a766ddd2afcd4c", size = 205845, upload-time = "2026-05-19T10:08:18.401Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/23/5c20d9ad6f02c493e4023e5d2d09e1c1f15fe2753c9102c544aff068a88e/jiter-0.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:b15741f501469009ae0ae90b7147958a664a7dede40aa7ff174a8a4645f546d0", size = 196842, upload-time = "2026-05-19T10:08:20.131Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/11/1eb400ef248e8c925fd883fbe325daf5e42cd1b0d308539dd332bd4f7ffc/jiter-0.15.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d6a60072b44c3c2b797a7ddcbcbbf2b34ea3cfd4721580fbfd2a09d9d9b84ba", size = 192212, upload-time = "2026-05-19T10:08:21.807Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/60/2fd8d7c79da8acf9b7b277c7616847773779356b92acfc9bb158452174da/jiter-0.15.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ef1fd24d9413f6209e00d3d5a453e67acfe004a25cc6c8e8484faed4311ab9e8", size = 315065, upload-time = "2026-05-19T10:08:23.218Z" },
+    { url = "https://files.pythonhosted.org/packages/46/f4/008fb7d65e8ac2abf00811651a661e025c4ba80bbc6f378450384ddd3aed/jiter-0.15.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:144f8e72cb53dab146347b91cceac01f5481237f2b93b4a339a1ee8f8878b67c", size = 339444, upload-time = "2026-05-19T10:08:24.701Z" },
+    { url = "https://files.pythonhosted.org/packages/00/55/90b0c7b9c6896c0f2a591dd36d36b71d22e09674bfef178fa03ba3f81499/jiter-0.15.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553fcac2ef2cb990877f9fc0833b8b629a3e6a5670b6b5fd58219b41a653ddc4", size = 347779, upload-time = "2026-05-19T10:08:26.408Z" },
+    { url = "https://files.pythonhosted.org/packages/51/6b/69666cec5000fd57734c118437394516c749ae8dbeea9fb66d6fef9c4775/jiter-0.15.0-cp313-cp313t-win_amd64.whl", hash = "sha256:774f93f65031856bf14ad9f59bdcab8b8cad501e5ceabd51ba3525f76937a25b", size = 200395, upload-time = "2026-05-19T10:08:28.055Z" },
+    { url = "https://files.pythonhosted.org/packages/39/04/a6aa62cd27e8149b0d28df5561f10f6cceaf7935a9ccf3f1c5a05f9a0cd8/jiter-0.15.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f1e1754960f38ec40613a07e5e372df67acb3b890fb383b6fb3de3e49ddbf3c7", size = 190516, upload-time = "2026-05-19T10:08:29.35Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/d2/079f350ebf7859d081de30aa890f9e3be68516f754f3ba32366ffff4dcee/jiter-0.15.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:ac0d9ddea4350974be7a221fc25895f251a8fee748c889bdced2141c0fec1a49", size = 308884, upload-time = "2026-05-19T10:08:31.667Z" },
+    { url = "https://files.pythonhosted.org/packages/04/4e/a2c30a7f69b48c03b20935d647479106fe932f6e63f75faf53937197e05d/jiter-0.15.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:01a8222cf05ab1128e239421156c207949808acaaea2bdfd33130ae666786e86", size = 310028, upload-time = "2026-05-19T10:08:33.304Z" },
+    { url = "https://files.pythonhosted.org/packages/40/90/2e7cdfd3cf8ca967be38c48f5cf474d79f089efaf559a40f15984a77ae69/jiter-0.15.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:182226cbc930c9fab81bc2e41a4da672f89539906dadb05e75670ac07b94f71f", size = 337485, upload-time = "2026-05-19T10:08:35.259Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/11/15a1aa28b120b8ee5b4f1fb894c125046225f09847738bd64233d3b84883/jiter-0.15.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:71683c38c825452999b5717fcae07ea708e8c93003e808be4319c1b02e3d176e", size = 364223, upload-time = "2026-05-19T10:08:36.694Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/25/f442e8af5f3d0dcf47b39e83a0efd9ee45ea946aa6d04625dc3181eae3b6/jiter-0.15.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30f2218e6a9e5c18bc10fe6d41ac189c442c88eacf11bad9f28ef95a9bef00e6", size = 456387, upload-time = "2026-05-19T10:08:38.143Z" },
+    { url = "https://files.pythonhosted.org/packages/da/f4/37f2d2c9f64f49af7da652ed7532bb5a2372e588e6927c3fdd76f911db65/jiter-0.15.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5157de9f76eb4bc5ea74a1219366a25f945ad305641d74e04f59c54087091aa9", size = 374461, upload-time = "2026-05-19T10:08:39.869Z" },
+    { url = "https://files.pythonhosted.org/packages/60/28/edcfbbbf0cb15436f36664a8908a0df47ab9006298d4cd937dc08ea932d6/jiter-0.15.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c5db5527c221249a876160663ab891ace358c17f7b9c93ec1478b7f0550e5c", size = 345924, upload-time = "2026-05-19T10:08:41.668Z" },
+    { url = "https://files.pythonhosted.org/packages/47/13/89fba6398dab7f202b7278c4b4aac122399d2c0183971c4a57a3b7088df5/jiter-0.15.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:3e4540b8e74e4268811ac05db226a6a128ff572e7e0ce3f1163b693cadb184cd", size = 352283, upload-time = "2026-05-19T10:08:43.091Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/da/0f6af8cef2c565a1ab44d970f268c43ccaa72707386ea6388e6fe2b6cd26/jiter-0.15.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:62ebd14e47e9aed9df4472afcb2663668ce4d74891cd54f86bf6e44029d6dc89", size = 389985, upload-time = "2026-05-19T10:08:44.915Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/ec/b9cb7d6d29e24ee14910266157d2a279d7a8f60ee0df7fa840882976ba64/jiter-0.15.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0be6f5ad41a809f303f416d17cec92a7a725902fb9b4f3de3d19362ac0ef8554", size = 517695, upload-time = "2026-05-19T10:08:46.486Z" },
+    { url = "https://files.pythonhosted.org/packages/64/5e/6d1bda880723aae0ad86b4b763f044362448efe31e3e819635d41cb03451/jiter-0.15.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:813dfbb17d65328bf86e5f0905dd277ba2265d3ca20556e86c0c7035b7182e5a", size = 548868, upload-time = "2026-05-19T10:08:48.026Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/72/7de501cf38dcacaf35098796f3a50e0f2e338baba18a58946c618544b809/jiter-0.15.0-cp314-cp314-win32.whl", hash = "sha256:50e51156192722a9c58db112837d3f8ef96fb3c5ecc14e95f409134b08b158ec", size = 206380, upload-time = "2026-05-19T10:08:49.738Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/a9/e19addf4b0c1bdce52c6da12351e6bc42c340c45e7c09e2158e46d293ccc/jiter-0.15.0-cp314-cp314-win_amd64.whl", hash = "sha256:30ce1a5d16b5641dc935d50ef775af6a0871e3d14ab05d6fc54dff371b78e558", size = 197687, upload-time = "2026-05-19T10:08:51.088Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/c9/776b1db01db25fc6c1d58d1979a37b0a9fe787e5f5b1d062d2eaacb77923/jiter-0.15.0-cp314-cp314-win_arm64.whl", hash = "sha256:510c8b3c17a0ed9ac69850c0438dada3c9b82d9c4d589fcb62002a5a9cf3a866", size = 192571, upload-time = "2026-05-19T10:08:52.451Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/f6/45bb4670bacf300fd2c7abadbfb3af376e5f1b6ae75fd9bc069891d15870/jiter-0.15.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7553333dd0930c104a5a0db8df72bf7219fe663d731383b576bb6ed6351c984d", size = 317151, upload-time = "2026-05-19T10:08:53.867Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/68/ed635ad5acd7b73e454283083bbb7c8205ad10e88b0d9d7d793b09fe8226/jiter-0.15.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2143ab06181d2b029eedcb6af3cebe95f11bbac62441781860f98ee9330a6a6", size = 341243, upload-time = "2026-05-19T10:08:55.383Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/db/3ff4176b817b8ea33879e71e13d8bc2b0d481a7ed3fe9e080f333d415c16/jiter-0.15.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6eac374c5c975709b69c10f09afd199df74150172156ad10c8d4fd785b7da995", size = 363629, upload-time = "2026-05-19T10:08:56.928Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/24/5f8270e0ba9c883582f96f722f8a0b58015c7ce1f8c6d4571cf394e99b6b/jiter-0.15.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3b3b775e33d3bfaec9899edc526ae97b0da0bf9d071a46124ba419149a414f8", size = 456198, upload-time = "2026-05-19T10:08:58.618Z" },
+    { url = "https://files.pythonhosted.org/packages/45/5b/76fc02b0b5c54c3d18c60653156e2f76fde1816f9b4722db68d6ee2c897e/jiter-0.15.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3071db3346334beae1360b46da4606da57bf3528c167b3c38533afaf9f2c5", size = 373710, upload-time = "2026-05-19T10:09:00.151Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/52/4310821b0ea9277994d3e1f49fc6a4b34e4800caebacb2c0af81da59a454/jiter-0.15.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6694a173ecabc12eb60efbc0b474464ead1951ff65cd8b1e72100715c64512b", size = 349901, upload-time = "2026-05-19T10:09:01.621Z" },
+    { url = "https://files.pythonhosted.org/packages/93/fe/67648c35b3594fba8854ac64cc8a826d8bcd18324bbdb53d77697c60b6ef/jiter-0.15.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:a254e10b593624d230c365b6d616b22ca0ad65e63a16e6631c2b3466022e6ba8", size = 352438, upload-time = "2026-05-19T10:09:03.216Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/28/0a1879d07ad6b3e025a2750027363452ced93c2d16d1c9d4b153ffd51c91/jiter-0.15.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d8d2955167274e15d79a7a020afdd9b39c990eb80b2d89fca695d92dcfdd38ec", size = 388152, upload-time = "2026-05-19T10:09:04.741Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/78/46c6f6b56ba85c90021f4afd72ed42f691f8f84daacb5fe27277070e3858/jiter-0.15.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:acf4ee4d1fc55917239fe72972fb292dd773055d05eb040d36f4326e02cc2c0e", size = 517707, upload-time = "2026-05-19T10:09:06.231Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/cb/720662d4c88fcad606e826fef5424365527ba43ce4868a479aed8f8c507e/jiter-0.15.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:e7196e56f1cd69af1dbb07dff02dcfb260a50b45a82d409d92a06fedb32473b5", size = 548241, upload-time = "2026-05-19T10:09:08.093Z" },
+    { url = "https://files.pythonhosted.org/packages/60/e3/935b8034fd143f21125c87d51404a9e0e1449186a494405721ff5d1d695e/jiter-0.15.0-cp314-cp314t-win32.whl", hash = "sha256:7f6163c0f10b055245f814dcc59f4818da60dfe72f3e72ab89fc24b6bd5e9c52", size = 207950, upload-time = "2026-05-19T10:09:09.616Z" },
+    { url = "https://files.pythonhosted.org/packages/93/59/984fd9ece895953dad3e0880a650e766f5a2da2c5514f0eafdaaabbeb5f9/jiter-0.15.0-cp314-cp314t-win_amd64.whl", hash = "sha256:980c256edb05b78a111b99c4de3b1d32e31634b867fd1fc2cf726e7b7bba9854", size = 200055, upload-time = "2026-05-19T10:09:11.367Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/a4/cf8d779feb133a27a2e3bc833bccb9e13aa332cdf820497ebf72c10ce8c3/jiter-0.15.0-cp314-cp314t-win_arm64.whl", hash = "sha256:66b1880df2d01e206e8339769d1c7c1753bcb653efd6289e203f6f24ebada0c0", size = 191244, upload-time = "2026-05-19T10:09:12.74Z" },
+    { url = "https://files.pythonhosted.org/packages/65/43/1fc62172aa98b50a7de9a25554060db510f85c89cfbed0dfe13e1907a139/jiter-0.15.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:411fa4dfa5a7ae3d11491027ffb9beadec3996010a986862db70d91abba1c750", size = 305585, upload-time = "2026-05-19T10:09:35.995Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/c4/dd58fcd9e2df83666e5c1c1347bef58ce919cd8efc3ffa38aeea62ce493b/jiter-0.15.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:2b0074e2f56eb2dacca1689760fd2852a068f85a0547a157b82cb4cafeb6768b", size = 306936, upload-time = "2026-05-19T10:09:37.435Z" },
+    { url = "https://files.pythonhosted.org/packages/39/86/b695e16f1180c07f43ea98e73ecd21cf63fa2e1b0c1103739013784d11ae/jiter-0.15.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:913d02d29c9606643418d9ccfc3b72492ab25a6bf7889934e09a3490f8d3438b", size = 342453, upload-time = "2026-05-19T10:09:39.294Z" },
+    { url = "https://files.pythonhosted.org/packages/34/56/55d76614af37fe3f22a3347d1e410d2a15da581997cb2da499a625000bb5/jiter-0.15.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b15d3ec9b0449c40e85319bdb4caa8b77ab526e74f5532ed94bec15e2f66822c", size = 345606, upload-time = "2026-05-19T10:09:40.727Z" },
+    { url = "https://files.pythonhosted.org/packages/73/38/505941b2b092fd5bbbd60a52a880db1173f1690ae6751bed3af1c9ddcb4e/jiter-0.15.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:631f13a3d04e97d4e083993b10f4b99530e3a10d953e2eb5e196b7dc7f812ce0", size = 303769, upload-time = "2026-05-19T10:09:42.203Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/95/a06692b29e77473f286e1ec1f426d3ca44d7b5843be8ad21d7a5f3fcdcc0/jiter-0.15.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:b6c0ffae686c39bf3737be60793783267628783ea42545632c10b291105aee45", size = 305128, upload-time = "2026-05-19T10:09:43.657Z" },
+    { url = "https://files.pythonhosted.org/packages/23/85/7270d7ad41d6061a25b950c6bf91d638bd9aacb113200a8c8d57a055fd67/jiter-0.15.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d54fb5b31dea401a41af3f8a7d2512e9b6a6a005491e6166c7e4ffab9639a9c", size = 340459, upload-time = "2026-05-19T10:09:45.452Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/8d/302cb2057b7513327b4d575cff6b1d066ee6431a5357fc3f8867cd684406/jiter-0.15.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54d5d6090cdc1b7c9e780dfb04949a990adb1e301a2fc0bbcee7de4638d33f9a", size = 344469, upload-time = "2026-05-19T10:09:46.864Z" },
+]
+
 [[package]]
 name = "jmespath"
 version = "1.1.0"
@@ -473,6 +613,9 @@ dependencies = [
 ]
 
 [package.optional-dependencies]
+anthropic = [
+    { name = "anthropic" },
+]
 bedrock = [
     { name = "boto3" },
 ]
@@ -491,8 +634,14 @@ mistral = [
     { name = "mistralai" },
 ]
 
+[package.dev-dependencies]
+dev = [
+    { name = "anthropic" },
+]
+
 [package.metadata]
 requires-dist = [
+    { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.30" },
     { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.34" },
     { name = "boto3", marker = "extra == 'dev'", specifier = ">=1.34" },
     { name = "hypothesis", marker = "extra == 'dev'", specifier = ">=6" },
@@ -506,7 +655,10 @@ requires-dist = [
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.6" },
     { name = "types-requests", marker = "extra == 'dev'", specifier = ">=2.31" },
 ]
-provides-extras = ["bedrock", "mistral", "dev"]
+provides-extras = ["bedrock", "mistral", "dev", "anthropic"]
+
+[package.metadata.requires-dev]
+dev = [{ name = "anthropic", specifier = ">=0.30" }]
 
 [[package]]
 name = "librt"
@@ -992,6 +1144,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
 ]
 
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+]
+
 [[package]]
 name = "sortedcontainers"
 version = "2.4.0"

From c23e6920df40a9d6ef4018d22c4041778f289cfe Mon Sep 17 00:00:00 2001
From: Anass <anass@getlago.com>
Date: Fri, 22 May 2026 09:33:07 +0200
Subject: [PATCH 2/5] Fix flaky test_repeated_overflow_keeps_window_sliding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The test set max_batch_size == max_buffer_size == 100, which caused the
push that brings the buffer to 100 to trigger a wake on the background
worker. The worker would take a batch (emptying the buffer) and then race
with the remaining 150 pushes to call slow_sender. On CI's slower runners
the worker sometimes squeezed in additional batches before slow_sender
finally blocked, leaving the buffer with fewer items than the expected
sliding window.

Setting max_batch_size > max_buffer_size guarantees push() never sets the
wake event (buffer can never reach max_batch_size). Combined with a long
flush_interval the worker only runs once shutdown() releases the pause in
the finally block — fully deterministic. Verified with 5 consecutive runs.
---
 tests/unit/test_buffer_overflow.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_buffer_overflow.py b/tests/unit/test_buffer_overflow.py
index 19d2e0f..d1c9907 100644
--- a/tests/unit/test_buffer_overflow.py
+++ b/tests/unit/test_buffer_overflow.py
@@ -45,7 +45,11 @@ def test_repeated_overflow_keeps_window_sliding():
     def slow_sender(batch):
         paused.wait(timeout=30.0)
 
-    q = EventQueue(sender=slow_sender, flush_interval=10.0, max_batch_size=100, max_buffer_size=100)
+    # max_batch_size > max_buffer_size keeps the background worker from ever
+    # being woken by push (buffer can never reach max_batch_size). Combined with
+    # a long flush_interval, the test is deterministic — the worker only runs
+    # once shutdown() releases `paused` in the finally block.
+    q = EventQueue(sender=slow_sender, flush_interval=60.0, max_batch_size=10_000, max_buffer_size=100)
     try:
         for i in range(250):  # 150 events overflow
             q.push({"i": i})

From 0f79c5a8342bdc590f94bedb3b8e0d9b6439b7ac Mon Sep 17 00:00:00 2001
From: Anass <anass@getlago.com>
Date: Fri, 29 May 2026 11:30:13 +0200
Subject: [PATCH 3/5] Add native OpenAI SDK support (Chat Completions +
 Responses API)

Adapter handles both API shapes with auto-detection:

  Chat Completions (client.chat.completions.create):
    usage.prompt_tokens                                -> input
    usage.completion_tokens                            -> output
    usage.prompt_tokens_details.cached_tokens          -> cache_read
    usage.prompt_tokens_details.audio_tokens           -> audio_input
    usage.completion_tokens_details.reasoning_tokens   -> reasoning   (o-series)
    usage.completion_tokens_details.audio_tokens       -> audio_output
    count of choices[0].message.tool_calls             -> tool_calls

  Responses API (client.responses.create):
    usage.input_tokens                                 -> input
    usage.output_tokens                                -> output
    usage.input_tokens_details.cached_tokens           -> cache_read
    usage.output_tokens_details.reasoning_tokens       -> reasoning
    count of output[].type == "function_call"          -> tool_calls

Wrapper covers both methods, sync + streaming, on both OpenAI and AsyncOpenAI.
For Chat Completions streaming, auto-injects stream_options.include_usage=true
when missing so the final chunk carries usage data (without that flag, OpenAI
emits no usage on streamed responses).

CanonicalUsage extended with audio_output (mapped to llm_audio_output_tokens)
to capture GPT-4o-audio output usage.

OpenAI is the first provider to actually populate llm_reasoning_tokens
(o-series surfaces reasoning tokens separately; Anthropic/Bedrock fold them
into output_tokens).

Predicted Outputs tokens (accepted/rejected_prediction_tokens) are
intentionally not surfaced -- documented in the adapter docstring as a
v1 gap.

27 new unit tests (18 adapter + 9 wrapper). 5 live integration tests gated
on OPENAI_API_KEY. 10 captured response fixtures from the real OpenAI API.

Total: 283 unit tests passing, ruff + mypy strict clean.
---
 CHANGELOG.md                                  |  14 +-
 README.md                                     |  56 +-
 pyproject.toml                                |   6 +-
 src/lago_agent_sdk/adapters/__init__.py       |   2 +
 src/lago_agent_sdk/adapters/openai_native.py  | 157 ++++
 src/lago_agent_sdk/canonical.py               |   2 +
 src/lago_agent_sdk/config.py                  |   1 +
 src/lago_agent_sdk/sdk.py                     |  10 +-
 src/lago_agent_sdk/wrappers/openai.py         | 185 +++++
 tests/integration/test_live_openai.py         | 190 +++++
 .../unit/adapters/fixtures/capture_openai.py  | 226 +++++
 .../fixtures/openai_native/01_plain_chat.json |  42 +
 .../openai_native/02_tool_use_chat.json       |  51 ++
 .../openai_native/03_cache_call1_chat.json    |  42 +
 .../openai_native/04_cache_call2_chat.json    |  42 +
 .../openai_native/05_streaming_chat.json      | 776 ++++++++++++++++++
 .../openai_native/06_reasoning_chat.json      |  42 +
 .../openai_native/07_multi_turn_chat.json     |  42 +
 .../openai_native/08_plain_responses.json     |  80 ++
 .../openai_native/09_tool_use_responses.json  |  94 +++
 .../openai_native/10_reasoning_responses.json |  88 ++
 tests/unit/adapters/test_openai_native.py     | 228 +++++
 tests/unit/test_wrapper_openai.py             | 296 +++++++
 23 files changed, 2652 insertions(+), 20 deletions(-)
 create mode 100644 src/lago_agent_sdk/adapters/openai_native.py
 create mode 100644 src/lago_agent_sdk/wrappers/openai.py
 create mode 100644 tests/integration/test_live_openai.py
 create mode 100644 tests/unit/adapters/fixtures/capture_openai.py
 create mode 100644 tests/unit/adapters/fixtures/openai_native/01_plain_chat.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/02_tool_use_chat.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/03_cache_call1_chat.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/04_cache_call2_chat.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/05_streaming_chat.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/06_reasoning_chat.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/07_multi_turn_chat.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/08_plain_responses.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/09_tool_use_responses.json
 create mode 100644 tests/unit/adapters/fixtures/openai_native/10_reasoning_responses.json
 create mode 100644 tests/unit/adapters/test_openai_native.py
 create mode 100644 tests/unit/test_wrapper_openai.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fa696ed..4d857d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,10 +5,22 @@ All notable changes to this project will be documented here. Format follows [Kee
 ## [Unreleased]
 
 ### Added
+- Native `openai` SDK support covering both APIs: `chat.completions.create` and `responses.create`, each with sync + streaming. Same coverage on `AsyncOpenAI`.
+- `extract_openai_native` adapter handles both API shapes with auto-detection:
+  - Chat Completions: `prompt_tokens`, `completion_tokens`, `prompt_tokens_details.{cached_tokens, audio_tokens}`, `completion_tokens_details.{reasoning_tokens, audio_tokens}`, count of `choices[0].message.tool_calls`.
+  - Responses API: `input_tokens`, `output_tokens`, `input_tokens_details.cached_tokens`, `output_tokens_details.reasoning_tokens`, count of `output[].type == "function_call"`.
+- **First provider to populate `llm_reasoning_tokens`** — OpenAI o-series models (`o4-mini`, `o1`, etc.) surface reasoning token counts separately.
+- Auto-injection of `stream_options={"include_usage": True}` when the customer sets `stream=True` without it, so streamed Chat Completions emit usage on the final chunk.
+- `audio_output` field added to `CanonicalUsage` (maps to `llm_audio_output_tokens`), populated by GPT-4o-audio responses.
+- `openai` optional dependency group: `pip install 'lago-agent-sdk[openai]'`.
+- 27 new unit tests (18 adapter + 9 wrapper) and 5 live integration tests (gated on `OPENAI_API_KEY`). Total: 283 unit tests.
+- 10 captured response fixtures from the real OpenAI API (plain chat, tool use, auto-caching, streaming with usage, o-series reasoning, multi-turn, Responses API plain + tool use + reasoning).
+
+### Previously in unreleased (Anthropic)
 - Native `anthropic` SDK support. Wraps `Anthropic.messages.create` (including `stream=True`) and `Anthropic.messages.stream(...)` context manager. Same coverage on `AsyncAnthropic` (sync + async variants).
 - `extract_anthropic_native` adapter with the full Anthropic field map: `input_tokens`, `output_tokens`, `cache_creation_input_tokens`, `cache_read_input_tokens`, `cache_creation.ephemeral_5m_input_tokens`, `cache_creation.ephemeral_1h_input_tokens`, `content[].type == "tool_use"`.
 - `anthropic` optional dependency group: `pip install 'lago-agent-sdk[anthropic]'`.
-- 19 new unit tests (adapter + wrapper) and 3 live integration tests (gated on `ANTHROPIC_API_KEY`). Total: 256 unit tests, ≥80% coverage maintained.
+- 19 unit tests (adapter + wrapper) and 3 live integration tests (gated on `ANTHROPIC_API_KEY`).
 - 9 captured response fixtures from the real Anthropic API (plain, tool use, 5m + 1h prompt caching, extended thinking, streaming, multi-turn).
 
 
diff --git a/README.md b/README.md
index b8855c7..ba96fc3 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,7 @@ pip install lago-agent-sdk
 For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`).
 For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`).
 For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`).
+For OpenAI native support: `pip install 'lago-agent-sdk[openai]'` (adds `openai`).
 
 ## Quickstart — Bedrock
 
@@ -88,6 +89,27 @@ resp = client.chat.complete(
 sdk.flush()
 ```
 
+## Quickstart — OpenAI
+
+```python
+from openai import OpenAI
+from lago_agent_sdk import LagoSDK
+
+sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
+client = sdk.wrap(OpenAI(api_key="..."))
+
+resp = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[{"role": "user", "content": "Hello"}],
+    max_completion_tokens=200,
+)
+sdk.flush()
+```
+
+Works with `OpenAI` and `AsyncOpenAI`. Covers both **Chat Completions** (`client.chat.completions.create`) and the newer **Responses API** (`client.responses.create`), sync + streaming. For streaming, the wrapper auto-injects `stream_options={"include_usage": True}` so the final chunk carries usage data — without it OpenAI emits no usage on streamed responses.
+
+**Reasoning tokens** (`llm_reasoning_tokens`) populate automatically when you call an o-series model (`o4-mini`, `o1`, etc.) — OpenAI is the first provider to expose this metric separately.
+
 ## Multi-tenant — pick a subscription per call
 
 Three ways to set the `external_subscription_id`, in priority order:
@@ -114,26 +136,28 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks.
 | AWS Bedrock | `InvokeModel` (sync + stream), 7 model families | ✓ |
 | Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ |
 | Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ |
-| OpenAI | native SDK | Phase 2 |
-| Google Gemini | native SDK | Phase 2 |
+| OpenAI | native SDK (`chat.completions.create` + `responses.create`, sync + async + stream) | ✓ |
+| Google Gemini | native SDK | Phase 3 |
 | LiteLLM | callback bridge | Phase 4 |
 
 ## Token dimensions captured
 
-`CanonicalUsage` carries 10 numeric fields. Which ones populate depends on the provider:
-
-| Field | Lago metric code | Bedrock | Anthropic native | Mistral native |
-|---|---|---|---|---|
-| input | `llm_input_tokens` | ✓ | ✓ | ✓ |
-| output | `llm_output_tokens` | ✓ | ✓ | ✓ |
-| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) |
-| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ |
-| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ |
-| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) |
-| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ |
-| image_input / audio_input | `llm_image/audio_input_tokens` | ✗ | ✗ | ✗ |
-
-Reasoning, image, and audio fields will populate when Phase 2 native OpenAI ships.
+`CanonicalUsage` carries 11 numeric fields. Which ones populate depends on the provider:
+
+| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI |
+|---|---|---|---|---|---|
+| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ |
+| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ |
+| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) |
+| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ (auto-cache; OpenAI doesn't surface creation counts) |
+| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ |
+| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series models)** |
+| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ |
+| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio input) |
+| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio output) |
+| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3 — multimodal adapter) |
+
+OpenAI's Predicted Outputs tokens (`accepted_prediction_tokens`, `rejected_prediction_tokens`) are not surfaced — see the OpenAI adapter docstring for details on this intentional gap.
 
 ## Error policy
 
diff --git a/pyproject.toml b/pyproject.toml
index 4044de0..77b3897 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,9 @@ dev = [
 anthropic = [
     "anthropic>=0.30",
 ]
+openai = [
+    "openai>=1.50",
+]
 
 [project.urls]
 Homepage = "https://www.getlago.com"
@@ -82,10 +85,11 @@ strict = true
 files = ["src/lago_agent_sdk"]
 
 [[tool.mypy.overrides]]
-module = ["boto3.*", "botocore.*", "mistralai.*"]
+module = ["boto3.*", "botocore.*", "mistralai.*", "openai.*"]
 ignore_missing_imports = true
 
 [dependency-groups]
 dev = [
     "anthropic>=0.30",
+    "openai>=1.50",
 ]
diff --git a/src/lago_agent_sdk/adapters/__init__.py b/src/lago_agent_sdk/adapters/__init__.py
index 217ed3d..1d24920 100644
--- a/src/lago_agent_sdk/adapters/__init__.py
+++ b/src/lago_agent_sdk/adapters/__init__.py
@@ -2,6 +2,7 @@
 from .bedrock_converse import extract_bedrock_converse
 from .bedrock_invoke import extract_bedrock_invoke, pick_invoke_adapter
 from .mistral_native import extract_mistral_native
+from .openai_native import extract_openai_native
 
 __all__ = [
     "extract_anthropic_native",
@@ -9,4 +10,5 @@
     "extract_bedrock_invoke",
     "pick_invoke_adapter",
     "extract_mistral_native",
+    "extract_openai_native",
 ]
diff --git a/src/lago_agent_sdk/adapters/openai_native.py b/src/lago_agent_sdk/adapters/openai_native.py
new file mode 100644
index 0000000..55bd09d
--- /dev/null
+++ b/src/lago_agent_sdk/adapters/openai_native.py
@@ -0,0 +1,157 @@
+"""OpenAI native adapter — verified against real fixtures.
+
+Handles both Chat Completions API (`client.chat.completions.create`) and the
+Responses API (`client.responses.create`). They share a similar concept but
+use different field names — we detect which by looking at the usage shape.
+
+CHAT COMPLETIONS field mapping (`usage.*`):
+  prompt_tokens                                    → input
+  completion_tokens                                → output
+  prompt_tokens_details.cached_tokens              → cache_read
+  prompt_tokens_details.audio_tokens               → audio_input
+  completion_tokens_details.reasoning_tokens       → reasoning   (o-series models)
+  completion_tokens_details.audio_tokens           → audio_output (GPT-4o-audio output)
+  count of choices[0].message.tool_calls           → tool_calls
+
+RESPONSES API field mapping (`usage.*`):
+  input_tokens                                     → input
+  output_tokens                                    → output
+  input_tokens_details.cached_tokens               → cache_read
+  output_tokens_details.reasoning_tokens           → reasoning
+  count of output[].type == "function_call"        → tool_calls
+
+Not exposed by either API:
+  cache_write, cache_write_5m, cache_write_1h — OpenAI auto-caches without
+  surfacing creation counts.
+
+Known gaps (intentional, documented):
+  - completion_tokens_details.accepted_prediction_tokens — Predicted Outputs
+    feature: subset of completion_tokens (the ones that matched the prediction).
+    Skipped to avoid double-counting against completion_tokens.
+  - completion_tokens_details.rejected_prediction_tokens — Predicted Outputs:
+    extra cost beyond completion_tokens (prediction tokens the model rejected).
+    Skipped for v1 — `completion_tokens_details` is a known usage field and is
+    therefore never copied into `extras`; customers using Predicted Outputs
+    must read this value from the openai response object directly.
+"""
+
+from __future__ import annotations
+
+from typing import Any, cast
+
+from ..canonical import CanonicalUsage
+
+# Top-level usage fields we recognize across BOTH chat completions and responses APIs.
+_KNOWN_USAGE_FIELDS = {
+    # chat completions
+    "prompt_tokens",
+    "completion_tokens",
+    "total_tokens",
+    "prompt_tokens_details",
+    "completion_tokens_details",
+    # responses API
+    "input_tokens",
+    "output_tokens",
+    "input_tokens_details",
+    "output_tokens_details",
+}
+
+
+def _safe_dict(v: Any) -> dict[str, Any]:
+    return v if isinstance(v, dict) else {}
+
+
+def _safe_int(v: Any) -> int:
+    try:
+        return max(0, int(v or 0))
+    except (TypeError, ValueError):
+        return 0
+
+
+def _to_dict(obj: Any) -> dict[str, Any]:
+    """Best-effort pydantic-or-dict to dict (OpenAI SDK returns pydantic objects)."""
+    if isinstance(obj, dict):
+        return obj
+    if hasattr(obj, "model_dump"):
+        try:
+            return cast(dict[str, Any], obj.model_dump())
+        except Exception:  # noqa: BLE001
+            pass
+    return {}
+
+
+def _count_chat_tool_calls(resp: dict[str, Any]) -> int:
+    """choices[0].message.tool_calls is a list of called functions in Chat Completions."""
+    choices = resp.get("choices")
+    if not isinstance(choices, list) or not choices:
+        return 0
+    first = choices[0]
+    if not isinstance(first, dict):
+        return 0
+    message = _safe_dict(first.get("message"))
+    tcs = message.get("tool_calls")
+    return len(tcs) if isinstance(tcs, list) else 0
+
+
+def _count_responses_tool_calls(resp: dict[str, Any]) -> int:
+    """In the Responses API, tool invocations are items in `output` with type == "function_call"."""
+    output = resp.get("output")
+    if not isinstance(output, list):
+        return 0
+    return sum(1 for item in output if isinstance(item, dict) and item.get("type") == "function_call")
+
+
+def extract_openai_native(response: Any, model_id: str = "") -> CanonicalUsage:
+    """Translate an OpenAI response (chat completion or responses API) → CanonicalUsage.
+
+    Accepts the SDK's pydantic objects, dicts (e.g. captured fixtures), or the
+    synthetic `{"usage": {...}}` blob produced by the streaming wrapper.
+    """
+    resp = _to_dict(response) if not isinstance(response, dict) else response
+    usage = _safe_dict(resp.get("usage"))
+
+    # Detect which API shape we have. Chat Completions uses prompt_tokens;
+    # Responses API uses input_tokens. They never both appear.
+    is_responses_api = "input_tokens" in usage and "prompt_tokens" not in usage
+
+    if is_responses_api:
+        input_tokens = _safe_int(usage.get("input_tokens"))
+        output_tokens = _safe_int(usage.get("output_tokens"))
+        input_details = _safe_dict(usage.get("input_tokens_details"))
+        output_details = _safe_dict(usage.get("output_tokens_details"))
+        cache_read = _safe_int(input_details.get("cached_tokens"))
+        reasoning = _safe_int(output_details.get("reasoning_tokens"))
+        audio_input = _safe_int(input_details.get("audio_tokens"))
+        audio_output = 0  # not exposed by Responses API today
+        tool_calls = _count_responses_tool_calls(resp)
+        api = "responses"
+    else:
+        input_tokens = _safe_int(usage.get("prompt_tokens"))
+        output_tokens = _safe_int(usage.get("completion_tokens"))
+        prompt_details = _safe_dict(usage.get("prompt_tokens_details"))
+        completion_details = _safe_dict(usage.get("completion_tokens_details"))
+        cache_read = _safe_int(prompt_details.get("cached_tokens"))
+        reasoning = _safe_int(completion_details.get("reasoning_tokens"))
+        audio_input = _safe_int(prompt_details.get("audio_tokens"))
+        audio_output = _safe_int(completion_details.get("audio_tokens"))
+        tool_calls = _count_chat_tool_calls(resp)
+        api = "chat_completions"
+
+    extras: dict[str, Any] = {}
+    for k, v in usage.items():
+        if k not in _KNOWN_USAGE_FIELDS:
+            extras[k] = v
+
+    return CanonicalUsage(
+        input=input_tokens,
+        output=output_tokens,
+        cache_read=cache_read,
+        reasoning=reasoning,
+        audio_input=audio_input,
+        audio_output=audio_output,
+        tool_calls=tool_calls,
+        model=model_id or (resp.get("model") if isinstance(resp.get("model"), str) else "") or "",
+        provider="openai",
+        api=api,
+        extras=extras,
+    )
diff --git a/src/lago_agent_sdk/canonical.py b/src/lago_agent_sdk/canonical.py
index 6ec8dff..715a595 100644
--- a/src/lago_agent_sdk/canonical.py
+++ b/src/lago_agent_sdk/canonical.py
@@ -18,6 +18,7 @@ class CanonicalUsage:
     tool_calls: int = 0
     image_input: int = 0
     audio_input: int = 0
+    audio_output: int = 0
     model: str = ""
     provider: str = ""
     api: str = ""
@@ -34,6 +35,7 @@ class CanonicalUsage:
         "tool_calls",
         "image_input",
         "audio_input",
+        "audio_output",
     )
 
     def nonzero_numeric(self) -> dict[str, int]:
diff --git a/src/lago_agent_sdk/config.py b/src/lago_agent_sdk/config.py
index 0383117..28bb1c5 100644
--- a/src/lago_agent_sdk/config.py
+++ b/src/lago_agent_sdk/config.py
@@ -16,6 +16,7 @@
     "tool_calls": "llm_tool_calls",
     "image_input": "llm_image_input_tokens",
     "audio_input": "llm_audio_input_tokens",
+    "audio_output": "llm_audio_output_tokens",
 }
 
 
diff --git a/src/lago_agent_sdk/sdk.py b/src/lago_agent_sdk/sdk.py
index c303e03..ce55ccb 100644
--- a/src/lago_agent_sdk/sdk.py
+++ b/src/lago_agent_sdk/sdk.py
@@ -87,13 +87,19 @@ def wrap(
             from .wrappers.anthropic import wrap_anthropic_client
 
             return wrap_anthropic_client(self, client, dimensions=dimensions, subscription=subscription)
+        if kind == "openai":
+            from .wrappers.openai import wrap_openai_client
+
+            return wrap_openai_client(self, client, dimensions=dimensions, subscription=subscription)
         if kind == "unknown":
             raise UnknownClientError(
                 f"Unknown client passed to wrap(): {type(client).__module__}.{type(client).__name__}. "
-                "Supported: boto3 bedrock-runtime, mistralai.client.Mistral, anthropic.Anthropic / AsyncAnthropic."
+                "Supported: boto3 bedrock-runtime, mistralai.client.Mistral, "
+                "anthropic.Anthropic / AsyncAnthropic, openai.OpenAI / AsyncOpenAI."
             )
         raise UnknownClientError(
-            f"Client kind '{kind}' is not yet supported. Implemented: 'bedrock', 'mistral', 'anthropic'."
+            f"Client kind '{kind}' is not yet supported. "
+            "Implemented: 'bedrock', 'mistral', 'anthropic', 'openai'."
         )
 
     # ------------------------------------------------------------------
diff --git a/src/lago_agent_sdk/wrappers/openai.py b/src/lago_agent_sdk/wrappers/openai.py
new file mode 100644
index 0000000..1864986
--- /dev/null
+++ b/src/lago_agent_sdk/wrappers/openai.py
@@ -0,0 +1,185 @@
+"""openai SDK wrapper.
+
+Wraps the public methods of `OpenAI` (and `AsyncOpenAI`) clients in place —
+instrumentation never breaks the customer's call.
+
+Methods wrapped:
+  - .chat.completions.create(...)  — non-streaming and stream=True both supported
+  - .responses.create(...)         — Responses API, sync + streaming
+  - AsyncOpenAI variants of both   — async non-streaming and stream=True
+
+Streaming behavior:
+  When `stream=True` is passed without `stream_options={"include_usage": True}`
+  (Chat Completions) we automatically inject it so the final chunk carries the
+  usage payload we need to bill. Without that flag, OpenAI's stream emits no
+  usage data and the customer gets silent under-billing.
+
+Per-call override: pop `extra_lago={"subscription": ..., "dimensions": ...}` from
+kwargs before forwarding so OpenAI's strict validation doesn't reject it.
+"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import AsyncIterator, Iterator
+from typing import Any
+
+from ..adapters import extract_openai_native
+
+logger = logging.getLogger("lago_agent_sdk.wrappers.openai")
+
+_INSTRUMENTED_ATTR = "_lago_instrumented"
+_LAGO_KWARG = "extra_lago"
+
+
+def _pop_lago_kwarg(kwargs: dict[str, Any]) -> dict[str, Any]:
+    return kwargs.pop(_LAGO_KWARG, {}) or {}
+
+
+def _ensure_stream_options_include_usage(kwargs: dict[str, Any]) -> None:
+    """If stream=True without include_usage, inject it. No-op otherwise.
+
+    Only meaningful for Chat Completions (the Responses API exposes usage on its own
+    final event), yet this also runs for responses.create — NOTE(review): confirm it is accepted there.
+    """
+    if not kwargs.get("stream"):
+        return
+    so = kwargs.get("stream_options")
+    if isinstance(so, dict):
+        # Respect customer's explicit choice if they set it
+        if "include_usage" in so:
+            return
+        kwargs["stream_options"] = {**so, "include_usage": True}
+    else:
+        kwargs["stream_options"] = {"include_usage": True}
+
+
+def _is_response_like(obj: Any) -> bool:
+    """Real responses expose `.usage`; Stream iterators don't.
+
+    Safe against properties that raise — falls through to False so the customer's
+    call is never broken.
+    """
+    try:
+        if isinstance(obj, dict):
+            return "usage" in obj
+        return hasattr(obj, "usage")
+    except Exception:  # noqa: BLE001
+        return False
+
+
+def wrap_openai_client(
+    sdk: Any,
+    client: Any,
+    dimensions: dict[str, Any] | None = None,
+    subscription: str | None = None,
+) -> Any:
+    """In-place wrap of an `openai.OpenAI` or `openai.AsyncOpenAI` client. Idempotent."""
+    if getattr(client, _INSTRUMENTED_ATTR, False):
+        logger.info("lago: openai client already wrapped — skipping")
+        return client
+
+    base_dims = dict(dimensions or {})
+    base_sub = subscription
+    is_async = type(client).__name__.startswith("Async")
+
+    def _resolve_opts(lago_opts: dict[str, Any]) -> tuple[str | None, dict[str, Any]]:
+        sub = lago_opts.get("subscription") or base_sub
+        dims = {**base_dims, **(lago_opts.get("dimensions") or {})}
+        return sub, dims
+
+    def _emit_from(payload: Any, model_id: str, sub: str | None, dims: dict[str, Any]) -> None:
+        try:
+            usage = extract_openai_native(payload, model_id=model_id)
+            sdk.emit(usage, subscription=sub, dimensions=dims)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("lago: openai emit failed: %s", exc)
+
+    def _make_sync_create(original: Any) -> Any:
+        def _create(*args: Any, **kwargs: Any) -> Any:
+            lago_opts = _pop_lago_kwarg(kwargs)
+            _ensure_stream_options_include_usage(kwargs)
+            model_id = kwargs.get("model", "")
+            sub, dims = _resolve_opts(lago_opts)
+            response = original(*args, **kwargs)
+
+            if _is_response_like(response):
+                _emit_from(response, model_id, sub, dims)
+                return response
+
+            # Streaming — wrap the iterator to capture the final usage on close.
+            def _wrap_stream(src: Iterator[Any]) -> Iterator[Any]:
+                last_usage: dict[str, Any] | None = None
+                try:
+                    for event in src:
+                        payload = event.model_dump() if hasattr(event, "model_dump") else event
+                        if isinstance(payload, dict):
+                            usage = payload.get("usage")
+                            if isinstance(usage, dict) and usage:
+                                last_usage = {"usage": usage}
+                        yield event
+                finally:
+                    if last_usage is not None:
+                        _emit_from(last_usage, model_id, sub, dims)
+
+            return _wrap_stream(response)
+
+        return _create
+
+    def _make_async_create(original: Any) -> Any:
+        async def _create_async(*args: Any, **kwargs: Any) -> Any:
+            lago_opts = _pop_lago_kwarg(kwargs)
+            _ensure_stream_options_include_usage(kwargs)
+            model_id = kwargs.get("model", "")
+            sub, dims = _resolve_opts(lago_opts)
+            response = await original(*args, **kwargs)
+
+            if _is_response_like(response):
+                _emit_from(response, model_id, sub, dims)
+                return response
+
+            async def _wrap_async_stream(src: AsyncIterator[Any]) -> AsyncIterator[Any]:
+                last_usage: dict[str, Any] | None = None
+                try:
+                    async for event in src:
+                        payload = event.model_dump() if hasattr(event, "model_dump") else event
+                        if isinstance(payload, dict):
+                            usage = payload.get("usage")
+                            if isinstance(usage, dict) and usage:
+                                last_usage = {"usage": usage}
+                        yield event
+                finally:
+                    if last_usage is not None:
+                        _emit_from(last_usage, model_id, sub, dims)
+
+            return _wrap_async_stream(response)
+
+        return _create_async
+
+    # ------------------------------------------------------------------
+    # chat.completions.create
+    # ------------------------------------------------------------------
+    chat = getattr(client, "chat", None)
+    completions = getattr(chat, "completions", None) if chat is not None else None
+    if completions is not None:
+        original_chat_create = getattr(completions, "create", None)
+        if original_chat_create is not None:
+            completions.create = (
+                _make_async_create(original_chat_create) if is_async else _make_sync_create(original_chat_create)
+            )
+
+    # ------------------------------------------------------------------
+    # responses.create
+    # ------------------------------------------------------------------
+    responses_namespace = getattr(client, "responses", None)
+    if responses_namespace is not None:
+        original_responses_create = getattr(responses_namespace, "create", None)
+        if original_responses_create is not None:
+            responses_namespace.create = (
+                _make_async_create(original_responses_create)
+                if is_async
+                else _make_sync_create(original_responses_create)
+            )
+
+    setattr(client, _INSTRUMENTED_ATTR, True)
+    return client
diff --git a/tests/integration/test_live_openai.py b/tests/integration/test_live_openai.py
new file mode 100644
index 0000000..4a90189
--- /dev/null
+++ b/tests/integration/test_live_openai.py
@@ -0,0 +1,190 @@
+"""End-to-end OpenAI integration test — live API + mocked Lago.
+
+Skipped unless OPENAI_API_KEY is set.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import threading
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+import pytest
+
+from lago_agent_sdk import LagoSDK
+
+pytestmark = pytest.mark.skipif(
+    not os.environ.get("OPENAI_API_KEY"),  # unset or empty key: skip every test in this module
+    reason="OPENAI_API_KEY not set",
+)
+
+
+class _MockLago(BaseHTTPRequestHandler):
+    def do_POST(self):  # noqa: N802
+        """Record the POSTed JSON body on the server, then answer 200 with a stub JSON body."""
+        payload = json.loads(self.rfile.read(int(self.headers.get("Content-Length", 0))))
+        self.server.received.append(payload)  # type: ignore[attr-defined]
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(b'{"ok": true}')
+
+    def log_message(self, *_args, **_kwargs):
+        """No-op override: drop the handler's per-request log output."""
+
+
+def _spawn_lago():  # -> (server, base_url) of a mock Lago served from a daemon thread
+    httpd = HTTPServer(("127.0.0.1", 0), _MockLago)  # port 0: bind whichever port is free
+    httpd.received = []  # type: ignore[attr-defined]
+    threading.Thread(target=httpd.serve_forever, daemon=True).start()
+    return httpd, f"http://127.0.0.1:{httpd.server_port}"
+
+
+def _collect_events(server) -> list[dict]:  # flatten the "events" of every received payload
+    return [event for payload in server.received for event in payload["events"]]
+
+
+def _codes(events) -> set[str]:  # distinct "code" values across the given events
+    return {event["code"] for event in events}
+
+
+# --------------------------------------------------------------------------
+# Chat Completions
+# --------------------------------------------------------------------------
+def test_live_openai_chat_completions_create_emits_to_lago() -> None:
+    """A non-streaming chat completion emits token events tagged chat_completions/openai."""
+    from openai import OpenAI
+
+    server, url = _spawn_lago()
+    try:
+        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
+        client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"]))
+        client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[{"role": "user", "content": "Say hi"}],
+            max_completion_tokens=20,
+        )
+        assert sdk.flush(timeout=10.0)
+        sdk.shutdown(timeout=2.0)
+        events = _collect_events(server)
+        assert {"llm_input_tokens", "llm_output_tokens"} <= _codes(events)
+        for e in events:
+            assert e["properties"]["api"] == "chat_completions"
+            assert e["properties"]["provider"] == "openai"
+    finally:
+        server.shutdown()
+        server.server_close()  # shutdown() only stops serve_forever(); this releases the socket
+
+
+def test_live_openai_chat_completions_streaming_emits_from_final_chunk() -> None:
+    """Draining a streamed chat completion still emits input and output token events."""
+    from openai import OpenAI
+
+    server, url = _spawn_lago()
+    try:
+        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
+        client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"]))
+        # Note: stream_options.include_usage is auto-injected by the wrapper
+        for _ in client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[{"role": "user", "content": "Say hi"}],
+            max_completion_tokens=20,
+            stream=True,
+        ):
+            pass
+        assert sdk.flush(timeout=10.0)
+        sdk.shutdown(timeout=2.0)
+        events = _collect_events(server)
+        assert {"llm_input_tokens", "llm_output_tokens"} <= _codes(events)
+    finally:
+        server.shutdown()
+        server.server_close()  # shutdown() only stops serve_forever(); this releases the socket
+
+
+def test_live_openai_chat_completions_tool_use_emits_tool_calls() -> None:
+    """A forced function call on chat completions emits an llm_tool_calls event."""
+    from openai import OpenAI
+
+    city_schema = {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}
+    server, url = _spawn_lago()
+    try:
+        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
+        client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"]))
+        client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "description": "Get the current weather for a city.",
+                        "parameters": city_schema,
+                    },
+                }
+            ],
+            # Pin the tool so the request asks for a get_weather call specifically.
+            tool_choice={"type": "function", "function": {"name": "get_weather"}},
+            max_completion_tokens=200,
+        )
+        assert sdk.flush(timeout=10.0)
+        sdk.shutdown(timeout=2.0)
+        events = _collect_events(server)
+        assert "llm_tool_calls" in _codes(events)
+    finally:
+        server.shutdown()
+        server.server_close()  # shutdown() only stops serve_forever(); this releases the socket
+
+
+def test_live_openai_reasoning_model_emits_reasoning_tokens() -> None:
+    """o-series models populate completion_tokens_details.reasoning_tokens.
+    First provider to actually expose this metric, so llm_reasoning_tokens must be emitted."""
+    from openai import OpenAI
+
+    server, url = _spawn_lago()
+    try:
+        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
+        client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"]))
+        client.chat.completions.create(
+            model="o4-mini",
+            messages=[{"role": "user", "content": "What is 17 * 23? Just the number."}],
+            max_completion_tokens=2000,
+        )
+        assert sdk.flush(timeout=30.0)
+        sdk.shutdown(timeout=2.0)
+        codes = _codes(_collect_events(server))
+        assert "llm_input_tokens" in codes
+        assert "llm_output_tokens" in codes
+        assert "llm_reasoning_tokens" in codes  # ← the key win for OpenAI
+    finally:
+        server.shutdown()
+        server.server_close()  # shutdown() only stops serve_forever(); this releases the socket
+
+
+# --------------------------------------------------------------------------
+# Responses API
+# --------------------------------------------------------------------------
+def test_live_openai_responses_create_emits_to_lago() -> None:
+    """A Responses API call emits token events tagged responses/openai."""
+    from openai import OpenAI
+
+    server, url = _spawn_lago()
+    try:
+        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
+        client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"]))
+        client.responses.create(
+            model="gpt-4o-mini",
+            input="Say hi",
+            max_output_tokens=20,
+        )
+        assert sdk.flush(timeout=10.0)
+        sdk.shutdown(timeout=2.0)
+        events = _collect_events(server)
+        assert {"llm_input_tokens", "llm_output_tokens"} <= _codes(events)
+        for e in events:
+            assert e["properties"]["api"] == "responses"
+            assert e["properties"]["provider"] == "openai"
+    finally:
+        server.shutdown()
+        server.server_close()  # shutdown() only stops serve_forever(); this releases the socket
diff --git a/tests/unit/adapters/fixtures/capture_openai.py b/tests/unit/adapters/fixtures/capture_openai.py
new file mode 100644
index 0000000..5bcdd25
--- /dev/null
+++ b/tests/unit/adapters/fixtures/capture_openai.py
@@ -0,0 +1,226 @@
+"""Capture real OpenAI API responses for adapter design.
+
+Saves raw responses to tests/unit/adapters/fixtures/openai_native/<scenario>.json
+so we can verify the field mappings against reality before writing the adapter.
+
+Covers both Chat Completions (`client.chat.completions.create`) and
+the Responses API (`client.responses.create`) — they have different
+usage shapes.
+
+Reads OPENAI_API_KEY from env.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import pathlib
+import sys
+
+from openai import OpenAI
+
+OUT = pathlib.Path(__file__).with_name("openai_native")  # fixture directory beside this script
+OUT.mkdir(parents=True, exist_ok=True)  # created at import time; no-op if it already exists
+
+
+def to_dict(response) -> dict:
+    """Convert an SDK response object (or any mapping-like value) into a plain dict for JSON."""
+    # First match wins: model_dump(), then dict(), then json() re-parsed, then dict(response).
+    for method_name in ("model_dump", "dict"):
+        if hasattr(response, method_name):
+            return getattr(response, method_name)()
+    return json.loads(response.json()) if hasattr(response, "json") else dict(response)
+
+
+def save(name: str, model: str, payload: dict) -> None:
+    fixture = OUT / f"{name}.json"  # wrapper keeps the requested model id next to the raw response
+    fixture.write_text(json.dumps({"_model_id": model, "_response": payload}, indent=2, default=str))
+    print(f"  ✓ saved {fixture.name}")
+
+
+def main() -> int:
+    """Run every capture scenario against the live OpenAI API and save the raw responses.
+
+    Scenarios 1-7 call Chat Completions and 8-10 call the Responses API; save() writes
+    each result as <NN>_<scenario>.json. The o4-mini and Responses API scenarios are
+    best-effort: an error there is printed and the run carries on with the next one.
+
+    Returns 2 when OPENAI_API_KEY is unset or empty, otherwise 0.
+    """
+    api_key = os.environ.get("OPENAI_API_KEY")
+    if not api_key:
+        print("error: set OPENAI_API_KEY", file=sys.stderr)
+        return 2
+
+    client = OpenAI(api_key=api_key)
+    prompt = "Write one sentence about dolphins."
+    proof_prompt = (
+        "Prove that the sum of the first n cubes equals the square of the sum "
+        "of the first n positive integers. Show each step."
+    )
+
+    # =================================================================
+    # Chat Completions API — client.chat.completions.create(...)
+    # =================================================================
+
+    # ----- 1. Plain chat completion -----
+    print("\n[1] plain chat — gpt-4o-mini")
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": prompt}],
+        max_completion_tokens=80,
+    )
+    save("01_plain_chat", "gpt-4o-mini", to_dict(response))
+
+    # ----- 2. Tool use (function calling) -----
+    print("\n[2] tool use chat — gpt-4o-mini with weather tool")
+    weather_tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the current weather for a city.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                    "required": ["city"],
+                },
+            },
+        }
+    ]
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
+        tools=weather_tools,
+        tool_choice={"type": "function", "function": {"name": "get_weather"}},
+        max_completion_tokens=200,
+    )
+    save("02_tool_use_chat", "gpt-4o-mini", to_dict(response))
+
+    # ----- 3/4. Cache hit attempt — same long system prompt sent twice (OpenAI auto-caches >1024 tokens) -----
+    print("\n[3] cache attempt — long prompt, call 1 then call 2")
+    long_prompt = (
+        "You are an extremely thorough expert tutor. Answer concisely and cite reasoning step by step. "
+        * 200
+    )
+    first_turn = [
+        {"role": "system", "content": long_prompt},
+        {"role": "user", "content": "What is 2+2?"},
+    ]
+    first = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=first_turn,
+        max_completion_tokens=20,
+    )
+    save("03_cache_call1_chat", "gpt-4o-mini", to_dict(first))
+
+    second_turn = [
+        {"role": "system", "content": long_prompt},
+        {"role": "user", "content": "What is 3+3?"},
+    ]
+    second = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=second_turn,
+        max_completion_tokens=20,
+    )
+    save("04_cache_call2_chat", "gpt-4o-mini", to_dict(second))
+
+    # ----- 5. Streaming with usage included -----
+    print("\n[5] streaming chat — gpt-4o-mini with stream_options.include_usage")
+    stream = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": prompt}],
+        max_completion_tokens=60,
+        stream=True,
+        stream_options={"include_usage": True},
+    )
+    # Keep every chunk so the fixture shows which of them carries the usage block.
+    chunks = [to_dict(chunk) for chunk in stream]
+    save("05_streaming_chat", "gpt-4o-mini", {"chunks": chunks})
+
+    # ----- 6. Reasoning model (o-series) — exposes reasoning_tokens -----
+    print("\n[6] reasoning chat — o4-mini")
+    try:
+        response = client.chat.completions.create(
+            model="o4-mini",
+            messages=[{"role": "user", "content": proof_prompt}],
+            max_completion_tokens=2000,
+        )
+        save("06_reasoning_chat", "o4-mini", to_dict(response))
+    except Exception as exc:  # noqa: BLE001
+        print(f"  o4-mini error: {str(exc)[:160]}")
+
+    # ----- 7. Multi-turn -----
+    print("\n[7] multi-turn chat — gpt-4o-mini")
+    conversation = [
+        {"role": "user", "content": "What is 2+2?"},
+        {"role": "assistant", "content": "2+2 equals 4."},
+        {"role": "user", "content": "And times 3?"},
+    ]
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=conversation,
+        max_completion_tokens=40,
+    )
+    save("07_multi_turn_chat", "gpt-4o-mini", to_dict(response))
+
+    # =================================================================
+    # Responses API — client.responses.create(...)
+    # =================================================================
+
+    # ----- 8. Plain Responses API call -----
+    print("\n[8] plain responses — gpt-4o-mini")
+    try:
+        response = client.responses.create(
+            model="gpt-4o-mini",
+            input=prompt,
+            max_output_tokens=80,
+        )
+        save("08_plain_responses", "gpt-4o-mini", to_dict(response))
+    except Exception as exc:  # noqa: BLE001
+        print(f"  responses.create error: {str(exc)[:160]}")
+
+    # ----- 9. Responses API with tool use -----
+    print("\n[9] tool use responses — gpt-4o-mini")
+    try:
+        response = client.responses.create(
+            model="gpt-4o-mini",
+            input="What's the weather in Tokyo?",
+            tools=[
+                {
+                    "type": "function",
+                    "name": "get_weather",
+                    "description": "Get current weather for a city.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {"city": {"type": "string"}},
+                        "required": ["city"],
+                    },
+                }
+            ],
+            tool_choice="required",
+            max_output_tokens=200,
+        )
+        save("09_tool_use_responses", "gpt-4o-mini", to_dict(response))
+    except Exception as exc:  # noqa: BLE001
+        print(f"  responses tool use error: {str(exc)[:160]}")
+
+    # ----- 10. Reasoning via Responses API -----
+    print("\n[10] reasoning responses — o4-mini")
+    try:
+        response = client.responses.create(
+            model="o4-mini",
+            input=proof_prompt,
+            reasoning={"effort": "low"},
+            max_output_tokens=2000,
+        )
+        save("10_reasoning_responses", "o4-mini", to_dict(response))
+    except Exception as exc:  # noqa: BLE001
+        print(f"  responses reasoning error: {str(exc)[:160]}")
+
+    print("\nDone. Inspect tests/unit/adapters/fixtures/openai_native/*.json")
+    return 0
+
+
+if __name__ == "__main__":  # script entry point: main()'s return value becomes the exit status
+    sys.exit(main())
diff --git a/tests/unit/adapters/fixtures/openai_native/01_plain_chat.json b/tests/unit/adapters/fixtures/openai_native/01_plain_chat.json
new file mode 100644
index 0000000..e573d42
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/01_plain_chat.json
@@ -0,0 +1,42 @@
+{
+  "_model_id": "gpt-4o-mini",
+  "_response": {
+    "id": "chatcmpl-Dkn8rdIFbTd9EfFHjOXXA4pRjMF8R",
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Dolphins are highly intelligent marine mammals known for their playful behavior, strong social bonds, and complex communication skills.",
+          "refusal": null,
+          "role": "assistant",
+          "annotations": [],
+          "audio": null,
+          "function_call": null,
+          "tool_calls": null
+        }
+      }
+    ],
+    "created": 1780044361,
+    "model": "gpt-4o-mini-2024-07-18",
+    "object": "chat.completion",
+    "service_tier": "default",
+    "system_fingerprint": "fp_4f2a4e4dd8",
+    "usage": {
+      "completion_tokens": 23,
+      "prompt_tokens": 13,
+      "total_tokens": 36,
+      "completion_tokens_details": {
+        "accepted_prediction_tokens": 0,
+        "audio_tokens": 0,
+        "reasoning_tokens": 0,
+        "rejected_prediction_tokens": 0
+      },
+      "prompt_tokens_details": {
+        "audio_tokens": 0,
+        "cached_tokens": 0
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/02_tool_use_chat.json b/tests/unit/adapters/fixtures/openai_native/02_tool_use_chat.json
new file mode 100644
index 0000000..1cd0872
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/02_tool_use_chat.json
@@ -0,0 +1,51 @@
+{
+  "_model_id": "gpt-4o-mini",
+  "_response": {
+    "id": "chatcmpl-Dkn8sYZgalPsBNluUfJ7rWW80Fwh0",
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": null,
+          "refusal": null,
+          "role": "assistant",
+          "annotations": [],
+          "audio": null,
+          "function_call": null,
+          "tool_calls": [
+            {
+              "id": "call_eihD8nbuIvE6wVEC26EyyKty",
+              "function": {
+                "arguments": "{\"city\":\"Tokyo\"}",
+                "name": "get_weather"
+              },
+              "type": "function"
+            }
+          ]
+        }
+      }
+    ],
+    "created": 1780044362,
+    "model": "gpt-4o-mini-2024-07-18",
+    "object": "chat.completion",
+    "service_tier": "default",
+    "system_fingerprint": "fp_e2d886d409",
+    "usage": {
+      "completion_tokens": 5,
+      "prompt_tokens": 60,
+      "total_tokens": 65,
+      "completion_tokens_details": {
+        "accepted_prediction_tokens": 0,
+        "audio_tokens": 0,
+        "reasoning_tokens": 0,
+        "rejected_prediction_tokens": 0
+      },
+      "prompt_tokens_details": {
+        "audio_tokens": 0,
+        "cached_tokens": 0
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/03_cache_call1_chat.json b/tests/unit/adapters/fixtures/openai_native/03_cache_call1_chat.json
new file mode 100644
index 0000000..239fdbe
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/03_cache_call1_chat.json
@@ -0,0 +1,42 @@
+{
+  "_model_id": "gpt-4o-mini",
+  "_response": {
+    "id": "chatcmpl-Dkn8troaHOR66ipcDGaytoKv6Vdmf",
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "2 + 2 equals 4. \n\n**Reasoning step by step:**\n1. Identify",
+          "refusal": null,
+          "role": "assistant",
+          "annotations": [],
+          "audio": null,
+          "function_call": null,
+          "tool_calls": null
+        }
+      }
+    ],
+    "created": 1780044363,
+    "model": "gpt-4o-mini-2024-07-18",
+    "object": "chat.completion",
+    "service_tier": "default",
+    "system_fingerprint": "fp_196f526a25",
+    "usage": {
+      "completion_tokens": 20,
+      "prompt_tokens": 3819,
+      "total_tokens": 3839,
+      "completion_tokens_details": {
+        "accepted_prediction_tokens": 0,
+        "audio_tokens": 0,
+        "reasoning_tokens": 0,
+        "rejected_prediction_tokens": 0
+      },
+      "prompt_tokens_details": {
+        "audio_tokens": 0,
+        "cached_tokens": 0
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/04_cache_call2_chat.json b/tests/unit/adapters/fixtures/openai_native/04_cache_call2_chat.json
new file mode 100644
index 0000000..ef3664f
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/04_cache_call2_chat.json
@@ -0,0 +1,42 @@
+{
+  "_model_id": "gpt-4o-mini",
+  "_response": {
+    "id": "chatcmpl-Dkn8uVrLuXJiU7Ef7z5Yix39CCOus",
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "3 + 3 = 6. \n\n**Reasoning:**\n1. Start with the number",
+          "refusal": null,
+          "role": "assistant",
+          "annotations": [],
+          "audio": null,
+          "function_call": null,
+          "tool_calls": null
+        }
+      }
+    ],
+    "created": 1780044364,
+    "model": "gpt-4o-mini-2024-07-18",
+    "object": "chat.completion",
+    "service_tier": "default",
+    "system_fingerprint": "fp_196f526a25",
+    "usage": {
+      "completion_tokens": 20,
+      "prompt_tokens": 3819,
+      "total_tokens": 3839,
+      "completion_tokens_details": {
+        "accepted_prediction_tokens": 0,
+        "audio_tokens": 0,
+        "reasoning_tokens": 0,
+        "rejected_prediction_tokens": 0
+      },
+      "prompt_tokens_details": {
+        "audio_tokens": 0,
+        "cached_tokens": 3712
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/05_streaming_chat.json b/tests/unit/adapters/fixtures/openai_native/05_streaming_chat.json
new file mode 100644
index 0000000..c748280
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/05_streaming_chat.json
@@ -0,0 +1,776 @@
+{
+  "_model_id": "gpt-4o-mini",
+  "_response": {
+    "chunks": [
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": "",
+              "function_call": null,
+              "refusal": null,
+              "role": "assistant",
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "A0RHlUO5d"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": "D",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "x3EVvwJs9P"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": "olph",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "FmQqISP"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": "ins",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "AwlMSuh8"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " are",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "M29Dn9T"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " highly",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "Uv8y"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " intelligent",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "Wo7jeAXq3tBZ84p"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " marine",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "5mQE"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " mammals",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "zaw"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " known",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "L4rI7"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " for",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "KRkgzRl"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " their",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "8XDZI"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " playful",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "NIG"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " behavior",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "Fm"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": ",",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "jyjIDVS3vK"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " complex",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "8I4"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " social",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "yHI5"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " structures",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": ""
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": ",",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "SNNBP953Gk"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " and",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "omShNu9"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " ability",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "alj"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " to",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "3t0LIxjK"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " communicate",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "0ITzn2FwU9chcTG"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " using",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "6Mi6K"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " a",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "lPGTprxfJ"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " variety",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "3oV"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " of",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "mqLE8UfV"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": " vocal",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "bCG7c"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": "izations",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "sUl"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": ".",
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": null,
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "EAucOccXmU"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [
+          {
+            "delta": {
+              "content": null,
+              "function_call": null,
+              "refusal": null,
+              "role": null,
+              "tool_calls": null
+            },
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null
+          }
+        ],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": null,
+        "obfuscation": "hPVGr"
+      },
+      {
+        "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv",
+        "choices": [],
+        "created": 1780044365,
+        "model": "gpt-4o-mini-2024-07-18",
+        "object": "chat.completion.chunk",
+        "service_tier": "default",
+        "system_fingerprint": "fp_4f2a4e4dd8",
+        "usage": {
+          "completion_tokens": 29,
+          "prompt_tokens": 13,
+          "total_tokens": 42,
+          "completion_tokens_details": {
+            "accepted_prediction_tokens": 0,
+            "audio_tokens": 0,
+            "reasoning_tokens": 0,
+            "rejected_prediction_tokens": 0
+          },
+          "prompt_tokens_details": {
+            "audio_tokens": 0,
+            "cached_tokens": 0
+          }
+        },
+        "obfuscation": "jYiy3qiNKb"
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/06_reasoning_chat.json b/tests/unit/adapters/fixtures/openai_native/06_reasoning_chat.json
new file mode 100644
index 0000000..8cf766c
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/06_reasoning_chat.json
@@ -0,0 +1,42 @@
+{
+  "_model_id": "o4-mini",
+  "_response": {
+    "id": "chatcmpl-Dkn8wt8xNufUS91CnuPY3a2IUhErr",
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here is a classic proof by induction (one can also use a telescoping argument; see footnote).  We want to show for all integers n\u22651:  \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3 \u2002=\u2002 [1 + 2 + \u22ef + n]\u00b2 \u2002=\u2002 [n(n + 1)/2]\u00b2.\n\n**1. Base case (n=1).**  \nLHS = 1\u00b3 = 1.  \nRHS = [1\u00b7(1+1)/2]\u00b2 = [1\u00b72/2]\u00b2 = 1\u00b2 = 1.  \nSo the formula holds at n=1.\n\n**2. Inductive step.**  \nAssume the formula holds for some n\u22651, i.e. assume  \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3 = [n(n+1)/2]\u00b2.  \nWe must prove it then holds for n+1, namely  \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3 + (n+1)\u00b3 = [(n+1)(n+2)/2]\u00b2.\n\nStarting from the LHS at n+1 we write  \n\u20031\u00b3 + \u22ef + n\u00b3 + (n+1)\u00b3  \n= (1\u00b3 + \u22ef + n\u00b3) + (n+1)\u00b3  \n\u27f6 by the induction hypothesis  \n= [n(n+1)/2]\u00b2   +   (n+1)\u00b3.  \n\nFactor out (n+1)\u00b2 from these two terms:  \n\u2003[n(n+1)/2]\u00b2 + (n+1)\u00b3  \n= (n+1)\u00b2 \u00b7 [ n\u00b2/4  +  (n+1) ]  \n(because [n(n+1)/2]\u00b2 = (n+1)\u00b2\u00b7n\u00b2/4).  \n\nCombine the bracket:  \n\u2003n\u00b2/4  +  (n+1)  \n= n\u00b2/4  +  (4n+4)/4  \n= (n\u00b2 + 4n + 4)/4  \n= (n+2)\u00b2/4.  \n\nHence  \n\u2003(1\u00b3 + \u22ef + n\u00b3 + (n+1)\u00b3)  \n= (n+1)\u00b2 \u00b7 [(n+2)\u00b2/4]  \n= [(n+1)(n+2)/2]\u00b2,  \n\nwhich is exactly the desired formula at n+1.  This completes the induction.  \nTherefore by induction the identity holds for all n\u22651.\n\n\u220e\n\nFootnote (telescoping proof).  Observe  \n\u2003a_k := [k(k+1)/2]\u00b2.  \nThen  \n\u2003a_k \u2013 a_{k\u20131}  \n= [k(k+1)/2]\u00b2 \u2013 [(k\u20131)k/2]\u00b2  \n= (1/4){ k\u00b2[(k+1)\u00b2 \u2013 (k\u20131)\u00b2] }  \n= (1/4){ k\u00b2[ (k\u00b2+2k+1) \u2013 (k\u00b2\u20132k+1) ] }  \n= (1/4){ k\u00b2\u00b74k }  \n= k\u00b3.  
\nTherefore  \n\u2003\u2211_{k=1}^n k\u00b3 = \u2211_{k=1}^n (a_k\u2013a_{k\u20131}) = a_n \u2013 a_0 = [n(n+1)/2]\u00b2.",
+          "refusal": null,
+          "role": "assistant",
+          "annotations": [],
+          "audio": null,
+          "function_call": null,
+          "tool_calls": null
+        }
+      }
+    ],
+    "created": 1780044366,
+    "model": "o4-mini-2025-04-16",
+    "object": "chat.completion",
+    "service_tier": "default",
+    "system_fingerprint": null,
+    "usage": {
+      "completion_tokens": 1579,
+      "prompt_tokens": 33,
+      "total_tokens": 1612,
+      "completion_tokens_details": {
+        "accepted_prediction_tokens": 0,
+        "audio_tokens": 0,
+        "reasoning_tokens": 832,
+        "rejected_prediction_tokens": 0
+      },
+      "prompt_tokens_details": {
+        "audio_tokens": 0,
+        "cached_tokens": 0
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/07_multi_turn_chat.json b/tests/unit/adapters/fixtures/openai_native/07_multi_turn_chat.json
new file mode 100644
index 0000000..e543675
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/07_multi_turn_chat.json
@@ -0,0 +1,42 @@
+{
+  "_model_id": "gpt-4o-mini",
+  "_response": {
+    "id": "chatcmpl-Dkn96bNjx7tRhEKJbRCcXPVYlugPs",
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "4 times 3 equals 12.",
+          "refusal": null,
+          "role": "assistant",
+          "annotations": [],
+          "audio": null,
+          "function_call": null,
+          "tool_calls": null
+        }
+      }
+    ],
+    "created": 1780044376,
+    "model": "gpt-4o-mini-2024-07-18",
+    "object": "chat.completion",
+    "service_tier": "default",
+    "system_fingerprint": "fp_da89e836d0",
+    "usage": {
+      "completion_tokens": 8,
+      "prompt_tokens": 34,
+      "total_tokens": 42,
+      "completion_tokens_details": {
+        "accepted_prediction_tokens": 0,
+        "audio_tokens": 0,
+        "reasoning_tokens": 0,
+        "rejected_prediction_tokens": 0
+      },
+      "prompt_tokens_details": {
+        "audio_tokens": 0,
+        "cached_tokens": 0
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/08_plain_responses.json b/tests/unit/adapters/fixtures/openai_native/08_plain_responses.json
new file mode 100644
index 0000000..2c2aad7
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/08_plain_responses.json
@@ -0,0 +1,80 @@
+{
+  "_model_id": "gpt-4o-mini",
+  "_response": {
+    "id": "resp_0a1d246c78dc3537006a195258f9f481a1816a2d26b761d722",
+    "created_at": 1780044376.0,
+    "error": null,
+    "incomplete_details": null,
+    "instructions": null,
+    "metadata": {},
+    "model": "gpt-4o-mini-2024-07-18",
+    "object": "response",
+    "output": [
+      {
+        "id": "msg_0a1d246c78dc3537006a19525a405081a193ec1a80b639a122",
+        "content": [
+          {
+            "annotations": [],
+            "text": "Dolphins are highly intelligent marine mammals known for their playful behavior and complex social structures.",
+            "type": "output_text",
+            "logprobs": []
+          }
+        ],
+        "role": "assistant",
+        "status": "completed",
+        "type": "message",
+        "phase": null
+      }
+    ],
+    "parallel_tool_calls": true,
+    "temperature": 1.0,
+    "tool_choice": "auto",
+    "tools": [],
+    "top_p": 1.0,
+    "background": false,
+    "completed_at": 1780044378.0,
+    "conversation": null,
+    "max_output_tokens": 80,
+    "max_tool_calls": null,
+    "previous_response_id": null,
+    "prompt": null,
+    "prompt_cache_key": null,
+    "prompt_cache_retention": "in_memory",
+    "reasoning": {
+      "effort": null,
+      "generate_summary": null,
+      "summary": null,
+      "context": null
+    },
+    "safety_identifier": null,
+    "service_tier": "default",
+    "status": "completed",
+    "text": {
+      "format": {
+        "type": "text"
+      },
+      "verbosity": "medium"
+    },
+    "top_logprobs": 0,
+    "truncation": "disabled",
+    "usage": {
+      "input_tokens": 13,
+      "input_tokens_details": {
+        "cached_tokens": 0
+      },
+      "output_tokens": 19,
+      "output_tokens_details": {
+        "reasoning_tokens": 0
+      },
+      "total_tokens": 32
+    },
+    "user": null,
+    "billing": {
+      "payer": "developer"
+    },
+    "frequency_penalty": 0.0,
+    "moderation": null,
+    "presence_penalty": 0.0,
+    "store": true
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/09_tool_use_responses.json b/tests/unit/adapters/fixtures/openai_native/09_tool_use_responses.json
new file mode 100644
index 0000000..a6e3484
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/09_tool_use_responses.json
@@ -0,0 +1,94 @@
+{
+  "_model_id": "gpt-4o-mini",
+  "_response": {
+    "id": "resp_0c3beedd7242846a006a19525ae42881949bd176fd7d3aa90e",
+    "created_at": 1780044378.0,
+    "error": null,
+    "incomplete_details": null,
+    "instructions": null,
+    "metadata": {},
+    "model": "gpt-4o-mini-2024-07-18",
+    "object": "response",
+    "output": [
+      {
+        "arguments": "{\"city\":\"Tokyo\"}",
+        "call_id": "call_O98Bwd1iE01xgrPVmPli3M0i",
+        "name": "get_weather",
+        "type": "function_call",
+        "id": "fc_0c3beedd7242846a006a19525c212081948e414c52f9ec8029",
+        "namespace": null,
+        "status": "completed"
+      }
+    ],
+    "parallel_tool_calls": true,
+    "temperature": 1.0,
+    "tool_choice": "required",
+    "tools": [
+      {
+        "name": "get_weather",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "city": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "city"
+          ],
+          "additionalProperties": false
+        },
+        "strict": true,
+        "type": "function",
+        "defer_loading": null,
+        "description": "Get current weather for a city."
+      }
+    ],
+    "top_p": 1.0,
+    "background": false,
+    "completed_at": 1780044380.0,
+    "conversation": null,
+    "max_output_tokens": 200,
+    "max_tool_calls": null,
+    "previous_response_id": null,
+    "prompt": null,
+    "prompt_cache_key": null,
+    "prompt_cache_retention": "in_memory",
+    "reasoning": {
+      "effort": null,
+      "generate_summary": null,
+      "summary": null,
+      "context": null
+    },
+    "safety_identifier": null,
+    "service_tier": "default",
+    "status": "completed",
+    "text": {
+      "format": {
+        "type": "text"
+      },
+      "verbosity": "medium"
+    },
+    "top_logprobs": 0,
+    "truncation": "disabled",
+    "usage": {
+      "input_tokens": 53,
+      "input_tokens_details": {
+        "cached_tokens": 0
+      },
+      "output_tokens": 6,
+      "output_tokens_details": {
+        "reasoning_tokens": 0
+      },
+      "total_tokens": 59
+    },
+    "user": null,
+    "billing": {
+      "payer": "developer"
+    },
+    "frequency_penalty": 0.0,
+    "moderation": null,
+    "presence_penalty": 0.0,
+    "store": true
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/openai_native/10_reasoning_responses.json b/tests/unit/adapters/fixtures/openai_native/10_reasoning_responses.json
new file mode 100644
index 0000000..56b184d
--- /dev/null
+++ b/tests/unit/adapters/fixtures/openai_native/10_reasoning_responses.json
@@ -0,0 +1,88 @@
+{
+  "_model_id": "o4-mini",
+  "_response": {
+    "id": "resp_01686e0eda1186ad006a19525c9244819281061c5e851add1a",
+    "created_at": 1780044380.0,
+    "error": null,
+    "incomplete_details": null,
+    "instructions": null,
+    "metadata": {},
+    "model": "o4-mini-2025-04-16",
+    "object": "response",
+    "output": [
+      {
+        "id": "rs_01686e0eda1186ad006a19525e3b6081928c78127466f13ae9",
+        "summary": [],
+        "type": "reasoning",
+        "content": null,
+        "encrypted_content": null,
+        "status": null
+      },
+      {
+        "id": "msg_01686e0eda1186ad006a1952607b7c8192aabd7ae10c7ed3c1",
+        "content": [
+          {
+            "annotations": [],
+            "text": "Here is a proof by mathematical induction that for every positive integer n,  \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3  =  (1 + 2 + \u22ef + n)\u00b2.  \n\nNotation. Let  \n\u2003S(n)  := 1\u00b3 + 2\u00b3 + \u22ef + n\u00b3,  \nand recall the well\u2010known formula  \n\u2003T(n) := 1 + 2 + \u22ef + n  =  n(n + 1)/2.  \n\nWe will show  \n\u2003S(n)  =  [T(n)]\u00b2  =  [n(n + 1)/2]\u00b2.  \n\n1. Base case (n = 1).  \n  S(1) = 1\u00b3 = 1,  \n  T(1) = 1, so [T(1)]\u00b2 = 1\u00b2 = 1.  \n  Hence S(1) = [T(1)]\u00b2.  \n\n2. Inductive step.  \n  Assume that for some k \u2265 1,  \n    S(k)  =  [T(k)]\u00b2  =  [k(k + 1)/2]\u00b2.  \n  We must show  \n    S(k + 1)  =  [T(k + 1)]\u00b2.  \n\n  Now  \n    S(k + 1)  \n    = S(k) + (k + 1)\u00b3  \n    = [k(k + 1)/2]\u00b2   +   (k + 1)\u00b3       (by the inductive hypothesis)  \n    =  (k\u00b2 (k + 1)\u00b2)/4   +   (k + 1)\u00b3.  \n\n  Factor out (k + 1)\u00b2/4 from the sum:  \n    =  (k + 1)\u00b2   \n        \u00b7 [k\u00b2/4  +  4\u00b7(k + 1)/4 ]  \n      =  (k + 1)\u00b2   \n        \u00b7 [ (k\u00b2 + 4(k + 1)) / 4 ]  \n      =  (k + 1)\u00b2   \n        \u00b7 [ (k\u00b2 + 4k + 4) / 4 ]  \n      =  (k + 1)\u00b2   \n        \u00b7 [ (k + 2)\u00b2 / 4 ]  \n    =  [ (k + 1)(k + 2) / 2 ]\u00b2.  \n\n  But (k + 1)(k + 2)/2 = T(k + 1), so we conclude  \n    S(k + 1)  =  [T(k + 1)]\u00b2.  \n\nBy the principle of mathematical induction, the formula  \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3  =  [n(n + 1)/2]\u00b2  \nholds for all positive integers n.  \nEquivalently, the sum of the first n cubes equals the square of the sum of the first n positive integers.",
+            "type": "output_text",
+            "logprobs": []
+          }
+        ],
+        "role": "assistant",
+        "status": "completed",
+        "type": "message",
+        "phase": null
+      }
+    ],
+    "parallel_tool_calls": true,
+    "temperature": 1.0,
+    "tool_choice": "auto",
+    "tools": [],
+    "top_p": 1.0,
+    "background": false,
+    "completed_at": 1780044387.0,
+    "conversation": null,
+    "max_output_tokens": 2000,
+    "max_tool_calls": null,
+    "previous_response_id": null,
+    "prompt": null,
+    "prompt_cache_key": null,
+    "prompt_cache_retention": "in_memory",
+    "reasoning": {
+      "effort": "low",
+      "generate_summary": null,
+      "summary": null,
+      "context": "current_turn"
+    },
+    "safety_identifier": null,
+    "service_tier": "default",
+    "status": "completed",
+    "text": {
+      "format": {
+        "type": "text"
+      },
+      "verbosity": "medium"
+    },
+    "top_logprobs": 0,
+    "truncation": "disabled",
+    "usage": {
+      "input_tokens": 33,
+      "input_tokens_details": {
+        "cached_tokens": 0
+      },
+      "output_tokens": 981,
+      "output_tokens_details": {
+        "reasoning_tokens": 320
+      },
+      "total_tokens": 1014
+    },
+    "user": null,
+    "billing": {
+      "payer": "developer"
+    },
+    "frequency_penalty": 0.0,
+    "moderation": null,
+    "presence_penalty": 0.0,
+    "store": true
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/test_openai_native.py b/tests/unit/adapters/test_openai_native.py
new file mode 100644
index 0000000..71ffb16
--- /dev/null
+++ b/tests/unit/adapters/test_openai_native.py
@@ -0,0 +1,228 @@
+"""OpenAI native adapter — verified against real fixtures."""
+
+from __future__ import annotations
+
+import json
+import pathlib
+
+from lago_agent_sdk.adapters import extract_openai_native
+
+FIX = pathlib.Path(__file__).parent / "fixtures" / "openai_native"
+
+
+def _load(name: str) -> tuple[str, dict]:  # -> (fixture "_model_id", fixture "_response")
+    data = json.loads((FIX / name).read_text(encoding="utf-8"))  # not the locale default
+    return data["_model_id"], data["_response"]
+
+
+# --------------------------------------------------------------------------
+# Chat Completions fixtures
+# --------------------------------------------------------------------------
+def test_plain_chat() -> None:
+    """Plain Chat Completions fixture: token counts only; cache, reasoning, tool and audio are 0."""
+    fixture_model, payload = _load("01_plain_chat.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert (usage.input, usage.output) == (13, 23)
+    assert usage.provider == "openai"
+    assert usage.api == "chat_completions"
+    assert usage.cache_read == 0
+    assert usage.reasoning == 0
+    assert usage.tool_calls == 0
+    assert usage.audio_input == 0
+    assert usage.audio_output == 0
+
+
+def test_tool_use_chat_counts_tool_calls() -> None:
+    """Chat Completions tool-use fixture: one tool call is counted next to the token totals."""
+    fixture_model, payload = _load("02_tool_use_chat.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert usage.api == "chat_completions"
+    assert usage.tool_calls == 1
+    assert (usage.input, usage.output) == (60, 5)
+
+
+def test_cache_call1_no_cache_yet() -> None:
+    """Initial long-prompt request: OpenAI has cached nothing yet, so cache_read is 0."""
+    fixture_model, payload = _load("03_cache_call1_chat.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert usage.cache_read == 0
+    assert usage.input == 3819
+    assert usage.output == 20
+
+
+def test_cache_call2_auto_cached() -> None:
+    """Repeat of the same long prompt: OpenAI caches automatically and reports cached_tokens."""
+    fixture_model, payload = _load("04_cache_call2_chat.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert usage.cache_read == 3712  # the bulk of the system prompt came from cache
+    assert usage.input == 3819
+    assert usage.output == 20
+    # OpenAI exposes no cache-write counters (cache_write / cache_write_5m / cache_write_1h)
+    assert usage.cache_write_5m == 0
+    assert usage.cache_write == 0
+
+
+def test_streaming_chat_final_chunk_carries_usage() -> None:
+    """With stream_options.include_usage=True, the closing stream chunk holds the usage block."""
+    fixture_model, payload = _load("05_streaming_chat.json")
+    newest_first = reversed(payload["chunks"])
+    # Scanning from the end, stop at the first chunk whose "usage" is truthy (here: the last one).
+    tail = next(filter(lambda chunk: chunk.get("usage"), newest_first), None)
+    assert tail is not None
+    usage = extract_openai_native(tail, model_id=fixture_model)
+    assert usage.api == "chat_completions"
+    assert usage.input == 13
+    assert usage.output == 29
+
+
+def test_reasoning_chat_exposes_reasoning_tokens() -> None:
+    """o4-mini fixture: completion_tokens_details.reasoning_tokens is surfaced as `reasoning`."""
+    fixture_model, payload = _load("06_reasoning_chat.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert usage.reasoning == 832  # the fixture's reasoning_tokens, reported as-is
+    assert usage.tool_calls == 0
+    assert usage.input == 33
+    assert usage.output == 1579
+
+
+def test_multi_turn_chat() -> None:
+    """Multi-turn chat fixture: prompt and completion token counts are extracted."""
+    fixture_model, payload = _load("07_multi_turn_chat.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert (usage.input, usage.output) == (34, 8)
+
+
+# --------------------------------------------------------------------------
+# Responses API fixtures
+# --------------------------------------------------------------------------
+def test_plain_responses() -> None:
+    """Plain Responses API fixture: input/output token counts, tagged as `responses`."""
+    fixture_model, payload = _load("08_plain_responses.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert (usage.provider, usage.api) == ("openai", "responses")
+    assert usage.input == 13
+    assert usage.output == 19
+
+
+def test_tool_use_responses_counts_function_calls() -> None:
+    """A Responses API tool call is an `output[]` item of type 'function_call'; one is counted."""
+    fixture_model, payload = _load("09_tool_use_responses.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert usage.api == "responses"
+    assert usage.tool_calls == 1
+    assert usage.input == 53
+    assert usage.output == 6
+
+
+def test_reasoning_responses() -> None:
+    """o4-mini Responses fixture: output_tokens_details.reasoning_tokens maps to `reasoning`."""
+    fixture_model, payload = _load("10_reasoning_responses.json")
+    usage = extract_openai_native(payload, model_id=fixture_model)
+    assert usage.api == "responses"
+    assert usage.reasoning == 320
+    assert (usage.input, usage.output) == (33, 981)
+
+
+# --------------------------------------------------------------------------
+# API detection
+# --------------------------------------------------------------------------
+def test_chat_completions_shape_detected() -> None:
+    """A usage block keyed by `prompt_tokens` is classified as Chat Completions."""
+    minimal = {
+        "usage": {"prompt_tokens": 1, "completion_tokens": 1},
+    }
+    usage = extract_openai_native(minimal, model_id="gpt-4o")
+    assert usage.api == "chat_completions"
+
+
+def test_responses_api_shape_detected() -> None:
+    """A usage block with `input_tokens` and no `prompt_tokens` is classified as Responses."""
+    minimal = {
+        "usage": {"input_tokens": 1, "output_tokens": 1},
+    }
+    usage = extract_openai_native(minimal, model_id="gpt-4o")
+    assert usage.api == "responses"
+
+
+# --------------------------------------------------------------------------
+# Robustness
+# --------------------------------------------------------------------------
+def test_handles_pydantic_via_model_dump() -> None:
+    """An object that only offers `model_dump()` is read through the dict that method returns."""
+
+    class FakePydantic:
+        def model_dump(self) -> dict:
+            usage_block = {
+                "prompt_tokens": 5,
+                "completion_tokens": 7,
+                "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
+                "completion_tokens_details": {"reasoning_tokens": 3, "audio_tokens": 0},
+            }
+            return {
+                "model": "gpt-4o-mini",
+                "choices": [{"message": {"tool_calls": [{"id": "t1"}, {"id": "t2"}]}}],
+                "usage": usage_block,
+            }
+
+    usage = extract_openai_native(FakePydantic(), model_id="gpt-4o-mini")
+    assert usage.api == "chat_completions"
+    assert usage.tool_calls == 2
+    assert usage.reasoning == 3
+    assert usage.input == 5
+    assert usage.output == 7
+
+
+def test_no_usage_returns_zeros() -> None:
+    """An empty response yields zero input/output and a falsy `nonzero_numeric()`."""
+    usage = extract_openai_native({}, model_id="gpt-4o-mini")
+    assert (usage.input, usage.output) == (0, 0)
+    assert not usage.nonzero_numeric()
+
+
+def test_survives_non_dict_usage() -> None:
+    """A bool or str `usage`, or a None response, still yields zero counts instead of raising."""
+    for raw, attr in (({"usage": True}, "input"), ({"usage": "bogus"}, "output"), (None, "input")):
+        assert getattr(extract_openai_native(raw, model_id="x"), attr) == 0
+
+
+def test_unknown_top_usage_field_lands_in_extras() -> None:
+    """A top-level usage key the adapter does not know is kept in `extras`, so drift stays visible."""
+    usage_block = {
+        "prompt_tokens": 5,
+        "completion_tokens": 7,
+        "future_field_xyz": "novel",
+    }
+    payload = {"usage": usage_block}
+    usage = extract_openai_native(payload, model_id="gpt-4o")
+    extras = usage.extras
+    assert extras.get("future_field_xyz") == "novel"
+
+
+def test_audio_input_mapped_from_prompt_details() -> None:
+    """Audio input for Chat Completions is read from usage.prompt_tokens_details.audio_tokens."""
+    prompt_details = {"audio_tokens": 42, "cached_tokens": 0}
+    completion_details = {"audio_tokens": 0, "reasoning_tokens": 0}
+    usage_block = {
+        "prompt_tokens": 100,
+        "completion_tokens": 50,
+        "prompt_tokens_details": prompt_details,
+        "completion_tokens_details": completion_details,
+    }
+    usage = extract_openai_native({"usage": usage_block}, model_id="gpt-4o-audio")
+    assert usage.audio_output == 0
+    assert usage.audio_input == 42
+
+
+def test_audio_output_mapped_from_completion_details() -> None:
+    """Audio output is read from usage.completion_tokens_details.audio_tokens."""
+    prompt_details = {"audio_tokens": 0, "cached_tokens": 0}
+    completion_details = {"audio_tokens": 33, "reasoning_tokens": 0}
+    usage_block = {
+        "prompt_tokens": 100,
+        "completion_tokens": 50,
+        "prompt_tokens_details": prompt_details,
+        "completion_tokens_details": completion_details,
+    }
+    usage = extract_openai_native({"usage": usage_block}, model_id="gpt-4o-audio")
+    assert usage.audio_output == 33
+    assert usage.audio_input == 0
diff --git a/tests/unit/test_wrapper_openai.py b/tests/unit/test_wrapper_openai.py
new file mode 100644
index 0000000..43acd95
--- /dev/null
+++ b/tests/unit/test_wrapper_openai.py
@@ -0,0 +1,296 @@
+"""OpenAI wrapper tests — fake client, no live API."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from lago_agent_sdk import LagoSDK
+
+
+class FakeChatCompletion:
+    """Mimics openai's ChatCompletion pydantic object."""
+
+    def __init__(self, payload: dict[str, Any]) -> None:
+        self._payload = payload
+        # expose .usage so the wrapper's _is_response_like check passes
+        self.usage = payload.get("usage")
+
+    def model_dump(self) -> dict[str, Any]:
+        return self._payload
+
+
+class FakeResponsesResponse:
+    """Mimics openai's Response object (Responses API)."""
+
+    def __init__(self, payload: dict[str, Any]) -> None:
+        self._payload = payload
+        self.usage = payload.get("usage")
+
+    def model_dump(self) -> dict[str, Any]:
+        return self._payload
+
+
+class FakeStreamChunk:
+    """Mimics a ChatCompletionChunk."""
+
+    def __init__(self, payload: dict[str, Any]) -> None:
+        self._payload = payload
+
+    def model_dump(self) -> dict[str, Any]:
+        return self._payload
+
+
+class FakeCompletions:
+    def __init__(self) -> None:
+        self.create_calls = 0
+        self.last_kwargs: dict[str, Any] | None = None
+
+    def create(self, **kwargs: Any) -> Any:
+        self.create_calls += 1
+        # extra_lago must be stripped by the wrapper before reaching here
+        assert "extra_lago" not in kwargs
+        self.last_kwargs = dict(kwargs)
+
+        if kwargs.get("stream") is True:
+            # Stream yields several chunks; the LAST one carries usage
+            # (because the wrapper auto-injects stream_options.include_usage).
+            chunks = [
+                FakeStreamChunk(
+                    {"choices": [{"delta": {"content": "hi"}}], "usage": None},
+                ),
+                FakeStreamChunk(
+                    {
+                        "choices": [],
+                        "usage": {
+                            "prompt_tokens": 12,
+                            "completion_tokens": 22,
+                            "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
+                            "completion_tokens_details": {
+                                "reasoning_tokens": 0,
+                                "audio_tokens": 0,
+                            },
+                        },
+                    }
+                ),
+            ]
+            return iter(chunks)
+
+        # Non-streaming: return a ChatCompletion-like object with .usage
+        return FakeChatCompletion(
+            {
+                "model": kwargs.get("model", "gpt-4o-mini"),
+                "choices": [{"message": {"role": "assistant", "content": "hi", "tool_calls": None}}],
+                "usage": {
+                    "prompt_tokens": 8,
+                    "completion_tokens": 16,
+                    "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
+                    "completion_tokens_details": {"reasoning_tokens": 0, "audio_tokens": 0},
+                },
+            }
+        )
+
+
+class FakeChat:
+    def __init__(self) -> None:
+        self.completions = FakeCompletions()
+
+
+class FakeResponsesNamespace:
+    def __init__(self) -> None:
+        self.create_calls = 0
+
+    def create(self, **kwargs: Any) -> Any:
+        self.create_calls += 1
+        assert "extra_lago" not in kwargs
+        return FakeResponsesResponse(
+            {
+                "model": kwargs.get("model", "gpt-4o-mini"),
+                "output": [{"type": "function_call", "name": "get_weather"}],
+                "usage": {
+                    "input_tokens": 53,
+                    "output_tokens": 6,
+                    "input_tokens_details": {"cached_tokens": 0},
+                    "output_tokens_details": {"reasoning_tokens": 0},
+                },
+            }
+        )
+
+
+class FakeOpenAI:
+    """Mimics `from openai import OpenAI; OpenAI(api_key=...)`."""
+
+    def __init__(self) -> None:
+        self.chat = FakeChat()
+        self.responses = FakeResponsesNamespace()
+
+
+# Module path needs to contain 'openai' so detector routes to openai wrapper.
+FakeOpenAI.__module__ = "openai.fake"
+
+
+def _new_sdk(default_sub: str = "sub_test") -> tuple[LagoSDK, list[dict[str, Any]]]:
+    received: list[dict[str, Any]] = []
+
+    def sender(batch: list[dict[str, Any]]) -> None:
+        received.extend(batch)  # collect events in-memory so tests can inspect them
+
+    sdk = LagoSDK(api_key="dummy", default_subscription_id=default_sub)
+    sdk._queue._sender = sender  # type: ignore[attr-defined]
+    return sdk, received
+
+
+# --------------------------------------------------------------------------
+# Chat Completions
+# --------------------------------------------------------------------------
+def test_wrap_chat_completions_create_emits_input_and_output() -> None:
+    sdk, received = _new_sdk()
+    fake = FakeOpenAI()
+    client = sdk.wrap(fake)
+    resp = client.chat.completions.create(model="gpt-4o-mini", messages=[])
+    assert resp.usage["prompt_tokens"] == 8
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received}
+    assert by_code["llm_input_tokens"] == 8
+    assert by_code["llm_output_tokens"] == 16
+
+
+def test_wrap_strips_extra_lago_and_uses_per_call_sub() -> None:
+    sdk, received = _new_sdk("sub_default")
+    fake = FakeOpenAI()
+    client = sdk.wrap(fake)
+    client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[],
+        extra_lago={"subscription": "sub_per_call", "dimensions": {"feature": "X"}},
+    )
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    assert all(e["external_subscription_id"] == "sub_per_call" for e in received)
+    assert received[0]["properties"]["feature"] == "X"
+
+
+def test_wrap_double_wrap_is_idempotent() -> None:
+    sdk, received = _new_sdk()
+    fake = FakeOpenAI()
+    sdk.wrap(fake)
+    sdk.wrap(fake)
+    sdk.wrap(fake)
+    fake.chat.completions.create(model="gpt-4o-mini", messages=[])
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    assert len(received) == 2  # input + output, not 6
+    assert fake.chat.completions.create_calls == 1
+
+
+def test_wrap_create_with_stream_captures_usage_from_final_chunk() -> None:
+    sdk, received = _new_sdk()
+    fake = FakeOpenAI()
+    client = sdk.wrap(fake)
+    chunks = list(client.chat.completions.create(model="gpt-4o-mini", messages=[], stream=True))
+    assert len(chunks) == 2  # first chunk + usage chunk
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received}
+    assert by_code["llm_input_tokens"] == 12
+    assert by_code["llm_output_tokens"] == 22
+
+
+def test_wrap_auto_injects_stream_options_include_usage() -> None:
+    """Customer passes stream=True without stream_options — wrapper injects include_usage:True."""
+    sdk, _ = _new_sdk()
+    fake = FakeOpenAI()
+    client = sdk.wrap(fake)
+    list(client.chat.completions.create(model="gpt-4o-mini", messages=[], stream=True))
+    sdk.shutdown(timeout=1.0)
+    seen = fake.chat.completions.last_kwargs or {}
+    assert seen.get("stream_options") == {"include_usage": True}
+
+
+def test_wrap_respects_customer_explicit_include_usage_false() -> None:
+    """If customer set include_usage=False explicitly, we don't override."""
+    sdk, _ = _new_sdk()
+    fake = FakeOpenAI()
+    client = sdk.wrap(fake)
+    list(
+        client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[],
+            stream=True,
+            stream_options={"include_usage": False},
+        )
+    )
+    sdk.shutdown(timeout=1.0)
+    seen = fake.chat.completions.last_kwargs or {}
+    assert seen.get("stream_options") == {"include_usage": False}
+
+
+def test_wrap_preserves_existing_stream_options_keys() -> None:
+    """Existing stream_options keys are kept; include_usage is added alongside."""
+    sdk, _ = _new_sdk()
+    fake = FakeOpenAI()
+    client = sdk.wrap(fake)
+    list(
+        client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[],
+            stream=True,
+            stream_options={"some_other_option": "value"},
+        )
+    )
+    sdk.shutdown(timeout=1.0)
+    seen = fake.chat.completions.last_kwargs or {}
+    assert seen.get("stream_options") == {"some_other_option": "value", "include_usage": True}
+
+
+# --------------------------------------------------------------------------
+# Responses API
+# --------------------------------------------------------------------------
+def test_wrap_responses_create_emits_input_output_and_tool_calls() -> None:
+    sdk, received = _new_sdk()
+    fake = FakeOpenAI()
+    client = sdk.wrap(fake)
+    resp = client.responses.create(model="gpt-4o-mini", input="hi")
+    assert resp.usage["input_tokens"] == 53
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received}
+    assert by_code["llm_input_tokens"] == 53
+    assert by_code["llm_output_tokens"] == 6
+    assert by_code["llm_tool_calls"] == 1
+
+
+# --------------------------------------------------------------------------
+# Failure isolation
+# --------------------------------------------------------------------------
+def test_instrumentation_failure_does_not_break_call() -> None:
+    sdk, _ = _new_sdk()
+
+    class BadResp:
+        @property
+        def usage(self):
+            raise RuntimeError("boom")
+
+        def model_dump(self):
+            raise RuntimeError("boom")
+
+    class BadCompletions:
+        def create(self, **_kw):
+            return BadResp()
+
+    class BadChat:
+        def __init__(self):
+            self.completions = BadCompletions()
+
+    class BadOpenAI:
+        def __init__(self):
+            self.chat = BadChat()
+            self.responses = None  # responses namespace deliberately omitted
+
+    BadOpenAI.__module__ = "openai.fake"
+
+    client = sdk.wrap(BadOpenAI())
+    # Adapter will crash inside, but wrap must still return resp.
+    resp = client.chat.completions.create(model="x", messages=[])
+    assert resp is not None
+    sdk.shutdown(timeout=1.0)

From 6c487ab7449761e43b2dd6ec58500c1297323e7b Mon Sep 17 00:00:00 2001
From: Anass <anass@getlago.com>
Date: Fri, 29 May 2026 13:50:11 +0200
Subject: [PATCH 4/5] Add native Gemini (google-genai) SDK support

Adapter maps usage_metadata fields to CanonicalUsage:

  prompt_token_count                                   -> input
  candidates_token_count                               -> output
  cached_content_token_count                           -> cache_read
  thoughts_token_count                                 -> reasoning
  prompt_tokens_details[modality=AUDIO].token_count    -> audio_input
  prompt_tokens_details[modality=IMAGE].token_count    -> image_input
  candidates_tokens_details[modality=AUDIO].token_count -> audio_output
  count of candidates[0].content.parts[].function_call -> tool_calls

Wrapper covers client.models.generate_content + generate_content_stream
(sync) and the async variants under client.aio.models. Idempotent via
_lago_instrumented sentinel.

Detector now returns 'gemini' (was 'google') for google-genai clients --
matches the naming convention used by other providers (bedrock, anthropic,
openai, mistral).

Semantic note vs OpenAI:
  Gemini's `thoughts_token_count` is ADDITIVE to `candidates_token_count`
  (verified by math across all 5 fixtures: input + output + reasoning = total).
  OpenAI's `reasoning_tokens` is a SUBSET of `completion_tokens`.
  Documented in adapter docstring + README for customers configuring
  per-metric billing.

Gemini 2.5 emits reasoning tokens by default (no explicit thinking_config
needed) -- second provider populating llm_reasoning_tokens.

21 new unit tests (15 adapter + 6 wrapper). 4 live integration tests
gated on GEMINI_API_KEY. 5 captured response fixtures (plain, tool use,
streaming, thinking, multi-turn).

Total: 304 unit tests passing, ruff + mypy strict clean.
---
 CHANGELOG.md                                  |   9 +
 README.md                                     |  53 +++--
 pyproject.toml                                |   6 +-
 src/lago_agent_sdk/adapters/__init__.py       |   2 +
 src/lago_agent_sdk/adapters/gemini_native.py  | 135 +++++++++++
 src/lago_agent_sdk/detector.py                |   2 +-
 src/lago_agent_sdk/sdk.py                     |   9 +-
 src/lago_agent_sdk/wrappers/gemini.py         | 157 +++++++++++++
 tests/integration/test_live_gemini.py         | 154 +++++++++++++
 .../unit/adapters/fixtures/capture_gemini.py  | 138 +++++++++++
 .../gemini_native/01_plain_flash.json         |  82 +++++++
 .../fixtures/gemini_native/02_tool_use.json   |  90 ++++++++
 .../fixtures/gemini_native/03_streaming.json  |  85 +++++++
 .../fixtures/gemini_native/04_thinking.json   |  82 +++++++
 .../fixtures/gemini_native/05_multi_turn.json |  82 +++++++
 tests/unit/adapters/test_gemini_native.py     | 217 ++++++++++++++++++
 tests/unit/test_wrapper_gemini.py             | 214 +++++++++++++++++
 17 files changed, 1500 insertions(+), 17 deletions(-)
 create mode 100644 src/lago_agent_sdk/adapters/gemini_native.py
 create mode 100644 src/lago_agent_sdk/wrappers/gemini.py
 create mode 100644 tests/integration/test_live_gemini.py
 create mode 100644 tests/unit/adapters/fixtures/capture_gemini.py
 create mode 100644 tests/unit/adapters/fixtures/gemini_native/01_plain_flash.json
 create mode 100644 tests/unit/adapters/fixtures/gemini_native/02_tool_use.json
 create mode 100644 tests/unit/adapters/fixtures/gemini_native/03_streaming.json
 create mode 100644 tests/unit/adapters/fixtures/gemini_native/04_thinking.json
 create mode 100644 tests/unit/adapters/fixtures/gemini_native/05_multi_turn.json
 create mode 100644 tests/unit/adapters/test_gemini_native.py
 create mode 100644 tests/unit/test_wrapper_gemini.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d857d3..cda593c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,15 @@ All notable changes to this project will be documented here. Format follows [Kee
 ## [Unreleased]
 
 ### Added
+- Native `google-genai` SDK support covering `client.models.generate_content` + `generate_content_stream`, sync + async (`client.aio.models`).
+- `extract_gemini_native` adapter maps `usage_metadata`: `prompt_token_count → input`, `candidates_token_count → output`, `cached_content_token_count → cache_read`, `thoughts_token_count → reasoning`, `prompt_tokens_details[modality=AUDIO/IMAGE] → audio_input/image_input`, `candidates_tokens_details[modality=AUDIO] → audio_output`, count of `candidates[0].content.parts[].function_call → tool_calls`.
+- **Gemini 2.5 surfaces reasoning tokens by default** (`thoughts_token_count`) — fires `llm_reasoning_tokens` automatically. Note the semantic difference vs OpenAI: Gemini's reasoning is ADDITIVE to output (`candidates + thoughts = total billable output`); OpenAI's reasoning is a SUBSET of `completion_tokens`. Documented in adapter docstring + README.
+- `gemini` optional dependency group: `pip install 'lago-agent-sdk[gemini]'`.
+- 21 new unit tests (15 adapter + 6 wrapper) and 4 live integration tests (gated on `GEMINI_API_KEY`). Total: 304 unit tests.
+- 5 captured response fixtures from the real Gemini API (plain, tool use, streaming, thinking, multi-turn).
+- Detector now returns `gemini` (was `google`) for `google-genai` clients.
+
+### Added (OpenAI — earlier in this branch)
 - Native `openai` SDK support covering both APIs: `chat.completions.create` and `responses.create`, each with sync + streaming. Same coverage on `AsyncOpenAI`.
 - `extract_openai_native` adapter handles both API shapes with auto-detection:
   - Chat Completions: `prompt_tokens`, `completion_tokens`, `prompt_tokens_details.{cached_tokens, audio_tokens}`, `completion_tokens_details.{reasoning_tokens, audio_tokens}`, count of `choices[0].message.tool_calls`.
diff --git a/README.md b/README.md
index ba96fc3..2d1fb0b 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`).
 For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`).
 For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`).
 For OpenAI native support: `pip install 'lago-agent-sdk[openai]'` (adds `openai`).
+For Gemini native support: `pip install 'lago-agent-sdk[gemini]'` (adds `google-genai`).
 
 ## Quickstart — Bedrock
 
@@ -110,6 +111,28 @@ Works with `OpenAI` and `AsyncOpenAI`. Covers both **Chat Completions** (`client
 
 **Reasoning tokens** (`llm_reasoning_tokens`) populate automatically when you call an o-series model (`o4-mini`, `o1`, etc.) — OpenAI is the first provider to expose this metric separately.
 
+## Quickstart — Gemini
+
+```python
+from google import genai
+from lago_agent_sdk import LagoSDK
+
+sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
+client = sdk.wrap(genai.Client(api_key="..."))
+
+resp = client.models.generate_content(
+    model="gemini-2.5-flash",
+    contents="Hello",
+)
+sdk.flush()
+```
+
+Wraps the modern `google-genai` SDK (`from google import genai`). Covers `client.models.generate_content` + `generate_content_stream`, sync + async (via `client.aio.models`).
+
+**Reasoning tokens** populate automatically on Gemini 2.5 — the model reasons internally by default and surfaces `thoughts_token_count`. Note the semantic difference vs OpenAI:
+- **OpenAI:** `reasoning_tokens` is a *subset* of `completion_tokens` (already counted in output)
+- **Gemini:** `thoughts_token_count` is *additive* to `candidates_token_count` (total billable output = candidates + thoughts)
+
 ## Multi-tenant — pick a subscription per call
 
 Three ways to set the `external_subscription_id`, in priority order:
@@ -137,25 +160,29 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks.
 | Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ |
 | Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ |
 | OpenAI | native SDK (`chat.completions.create` + `responses.create`, sync + async + stream) | ✓ |
-| Google Gemini | native SDK | Phase 3 |
+| Google Gemini | native SDK (`google-genai`: `models.generate_content` + `generate_content_stream`, sync + async) | ✓ |
 | LiteLLM | callback bridge | Phase 4 |
 
 ## Token dimensions captured
 
 `CanonicalUsage` carries 11 numeric fields. Which ones populate depends on the provider:
 
-| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI |
-|---|---|---|---|---|---|
-| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ |
-| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ |
-| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) |
-| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ (auto-cache; OpenAI doesn't surface creation counts) |
-| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ |
-| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series models)** |
-| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ |
-| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio input) |
-| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio output) |
-| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3 — multimodal adapter) |
+| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI | Gemini |
+|---|---|---|---|---|---|---|
+| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ |
+| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ |
+| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) | ✓ (CachedContent API) |
+| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ | ✗ |
+| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ | ✗ |
+| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series, subset)** | **✓ (Gemini 2.5, additive)** |
+| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ | ✓ |
+| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
+| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
+| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3) | ✓ (multimodal IMAGE) |
+
+**Semantic note on `reasoning`:**
+- **OpenAI's `reasoning_tokens` is a SUBSET of `output`** — already counted in `completion_tokens`.
+- **Gemini's `thoughts_token_count` is ADDITIVE to `output`** — `candidates + thoughts = total billable output`.
 
 OpenAI's Predicted Outputs tokens (`accepted_prediction_tokens`, `rejected_prediction_tokens`) are not surfaced — see the OpenAI adapter docstring for details on this intentional gap.
 
diff --git a/pyproject.toml b/pyproject.toml
index 77b3897..4c6bfd7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,9 @@ anthropic = [
 openai = [
     "openai>=1.50",
 ]
+gemini = [
+    "google-genai>=1.0",
+]
 
 [project.urls]
 Homepage = "https://www.getlago.com"
@@ -85,11 +88,12 @@ strict = true
 files = ["src/lago_agent_sdk"]
 
 [[tool.mypy.overrides]]
-module = ["boto3.*", "botocore.*", "mistralai.*", "openai.*"]
+module = ["boto3.*", "botocore.*", "mistralai.*", "openai.*", "google.*"]
 ignore_missing_imports = true
 
 [dependency-groups]
 dev = [
     "anthropic>=0.30",
     "openai>=1.50",
+    "google-genai>=1.0",
 ]
diff --git a/src/lago_agent_sdk/adapters/__init__.py b/src/lago_agent_sdk/adapters/__init__.py
index 1d24920..88d3a71 100644
--- a/src/lago_agent_sdk/adapters/__init__.py
+++ b/src/lago_agent_sdk/adapters/__init__.py
@@ -1,6 +1,7 @@
 from .anthropic_native import extract_anthropic_native
 from .bedrock_converse import extract_bedrock_converse
 from .bedrock_invoke import extract_bedrock_invoke, pick_invoke_adapter
+from .gemini_native import extract_gemini_native
 from .mistral_native import extract_mistral_native
 from .openai_native import extract_openai_native
 
@@ -9,6 +10,7 @@
     "extract_bedrock_converse",
     "extract_bedrock_invoke",
     "pick_invoke_adapter",
+    "extract_gemini_native",
     "extract_mistral_native",
     "extract_openai_native",
 ]
diff --git a/src/lago_agent_sdk/adapters/gemini_native.py b/src/lago_agent_sdk/adapters/gemini_native.py
new file mode 100644
index 0000000..8cb0f06
--- /dev/null
+++ b/src/lago_agent_sdk/adapters/gemini_native.py
@@ -0,0 +1,135 @@
+"""Gemini native adapter — verified against real fixtures.
+
+Wraps the modern `google-genai` SDK (`from google import genai`). Both
+`client.models.generate_content` (sync + async) and
+`client.models.generate_content_stream` (sync + async) put usage in
+`response.usage_metadata` (the final chunk for streaming).
+
+Field mapping (`usage_metadata.*`):
+  prompt_token_count                                      → input
+  candidates_token_count                                  → output
+  cached_content_token_count                              → cache_read
+  thoughts_token_count                                    → reasoning
+                                                            (Gemini 2.5; ADDITIVE
+                                                            to candidates, not a subset)
+  prompt_tokens_details[modality=AUDIO].token_count       → audio_input
+  prompt_tokens_details[modality=IMAGE].token_count       → image_input
+  candidates_tokens_details[modality=AUDIO].token_count   → audio_output
+
+Tool calls: count of candidates[0].content.parts[] entries that have a
+non-null `function_call` field.
+
+Semantic note vs OpenAI:
+  Gemini's `thoughts_token_count` is ADDITIVE to `candidates_token_count`
+  (total billable output for Google = candidates + thoughts).
+  OpenAI's `reasoning_tokens` is a SUBSET of `completion_tokens`.
+  When a customer bills on both `llm_output_tokens` and `llm_reasoning_tokens`
+  as separate Lago metrics, the Gemini-side sum equals Google's billable output;
+  the OpenAI-side `llm_output_tokens` already includes reasoning.
+
+Unknown top-level usage fields land in `extras` (drift detection).
+"""
+
+from __future__ import annotations
+
+from typing import Any, cast
+
+from ..canonical import CanonicalUsage
+
+_KNOWN_USAGE_FIELDS = {
+    "prompt_token_count",
+    "candidates_token_count",
+    "cached_content_token_count",
+    "thoughts_token_count",
+    "tool_use_prompt_token_count",
+    "total_token_count",
+    "prompt_tokens_details",
+    "candidates_tokens_details",
+    "cache_tokens_details",
+    "tool_use_prompt_tokens_details",
+    "traffic_type",
+}
+
+
+def _safe_dict(v: Any) -> dict[str, Any]:
+    return {} if not isinstance(v, dict) else v  # non-dict input → empty dict
+
+
+def _safe_int(v: Any) -> int:
+    try:  # coerce to a non-negative int; anything uncoercible counts as 0
+        return max(0, int(v or 0))
+    except (TypeError, ValueError, OverflowError):  # OverflowError: int(float("inf"))
+        return 0
+
+
+def _to_dict(obj: Any) -> dict[str, Any]:
+    """Best-effort pydantic-or-dict → dict; returns {} when no dict can be produced."""
+    if isinstance(obj, dict):
+        return obj
+    dump = getattr(obj, "model_dump", None)
+    try:
+        dumped = dump() if callable(dump) else None
+    except Exception:  # noqa: BLE001 — usage extraction is best-effort
+        dumped = None
+    return cast(dict[str, Any], dumped) if isinstance(dumped, dict) else {}
+
+
+def _modality_token_count(
+    details: list[dict[str, Any]] | Any, modality: str
+) -> int:
+    """Total the token_count of every {modality, token_count} entry whose modality matches."""
+    if not isinstance(details, list):
+        return 0
+    return sum(
+        _safe_int(item.get("token_count"))
+        for item in details
+        if isinstance(item, dict) and item.get("modality") == modality
+    )
+
+
+def _count_tool_calls(resp: dict[str, Any]) -> int:
+    """Return how many parts of candidates[0].content.parts[] carry a non-null function_call."""
+    candidate_list = resp.get("candidates")
+    if not (isinstance(candidate_list, list) and candidate_list):
+        return 0
+    # Only the first candidate is inspected; any malformed level yields 0.
+    head = candidate_list[0]
+    content = head.get("content") if isinstance(head, dict) else None
+    parts = content.get("parts") if isinstance(content, dict) else None
+    if not isinstance(parts, list):
+        return 0
+    calls = [p for p in parts if isinstance(p, dict) and p.get("function_call") is not None]
+    return len(calls)
+
+
+def extract_gemini_native(response: Any, model_id: str = "") -> CanonicalUsage:
+    """Build a CanonicalUsage from a google-genai response.
+
+    `response` may be an SDK pydantic object, a plain dict (e.g. a captured fixture),
+    or the synthetic `{"usage_metadata": {...}}` blob produced by the streaming wrapper.
+    """
+    payload = _to_dict(response)  # dicts are returned as-is by _to_dict
+    meta = _safe_dict(payload.get("usage_metadata"))
+    reported_model = payload.get("model_version")
+    fallback_model = reported_model if isinstance(reported_model, str) else ""
+
+    input_details = meta.get("prompt_tokens_details")
+    output_details = meta.get("candidates_tokens_details")
+
+    # Usage keys this adapter does not recognise are preserved for drift detection.
+    unknown = {key: val for key, val in meta.items() if key not in _KNOWN_USAGE_FIELDS}
+
+    return CanonicalUsage(
+        input=_safe_int(meta.get("prompt_token_count")),
+        output=_safe_int(meta.get("candidates_token_count")),
+        cache_read=_safe_int(meta.get("cached_content_token_count")),
+        reasoning=_safe_int(meta.get("thoughts_token_count")),
+        audio_input=_modality_token_count(input_details, "AUDIO"),
+        audio_output=_modality_token_count(output_details, "AUDIO"),
+        image_input=_modality_token_count(input_details, "IMAGE"),
+        tool_calls=_count_tool_calls(payload),
+        model=model_id or fallback_model or "",
+        provider="gemini",
+        api="native",
+        extras=unknown,
+    )
diff --git a/src/lago_agent_sdk/detector.py b/src/lago_agent_sdk/detector.py
index f20def8..91315e2 100644
--- a/src/lago_agent_sdk/detector.py
+++ b/src/lago_agent_sdk/detector.py
@@ -38,6 +38,6 @@ def detect_client_kind(client: Any) -> str:
     if cls_name == "mistral" and "mistral" in module:
         return "mistral"
     if "google" in module and ("genai" in module or "generativeai" in module):
-        return "google"
+        return "gemini"
 
     return "unknown"
diff --git a/src/lago_agent_sdk/sdk.py b/src/lago_agent_sdk/sdk.py
index ce55ccb..4400f79 100644
--- a/src/lago_agent_sdk/sdk.py
+++ b/src/lago_agent_sdk/sdk.py
@@ -91,15 +91,20 @@ def wrap(
             from .wrappers.openai import wrap_openai_client
 
             return wrap_openai_client(self, client, dimensions=dimensions, subscription=subscription)
+        if kind == "gemini":
+            from .wrappers.gemini import wrap_gemini_client
+
+            return wrap_gemini_client(self, client, dimensions=dimensions, subscription=subscription)
         if kind == "unknown":
             raise UnknownClientError(
                 f"Unknown client passed to wrap(): {type(client).__module__}.{type(client).__name__}. "
                 "Supported: boto3 bedrock-runtime, mistralai.client.Mistral, "
-                "anthropic.Anthropic / AsyncAnthropic, openai.OpenAI / AsyncOpenAI."
+                "anthropic.Anthropic / AsyncAnthropic, openai.OpenAI / AsyncOpenAI, "
+                "google.genai.Client."
             )
         raise UnknownClientError(
             f"Client kind '{kind}' is not yet supported. "
-            "Implemented: 'bedrock', 'mistral', 'anthropic', 'openai'."
+            "Implemented: 'bedrock', 'mistral', 'anthropic', 'openai', 'gemini'."
         )
 
     # ------------------------------------------------------------------
diff --git a/src/lago_agent_sdk/wrappers/gemini.py b/src/lago_agent_sdk/wrappers/gemini.py
new file mode 100644
index 0000000..f53ec51
--- /dev/null
+++ b/src/lago_agent_sdk/wrappers/gemini.py
@@ -0,0 +1,157 @@
+"""google-genai SDK wrapper.
+
+Wraps the public methods of `genai.Client.models` (sync) and `genai.Client.aio.models`
+(async) in place — instrumentation never breaks the customer's call.
+
+Methods wrapped:
+  - models.generate_content(...)         — sync, returns GenerateContentResponse
+  - models.generate_content_stream(...)  — sync, returns iterator of chunks (last has usage)
+  - aio.models.generate_content(...)     — async, awaited
+  - aio.models.generate_content_stream(...) — async, yields chunks
+
+Per-call override: pop `extra_lago={"subscription": ..., "dimensions": ...}` from
+kwargs before forwarding so the SDK's strict validation doesn't reject it.
+"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import AsyncIterator, Iterator
+from typing import Any
+
+from ..adapters import extract_gemini_native
+
+logger = logging.getLogger("lago_agent_sdk.wrappers.gemini")
+
+_INSTRUMENTED_ATTR = "_lago_instrumented"  # flag set on a client once wrapped; makes wrapping idempotent
+_LAGO_KWARG = "extra_lago"  # per-call override kwarg, popped before the call is forwarded
+
+
+def _pop_lago_kwarg(kwargs: dict[str, Any]) -> dict[str, Any]:
+    return kwargs.pop(_LAGO_KWARG, None) or {}  # mutates kwargs; a missing or falsy value yields {}
+
+
+def wrap_gemini_client(
+    sdk: Any,
+    client: Any,
+    dimensions: dict[str, Any] | None = None,
+    subscription: str | None = None,
+) -> Any:
+    """In-place, idempotent wrap of a `google.genai.Client`; generate calls then emit usage via `sdk.emit`."""
+    if getattr(client, _INSTRUMENTED_ATTR, False):  # already wrapped: leave the client untouched
+        logger.info("lago: gemini client already wrapped — skipping")
+        return client
+
+    base_dims = dict(dimensions or {})  # shallow copy of the wrap-time default dimensions
+    base_sub = subscription
+
+    def _resolve_opts(lago_opts: dict[str, Any]) -> tuple[str | None, dict[str, Any]]:
+        sub = lago_opts.get("subscription") or base_sub  # per-call value wins; falsy falls back to the default
+        dims = {**base_dims, **(lago_opts.get("dimensions") or {})}  # per-call keys override default keys
+        return sub, dims
+
+    def _emit_from(payload: Any, model_id: str, sub: str | None, dims: dict[str, Any]) -> None:
+        try:  # best effort: any extraction/emit error is logged below and never reaches the caller
+            usage = extract_gemini_native(payload, model_id=model_id)
+            sdk.emit(usage, subscription=sub, dimensions=dims)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("lago: gemini emit failed: %s", exc)
+
+    def _make_sync_generate(original: Any) -> Any:
+        def _generate(*args: Any, **kwargs: Any) -> Any:
+            lago_opts = _pop_lago_kwarg(kwargs)  # strip the override kwarg before forwarding
+            model_id = kwargs.get("model") or (args[0] if args else "")  # truthy kwarg, else first positional, else ""
+            sub, dims = _resolve_opts(lago_opts)
+            response = original(*args, **kwargs)  # exceptions from the real call propagate unchanged
+            _emit_from(response, str(model_id), sub, dims)
+            return response
+
+        return _generate
+
+    def _make_async_generate(original: Any) -> Any:
+        async def _generate_async(*args: Any, **kwargs: Any) -> Any:
+            lago_opts = _pop_lago_kwarg(kwargs)  # strip the override kwarg before forwarding
+            model_id = kwargs.get("model") or (args[0] if args else "")  # same resolution as the sync path
+            sub, dims = _resolve_opts(lago_opts)
+            response = await original(*args, **kwargs)
+            _emit_from(response, str(model_id), sub, dims)
+            return response
+
+        return _generate_async
+
+    def _make_sync_stream(original: Any) -> Any:
+        def _stream(*args: Any, **kwargs: Any) -> Iterator[Any]:
+            lago_opts = _pop_lago_kwarg(kwargs)
+            model_id = kwargs.get("model") or (args[0] if args else "")
+            sub, dims = _resolve_opts(lago_opts)
+            src = original(*args, **kwargs)  # original is invoked now; only iteration is deferred to _iter()
+
+            def _iter() -> Iterator[Any]:
+                last_with_usage: Any = None
+                try:
+                    for chunk in src:
+                        payload = chunk.model_dump() if hasattr(chunk, "model_dump") else chunk  # else used as-is
+                        if isinstance(payload, dict) and payload.get("usage_metadata"):
+                            last_with_usage = {"usage_metadata": payload["usage_metadata"]}  # keep usage only
+                        yield chunk  # the caller always receives the original chunk object
+                finally:  # also runs when the consumer stops early or the stream raises
+                    if last_with_usage is not None:
+                        _emit_from(last_with_usage, str(model_id), sub, dims)  # one emit per stream, latest usage
+
+            return _iter()
+
+        return _stream
+
+    def _make_async_stream(original: Any) -> Any:
+        async def _stream_async(*args: Any, **kwargs: Any) -> AsyncIterator[Any]:
+            lago_opts = _pop_lago_kwarg(kwargs)
+            model_id = kwargs.get("model") or (args[0] if args else "")
+            sub, dims = _resolve_opts(lago_opts)
+            src = await original(*args, **kwargs)  # awaited result is consumed with `async for` below
+
+            async def _aiter() -> AsyncIterator[Any]:
+                last_with_usage: Any = None
+                try:
+                    async for chunk in src:
+                        payload = chunk.model_dump() if hasattr(chunk, "model_dump") else chunk  # else used as-is
+                        if isinstance(payload, dict) and payload.get("usage_metadata"):
+                            last_with_usage = {"usage_metadata": payload["usage_metadata"]}  # keep usage only
+                        yield chunk  # the caller always receives the original chunk object
+                finally:  # also runs when the consumer stops early or the stream raises
+                    if last_with_usage is not None:
+                        _emit_from(last_with_usage, str(model_id), sub, dims)  # one emit per stream, latest usage
+
+            return _aiter()
+
+        return _stream_async
+
+    # ------------------------------------------------------------------
+    # client.models.* (sync) — each method is replaced only if it exists
+    # ------------------------------------------------------------------
+    models = getattr(client, "models", None)
+    if models is not None:
+        original_generate = getattr(models, "generate_content", None)
+        if original_generate is not None:
+            models.generate_content = _make_sync_generate(original_generate)
+
+        original_stream = getattr(models, "generate_content_stream", None)
+        if original_stream is not None:
+            models.generate_content_stream = _make_sync_stream(original_stream)
+
+    # ------------------------------------------------------------------
+    # client.aio.models.* (async) — each method is replaced only if it exists
+    # ------------------------------------------------------------------
+    aio = getattr(client, "aio", None)
+    if aio is not None:
+        aio_models = getattr(aio, "models", None)
+        if aio_models is not None:
+            original_aio_generate = getattr(aio_models, "generate_content", None)
+            if original_aio_generate is not None:
+                aio_models.generate_content = _make_async_generate(original_aio_generate)
+
+            original_aio_stream = getattr(aio_models, "generate_content_stream", None)
+            if original_aio_stream is not None:
+                aio_models.generate_content_stream = _make_async_stream(original_aio_stream)
+
+    setattr(client, _INSTRUMENTED_ATTR, True)  # set unconditionally, even if nothing was patched above
+    return client
diff --git a/tests/integration/test_live_gemini.py b/tests/integration/test_live_gemini.py
new file mode 100644
index 0000000..4ac5de6
--- /dev/null
+++ b/tests/integration/test_live_gemini.py
@@ -0,0 +1,154 @@
+"""End-to-end Gemini integration test — live API + mocked Lago.
+
+Skipped unless GEMINI_API_KEY is set.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import threading
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+import pytest
+
+from lago_agent_sdk import LagoSDK
+
+pytestmark = pytest.mark.skipif(  # module-level mark: applies to every test in this file
+    not os.environ.get("GEMINI_API_KEY"),
+    reason="GEMINI_API_KEY not set",
+)
+
+
+class _MockLago(BaseHTTPRequestHandler):
+    # Stand-in for the Lago API: stores each POSTed JSON body on the server and answers 200.
+    def do_POST(self):  # noqa: N802
+        size = int(self.headers.get("Content-Length", 0))
+        self.server.received.append(json.loads(self.rfile.read(size)))  # type: ignore[attr-defined]
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(b'{"ok": true}')
+
+    def log_message(self, *_args, **_kwargs):
+        pass  # drop the handler's per-request log output
+
+
+def _spawn_lago():  # -> (server, base_url); the serving thread closes the socket once shutdown() ends the loop
+    s = HTTPServer(("127.0.0.1", 0), _MockLago)
+    s.received = []  # type: ignore[attr-defined]
+    threading.Thread(target=lambda: (s.serve_forever(), s.server_close()), daemon=True).start()
+    return s, f"http://127.0.0.1:{s.server_port}"
+
+
+def _collect_events(server) -> list[dict]:
+    return [event for batch in server.received for event in batch["events"]]  # flatten every POSTed batch
+
+
+def _codes(events) -> set[str]:
+    return {event["code"] for event in events}  # distinct event codes
+
+
+def test_live_gemini_generate_content_emits_to_lago() -> None:
+    """A plain generate_content call reaches the mock Lago as native gemini token events."""
+    from google import genai
+
+    lago, base_url = _spawn_lago()
+    try:
+        lago_sdk = LagoSDK(api_key="x", api_url=base_url, default_subscription_id="sub_int")
+        gemini = lago_sdk.wrap(genai.Client(api_key=os.environ["GEMINI_API_KEY"]))
+        gemini.models.generate_content(
+            model="gemini-2.5-flash",
+            contents="Say hi",
+        )
+        assert lago_sdk.flush(timeout=10.0)
+        lago_sdk.shutdown(timeout=2.0)
+        received = _collect_events(lago)
+        assert {"llm_input_tokens", "llm_output_tokens"} <= _codes(received)
+        for event in received:
+            props = event["properties"]
+            assert props["api"] == "native"
+            assert props["provider"] == "gemini"
+    finally:
+        lago.shutdown()
+
+
+def test_live_gemini_streaming_captures_usage_from_final_chunk() -> None:
+    """Draining a wrapped stream to completion emits input and output token events."""
+    from google import genai
+
+    lago, base_url = _spawn_lago()
+    try:
+        lago_sdk = LagoSDK(api_key="x", api_url=base_url, default_subscription_id="sub_int")
+        gemini = lago_sdk.wrap(genai.Client(api_key=os.environ["GEMINI_API_KEY"]))
+        stream = gemini.models.generate_content_stream(
+            model="gemini-2.5-flash",
+            contents="Count from 1 to 3.",
+        )
+        for _chunk in stream:  # content is irrelevant here; the stream only has to be exhausted
+            pass
+        assert lago_sdk.flush(timeout=10.0)
+        lago_sdk.shutdown(timeout=2.0)
+        emitted = _codes(_collect_events(lago))
+        assert {"llm_input_tokens", "llm_output_tokens"} <= emitted
+    finally:
+        lago.shutdown()
+
+
+def test_live_gemini_thinking_emits_reasoning() -> None:
+    """A step-by-step prompt on gemini-2.5-flash must yield an llm_reasoning_tokens event."""
+    from google import genai
+
+    lago, base_url = _spawn_lago()
+    try:
+        lago_sdk = LagoSDK(api_key="x", api_url=base_url, default_subscription_id="sub_int")
+        gemini = lago_sdk.wrap(genai.Client(api_key=os.environ["GEMINI_API_KEY"]))
+        gemini.models.generate_content(
+            model="gemini-2.5-flash",
+            contents="What is 17 * 23? Show your reasoning step by step.",
+        )
+        assert lago_sdk.flush(timeout=15.0)
+        lago_sdk.shutdown(timeout=2.0)
+        received = _collect_events(lago)
+        emitted = _codes(received)
+        assert "llm_input_tokens" in emitted
+        assert "llm_output_tokens" in emitted
+        # No thinking_config is passed: the test relies on Gemini 2.5 reasoning by default.
+        assert "llm_reasoning_tokens" in emitted
+    finally:
+        lago.shutdown()
+
+
+def test_live_gemini_tool_use_emits_tool_calls() -> None:
+    from google import genai
+    from google.genai import types as genai_types
+
+    lago, base_url = _spawn_lago()
+    try:
+        lago_sdk = LagoSDK(api_key="x", api_url=base_url, default_subscription_id="sub_int")
+        gemini = lago_sdk.wrap(genai.Client(api_key=os.environ["GEMINI_API_KEY"]))
+        city_schema = genai_types.Schema(
+            type="OBJECT",
+            properties={"city": genai_types.Schema(type="STRING")},
+            required=["city"],
+        )
+        weather_fn = genai_types.FunctionDeclaration(
+            name="get_weather",
+            description="Get the current weather for a city.",
+            parameters=city_schema,
+        )
+        gemini.models.generate_content(
+            model="gemini-2.5-flash",
+            contents="What's the weather in Tokyo?",
+            config=genai_types.GenerateContentConfig(
+                tools=[genai_types.Tool(function_declarations=[weather_fn])],
+                tool_config=genai_types.ToolConfig(
+                    function_calling_config=genai_types.FunctionCallingConfig(mode="ANY"),
+                ),
+            ),
+        )
+        assert lago_sdk.flush(timeout=10.0)
+        lago_sdk.shutdown(timeout=2.0)
+        assert "llm_tool_calls" in _codes(_collect_events(lago))
+    finally:
+        lago.shutdown()
diff --git a/tests/unit/adapters/fixtures/capture_gemini.py b/tests/unit/adapters/fixtures/capture_gemini.py
new file mode 100644
index 0000000..3ecf59e
--- /dev/null
+++ b/tests/unit/adapters/fixtures/capture_gemini.py
@@ -0,0 +1,138 @@
+"""Capture real Gemini API responses for adapter design.
+
+Saves raw responses to tests/unit/adapters/fixtures/gemini_native/<scenario>.json
+so we can verify the field mappings against reality before writing the adapter.
+
+Uses the modern `google-genai` SDK: `from google import genai`.
+
+Reads GEMINI_API_KEY from env.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import pathlib
+import sys
+
+from google import genai
+from google.genai import types
+
+OUT = pathlib.Path(__file__).parent / "gemini_native"  # fixture directory next to this script
+OUT.mkdir(parents=True, exist_ok=True)  # NOTE: runs at import time, not only when executed as a script
+
+
+def to_dict(response) -> dict:
+    """Turn *response* into a plain dict, preferring the object's own `model_dump()` / `dict()` method."""
+    for dumper in ("model_dump", "dict"):
+        if hasattr(response, dumper):
+            return getattr(response, dumper)()
+    # Neither dump method exists: round-trip through `.json()` if available, else coerce with dict().
+    return json.loads(response.json()) if hasattr(response, "json") else dict(response)
+
+
+def save(name: str, model: str, payload: dict) -> None:  # dump one scenario to OUT/<name>.json
+    target = OUT.joinpath(f"{name}.json")
+    target.write_text(json.dumps({"_model_id": model, "_response": payload}, indent=2, default=str))
+    print(f"  ✓ saved {target.name}")
+
+
+def main() -> int:  # returns the exit status: 0 when the run completes, 2 when the API key is missing
+    key = os.environ.get("GEMINI_API_KEY")
+    if not key:
+        print("error: set GEMINI_API_KEY", file=sys.stderr)
+        return 2
+
+    client = genai.Client(api_key=key)
+    PROMPT = "Write one sentence about dolphins."  # shared by the two plain-call scenarios (1 and 7)
+
+    # ----- 1. Plain call (cheap flash model) -----
+    print("\n[1] plain — gemini-2.5-flash")
+    r = client.models.generate_content(
+        model="gemini-2.5-flash",
+        contents=PROMPT,
+    )
+    save("01_plain_flash", "gemini-2.5-flash", to_dict(r))
+
+    # ----- 2. Tool use (function calling) -----
+    print("\n[2] tool use — gemini-2.5-flash with weather function")
+    weather_fn = types.FunctionDeclaration(
+        name="get_weather",
+        description="Get the current weather for a city.",
+        parameters=types.Schema(
+            type="OBJECT",
+            properties={"city": types.Schema(type="STRING")},
+            required=["city"],
+        ),
+    )
+    r = client.models.generate_content(
+        model="gemini-2.5-flash",
+        contents="What's the weather in Tokyo?",
+        config=types.GenerateContentConfig(
+            tools=[types.Tool(function_declarations=[weather_fn])],
+            tool_config=types.ToolConfig(
+                function_calling_config=types.FunctionCallingConfig(mode="ANY"),
+            ),
+        ),
+    )
+    save("02_tool_use", "gemini-2.5-flash", to_dict(r))
+
+    # ----- 3. Streaming with usage metadata -----
+    print("\n[3] streaming — gemini-2.5-flash")
+    chunks: list[dict] = []
+    for chunk in client.models.generate_content_stream(
+        model="gemini-2.5-flash",
+        contents="Count from 1 to 5, one number per line.",
+    ):
+        chunks.append(to_dict(chunk))  # every chunk is kept, in arrival order
+    save("03_streaming", "gemini-2.5-flash", {"chunks": chunks})  # stored under a "chunks" key
+
+    # ----- 4. Thinking mode (Gemini 2.5 — emits thoughts_token_count) -----
+    print("\n[4] thinking — gemini-2.5-flash with thinking_config")
+    try:
+        r = client.models.generate_content(
+            model="gemini-2.5-flash",
+            contents=(
+                "Prove that the sum of the first n cubes equals the square of "
+                "the sum of the first n positive integers. Show each step."
+            ),
+            config=types.GenerateContentConfig(
+                thinking_config=types.ThinkingConfig(include_thoughts=False, thinking_budget=2048),
+            ),
+        )
+        save("04_thinking", "gemini-2.5-flash", to_dict(r))
+    except Exception as exc:  # noqa: BLE001
+        print(f"  thinking config error: {str(exc)[:160]}")  # reported (truncated); later scenarios still run
+
+    # ----- 5. Multi-turn -----
+    print("\n[5] multi-turn — gemini-2.5-flash (3 turns)")
+    convo = [
+        types.Content(role="user", parts=[types.Part(text="What is 2+2?")]),
+        types.Content(role="model", parts=[types.Part(text="2+2 equals 4.")]),
+        types.Content(role="user", parts=[types.Part(text="And times 3?")]),
+    ]
+    r = client.models.generate_content(model="gemini-2.5-flash", contents=convo)
+    save("05_multi_turn", "gemini-2.5-flash", to_dict(r))
+
+    # ----- 6. Explicit cache (Gemini's CachedContent API) -----
+    # Note: requires a sufficiently large prompt (>32k tokens for flash) so we skip
+    # for the demo; documented but not part of the captured fixture set.
+    print("\n[6] (explicit-cache fixture skipped — needs >32k-token prompt)")
+
+    # ----- 7. Larger model for cross-shape comparison -----
+    print("\n[7] plain — gemini-2.5-pro")
+    try:
+        r = client.models.generate_content(
+            model="gemini-2.5-pro",
+            contents=PROMPT,
+        )
+        save("07_plain_pro", "gemini-2.5-pro", to_dict(r))
+    except Exception as exc:  # noqa: BLE001
+        print(f"  gemini-2.5-pro error: {str(exc)[:160]}")  # reported (truncated); the run still returns 0
+
+    print("\nDone. Inspect tests/unit/adapters/fixtures/gemini_native/*.json")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/tests/unit/adapters/fixtures/gemini_native/01_plain_flash.json b/tests/unit/adapters/fixtures/gemini_native/01_plain_flash.json
new file mode 100644
index 0000000..1f226e9
--- /dev/null
+++ b/tests/unit/adapters/fixtures/gemini_native/01_plain_flash.json
@@ -0,0 +1,82 @@
+{
+  "_model_id": "gemini-2.5-flash",
+  "_response": {
+    "sdk_http_response": {
+      "headers": {
+        "x-gemini-service-tier": "standard",
+        "content-type": "application/json; charset=UTF-8",
+        "vary": "Origin, X-Origin, Referer",
+        "content-encoding": "gzip",
+        "date": "Fri, 29 May 2026 11:03:34 GMT",
+        "server": "scaffolding on HTTPServer2",
+        "x-xss-protection": "0",
+        "x-frame-options": "SAMEORIGIN",
+        "x-content-type-options": "nosniff",
+        "server-timing": "gfet4t7; dur=4026",
+        "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000",
+        "transfer-encoding": "chunked"
+      },
+      "body": null
+    },
+    "candidates": [
+      {
+        "content": {
+          "parts": [
+            {
+              "media_resolution": null,
+              "code_execution_result": null,
+              "executable_code": null,
+              "file_data": null,
+              "function_call": null,
+              "function_response": null,
+              "inline_data": null,
+              "text": "Dolphins are highly intelligent and social marine mammals known for their playful acrobatics and sophisticated use of echolocation.",
+              "thought": null,
+              "thought_signature": null,
+              "video_metadata": null,
+              "tool_call": null,
+              "tool_response": null,
+              "part_metadata": null
+            }
+          ],
+          "role": "model"
+        },
+        "citation_metadata": null,
+        "finish_message": null,
+        "token_count": null,
+        "finish_reason": "STOP",
+        "grounding_metadata": null,
+        "avg_logprobs": null,
+        "index": 0,
+        "logprobs_result": null,
+        "safety_ratings": null,
+        "url_context_metadata": null
+      }
+    ],
+    "create_time": null,
+    "model_version": "gemini-2.5-flash",
+    "prompt_feedback": null,
+    "response_id": "gnIZaribIvXzxs0Pt-rpkAI",
+    "usage_metadata": {
+      "cache_tokens_details": null,
+      "cached_content_token_count": null,
+      "candidates_token_count": 23,
+      "candidates_tokens_details": null,
+      "prompt_token_count": 7,
+      "prompt_tokens_details": [
+        {
+          "modality": "TEXT",
+          "token_count": 7
+        }
+      ],
+      "thoughts_token_count": 442,
+      "tool_use_prompt_token_count": null,
+      "tool_use_prompt_tokens_details": null,
+      "total_token_count": 472,
+      "traffic_type": null
+    },
+    "model_status": null,
+    "automatic_function_calling_history": [],
+    "parsed": null
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/gemini_native/02_tool_use.json b/tests/unit/adapters/fixtures/gemini_native/02_tool_use.json
new file mode 100644
index 0000000..3be2e59
--- /dev/null
+++ b/tests/unit/adapters/fixtures/gemini_native/02_tool_use.json
@@ -0,0 +1,90 @@
+{
+  "_model_id": "gemini-2.5-flash",
+  "_response": {
+    "sdk_http_response": {
+      "headers": {
+        "x-gemini-service-tier": "standard",
+        "content-type": "application/json; charset=UTF-8",
+        "vary": "Origin, X-Origin, Referer",
+        "content-encoding": "gzip",
+        "date": "Fri, 29 May 2026 11:03:35 GMT",
+        "server": "scaffolding on HTTPServer2",
+        "x-xss-protection": "0",
+        "x-frame-options": "SAMEORIGIN",
+        "x-content-type-options": "nosniff",
+        "server-timing": "gfet4t7; dur=1142",
+        "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000",
+        "transfer-encoding": "chunked"
+      },
+      "body": null
+    },
+    "candidates": [
+      {
+        "content": {
+          "parts": [
+            {
+              "media_resolution": null,
+              "code_execution_result": null,
+              "executable_code": null,
+              "file_data": null,
+              "function_call": {
+                "id": null,
+                "args": {
+                  "city": "Tokyo"
+                },
+                "name": "get_weather",
+                "partial_args": null,
+                "will_continue": null
+              },
+              "function_response": null,
+              "inline_data": null,
+              "text": null,
+              "thought": null,
+              "thought_signature": "b'\\n\\xba\\x02\\x01\\x0c9\\xd6\\xc7r\\xeb\\xe3\\x83E\\xad\\xbc\\xd82E\\x1c\\xb0*\\xcf\\xa6\\x07N\\xcc\\xd2\\xb4\\x14\\xa2\\x1f\\xda\\xc1\\xd8\\x01\\x16\\xba\\x15\\xf3f5\\x8c\\xdb\\n\\xc9\\xb47`\\xd8\\x05\\xc0\"x-\\x80\\xce\\x83\\xe1\\xb5h\\x9c\\xf5\\x95\\xce\\xc6\\xa2\\x0bX\\xe4r\\x82\\xcdGu\\xcc\\xf7\\x06\\x0f\\xb5?\\xe6k5n\\xfb\\\\\\x99-\\xa88\\xaat\\x00l\\x0e\\xdb\\xea\\x1b\\x0b\\x95gi\\xf3\\xc3\\xc9F\\x81\\x94\\x08\\xceE\\x13~:\\x827\\xebc\\xda\\xd3\\xd99IE\\xff\\xd8\\x8f\\x1e_+\\xf8\\xf77g\\xc3\\xa5\\xc3cHPf3\\xf0\\x8a\\xef\\x19M\\x0f\\xc5K\\x15\\x1c\\x86\\x95\\x00\\x1drB`\\xbc\\xcb\\xac\\xf5\\xde%\\xa6\\xfc<\\x8f\\x82.\\x029\\xab\\xa0\\xcdU\\x02\\xc5\\xb1[:\\x1b\\xc8\\xd8\\xbc\\xdc\\xcc\\x19m\\xe9\\xb5\\xb8\\xcf\\x9b\\xd0;\\xeb\\x8d \\x16,7\\x84\\xbeS\\xd3k\\x0f\\xa5\\xb62\\xddl\\xe6\\xdf\\xe1\\xda\\x7fnAa\\xb6\\x86 \\x97\\rE\\xac\\xac0\\xacaU\\xbc\\xbb|J\\xf0\\xe5o\\xd9YV\\xed\\xaa\\x01\\xd3\\x94x\\xe6\\xc3!\\xa4>\\x8dJ\\\\Q\\x86\\xcc\\xf6\\xb2\\'(\\\\>k\\xd6\\x10\\xb5\\xab\\x1b*\\xde\\t\\xea\\xc5\\xca\\x97\\xa0\\x18G\\xde\\xcdu\\xc1u\\xfc0yv\\xa4?3y\\x8b\\xd6\\xca>\\x90\\xe95\\x14\\x03U\\x12\\xdckW\\x00\\x9eL\\r\\x06\\x841'",
+              "video_metadata": null,
+              "tool_call": null,
+              "tool_response": null,
+              "part_metadata": null
+            }
+          ],
+          "role": "model"
+        },
+        "citation_metadata": null,
+        "finish_message": null,
+        "token_count": null,
+        "finish_reason": "STOP",
+        "grounding_metadata": null,
+        "avg_logprobs": null,
+        "index": 0,
+        "logprobs_result": null,
+        "safety_ratings": null,
+        "url_context_metadata": null
+      }
+    ],
+    "create_time": null,
+    "model_version": "gemini-2.5-flash",
+    "prompt_feedback": null,
+    "response_id": "hnIZauLSI77zxs0Pm_iDmQY",
+    "usage_metadata": {
+      "cache_tokens_details": null,
+      "cached_content_token_count": null,
+      "candidates_token_count": 15,
+      "candidates_tokens_details": null,
+      "prompt_token_count": 49,
+      "prompt_tokens_details": [
+        {
+          "modality": "TEXT",
+          "token_count": 49
+        }
+      ],
+      "thoughts_token_count": 69,
+      "tool_use_prompt_token_count": null,
+      "tool_use_prompt_tokens_details": null,
+      "total_token_count": 133,
+      "traffic_type": null
+    },
+    "model_status": null,
+    "automatic_function_calling_history": null,
+    "parsed": null
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/gemini_native/03_streaming.json b/tests/unit/adapters/fixtures/gemini_native/03_streaming.json
new file mode 100644
index 0000000..c736593
--- /dev/null
+++ b/tests/unit/adapters/fixtures/gemini_native/03_streaming.json
@@ -0,0 +1,85 @@
+{
+  "_model_id": "gemini-2.5-flash",
+  "_response": {
+    "chunks": [
+      {
+        "sdk_http_response": {
+          "headers": {
+            "content-type": "text/event-stream",
+            "content-disposition": "attachment",
+            "vary": "Origin, X-Origin, Referer",
+            "transfer-encoding": "chunked",
+            "date": "Fri, 29 May 2026 11:03:36 GMT",
+            "server": "scaffolding on HTTPServer2",
+            "x-xss-protection": "0",
+            "x-frame-options": "SAMEORIGIN",
+            "x-content-type-options": "nosniff",
+            "server-timing": "gfet4t7; dur=885",
+            "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000"
+          },
+          "body": null
+        },
+        "candidates": [
+          {
+            "content": {
+              "parts": [
+                {
+                  "media_resolution": null,
+                  "code_execution_result": null,
+                  "executable_code": null,
+                  "file_data": null,
+                  "function_call": null,
+                  "function_response": null,
+                  "inline_data": null,
+                  "text": "1\n2\n3\n4\n5",
+                  "thought": null,
+                  "thought_signature": null,
+                  "video_metadata": null,
+                  "tool_call": null,
+                  "tool_response": null,
+                  "part_metadata": null
+                }
+              ],
+              "role": "model"
+            },
+            "citation_metadata": null,
+            "finish_message": null,
+            "token_count": null,
+            "finish_reason": "STOP",
+            "grounding_metadata": null,
+            "avg_logprobs": null,
+            "index": 0,
+            "logprobs_result": null,
+            "safety_ratings": null,
+            "url_context_metadata": null
+          }
+        ],
+        "create_time": null,
+        "model_version": "gemini-2.5-flash",
+        "prompt_feedback": null,
+        "response_id": "h3IZapK5NI_hxs0P2p2dqQQ",
+        "usage_metadata": {
+          "cache_tokens_details": null,
+          "cached_content_token_count": null,
+          "candidates_token_count": 9,
+          "candidates_tokens_details": null,
+          "prompt_token_count": 14,
+          "prompt_tokens_details": [
+            {
+              "modality": "TEXT",
+              "token_count": 14
+            }
+          ],
+          "thoughts_token_count": 29,
+          "tool_use_prompt_token_count": null,
+          "tool_use_prompt_tokens_details": null,
+          "total_token_count": 52,
+          "traffic_type": null
+        },
+        "model_status": null,
+        "automatic_function_calling_history": null,
+        "parsed": null
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/gemini_native/04_thinking.json b/tests/unit/adapters/fixtures/gemini_native/04_thinking.json
new file mode 100644
index 0000000..64b4439
--- /dev/null
+++ b/tests/unit/adapters/fixtures/gemini_native/04_thinking.json
@@ -0,0 +1,82 @@
+{
+  "_model_id": "gemini-2.5-flash",
+  "_response": {
+    "sdk_http_response": {
+      "headers": {
+        "x-gemini-service-tier": "standard",
+        "content-type": "application/json; charset=UTF-8",
+        "vary": "Origin, X-Origin, Referer",
+        "content-encoding": "gzip",
+        "date": "Fri, 29 May 2026 11:03:48 GMT",
+        "server": "scaffolding on HTTPServer2",
+        "x-xss-protection": "0",
+        "x-frame-options": "SAMEORIGIN",
+        "x-content-type-options": "nosniff",
+        "server-timing": "gfet4t7; dur=11714",
+        "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000",
+        "transfer-encoding": "chunked"
+      },
+      "body": null
+    },
+    "candidates": [
+      {
+        "content": {
+          "parts": [
+            {
+              "media_resolution": null,
+              "code_execution_result": null,
+              "executable_code": null,
+              "file_data": null,
+              "function_call": null,
+              "function_response": null,
+              "inline_data": null,
+              "text": "To prove that the sum of the first n cubes equals the square of the sum of the first n positive integers, we need to show that:\n\n$$ \\sum_{k=1}^{n} k^3 = \\left( \\sum_{k=1}^{n} k \\right)^2 $$\n\nWe know the formula for the sum of the first n positive integers:\n$$ \\sum_{k=1}^{n} k = \\frac{n(n+1)}{2} $$\n\nSo, the statement we need to prove can be written as:\n$$ \\sum_{k=1}^{n} k^3 = \\left( \\frac{n(n+1)}{2} \\right)^2 $$\n$$ \\sum_{k=1}^{n} k^3 = \\frac{n^2(n+1)^2}{4} $$\n\nWe will prove this by mathematical induction.\n\n---\n\n**Proof by Mathematical Induction**\n\nLet P(n) be the statement: $\\sum_{k=1}^{n} k^3 = \\frac{n^2(n+1)^2}{4}$.\n\n**Step 1: Base Case (n=1)**\n\nWe need to show that P(1) is true.\nLeft Hand Side (LHS):\n$ \\sum_{k=1}^{1} k^3 = 1^3 = 1 $\n\nRight Hand Side (RHS):\n$ \\frac{1^2(1+1)^2}{4} = \\frac{1^2(2)^2}{4} = \\frac{1 \\cdot 4}{4} = 1 $\n\nSince LHS = RHS (1 = 1), the statement P(1) is true.\n\n**Step 2: Inductive Hypothesis**\n\nAssume that P(m) is true for some positive integer m.\nThat is, assume:\n$$ \\sum_{k=1}^{m} k^3 = \\frac{m^2(m+1)^2}{4} $$\n\n**Step 3: Inductive Step (Prove P(m+1))**\n\nWe need to show that if P(m) is true, then P(m+1) is also true.\nWe need to prove that:\n$$ \\sum_{k=1}^{m+1} k^3 = \\frac{(m+1)^2((m+1)+1)^2}{4} $$\n$$ \\sum_{k=1}^{m+1} k^3 = \\frac{(m+1)^2(m+2)^2}{4} $$\n\nLet's start with the LHS of the statement P(m+1):\n$$ \\sum_{k=1}^{m+1} k^3 = \\left( \\sum_{k=1}^{m} k^3 \\right) + (m+1)^3 $$\n\nNow, using the Inductive Hypothesis (substituting the assumed value for the sum up to m):\n$$ = \\frac{m^2(m+1)^2}{4} + (m+1)^3 $$\n\nTo combine these terms, we can factor out $(m+1)^2$:\n$$ = (m+1)^2 \\left[ \\frac{m^2}{4} + (m+1) \\right] $$\n\nNow, find a common denominator inside the brackets:\n$$ = (m+1)^2 \\left[ \\frac{m^2}{4} + \\frac{4(m+1)}{4} \\right] $$\n$$ = (m+1)^2 \\left[ \\frac{m^2 + 4m + 4}{4} \\right] $$\n\nRecognize that the numerator $m^2 + 4m + 4$ is a 
perfect square trinomial: $(m+2)^2$.\n$$ = (m+1)^2 \\left[ \\frac{(m+2)^2}{4} \\right] $$\n$$ = \\frac{(m+1)^2(m+2)^2}{4} $$\n\nThis is exactly the RHS of the statement P(m+1).\n\n**Step 4: Conclusion**\n\nSince the base case P(1) is true, and the inductive step shows that if P(m) is true then P(m+1) is true, by the principle of mathematical induction, the statement P(n) is true for all positive integers n.\n\nTherefore, the sum of the first n cubes equals the square of the sum of the first n positive integers:\n$$ \\sum_{k=1}^{n} k^3 = \\left( \\sum_{k=1}^{n} k \\right)^2 $$",
+              "thought": null,
+              "thought_signature": null,
+              "video_metadata": null,
+              "tool_call": null,
+              "tool_response": null,
+              "part_metadata": null
+            }
+          ],
+          "role": "model"
+        },
+        "citation_metadata": null,
+        "finish_message": null,
+        "token_count": null,
+        "finish_reason": "STOP",
+        "grounding_metadata": null,
+        "avg_logprobs": null,
+        "index": 0,
+        "logprobs_result": null,
+        "safety_ratings": null,
+        "url_context_metadata": null
+      }
+    ],
+    "create_time": null,
+    "model_version": "gemini-2.5-flash",
+    "prompt_feedback": null,
+    "response_id": "iHIZaqGILdbMvdIPt6ussAQ",
+    "usage_metadata": {
+      "cache_tokens_details": null,
+      "cached_content_token_count": null,
+      "candidates_token_count": 1003,
+      "candidates_tokens_details": null,
+      "prompt_token_count": 27,
+      "prompt_tokens_details": [
+        {
+          "modality": "TEXT",
+          "token_count": 27
+        }
+      ],
+      "thoughts_token_count": 1546,
+      "tool_use_prompt_token_count": null,
+      "tool_use_prompt_tokens_details": null,
+      "total_token_count": 2576,
+      "traffic_type": null
+    },
+    "model_status": null,
+    "automatic_function_calling_history": [],
+    "parsed": null
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/fixtures/gemini_native/05_multi_turn.json b/tests/unit/adapters/fixtures/gemini_native/05_multi_turn.json
new file mode 100644
index 0000000..c173eea
--- /dev/null
+++ b/tests/unit/adapters/fixtures/gemini_native/05_multi_turn.json
@@ -0,0 +1,82 @@
+{
+  "_model_id": "gemini-2.5-flash",
+  "_response": {
+    "sdk_http_response": {
+      "headers": {
+        "x-gemini-service-tier": "standard",
+        "content-type": "application/json; charset=UTF-8",
+        "vary": "Origin, X-Origin, Referer",
+        "content-encoding": "gzip",
+        "date": "Fri, 29 May 2026 11:03:49 GMT",
+        "server": "scaffolding on HTTPServer2",
+        "x-xss-protection": "0",
+        "x-frame-options": "SAMEORIGIN",
+        "x-content-type-options": "nosniff",
+        "server-timing": "gfet4t7; dur=1538",
+        "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000",
+        "transfer-encoding": "chunked"
+      },
+      "body": null
+    },
+    "candidates": [
+      {
+        "content": {
+          "parts": [
+            {
+              "media_resolution": null,
+              "code_execution_result": null,
+              "executable_code": null,
+              "file_data": null,
+              "function_call": null,
+              "function_response": null,
+              "inline_data": null,
+              "text": "Okay, taking the previous result (4) and multiplying it by 3:\n\n4 * 3 = 12",
+              "thought": null,
+              "thought_signature": null,
+              "video_metadata": null,
+              "tool_call": null,
+              "tool_response": null,
+              "part_metadata": null
+            }
+          ],
+          "role": "model"
+        },
+        "citation_metadata": null,
+        "finish_message": null,
+        "token_count": null,
+        "finish_reason": "STOP",
+        "grounding_metadata": null,
+        "avg_logprobs": null,
+        "index": 0,
+        "logprobs_result": null,
+        "safety_ratings": null,
+        "url_context_metadata": null
+      }
+    ],
+    "create_time": null,
+    "model_version": "gemini-2.5-flash",
+    "prompt_feedback": null,
+    "response_id": "lHIZasOaHvu3vdIP-9KoyQQ",
+    "usage_metadata": {
+      "cache_tokens_details": null,
+      "cached_content_token_count": null,
+      "candidates_token_count": 25,
+      "candidates_tokens_details": null,
+      "prompt_token_count": 22,
+      "prompt_tokens_details": [
+        {
+          "modality": "TEXT",
+          "token_count": 22
+        }
+      ],
+      "thoughts_token_count": 147,
+      "tool_use_prompt_token_count": null,
+      "tool_use_prompt_tokens_details": null,
+      "total_token_count": 194,
+      "traffic_type": null
+    },
+    "model_status": null,
+    "automatic_function_calling_history": [],
+    "parsed": null
+  }
+}
\ No newline at end of file
diff --git a/tests/unit/adapters/test_gemini_native.py b/tests/unit/adapters/test_gemini_native.py
new file mode 100644
index 0000000..d98c1c6
--- /dev/null
+++ b/tests/unit/adapters/test_gemini_native.py
@@ -0,0 +1,217 @@
+"""Gemini native adapter — verified against real fixtures captured via google-genai."""
+
+from __future__ import annotations
+
+import json
+import pathlib
+
+from lago_agent_sdk.adapters import extract_gemini_native
+
+FIX = pathlib.Path(__file__).parent / "fixtures" / "gemini_native"
+
+
+def _load(name: str) -> tuple[str, dict]:
+    data = json.loads((FIX / name).read_text())
+    return data["_model_id"], data["_response"]
+
+
+# --------------------------------------------------------------------------
+# Real fixtures
+# --------------------------------------------------------------------------
+def test_plain_flash() -> None:
+    """Plain call to gemini-2.5-flash: input/output/reasoning all populated."""
+    model_id, resp = _load("01_plain_flash.json")
+    u = extract_gemini_native(resp, model_id=model_id)
+    assert u.input == 7
+    assert u.output == 23
+    # Gemini 2.5 emits thoughts even without explicit thinking config
+    assert u.reasoning == 442
+    assert u.tool_calls == 0
+    assert u.cache_read == 0
+    assert u.api == "native"
+    assert u.provider == "gemini"
+
+
+def test_tool_use_counts_function_calls() -> None:
+    """A function_call in candidates[0].content.parts[] increments tool_calls."""
+    model_id, resp = _load("02_tool_use.json")
+    u = extract_gemini_native(resp, model_id=model_id)
+    assert u.input == 49
+    assert u.output == 15
+    assert u.tool_calls == 1
+
+
+def test_streaming_final_chunk_carries_usage() -> None:
+    """The streaming wrapper grabs usage from the last chunk that has it."""
+    model_id, resp = _load("03_streaming.json")
+    chunks = resp["chunks"]
+    final = next((c for c in reversed(chunks) if c.get("usage_metadata")), None)
+    assert final is not None
+    u = extract_gemini_native(final, model_id=model_id)
+    assert u.input == 14
+    assert u.output == 9
+    assert u.reasoning == 29
+
+
+def test_thinking_mode_populates_reasoning() -> None:
+    """Gemini 2.5 with explicit thinking_config emits a large thoughts_token_count."""
+    model_id, resp = _load("04_thinking.json")
+    u = extract_gemini_native(resp, model_id=model_id)
+    assert u.input == 27
+    assert u.output == 1003
+    assert u.reasoning == 1546
+    # Math check: candidates + thoughts + prompt = total (additive, not subset)
+    assert u.input + u.output + u.reasoning == 2576  # matches usage_metadata.total_token_count
+
+
+def test_multi_turn() -> None:
+    model_id, resp = _load("05_multi_turn.json")
+    u = extract_gemini_native(resp, model_id=model_id)
+    assert u.input == 22
+    assert u.output == 25
+
+
+# --------------------------------------------------------------------------
+# Synthetic — edge cases the fixtures didn't cover (no real audio/image test traffic)
+# --------------------------------------------------------------------------
+def test_audio_input_from_modality_details() -> None:
+    """Multimodal AUDIO input lives in usage_metadata.prompt_tokens_details[modality=AUDIO]."""
+    resp = {
+        "usage_metadata": {
+            "prompt_token_count": 1000,
+            "candidates_token_count": 50,
+            "prompt_tokens_details": [
+                {"modality": "TEXT", "token_count": 200},
+                {"modality": "AUDIO", "token_count": 800},
+            ],
+        }
+    }
+    u = extract_gemini_native(resp, model_id="gemini-2.5-flash")
+    assert u.input == 1000
+    assert u.audio_input == 800
+    assert u.image_input == 0
+
+
+def test_image_input_from_modality_details() -> None:
+    resp = {
+        "usage_metadata": {
+            "prompt_token_count": 500,
+            "candidates_token_count": 50,
+            "prompt_tokens_details": [
+                {"modality": "TEXT", "token_count": 300},
+                {"modality": "IMAGE", "token_count": 200},
+            ],
+        }
+    }
+    u = extract_gemini_native(resp, model_id="gemini-2.5-flash")
+    assert u.image_input == 200
+
+
+def test_audio_output_from_modality_details() -> None:
+    """Audio output (e.g. TTS-capable model) lives in candidates_tokens_details[modality=AUDIO]."""
+    resp = {
+        "usage_metadata": {
+            "prompt_token_count": 50,
+            "candidates_token_count": 1500,
+            "candidates_tokens_details": [
+                {"modality": "AUDIO", "token_count": 1500},
+            ],
+        }
+    }
+    u = extract_gemini_native(resp, model_id="gemini-2.5-flash-audio")
+    assert u.audio_output == 1500
+
+
+def test_cached_content_token_count() -> None:
+    """When CachedContent API has been primed, cached_content_token_count fires."""
+    resp = {
+        "usage_metadata": {
+            "prompt_token_count": 5000,
+            "candidates_token_count": 30,
+            "cached_content_token_count": 4800,
+        }
+    }
+    u = extract_gemini_native(resp, model_id="gemini-2.5-flash")
+    assert u.cache_read == 4800
+
+
+def test_multiple_function_calls_counted() -> None:
+    resp = {
+        "usage_metadata": {"prompt_token_count": 10, "candidates_token_count": 20},
+        "candidates": [
+            {
+                "content": {
+                    "parts": [
+                        {"text": "..."},
+                        {"function_call": {"name": "fn1"}},
+                        {"function_call": {"name": "fn2"}},
+                        {"function_call": {"name": "fn3"}},
+                    ]
+                }
+            }
+        ],
+    }
+    u = extract_gemini_native(resp, model_id="gemini-2.5-flash")
+    assert u.tool_calls == 3
+
+
+def test_handles_pydantic_via_model_dump() -> None:
+    class FakePydantic:
+        def model_dump(self) -> dict:
+            return {
+                "model_version": "gemini-2.5-flash",
+                "candidates": [
+                    {"content": {"parts": [{"function_call": {"name": "x"}}]}}
+                ],
+                "usage_metadata": {
+                    "prompt_token_count": 10,
+                    "candidates_token_count": 20,
+                    "thoughts_token_count": 5,
+                },
+            }
+
+    u = extract_gemini_native(FakePydantic(), model_id="gemini-2.5-flash")
+    assert u.input == 10
+    assert u.output == 20
+    assert u.reasoning == 5
+    assert u.tool_calls == 1
+    assert u.api == "native"
+
+
+def test_no_usage_metadata_returns_zeros() -> None:
+    u = extract_gemini_native({}, model_id="gemini-2.5-flash")
+    assert u.input == 0
+    assert u.output == 0
+    assert not u.nonzero_numeric()
+
+
+def test_survives_non_dict_usage_metadata() -> None:
+    assert extract_gemini_native({"usage_metadata": True}, model_id="x").input == 0
+    assert extract_gemini_native({"usage_metadata": "bogus"}, model_id="x").output == 0
+    assert extract_gemini_native(None, model_id="x").input == 0
+
+
+def test_unknown_usage_field_lands_in_extras() -> None:
+    """If Google adds a new top-level usage field, drift detection picks it up."""
+    resp = {
+        "usage_metadata": {
+            "prompt_token_count": 10,
+            "candidates_token_count": 20,
+            "future_field_xyz": "novel",
+        }
+    }
+    u = extract_gemini_native(resp, model_id="gemini-2.5-flash")
+    assert u.extras.get("future_field_xyz") == "novel"
+
+
+def test_traffic_type_lands_in_known_fields_not_extras() -> None:
+    """traffic_type is a known metadata field; it shouldn't leak into extras."""
+    resp = {
+        "usage_metadata": {
+            "prompt_token_count": 10,
+            "candidates_token_count": 20,
+            "traffic_type": "PAID",
+        }
+    }
+    u = extract_gemini_native(resp, model_id="gemini-2.5-flash")
+    assert "traffic_type" not in u.extras
diff --git a/tests/unit/test_wrapper_gemini.py b/tests/unit/test_wrapper_gemini.py
new file mode 100644
index 0000000..a234bfd
--- /dev/null
+++ b/tests/unit/test_wrapper_gemini.py
@@ -0,0 +1,214 @@
+"""Gemini wrapper tests — fake client, no live API."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from lago_agent_sdk import LagoSDK
+
+
+# ----------------------------------------------------------------------
+# Fake google-genai client mimicking genai.Client.models surface area
+# ----------------------------------------------------------------------
+class FakePydanticResponse:
+    def __init__(self, payload: dict):
+        self._payload = payload
+
+    def model_dump(self) -> dict:
+        return self._payload
+
+
+class FakeStreamChunk:
+    def __init__(self, payload: dict):
+        self._payload = payload
+
+    def model_dump(self) -> dict:
+        return self._payload
+
+
+class FakeModels:
+    def __init__(self) -> None:
+        self.generate_calls = 0
+        self.stream_calls = 0
+
+    def generate_content(self, **kwargs: Any) -> Any:
+        self.generate_calls += 1
+        assert "extra_lago" not in kwargs
+        return FakePydanticResponse(
+            {
+                "model_version": kwargs.get("model", "gemini-2.5-flash"),
+                "candidates": [
+                    {"content": {"parts": [{"text": "hi"}]}, "finish_reason": "STOP"}
+                ],
+                "usage_metadata": {
+                    "prompt_token_count": 7,
+                    "candidates_token_count": 23,
+                    "thoughts_token_count": 0,
+                    "total_token_count": 30,
+                },
+            }
+        )
+
+    def generate_content_stream(self, **kwargs: Any) -> Any:
+        self.stream_calls += 1
+        assert "extra_lago" not in kwargs
+        chunks = [
+            FakeStreamChunk(
+                {
+                    "candidates": [{"content": {"parts": [{"text": "hi"}]}}],
+                    "usage_metadata": None,  # intermediate chunks don't carry usage
+                }
+            ),
+            FakeStreamChunk(
+                {
+                    "candidates": [{"content": {"parts": [{"text": "."}]}, "finish_reason": "STOP"}],
+                    "usage_metadata": {
+                        "prompt_token_count": 9,
+                        "candidates_token_count": 4,
+                        "thoughts_token_count": 0,
+                        "total_token_count": 13,
+                    },
+                }
+            ),
+        ]
+        return iter(chunks)
+
+
+class FakeGeminiClient:
+    """Mimics `from google import genai; genai.Client(api_key=...)`."""
+
+    __module__ = "google.genai.client"
+
+    def __init__(self) -> None:
+        self.models = FakeModels()
+        # No .aio in this fake — tests cover the sync path only
+
+
+# ----------------------------------------------------------------------
+# Helpers (same pattern as Bedrock/Mistral wrapper tests)
+# ----------------------------------------------------------------------
+def _make_sdk(default_sub: str = "sub_test") -> tuple[LagoSDK, list]:
+    received: list = []
+    sdk = LagoSDK(api_key="dummy", default_subscription_id=default_sub)
+    sdk._queue._sender = lambda b: received.append(list(b))  # type: ignore[attr-defined]
+    return sdk, received
+
+
+# ----------------------------------------------------------------------
+# Tests
+# ----------------------------------------------------------------------
+def test_wrap_generate_content_emits_input_and_output() -> None:
+    sdk, received = _make_sdk()
+    fake = FakeGeminiClient()
+    client = sdk.wrap(fake)
+    resp = client.models.generate_content(model="gemini-2.5-flash", contents="hi")
+    assert resp.model_dump()["usage_metadata"]["prompt_token_count"] == 7
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    flat = [e for batch in received for e in batch]
+    by_code = {e["code"]: int(float(e["properties"]["value"])) for e in flat}
+    assert by_code["llm_input_tokens"] == 7
+    assert by_code["llm_output_tokens"] == 23
+
+
+def test_wrap_strips_extra_lago_kwarg_and_uses_per_call_sub() -> None:
+    sdk, received = _make_sdk("sub_default")
+    fake = FakeGeminiClient()
+    client = sdk.wrap(fake)
+    client.models.generate_content(
+        model="gemini-2.5-flash",
+        contents="hi",
+        extra_lago={"subscription": "sub_per_call", "dimensions": {"feature": "X"}},
+    )
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    flat = [e for batch in received for e in batch]
+    assert all(e["external_subscription_id"] == "sub_per_call" for e in flat)
+    assert flat[0]["properties"]["feature"] == "X"
+
+
+def test_wrap_double_wrap_is_idempotent() -> None:
+    sdk, received = _make_sdk()
+    fake = FakeGeminiClient()
+    sdk.wrap(fake)
+    sdk.wrap(fake)
+    sdk.wrap(fake)
+    fake.models.generate_content(model="gemini-2.5-flash", contents="hi")
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    flat = [e for batch in received for e in batch]
+    # One call must yield exactly 2 events; a triple-wrapped client would have emitted 6
+    assert len(flat) == 2
+    assert fake.models.generate_calls == 1
+
+
+def test_wrap_generate_content_stream_captures_usage_from_final_chunk() -> None:
+    sdk, received = _make_sdk()
+    fake = FakeGeminiClient()
+    client = sdk.wrap(fake)
+    chunks = list(client.models.generate_content_stream(model="gemini-2.5-flash", contents="hi"))
+    assert len(chunks) == 2
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    flat = [e for batch in received for e in batch]
+    by_code = {e["code"]: int(float(e["properties"]["value"])) for e in flat}
+    assert by_code["llm_input_tokens"] == 9
+    assert by_code["llm_output_tokens"] == 4
+
+
+def test_wrap_thinking_emits_reasoning_separately() -> None:
+    """Gemini 2.5 emits thoughts_token_count → llm_reasoning_tokens event."""
+    sdk, received = _make_sdk()
+
+    class ThinkingModels:
+        def generate_content(self, **kwargs):
+            return FakePydanticResponse(
+                {
+                    "usage_metadata": {
+                        "prompt_token_count": 10,
+                        "candidates_token_count": 50,
+                        "thoughts_token_count": 200,
+                    }
+                }
+            )
+
+    class ThinkingClient:
+        __module__ = "google.genai.client"
+
+        def __init__(self):
+            self.models = ThinkingModels()
+
+    client = sdk.wrap(ThinkingClient())
+    client.models.generate_content(model="gemini-2.5-flash", contents="hi")
+    assert sdk.flush(timeout=2.0)
+    sdk.shutdown(timeout=1.0)
+    flat = [e for batch in received for e in batch]
+    by_code = {e["code"]: int(float(e["properties"]["value"])) for e in flat}
+    assert by_code["llm_input_tokens"] == 10
+    assert by_code["llm_output_tokens"] == 50
+    assert by_code["llm_reasoning_tokens"] == 200
+
+
+def test_wrap_instrumentation_failure_does_not_break_call() -> None:
+    """Adapter failure must not propagate to the customer's call."""
+    sdk, _ = _make_sdk()
+
+    class BadResp:
+        def model_dump(self):
+            raise RuntimeError("boom")
+
+    class BadModels:
+        def generate_content(self, **_kw):
+            return BadResp()
+
+    class BadClient:
+        __module__ = "google.genai.client"
+
+        def __init__(self):
+            self.models = BadModels()
+
+    client = sdk.wrap(BadClient())
+    # Must not raise even though our adapter will crash on this response
+    resp = client.models.generate_content(model="x", contents="hi")
+    assert resp is not None
+    sdk.shutdown(timeout=1.0)

From a5b4511608893c69c5609aa92eb1b20ae4f717a5 Mon Sep 17 00:00:00 2001
From: Anass <anass@getlago.com>
Date: Fri, 29 May 2026 14:42:16 +0200
Subject: [PATCH 5/5] Apply ruff format to OpenAI + Gemini files

CI runs `ruff format --check`, which was failing because earlier development
only ran `ruff check` (the linter) locally and never ran the formatter.
Auto-formatting restores whitespace consistency in:

- src/lago_agent_sdk/adapters/gemini_native.py
- src/lago_agent_sdk/wrappers/openai.py
- tests/unit/adapters/fixtures/capture_openai.py
- tests/unit/adapters/test_gemini_native.py
- tests/unit/test_wrapper_gemini.py

No functional changes.
---
 src/lago_agent_sdk/adapters/gemini_native.py   | 8 ++++----
 src/lago_agent_sdk/wrappers/openai.py          | 4 +++-
 tests/unit/adapters/fixtures/capture_openai.py | 3 +--
 tests/unit/adapters/test_gemini_native.py      | 4 +---
 tests/unit/test_wrapper_gemini.py              | 4 +---
 5 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/lago_agent_sdk/adapters/gemini_native.py b/src/lago_agent_sdk/adapters/gemini_native.py
index 8cb0f06..f3bdc96 100644
--- a/src/lago_agent_sdk/adapters/gemini_native.py
+++ b/src/lago_agent_sdk/adapters/gemini_native.py
@@ -74,9 +74,7 @@ def _to_dict(obj: Any) -> dict[str, Any]:
     return {}
 
 
-def _modality_token_count(
-    details: list[dict[str, Any]] | Any, modality: str
-) -> int:
+def _modality_token_count(details: list[dict[str, Any]] | Any, modality: str) -> int:
     """Sum token_count from a list of {modality, token_count} entries matching the given modality."""
     if not isinstance(details, list):
         return 0
@@ -128,7 +126,9 @@ def extract_gemini_native(response: Any, model_id: str = "") -> CanonicalUsage:
         audio_output=_modality_token_count(candidates_details, "AUDIO"),
         image_input=_modality_token_count(prompt_details, "IMAGE"),
         tool_calls=_count_tool_calls(resp),
-        model=model_id or (resp.get("model_version") if isinstance(resp.get("model_version"), str) else "") or "",
+        model=model_id
+        or (resp.get("model_version") if isinstance(resp.get("model_version"), str) else "")
+        or "",
         provider="gemini",
         api="native",
         extras=extras,
diff --git a/src/lago_agent_sdk/wrappers/openai.py b/src/lago_agent_sdk/wrappers/openai.py
index 1864986..f015154 100644
--- a/src/lago_agent_sdk/wrappers/openai.py
+++ b/src/lago_agent_sdk/wrappers/openai.py
@@ -165,7 +165,9 @@ async def _wrap_async_stream(src: AsyncIterator[Any]) -> AsyncIterator[Any]:
         original_chat_create = getattr(completions, "create", None)
         if original_chat_create is not None:
             completions.create = (
-                _make_async_create(original_chat_create) if is_async else _make_sync_create(original_chat_create)
+                _make_async_create(original_chat_create)
+                if is_async
+                else _make_sync_create(original_chat_create)
             )
 
     # ------------------------------------------------------------------
diff --git a/tests/unit/adapters/fixtures/capture_openai.py b/tests/unit/adapters/fixtures/capture_openai.py
index 5bcdd25..ed7f8bf 100644
--- a/tests/unit/adapters/fixtures/capture_openai.py
+++ b/tests/unit/adapters/fixtures/capture_openai.py
@@ -88,8 +88,7 @@ def main() -> int:
     # ----- 3. Cache hit attempt — long prompt sent twice (OpenAI auto-caches >1024 tokens) -----
     print("\n[3] cache attempt — long prompt, call 1 then call 2")
     long_prompt = (
-        "You are an extremely thorough expert tutor. Answer concisely and cite reasoning step by step. "
-        * 200
+        "You are an extremely thorough expert tutor. Answer concisely and cite reasoning step by step. " * 200
     )
     msgs = [
         {"role": "system", "content": long_prompt},
diff --git a/tests/unit/adapters/test_gemini_native.py b/tests/unit/adapters/test_gemini_native.py
index d98c1c6..ffaab30 100644
--- a/tests/unit/adapters/test_gemini_native.py
+++ b/tests/unit/adapters/test_gemini_native.py
@@ -160,9 +160,7 @@ class FakePydantic:
         def model_dump(self) -> dict:
             return {
                 "model_version": "gemini-2.5-flash",
-                "candidates": [
-                    {"content": {"parts": [{"function_call": {"name": "x"}}]}}
-                ],
+                "candidates": [{"content": {"parts": [{"function_call": {"name": "x"}}]}}],
                 "usage_metadata": {
                     "prompt_token_count": 10,
                     "candidates_token_count": 20,
diff --git a/tests/unit/test_wrapper_gemini.py b/tests/unit/test_wrapper_gemini.py
index a234bfd..b1f84f5 100644
--- a/tests/unit/test_wrapper_gemini.py
+++ b/tests/unit/test_wrapper_gemini.py
@@ -37,9 +37,7 @@ def generate_content(self, **kwargs: Any) -> Any:
         return FakePydanticResponse(
             {
                 "model_version": kwargs.get("model", "gemini-2.5-flash"),
-                "candidates": [
-                    {"content": {"parts": [{"text": "hi"}]}, "finish_reason": "STOP"}
-                ],
+                "candidates": [{"content": {"parts": [{"text": "hi"}]}, "finish_reason": "STOP"}],
                 "usage_metadata": {
                     "prompt_token_count": 7,
                     "candidates_token_count": 23,