From 8da7b82e21e34285d069f74aeb8cae387d54bf23 Mon Sep 17 00:00:00 2001 From: Anass Date: Wed, 20 May 2026 16:11:34 +0200 Subject: [PATCH 1/5] Add native Anthropic SDK support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - src/lago_agent_sdk/adapters/anthropic_native.py — extract_anthropic_native - src/lago_agent_sdk/wrappers/anthropic.py — wraps messages.create (sync + async, streaming and non-streaming) and messages.stream context manager - Wired into sdk.wrap() dispatch and adapters/__init__.py exports - anthropic = ["anthropic>=0.30"] optional-dep group - 19 new unit tests + 3 live integration tests; 256 unit tests pass - Coverage 80.71% — gate maintained - 9 captured response fixtures from real Anthropic API - README + CHANGELOG updated --- CHANGELOG.md | 8 + README.md | 42 +++- pyproject.toml | 8 + src/lago_agent_sdk/adapters/__init__.py | 2 + .../adapters/anthropic_native.py | 91 +++++++ src/lago_agent_sdk/sdk.py | 8 +- src/lago_agent_sdk/wrappers/anthropic.py | 231 ++++++++++++++++++ tests/integration/test_live_anthropic.py | 114 +++++++++ .../anthropic_native/01_plain_haiku.json | 33 +++ .../anthropic_native/02_plain_sonnet.json | 33 +++ .../anthropic_native/03_tool_use.json | 39 +++ .../anthropic_native/04_cache_create_5m.json | 33 +++ .../anthropic_native/05_cache_read.json | 33 +++ .../anthropic_native/06_cache_create_1h.json | 33 +++ .../07_extended_thinking.json | 38 +++ .../fixtures/anthropic_native/08_stream.json | 142 +++++++++++ .../anthropic_native/09_multi_turn.json | 33 +++ .../adapters/fixtures/capture_anthropic.py | 179 ++++++++++++++ tests/unit/adapters/test_anthropic_native.py | 152 ++++++++++++ tests/unit/test_wrapper_anthropic.py | 223 +++++++++++++++++ uv.lock | 163 +++++++++++- 21 files changed, 1624 insertions(+), 14 deletions(-) create mode 100644 src/lago_agent_sdk/adapters/anthropic_native.py create mode 100644 src/lago_agent_sdk/wrappers/anthropic.py create mode 100644 
tests/integration/test_live_anthropic.py create mode 100644 tests/unit/adapters/fixtures/anthropic_native/01_plain_haiku.json create mode 100644 tests/unit/adapters/fixtures/anthropic_native/02_plain_sonnet.json create mode 100644 tests/unit/adapters/fixtures/anthropic_native/03_tool_use.json create mode 100644 tests/unit/adapters/fixtures/anthropic_native/04_cache_create_5m.json create mode 100644 tests/unit/adapters/fixtures/anthropic_native/05_cache_read.json create mode 100644 tests/unit/adapters/fixtures/anthropic_native/06_cache_create_1h.json create mode 100644 tests/unit/adapters/fixtures/anthropic_native/07_extended_thinking.json create mode 100644 tests/unit/adapters/fixtures/anthropic_native/08_stream.json create mode 100644 tests/unit/adapters/fixtures/anthropic_native/09_multi_turn.json create mode 100644 tests/unit/adapters/fixtures/capture_anthropic.py create mode 100644 tests/unit/adapters/test_anthropic_native.py create mode 100644 tests/unit/test_wrapper_anthropic.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cd6396..fa696ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented here. Format follows [Kee ## [Unreleased] +### Added +- Native `anthropic` SDK support. Wraps `Anthropic.messages.create` (including `stream=True`) and `Anthropic.messages.stream(...)` context manager. Same coverage on `AsyncAnthropic` (sync + async variants). +- `extract_anthropic_native` adapter with the full Anthropic field map: `input_tokens`, `output_tokens`, `cache_creation_input_tokens`, `cache_read_input_tokens`, `cache_creation.ephemeral_5m_input_tokens`, `cache_creation.ephemeral_1h_input_tokens`, `content[].type == "tool_use"`. +- `anthropic` optional dependency group: `pip install 'lago-agent-sdk[anthropic]'`. +- 19 new unit tests (adapter + wrapper) and 3 live integration tests (gated on `ANTHROPIC_API_KEY`). Total: 256 unit tests, ≥80% coverage maintained. 
+- 9 captured response fixtures from the real Anthropic API (plain, tool use, 5m + 1h prompt caching, extended thinking, streaming, multi-turn). + + ## [0.1.0] — initial release ### Added diff --git a/README.md b/README.md index 89ad3f9..b8855c7 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ pip install lago-agent-sdk For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`). For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`). +For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`). ## Quickstart — Bedrock @@ -52,6 +53,25 @@ sdk.flush() The wrapped client behaves identically to the original — same arguments, same return shape, same exceptions. The SDK adds an in-memory queue that batches events to Lago in the background. +## Quickstart — Anthropic + +```python +from anthropic import Anthropic +from lago_agent_sdk import LagoSDK + +sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme") +client = sdk.wrap(Anthropic(api_key="...")) + +resp = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=200, + messages=[{"role": "user", "content": "Hello"}], +) +sdk.flush() +``` + +Works with `Anthropic` and `AsyncAnthropic`. Both `messages.create(..., stream=True)` and the `messages.stream(...)` context manager are instrumented — usage is captured from the final `message_delta` event in either case. + ## Quickstart — Mistral ```python @@ -92,9 +112,9 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks. 
|---|---|---| | AWS Bedrock | `Converse` (sync + stream) | ✓ | | AWS Bedrock | `InvokeModel` (sync + stream), 7 model families | ✓ | +| Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ | | Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ | | OpenAI | native SDK | Phase 2 | -| Anthropic | native SDK | Phase 2 | | Google Gemini | native SDK | Phase 2 | | LiteLLM | callback bridge | Phase 4 | @@ -102,16 +122,16 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks. `CanonicalUsage` carries 10 numeric fields. Which ones populate depends on the provider: -| Field | Lago metric code | Bedrock | Mistral native | -|---|---|---|---| -| input | `llm_input_tokens` | ✓ | ✓ | -| output | `llm_output_tokens` | ✓ | ✓ | -| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ (when cache hits) | -| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✗ | -| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✗ | -| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output) | -| tool_calls | `llm_tool_calls` | ✓ | ✓ | -| image_input / audio_input | `llm_image/audio_input_tokens` | ✗ | ✗ | +| Field | Lago metric code | Bedrock | Anthropic native | Mistral native | +|---|---|---|---|---| +| input | `llm_input_tokens` | ✓ | ✓ | ✓ | +| output | `llm_output_tokens` | ✓ | ✓ | ✓ | +| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | +| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | +| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | +| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | +| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | +| image_input / audio_input | `llm_image/audio_input_tokens` | ✗ | ✗ | ✗ | Reasoning, image, and audio fields will populate when Phase 2 native OpenAI 
ships. diff --git a/pyproject.toml b/pyproject.toml index 8c23f42..4044de0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,9 @@ dev = [ "mypy>=1.10", "types-requests>=2.31", ] +anthropic = [ + "anthropic>=0.30", +] [project.urls] Homepage = "https://www.getlago.com" @@ -81,3 +84,8 @@ files = ["src/lago_agent_sdk"] [[tool.mypy.overrides]] module = ["boto3.*", "botocore.*", "mistralai.*"] ignore_missing_imports = true + +[dependency-groups] +dev = [ + "anthropic>=0.30", +] diff --git a/src/lago_agent_sdk/adapters/__init__.py b/src/lago_agent_sdk/adapters/__init__.py index 31f94e8..217ed3d 100644 --- a/src/lago_agent_sdk/adapters/__init__.py +++ b/src/lago_agent_sdk/adapters/__init__.py @@ -1,8 +1,10 @@ +from .anthropic_native import extract_anthropic_native from .bedrock_converse import extract_bedrock_converse from .bedrock_invoke import extract_bedrock_invoke, pick_invoke_adapter from .mistral_native import extract_mistral_native __all__ = [ + "extract_anthropic_native", "extract_bedrock_converse", "extract_bedrock_invoke", "pick_invoke_adapter", diff --git a/src/lago_agent_sdk/adapters/anthropic_native.py b/src/lago_agent_sdk/adapters/anthropic_native.py new file mode 100644 index 0000000..5943676 --- /dev/null +++ b/src/lago_agent_sdk/adapters/anthropic_native.py @@ -0,0 +1,91 @@ +"""Anthropic native adapter — verified against real fixtures. + +Field mapping: + usage.input_tokens → input + usage.output_tokens → output + usage.cache_read_input_tokens → cache_read + usage.cache_creation_input_tokens → cache_write + usage.cache_creation.ephemeral_5m_input_tokens → cache_write_5m + usage.cache_creation.ephemeral_1h_input_tokens → cache_write_1h + count of content[].type == "tool_use" → tool_calls + +Not exposed by Anthropic (folded into output_tokens): + reasoning_tokens — even with extended thinking enabled + +Unknown usage fields (service_tier, inference_geo, server_tool_use, …) land in extras. 
# Usage keys that map onto dedicated CanonicalUsage fields; every other key
# found in ``usage`` is forwarded untouched through ``extras``.
_KNOWN_USAGE_FIELDS = {
    "input_tokens",
    "output_tokens",
    "cache_read_input_tokens",
    "cache_creation_input_tokens",
    "cache_creation",
}


def _safe_dict(v: Any) -> dict[str, Any]:
    """Return *v* when it is a dict, otherwise an empty dict."""
    return v if isinstance(v, dict) else {}


def _safe_int(v: Any) -> int:
    """Coerce *v* to a non-negative int; anything unusable becomes 0.

    Falsy values (``None``, ``""``, ``0``) count as 0 and negative numbers
    are clamped to 0.
    """
    try:
        return max(0, int(v or 0))
    except (TypeError, ValueError, OverflowError):
        # ValueError: int("abc") / int(nan); OverflowError: int(float("inf")).
        return 0


def _to_dict(obj: Any) -> dict[str, Any]:
    """Best-effort conversion of a dict or pydantic-style object to a dict.

    Dicts are returned as-is. Objects exposing ``model_dump()`` are dumped;
    if the dump raises or does not produce a dict, an empty dict is returned
    so that usage extraction never raises into the caller.
    """
    if isinstance(obj, dict):
        return obj
    dump = getattr(obj, "model_dump", None)
    if callable(dump):
        try:
            dumped = dump()
        except Exception:  # noqa: BLE001 - extraction is best-effort by design
            return {}
        if isinstance(dumped, dict):
            return cast(dict[str, Any], dumped)
    return {}


def _block_type(block: Any) -> Any:
    """Return the ``type`` of a content block given as a dict or an object."""
    if isinstance(block, dict):
        return block.get("type")
    return getattr(block, "type", None)


def extract_anthropic_native(response: Any, model_id: str = "") -> "CanonicalUsage":
    """Translate an Anthropic native response into a ``CanonicalUsage``.

    Args:
        response: A dict, or an object exposing ``model_dump()``. Inside a
            dict payload, ``usage`` may itself be an object exposing
            ``model_dump()`` and ``content`` entries may be dicts or objects
            with a ``type`` attribute.
        model_id: Model identifier to report. When empty, the response's own
            ``model`` value is used if it is a string.

    Returns:
        A ``CanonicalUsage`` with ``provider="anthropic"`` and
        ``api="native"``. Missing or malformed counters are reported as 0 and
        usage keys outside ``_KNOWN_USAGE_FIELDS`` are copied into ``extras``.
    """
    resp = _to_dict(response)

    # ``usage`` may still be an un-dumped object when the caller assembled the
    # payload dict by hand, so normalise it the same way as the response.
    usage = _to_dict(resp.get("usage"))
    cache_creation = _safe_dict(usage.get("cache_creation"))

    content = resp.get("content")
    tool_calls = 0
    if isinstance(content, list):
        tool_calls = sum(1 for block in content if _block_type(block) == "tool_use")

    extras: dict[str, Any] = {
        key: value for key, value in usage.items() if key not in _KNOWN_USAGE_FIELDS
    }

    response_model = resp.get("model")
    fallback_model = response_model if isinstance(response_model, str) else ""

    return CanonicalUsage(
        input=_safe_int(usage.get("input_tokens")),
        output=_safe_int(usage.get("output_tokens")),
        cache_read=_safe_int(usage.get("cache_read_input_tokens")),
        cache_write=_safe_int(usage.get("cache_creation_input_tokens")),
        cache_write_5m=_safe_int(cache_creation.get("ephemeral_5m_input_tokens")),
        cache_write_1h=_safe_int(cache_creation.get("ephemeral_1h_input_tokens")),
        tool_calls=tool_calls,
        model=model_id or fallback_model or "",
        provider="anthropic",
        api="native",
        extras=extras,
    )
logger = logging.getLogger("lago_agent_sdk.wrappers.anthropic")

_INSTRUMENTED_ATTR = "_lago_instrumented"
_LAGO_KWARG = "extra_lago"


def _pop_lago_kwarg(kwargs: dict[str, Any]) -> dict[str, Any]:
    """Remove and return the per-call ``extra_lago`` options from *kwargs*.

    The key is always popped so it is never forwarded to the wrapped method.
    A missing, falsy or non-dict value yields an empty dict: a malformed
    option must not prevent the customer's call from being made.
    """
    opts = kwargs.pop(_LAGO_KWARG, None)
    if isinstance(opts, dict):
        return opts
    if opts:
        logger.warning(
            "lago: ignoring %s of type %s (expected dict)",
            _LAGO_KWARG,
            type(opts).__name__,
        )
    return {}


def _is_message_like(obj: Any) -> bool:
    """Return True when *obj* carries a ``usage`` key (dict) or attribute.

    This is how a completed response is told apart from a stream. Any
    exception raised while probing the object is treated as "not a message".
    """
    try:
        if isinstance(obj, dict):
            return "usage" in obj
        return hasattr(obj, "usage")
    except Exception:  # noqa: BLE001
        return False


class _StreamUsageTracker:
    """Accumulates usage information from the events of one streamed response.

    Usage found under ``message.usage`` (as carried by a ``message_start``
    event) and under a top-level ``usage`` key (as carried by a
    ``message_delta`` event) is merged; later non-None values overwrite
    earlier ones. ``content_block_start`` events whose block type is
    ``tool_use`` are counted so tool calls can be reported for streams too.
    """

    def __init__(self) -> None:
        self._usage: dict[str, Any] = {}
        self._tool_blocks: list[dict[str, Any]] = []
        self._model = ""

    def _merge(self, usage: Any) -> None:
        if isinstance(usage, dict):
            # None means "not reported in this event"; keep the earlier value.
            self._usage.update((k, v) for k, v in usage.items() if v is not None)

    def observe(self, event: Any) -> None:
        """Record whatever usage data *event* carries; never raises."""
        try:
            payload = event if isinstance(event, dict) else event.model_dump()
        except Exception:  # noqa: BLE001 - unknown event shapes are skipped
            return
        if not isinstance(payload, dict):
            return

        message = payload.get("message")
        if isinstance(message, dict):
            self._merge(message.get("usage"))
            model = message.get("model")
            if isinstance(model, str):
                self._model = model

        self._merge(payload.get("usage"))

        content_block = payload.get("content_block")
        if (
            payload.get("type") == "content_block_start"
            and isinstance(content_block, dict)
            and content_block.get("type") == "tool_use"
        ):
            self._tool_blocks.append({"type": "tool_use"})

    def payload(self) -> dict[str, Any] | None:
        """Return a synthetic response dict, or None when no usage was seen."""
        if not self._usage:
            return None
        result: dict[str, Any] = {
            "usage": dict(self._usage),
            "content": list(self._tool_blocks),
        }
        if self._model:
            result["model"] = self._model
        return result


def _tracked_events(src: Iterator[Any], emit: Any) -> Iterator[Any]:
    """Yield every event of *src* unchanged and call ``emit(payload)`` once at the end.

    The ``finally`` clause runs on exhaustion, on error and when the generator
    is closed early, so partially consumed streams are still reported.
    """
    tracker = _StreamUsageTracker()
    try:
        for event in src:
            tracker.observe(event)
            yield event
    finally:
        payload = tracker.payload()
        if payload is not None:
            emit(payload)


async def _tracked_events_async(src: AsyncIterator[Any], emit: Any) -> AsyncIterator[Any]:
    """Async counterpart of ``_tracked_events``."""
    tracker = _StreamUsageTracker()
    try:
        async for event in src:
            tracker.observe(event)
            yield event
    finally:
        payload = tracker.payload()
        if payload is not None:
            emit(payload)


class _LagoStream:
    """Proxy around a streamed ``create`` result (sync).

    Iteration goes through the instrumented generator. ``close()`` and the
    context-manager protocol are provided here; every other attribute lookup
    is delegated to the wrapped stream object.
    """

    def __init__(self, inner: Any, events: Iterator[Any]) -> None:
        self._inner = inner
        self._events = events

    def __getattr__(self, name: str) -> Any:
        # Reached only when normal lookup fails; guard against recursion when
        # the proxy is not fully initialised.
        inner = self.__dict__.get("_inner")
        if inner is None:
            raise AttributeError(name)
        return getattr(inner, name)

    def __iter__(self) -> "_LagoStream":
        return self

    def __next__(self) -> Any:
        return next(self._events)

    def __enter__(self) -> "_LagoStream":
        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        self.close()

    def close(self) -> None:
        """Finish instrumentation, then close the wrapped stream if it can be closed."""
        try:
            self._events.close()  # type: ignore[attr-defined]
        finally:
            closer = getattr(self._inner, "close", None)
            if callable(closer):
                closer()


class _LagoAsyncStream:
    """Async counterpart of ``_LagoStream``."""

    def __init__(self, inner: Any, events: AsyncIterator[Any]) -> None:
        self._inner = inner
        self._events = events

    def __getattr__(self, name: str) -> Any:
        inner = self.__dict__.get("_inner")
        if inner is None:
            raise AttributeError(name)
        return getattr(inner, name)

    def __aiter__(self) -> "_LagoAsyncStream":
        return self

    async def __anext__(self) -> Any:
        return await self._events.__anext__()

    async def __aenter__(self) -> "_LagoAsyncStream":
        return self

    async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        await self.close()

    async def close(self) -> None:
        """Finish instrumentation, then close the wrapped stream if it can be closed."""
        try:
            await self._events.aclose()  # type: ignore[attr-defined]
        finally:
            closer = getattr(self._inner, "close", None)
            if callable(closer):
                result = closer()
                if hasattr(result, "__await__"):
                    await result


def wrap_anthropic_client(
    sdk: Any,
    client: Any,
    dimensions: dict[str, Any] | None = None,
    subscription: str | None = None,
) -> Any:
    """Instrument ``client.messages.create`` / ``client.messages.stream`` in place.

    Args:
        sdk: Object exposing ``emit(usage, subscription=..., dimensions=...)``.
        client: The client to instrument; it must expose ``.messages``.
        dimensions: Dimensions attached to every event emitted for this client.
        subscription: Default subscription for this client's events.

    Returns:
        The same *client* object. Wrapping twice is a no-op, and a client
        without ``.messages`` is returned untouched.

    Per-call overrides are read from the ``extra_lago`` keyword argument,
    which is removed before the call is forwarded.
    """
    import inspect

    if getattr(client, _INSTRUMENTED_ATTR, False):
        logger.info("lago: anthropic client already wrapped — skipping")
        return client

    base_dims = dict(dimensions or {})
    base_sub = subscription

    messages = getattr(client, "messages", None)
    if messages is None:
        logger.warning("lago: anthropic client has no .messages — skipping wrap")
        return client

    original_create = getattr(messages, "create", None)
    original_stream = getattr(messages, "stream", None)
    # The class-name check alone misses async clients whose subclass name does
    # not start with "Async"; also inspect the method itself.
    is_async = type(client).__name__.startswith("Async") or inspect.iscoroutinefunction(
        original_create
    )

    def _resolve_opts(lago_opts: dict[str, Any]) -> tuple[str | None, dict[str, Any]]:
        sub = lago_opts.get("subscription") or base_sub
        extra_dims = lago_opts.get("dimensions")
        dims = {**base_dims, **extra_dims} if isinstance(extra_dims, dict) else dict(base_dims)
        return sub, dims

    def _begin(kwargs: dict[str, Any]) -> tuple[Any, str | None, dict[str, Any]]:
        """Strip ``extra_lago`` from *kwargs* and resolve the call's emit options."""
        sub, dims = _resolve_opts(_pop_lago_kwarg(kwargs))
        return kwargs.get("model", ""), sub, dims

    def _emit_from(payload: Any, model_id: str, sub: str | None, dims: dict[str, Any]) -> None:
        # Instrumentation failures are logged, never raised into customer code.
        try:
            usage = extract_anthropic_native(payload, model_id=model_id)
            sdk.emit(usage, subscription=sub, dimensions=dims)
        except Exception as exc:  # noqa: BLE001
            logger.warning("lago: anthropic emit failed: %s", exc)

    # ------------------------------------------------------------------
    # Sync messages.create — auto-detects streaming via response shape
    # ------------------------------------------------------------------
    def _create(*args: Any, **kwargs: Any) -> Any:
        model_id, sub, dims = _begin(kwargs)
        response = original_create(*args, **kwargs)

        if _is_message_like(response):
            _emit_from(response, model_id, sub, dims)
            return response

        # Streaming: usage is emitted once the event iterator finishes.
        events = _tracked_events(response, lambda p: _emit_from(p, model_id, sub, dims))
        return _LagoStream(response, events)

    # ------------------------------------------------------------------
    # Async messages.create — same as sync, awaited
    # ------------------------------------------------------------------
    async def _create_async(*args: Any, **kwargs: Any) -> Any:
        model_id, sub, dims = _begin(kwargs)
        response = await original_create(*args, **kwargs)

        if _is_message_like(response):
            _emit_from(response, model_id, sub, dims)
            return response

        events = _tracked_events_async(response, lambda p: _emit_from(p, model_id, sub, dims))
        return _LagoAsyncStream(response, events)

    # ------------------------------------------------------------------
    # messages.stream context manager (sync + async)
    # ------------------------------------------------------------------
    def _wrap_stream_manager(*args: Any, **kwargs: Any) -> Any:
        model_id, sub, dims = _begin(kwargs)
        inner = original_stream(*args, **kwargs)
        return _LagoStreamManager(inner, sdk, model_id, sub, dims, is_async=is_async)

    if original_create is not None:
        messages.create = _create_async if is_async else _create
    if original_stream is not None:
        messages.stream = _wrap_stream_manager

    setattr(client, _INSTRUMENTED_ATTR, True)
    return client


class _LagoStreamManager:
    """Proxies the manager returned by ``messages.stream`` and emits on exit.

    Supports both ``with`` and ``async with``. After the inner manager has
    exited, the entered stream's ``get_final_message()`` is called (and
    awaited when it returns an awaitable) and its usage is emitted. Failures
    while reading or emitting are logged and never raised.
    """

    def __init__(
        self,
        inner: Any,
        sdk: Any,
        model_id: str,
        sub: str | None,
        dims: dict[str, Any],
        *,
        is_async: bool,
    ) -> None:
        self._inner = inner
        self._sdk = sdk
        self._model_id = model_id
        self._sub = sub
        self._dims = dims
        self._stream: Any = None
        self._is_async = is_async

    # ----- sync -----
    def __enter__(self) -> Any:
        self._stream = self._inner.__enter__()
        return self._stream

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> Any:
        try:
            return self._inner.__exit__(exc_type, exc, tb)
        finally:
            self._emit_final()

    # ----- async -----
    async def __aenter__(self) -> Any:
        self._stream = await self._inner.__aenter__()
        return self._stream

    async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> Any:
        try:
            return await self._inner.__aexit__(exc_type, exc, tb)
        finally:
            await self._emit_final_async()

    # ----- emission -----
    def _read_final(self) -> Any:
        """Call ``get_final_message()`` on the entered stream, if it has one."""
        getter = getattr(self._stream, "get_final_message", None)
        return getter() if callable(getter) else None

    def _emit(self, final: Any) -> None:
        if final is None:
            return
        usage = extract_anthropic_native(final, model_id=self._model_id)
        self._sdk.emit(usage, subscription=self._sub, dimensions=self._dims)

    def _emit_final(self) -> None:
        try:
            self._emit(self._read_final())
        except Exception as exc:  # noqa: BLE001
            logger.warning("lago: anthropic stream-manager emit failed: %s", exc)

    async def _emit_final_async(self) -> None:
        try:
            final = self._read_final()
            # The async stream's get_final_message() returns an awaitable;
            # emitting it un-awaited would report an empty usage record.
            if hasattr(final, "__await__"):
                final = await final
            self._emit(final)
        except Exception as exc:  # noqa: BLE001
            logger.warning("lago: anthropic stream-manager emit failed: %s", exc)
# Every test in this module needs a real Anthropic key; skip the module otherwise.
pytestmark = pytest.mark.skipif(
    not os.environ.get("ANTHROPIC_API_KEY"),
    reason="ANTHROPIC_API_KEY not set",
)


class _MockLago(BaseHTTPRequestHandler):
    """Minimal stand-in for the Lago API: records each POSTed JSON body."""

    def do_POST(self):  # noqa: N802
        n = int(self.headers.get("Content-Length", 0))
        body = self.rfile.read(n)
        self.server.received.append(json.loads(body))  # type: ignore[attr-defined]
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(b'{"ok": true}')

    def log_message(self, *_args, **_kwargs):
        # Keep test output quiet.
        return


def _spawn_lago():
    """Start the mock server on an ephemeral port; return ``(server, base_url)``."""
    s = HTTPServer(("127.0.0.1", 0), _MockLago)
    s.received = []  # type: ignore[attr-defined]
    threading.Thread(target=s.serve_forever, daemon=True).start()
    return s, f"http://127.0.0.1:{s.server_port}"


def _stop_lago(server):
    """Stop the serve loop and release the listening socket.

    ``shutdown()`` only ends ``serve_forever``; without ``server_close()`` the
    socket stays open for the rest of the test session.
    """
    server.shutdown()
    server.server_close()


def test_live_anthropic_messages_create_emits_to_lago() -> None:
    """Non-streaming ``messages.create`` emits input and output token events."""
    from anthropic import Anthropic

    server, url = _spawn_lago()
    try:
        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
        client = sdk.wrap(Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]))
        client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=20,
            messages=[{"role": "user", "content": "Say hi"}],
        )
        assert sdk.flush(timeout=10.0)
        sdk.shutdown(timeout=2.0)
        events = [e for p in server.received for e in p["events"]]  # type: ignore[attr-defined]
        codes = {e["code"] for e in events}
        assert "llm_input_tokens" in codes
        assert "llm_output_tokens" in codes
        for e in events:
            assert e["properties"]["api"] == "native"
            assert e["properties"]["provider"] == "anthropic"
    finally:
        _stop_lago(server)


def test_live_anthropic_streaming_emits_from_final_delta() -> None:
    """``messages.create(stream=True)`` emits once the stream is consumed."""
    from anthropic import Anthropic

    server, url = _spawn_lago()
    try:
        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
        client = sdk.wrap(Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]))
        for _ in client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=20,
            messages=[{"role": "user", "content": "Say hi"}],
            stream=True,
        ):
            pass
        assert sdk.flush(timeout=10.0)
        sdk.shutdown(timeout=2.0)
        events = [e for p in server.received for e in p["events"]]  # type: ignore[attr-defined]
        codes = {e["code"] for e in events}
        assert "llm_input_tokens" in codes
        assert "llm_output_tokens" in codes
    finally:
        _stop_lago(server)


def test_live_anthropic_messages_stream_context_manager() -> None:
    """The ``messages.stream(...)`` context manager emits when the block exits."""
    from anthropic import Anthropic

    server, url = _spawn_lago()
    try:
        sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int")
        client = sdk.wrap(Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]))
        with client.messages.stream(
            model="claude-haiku-4-5-20251001",
            max_tokens=20,
            messages=[{"role": "user", "content": "Say hi"}],
        ) as stream:
            for _ in stream.text_stream:
                pass
        assert sdk.flush(timeout=10.0)
        sdk.shutdown(timeout=2.0)
        events = [e for p in server.received for e in p["events"]]  # type: ignore[attr-defined]
        codes = {e["code"] for e in events}
        assert "llm_input_tokens" in codes
        assert "llm_output_tokens" in codes
    finally:
        _stop_lago(server)
a/tests/unit/adapters/fixtures/anthropic_native/01_plain_haiku.json b/tests/unit/adapters/fixtures/anthropic_native/01_plain_haiku.json new file mode 100644 index 0000000..ebe80c3 --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/01_plain_haiku.json @@ -0,0 +1,33 @@ +{ + "_model_id": "claude-haiku-4-5-20251001", + "_response": { + "id": "msg_014oRrBt8p4HqV5k5eS1RyKN", + "container": null, + "content": [ + { + "citations": null, + "text": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks and whistles.", + "type": "text" + } + ], + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_details": null, + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": "not_available", + "input_tokens": 13, + "output_tokens": 35, + "server_tool_use": null, + "service_tier": "standard" + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/anthropic_native/02_plain_sonnet.json b/tests/unit/adapters/fixtures/anthropic_native/02_plain_sonnet.json new file mode 100644 index 0000000..36071ff --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/02_plain_sonnet.json @@ -0,0 +1,33 @@ +{ + "_model_id": "claude-sonnet-4-6", + "_response": { + "id": "msg_01Y3fakcdpcj6tk6FJrxV5GJ", + "container": null, + "content": [ + { + "citations": null, + "text": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social bonds, and remarkable ability to communicate using a variety of clicks, whistles, and other sounds.", + "type": "text" + } + ], + "model": "claude-sonnet-4-6", + "role": "assistant", + "stop_details": null, + "stop_reason": "end_turn", + 
"stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": "global", + "input_tokens": 13, + "output_tokens": 39, + "server_tool_use": null, + "service_tier": "standard" + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/anthropic_native/03_tool_use.json b/tests/unit/adapters/fixtures/anthropic_native/03_tool_use.json new file mode 100644 index 0000000..5dc4e83 --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/03_tool_use.json @@ -0,0 +1,39 @@ +{ + "_model_id": "claude-sonnet-4-6", + "_response": { + "id": "msg_01FBZLSB8UduV9akqUkEtyEW", + "container": null, + "content": [ + { + "id": "toolu_01AMkoyrfvRgYBCA21zpGXNi", + "caller": { + "type": "direct" + }, + "input": { + "city": "Tokyo" + }, + "name": "get_weather", + "type": "tool_use" + } + ], + "model": "claude-sonnet-4-6", + "role": "assistant", + "stop_details": null, + "stop_reason": "tool_use", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": "global", + "input_tokens": 658, + "output_tokens": 38, + "server_tool_use": null, + "service_tier": "standard" + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/anthropic_native/04_cache_create_5m.json b/tests/unit/adapters/fixtures/anthropic_native/04_cache_create_5m.json new file mode 100644 index 0000000..2fdaf1f --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/04_cache_create_5m.json @@ -0,0 +1,33 @@ +{ + "_model_id": "claude-sonnet-4-6", + "_response": { + "id": "msg_01VtgdQSaox4WMdGvHipchpn", + "container": null, + "content": [ + { + "citations": null, + "text": "**4**\n\nSteps:\n1. Start with 2\n2. Add 2\n3. 
Result = **4**", + "type": "text" + } + ], + "model": "claude-sonnet-4-6", + "role": "assistant", + "stop_details": null, + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 2803 + }, + "cache_creation_input_tokens": 2803, + "cache_read_input_tokens": 0, + "inference_geo": "global", + "input_tokens": 13, + "output_tokens": 30, + "server_tool_use": null, + "service_tier": "standard" + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/anthropic_native/05_cache_read.json b/tests/unit/adapters/fixtures/anthropic_native/05_cache_read.json new file mode 100644 index 0000000..c7fc4bb --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/05_cache_read.json @@ -0,0 +1,33 @@ +{ + "_model_id": "claude-sonnet-4-6", + "_response": { + "id": "msg_011bpj3E8EKkiNzynaPfBuJu", + "container": null, + "content": [ + { + "citations": null, + "text": "**3 + 3 = 6**\n\n**Step 1:** Start with 3.\n**Step 2:** Add ", + "type": "text" + } + ], + "model": "claude-sonnet-4-6", + "role": "assistant", + "stop_details": null, + "stop_reason": "max_tokens", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 2803, + "inference_geo": "global", + "input_tokens": 13, + "output_tokens": 30, + "server_tool_use": null, + "service_tier": "standard" + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/anthropic_native/06_cache_create_1h.json b/tests/unit/adapters/fixtures/anthropic_native/06_cache_create_1h.json new file mode 100644 index 0000000..8624bc4 --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/06_cache_create_1h.json @@ -0,0 +1,33 @@ +{ + "_model_id": "claude-sonnet-4-6", + "_response": { + "id": "msg_01CnMDjYpfBiy7JWKyMeNWsL", 
+ "container": null, + "content": [ + { + "citations": null, + "text": "Hi! How can I help you today?", + "type": "text" + } + ], + "model": "claude-sonnet-4-6", + "role": "assistant", + "stop_details": null, + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 2808, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 2808, + "cache_read_input_tokens": 0, + "inference_geo": "global", + "input_tokens": 7, + "output_tokens": 12, + "server_tool_use": null, + "service_tier": "standard" + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/anthropic_native/07_extended_thinking.json b/tests/unit/adapters/fixtures/anthropic_native/07_extended_thinking.json new file mode 100644 index 0000000..4d0002d --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/07_extended_thinking.json @@ -0,0 +1,38 @@ +{ + "_model_id": "claude-sonnet-4-6", + "_response": { + "id": "msg_01EAk8qJYaKDY7YNx6hiKBta", + "container": null, + "content": [ + { + "signature": "ErQCCmUIDRgCKkCFY8OE3VsRVnV6nXU/77sHcbai8ab7vVMPJKrFLAG73brAKyvMuOUHY3F3hARXd9ZCb++Z1sJ6emEjEdif2OwRMhFjbGF1ZGUtc29ubmV0LTQtNjgAQgh0aGlua2luZxIM6cZezoae3YMBKeUHGgw/iy0PoRrY0tY3z48iML7CqI0ZluxPJPXTAI3QxkdVG3dzXfxNpDhRWvOHvrURM5KpKm8gasgo5LLIJfQa0ip9EvggIrnDJKCXnOk5BFOsoNUe+2QdTzF7tGepNuhmB9R10uJzdEft/L/pw847GzejnrRYrahkVoNAv8hx/tiQABnoEbPqLixGimf2FhSMEUzzaHHoYiNlIdaR9cCZOXQ3hgyKrfkjg004wt6SaCLbZxm5SLZ8Yi5gsgVC0kAYAQ==", + "thinking": "We need to prove that 1\u00b3 + 2\u00b3 + 3\u00b3 + ... + n\u00b3 = (1 + 2 + 3 + ... 
+ n)\u00b2\n\nI'll use mathematical induction.", + "type": "thinking" + }, + { + "citations": null, + "text": "# Proof: Sum of First n Cubes\n\n## Statement to Prove\n\n$$\\sum_{k=1}^{n} k^3 = \\left(\\sum_{k=1}^{n} k\\right)^2$$\n\nUsing the known closed form for the linear sum, this is equivalent to:\n\n$$1^3 + 2^3 + 3^3 + \\cdots + n^3 = \\left(\\frac{n(n+1)}{2}\\right)^2$$\n\n## Proof by Mathematical Induction\n\n### Base Case (n = 1)\n\n**Left side:** $1^3 = 1$\n\n**Right side:** $\\left(\\dfrac{1 \\cdot 2}{2}\\right)^2 = (1)^2 = 1$ \u2713\n\n### Inductive Step\n\n**Inductive Hypothesis:** Assume the statement holds for some $n = m \\geq 1$:\n\n$$1^3 + 2^3 + \\cdots + m^3 = \\left(\\frac{m(m+1)}{2}\\right)^2$$\n\n**Goal:** Prove the statement holds for $n = m + 1$:\n\n$$1^3 + 2^3 + \\cdots + m^3 + (m+1)^3 = \\left(\\frac{(m+1)(m+2)}{2}\\right)^2$$\n\n**Derivation:**\n\nStarting with the left side of the goal:\n\n$$\\underbrace{1^3 + 2^3 + \\cdots + m^3}_{\\text{apply hypothesis}} + (m+1)^3$$\n\n$$= \\left(\\frac{m(m+1)}{2}\\right)^2 + (m+1)^3$$\n\n$$= \\frac{m^2(m+1)^2}{4} + (m+1)^3$$\n\nFactor out $(m+1)^2$:\n\n$$= (m+1)^2\\left(\\frac{m^2}{4} + (m+1)\\right)$$\n\nCombine terms inside the parentheses over a common denominator of 4:\n\n$$= (m+1)^2\\left(\\frac{m^2 + 4(m+1)}{4}\\right)$$\n\n$$= (m+1)^2\\left(\\frac{m^2 + 4m + 4}{4}\\right)$$\n\nFactor the numerator as a perfect square:\n\n$$= (m+1)^2\\left(\\frac{(m+2)^2}{4}\\right)$$\n\nRearrange:\n\n$$= \\frac{(m+1)^2(m+2)^2}{4}$$\n\n$$= \\left(\\frac{(m+1)(m+2)}{2}\\right)^2$$\n\nThis is exactly the right side of the goal. 
$\\blacksquare$\n\n## Conclusion\n\nBy the principle of mathematical induction, for all positive integers $n$:\n\n$$\\boxed{1^3 + 2^3 + 3^3 + \\cdots + n^3 = \\left(\\frac{n(n+1)}{2}\\right)^2 = \\left(1 + 2 + 3 + \\cdots + n\\right)^2}$$", + "type": "text" + } + ], + "model": "claude-sonnet-4-6", + "role": "assistant", + "stop_details": null, + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": "global", + "input_tokens": 66, + "output_tokens": 862, + "server_tool_use": null, + "service_tier": "standard" + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/anthropic_native/08_stream.json b/tests/unit/adapters/fixtures/anthropic_native/08_stream.json new file mode 100644 index 0000000..6b55d43 --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/08_stream.json @@ -0,0 +1,142 @@ +{ + "_model_id": "claude-haiku-4-5-20251001", + "_response": { + "events": [ + { + "message": { + "id": "msg_01A9ASsVixqCzpb4qwbcCkd2", + "container": null, + "content": [], + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_details": null, + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": "not_available", + "input_tokens": 13, + "output_tokens": 8, + "server_tool_use": null, + "service_tier": "standard" + } + }, + "type": "message_start" + }, + { + "content_block": { + "citations": null, + "text": "", + "type": "text" + }, + "index": 0, + "type": "content_block_start" + }, + { + "delta": { + "text": "Dolphins are highly intelligent marine mammals known", + "type": "text_delta" + }, + "index": 0, + "type": 
"content_block_delta" + }, + { + "type": "text", + "text": "Dolphins are highly intelligent marine mammals known", + "snapshot": "Dolphins are highly intelligent marine mammals known" + }, + { + "delta": { + "text": " for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks", + "type": "text_delta" + }, + "index": 0, + "type": "content_block_delta" + }, + { + "type": "text", + "text": " for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks", + "snapshot": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks" + }, + { + "delta": { + "text": " and whistles.", + "type": "text_delta" + }, + "index": 0, + "type": "content_block_delta" + }, + { + "type": "text", + "text": " and whistles.", + "snapshot": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks and whistles." 
+ }, + { + "index": 0, + "type": "content_block_stop", + "content_block": { + "citations": null, + "text": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks and whistles.", + "type": "text", + "parsed_output": null + } + }, + { + "delta": { + "container": null, + "stop_details": null, + "stop_reason": "end_turn", + "stop_sequence": null + }, + "type": "message_delta", + "usage": { + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "input_tokens": 13, + "output_tokens": 35, + "server_tool_use": null + } + }, + { + "type": "message_stop", + "message": { + "id": "msg_01A9ASsVixqCzpb4qwbcCkd2", + "container": null, + "content": [ + { + "citations": null, + "text": "Dolphins are highly intelligent marine mammals known for their playful behavior, complex social structures, and remarkable ability to communicate with each other through clicks and whistles.", + "type": "text", + "parsed_output": null + } + ], + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_details": null, + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": "not_available", + "input_tokens": 13, + "output_tokens": 35, + "server_tool_use": null, + "service_tier": "standard" + } + } + } + ] + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/anthropic_native/09_multi_turn.json b/tests/unit/adapters/fixtures/anthropic_native/09_multi_turn.json new file mode 100644 index 0000000..eb3ff9c --- /dev/null +++ b/tests/unit/adapters/fixtures/anthropic_native/09_multi_turn.json @@ -0,0 +1,33 @@ +{ + "_model_id": "claude-haiku-4-5-20251001", + "_response": { + "id": "msg_01Tg1sNzgCXenQnUBLwQ8Ycr", + "container": 
null, + "content": [ + { + "citations": null, + "text": "4 times 3 equals 12.", + "type": "text" + } + ], + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_details": null, + "stop_reason": "end_turn", + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": "not_available", + "input_tokens": 34, + "output_tokens": 14, + "server_tool_use": null, + "service_tier": "standard" + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/capture_anthropic.py b/tests/unit/adapters/fixtures/capture_anthropic.py new file mode 100644 index 0000000..73893a2 --- /dev/null +++ b/tests/unit/adapters/fixtures/capture_anthropic.py @@ -0,0 +1,179 @@ +"""Capture real Anthropic native API responses for adapter design. + +Saves raw responses to tests/unit/adapters/fixtures/anthropic_native/.json +so we can verify mappings against reality before writing the adapter. + +Reads ANTHROPIC_API_KEY from env. 
+""" + +from __future__ import annotations + +import json +import os +import pathlib +import sys + +from anthropic import Anthropic + +OUT = pathlib.Path(__file__).parent / "anthropic_native" +OUT.mkdir(parents=True, exist_ok=True) + + +def to_dict(response) -> dict: + """Anthropic SDK returns pydantic models — convert to plain dict for JSON.""" + if hasattr(response, "model_dump"): + return response.model_dump() + if hasattr(response, "dict"): + return response.dict() + return json.loads(response.json()) if hasattr(response, "json") else dict(response) + + +def save(name: str, model: str, payload: dict) -> None: + path = OUT / f"{name}.json" + path.write_text(json.dumps({"_model_id": model, "_response": payload}, indent=2, default=str)) + print(f" ✓ saved {path.name}") + + +def main() -> int: + key = os.environ.get("ANTHROPIC_API_KEY") + if not key: + print("error: set ANTHROPIC_API_KEY", file=sys.stderr) + return 2 + + client = Anthropic(api_key=key) + PROMPT = "Write one sentence about dolphins." + + # Rename badge: the script header reads "Sonnet 4.5" but the API only exposes 4-6+ now. + # ----- 1. Plain call (small model) ----- + print("\n[1] plain — claude-haiku-4-5-20251001") + r = client.messages.create( + model="claude-haiku-4-5-20251001", + max_tokens=80, + messages=[{"role": "user", "content": PROMPT}], + ) + save("01_plain_haiku", "claude-haiku-4-5-20251001", to_dict(r)) + + # ----- 2. Plain call (Sonnet, larger) ----- + print("\n[2] plain — claude-sonnet-4-6") + r = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=80, + messages=[{"role": "user", "content": PROMPT}], + ) + save("02_plain_sonnet", "claude-sonnet-4-6", to_dict(r)) + + # ----- 3. 
Tool use ----- + print("\n[3] tool use — claude-sonnet-4-6 with weather tool") + tools = [ + { + "name": "get_weather", + "description": "Get the current weather for a city.", + "input_schema": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + } + ] + r = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=200, + tools=tools, + tool_choice={"type": "any"}, + messages=[{"role": "user", "content": "What's the weather in Tokyo?"}], + ) + save("03_tool_use", "claude-sonnet-4-6", to_dict(r)) + + # ----- 4. Cache create (5m default TTL) — long system prompt ----- + print("\n[4] cache create — long system + cache_control 5m default") + LONG_TEXT = ("You are a helpful assistant. Answer concisely. " * 200) + ( + "Always cite step by step. " * 100 + ) + cached_body = { + "model": "claude-sonnet-4-6", + "max_tokens": 30, + "system": [{"type": "text", "text": LONG_TEXT, "cache_control": {"type": "ephemeral"}}], + "messages": [{"role": "user", "content": "What's 2+2?"}], + } + r = client.messages.create(**cached_body) + save("04_cache_create_5m", "claude-sonnet-4-6", to_dict(r)) + + # ----- 5. Cache read (same long system, different user question) ----- + print("\n[5] cache read — same cached_control content, second call") + cached_body["messages"] = [{"role": "user", "content": "What's 3+3?"}] + r = client.messages.create(**cached_body) + save("05_cache_read", "claude-sonnet-4-6", to_dict(r)) + + # ----- 6. 
Cache 1h TTL ----- + print("\n[6] cache 1h — explicit ttl") + cached_1h = { + "model": "claude-sonnet-4-6", + "max_tokens": 30, + "system": [ + { + "type": "text", + "text": LONG_TEXT + " (1h variant)", + "cache_control": {"type": "ephemeral", "ttl": "1h"}, + } + ], + "messages": [{"role": "user", "content": "Hi"}], + } + try: + r = client.messages.create(**cached_1h) + save("06_cache_create_1h", "claude-sonnet-4-6", to_dict(r)) + except Exception as exc: # noqa: BLE001 + print(f" 1h TTL not available on this account/region: {str(exc)[:160]}") + + # ----- 7. Extended thinking (reasoning) ----- + print("\n[7] extended thinking — claude-sonnet-4-6") + try: + r = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=2048, + thinking={"type": "enabled", "budget_tokens": 1024}, + messages=[ + { + "role": "user", + "content": ( + "Prove that the sum of the first n cubes equals the square of the sum of " + "the first n positive integers. Show each algebraic step." + ), + } + ], + ) + save("07_extended_thinking", "claude-sonnet-4-6", to_dict(r)) + except Exception as exc: # noqa: BLE001 + print(f" extended thinking error: {str(exc)[:160]}") + + # ----- 8. Streaming ----- + print("\n[8] streaming — claude-haiku-4-5-20251001") + events: list[dict] = [] + with client.messages.stream( + model="claude-haiku-4-5-20251001", + max_tokens=60, + messages=[{"role": "user", "content": PROMPT}], + ) as stream: + for event in stream: + events.append(to_dict(event)) + save("08_stream", "claude-haiku-4-5-20251001", {"events": events}) + + # ----- 9. Multi-turn ----- + print("\n[9] multi-turn — claude-haiku-4-5-20251001") + convo = [ + {"role": "user", "content": "What is 2+2?"}, + {"role": "assistant", "content": "2+2 equals 4."}, + {"role": "user", "content": "And times 3?"}, + ] + r = client.messages.create( + model="claude-haiku-4-5-20251001", + max_tokens=40, + messages=convo, + ) + save("09_multi_turn", "claude-haiku-4-5-20251001", to_dict(r)) + + print("\nDone. 
Inspect tests/unit/adapters/fixtures/anthropic_native/*.json") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/unit/adapters/test_anthropic_native.py b/tests/unit/adapters/test_anthropic_native.py new file mode 100644 index 0000000..13bff69 --- /dev/null +++ b/tests/unit/adapters/test_anthropic_native.py @@ -0,0 +1,152 @@ +"""Anthropic native adapter — verified against real fixtures.""" + +from __future__ import annotations + +import json +import pathlib + +from lago_agent_sdk.adapters import extract_anthropic_native + +FIX = pathlib.Path(__file__).parent / "fixtures" / "anthropic_native" + + +def _load(name: str) -> tuple[str, dict]: + data = json.loads((FIX / name).read_text()) + return data["_model_id"], data["_response"] + + +# -------------------------------------------------------------------------- +# Real fixtures +# -------------------------------------------------------------------------- +def test_plain_haiku() -> None: + model_id, resp = _load("01_plain_haiku.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert u.input == 13 + assert u.output == 35 + assert u.cache_read == 0 + assert u.cache_write == 0 + assert u.tool_calls == 0 + assert u.api == "native" + assert u.provider == "anthropic" + assert u.model == "claude-haiku-4-5-20251001" + + +def test_plain_sonnet() -> None: + model_id, resp = _load("02_plain_sonnet.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert u.input == 13 + assert u.output == 39 + + +def test_tool_use_counts_tool_calls() -> None: + model_id, resp = _load("03_tool_use.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert u.input == 658 + assert u.output == 38 + assert u.tool_calls == 1 + + +def test_cache_create_5m() -> None: + model_id, resp = _load("04_cache_create_5m.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert u.cache_write == 2803 + assert u.cache_write_5m == 2803 + assert u.cache_write_1h == 0 + 
assert u.cache_read == 0 + + +def test_cache_read_after_create() -> None: + model_id, resp = _load("05_cache_read.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert u.cache_read == 2803 + assert u.cache_write == 0 + assert u.cache_write_5m == 0 + + +def test_cache_create_1h() -> None: + model_id, resp = _load("06_cache_create_1h.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert u.cache_write == 2808 + assert u.cache_write_1h == 2808 + assert u.cache_write_5m == 0 + + +def test_extended_thinking_bundles_into_output_tokens() -> None: + """Anthropic's extended thinking does NOT expose reasoning_tokens — they're folded into output_tokens.""" + model_id, resp = _load("07_extended_thinking.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert u.input == 66 + assert u.output == 862 # all 862 includes thinking + final answer + assert u.reasoning == 0 # confirmed: Anthropic doesn't separate it + # content has both 'thinking' and 'text' blocks — neither counts as a tool call + assert u.tool_calls == 0 + + +def test_multi_turn() -> None: + model_id, resp = _load("09_multi_turn.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert u.input == 34 + assert u.output == 14 + + +def test_unknown_top_usage_field_lands_in_extras() -> None: + """service_tier, inference_geo, server_tool_use are new fields → drift detection.""" + model_id, resp = _load("01_plain_haiku.json") + u = extract_anthropic_native(resp, model_id=model_id) + assert "service_tier" in u.extras + assert "inference_geo" in u.extras + assert "server_tool_use" in u.extras + + +# -------------------------------------------------------------------------- +# Synthetic +# -------------------------------------------------------------------------- +def test_handles_pydantic_via_model_dump() -> None: + class FakePydantic: + def model_dump(self) -> dict: + return { + "model": "claude-sonnet-4-6", + "content": [{"type": "text", "text": "hi"}], + 
"usage": { + "input_tokens": 5, + "output_tokens": 7, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "cache_creation": { + "ephemeral_5m_input_tokens": 0, + "ephemeral_1h_input_tokens": 0, + }, + }, + } + + u = extract_anthropic_native(FakePydantic(), model_id="claude-sonnet-4-6") + assert u.input == 5 + assert u.output == 7 + assert u.api == "native" + + +def test_multiple_tool_use_blocks_counted() -> None: + resp = { + "usage": {"input_tokens": 10, "output_tokens": 20}, + "content": [ + {"type": "text", "text": "..."}, + {"type": "tool_use", "id": "t1"}, + {"type": "tool_use", "id": "t2"}, + {"type": "tool_use", "id": "t3"}, + ], + } + u = extract_anthropic_native(resp, model_id="claude-sonnet-4-6") + assert u.tool_calls == 3 + + +def test_no_usage_returns_zeros() -> None: + u = extract_anthropic_native({}, model_id="claude-sonnet-4-6") + assert u.input == 0 + assert u.output == 0 + assert not u.nonzero_numeric() + + +def test_survives_non_dict_usage() -> None: + assert extract_anthropic_native({"usage": True}, model_id="x").input == 0 + assert extract_anthropic_native({"usage": "bogus"}, model_id="x").output == 0 + assert extract_anthropic_native(None, model_id="x").input == 0 diff --git a/tests/unit/test_wrapper_anthropic.py b/tests/unit/test_wrapper_anthropic.py new file mode 100644 index 0000000..08c7a70 --- /dev/null +++ b/tests/unit/test_wrapper_anthropic.py @@ -0,0 +1,223 @@ +"""Anthropic wrapper tests — fake client, no live API.""" + +from __future__ import annotations + +from typing import Any + +from lago_agent_sdk import LagoSDK + + +class FakeMessage: + """Mimics Anthropic's Message pydantic object.""" + + def __init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + # expose .usage and .content as attribute access for _is_message_like check + self.usage = payload.get("usage") + self.content = payload.get("content", []) + + def model_dump(self) -> dict[str, Any]: + return self._payload + + +class 
FakeStreamEvent: + """Mimics one of Anthropic's MessageStreamEvent objects (MessageDelta/Start/etc.).""" + + def __init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + + def model_dump(self) -> dict[str, Any]: + return self._payload + + +class FakeMessages: + def __init__(self) -> None: + self.create_calls = 0 + self.stream_calls = 0 + + def create(self, **kwargs: Any) -> Any: + self.create_calls += 1 + assert "extra_lago" not in kwargs + if kwargs.get("stream") is True: + events = [ + FakeStreamEvent({"type": "message_start", "message": {"usage": {"input_tokens": 12}}}), + FakeStreamEvent( + { + "type": "message_delta", + "delta": {"stop_reason": "end_turn"}, + "usage": {"input_tokens": 12, "output_tokens": 22}, + } + ), + FakeStreamEvent({"type": "message_stop"}), + ] + return iter(events) + return FakeMessage( + { + "model": kwargs.get("model", "claude-sonnet-4-6"), + "content": [{"type": "text", "text": "hi"}], + "usage": { + "input_tokens": 8, + "output_tokens": 16, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "cache_creation": { + "ephemeral_5m_input_tokens": 0, + "ephemeral_1h_input_tokens": 0, + }, + }, + } + ) + + def stream(self, **kwargs: Any) -> Any: + self.stream_calls += 1 + assert "extra_lago" not in kwargs + outer = self + + class _FakeStreamManager: + def __enter__(self_inner) -> Any: + outer._final = FakeMessage( + { + "model": kwargs.get("model", "claude-sonnet-4-6"), + "content": [{"type": "text", "text": "hi"}], + "usage": { + "input_tokens": 5, + "output_tokens": 11, + }, + } + ) + return _FakeStreamHandle(outer._final) + + def __exit__(self_inner, exc_type, exc, tb) -> Any: # noqa: D401 + return False + + return _FakeStreamManager() + + +class _FakeStreamHandle: + def __init__(self, final: FakeMessage) -> None: + self._final = final + self.text_stream = iter(["hi"]) + + def get_final_message(self) -> FakeMessage: + return self._final + + +class FakeAnthropic: + """Mimics `from anthropic import 
Anthropic; Anthropic(api_key=...)`.""" + + def __init__(self) -> None: + self.messages = FakeMessages() + + +# Module path needs to contain 'anthropic' so detector.py routes to anthropic wrapper. +FakeAnthropic.__module__ = "anthropic.fake" + + +def _new_sdk(default_sub: str = "sub_test") -> tuple[LagoSDK, list[dict]]: + received: list[dict] = [] + + def sender(batch: list[dict]) -> None: + received.extend(batch) + + sdk = LagoSDK(api_key="dummy", default_subscription_id=default_sub) + sdk._queue._sender = sender # type: ignore[attr-defined] + return sdk, received + + +def test_wrap_messages_create_emits_input_and_output() -> None: + sdk, received = _new_sdk() + fake = FakeAnthropic() + client = sdk.wrap(fake) + resp = client.messages.create(model="claude-sonnet-4-6", messages=[]) + assert resp.usage["input_tokens"] == 8 + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received} + assert by_code["llm_input_tokens"] == 8 + assert by_code["llm_output_tokens"] == 16 + + +def test_wrap_strips_extra_lago_and_uses_per_call_sub() -> None: + sdk, received = _new_sdk("sub_default") + fake = FakeAnthropic() + client = sdk.wrap(fake) + client.messages.create( + model="claude-sonnet-4-6", + messages=[], + extra_lago={"subscription": "sub_per_call", "dimensions": {"feature": "X"}}, + ) + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + assert all(e["external_subscription_id"] == "sub_per_call" for e in received) + assert received[0]["properties"]["feature"] == "X" + + +def test_wrap_double_wrap_is_idempotent() -> None: + sdk, received = _new_sdk() + fake = FakeAnthropic() + sdk.wrap(fake) + sdk.wrap(fake) + sdk.wrap(fake) + fake.messages.create(model="claude-sonnet-4-6", messages=[]) + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + assert len(received) == 2 # input + output, not 6 + assert fake.messages.create_calls == 1 + + +def 
test_wrap_create_with_stream_captures_usage_from_message_delta() -> None: + sdk, received = _new_sdk() + fake = FakeAnthropic() + client = sdk.wrap(fake) + events = list(client.messages.create(model="claude-sonnet-4-6", messages=[], stream=True)) + assert len(events) == 3 + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received} + assert by_code["llm_input_tokens"] == 12 + assert by_code["llm_output_tokens"] == 22 + + +def test_wrap_messages_stream_context_manager_emits_on_close() -> None: + sdk, received = _new_sdk() + fake = FakeAnthropic() + client = sdk.wrap(fake) + with client.messages.stream(model="claude-sonnet-4-6", messages=[]) as stream: + list(stream.text_stream) + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received} + assert by_code["llm_input_tokens"] == 5 + assert by_code["llm_output_tokens"] == 11 + + +def test_instrumentation_failure_does_not_break_call() -> None: + sdk, _ = _new_sdk() + + class BadMessage: + @property + def usage(self): + raise RuntimeError("boom") + + @property + def content(self): + return [] + + def model_dump(self): + raise RuntimeError("boom") + + class BadMessages: + def create(self, **_kw): + return BadMessage() + + class BadAnthropic: + def __init__(self): + self.messages = BadMessages() + + BadAnthropic.__module__ = "anthropic.fake" + + client = sdk.wrap(BadAnthropic()) + # Adapter will crash inside, but wrap must still return resp. 
+ resp = client.messages.create(model="x", messages=[]) + assert resp is not None + sdk.shutdown(timeout=1.0) diff --git a/uv.lock b/uv.lock index f1fc5c8..a40fdc7 100644 --- a/uv.lock +++ b/uv.lock @@ -15,6 +15,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "anthropic" +version = "0.103.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "docstring-parser" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/57/0b758b08cf4606c94d63a997d67a0063f7438efbaf81cfedd0d7c0c69d67/anthropic-0.103.1.tar.gz", hash = "sha256:21c12f4fc0fdd87a2e80d58479cd0af640062b3cfb82bbfa01c7977acd4defeb", size = 848877, upload-time = "2026-05-19T15:43:27.698Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/ec/cf357cf571377a39552c1530390a9b79bbdb6ea463f48fbe4e3624141e3b/anthropic-0.103.1-py3-none-any.whl", hash = "sha256:b9a523fac34e64caf6ee55fdbda213950e6a744b906fce100d34909aad2cd8f4", size = 832551, upload-time = "2026-05-19T15:43:29.663Z" }, +] + [[package]] name = "anyio" version = "4.13.0" @@ -345,6 +364,24 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11'" }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ 
+ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, +] + +[[package]] +name = "docstring-parser" +version = "0.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/4d/f332313098c1de1b2d2ff91cf2674415cc7cddab2ca1b01ae29774bd5fdf/docstring_parser-0.18.0.tar.gz", hash = "sha256:292510982205c12b1248696f44959db3cdd1740237a968ea1e2e7a900eeb2015", size = 29341, upload-time = "2026-04-14T04:09:19.867Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/5f/ed01f9a3cdffbd5a008556fc7b2a08ddb1cc6ace7effa7340604b1d16699/docstring_parser-0.18.0-py3-none-any.whl", hash = "sha256:b3fcbed555c47d8479be0796ef7e19c2670d428d72e96da63f3a40122860374b", size = 22484, upload-time = "2026-04-14T04:09:18.638Z" }, +] + [[package]] name = "eval-type-backport" version = "0.3.1" @@ -446,6 +483,109 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "jiter" +version = "0.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/b5/55f06bb281d92fb3cc86d14e1def2bd908bb77693183e7cb1f5a3c388b0c/jiter-0.15.0.tar.gz", hash = "sha256:4251acc80e2b7c9b7b8823456ea0fceeb0734dac2df7636d3c711b38476b5a76", size = 166640, upload-time = "2026-05-19T10:09:48.361Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/da/76a2c7e510ba15fe323d9509c223ab272da79ea59f54488f4a78da6426db/jiter-0.15.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = 
"sha256:edebcf7d1f601199084bb6e844d7dc67e03e04f6ac786b0332d616635c4ff7a4", size = 310849, upload-time = "2026-05-19T10:06:51.944Z" }, + { url = "https://files.pythonhosted.org/packages/5d/8e/827be942883a4dc0862c48626ff41af3320b1902d136a0bf4b9041f2c567/jiter-0.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9f924585cdacf631cd382b657966847bb537bf9ed0a6f9b991da5f05a631480f", size = 314991, upload-time = "2026-05-19T10:06:53.522Z" }, + { url = "https://files.pythonhosted.org/packages/6d/38/be2832be361ba1b9517c76f46d30b64e985be1dd43c974f4c3a4b1844436/jiter-0.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abbf258599526ad0326fe51e252e24f2bd6f24f1852681b4b78feda3808f1d18", size = 340843, upload-time = "2026-05-19T10:06:55.071Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d8/90f01fb83c0c7ba509303ec93e32a308fbfa167d264860b01c0fd0dbbd06/jiter-0.15.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c468136b8bd6bb18c8786e4236a1fa27362f24cb23450ba0cb204ab379b8e6f", size = 365116, upload-time = "2026-05-19T10:06:56.893Z" }, + { url = "https://files.pythonhosted.org/packages/91/38/94593d34f8c67a0b6f6cbc027f016ffa9780b3a858a7a86f6fd7a15bcc1e/jiter-0.15.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05906b93d72f03339e6bb7cf8dc10ebda64a0266126eed6beba79e20abcf5fd4", size = 457970, upload-time = "2026-05-19T10:06:58.707Z" }, + { url = "https://files.pythonhosted.org/packages/df/04/d79962dd49d00c97e2a9b4cacea1947904d02135936960351f9a96d4c1a6/jiter-0.15.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:30ce785d2adb8e32c3f7741442370a74834ec4c01f3c48f0750227a0b4ef27d6", size = 375744, upload-time = "2026-05-19T10:07:00.471Z" }, + { url = "https://files.pythonhosted.org/packages/c3/2e/5d37abe2be0e819c21e2338bebd410e481763ce526a9138c8c3652fa0123/jiter-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:2fd73e3da91a0a722d67165e849ce2cdc10de0e0d48738c142be8c6c5f310f4c", size = 349609, upload-time = "2026-05-19T10:07:01.829Z" }, + { url = "https://files.pythonhosted.org/packages/7a/90/98768ad2ed90c1fda15d64157de2dfbf73c1c074d4b1bfaca915480bc7cf/jiter-0.15.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:ceb8fc27d38793f9c97149be8302720c5b22e5c195a37bf2c45dc36c4600a512", size = 354366, upload-time = "2026-05-19T10:07:03.587Z" }, + { url = "https://files.pythonhosted.org/packages/d6/c4/fbfb806209f1fe4b7dccdfb07bc62bb044300734a945b06fd64db446ef6a/jiter-0.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d726e3ceeb337191324b49de298142f27c3ad10886341555d1d5315b5f252c6a", size = 393519, upload-time = "2026-05-19T10:07:05.08Z" }, + { url = "https://files.pythonhosted.org/packages/37/1c/b9c257cd70cb453b6d10f3ebf0402cdb11669ab455389096f09839670290/jiter-0.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2c8aea7781d2a372227871de4e1a1332aa96f5a89fd76c5e835dafdbad102887", size = 519952, upload-time = "2026-05-19T10:07:06.589Z" }, + { url = "https://files.pythonhosted.org/packages/a9/1a/aa85027db7ab15829c12feebbc33b404f53fc399bd559d85fd0d6365ff0d/jiter-0.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cf4bd113a69c0a740e27cb962ce10630c36d2b8f59d759a651b955ee9d18a823", size = 550770, upload-time = "2026-05-19T10:07:08.228Z" }, + { url = "https://files.pythonhosted.org/packages/d4/54/8c3f65c8a5687925e84708f19d63f7f37d28e2b86a48d951702ad94424d8/jiter-0.15.0-cp310-cp310-win32.whl", hash = "sha256:d92a5cd21fdb083931d546c207aa29633787c5dc5b02daab2d32b843f88a2c53", size = 209303, upload-time = "2026-05-19T10:07:10.006Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/0528a1eb9f42dd2d8228a0711458628f35924d131f623eaebc35fd23d3d4/jiter-0.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:e58585a58209d72691ce2d62a9147445f5a87beb0bde97fde284c96ae392a3d1", size = 200404, upload-time = "2026-05-19T10:07:11.426Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/13/daa722f5765c393576f466378f9dfd29d77c9bed939e0688f96afa3601ea/jiter-0.15.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0f862193b8696249d22ec433e85fd2ab0ad9596bc3e45e6c0bc55e8aeba97be2", size = 310899, upload-time = "2026-05-19T10:07:12.89Z" }, + { url = "https://files.pythonhosted.org/packages/7f/82/2d2551829b082f4b6d82b9f939b031fb808a10aab1ec0664f82e150bb9a2/jiter-0.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1303d4d68a9b051ea90502402063ecf3807da00ad2affa19ca1ae3b90b3c5f67", size = 314963, upload-time = "2026-05-19T10:07:14.539Z" }, + { url = "https://files.pythonhosted.org/packages/2a/0a/8b1a51466f7fe9f31dbe4bc7e0ca848674f9825e0f737b929b97e8c60aa7/jiter-0.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:392b8ab019e5502d08aff85c6272209c24bc2cbe706ea82a56368f524236614a", size = 341730, upload-time = "2026-05-19T10:07:15.869Z" }, + { url = "https://files.pythonhosted.org/packages/f6/2a/e71dea19822e2e404e83992a08c1d6b9b617bb944f28c9c2fbd85d02c91e/jiter-0.15.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:773b6eb282ce11ee19f05f6b2d4404fa308e5bbd353b0b80a0262caad6db2cd7", size = 366214, upload-time = "2026-05-19T10:07:17.259Z" }, + { url = "https://files.pythonhosted.org/packages/c4/59/97e1fa539d124a509a00ab7f669289d1c1d236ecabf12948a18f16c91082/jiter-0.15.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d2c0c44d569ce0f2850f5c926f8caeb5f245fbc84475aeb36efccc2103e6dbd", size = 459527, upload-time = "2026-05-19T10:07:18.741Z" }, + { url = "https://files.pythonhosted.org/packages/d1/7a/4a68d331aef8cf2e2393c14a3aacb635c62aa86071b0229899fb5baaa907/jiter-0.15.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:032396229564bca02440396bd327710719f724f5e7b7e9f7a8eb3faa4a2c2281", size = 375451, upload-time = "2026-05-19T10:07:20.208Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/7e/1c445c2b6f0e30a274dc8082e0c3c7825411cce80d726bccd697c98cc8d3/jiter-0.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d37768fce7f88dd2a8c6091f2325dea27d30d30d5c6e7a1c0f0af77723b708", size = 349428, upload-time = "2026-05-19T10:07:22.372Z" }, + { url = "https://files.pythonhosted.org/packages/00/94/e20d38984fc17a636371bffd2ae0f698124fdc8e75ef969cd2da6ba7cea7/jiter-0.15.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:2c9cb907439d20bd0c7d7565ca01ee52234203208433749bae5b516907526928", size = 355405, upload-time = "2026-05-19T10:07:23.916Z" }, + { url = "https://files.pythonhosted.org/packages/94/fa/4d09f814779d0ea80a28ed8e4c6662ec9a4a8ecef0ac52190ebac6262d14/jiter-0.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9100ddbec09741cc66feb0fc6773f8bdbd0e3c345689368f260082ff85dcc0cd", size = 393688, upload-time = "2026-05-19T10:07:25.854Z" }, + { url = "https://files.pythonhosted.org/packages/54/9d/8eb5d4fb8bf7e93a75964a5da71a75c67c864baf7fa3f98598187b3c7e57/jiter-0.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ae1b0d82ac2d987f9ea512b1c9adfcc71a28de3dea3a6039b54d76cffda9901e", size = 520853, upload-time = "2026-05-19T10:07:27.303Z" }, + { url = "https://files.pythonhosted.org/packages/e7/2c/5e07874e59e623a943a0acf1552a80d05b70f31b402287a8fc6d7ec634c7/jiter-0.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8020c99ec13a7db2b6f96cbe82ef4721c88b426a4892f27478044af0284615ef", size = 551016, upload-time = "2026-05-19T10:07:28.846Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/d2d34422143474cadc15b60d482b1c35683dbc5c63c24346ddd0df09bcaf/jiter-0.15.0-cp311-cp311-win32.whl", hash = "sha256:42bfb257930800cf43e7c62c832402c704ab60797c992faf88d20e903eac8f32", size = 209518, upload-time = "2026-05-19T10:07:30.431Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/7d/52778b930e5cc3e52a37d950b1c10494244308b4329b25a0ff0d88303a81/jiter-0.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:860a74063284a2ae9bfedd694f299cc2c68e2696c5f3d440cc9d18bb81b9dd04", size = 200565, upload-time = "2026-05-19T10:07:32.125Z" }, + { url = "https://files.pythonhosted.org/packages/3b/4f/d9b4067feb69b3fa6eb0488e1b59e2ad5b463fe39f59e527eab2aca00bb0/jiter-0.15.0-cp311-cp311-win_arm64.whl", hash = "sha256:37a10c377ce3a4a85f4a67f28b7afe093154cde77eaf248a72e856aa08b4d865", size = 195488, upload-time = "2026-05-19T10:07:33.846Z" }, + { url = "https://files.pythonhosted.org/packages/44/53/4f6bddbcde3c71e56d0aa1337ec95950f3d27dd4153e25aadf0feac71751/jiter-0.15.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0e90a1c315a0226ec822d973817967f9223b7701546c8c2a7913e7ab0926294d", size = 308793, upload-time = "2026-05-19T10:07:35.25Z" }, + { url = "https://files.pythonhosted.org/packages/01/84/c01099b59a285a1ebba64ae93f62bfa036675340fd1b0045ae65890a0442/jiter-0.15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c9004af7c8d67cce7f1aae1026fb55607f4aa600710d08ede3a3ce4aeefe7e0", size = 309570, upload-time = "2026-05-19T10:07:36.919Z" }, + { url = "https://files.pythonhosted.org/packages/58/64/8fb7f9d45bb98190355454cd04dad8d8f27223d6bd52f83af07f637168a6/jiter-0.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c210f8b35dc6f30aafd4b4365ca89b9d1189f21ab49b8e68fa6322a847aef138", size = 336783, upload-time = "2026-05-19T10:07:38.694Z" }, + { url = "https://files.pythonhosted.org/packages/c3/b6/f5739011d009b3a30f6a53c5240979030ba29ae46a8c67e3a15759f7c37d/jiter-0.15.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f30bae8bc1c2d613e28e5af3e8cceb09b742f1c8a8a5f839fb67afaffc03b61", size = 363555, upload-time = "2026-05-19T10:07:40.832Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/12/98a9d9f766665e8a3b6252454e17cb0c464606a28cf2fa09399b003345fa/jiter-0.15.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c60e71b6d10cfc284c9bf36bd885e8d44c46f688ce50aa91b5edd90181dea687", size = 452255, upload-time = "2026-05-19T10:07:42.62Z" }, + { url = "https://files.pythonhosted.org/packages/e8/d5/60f972840f79c5e7544fce567c56f1e4e50468f996baba3e78d823dd62a6/jiter-0.15.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ab068bce62a45aa3e7367eceaffb5dde60b7eb853be8dece45132e3d0ff4879", size = 373559, upload-time = "2026-05-19T10:07:44.201Z" }, + { url = "https://files.pythonhosted.org/packages/ee/cf/d46ef1234ba335aabc2f013210db8e0821a22f5e644a2e9449df199ecc23/jiter-0.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa248c9eb220197d363f688818dac2fd4b2f0cd7d843ca7105d652034823427d", size = 346055, upload-time = "2026-05-19T10:07:46.005Z" }, + { url = "https://files.pythonhosted.org/packages/f0/63/4d2749d8d54d230bad9b3a6b0d00cc28c6ff6b2fdffc26a8ccf76cc5a974/jiter-0.15.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2a77aadd57cac1682e4401a72724d2796d89a4ba129b1a5812aa94ee480826eb", size = 351406, upload-time = "2026-05-19T10:07:47.855Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b9/9965b990035d8773328e0a8c8b457a87bf2b19f6c4126d9d99296be5d16a/jiter-0.15.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ae901f3a55bfafdde31d289590fa25e3245735a2b1e8c7cc15871710a002871", size = 389357, upload-time = "2026-05-19T10:07:49.665Z" }, + { url = "https://files.pythonhosted.org/packages/2d/55/9ddf903deda1413e87fed792f416b7123daee5b8efbad6a202a7421c36a5/jiter-0.15.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f0b271b462769543716f92d3a4f90527df6ef5ed05ee95ec4137f513e21e1b77", size = 517263, upload-time = "2026-05-19T10:07:51.537Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/76/a0c40ad064d3a20a4fde231e35d56e9a01ce82164278180e82d5daf85469/jiter-0.15.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2fb6a5d26af81fc0f00f9360a891e05cf755e149bba391c4d563adc54812973d", size = 548646, upload-time = "2026-05-19T10:07:53.196Z" }, + { url = "https://files.pythonhosted.org/packages/23/4f/eca9b954942916ba2f453891b8593ab444cd872396fe66a3936616f236f3/jiter-0.15.0-cp312-cp312-win32.whl", hash = "sha256:c2f6bb8b5216ab9e7873bc08b5d7bef2b8abbb578a3069bf1cd14a45d71d771d", size = 206427, upload-time = "2026-05-19T10:07:55.307Z" }, + { url = "https://files.pythonhosted.org/packages/95/bf/8ead82a87495149542748e828d153fd232a512a22c83b02c4815c1a9c7d8/jiter-0.15.0-cp312-cp312-win_amd64.whl", hash = "sha256:40b2c7e92c44a84d748d21706c68dc6ff8161d80b59c99d774721a0d2317d7c7", size = 197300, upload-time = "2026-05-19T10:07:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/f4/e4/9b8a78fb2d894471bc344e37f1949bdd784bd914d031dba0ba3a40c71dd7/jiter-0.15.0-cp312-cp312-win_arm64.whl", hash = "sha256:cc0bc345cf2df9d1c00ac443f50d543c1ccfa8b0422cb85b1ab70d681c0b255b", size = 192702, upload-time = "2026-05-19T10:07:58.307Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f4/f708c900ecee41b2025ef8413d5351e5649eb2125c506f6720cc69b06f5c/jiter-0.15.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1c11465f97e2abf45a014b83b730222f8f1c5335e802c7055a67d50de6f1f4e3", size = 307829, upload-time = "2026-05-19T10:07:59.704Z" }, + { url = "https://files.pythonhosted.org/packages/86/59/db537c0949e83668c38481d426b9f2fd5ab758c4ee53a811dd0a510626a0/jiter-0.15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d1e7b1776f0797956c509e123d0952d10d293a9492dea9f288ab9570ec01d1a5", size = 308445, upload-time = "2026-05-19T10:08:01.184Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/38/ea0e13b18c30ef951da0d47d39e7fa9edb82a93a62990ffbd7cea9b622d4/jiter-0.15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:351a341c2105aa430b7047e30f1bf7975f6313b00165d3fc07be2edaf741f279", size = 336181, upload-time = "2026-05-19T10:08:02.688Z" }, + { url = "https://files.pythonhosted.org/packages/58/fc/2303901b16c4ba05865588990a420c0b4156270b44379c20931544a1d962/jiter-0.15.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4ab395feec8d249ec4044e228e98a7033f043426a265df439dc3698823f0a4e4", size = 362985, upload-time = "2026-05-19T10:08:04.394Z" }, + { url = "https://files.pythonhosted.org/packages/5b/6f/11bace093c52e7d4d26c8e606ccd7ae8c972189622469ec0d9e28161e28b/jiter-0.15.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2a438005b6f22d0273413484d6094d7c2c5d10ec1b3a3bf128e0d1d3ba53258", size = 453292, upload-time = "2026-05-19T10:08:05.967Z" }, + { url = "https://files.pythonhosted.org/packages/22/db/987f2f086ca4d7a6582eb4ccd513f9b26b42d9e4243a087609a3137a8fc7/jiter-0.15.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f18f85e4218d1b40f000f42a92239a7a61a902cd42c65e6c360dbd17dcb20894", size = 373501, upload-time = "2026-05-19T10:08:07.857Z" }, + { url = "https://files.pythonhosted.org/packages/8f/7c/89fbcabb2739b7a5b8dc959a1b6c5761f6484f5fed3486854b3c789bb1de/jiter-0.15.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1aa62e277fc1cbd80e6deacae6f4d983b41b3d7728e0645c5d741a6149bba45", size = 344683, upload-time = "2026-05-19T10:08:09.431Z" }, + { url = "https://files.pythonhosted.org/packages/30/6f/6cca7692e7dddfec6d8d76c54dc97f2af2a41df4ac0674b999df1f09a5f3/jiter-0.15.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:6550fa135c7deb8ead6af49ed7ff648532ea8334a1447fe34a36315ef79c5c29", size = 350892, upload-time = "2026-05-19T10:08:11.352Z" }, + { url = 
"https://files.pythonhosted.org/packages/39/14/0338d6190cb8e6d22e677ab1d4eabd4117f67cca70c54cd04b82ff64e068/jiter-0.15.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:066f8f33f18b2419cd8213b2436fa7fbc9c499f315971cfa3ce1f9820c001b1b", size = 388723, upload-time = "2026-05-19T10:08:12.912Z" }, + { url = "https://files.pythonhosted.org/packages/90/31/cc19f4a1bdb6afb09ce6a2f2615aa8d44d994eba0d8e6105ed1af920e736/jiter-0.15.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:75e8a04e91432dde9f1838373cf93d23726c79d3e908d319acf0e796f85592e7", size = 516648, upload-time = "2026-05-19T10:08:14.808Z" }, + { url = "https://files.pythonhosted.org/packages/49/9f/833c541512cd091b63c10c0381973dfe11bc7a503a818c16384417e0c81e/jiter-0.15.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a97261f1fccb8e50ecd2890a96e46efdc3f57c80a197324c6777827231eca712", size = 547382, upload-time = "2026-05-19T10:08:16.927Z" }, + { url = "https://files.pythonhosted.org/packages/d2/11/e7b70e91f90bc4477e8eee9e8a5f7cf3cb41b4525d6394dc98a714eb8f7f/jiter-0.15.0-cp313-cp313-win32.whl", hash = "sha256:c77496cb10bd7549690fbbab3e5ec05857b83e49276f4a9423a766ddd2afcd4c", size = 205845, upload-time = "2026-05-19T10:08:18.401Z" }, + { url = "https://files.pythonhosted.org/packages/4b/23/5c20d9ad6f02c493e4023e5d2d09e1c1f15fe2753c9102c544aff068a88e/jiter-0.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:b15741f501469009ae0ae90b7147958a664a7dede40aa7ff174a8a4645f546d0", size = 196842, upload-time = "2026-05-19T10:08:20.131Z" }, + { url = "https://files.pythonhosted.org/packages/6b/11/1eb400ef248e8c925fd883fbe325daf5e42cd1b0d308539dd332bd4f7ffc/jiter-0.15.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d6a60072b44c3c2b797a7ddcbcbbf2b34ea3cfd4721580fbfd2a09d9d9b84ba", size = 192212, upload-time = "2026-05-19T10:08:21.807Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/60/2fd8d7c79da8acf9b7b277c7616847773779356b92acfc9bb158452174da/jiter-0.15.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ef1fd24d9413f6209e00d3d5a453e67acfe004a25cc6c8e8484faed4311ab9e8", size = 315065, upload-time = "2026-05-19T10:08:23.218Z" }, + { url = "https://files.pythonhosted.org/packages/46/f4/008fb7d65e8ac2abf00811651a661e025c4ba80bbc6f378450384ddd3aed/jiter-0.15.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:144f8e72cb53dab146347b91cceac01f5481237f2b93b4a339a1ee8f8878b67c", size = 339444, upload-time = "2026-05-19T10:08:24.701Z" }, + { url = "https://files.pythonhosted.org/packages/00/55/90b0c7b9c6896c0f2a591dd36d36b71d22e09674bfef178fa03ba3f81499/jiter-0.15.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553fcac2ef2cb990877f9fc0833b8b629a3e6a5670b6b5fd58219b41a653ddc4", size = 347779, upload-time = "2026-05-19T10:08:26.408Z" }, + { url = "https://files.pythonhosted.org/packages/51/6b/69666cec5000fd57734c118437394516c749ae8dbeea9fb66d6fef9c4775/jiter-0.15.0-cp313-cp313t-win_amd64.whl", hash = "sha256:774f93f65031856bf14ad9f59bdcab8b8cad501e5ceabd51ba3525f76937a25b", size = 200395, upload-time = "2026-05-19T10:08:28.055Z" }, + { url = "https://files.pythonhosted.org/packages/39/04/a6aa62cd27e8149b0d28df5561f10f6cceaf7935a9ccf3f1c5a05f9a0cd8/jiter-0.15.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f1e1754960f38ec40613a07e5e372df67acb3b890fb383b6fb3de3e49ddbf3c7", size = 190516, upload-time = "2026-05-19T10:08:29.35Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d2/079f350ebf7859d081de30aa890f9e3be68516f754f3ba32366ffff4dcee/jiter-0.15.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:ac0d9ddea4350974be7a221fc25895f251a8fee748c889bdced2141c0fec1a49", size = 308884, upload-time = "2026-05-19T10:08:31.667Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/4e/a2c30a7f69b48c03b20935d647479106fe932f6e63f75faf53937197e05d/jiter-0.15.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:01a8222cf05ab1128e239421156c207949808acaaea2bdfd33130ae666786e86", size = 310028, upload-time = "2026-05-19T10:08:33.304Z" }, + { url = "https://files.pythonhosted.org/packages/40/90/2e7cdfd3cf8ca967be38c48f5cf474d79f089efaf559a40f15984a77ae69/jiter-0.15.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:182226cbc930c9fab81bc2e41a4da672f89539906dadb05e75670ac07b94f71f", size = 337485, upload-time = "2026-05-19T10:08:35.259Z" }, + { url = "https://files.pythonhosted.org/packages/9b/11/15a1aa28b120b8ee5b4f1fb894c125046225f09847738bd64233d3b84883/jiter-0.15.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:71683c38c825452999b5717fcae07ea708e8c93003e808be4319c1b02e3d176e", size = 364223, upload-time = "2026-05-19T10:08:36.694Z" }, + { url = "https://files.pythonhosted.org/packages/b7/25/f442e8af5f3d0dcf47b39e83a0efd9ee45ea946aa6d04625dc3181eae3b6/jiter-0.15.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30f2218e6a9e5c18bc10fe6d41ac189c442c88eacf11bad9f28ef95a9bef00e6", size = 456387, upload-time = "2026-05-19T10:08:38.143Z" }, + { url = "https://files.pythonhosted.org/packages/da/f4/37f2d2c9f64f49af7da652ed7532bb5a2372e588e6927c3fdd76f911db65/jiter-0.15.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5157de9f76eb4bc5ea74a1219366a25f945ad305641d74e04f59c54087091aa9", size = 374461, upload-time = "2026-05-19T10:08:39.869Z" }, + { url = "https://files.pythonhosted.org/packages/60/28/edcfbbbf0cb15436f36664a8908a0df47ab9006298d4cd937dc08ea932d6/jiter-0.15.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c5db5527c221249a876160663ab891ace358c17f7b9c93ec1478b7f0550e5c", size = 345924, upload-time = "2026-05-19T10:08:41.668Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/13/89fba6398dab7f202b7278c4b4aac122399d2c0183971c4a57a3b7088df5/jiter-0.15.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:3e4540b8e74e4268811ac05db226a6a128ff572e7e0ce3f1163b693cadb184cd", size = 352283, upload-time = "2026-05-19T10:08:43.091Z" }, + { url = "https://files.pythonhosted.org/packages/1b/da/0f6af8cef2c565a1ab44d970f268c43ccaa72707386ea6388e6fe2b6cd26/jiter-0.15.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:62ebd14e47e9aed9df4472afcb2663668ce4d74891cd54f86bf6e44029d6dc89", size = 389985, upload-time = "2026-05-19T10:08:44.915Z" }, + { url = "https://files.pythonhosted.org/packages/a1/ec/b9cb7d6d29e24ee14910266157d2a279d7a8f60ee0df7fa840882976ba64/jiter-0.15.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0be6f5ad41a809f303f416d17cec92a7a725902fb9b4f3de3d19362ac0ef8554", size = 517695, upload-time = "2026-05-19T10:08:46.486Z" }, + { url = "https://files.pythonhosted.org/packages/64/5e/6d1bda880723aae0ad86b4b763f044362448efe31e3e819635d41cb03451/jiter-0.15.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:813dfbb17d65328bf86e5f0905dd277ba2265d3ca20556e86c0c7035b7182e5a", size = 548868, upload-time = "2026-05-19T10:08:48.026Z" }, + { url = "https://files.pythonhosted.org/packages/0c/72/7de501cf38dcacaf35098796f3a50e0f2e338baba18a58946c618544b809/jiter-0.15.0-cp314-cp314-win32.whl", hash = "sha256:50e51156192722a9c58db112837d3f8ef96fb3c5ecc14e95f409134b08b158ec", size = 206380, upload-time = "2026-05-19T10:08:49.738Z" }, + { url = "https://files.pythonhosted.org/packages/1e/a9/e19addf4b0c1bdce52c6da12351e6bc42c340c45e7c09e2158e46d293ccc/jiter-0.15.0-cp314-cp314-win_amd64.whl", hash = "sha256:30ce1a5d16b5641dc935d50ef775af6a0871e3d14ab05d6fc54dff371b78e558", size = 197687, upload-time = "2026-05-19T10:08:51.088Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/c9/776b1db01db25fc6c1d58d1979a37b0a9fe787e5f5b1d062d2eaacb77923/jiter-0.15.0-cp314-cp314-win_arm64.whl", hash = "sha256:510c8b3c17a0ed9ac69850c0438dada3c9b82d9c4d589fcb62002a5a9cf3a866", size = 192571, upload-time = "2026-05-19T10:08:52.451Z" }, + { url = "https://files.pythonhosted.org/packages/a0/f6/45bb4670bacf300fd2c7abadbfb3af376e5f1b6ae75fd9bc069891d15870/jiter-0.15.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7553333dd0930c104a5a0db8df72bf7219fe663d731383b576bb6ed6351c984d", size = 317151, upload-time = "2026-05-19T10:08:53.867Z" }, + { url = "https://files.pythonhosted.org/packages/d7/68/ed635ad5acd7b73e454283083bbb7c8205ad10e88b0d9d7d793b09fe8226/jiter-0.15.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2143ab06181d2b029eedcb6af3cebe95f11bbac62441781860f98ee9330a6a6", size = 341243, upload-time = "2026-05-19T10:08:55.383Z" }, + { url = "https://files.pythonhosted.org/packages/5d/db/3ff4176b817b8ea33879e71e13d8bc2b0d481a7ed3fe9e080f333d415c16/jiter-0.15.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6eac374c5c975709b69c10f09afd199df74150172156ad10c8d4fd785b7da995", size = 363629, upload-time = "2026-05-19T10:08:56.928Z" }, + { url = "https://files.pythonhosted.org/packages/ab/24/5f8270e0ba9c883582f96f722f8a0b58015c7ce1f8c6d4571cf394e99b6b/jiter-0.15.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3b3b775e33d3bfaec9899edc526ae97b0da0bf9d071a46124ba419149a414f8", size = 456198, upload-time = "2026-05-19T10:08:58.618Z" }, + { url = "https://files.pythonhosted.org/packages/45/5b/76fc02b0b5c54c3d18c60653156e2f76fde1816f9b4722db68d6ee2c897e/jiter-0.15.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3071db3346334beae1360b46da4606da57bf3528c167b3c38533afaf9f2c5", size = 373710, upload-time = "2026-05-19T10:09:00.151Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/52/4310821b0ea9277994d3e1f49fc6a4b34e4800caebacb2c0af81da59a454/jiter-0.15.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6694a173ecabc12eb60efbc0b474464ead1951ff65cd8b1e72100715c64512b", size = 349901, upload-time = "2026-05-19T10:09:01.621Z" }, + { url = "https://files.pythonhosted.org/packages/93/fe/67648c35b3594fba8854ac64cc8a826d8bcd18324bbdb53d77697c60b6ef/jiter-0.15.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:a254e10b593624d230c365b6d616b22ca0ad65e63a16e6631c2b3466022e6ba8", size = 352438, upload-time = "2026-05-19T10:09:03.216Z" }, + { url = "https://files.pythonhosted.org/packages/cb/28/0a1879d07ad6b3e025a2750027363452ced93c2d16d1c9d4b153ffd51c91/jiter-0.15.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d8d2955167274e15d79a7a020afdd9b39c990eb80b2d89fca695d92dcfdd38ec", size = 388152, upload-time = "2026-05-19T10:09:04.741Z" }, + { url = "https://files.pythonhosted.org/packages/c1/78/46c6f6b56ba85c90021f4afd72ed42f691f8f84daacb5fe27277070e3858/jiter-0.15.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:acf4ee4d1fc55917239fe72972fb292dd773055d05eb040d36f4326e02cc2c0e", size = 517707, upload-time = "2026-05-19T10:09:06.231Z" }, + { url = "https://files.pythonhosted.org/packages/ca/cb/720662d4c88fcad606e826fef5424365527ba43ce4868a479aed8f8c507e/jiter-0.15.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:e7196e56f1cd69af1dbb07dff02dcfb260a50b45a82d409d92a06fedb32473b5", size = 548241, upload-time = "2026-05-19T10:09:08.093Z" }, + { url = "https://files.pythonhosted.org/packages/60/e3/935b8034fd143f21125c87d51404a9e0e1449186a494405721ff5d1d695e/jiter-0.15.0-cp314-cp314t-win32.whl", hash = "sha256:7f6163c0f10b055245f814dcc59f4818da60dfe72f3e72ab89fc24b6bd5e9c52", size = 207950, upload-time = "2026-05-19T10:09:09.616Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/59/984fd9ece895953dad3e0880a650e766f5a2da2c5514f0eafdaaabbeb5f9/jiter-0.15.0-cp314-cp314t-win_amd64.whl", hash = "sha256:980c256edb05b78a111b99c4de3b1d32e31634b867fd1fc2cf726e7b7bba9854", size = 200055, upload-time = "2026-05-19T10:09:11.367Z" }, + { url = "https://files.pythonhosted.org/packages/0e/a4/cf8d779feb133a27a2e3bc833bccb9e13aa332cdf820497ebf72c10ce8c3/jiter-0.15.0-cp314-cp314t-win_arm64.whl", hash = "sha256:66b1880df2d01e206e8339769d1c7c1753bcb653efd6289e203f6f24ebada0c0", size = 191244, upload-time = "2026-05-19T10:09:12.74Z" }, + { url = "https://files.pythonhosted.org/packages/65/43/1fc62172aa98b50a7de9a25554060db510f85c89cfbed0dfe13e1907a139/jiter-0.15.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:411fa4dfa5a7ae3d11491027ffb9beadec3996010a986862db70d91abba1c750", size = 305585, upload-time = "2026-05-19T10:09:35.995Z" }, + { url = "https://files.pythonhosted.org/packages/e8/c4/dd58fcd9e2df83666e5c1c1347bef58ce919cd8efc3ffa38aeea62ce493b/jiter-0.15.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:2b0074e2f56eb2dacca1689760fd2852a068f85a0547a157b82cb4cafeb6768b", size = 306936, upload-time = "2026-05-19T10:09:37.435Z" }, + { url = "https://files.pythonhosted.org/packages/39/86/b695e16f1180c07f43ea98e73ecd21cf63fa2e1b0c1103739013784d11ae/jiter-0.15.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:913d02d29c9606643418d9ccfc3b72492ab25a6bf7889934e09a3490f8d3438b", size = 342453, upload-time = "2026-05-19T10:09:39.294Z" }, + { url = "https://files.pythonhosted.org/packages/34/56/55d76614af37fe3f22a3347d1e410d2a15da581997cb2da499a625000bb5/jiter-0.15.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b15d3ec9b0449c40e85319bdb4caa8b77ab526e74f5532ed94bec15e2f66822c", size = 345606, upload-time = "2026-05-19T10:09:40.727Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/38/505941b2b092fd5bbbd60a52a880db1173f1690ae6751bed3af1c9ddcb4e/jiter-0.15.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:631f13a3d04e97d4e083993b10f4b99530e3a10d953e2eb5e196b7dc7f812ce0", size = 303769, upload-time = "2026-05-19T10:09:42.203Z" }, + { url = "https://files.pythonhosted.org/packages/e7/95/a06692b29e77473f286e1ec1f426d3ca44d7b5843be8ad21d7a5f3fcdcc0/jiter-0.15.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:b6c0ffae686c39bf3737be60793783267628783ea42545632c10b291105aee45", size = 305128, upload-time = "2026-05-19T10:09:43.657Z" }, + { url = "https://files.pythonhosted.org/packages/23/85/7270d7ad41d6061a25b950c6bf91d638bd9aacb113200a8c8d57a055fd67/jiter-0.15.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d54fb5b31dea401a41af3f8a7d2512e9b6a6a005491e6166c7e4ffab9639a9c", size = 340459, upload-time = "2026-05-19T10:09:45.452Z" }, + { url = "https://files.pythonhosted.org/packages/c8/8d/302cb2057b7513327b4d575cff6b1d066ee6431a5357fc3f8867cd684406/jiter-0.15.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54d5d6090cdc1b7c9e780dfb04949a990adb1e301a2fc0bbcee7de4638d33f9a", size = 344469, upload-time = "2026-05-19T10:09:46.864Z" }, +] + [[package]] name = "jmespath" version = "1.1.0" @@ -473,6 +613,9 @@ dependencies = [ ] [package.optional-dependencies] +anthropic = [ + { name = "anthropic" }, +] bedrock = [ { name = "boto3" }, ] @@ -491,8 +634,14 @@ mistral = [ { name = "mistralai" }, ] +[package.dev-dependencies] +dev = [ + { name = "anthropic" }, +] + [package.metadata] requires-dist = [ + { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.30" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.34" }, { name = "boto3", marker = "extra == 'dev'", specifier = ">=1.34" }, { name = "hypothesis", marker = "extra == 'dev'", 
specifier = ">=6" }, @@ -506,7 +655,10 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.6" }, { name = "types-requests", marker = "extra == 'dev'", specifier = ">=2.31" }, ] -provides-extras = ["bedrock", "mistral", "dev"] +provides-extras = ["bedrock", "mistral", "dev", "anthropic"] + +[package.metadata.requires-dev] +dev = [{ name = "anthropic", specifier = ">=0.30" }] [[package]] name = "librt" @@ -992,6 +1144,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + [[package]] name = "sortedcontainers" version = "2.4.0" From c23e6920df40a9d6ef4018d22c4041778f289cfe Mon Sep 17 00:00:00 2001 From: Anass Date: Fri, 22 May 2026 09:33:07 +0200 Subject: [PATCH 2/5] Fix flaky test_repeated_overflow_keeps_window_sliding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test set max_batch_size == max_buffer_size == 100, which caused the push that brings the buffer to 100 to trigger a wake on the background worker. 
The worker would take a batch (emptying the buffer) and then race with the remaining 150 pushes to call slow_sender. On CI's slower runners the worker sometimes squeezed in additional batches before slow_sender finally blocked, leaving the buffer with fewer items than the expected sliding window. Setting max_batch_size > max_buffer_size guarantees push() never sets the wake event (buffer can never reach max_batch_size). Combined with a long flush_interval the worker only runs once shutdown() releases the pause in the finally block — fully deterministic. Verified with 5 consecutive runs. --- tests/unit/test_buffer_overflow.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_buffer_overflow.py b/tests/unit/test_buffer_overflow.py index 19d2e0f..d1c9907 100644 --- a/tests/unit/test_buffer_overflow.py +++ b/tests/unit/test_buffer_overflow.py @@ -45,7 +45,11 @@ def test_repeated_overflow_keeps_window_sliding(): def slow_sender(batch): paused.wait(timeout=30.0) - q = EventQueue(sender=slow_sender, flush_interval=10.0, max_batch_size=100, max_buffer_size=100) + # max_batch_size > max_buffer_size keeps the background worker from ever + # being woken by push (buffer can never reach max_batch_size). Combined with + # a long flush_interval, the test is deterministic — the worker only runs + # once shutdown() releases `paused` in the finally block.
+ q = EventQueue(sender=slow_sender, flush_interval=60.0, max_batch_size=10_000, max_buffer_size=100) try: for i in range(250): # 150 events overflow q.push({"i": i}) From 0f79c5a8342bdc590f94bedb3b8e0d9b6439b7ac Mon Sep 17 00:00:00 2001 From: Anass Date: Fri, 29 May 2026 11:30:13 +0200 Subject: [PATCH 3/5] Add native OpenAI SDK support (Chat Completions + Responses API) Adapter handles both API shapes with auto-detection: Chat Completions (client.chat.completions.create): usage.prompt_tokens -> input usage.completion_tokens -> output usage.prompt_tokens_details.cached_tokens -> cache_read usage.prompt_tokens_details.audio_tokens -> audio_input usage.completion_tokens_details.reasoning_tokens -> reasoning (o-series) usage.completion_tokens_details.audio_tokens -> audio_output count of choices[0].message.tool_calls -> tool_calls Responses API (client.responses.create): usage.input_tokens -> input usage.output_tokens -> output usage.input_tokens_details.cached_tokens -> cache_read usage.output_tokens_details.reasoning_tokens -> reasoning count of output[].type == "function_call" -> tool_calls Wrapper covers both methods, sync + streaming, on both OpenAI and AsyncOpenAI. For Chat Completions streaming, auto-injects stream_options.include_usage=true when missing so the final chunk carries usage data (without that flag, OpenAI emits no usage on streamed responses). CanonicalUsage extended with audio_output (mapped to llm_audio_output_tokens) to capture GPT-4o-audio output usage. OpenAI is the first provider to actually populate llm_reasoning_tokens (o-series surfaces reasoning tokens separately; Anthropic/Bedrock fold them into output_tokens). Predicted Outputs tokens (accepted/rejected_prediction_tokens) are intentionally not surfaced -- documented in the adapter docstring as a v1 gap. 27 new unit tests (18 adapter + 9 wrapper). 5 live integration tests gated on OPENAI_API_KEY. 10 captured response fixtures from the real OpenAI API. 
Total: 283 unit tests passing, ruff + mypy strict clean. --- CHANGELOG.md | 14 +- README.md | 56 +- pyproject.toml | 6 +- src/lago_agent_sdk/adapters/__init__.py | 2 + src/lago_agent_sdk/adapters/openai_native.py | 157 ++++ src/lago_agent_sdk/canonical.py | 2 + src/lago_agent_sdk/config.py | 1 + src/lago_agent_sdk/sdk.py | 10 +- src/lago_agent_sdk/wrappers/openai.py | 185 +++++ tests/integration/test_live_openai.py | 190 +++++ .../unit/adapters/fixtures/capture_openai.py | 226 +++++ .../fixtures/openai_native/01_plain_chat.json | 42 + .../openai_native/02_tool_use_chat.json | 51 ++ .../openai_native/03_cache_call1_chat.json | 42 + .../openai_native/04_cache_call2_chat.json | 42 + .../openai_native/05_streaming_chat.json | 776 ++++++++++++++++++ .../openai_native/06_reasoning_chat.json | 42 + .../openai_native/07_multi_turn_chat.json | 42 + .../openai_native/08_plain_responses.json | 80 ++ .../openai_native/09_tool_use_responses.json | 94 +++ .../openai_native/10_reasoning_responses.json | 88 ++ tests/unit/adapters/test_openai_native.py | 228 +++++ tests/unit/test_wrapper_openai.py | 296 +++++++ 23 files changed, 2652 insertions(+), 20 deletions(-) create mode 100644 src/lago_agent_sdk/adapters/openai_native.py create mode 100644 src/lago_agent_sdk/wrappers/openai.py create mode 100644 tests/integration/test_live_openai.py create mode 100644 tests/unit/adapters/fixtures/capture_openai.py create mode 100644 tests/unit/adapters/fixtures/openai_native/01_plain_chat.json create mode 100644 tests/unit/adapters/fixtures/openai_native/02_tool_use_chat.json create mode 100644 tests/unit/adapters/fixtures/openai_native/03_cache_call1_chat.json create mode 100644 tests/unit/adapters/fixtures/openai_native/04_cache_call2_chat.json create mode 100644 tests/unit/adapters/fixtures/openai_native/05_streaming_chat.json create mode 100644 tests/unit/adapters/fixtures/openai_native/06_reasoning_chat.json create mode 100644 
tests/unit/adapters/fixtures/openai_native/07_multi_turn_chat.json create mode 100644 tests/unit/adapters/fixtures/openai_native/08_plain_responses.json create mode 100644 tests/unit/adapters/fixtures/openai_native/09_tool_use_responses.json create mode 100644 tests/unit/adapters/fixtures/openai_native/10_reasoning_responses.json create mode 100644 tests/unit/adapters/test_openai_native.py create mode 100644 tests/unit/test_wrapper_openai.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fa696ed..4d857d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,10 +5,22 @@ All notable changes to this project will be documented here. Format follows [Kee ## [Unreleased] ### Added +- Native `openai` SDK support covering both APIs: `chat.completions.create` and `responses.create`, each with sync + streaming. Same coverage on `AsyncOpenAI`. +- `extract_openai_native` adapter handles both API shapes with auto-detection: + - Chat Completions: `prompt_tokens`, `completion_tokens`, `prompt_tokens_details.{cached_tokens, audio_tokens}`, `completion_tokens_details.{reasoning_tokens, audio_tokens}`, count of `choices[0].message.tool_calls`. + - Responses API: `input_tokens`, `output_tokens`, `input_tokens_details.cached_tokens`, `output_tokens_details.reasoning_tokens`, count of `output[].type == "function_call"`. +- **First provider to populate `llm_reasoning_tokens`** — OpenAI o-series models (`o4-mini`, `o1`, etc.) surface reasoning token counts separately. +- Auto-injection of `stream_options={"include_usage": True}` when the customer sets `stream=True` without it, so streamed Chat Completions emit usage on the final chunk. +- `audio_output` field added to `CanonicalUsage` (maps to `llm_audio_output_tokens`), populated by GPT-4o-audio responses. +- `openai` optional dependency group: `pip install 'lago-agent-sdk[openai]'`. +- 27 new unit tests (18 adapter + 9 wrapper) and 5 live integration tests (gated on `OPENAI_API_KEY`). Total: 283 unit tests. 
+- 10 captured response fixtures from the real OpenAI API (plain chat, tool use, auto-caching, streaming with usage, o-series reasoning, multi-turn, Responses API plain + tool use + reasoning). + +### Previously in unreleased (Anthropic) - Native `anthropic` SDK support. Wraps `Anthropic.messages.create` (including `stream=True`) and `Anthropic.messages.stream(...)` context manager. Same coverage on `AsyncAnthropic` (sync + async variants). - `extract_anthropic_native` adapter with the full Anthropic field map: `input_tokens`, `output_tokens`, `cache_creation_input_tokens`, `cache_read_input_tokens`, `cache_creation.ephemeral_5m_input_tokens`, `cache_creation.ephemeral_1h_input_tokens`, `content[].type == "tool_use"`. - `anthropic` optional dependency group: `pip install 'lago-agent-sdk[anthropic]'`. -- 19 new unit tests (adapter + wrapper) and 3 live integration tests (gated on `ANTHROPIC_API_KEY`). Total: 256 unit tests, ≥80% coverage maintained. +- 19 unit tests (adapter + wrapper) and 3 live integration tests (gated on `ANTHROPIC_API_KEY`). - 9 captured response fixtures from the real Anthropic API (plain, tool use, 5m + 1h prompt caching, extended thinking, streaming, multi-turn). diff --git a/README.md b/README.md index b8855c7..ba96fc3 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ pip install lago-agent-sdk For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`). For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`). For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`). +For OpenAI native support: `pip install 'lago-agent-sdk[openai]'` (adds `openai`). 
## Quickstart — Bedrock @@ -88,6 +89,27 @@ resp = client.chat.complete( sdk.flush() ``` +## Quickstart — OpenAI + +```python +from openai import OpenAI +from lago_agent_sdk import LagoSDK + +sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme") +client = sdk.wrap(OpenAI(api_key="...")) + +resp = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Hello"}], + max_completion_tokens=200, +) +sdk.flush() +``` + +Works with `OpenAI` and `AsyncOpenAI`. Covers both **Chat Completions** (`client.chat.completions.create`) and the newer **Responses API** (`client.responses.create`), sync + streaming. For streaming, the wrapper auto-injects `stream_options={"include_usage": True}` so the final chunk carries usage data — without it OpenAI emits no usage on streamed responses. + +**Reasoning tokens** (`llm_reasoning_tokens`) populate automatically when you call an o-series model (`o4-mini`, `o1`, etc.) — OpenAI is the first provider to expose this metric separately. + ## Multi-tenant — pick a subscription per call Three ways to set the `external_subscription_id`, in priority order: @@ -114,26 +136,28 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks. | AWS Bedrock | `InvokeModel` (sync + stream), 7 model families | ✓ | | Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ | | Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ | -| OpenAI | native SDK | Phase 2 | -| Google Gemini | native SDK | Phase 2 | +| OpenAI | native SDK (`chat.completions.create` + `responses.create`, sync + async + stream) | ✓ | +| Google Gemini | native SDK | Phase 3 | | LiteLLM | callback bridge | Phase 4 | ## Token dimensions captured -`CanonicalUsage` carries 10 numeric fields. 
Which ones populate depends on the provider: - -| Field | Lago metric code | Bedrock | Anthropic native | Mistral native | -|---|---|---|---|---| -| input | `llm_input_tokens` | ✓ | ✓ | ✓ | -| output | `llm_output_tokens` | ✓ | ✓ | ✓ | -| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | -| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | -| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | -| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | -| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | -| image_input / audio_input | `llm_image/audio_input_tokens` | ✗ | ✗ | ✗ | - -Reasoning, image, and audio fields will populate when Phase 2 native OpenAI ships. +`CanonicalUsage` carries 11 numeric fields. Which ones populate depends on the provider: + +| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI | +|---|---|---|---|---|---| +| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ | +| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ | +| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) | +| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ (auto-cache; OpenAI doesn't surface creation counts) | +| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ | +| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series models)** | +| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ | +| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio input) | +| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio output) | +| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3 — multimodal adapter) | + +OpenAI's Predicted Outputs tokens 
(`accepted_prediction_tokens`, `rejected_prediction_tokens`) are not surfaced — see the OpenAI adapter docstring for details on this intentional gap. ## Error policy diff --git a/pyproject.toml b/pyproject.toml index 4044de0..77b3897 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,9 @@ dev = [ anthropic = [ "anthropic>=0.30", ] +openai = [ + "openai>=1.50", +] [project.urls] Homepage = "https://www.getlago.com" @@ -82,10 +85,11 @@ strict = true files = ["src/lago_agent_sdk"] [[tool.mypy.overrides]] -module = ["boto3.*", "botocore.*", "mistralai.*"] +module = ["boto3.*", "botocore.*", "mistralai.*", "openai.*"] ignore_missing_imports = true [dependency-groups] dev = [ "anthropic>=0.30", + "openai>=1.50", ] diff --git a/src/lago_agent_sdk/adapters/__init__.py b/src/lago_agent_sdk/adapters/__init__.py index 217ed3d..1d24920 100644 --- a/src/lago_agent_sdk/adapters/__init__.py +++ b/src/lago_agent_sdk/adapters/__init__.py @@ -2,6 +2,7 @@ from .bedrock_converse import extract_bedrock_converse from .bedrock_invoke import extract_bedrock_invoke, pick_invoke_adapter from .mistral_native import extract_mistral_native +from .openai_native import extract_openai_native __all__ = [ "extract_anthropic_native", @@ -9,4 +10,5 @@ "extract_bedrock_invoke", "pick_invoke_adapter", "extract_mistral_native", + "extract_openai_native", ] diff --git a/src/lago_agent_sdk/adapters/openai_native.py b/src/lago_agent_sdk/adapters/openai_native.py new file mode 100644 index 0000000..55bd09d --- /dev/null +++ b/src/lago_agent_sdk/adapters/openai_native.py @@ -0,0 +1,157 @@ +"""OpenAI native adapter — verified against real fixtures. + +Handles both Chat Completions API (`client.chat.completions.create`) and the +Responses API (`client.responses.create`). They share a similar concept but +use different field names — we detect which by looking at the usage shape. 
+ +CHAT COMPLETIONS field mapping (`usage.*`): + prompt_tokens → input + completion_tokens → output + prompt_tokens_details.cached_tokens → cache_read + prompt_tokens_details.audio_tokens → audio_input + completion_tokens_details.reasoning_tokens → reasoning (o-series models) + completion_tokens_details.audio_tokens → audio_output (GPT-4o-audio output) + count of choices[0].message.tool_calls → tool_calls + +RESPONSES API field mapping (`usage.*`): + input_tokens → input + output_tokens → output + input_tokens_details.cached_tokens → cache_read + output_tokens_details.reasoning_tokens → reasoning + count of output[].type == "function_call" → tool_calls + +Not exposed by either API: + cache_write, cache_write_5m, cache_write_1h — OpenAI auto-caches without + surfacing creation counts. + +Known gaps (intentional, documented): + - completion_tokens_details.accepted_prediction_tokens — Predicted Outputs + feature: subset of completion_tokens (the ones that matched the prediction). + Skipped to avoid double-counting against completion_tokens. + - completion_tokens_details.rejected_prediction_tokens — Predicted Outputs: + extra cost beyond completion_tokens (prediction tokens the model rejected). + Skipped for v1 — customers using Predicted Outputs can read this from + `extras["completion_tokens_details"]` (if drift-detection captures it) or + via the openai response object directly. +""" + +from __future__ import annotations + +from typing import Any, cast + +from ..canonical import CanonicalUsage + +# Top-level usage fields we recognize across BOTH chat completions and responses APIs. 
+_KNOWN_USAGE_FIELDS = { + # chat completions + "prompt_tokens", + "completion_tokens", + "total_tokens", + "prompt_tokens_details", + "completion_tokens_details", + # responses API + "input_tokens", + "output_tokens", + "input_tokens_details", + "output_tokens_details", +} + + +def _safe_dict(v: Any) -> dict[str, Any]: + return v if isinstance(v, dict) else {} + + +def _safe_int(v: Any) -> int: + try: + return max(0, int(v or 0)) + except (TypeError, ValueError): + return 0 + + +def _to_dict(obj: Any) -> dict[str, Any]: + """Best-effort pydantic-or-dict to dict (OpenAI SDK returns pydantic objects).""" + if isinstance(obj, dict): + return obj + if hasattr(obj, "model_dump"): + try: + return cast(dict[str, Any], obj.model_dump()) + except Exception: # noqa: BLE001 + pass + return {} + + +def _count_chat_tool_calls(resp: dict[str, Any]) -> int: + """choices[0].message.tool_calls is a list of called functions in Chat Completions.""" + choices = resp.get("choices") + if not isinstance(choices, list) or not choices: + return 0 + first = choices[0] + if not isinstance(first, dict): + return 0 + message = _safe_dict(first.get("message")) + tcs = message.get("tool_calls") + return len(tcs) if isinstance(tcs, list) else 0 + + +def _count_responses_tool_calls(resp: dict[str, Any]) -> int: + """In the Responses API, tool invocations are items in `output` with type == "function_call".""" + output = resp.get("output") + if not isinstance(output, list): + return 0 + return sum(1 for item in output if isinstance(item, dict) and item.get("type") == "function_call") + + +def extract_openai_native(response: Any, model_id: str = "") -> CanonicalUsage: + """Translate an OpenAI response (chat completion or responses API) → CanonicalUsage. + + Accepts the SDK's pydantic objects, dicts (e.g. captured fixtures), or the + synthetic `{"usage": {...}}` blob produced by the streaming wrapper. 
+ """ + resp = _to_dict(response) if not isinstance(response, dict) else response + usage = _safe_dict(resp.get("usage")) + + # Detect which API shape we have. Chat Completions uses prompt_tokens; + # Responses API uses input_tokens. They never both appear. + is_responses_api = "input_tokens" in usage and "prompt_tokens" not in usage + + if is_responses_api: + input_tokens = _safe_int(usage.get("input_tokens")) + output_tokens = _safe_int(usage.get("output_tokens")) + input_details = _safe_dict(usage.get("input_tokens_details")) + output_details = _safe_dict(usage.get("output_tokens_details")) + cache_read = _safe_int(input_details.get("cached_tokens")) + reasoning = _safe_int(output_details.get("reasoning_tokens")) + audio_input = _safe_int(input_details.get("audio_tokens")) + audio_output = 0 # not exposed by Responses API today + tool_calls = _count_responses_tool_calls(resp) + api = "responses" + else: + input_tokens = _safe_int(usage.get("prompt_tokens")) + output_tokens = _safe_int(usage.get("completion_tokens")) + prompt_details = _safe_dict(usage.get("prompt_tokens_details")) + completion_details = _safe_dict(usage.get("completion_tokens_details")) + cache_read = _safe_int(prompt_details.get("cached_tokens")) + reasoning = _safe_int(completion_details.get("reasoning_tokens")) + audio_input = _safe_int(prompt_details.get("audio_tokens")) + audio_output = _safe_int(completion_details.get("audio_tokens")) + tool_calls = _count_chat_tool_calls(resp) + api = "chat_completions" + + extras: dict[str, Any] = {} + for k, v in usage.items(): + if k not in _KNOWN_USAGE_FIELDS: + extras[k] = v + + return CanonicalUsage( + input=input_tokens, + output=output_tokens, + cache_read=cache_read, + reasoning=reasoning, + audio_input=audio_input, + audio_output=audio_output, + tool_calls=tool_calls, + model=model_id or (resp.get("model") if isinstance(resp.get("model"), str) else "") or "", + provider="openai", + api=api, + extras=extras, + ) diff --git 
a/src/lago_agent_sdk/canonical.py b/src/lago_agent_sdk/canonical.py index 6ec8dff..715a595 100644 --- a/src/lago_agent_sdk/canonical.py +++ b/src/lago_agent_sdk/canonical.py @@ -18,6 +18,7 @@ class CanonicalUsage: tool_calls: int = 0 image_input: int = 0 audio_input: int = 0 + audio_output: int = 0 model: str = "" provider: str = "" api: str = "" @@ -34,6 +35,7 @@ class CanonicalUsage: "tool_calls", "image_input", "audio_input", + "audio_output", ) def nonzero_numeric(self) -> dict[str, int]: diff --git a/src/lago_agent_sdk/config.py b/src/lago_agent_sdk/config.py index 0383117..28bb1c5 100644 --- a/src/lago_agent_sdk/config.py +++ b/src/lago_agent_sdk/config.py @@ -16,6 +16,7 @@ "tool_calls": "llm_tool_calls", "image_input": "llm_image_input_tokens", "audio_input": "llm_audio_input_tokens", + "audio_output": "llm_audio_output_tokens", } diff --git a/src/lago_agent_sdk/sdk.py b/src/lago_agent_sdk/sdk.py index c303e03..ce55ccb 100644 --- a/src/lago_agent_sdk/sdk.py +++ b/src/lago_agent_sdk/sdk.py @@ -87,13 +87,19 @@ def wrap( from .wrappers.anthropic import wrap_anthropic_client return wrap_anthropic_client(self, client, dimensions=dimensions, subscription=subscription) + if kind == "openai": + from .wrappers.openai import wrap_openai_client + + return wrap_openai_client(self, client, dimensions=dimensions, subscription=subscription) if kind == "unknown": raise UnknownClientError( f"Unknown client passed to wrap(): {type(client).__module__}.{type(client).__name__}. " - "Supported: boto3 bedrock-runtime, mistralai.client.Mistral, anthropic.Anthropic / AsyncAnthropic." + "Supported: boto3 bedrock-runtime, mistralai.client.Mistral, " + "anthropic.Anthropic / AsyncAnthropic, openai.OpenAI / AsyncOpenAI." ) raise UnknownClientError( - f"Client kind '{kind}' is not yet supported. Implemented: 'bedrock', 'mistral', 'anthropic'." + f"Client kind '{kind}' is not yet supported. " + "Implemented: 'bedrock', 'mistral', 'anthropic', 'openai'." 
) # ------------------------------------------------------------------ diff --git a/src/lago_agent_sdk/wrappers/openai.py b/src/lago_agent_sdk/wrappers/openai.py new file mode 100644 index 0000000..1864986 --- /dev/null +++ b/src/lago_agent_sdk/wrappers/openai.py @@ -0,0 +1,185 @@ +"""openai SDK wrapper. + +Wraps the public methods of `OpenAI` (and `AsyncOpenAI`) clients in place — +instrumentation never breaks the customer's call. + +Methods wrapped: + - .chat.completions.create(...) — non-streaming and stream=True both supported + - .responses.create(...) — Responses API, sync + streaming + - AsyncOpenAI variants of both — async non-streaming and stream=True + +Streaming behavior: + When `stream=True` is passed without `stream_options={"include_usage": True}` + (Chat Completions) we automatically inject it so the final chunk carries the + usage payload we need to bill. Without that flag, OpenAI's stream emits no + usage data and the customer gets silent under-billing. + +Per-call override: pop `extra_lago={"subscription": ..., "dimensions": ...}` from +kwargs before forwarding so OpenAI's strict validation doesn't reject it. +""" + +from __future__ import annotations + +import logging +from collections.abc import AsyncIterator, Iterator +from typing import Any + +from ..adapters import extract_openai_native + +logger = logging.getLogger("lago_agent_sdk.wrappers.openai") + +_INSTRUMENTED_ATTR = "_lago_instrumented" +_LAGO_KWARG = "extra_lago" + + +def _pop_lago_kwarg(kwargs: dict[str, Any]) -> dict[str, Any]: + return kwargs.pop(_LAGO_KWARG, {}) or {} + + +def _ensure_stream_options_include_usage(kwargs: dict[str, Any]) -> None: + """If stream=True without include_usage, inject it. No-op otherwise. + + Only meaningful for Chat Completions; the Responses API exposes usage on its + own final event already. 
+ """ + if not kwargs.get("stream"): + return + so = kwargs.get("stream_options") + if isinstance(so, dict): + # Respect customer's explicit choice if they set it + if "include_usage" in so: + return + kwargs["stream_options"] = {**so, "include_usage": True} + else: + kwargs["stream_options"] = {"include_usage": True} + + +def _is_response_like(obj: Any) -> bool: + """Real responses expose `.usage`; Stream iterators don't. + + Safe against properties that raise — falls through to False so the customer's + call is never broken. + """ + try: + if isinstance(obj, dict): + return "usage" in obj + return hasattr(obj, "usage") + except Exception: # noqa: BLE001 + return False + + +def wrap_openai_client( + sdk: Any, + client: Any, + dimensions: dict[str, Any] | None = None, + subscription: str | None = None, +) -> Any: + """In-place wrap of an `openai.OpenAI` or `openai.AsyncOpenAI` client. Idempotent.""" + if getattr(client, _INSTRUMENTED_ATTR, False): + logger.info("lago: openai client already wrapped — skipping") + return client + + base_dims = dict(dimensions or {}) + base_sub = subscription + is_async = type(client).__name__.startswith("Async") + + def _resolve_opts(lago_opts: dict[str, Any]) -> tuple[str | None, dict[str, Any]]: + sub = lago_opts.get("subscription") or base_sub + dims = {**base_dims, **(lago_opts.get("dimensions") or {})} + return sub, dims + + def _emit_from(payload: Any, model_id: str, sub: str | None, dims: dict[str, Any]) -> None: + try: + usage = extract_openai_native(payload, model_id=model_id) + sdk.emit(usage, subscription=sub, dimensions=dims) + except Exception as exc: # noqa: BLE001 + logger.warning("lago: openai emit failed: %s", exc) + + def _make_sync_create(original: Any) -> Any: + def _create(*args: Any, **kwargs: Any) -> Any: + lago_opts = _pop_lago_kwarg(kwargs) + _ensure_stream_options_include_usage(kwargs) + model_id = kwargs.get("model", "") + sub, dims = _resolve_opts(lago_opts) + response = original(*args, **kwargs) + + if 
_is_response_like(response): + _emit_from(response, model_id, sub, dims) + return response + + # Streaming — wrap the iterator to capture the final usage on close. + def _wrap_stream(src: Iterator[Any]) -> Iterator[Any]: + last_usage: dict[str, Any] | None = None + try: + for event in src: + payload = event.model_dump() if hasattr(event, "model_dump") else event + if isinstance(payload, dict): + usage = payload.get("usage") + if isinstance(usage, dict) and usage: + last_usage = {"usage": usage} + yield event + finally: + if last_usage is not None: + _emit_from(last_usage, model_id, sub, dims) + + return _wrap_stream(response) + + return _create + + def _make_async_create(original: Any) -> Any: + async def _create_async(*args: Any, **kwargs: Any) -> Any: + lago_opts = _pop_lago_kwarg(kwargs) + _ensure_stream_options_include_usage(kwargs) + model_id = kwargs.get("model", "") + sub, dims = _resolve_opts(lago_opts) + response = await original(*args, **kwargs) + + if _is_response_like(response): + _emit_from(response, model_id, sub, dims) + return response + + async def _wrap_async_stream(src: AsyncIterator[Any]) -> AsyncIterator[Any]: + last_usage: dict[str, Any] | None = None + try: + async for event in src: + payload = event.model_dump() if hasattr(event, "model_dump") else event + if isinstance(payload, dict): + usage = payload.get("usage") + if isinstance(usage, dict) and usage: + last_usage = {"usage": usage} + yield event + finally: + if last_usage is not None: + _emit_from(last_usage, model_id, sub, dims) + + return _wrap_async_stream(response) + + return _create_async + + # ------------------------------------------------------------------ + # chat.completions.create + # ------------------------------------------------------------------ + chat = getattr(client, "chat", None) + completions = getattr(chat, "completions", None) if chat is not None else None + if completions is not None: + original_chat_create = getattr(completions, "create", None) + if 
original_chat_create is not None: + completions.create = ( + _make_async_create(original_chat_create) if is_async else _make_sync_create(original_chat_create) + ) + + # ------------------------------------------------------------------ + # responses.create + # ------------------------------------------------------------------ + responses_namespace = getattr(client, "responses", None) + if responses_namespace is not None: + original_responses_create = getattr(responses_namespace, "create", None) + if original_responses_create is not None: + responses_namespace.create = ( + _make_async_create(original_responses_create) + if is_async + else _make_sync_create(original_responses_create) + ) + + setattr(client, _INSTRUMENTED_ATTR, True) + return client diff --git a/tests/integration/test_live_openai.py b/tests/integration/test_live_openai.py new file mode 100644 index 0000000..4a90189 --- /dev/null +++ b/tests/integration/test_live_openai.py @@ -0,0 +1,190 @@ +"""End-to-end OpenAI integration test — live API + mocked Lago. + +Skipped unless OPENAI_API_KEY is set. 
+""" + +from __future__ import annotations + +import json +import os +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer + +import pytest + +from lago_agent_sdk import LagoSDK + +pytestmark = pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY"), + reason="OPENAI_API_KEY not set", +) + + +class _MockLago(BaseHTTPRequestHandler): + def do_POST(self): # noqa: N802 + n = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(n) + self.server.received.append(json.loads(body)) # type: ignore[attr-defined] + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(b'{"ok": true}') + + def log_message(self, *_args, **_kwargs): + return + + +def _spawn_lago(): + s = HTTPServer(("127.0.0.1", 0), _MockLago) + s.received = [] # type: ignore[attr-defined] + threading.Thread(target=s.serve_forever, daemon=True).start() + return s, f"http://127.0.0.1:{s.server_port}" + + +def _collect_events(server) -> list[dict]: + return [e for p in server.received for e in p["events"]] + + +def _codes(events) -> set[str]: + return {e["code"] for e in events} + + +# -------------------------------------------------------------------------- +# Chat Completions +# -------------------------------------------------------------------------- +def test_live_openai_chat_completions_create_emits_to_lago() -> None: + from openai import OpenAI + + server, url = _spawn_lago() + try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"])) + client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hi"}], + max_completion_tokens=20, + ) + assert sdk.flush(timeout=10.0) + sdk.shutdown(timeout=2.0) + events = _collect_events(server) + codes = _codes(events) + assert "llm_input_tokens" in codes + assert "llm_output_tokens" in codes + for e in events: + assert 
e["properties"]["api"] == "chat_completions" + assert e["properties"]["provider"] == "openai" + finally: + server.shutdown() + + +def test_live_openai_chat_completions_streaming_emits_from_final_chunk() -> None: + from openai import OpenAI + + server, url = _spawn_lago() + try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"])) + # Note: stream_options.include_usage is auto-injected by the wrapper + for _ in client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hi"}], + max_completion_tokens=20, + stream=True, + ): + pass + assert sdk.flush(timeout=10.0) + sdk.shutdown(timeout=2.0) + events = _collect_events(server) + codes = _codes(events) + assert "llm_input_tokens" in codes + assert "llm_output_tokens" in codes + finally: + server.shutdown() + + +def test_live_openai_chat_completions_tool_use_emits_tool_calls() -> None: + from openai import OpenAI + + server, url = _spawn_lago() + try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"])) + client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "What's the weather in Tokyo?"}], + tools=[ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a city.", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + ], + tool_choice={"type": "function", "function": {"name": "get_weather"}}, + max_completion_tokens=200, + ) + assert sdk.flush(timeout=10.0) + sdk.shutdown(timeout=2.0) + events = _collect_events(server) + assert "llm_tool_calls" in _codes(events) + finally: + server.shutdown() + + +def test_live_openai_reasoning_model_emits_reasoning_tokens() -> None: + """o-series models populate 
completion_tokens_details.reasoning_tokens. + First provider to actually expose this metric.""" + from openai import OpenAI + + server, url = _spawn_lago() + try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"])) + client.chat.completions.create( + model="o4-mini", + messages=[{"role": "user", "content": "What is 17 * 23? Just the number."}], + max_completion_tokens=2000, + ) + assert sdk.flush(timeout=30.0) + sdk.shutdown(timeout=2.0) + events = _collect_events(server) + codes = _codes(events) + assert "llm_input_tokens" in codes + assert "llm_output_tokens" in codes + assert "llm_reasoning_tokens" in codes # ← the key win for OpenAI + finally: + server.shutdown() + + +# -------------------------------------------------------------------------- +# Responses API +# -------------------------------------------------------------------------- +def test_live_openai_responses_create_emits_to_lago() -> None: + from openai import OpenAI + + server, url = _spawn_lago() + try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(OpenAI(api_key=os.environ["OPENAI_API_KEY"])) + client.responses.create( + model="gpt-4o-mini", + input="Say hi", + max_output_tokens=20, + ) + assert sdk.flush(timeout=10.0) + sdk.shutdown(timeout=2.0) + events = _collect_events(server) + codes = _codes(events) + assert "llm_input_tokens" in codes + assert "llm_output_tokens" in codes + for e in events: + assert e["properties"]["api"] == "responses" + assert e["properties"]["provider"] == "openai" + finally: + server.shutdown() diff --git a/tests/unit/adapters/fixtures/capture_openai.py b/tests/unit/adapters/fixtures/capture_openai.py new file mode 100644 index 0000000..5bcdd25 --- /dev/null +++ b/tests/unit/adapters/fixtures/capture_openai.py @@ -0,0 +1,226 @@ +"""Capture real OpenAI API responses for adapter design. 
+ +Saves raw responses to tests/unit/adapters/fixtures/openai_native/{name}.json +so we can verify the field mappings against reality before writing the adapter. + +Covers both Chat Completions (`client.chat.completions.create`) and +the Responses API (`client.responses.create`) — they have different +usage shapes. + +Reads OPENAI_API_KEY from env. +""" + +from __future__ import annotations + +import json +import os +import pathlib +import sys + +from openai import OpenAI + +OUT = pathlib.Path(__file__).parent / "openai_native" +OUT.mkdir(parents=True, exist_ok=True) + + +def to_dict(response) -> dict: + """OpenAI SDK returns pydantic models — convert to plain dict for JSON.""" + if hasattr(response, "model_dump"): + return response.model_dump() + if hasattr(response, "dict"): + return response.dict() + return json.loads(response.json()) if hasattr(response, "json") else dict(response) + + +def save(name: str, model: str, payload: dict) -> None: + path = OUT / f"{name}.json" + path.write_text(json.dumps({"_model_id": model, "_response": payload}, indent=2, default=str)) + print(f" ✓ saved {path.name}") + + +def main() -> int: + key = os.environ.get("OPENAI_API_KEY") + if not key: + print("error: set OPENAI_API_KEY", file=sys.stderr) + return 2 + + client = OpenAI(api_key=key) + PROMPT = "Write one sentence about dolphins." + + # ================================================================= + # Chat Completions API — client.chat.completions.create(...) + # ================================================================= + + # ----- 1. Plain chat completion ----- + print("\n[1] plain chat — gpt-4o-mini") + r = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": PROMPT}], + max_completion_tokens=80, + ) + save("01_plain_chat", "gpt-4o-mini", to_dict(r)) + + # ----- 2.
Tool use (function calling) ----- + print("\n[2] tool use chat — gpt-4o-mini with weather tool") + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a city.", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + ] + r = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "What's the weather in Tokyo?"}], + tools=tools, + tool_choice={"type": "function", "function": {"name": "get_weather"}}, + max_completion_tokens=200, + ) + save("02_tool_use_chat", "gpt-4o-mini", to_dict(r)) + + # ----- 3. Cache hit attempt — long prompt sent twice (OpenAI auto-caches >1024 tokens) ----- + print("\n[3] cache attempt — long prompt, call 1 then call 2") + long_prompt = ( + "You are an extremely thorough expert tutor. Answer concisely and cite reasoning step by step. " + * 200 + ) + msgs = [ + {"role": "system", "content": long_prompt}, + {"role": "user", "content": "What is 2+2?"}, + ] + r1 = client.chat.completions.create( + model="gpt-4o-mini", + messages=msgs, + max_completion_tokens=20, + ) + save("03_cache_call1_chat", "gpt-4o-mini", to_dict(r1)) + + msgs2 = [ + {"role": "system", "content": long_prompt}, + {"role": "user", "content": "What is 3+3?"}, + ] + r2 = client.chat.completions.create( + model="gpt-4o-mini", + messages=msgs2, + max_completion_tokens=20, + ) + save("04_cache_call2_chat", "gpt-4o-mini", to_dict(r2)) + + # ----- 5. 
Streaming with usage included ----- + print("\n[5] streaming chat — gpt-4o-mini with stream_options.include_usage") + chunks: list[dict] = [] + stream = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": PROMPT}], + max_completion_tokens=60, + stream=True, + stream_options={"include_usage": True}, + ) + for chunk in stream: + chunks.append(to_dict(chunk)) + save("05_streaming_chat", "gpt-4o-mini", {"chunks": chunks}) + + # ----- 6. Reasoning model (o-series) — exposes reasoning_tokens ----- + print("\n[6] reasoning chat — o4-mini") + try: + r = client.chat.completions.create( + model="o4-mini", + messages=[ + { + "role": "user", + "content": ( + "Prove that the sum of the first n cubes equals the square of the sum " + "of the first n positive integers. Show each step." + ), + } + ], + max_completion_tokens=2000, + ) + save("06_reasoning_chat", "o4-mini", to_dict(r)) + except Exception as exc: # noqa: BLE001 + print(f" o4-mini error: {str(exc)[:160]}") + + # ----- 7. Multi-turn ----- + print("\n[7] multi-turn chat — gpt-4o-mini") + convo = [ + {"role": "user", "content": "What is 2+2?"}, + {"role": "assistant", "content": "2+2 equals 4."}, + {"role": "user", "content": "And times 3?"}, + ] + r = client.chat.completions.create( + model="gpt-4o-mini", + messages=convo, + max_completion_tokens=40, + ) + save("07_multi_turn_chat", "gpt-4o-mini", to_dict(r)) + + # ================================================================= + # Responses API — client.responses.create(...) + # ================================================================= + + # ----- 8. Plain Responses API call ----- + print("\n[8] plain responses — gpt-4o-mini") + try: + r = client.responses.create( + model="gpt-4o-mini", + input=PROMPT, + max_output_tokens=80, + ) + save("08_plain_responses", "gpt-4o-mini", to_dict(r)) + except Exception as exc: # noqa: BLE001 + print(f" responses.create error: {str(exc)[:160]}") + + # ----- 9. 
Responses API with tool use ----- + print("\n[9] tool use responses — gpt-4o-mini") + try: + r = client.responses.create( + model="gpt-4o-mini", + input="What's the weather in Tokyo?", + tools=[ + { + "type": "function", + "name": "get_weather", + "description": "Get current weather for a city.", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + } + ], + tool_choice="required", + max_output_tokens=200, + ) + save("09_tool_use_responses", "gpt-4o-mini", to_dict(r)) + except Exception as exc: # noqa: BLE001 + print(f" responses tool use error: {str(exc)[:160]}") + + # ----- 10. Reasoning via Responses API ----- + print("\n[10] reasoning responses — o4-mini") + try: + r = client.responses.create( + model="o4-mini", + input=( + "Prove that the sum of the first n cubes equals the square of the sum " + "of the first n positive integers. Show each step." + ), + reasoning={"effort": "low"}, + max_output_tokens=2000, + ) + save("10_reasoning_responses", "o4-mini", to_dict(r)) + except Exception as exc: # noqa: BLE001 + print(f" responses reasoning error: {str(exc)[:160]}") + + print("\nDone. 
Inspect tests/unit/adapters/fixtures/openai_native/*.json") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/unit/adapters/fixtures/openai_native/01_plain_chat.json b/tests/unit/adapters/fixtures/openai_native/01_plain_chat.json new file mode 100644 index 0000000..e573d42 --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/01_plain_chat.json @@ -0,0 +1,42 @@ +{ + "_model_id": "gpt-4o-mini", + "_response": { + "id": "chatcmpl-Dkn8rdIFbTd9EfFHjOXXA4pRjMF8R", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Dolphins are highly intelligent marine mammals known for their playful behavior, strong social bonds, and complex communication skills.", + "refusal": null, + "role": "assistant", + "annotations": [], + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1780044361, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": { + "completion_tokens": 23, + "prompt_tokens": 13, + "total_tokens": 36, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/02_tool_use_chat.json b/tests/unit/adapters/fixtures/openai_native/02_tool_use_chat.json new file mode 100644 index 0000000..1cd0872 --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/02_tool_use_chat.json @@ -0,0 +1,51 @@ +{ + "_model_id": "gpt-4o-mini", + "_response": { + "id": "chatcmpl-Dkn8sYZgalPsBNluUfJ7rWW80Fwh0", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": null, + "refusal": null, + "role": "assistant", + "annotations": [], + "audio": null, + 
"function_call": null, + "tool_calls": [ + { + "id": "call_eihD8nbuIvE6wVEC26EyyKty", + "function": { + "arguments": "{\"city\":\"Tokyo\"}", + "name": "get_weather" + }, + "type": "function" + } + ] + } + } + ], + "created": 1780044362, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "service_tier": "default", + "system_fingerprint": "fp_e2d886d409", + "usage": { + "completion_tokens": 5, + "prompt_tokens": 60, + "total_tokens": 65, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/03_cache_call1_chat.json b/tests/unit/adapters/fixtures/openai_native/03_cache_call1_chat.json new file mode 100644 index 0000000..239fdbe --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/03_cache_call1_chat.json @@ -0,0 +1,42 @@ +{ + "_model_id": "gpt-4o-mini", + "_response": { + "id": "chatcmpl-Dkn8troaHOR66ipcDGaytoKv6Vdmf", + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "message": { + "content": "2 + 2 equals 4. \n\n**Reasoning step by step:**\n1. 
Identify", + "refusal": null, + "role": "assistant", + "annotations": [], + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1780044363, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "service_tier": "default", + "system_fingerprint": "fp_196f526a25", + "usage": { + "completion_tokens": 20, + "prompt_tokens": 3819, + "total_tokens": 3839, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/04_cache_call2_chat.json b/tests/unit/adapters/fixtures/openai_native/04_cache_call2_chat.json new file mode 100644 index 0000000..ef3664f --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/04_cache_call2_chat.json @@ -0,0 +1,42 @@ +{ + "_model_id": "gpt-4o-mini", + "_response": { + "id": "chatcmpl-Dkn8uVrLuXJiU7Ef7z5Yix39CCOus", + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "message": { + "content": "3 + 3 = 6. \n\n**Reasoning:**\n1. 
Start with the number", + "refusal": null, + "role": "assistant", + "annotations": [], + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1780044364, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "service_tier": "default", + "system_fingerprint": "fp_196f526a25", + "usage": { + "completion_tokens": 20, + "prompt_tokens": 3819, + "total_tokens": 3839, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 3712 + } + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/05_streaming_chat.json b/tests/unit/adapters/fixtures/openai_native/05_streaming_chat.json new file mode 100644 index 0000000..c748280 --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/05_streaming_chat.json @@ -0,0 +1,776 @@ +{ + "_model_id": "gpt-4o-mini", + "_response": { + "chunks": [ + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "A0RHlUO5d" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": "D", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "x3EVvwJs9P" 
+ }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": "olph", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "FmQqISP" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": "ins", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "AwlMSuh8" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " are", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "M29Dn9T" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " highly", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "Uv8y" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + 
"delta": { + "content": " intelligent", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "Wo7jeAXq3tBZ84p" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " marine", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "5mQE" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " mammals", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "zaw" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " known", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "L4rI7" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": 
null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "KRkgzRl" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " their", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "8XDZI" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " playful", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "NIG" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " behavior", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "Fm" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "jyjIDVS3vK" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " complex", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "8I4" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " social", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "yHI5" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " structures", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", 
+ "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "SNNBP953Gk" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "omShNu9" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " ability", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "alj" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "3t0LIxjK" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " communicate", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "0ITzn2FwU9chcTG" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " using", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "6Mi6K" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "lPGTprxfJ" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " variety", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "3oV" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": 
"mqLE8UfV" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": " vocal", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "bCG7c" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": "izations", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "sUl" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "EAucOccXmU" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": null, + "obfuscation": "hPVGr" + }, + { + "id": "chatcmpl-Dkn8vwv6VoUsxIF5EssIBJgb3ruqv", + "choices": [], + 
"created": 1780044365, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_4f2a4e4dd8", + "usage": { + "completion_tokens": 29, + "prompt_tokens": 13, + "total_tokens": 42, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "jYiy3qiNKb" + } + ] + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/06_reasoning_chat.json b/tests/unit/adapters/fixtures/openai_native/06_reasoning_chat.json new file mode 100644 index 0000000..8cf766c --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/06_reasoning_chat.json @@ -0,0 +1,42 @@ +{ + "_model_id": "o4-mini", + "_response": { + "id": "chatcmpl-Dkn8wt8xNufUS91CnuPY3a2IUhErr", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Here is a classic proof by induction (one can also use a telescoping argument; see footnote). We want to show for all integers n\u22651: \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3 \u2002=\u2002 [1 + 2 + \u22ef + n]\u00b2 \u2002=\u2002 [n(n + 1)/2]\u00b2.\n\n**1. Base case (n=1).** \nLHS = 1\u00b3 = 1. \nRHS = [1\u00b7(1+1)/2]\u00b2 = [1\u00b72/2]\u00b2 = 1\u00b2 = 1. \nSo the formula holds at n=1.\n\n**2. Inductive step.** \nAssume the formula holds for some n\u22651, i.e. assume \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3 = [n(n+1)/2]\u00b2. \nWe must prove it then holds for n+1, namely \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3 + (n+1)\u00b3 = [(n+1)(n+2)/2]\u00b2.\n\nStarting from the LHS at n+1 we write \n\u20031\u00b3 + \u22ef + n\u00b3 + (n+1)\u00b3 \n= (1\u00b3 + \u22ef + n\u00b3) + (n+1)\u00b3 \n\u27f6 by the induction hypothesis \n= [n(n+1)/2]\u00b2 + (n+1)\u00b3. 
\n\nFactor out (n+1)\u00b2 from these two terms: \n\u2003[n(n+1)/2]\u00b2 + (n+1)\u00b3 \n= (n+1)\u00b2 \u00b7 [ n\u00b2/4 + (n+1) ] \n(because [n(n+1)/2]\u00b2 = (n+1)\u00b2\u00b7n\u00b2/4). \n\nCombine the bracket: \n\u2003n\u00b2/4 + (n+1) \n= n\u00b2/4 + (4n+4)/4 \n= (n\u00b2 + 4n + 4)/4 \n= (n+2)\u00b2/4. \n\nHence \n\u2003(1\u00b3 + \u22ef + n\u00b3 + (n+1)\u00b3) \n= (n+1)\u00b2 \u00b7 [(n+2)\u00b2/4] \n= [(n+1)(n+2)/2]\u00b2, \n\nwhich is exactly the desired formula at n+1. This completes the induction. \nTherefore by induction the identity holds for all n\u22651.\n\n\u220e\n\nFootnote (telescoping proof). Observe \n\u2003a_k := [k(k+1)/2]\u00b2. \nThen \n\u2003a_k \u2013 a_{k\u20131} \n= [k(k+1)/2]\u00b2 \u2013 [(k\u20131)k/2]\u00b2 \n= (1/4){ k\u00b2[(k+1)\u00b2 \u2013 (k\u20131)\u00b2] } \n= (1/4){ k\u00b2[ (k\u00b2+2k+1) \u2013 (k\u00b2\u20132k+1) ] } \n= (1/4){ k\u00b2\u00b74k } \n= k\u00b3. \nTherefore \n\u2003\u2211_{k=1}^n k\u00b3 = \u2211_{k=1}^n (a_k\u2013a_{k\u20131}) = a_n \u2013 a_0 = [n(n+1)/2]\u00b2.", + "refusal": null, + "role": "assistant", + "annotations": [], + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1780044366, + "model": "o4-mini-2025-04-16", + "object": "chat.completion", + "service_tier": "default", + "system_fingerprint": null, + "usage": { + "completion_tokens": 1579, + "prompt_tokens": 33, + "total_tokens": 1612, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 832, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/07_multi_turn_chat.json b/tests/unit/adapters/fixtures/openai_native/07_multi_turn_chat.json new file mode 100644 index 0000000..e543675 --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/07_multi_turn_chat.json @@ -0,0 +1,42 @@ +{ + 
"_model_id": "gpt-4o-mini", + "_response": { + "id": "chatcmpl-Dkn96bNjx7tRhEKJbRCcXPVYlugPs", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "4 times 3 equals 12.", + "refusal": null, + "role": "assistant", + "annotations": [], + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1780044376, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "service_tier": "default", + "system_fingerprint": "fp_da89e836d0", + "usage": { + "completion_tokens": 8, + "prompt_tokens": 34, + "total_tokens": 42, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + } + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/08_plain_responses.json b/tests/unit/adapters/fixtures/openai_native/08_plain_responses.json new file mode 100644 index 0000000..2c2aad7 --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/08_plain_responses.json @@ -0,0 +1,80 @@ +{ + "_model_id": "gpt-4o-mini", + "_response": { + "id": "resp_0a1d246c78dc3537006a195258f9f481a1816a2d26b761d722", + "created_at": 1780044376.0, + "error": null, + "incomplete_details": null, + "instructions": null, + "metadata": {}, + "model": "gpt-4o-mini-2024-07-18", + "object": "response", + "output": [ + { + "id": "msg_0a1d246c78dc3537006a19525a405081a193ec1a80b639a122", + "content": [ + { + "annotations": [], + "text": "Dolphins are highly intelligent marine mammals known for their playful behavior and complex social structures.", + "type": "output_text", + "logprobs": [] + } + ], + "role": "assistant", + "status": "completed", + "type": "message", + "phase": null + } + ], + "parallel_tool_calls": true, + "temperature": 1.0, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "background": false, 
+ "completed_at": 1780044378.0, + "conversation": null, + "max_output_tokens": 80, + "max_tool_calls": null, + "previous_response_id": null, + "prompt": null, + "prompt_cache_key": null, + "prompt_cache_retention": "in_memory", + "reasoning": { + "effort": null, + "generate_summary": null, + "summary": null, + "context": null + }, + "safety_identifier": null, + "service_tier": "default", + "status": "completed", + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "top_logprobs": 0, + "truncation": "disabled", + "usage": { + "input_tokens": 13, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 19, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 32 + }, + "user": null, + "billing": { + "payer": "developer" + }, + "frequency_penalty": 0.0, + "moderation": null, + "presence_penalty": 0.0, + "store": true + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/09_tool_use_responses.json b/tests/unit/adapters/fixtures/openai_native/09_tool_use_responses.json new file mode 100644 index 0000000..a6e3484 --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/09_tool_use_responses.json @@ -0,0 +1,94 @@ +{ + "_model_id": "gpt-4o-mini", + "_response": { + "id": "resp_0c3beedd7242846a006a19525ae42881949bd176fd7d3aa90e", + "created_at": 1780044378.0, + "error": null, + "incomplete_details": null, + "instructions": null, + "metadata": {}, + "model": "gpt-4o-mini-2024-07-18", + "object": "response", + "output": [ + { + "arguments": "{\"city\":\"Tokyo\"}", + "call_id": "call_O98Bwd1iE01xgrPVmPli3M0i", + "name": "get_weather", + "type": "function_call", + "id": "fc_0c3beedd7242846a006a19525c212081948e414c52f9ec8029", + "namespace": null, + "status": "completed" + } + ], + "parallel_tool_calls": true, + "temperature": 1.0, + "tool_choice": "required", + "tools": [ + { + "name": "get_weather", + "parameters": { + "type": "object", + "properties": { + "city": { + 
"type": "string" + } + }, + "required": [ + "city" + ], + "additionalProperties": false + }, + "strict": true, + "type": "function", + "defer_loading": null, + "description": "Get current weather for a city." + } + ], + "top_p": 1.0, + "background": false, + "completed_at": 1780044380.0, + "conversation": null, + "max_output_tokens": 200, + "max_tool_calls": null, + "previous_response_id": null, + "prompt": null, + "prompt_cache_key": null, + "prompt_cache_retention": "in_memory", + "reasoning": { + "effort": null, + "generate_summary": null, + "summary": null, + "context": null + }, + "safety_identifier": null, + "service_tier": "default", + "status": "completed", + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "top_logprobs": 0, + "truncation": "disabled", + "usage": { + "input_tokens": 53, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 6, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 59 + }, + "user": null, + "billing": { + "payer": "developer" + }, + "frequency_penalty": 0.0, + "moderation": null, + "presence_penalty": 0.0, + "store": true + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/openai_native/10_reasoning_responses.json b/tests/unit/adapters/fixtures/openai_native/10_reasoning_responses.json new file mode 100644 index 0000000..56b184d --- /dev/null +++ b/tests/unit/adapters/fixtures/openai_native/10_reasoning_responses.json @@ -0,0 +1,88 @@ +{ + "_model_id": "o4-mini", + "_response": { + "id": "resp_01686e0eda1186ad006a19525c9244819281061c5e851add1a", + "created_at": 1780044380.0, + "error": null, + "incomplete_details": null, + "instructions": null, + "metadata": {}, + "model": "o4-mini-2025-04-16", + "object": "response", + "output": [ + { + "id": "rs_01686e0eda1186ad006a19525e3b6081928c78127466f13ae9", + "summary": [], + "type": "reasoning", + "content": null, + "encrypted_content": null, + "status": null + }, + { + "id": 
"msg_01686e0eda1186ad006a1952607b7c8192aabd7ae10c7ed3c1", + "content": [ + { + "annotations": [], + "text": "Here is a proof by mathematical induction that for every positive integer n, \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3 = (1 + 2 + \u22ef + n)\u00b2. \n\nNotation. Let \n\u2003S(n) := 1\u00b3 + 2\u00b3 + \u22ef + n\u00b3, \nand recall the well\u2010known formula \n\u2003T(n) := 1 + 2 + \u22ef + n = n(n + 1)/2. \n\nWe will show \n\u2003S(n) = [T(n)]\u00b2 = [n(n + 1)/2]\u00b2. \n\n1. Base case (n = 1). \n S(1) = 1\u00b3 = 1, \n T(1) = 1, so [T(1)]\u00b2 = 1\u00b2 = 1. \n Hence S(1) = [T(1)]\u00b2. \n\n2. Inductive step. \n Assume that for some k \u2265 1, \n S(k) = [T(k)]\u00b2 = [k(k + 1)/2]\u00b2. \n We must show \n S(k + 1) = [T(k + 1)]\u00b2. \n\n Now \n S(k + 1) \n = S(k) + (k + 1)\u00b3 \n = [k(k + 1)/2]\u00b2 + (k + 1)\u00b3 (by the inductive hypothesis) \n = (k\u00b2 (k + 1)\u00b2)/4 + (k + 1)\u00b3. \n\n Factor out (k + 1)\u00b2/4 from the sum: \n = (k + 1)\u00b2 \n \u00b7 [k\u00b2/4 + 4\u00b7(k + 1)/4 ] \n = (k + 1)\u00b2 \n \u00b7 [ (k\u00b2 + 4(k + 1)) / 4 ] \n = (k + 1)\u00b2 \n \u00b7 [ (k\u00b2 + 4k + 4) / 4 ] \n = (k + 1)\u00b2 \n \u00b7 [ (k + 2)\u00b2 / 4 ] \n = [ (k + 1)(k + 2) / 2 ]\u00b2. \n\n But (k + 1)(k + 2)/2 = T(k + 1), so we conclude \n S(k + 1) = [T(k + 1)]\u00b2. \n\nBy the principle of mathematical induction, the formula \n\u20031\u00b3 + 2\u00b3 + \u22ef + n\u00b3 = [n(n + 1)/2]\u00b2 \nholds for all positive integers n. 
\nEquivalently, the sum of the first n cubes equals the square of the sum of the first n positive integers.", + "type": "output_text", + "logprobs": [] + } + ], + "role": "assistant", + "status": "completed", + "type": "message", + "phase": null + } + ], + "parallel_tool_calls": true, + "temperature": 1.0, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "background": false, + "completed_at": 1780044387.0, + "conversation": null, + "max_output_tokens": 2000, + "max_tool_calls": null, + "previous_response_id": null, + "prompt": null, + "prompt_cache_key": null, + "prompt_cache_retention": "in_memory", + "reasoning": { + "effort": "low", + "generate_summary": null, + "summary": null, + "context": "current_turn" + }, + "safety_identifier": null, + "service_tier": "default", + "status": "completed", + "text": { + "format": { + "type": "text" + }, + "verbosity": "medium" + }, + "top_logprobs": 0, + "truncation": "disabled", + "usage": { + "input_tokens": 33, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 981, + "output_tokens_details": { + "reasoning_tokens": 320 + }, + "total_tokens": 1014 + }, + "user": null, + "billing": { + "payer": "developer" + }, + "frequency_penalty": 0.0, + "moderation": null, + "presence_penalty": 0.0, + "store": true + } +} \ No newline at end of file diff --git a/tests/unit/adapters/test_openai_native.py b/tests/unit/adapters/test_openai_native.py new file mode 100644 index 0000000..71ffb16 --- /dev/null +++ b/tests/unit/adapters/test_openai_native.py @@ -0,0 +1,228 @@ +"""OpenAI native adapter — verified against real fixtures.""" + +from __future__ import annotations + +import json +import pathlib + +from lago_agent_sdk.adapters import extract_openai_native + +FIX = pathlib.Path(__file__).parent / "fixtures" / "openai_native" + + +def _load(name: str) -> tuple[str, dict]: + data = json.loads((FIX / name).read_text()) + return data["_model_id"], data["_response"] + + +# 
-------------------------------------------------------------------------- +# Chat Completions fixtures +# -------------------------------------------------------------------------- +def test_plain_chat() -> None: + model_id, resp = _load("01_plain_chat.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 13 + assert u.output == 23 + assert u.cache_read == 0 + assert u.reasoning == 0 + assert u.tool_calls == 0 + assert u.audio_input == 0 + assert u.audio_output == 0 + assert u.api == "chat_completions" + assert u.provider == "openai" + + +def test_tool_use_chat_counts_tool_calls() -> None: + model_id, resp = _load("02_tool_use_chat.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 60 + assert u.output == 5 + assert u.tool_calls == 1 + assert u.api == "chat_completions" + + +def test_cache_call1_no_cache_yet() -> None: + """First call with a long prompt — OpenAI hasn't cached it yet.""" + model_id, resp = _load("03_cache_call1_chat.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 3819 + assert u.output == 20 + assert u.cache_read == 0 + + +def test_cache_call2_auto_cached() -> None: + """Second call with the same long prompt — OpenAI auto-caches, exposes cached_tokens.""" + model_id, resp = _load("04_cache_call2_chat.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 3819 + assert u.output == 20 + assert u.cache_read == 3712 # most of the system prompt cached + # OpenAI doesn't expose cache_write / cache_write_5m / cache_write_1h + assert u.cache_write == 0 + assert u.cache_write_5m == 0 + + +def test_streaming_chat_final_chunk_carries_usage() -> None: + """When stream_options.include_usage=True, the final chunk carries the usage payload.""" + model_id, resp = _load("05_streaming_chat.json") + chunks = resp["chunks"] + # Find the chunk with usage (it's the last one) + final_with_usage = next((c for c in reversed(chunks) if c.get("usage")), None) + 
assert final_with_usage is not None + u = extract_openai_native(final_with_usage, model_id=model_id) + assert u.input == 13 + assert u.output == 29 + assert u.api == "chat_completions" + + +def test_reasoning_chat_exposes_reasoning_tokens() -> None: + """o-series models populate completion_tokens_details.reasoning_tokens — first provider to do so.""" + model_id, resp = _load("06_reasoning_chat.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 33 + assert u.output == 1579 + assert u.reasoning == 832 # actual measured value — not folded away + assert u.tool_calls == 0 + + +def test_multi_turn_chat() -> None: + model_id, resp = _load("07_multi_turn_chat.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 34 + assert u.output == 8 + + +# -------------------------------------------------------------------------- +# Responses API fixtures +# -------------------------------------------------------------------------- +def test_plain_responses() -> None: + model_id, resp = _load("08_plain_responses.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 13 + assert u.output == 19 + assert u.api == "responses" + assert u.provider == "openai" + + +def test_tool_use_responses_counts_function_calls() -> None: + """Responses API encodes tool calls as items in `output[]` with type 'function_call'.""" + model_id, resp = _load("09_tool_use_responses.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 53 + assert u.output == 6 + assert u.tool_calls == 1 + assert u.api == "responses" + + +def test_reasoning_responses() -> None: + model_id, resp = _load("10_reasoning_responses.json") + u = extract_openai_native(resp, model_id=model_id) + assert u.input == 33 + assert u.output == 981 + assert u.reasoning == 320 + assert u.api == "responses" + + +# -------------------------------------------------------------------------- +# API detection +# 
-------------------------------------------------------------------------- +def test_chat_completions_shape_detected() -> None: + """`prompt_tokens` in usage → Chat Completions.""" + u = extract_openai_native( + {"usage": {"prompt_tokens": 1, "completion_tokens": 1}}, + model_id="gpt-4o", + ) + assert u.api == "chat_completions" + + +def test_responses_api_shape_detected() -> None: + """`input_tokens` (without prompt_tokens) → Responses API.""" + u = extract_openai_native( + {"usage": {"input_tokens": 1, "output_tokens": 1}}, + model_id="gpt-4o", + ) + assert u.api == "responses" + + +# -------------------------------------------------------------------------- +# Robustness +# -------------------------------------------------------------------------- +def test_handles_pydantic_via_model_dump() -> None: + class FakePydantic: + def model_dump(self) -> dict: + return { + "model": "gpt-4o-mini", + "choices": [{"message": {"tool_calls": [{"id": "t1"}, {"id": "t2"}]}}], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 7, + "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0}, + "completion_tokens_details": { + "reasoning_tokens": 3, + "audio_tokens": 0, + }, + }, + } + + u = extract_openai_native(FakePydantic(), model_id="gpt-4o-mini") + assert u.input == 5 + assert u.output == 7 + assert u.reasoning == 3 + assert u.tool_calls == 2 + assert u.api == "chat_completions" + + +def test_no_usage_returns_zeros() -> None: + u = extract_openai_native({}, model_id="gpt-4o-mini") + assert u.input == 0 + assert u.output == 0 + assert not u.nonzero_numeric() + + +def test_survives_non_dict_usage() -> None: + assert extract_openai_native({"usage": True}, model_id="x").input == 0 + assert extract_openai_native({"usage": "bogus"}, model_id="x").output == 0 + assert extract_openai_native(None, model_id="x").input == 0 + + +def test_unknown_top_usage_field_lands_in_extras() -> None: + """If OpenAI adds a new top-level field, drift detection picks it up.""" + resp = { 
+ "usage": { + "prompt_tokens": 5, + "completion_tokens": 7, + "future_field_xyz": "novel", + } + } + u = extract_openai_native(resp, model_id="gpt-4o") + assert u.extras.get("future_field_xyz") == "novel" + + +def test_audio_input_mapped_from_prompt_details() -> None: + """Chat Completions audio input lives at usage.prompt_tokens_details.audio_tokens.""" + resp = { + "usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "prompt_tokens_details": {"audio_tokens": 42, "cached_tokens": 0}, + "completion_tokens_details": {"audio_tokens": 0, "reasoning_tokens": 0}, + } + } + u = extract_openai_native(resp, model_id="gpt-4o-audio") + assert u.audio_input == 42 + assert u.audio_output == 0 + + +def test_audio_output_mapped_from_completion_details() -> None: + """GPT-4o-audio output audio lives at usage.completion_tokens_details.audio_tokens.""" + resp = { + "usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}, + "completion_tokens_details": {"audio_tokens": 33, "reasoning_tokens": 0}, + } + } + u = extract_openai_native(resp, model_id="gpt-4o-audio") + assert u.audio_input == 0 + assert u.audio_output == 33 diff --git a/tests/unit/test_wrapper_openai.py b/tests/unit/test_wrapper_openai.py new file mode 100644 index 0000000..43acd95 --- /dev/null +++ b/tests/unit/test_wrapper_openai.py @@ -0,0 +1,296 @@ +"""OpenAI wrapper tests — fake client, no live API.""" + +from __future__ import annotations + +from typing import Any + +from lago_agent_sdk import LagoSDK + + +class FakeChatCompletion: + """Mimics openai's ChatCompletion pydantic object.""" + + def __init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + # expose .usage so the wrapper's _is_response_like check passes + self.usage = payload.get("usage") + + def model_dump(self) -> dict[str, Any]: + return self._payload + + +class FakeResponsesResponse: + """Mimics openai's Response object (Responses API).""" + + def 
__init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + self.usage = payload.get("usage") + + def model_dump(self) -> dict[str, Any]: + return self._payload + + +class FakeStreamChunk: + """Mimics a ChatCompletionChunk.""" + + def __init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + + def model_dump(self) -> dict[str, Any]: + return self._payload + + +class FakeCompletions: + def __init__(self) -> None: + self.create_calls = 0 + self.last_kwargs: dict[str, Any] | None = None + + def create(self, **kwargs: Any) -> Any: + self.create_calls += 1 + # extra_lago must be stripped by the wrapper before reaching here + assert "extra_lago" not in kwargs + self.last_kwargs = dict(kwargs) + + if kwargs.get("stream") is True: + # Stream yields several chunks; the LAST one carries usage + # (because the wrapper auto-injects stream_options.include_usage). + chunks = [ + FakeStreamChunk( + {"choices": [{"delta": {"content": "hi"}}], "usage": None}, + ), + FakeStreamChunk( + { + "choices": [], + "usage": { + "prompt_tokens": 12, + "completion_tokens": 22, + "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0}, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + }, + }, + } + ), + ] + return iter(chunks) + + # Non-streaming: return a ChatCompletion-like object with .usage + return FakeChatCompletion( + { + "model": kwargs.get("model", "gpt-4o-mini"), + "choices": [{"message": {"role": "assistant", "content": "hi", "tool_calls": None}}], + "usage": { + "prompt_tokens": 8, + "completion_tokens": 16, + "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0}, + "completion_tokens_details": {"reasoning_tokens": 0, "audio_tokens": 0}, + }, + } + ) + + +class FakeChat: + def __init__(self) -> None: + self.completions = FakeCompletions() + + +class FakeResponsesNamespace: + def __init__(self) -> None: + self.create_calls = 0 + + def create(self, **kwargs: Any) -> Any: + self.create_calls += 1 + 
assert "extra_lago" not in kwargs + return FakeResponsesResponse( + { + "model": kwargs.get("model", "gpt-4o-mini"), + "output": [{"type": "function_call", "name": "get_weather"}], + "usage": { + "input_tokens": 53, + "output_tokens": 6, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + ) + + +class FakeOpenAI: + """Mimics `from openai import OpenAI; OpenAI(api_key=...)`.""" + + def __init__(self) -> None: + self.chat = FakeChat() + self.responses = FakeResponsesNamespace() + + +# Module path needs to contain 'openai' so detector routes to openai wrapper. +FakeOpenAI.__module__ = "openai.fake" + + +def _new_sdk(default_sub: str = "sub_test") -> tuple[LagoSDK, list[dict]]: + received: list[dict] = [] + + def sender(batch: list[dict]) -> None: + received.extend(batch) + + sdk = LagoSDK(api_key="dummy", default_subscription_id=default_sub) + sdk._queue._sender = sender # type: ignore[attr-defined] + return sdk, received + + +# -------------------------------------------------------------------------- +# Chat Completions +# -------------------------------------------------------------------------- +def test_wrap_chat_completions_create_emits_input_and_output() -> None: + sdk, received = _new_sdk() + fake = FakeOpenAI() + client = sdk.wrap(fake) + resp = client.chat.completions.create(model="gpt-4o-mini", messages=[]) + assert resp.usage["prompt_tokens"] == 8 + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received} + assert by_code["llm_input_tokens"] == 8 + assert by_code["llm_output_tokens"] == 16 + + +def test_wrap_strips_extra_lago_and_uses_per_call_sub() -> None: + sdk, received = _new_sdk("sub_default") + fake = FakeOpenAI() + client = sdk.wrap(fake) + client.chat.completions.create( + model="gpt-4o-mini", + messages=[], + extra_lago={"subscription": "sub_per_call", "dimensions": {"feature": "X"}}, + ) + assert 
sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + assert all(e["external_subscription_id"] == "sub_per_call" for e in received) + assert received[0]["properties"]["feature"] == "X" + + +def test_wrap_double_wrap_is_idempotent() -> None: + sdk, received = _new_sdk() + fake = FakeOpenAI() + sdk.wrap(fake) + sdk.wrap(fake) + sdk.wrap(fake) + fake.chat.completions.create(model="gpt-4o-mini", messages=[]) + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + assert len(received) == 2 # input + output, not 6 + assert fake.chat.completions.create_calls == 1 + + +def test_wrap_create_with_stream_captures_usage_from_final_chunk() -> None: + sdk, received = _new_sdk() + fake = FakeOpenAI() + client = sdk.wrap(fake) + chunks = list(client.chat.completions.create(model="gpt-4o-mini", messages=[], stream=True)) + assert len(chunks) == 2 # first chunk + usage chunk + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received} + assert by_code["llm_input_tokens"] == 12 + assert by_code["llm_output_tokens"] == 22 + + +def test_wrap_auto_injects_stream_options_include_usage() -> None: + """Customer passes stream=True without stream_options — wrapper injects include_usage:True.""" + sdk, _ = _new_sdk() + fake = FakeOpenAI() + client = sdk.wrap(fake) + list(client.chat.completions.create(model="gpt-4o-mini", messages=[], stream=True)) + sdk.shutdown(timeout=1.0) + seen = fake.chat.completions.last_kwargs or {} + assert seen.get("stream_options") == {"include_usage": True} + + +def test_wrap_respects_customer_explicit_include_usage_false() -> None: + """If customer set include_usage=False explicitly, we don't override.""" + sdk, _ = _new_sdk() + fake = FakeOpenAI() + client = sdk.wrap(fake) + list( + client.chat.completions.create( + model="gpt-4o-mini", + messages=[], + stream=True, + stream_options={"include_usage": False}, + ) + ) + sdk.shutdown(timeout=1.0) + seen = 
fake.chat.completions.last_kwargs or {} + assert seen.get("stream_options") == {"include_usage": False} + + +def test_wrap_preserves_existing_stream_options_keys() -> None: + """Existing stream_options keys are kept; include_usage is added alongside.""" + sdk, _ = _new_sdk() + fake = FakeOpenAI() + client = sdk.wrap(fake) + list( + client.chat.completions.create( + model="gpt-4o-mini", + messages=[], + stream=True, + stream_options={"some_other_option": "value"}, + ) + ) + sdk.shutdown(timeout=1.0) + seen = fake.chat.completions.last_kwargs or {} + assert seen.get("stream_options") == {"some_other_option": "value", "include_usage": True} + + +# -------------------------------------------------------------------------- +# Responses API +# -------------------------------------------------------------------------- +def test_wrap_responses_create_emits_input_output_and_tool_calls() -> None: + sdk, received = _new_sdk() + fake = FakeOpenAI() + client = sdk.wrap(fake) + resp = client.responses.create(model="gpt-4o-mini", input="hi") + assert resp.usage["input_tokens"] == 53 + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + by_code = {e["code"]: int(float(e["properties"]["value"])) for e in received} + assert by_code["llm_input_tokens"] == 53 + assert by_code["llm_output_tokens"] == 6 + assert by_code["llm_tool_calls"] == 1 + + +# -------------------------------------------------------------------------- +# Failure isolation +# -------------------------------------------------------------------------- +def test_instrumentation_failure_does_not_break_call() -> None: + sdk, _ = _new_sdk() + + class BadResp: + @property + def usage(self): + raise RuntimeError("boom") + + def model_dump(self): + raise RuntimeError("boom") + + class BadCompletions: + def create(self, **_kw): + return BadResp() + + class BadChat: + def __init__(self): + self.completions = BadCompletions() + + class BadOpenAI: + def __init__(self): + self.chat = BadChat() + self.responses = None # 
responses namespace deliberately omitted + + BadOpenAI.__module__ = "openai.fake" + + client = sdk.wrap(BadOpenAI()) + # Adapter will crash inside, but wrap must still return resp. + resp = client.chat.completions.create(model="x", messages=[]) + assert resp is not None + sdk.shutdown(timeout=1.0) From 6c487ab7449761e43b2dd6ec58500c1297323e7b Mon Sep 17 00:00:00 2001 From: Anass Date: Fri, 29 May 2026 13:50:11 +0200 Subject: [PATCH 4/5] Add native Gemini (google-genai) SDK support Adapter maps usage_metadata fields to CanonicalUsage: prompt_token_count -> input candidates_token_count -> output cached_content_token_count -> cache_read thoughts_token_count -> reasoning prompt_tokens_details[modality=AUDIO].token_count -> audio_input prompt_tokens_details[modality=IMAGE].token_count -> image_input candidates_tokens_details[modality=AUDIO].token_count -> audio_output count of candidates[0].content.parts[].function_call -> tool_calls Wrapper covers client.models.generate_content + generate_content_stream (sync) and the async variants under client.aio.models. Idempotent via _lago_instrumented sentinel. Detector now returns 'gemini' (was 'google') for google-genai clients -- matches the naming convention used by other providers (bedrock, anthropic, openai, mistral). Semantic note vs OpenAI: Gemini's `thoughts_token_count` is ADDITIVE to `candidates_token_count` (verified by math across all 5 fixtures: input + output + reasoning = total). OpenAI's `reasoning_tokens` is a SUBSET of `completion_tokens`. Documented in adapter docstring + README for customers configuring per-metric billing. Gemini 2.5 emits reasoning tokens by default (no explicit thinking_config needed) -- second provider populating llm_reasoning_tokens. 21 new unit tests (15 adapter + 6 wrapper). 4 live integration tests gated on GEMINI_API_KEY. 5 captured response fixtures (plain, tool use, streaming, thinking, multi-turn). Total: 304 unit tests passing, ruff + mypy strict clean. 
--- CHANGELOG.md | 9 + README.md | 53 +++-- pyproject.toml | 6 +- src/lago_agent_sdk/adapters/__init__.py | 2 + src/lago_agent_sdk/adapters/gemini_native.py | 135 +++++++++++ src/lago_agent_sdk/detector.py | 2 +- src/lago_agent_sdk/sdk.py | 9 +- src/lago_agent_sdk/wrappers/gemini.py | 157 +++++++++++++ tests/integration/test_live_gemini.py | 154 +++++++++++++ .../unit/adapters/fixtures/capture_gemini.py | 138 +++++++++++ .../gemini_native/01_plain_flash.json | 82 +++++++ .../fixtures/gemini_native/02_tool_use.json | 90 ++++++++ .../fixtures/gemini_native/03_streaming.json | 85 +++++++ .../fixtures/gemini_native/04_thinking.json | 82 +++++++ .../fixtures/gemini_native/05_multi_turn.json | 82 +++++++ tests/unit/adapters/test_gemini_native.py | 217 ++++++++++++++++++ tests/unit/test_wrapper_gemini.py | 214 +++++++++++++++++ 17 files changed, 1500 insertions(+), 17 deletions(-) create mode 100644 src/lago_agent_sdk/adapters/gemini_native.py create mode 100644 src/lago_agent_sdk/wrappers/gemini.py create mode 100644 tests/integration/test_live_gemini.py create mode 100644 tests/unit/adapters/fixtures/capture_gemini.py create mode 100644 tests/unit/adapters/fixtures/gemini_native/01_plain_flash.json create mode 100644 tests/unit/adapters/fixtures/gemini_native/02_tool_use.json create mode 100644 tests/unit/adapters/fixtures/gemini_native/03_streaming.json create mode 100644 tests/unit/adapters/fixtures/gemini_native/04_thinking.json create mode 100644 tests/unit/adapters/fixtures/gemini_native/05_multi_turn.json create mode 100644 tests/unit/adapters/test_gemini_native.py create mode 100644 tests/unit/test_wrapper_gemini.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d857d3..cda593c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented here. 
Format follows [Kee ## [Unreleased] ### Added +- Native `google-genai` SDK support covering `client.models.generate_content` + `generate_content_stream`, sync + async (`client.aio.models`). +- `extract_gemini_native` adapter maps `usage_metadata`: `prompt_token_count → input`, `candidates_token_count → output`, `cached_content_token_count → cache_read`, `thoughts_token_count → reasoning`, `prompt_tokens_details[modality=AUDIO/IMAGE] → audio_input/image_input`, `candidates_tokens_details[modality=AUDIO] → audio_output`, count of `candidates[0].content.parts[].function_call → tool_calls`. +- **Gemini 2.5 surfaces reasoning tokens by default** (`thoughts_token_count`) — fires `llm_reasoning_tokens` automatically. Note the semantic difference vs OpenAI: Gemini's reasoning is ADDITIVE to output (`candidates + thoughts = total billable output`); OpenAI's reasoning is a SUBSET of `completion_tokens`. Documented in adapter docstring + README. +- `gemini` optional dependency group: `pip install 'lago-agent-sdk[gemini]'`. +- 21 new unit tests (15 adapter + 6 wrapper) and 4 live integration tests (gated on `GEMINI_API_KEY`). Total: 304 unit tests. +- 5 captured response fixtures from the real Gemini API (plain, tool use, streaming, thinking, multi-turn). +- Detector now returns `gemini` (was `google`) for `google-genai` clients. + +### Added (OpenAI — earlier in this branch) - Native `openai` SDK support covering both APIs: `chat.completions.create` and `responses.create`, each with sync + streaming. Same coverage on `AsyncOpenAI`. - `extract_openai_native` adapter handles both API shapes with auto-detection: - Chat Completions: `prompt_tokens`, `completion_tokens`, `prompt_tokens_details.{cached_tokens, audio_tokens}`, `completion_tokens_details.{reasoning_tokens, audio_tokens}`, count of `choices[0].message.tool_calls`. 
diff --git a/README.md b/README.md index ba96fc3..2d1fb0b 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`). For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`). For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`). For OpenAI native support: `pip install 'lago-agent-sdk[openai]'` (adds `openai`). +For Gemini native support: `pip install 'lago-agent-sdk[gemini]'` (adds `google-genai`). ## Quickstart — Bedrock @@ -110,6 +111,28 @@ Works with `OpenAI` and `AsyncOpenAI`. Covers both **Chat Completions** (`client **Reasoning tokens** (`llm_reasoning_tokens`) populate automatically when you call an o-series model (`o4-mini`, `o1`, etc.) — OpenAI is the first provider to expose this metric separately. +## Quickstart — Gemini + +```python +from google import genai +from lago_agent_sdk import LagoSDK + +sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme") +client = sdk.wrap(genai.Client(api_key="...")) + +resp = client.models.generate_content( + model="gemini-2.5-flash", + contents="Hello", +) +sdk.flush() +``` + +Wraps the modern `google-genai` SDK (`from google import genai`). Covers `client.models.generate_content` + `generate_content_stream`, sync + async (via `client.aio.models`). + +**Reasoning tokens** populate automatically on Gemini 2.5 — the model reasons internally by default and surfaces `thoughts_token_count`. Note the semantic difference vs OpenAI: +- **OpenAI:** `reasoning_tokens` is a *subset* of `completion_tokens` (already counted in output) +- **Gemini:** `thoughts_token_count` is *additive* to `candidates_token_count` (total Google bill = output + reasoning) + ## Multi-tenant — pick a subscription per call Three ways to set the `external_subscription_id`, in priority order: @@ -137,25 +160,29 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks. 
| Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ | | Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ | | OpenAI | native SDK (`chat.completions.create` + `responses.create`, sync + async + stream) | ✓ | -| Google Gemini | native SDK | Phase 3 | +| Google Gemini | native SDK (`google-genai`: `models.generate_content` + `generate_content_stream`, sync + async) | ✓ | | LiteLLM | callback bridge | Phase 4 | ## Token dimensions captured `CanonicalUsage` carries 11 numeric fields. Which ones populate depends on the provider: -| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI | -|---|---|---|---|---|---| -| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ | -| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ | -| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) | -| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ (auto-cache; OpenAI doesn't surface creation counts) | -| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ | -| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series models)** | -| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ | -| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio input) | -| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio output) | -| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3 — multimodal adapter) | +| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI | Gemini | +|---|---|---|---|---|---|---| +| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ | +| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ | +| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) | ✓ (CachedContent API) | +| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ | ✗ | +| 
cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ | ✗ | +| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series, subset)** | **✓ (Gemini 2.5, additive)** | +| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ | ✓ | +| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) | +| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) | +| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3) | ✓ (multimodal IMAGE) | + +**Semantic note on `reasoning`:** +- **OpenAI's `reasoning_tokens` is a SUBSET of `output`** — already counted in `completion_tokens`. +- **Gemini's `thoughts_token_count` is ADDITIVE to `output`** — `candidates + thoughts = total billable output`. OpenAI's Predicted Outputs tokens (`accepted_prediction_tokens`, `rejected_prediction_tokens`) are not surfaced — see the OpenAI adapter docstring for details on this intentional gap. 
diff --git a/pyproject.toml b/pyproject.toml index 77b3897..4c6bfd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,9 @@ anthropic = [ openai = [ "openai>=1.50", ] +gemini = [ + "google-genai>=1.0", +] [project.urls] Homepage = "https://www.getlago.com" @@ -85,11 +88,12 @@ strict = true files = ["src/lago_agent_sdk"] [[tool.mypy.overrides]] -module = ["boto3.*", "botocore.*", "mistralai.*", "openai.*"] +module = ["boto3.*", "botocore.*", "mistralai.*", "openai.*", "google.*"] ignore_missing_imports = true [dependency-groups] dev = [ "anthropic>=0.30", "openai>=1.50", + "google-genai>=1.0", ] diff --git a/src/lago_agent_sdk/adapters/__init__.py b/src/lago_agent_sdk/adapters/__init__.py index 1d24920..88d3a71 100644 --- a/src/lago_agent_sdk/adapters/__init__.py +++ b/src/lago_agent_sdk/adapters/__init__.py @@ -1,6 +1,7 @@ from .anthropic_native import extract_anthropic_native from .bedrock_converse import extract_bedrock_converse from .bedrock_invoke import extract_bedrock_invoke, pick_invoke_adapter +from .gemini_native import extract_gemini_native from .mistral_native import extract_mistral_native from .openai_native import extract_openai_native @@ -9,6 +10,7 @@ "extract_bedrock_converse", "extract_bedrock_invoke", "pick_invoke_adapter", + "extract_gemini_native", "extract_mistral_native", "extract_openai_native", ] diff --git a/src/lago_agent_sdk/adapters/gemini_native.py b/src/lago_agent_sdk/adapters/gemini_native.py new file mode 100644 index 0000000..8cb0f06 --- /dev/null +++ b/src/lago_agent_sdk/adapters/gemini_native.py @@ -0,0 +1,135 @@ +"""Gemini native adapter — verified against real fixtures. + +Wraps the modern `google-genai` SDK (`from google import genai`). Both +`client.models.generate_content` (sync + async) and +`client.models.generate_content_stream` (sync + async) put usage in +`response.usage_metadata` (the final chunk for streaming). 
+ +Field mapping (`usage_metadata.*`): + prompt_token_count → input + candidates_token_count → output + cached_content_token_count → cache_read + thoughts_token_count → reasoning + (Gemini 2.5; ADDITIVE + to candidates, not a subset) + prompt_tokens_details[modality=AUDIO].token_count → audio_input + prompt_tokens_details[modality=IMAGE].token_count → image_input + candidates_tokens_details[modality=AUDIO].token_count → audio_output + +Tool calls: count of candidates[0].content.parts[] entries that have a +non-null `function_call` field. + +Semantic note vs OpenAI: + Gemini's `thoughts_token_count` is ADDITIVE to `candidates_token_count` + (total billable output for Google = candidates + thoughts). + OpenAI's `reasoning_tokens` is a SUBSET of `completion_tokens`. + When a customer bills on both `llm_output_tokens` and `llm_reasoning_tokens` + as separate Lago metrics, the Gemini-side sum reflects the full Google bill; + the OpenAI-side `llm_output_tokens` already includes reasoning. + +Unknown top-level usage fields land in `extras` (drift detection). 
# Top-level `usage_metadata` keys this adapter recognises. Any other key found
# in the usage dict is copied into `CanonicalUsage.extras` (drift detection).
_KNOWN_USAGE_FIELDS = {
    "prompt_token_count",
    "candidates_token_count",
    "cached_content_token_count",
    "thoughts_token_count",
    "tool_use_prompt_token_count",
    "total_token_count",
    "prompt_tokens_details",
    "candidates_tokens_details",
    "cache_tokens_details",
    "tool_use_prompt_tokens_details",
    "traffic_type",
}


def _safe_dict(v: Any) -> dict[str, Any]:
    """Return *v* when it is a dict, otherwise an empty dict."""
    return v if isinstance(v, dict) else {}


def _safe_int(v: Any) -> int:
    """Coerce *v* to a non-negative int; missing or unparseable values become 0."""
    try:
        return max(0, int(v or 0))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); a bad counter must never
        # abort usage extraction, so it is treated as "no tokens".
        return 0


def _to_dict(obj: Any) -> dict[str, Any]:
    """Best-effort pydantic-or-dict → dict (google-genai returns pydantic objects).

    Dicts are returned as-is (same object). Objects exposing `model_dump()` are
    dumped. Anything else, a failing dump, or a dump that is not a dict yields
    `{}`, which the caller turns into an all-zero usage record.
    """
    if isinstance(obj, dict):
        return obj
    if hasattr(obj, "model_dump"):
        try:
            dumped = obj.model_dump()
        except Exception:  # noqa: BLE001 - deliberate best-effort, never raise
            return {}
        if isinstance(dumped, dict):
            return cast(dict[str, Any], dumped)
    return {}


def _modality_token_count(
    details: list[dict[str, Any]] | Any, modality: str
) -> int:
    """Sum `token_count` over the `{modality, token_count}` entries matching *modality*.

    Returns 0 when *details* is not a list; non-dict entries are skipped.
    """
    if not isinstance(details, list):
        return 0
    total = 0
    for entry in details:
        if not isinstance(entry, dict):
            continue
        tag = entry.get("modality")
        # The tag may be an enum member rather than a plain string when the
        # payload came from model_dump(); compare by its value in that case.
        if getattr(tag, "value", tag) == modality:
            total += _safe_int(entry.get("token_count"))
    return total


def _count_tool_calls(resp: dict[str, Any]) -> int:
    """Count parts in candidates[0].content.parts[] that have a function_call.

    Only the first candidate is inspected. Any unexpected shape yields 0.
    """
    candidates = resp.get("candidates")
    if not isinstance(candidates, list) or not candidates:
        return 0
    first = candidates[0]
    if not isinstance(first, dict):
        return 0
    parts = _safe_dict(first.get("content")).get("parts")
    if not isinstance(parts, list):
        return 0
    return sum(
        1 for part in parts if isinstance(part, dict) and part.get("function_call") is not None
    )


def extract_gemini_native(response: Any, model_id: str = "") -> CanonicalUsage:
    """Translate a google-genai response (GenerateContentResponse or dict) → CanonicalUsage.

    Accepts the SDK's pydantic objects, dicts (e.g. captured fixtures), or a
    synthetic `{"usage_metadata": {...}}` blob produced by the streaming wrapper.

    Args:
        response: The response object or dict to read usage from.
        model_id: Model name to report. When empty, the response's
            `model_version` is used if it is a string, else "".

    Returns:
        A `CanonicalUsage` with provider "gemini" and api "native". Missing or
        malformed counters are reported as 0 rather than raising.
    """
    resp = _to_dict(response)
    usage = _safe_dict(resp.get("usage_metadata"))

    prompt_details = usage.get("prompt_tokens_details")
    candidates_details = usage.get("candidates_tokens_details")

    # Unknown usage keys are passed through untouched so drift is visible.
    extras: dict[str, Any] = {
        key: value for key, value in usage.items() if key not in _KNOWN_USAGE_FIELDS
    }

    model_version = resp.get("model_version")
    fallback_model = model_version if isinstance(model_version, str) else ""

    # NOTE(review): Google's API reference describes prompt_token_count as
    # including cached-content tokens. If `input` is meant to be net of
    # `cache_read`, the two overlap here - confirm against the other adapters.
    # NOTE(review): tool_use_prompt_token_count is listed as known but is not
    # mapped to any canonical field, so it is dropped - confirm that is intended.
    return CanonicalUsage(
        input=_safe_int(usage.get("prompt_token_count")),
        output=_safe_int(usage.get("candidates_token_count")),
        cache_read=_safe_int(usage.get("cached_content_token_count")),
        reasoning=_safe_int(usage.get("thoughts_token_count")),
        audio_input=_modality_token_count(prompt_details, "AUDIO"),
        audio_output=_modality_token_count(candidates_details, "AUDIO"),
        image_input=_modality_token_count(prompt_details, "IMAGE"),
        tool_calls=_count_tool_calls(resp),
        model=model_id or fallback_model or "",
        provider="gemini",
        api="native",
        extras=extras,
    )
dimensions=dimensions, subscription=subscription) + if kind == "gemini": + from .wrappers.gemini import wrap_gemini_client + + return wrap_gemini_client(self, client, dimensions=dimensions, subscription=subscription) if kind == "unknown": raise UnknownClientError( f"Unknown client passed to wrap(): {type(client).__module__}.{type(client).__name__}. " "Supported: boto3 bedrock-runtime, mistralai.client.Mistral, " - "anthropic.Anthropic / AsyncAnthropic, openai.OpenAI / AsyncOpenAI." + "anthropic.Anthropic / AsyncAnthropic, openai.OpenAI / AsyncOpenAI, " + "google.genai.Client." ) raise UnknownClientError( f"Client kind '{kind}' is not yet supported. " - "Implemented: 'bedrock', 'mistral', 'anthropic', 'openai'." + "Implemented: 'bedrock', 'mistral', 'anthropic', 'openai', 'gemini'." ) # ------------------------------------------------------------------ diff --git a/src/lago_agent_sdk/wrappers/gemini.py b/src/lago_agent_sdk/wrappers/gemini.py new file mode 100644 index 0000000..f53ec51 --- /dev/null +++ b/src/lago_agent_sdk/wrappers/gemini.py @@ -0,0 +1,157 @@ +"""google-genai SDK wrapper. + +Wraps the public methods of `genai.Client.models` (sync) and `genai.Client.aio.models` +(async) in place — instrumentation never breaks the customer's call. + +Methods wrapped: + - models.generate_content(...) — sync, returns GenerateContentResponse + - models.generate_content_stream(...) — sync, returns iterator of chunks (last has usage) + - aio.models.generate_content(...) — async, awaited + - aio.models.generate_content_stream(...) — async, yields chunks + +Per-call override: pop `extra_lago={"subscription": ..., "dimensions": ...}` from +kwargs before forwarding so the SDK's strict validation doesn't reject it. 
logger = logging.getLogger("lago_agent_sdk.wrappers.gemini")

# Attribute set on a client once its methods have been replaced (idempotency marker).
_INSTRUMENTED_ATTR = "_lago_instrumented"
# Per-call override kwarg; stripped before forwarding to google-genai.
_LAGO_KWARG = "extra_lago"


def _pop_lago_kwarg(kwargs: dict[str, Any]) -> dict[str, Any]:
    """Remove the per-call `extra_lago` options from *kwargs* and return them.

    The key is always removed so it never reaches the underlying SDK. A missing,
    empty, or non-mapping value yields `{}`; a non-mapping value is logged and
    ignored instead of raising inside the customer's call.
    """
    opts = kwargs.pop(_LAGO_KWARG, None)
    if not opts:
        return {}
    if not callable(getattr(opts, "get", None)):
        logger.warning(
            "lago: ignoring %s of type %s (expected a dict)", _LAGO_KWARG, type(opts).__name__
        )
        return {}
    return opts


def _usage_snapshot(chunk: Any) -> dict[str, Any] | None:
    """Return `{"usage_metadata": ...}` for a stream chunk carrying usage, else None.

    Never raises: a chunk that cannot be inspected is treated as having no usage
    so the customer's iteration is not interrupted.
    """
    try:
        payload = chunk.model_dump() if hasattr(chunk, "model_dump") else chunk
    except Exception as exc:  # noqa: BLE001
        logger.debug("lago: could not inspect gemini stream chunk: %s", exc)
        return None
    if isinstance(payload, dict) and payload.get("usage_metadata"):
        return {"usage_metadata": payload["usage_metadata"]}
    return None


def wrap_gemini_client(
    sdk: Any,
    client: Any,
    dimensions: dict[str, Any] | None = None,
    subscription: str | None = None,
) -> Any:
    """In-place wrap of a `google.genai.Client`. Idempotent.

    Replaces `generate_content` and `generate_content_stream` on `client.models`
    and `client.aio.models` (whichever exist) with versions that forward the
    call unchanged and then report usage through `sdk.emit`.

    Args:
        sdk: Object exposing `emit(usage, subscription=..., dimensions=...)`.
        client: The client to instrument; it is mutated and returned.
        dimensions: Default dimensions merged under any per-call dimensions.
        subscription: Default subscription, used when a call gives none.

    Returns:
        The same *client* object.

    Per-call overrides are passed as `extra_lago={"subscription": ..., "dimensions": ...}`.
    Emission failures are logged and never propagate to the caller.
    """
    if getattr(client, _INSTRUMENTED_ATTR, False):
        logger.info("lago: gemini client already wrapped — skipping")
        return client

    base_dims = dict(dimensions or {})
    base_sub = subscription

    def _prepare(
        args: tuple[Any, ...], kwargs: dict[str, Any]
    ) -> tuple[str, str | None, dict[str, Any]]:
        """Strip `extra_lago` from *kwargs* and resolve (model, subscription, dimensions)."""
        lago_opts = _pop_lago_kwarg(kwargs)
        model_id = kwargs.get("model") or (args[0] if args else "")
        sub = lago_opts.get("subscription") or base_sub
        try:
            dims = {**base_dims, **(lago_opts.get("dimensions") or {})}
        except TypeError:
            # Per-call dimensions were not a mapping: keep the defaults rather
            # than failing the customer's request.
            logger.warning("lago: ignoring non-mapping %s dimensions", _LAGO_KWARG)
            dims = dict(base_dims)
        return str(model_id), sub, dims

    def _emit_from(payload: Any, model_id: str, sub: str | None, dims: dict[str, Any]) -> None:
        """Extract usage from *payload* and emit it; log instead of raising."""
        try:
            usage = extract_gemini_native(payload, model_id=model_id)
            sdk.emit(usage, subscription=sub, dimensions=dims)
        except Exception as exc:  # noqa: BLE001
            logger.warning("lago: gemini emit failed: %s", exc)

    def _make_sync_generate(original: Any) -> Any:
        def _generate(*args: Any, **kwargs: Any) -> Any:
            model_id, sub, dims = _prepare(args, kwargs)
            response = original(*args, **kwargs)
            _emit_from(response, model_id, sub, dims)
            return response

        return _generate

    def _make_async_generate(original: Any) -> Any:
        async def _generate_async(*args: Any, **kwargs: Any) -> Any:
            model_id, sub, dims = _prepare(args, kwargs)
            response = await original(*args, **kwargs)
            _emit_from(response, model_id, sub, dims)
            return response

        return _generate_async

    # NOTE(review): the streaming payload below carries only usage_metadata, so
    # function_call parts seen in streamed chunks are not counted as tool calls.
    # Confirm whether streamed tool calls should be reported.

    def _make_sync_stream(original: Any) -> Any:
        def _stream(*args: Any, **kwargs: Any) -> Iterator[Any]:
            model_id, sub, dims = _prepare(args, kwargs)
            # Call eagerly so request errors surface at call time, as before.
            src = original(*args, **kwargs)

            def _iter() -> Iterator[Any]:
                last_with_usage: Any = None
                try:
                    for chunk in src:
                        snapshot = _usage_snapshot(chunk)
                        if snapshot is not None:
                            last_with_usage = snapshot
                        yield chunk
                finally:
                    # Runs on exhaustion, early close, or error: emit what was seen.
                    if last_with_usage is not None:
                        _emit_from(last_with_usage, model_id, sub, dims)

            return _iter()

        return _stream

    def _make_async_stream(original: Any) -> Any:
        async def _stream_async(*args: Any, **kwargs: Any) -> AsyncIterator[Any]:
            model_id, sub, dims = _prepare(args, kwargs)
            src = await original(*args, **kwargs)

            async def _aiter() -> AsyncIterator[Any]:
                last_with_usage: Any = None
                try:
                    async for chunk in src:
                        snapshot = _usage_snapshot(chunk)
                        if snapshot is not None:
                            last_with_usage = snapshot
                        yield chunk
                finally:
                    if last_with_usage is not None:
                        _emit_from(last_with_usage, model_id, sub, dims)

            return _aiter()

        return _stream_async

    def _patch(target: Any, name: str, factory: Any) -> None:
        """Replace `target.<name>` with `factory(original)` when the attribute exists."""
        original = getattr(target, name, None)
        if original is not None:
            setattr(target, name, factory(original))

    # ------------------------------------------------------------------
    # client.models.* (sync)
    # ------------------------------------------------------------------
    models = getattr(client, "models", None)
    if models is not None:
        _patch(models, "generate_content", _make_sync_generate)
        _patch(models, "generate_content_stream", _make_sync_stream)

    # ------------------------------------------------------------------
    # client.aio.models.* (async)
    # ------------------------------------------------------------------
    aio_models = getattr(getattr(client, "aio", None), "models", None)
    if aio_models is not None:
        _patch(aio_models, "generate_content", _make_async_generate)
        _patch(aio_models, "generate_content_stream", _make_async_stream)

    setattr(client, _INSTRUMENTED_ATTR, True)
    return client
+""" + +from __future__ import annotations + +import json +import os +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer + +import pytest + +from lago_agent_sdk import LagoSDK + +pytestmark = pytest.mark.skipif( + not os.environ.get("GEMINI_API_KEY"), + reason="GEMINI_API_KEY not set", +) + + +class _MockLago(BaseHTTPRequestHandler): + def do_POST(self): # noqa: N802 + n = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(n) + self.server.received.append(json.loads(body)) # type: ignore[attr-defined] + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(b'{"ok": true}') + + def log_message(self, *_args, **_kwargs): + return + + +def _spawn_lago(): + s = HTTPServer(("127.0.0.1", 0), _MockLago) + s.received = [] # type: ignore[attr-defined] + threading.Thread(target=s.serve_forever, daemon=True).start() + return s, f"http://127.0.0.1:{s.server_port}" + + +def _collect_events(server) -> list[dict]: + return [e for p in server.received for e in p["events"]] + + +def _codes(events) -> set[str]: + return {e["code"] for e in events} + + +def test_live_gemini_generate_content_emits_to_lago() -> None: + from google import genai + + server, url = _spawn_lago() + try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(genai.Client(api_key=os.environ["GEMINI_API_KEY"])) + client.models.generate_content( + model="gemini-2.5-flash", + contents="Say hi", + ) + assert sdk.flush(timeout=10.0) + sdk.shutdown(timeout=2.0) + events = _collect_events(server) + codes = _codes(events) + assert "llm_input_tokens" in codes + assert "llm_output_tokens" in codes + for e in events: + assert e["properties"]["api"] == "native" + assert e["properties"]["provider"] == "gemini" + finally: + server.shutdown() + + +def test_live_gemini_streaming_captures_usage_from_final_chunk() -> None: + from google import genai + + server, url = _spawn_lago() 
+ try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(genai.Client(api_key=os.environ["GEMINI_API_KEY"])) + for _ in client.models.generate_content_stream( + model="gemini-2.5-flash", + contents="Count from 1 to 3.", + ): + pass + assert sdk.flush(timeout=10.0) + sdk.shutdown(timeout=2.0) + events = _collect_events(server) + codes = _codes(events) + assert "llm_input_tokens" in codes + assert "llm_output_tokens" in codes + finally: + server.shutdown() + + +def test_live_gemini_thinking_emits_reasoning() -> None: + """Gemini 2.5 emits thoughts_token_count → llm_reasoning_tokens event.""" + from google import genai + + server, url = _spawn_lago() + try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(genai.Client(api_key=os.environ["GEMINI_API_KEY"])) + client.models.generate_content( + model="gemini-2.5-flash", + contents="What is 17 * 23? Show your reasoning step by step.", + ) + assert sdk.flush(timeout=15.0) + sdk.shutdown(timeout=2.0) + events = _collect_events(server) + codes = _codes(events) + assert "llm_input_tokens" in codes + assert "llm_output_tokens" in codes + # Gemini 2.5 reasons even without explicit thinking_config + assert "llm_reasoning_tokens" in codes + finally: + server.shutdown() + + +def test_live_gemini_tool_use_emits_tool_calls() -> None: + from google import genai + from google.genai import types as genai_types + + server, url = _spawn_lago() + try: + sdk = LagoSDK(api_key="x", api_url=url, default_subscription_id="sub_int") + client = sdk.wrap(genai.Client(api_key=os.environ["GEMINI_API_KEY"])) + weather_fn = genai_types.FunctionDeclaration( + name="get_weather", + description="Get the current weather for a city.", + parameters=genai_types.Schema( + type="OBJECT", + properties={"city": genai_types.Schema(type="STRING")}, + required=["city"], + ), + ) + client.models.generate_content( + model="gemini-2.5-flash", + contents="What's the weather 
# Directory the captured fixtures are written to (created on first save).
OUT = pathlib.Path(__file__).parent / "gemini_native"


def _json_default(value):
    """Fallback serializer for `json.dumps`.

    Bytes are written as base64 text instead of their Python repr (`"b'\\x..'"`),
    so binary fields stay machine-readable. Any other non-JSON value is
    stringified, as before.
    """
    import base64  # local import: only needed when a response carries bytes

    if isinstance(value, (bytes, bytearray)):
        return base64.b64encode(bytes(value)).decode("ascii")
    return str(value)


def to_dict(response) -> dict:
    """google-genai SDK returns pydantic models — convert to plain dict for JSON.

    Tries `model_dump()`, then `dict()`, then `json()`, and finally `dict(response)`.
    """
    if hasattr(response, "model_dump"):
        return response.model_dump()
    if hasattr(response, "dict"):
        return response.dict()
    if hasattr(response, "json"):
        return json.loads(response.json())
    return dict(response)


def save(name: str, model: str, payload: dict) -> None:
    """Write *payload* to `OUT/<name>.json`, wrapped with the model id it came from."""
    # Create the directory lazily so importing this module has no side effects.
    OUT.mkdir(parents=True, exist_ok=True)
    path = OUT / f"{name}.json"
    document = {"_model_id": model, "_response": payload}
    path.write_text(json.dumps(document, indent=2, default=_json_default), encoding="utf-8")
    print(f" ✓ saved {path.name}")


def main() -> int:
    """Call the live Gemini API for each scenario and save the raw responses.

    Returns:
        0 on completion, 2 when GEMINI_API_KEY is not set. Scenarios 4 and 7
        are best-effort: an API error is printed and the run continues.
    """
    key = os.environ.get("GEMINI_API_KEY")
    if not key:
        print("error: set GEMINI_API_KEY", file=sys.stderr)
        return 2

    client = genai.Client(api_key=key)
    prompt = "Write one sentence about dolphins."

    # ----- 1. Plain call (cheap flash model) -----
    print("\n[1] plain — gemini-2.5-flash")
    r = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
    )
    save("01_plain_flash", "gemini-2.5-flash", to_dict(r))

    # ----- 2. Tool use (function calling) -----
    print("\n[2] tool use — gemini-2.5-flash with weather function")
    weather_fn = types.FunctionDeclaration(
        name="get_weather",
        description="Get the current weather for a city.",
        parameters=types.Schema(
            type="OBJECT",
            properties={"city": types.Schema(type="STRING")},
            required=["city"],
        ),
    )
    r = client.models.generate_content(
        model="gemini-2.5-flash",
        contents="What's the weather in Tokyo?",
        config=types.GenerateContentConfig(
            tools=[types.Tool(function_declarations=[weather_fn])],
            tool_config=types.ToolConfig(
                function_calling_config=types.FunctionCallingConfig(mode="ANY"),
            ),
        ),
    )
    save("02_tool_use", "gemini-2.5-flash", to_dict(r))

    # ----- 3. Streaming with usage metadata -----
    print("\n[3] streaming — gemini-2.5-flash")
    chunks: list[dict] = [
        to_dict(chunk)
        for chunk in client.models.generate_content_stream(
            model="gemini-2.5-flash",
            contents="Count from 1 to 5, one number per line.",
        )
    ]
    save("03_streaming", "gemini-2.5-flash", {"chunks": chunks})

    # ----- 4. Thinking mode (Gemini 2.5 — emits thoughts_token_count) -----
    print("\n[4] thinking — gemini-2.5-flash with thinking_config")
    try:
        r = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=(
                "Prove that the sum of the first n cubes equals the square of "
                "the sum of the first n positive integers. Show each step."
            ),
            config=types.GenerateContentConfig(
                thinking_config=types.ThinkingConfig(include_thoughts=False, thinking_budget=2048),
            ),
        )
        save("04_thinking", "gemini-2.5-flash", to_dict(r))
    except Exception as exc:  # noqa: BLE001 - best-effort scenario, keep capturing
        print(f" thinking config error: {str(exc)[:160]}")

    # ----- 5. Multi-turn -----
    print("\n[5] multi-turn — gemini-2.5-flash (3 turns)")
    convo = [
        types.Content(role="user", parts=[types.Part(text="What is 2+2?")]),
        types.Content(role="model", parts=[types.Part(text="2+2 equals 4.")]),
        types.Content(role="user", parts=[types.Part(text="And times 3?")]),
    ]
    r = client.models.generate_content(model="gemini-2.5-flash", contents=convo)
    save("05_multi_turn", "gemini-2.5-flash", to_dict(r))

    # ----- 6. Explicit cache (Gemini's CachedContent API) -----
    # Note: requires a sufficiently large prompt (>32k tokens for flash) so we skip
    # for the demo; documented but not part of the captured fixture set.
    print("\n[6] (explicit-cache fixture skipped — needs >32k-token prompt)")

    # ----- 7. Larger model for cross-shape comparison -----
    print("\n[7] plain — gemini-2.5-pro")
    try:
        r = client.models.generate_content(
            model="gemini-2.5-pro",
            contents=prompt,
        )
        save("07_plain_pro", "gemini-2.5-pro", to_dict(r))
    except Exception as exc:  # noqa: BLE001 - best-effort scenario, keep capturing
        print(f" gemini-2.5-pro error: {str(exc)[:160]}")

    print("\nDone. Inspect tests/unit/adapters/fixtures/gemini_native/*.json")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
"usage_metadata": { + "cache_tokens_details": null, + "cached_content_token_count": null, + "candidates_token_count": 23, + "candidates_tokens_details": null, + "prompt_token_count": 7, + "prompt_tokens_details": [ + { + "modality": "TEXT", + "token_count": 7 + } + ], + "thoughts_token_count": 442, + "tool_use_prompt_token_count": null, + "tool_use_prompt_tokens_details": null, + "total_token_count": 472, + "traffic_type": null + }, + "model_status": null, + "automatic_function_calling_history": [], + "parsed": null + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/gemini_native/02_tool_use.json b/tests/unit/adapters/fixtures/gemini_native/02_tool_use.json new file mode 100644 index 0000000..3be2e59 --- /dev/null +++ b/tests/unit/adapters/fixtures/gemini_native/02_tool_use.json @@ -0,0 +1,90 @@ +{ + "_model_id": "gemini-2.5-flash", + "_response": { + "sdk_http_response": { + "headers": { + "x-gemini-service-tier": "standard", + "content-type": "application/json; charset=UTF-8", + "vary": "Origin, X-Origin, Referer", + "content-encoding": "gzip", + "date": "Fri, 29 May 2026 11:03:35 GMT", + "server": "scaffolding on HTTPServer2", + "x-xss-protection": "0", + "x-frame-options": "SAMEORIGIN", + "x-content-type-options": "nosniff", + "server-timing": "gfet4t7; dur=1142", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "transfer-encoding": "chunked" + }, + "body": null + }, + "candidates": [ + { + "content": { + "parts": [ + { + "media_resolution": null, + "code_execution_result": null, + "executable_code": null, + "file_data": null, + "function_call": { + "id": null, + "args": { + "city": "Tokyo" + }, + "name": "get_weather", + "partial_args": null, + "will_continue": null + }, + "function_response": null, + "inline_data": null, + "text": null, + "thought": null, + "thought_signature": 
"b'\\n\\xba\\x02\\x01\\x0c9\\xd6\\xc7r\\xeb\\xe3\\x83E\\xad\\xbc\\xd82E\\x1c\\xb0*\\xcf\\xa6\\x07N\\xcc\\xd2\\xb4\\x14\\xa2\\x1f\\xda\\xc1\\xd8\\x01\\x16\\xba\\x15\\xf3f5\\x8c\\xdb\\n\\xc9\\xb47`\\xd8\\x05\\xc0\"x-\\x80\\xce\\x83\\xe1\\xb5h\\x9c\\xf5\\x95\\xce\\xc6\\xa2\\x0bX\\xe4r\\x82\\xcdGu\\xcc\\xf7\\x06\\x0f\\xb5?\\xe6k5n\\xfb\\\\\\x99-\\xa88\\xaat\\x00l\\x0e\\xdb\\xea\\x1b\\x0b\\x95gi\\xf3\\xc3\\xc9F\\x81\\x94\\x08\\xceE\\x13~:\\x827\\xebc\\xda\\xd3\\xd99IE\\xff\\xd8\\x8f\\x1e_+\\xf8\\xf77g\\xc3\\xa5\\xc3cHPf3\\xf0\\x8a\\xef\\x19M\\x0f\\xc5K\\x15\\x1c\\x86\\x95\\x00\\x1drB`\\xbc\\xcb\\xac\\xf5\\xde%\\xa6\\xfc<\\x8f\\x82.\\x029\\xab\\xa0\\xcdU\\x02\\xc5\\xb1[:\\x1b\\xc8\\xd8\\xbc\\xdc\\xcc\\x19m\\xe9\\xb5\\xb8\\xcf\\x9b\\xd0;\\xeb\\x8d \\x16,7\\x84\\xbeS\\xd3k\\x0f\\xa5\\xb62\\xddl\\xe6\\xdf\\xe1\\xda\\x7fnAa\\xb6\\x86 \\x97\\rE\\xac\\xac0\\xacaU\\xbc\\xbb|J\\xf0\\xe5o\\xd9YV\\xed\\xaa\\x01\\xd3\\x94x\\xe6\\xc3!\\xa4>\\x8dJ\\\\Q\\x86\\xcc\\xf6\\xb2\\'(\\\\>k\\xd6\\x10\\xb5\\xab\\x1b*\\xde\\t\\xea\\xc5\\xca\\x97\\xa0\\x18G\\xde\\xcdu\\xc1u\\xfc0yv\\xa4?3y\\x8b\\xd6\\xca>\\x90\\xe95\\x14\\x03U\\x12\\xdckW\\x00\\x9eL\\r\\x06\\x841'", + "video_metadata": null, + "tool_call": null, + "tool_response": null, + "part_metadata": null + } + ], + "role": "model" + }, + "citation_metadata": null, + "finish_message": null, + "token_count": null, + "finish_reason": "STOP", + "grounding_metadata": null, + "avg_logprobs": null, + "index": 0, + "logprobs_result": null, + "safety_ratings": null, + "url_context_metadata": null + } + ], + "create_time": null, + "model_version": "gemini-2.5-flash", + "prompt_feedback": null, + "response_id": "hnIZauLSI77zxs0Pm_iDmQY", + "usage_metadata": { + "cache_tokens_details": null, + "cached_content_token_count": null, + "candidates_token_count": 15, + "candidates_tokens_details": null, + "prompt_token_count": 49, + "prompt_tokens_details": [ + { + "modality": "TEXT", + "token_count": 49 + } + ], + "thoughts_token_count": 69, + 
"tool_use_prompt_token_count": null, + "tool_use_prompt_tokens_details": null, + "total_token_count": 133, + "traffic_type": null + }, + "model_status": null, + "automatic_function_calling_history": null, + "parsed": null + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/gemini_native/03_streaming.json b/tests/unit/adapters/fixtures/gemini_native/03_streaming.json new file mode 100644 index 0000000..c736593 --- /dev/null +++ b/tests/unit/adapters/fixtures/gemini_native/03_streaming.json @@ -0,0 +1,85 @@ +{ + "_model_id": "gemini-2.5-flash", + "_response": { + "chunks": [ + { + "sdk_http_response": { + "headers": { + "content-type": "text/event-stream", + "content-disposition": "attachment", + "vary": "Origin, X-Origin, Referer", + "transfer-encoding": "chunked", + "date": "Fri, 29 May 2026 11:03:36 GMT", + "server": "scaffolding on HTTPServer2", + "x-xss-protection": "0", + "x-frame-options": "SAMEORIGIN", + "x-content-type-options": "nosniff", + "server-timing": "gfet4t7; dur=885", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000" + }, + "body": null + }, + "candidates": [ + { + "content": { + "parts": [ + { + "media_resolution": null, + "code_execution_result": null, + "executable_code": null, + "file_data": null, + "function_call": null, + "function_response": null, + "inline_data": null, + "text": "1\n2\n3\n4\n5", + "thought": null, + "thought_signature": null, + "video_metadata": null, + "tool_call": null, + "tool_response": null, + "part_metadata": null + } + ], + "role": "model" + }, + "citation_metadata": null, + "finish_message": null, + "token_count": null, + "finish_reason": "STOP", + "grounding_metadata": null, + "avg_logprobs": null, + "index": 0, + "logprobs_result": null, + "safety_ratings": null, + "url_context_metadata": null + } + ], + "create_time": null, + "model_version": "gemini-2.5-flash", + "prompt_feedback": null, + "response_id": "h3IZapK5NI_hxs0P2p2dqQQ", + "usage_metadata": { + 
"cache_tokens_details": null, + "cached_content_token_count": null, + "candidates_token_count": 9, + "candidates_tokens_details": null, + "prompt_token_count": 14, + "prompt_tokens_details": [ + { + "modality": "TEXT", + "token_count": 14 + } + ], + "thoughts_token_count": 29, + "tool_use_prompt_token_count": null, + "tool_use_prompt_tokens_details": null, + "total_token_count": 52, + "traffic_type": null + }, + "model_status": null, + "automatic_function_calling_history": null, + "parsed": null + } + ] + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/gemini_native/04_thinking.json b/tests/unit/adapters/fixtures/gemini_native/04_thinking.json new file mode 100644 index 0000000..64b4439 --- /dev/null +++ b/tests/unit/adapters/fixtures/gemini_native/04_thinking.json @@ -0,0 +1,82 @@ +{ + "_model_id": "gemini-2.5-flash", + "_response": { + "sdk_http_response": { + "headers": { + "x-gemini-service-tier": "standard", + "content-type": "application/json; charset=UTF-8", + "vary": "Origin, X-Origin, Referer", + "content-encoding": "gzip", + "date": "Fri, 29 May 2026 11:03:48 GMT", + "server": "scaffolding on HTTPServer2", + "x-xss-protection": "0", + "x-frame-options": "SAMEORIGIN", + "x-content-type-options": "nosniff", + "server-timing": "gfet4t7; dur=11714", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "transfer-encoding": "chunked" + }, + "body": null + }, + "candidates": [ + { + "content": { + "parts": [ + { + "media_resolution": null, + "code_execution_result": null, + "executable_code": null, + "file_data": null, + "function_call": null, + "function_response": null, + "inline_data": null, + "text": "To prove that the sum of the first n cubes equals the square of the sum of the first n positive integers, we need to show that:\n\n$$ \\sum_{k=1}^{n} k^3 = \\left( \\sum_{k=1}^{n} k \\right)^2 $$\n\nWe know the formula for the sum of the first n positive integers:\n$$ \\sum_{k=1}^{n} k = \\frac{n(n+1)}{2} $$\n\nSo, the 
statement we need to prove can be written as:\n$$ \\sum_{k=1}^{n} k^3 = \\left( \\frac{n(n+1)}{2} \\right)^2 $$\n$$ \\sum_{k=1}^{n} k^3 = \\frac{n^2(n+1)^2}{4} $$\n\nWe will prove this by mathematical induction.\n\n---\n\n**Proof by Mathematical Induction**\n\nLet P(n) be the statement: $\\sum_{k=1}^{n} k^3 = \\frac{n^2(n+1)^2}{4}$.\n\n**Step 1: Base Case (n=1)**\n\nWe need to show that P(1) is true.\nLeft Hand Side (LHS):\n$ \\sum_{k=1}^{1} k^3 = 1^3 = 1 $\n\nRight Hand Side (RHS):\n$ \\frac{1^2(1+1)^2}{4} = \\frac{1^2(2)^2}{4} = \\frac{1 \\cdot 4}{4} = 1 $\n\nSince LHS = RHS (1 = 1), the statement P(1) is true.\n\n**Step 2: Inductive Hypothesis**\n\nAssume that P(m) is true for some positive integer m.\nThat is, assume:\n$$ \\sum_{k=1}^{m} k^3 = \\frac{m^2(m+1)^2}{4} $$\n\n**Step 3: Inductive Step (Prove P(m+1))**\n\nWe need to show that if P(m) is true, then P(m+1) is also true.\nWe need to prove that:\n$$ \\sum_{k=1}^{m+1} k^3 = \\frac{(m+1)^2((m+1)+1)^2}{4} $$\n$$ \\sum_{k=1}^{m+1} k^3 = \\frac{(m+1)^2(m+2)^2}{4} $$\n\nLet's start with the LHS of the statement P(m+1):\n$$ \\sum_{k=1}^{m+1} k^3 = \\left( \\sum_{k=1}^{m} k^3 \\right) + (m+1)^3 $$\n\nNow, using the Inductive Hypothesis (substituting the assumed value for the sum up to m):\n$$ = \\frac{m^2(m+1)^2}{4} + (m+1)^3 $$\n\nTo combine these terms, we can factor out $(m+1)^2$:\n$$ = (m+1)^2 \\left[ \\frac{m^2}{4} + (m+1) \\right] $$\n\nNow, find a common denominator inside the brackets:\n$$ = (m+1)^2 \\left[ \\frac{m^2}{4} + \\frac{4(m+1)}{4} \\right] $$\n$$ = (m+1)^2 \\left[ \\frac{m^2 + 4m + 4}{4} \\right] $$\n\nRecognize that the numerator $m^2 + 4m + 4$ is a perfect square trinomial: $(m+2)^2$.\n$$ = (m+1)^2 \\left[ \\frac{(m+2)^2}{4} \\right] $$\n$$ = \\frac{(m+1)^2(m+2)^2}{4} $$\n\nThis is exactly the RHS of the statement P(m+1).\n\n**Step 4: Conclusion**\n\nSince the base case P(1) is true, and the inductive step shows that if P(m) is true then P(m+1) is true, by the principle of mathematical 
induction, the statement P(n) is true for all positive integers n.\n\nTherefore, the sum of the first n cubes equals the square of the sum of the first n positive integers:\n$$ \\sum_{k=1}^{n} k^3 = \\left( \\sum_{k=1}^{n} k \\right)^2 $$", + "thought": null, + "thought_signature": null, + "video_metadata": null, + "tool_call": null, + "tool_response": null, + "part_metadata": null + } + ], + "role": "model" + }, + "citation_metadata": null, + "finish_message": null, + "token_count": null, + "finish_reason": "STOP", + "grounding_metadata": null, + "avg_logprobs": null, + "index": 0, + "logprobs_result": null, + "safety_ratings": null, + "url_context_metadata": null + } + ], + "create_time": null, + "model_version": "gemini-2.5-flash", + "prompt_feedback": null, + "response_id": "iHIZaqGILdbMvdIPt6ussAQ", + "usage_metadata": { + "cache_tokens_details": null, + "cached_content_token_count": null, + "candidates_token_count": 1003, + "candidates_tokens_details": null, + "prompt_token_count": 27, + "prompt_tokens_details": [ + { + "modality": "TEXT", + "token_count": 27 + } + ], + "thoughts_token_count": 1546, + "tool_use_prompt_token_count": null, + "tool_use_prompt_tokens_details": null, + "total_token_count": 2576, + "traffic_type": null + }, + "model_status": null, + "automatic_function_calling_history": [], + "parsed": null + } +} \ No newline at end of file diff --git a/tests/unit/adapters/fixtures/gemini_native/05_multi_turn.json b/tests/unit/adapters/fixtures/gemini_native/05_multi_turn.json new file mode 100644 index 0000000..c173eea --- /dev/null +++ b/tests/unit/adapters/fixtures/gemini_native/05_multi_turn.json @@ -0,0 +1,82 @@ +{ + "_model_id": "gemini-2.5-flash", + "_response": { + "sdk_http_response": { + "headers": { + "x-gemini-service-tier": "standard", + "content-type": "application/json; charset=UTF-8", + "vary": "Origin, X-Origin, Referer", + "content-encoding": "gzip", + "date": "Fri, 29 May 2026 11:03:49 GMT", + "server": "scaffolding on 
HTTPServer2", + "x-xss-protection": "0", + "x-frame-options": "SAMEORIGIN", + "x-content-type-options": "nosniff", + "server-timing": "gfet4t7; dur=1538", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "transfer-encoding": "chunked" + }, + "body": null + }, + "candidates": [ + { + "content": { + "parts": [ + { + "media_resolution": null, + "code_execution_result": null, + "executable_code": null, + "file_data": null, + "function_call": null, + "function_response": null, + "inline_data": null, + "text": "Okay, taking the previous result (4) and multiplying it by 3:\n\n4 * 3 = 12", + "thought": null, + "thought_signature": null, + "video_metadata": null, + "tool_call": null, + "tool_response": null, + "part_metadata": null + } + ], + "role": "model" + }, + "citation_metadata": null, + "finish_message": null, + "token_count": null, + "finish_reason": "STOP", + "grounding_metadata": null, + "avg_logprobs": null, + "index": 0, + "logprobs_result": null, + "safety_ratings": null, + "url_context_metadata": null + } + ], + "create_time": null, + "model_version": "gemini-2.5-flash", + "prompt_feedback": null, + "response_id": "lHIZasOaHvu3vdIP-9KoyQQ", + "usage_metadata": { + "cache_tokens_details": null, + "cached_content_token_count": null, + "candidates_token_count": 25, + "candidates_tokens_details": null, + "prompt_token_count": 22, + "prompt_tokens_details": [ + { + "modality": "TEXT", + "token_count": 22 + } + ], + "thoughts_token_count": 147, + "tool_use_prompt_token_count": null, + "tool_use_prompt_tokens_details": null, + "total_token_count": 194, + "traffic_type": null + }, + "model_status": null, + "automatic_function_calling_history": [], + "parsed": null + } +} \ No newline at end of file diff --git a/tests/unit/adapters/test_gemini_native.py b/tests/unit/adapters/test_gemini_native.py new file mode 100644 index 0000000..d98c1c6 --- /dev/null +++ b/tests/unit/adapters/test_gemini_native.py @@ -0,0 +1,217 @@ +"""Gemini native adapter — 
verified against real fixtures captured via google-genai.""" + +from __future__ import annotations + +import json +import pathlib + +from lago_agent_sdk.adapters import extract_gemini_native + +FIX = pathlib.Path(__file__).parent / "fixtures" / "gemini_native" + + +def _load(name: str) -> tuple[str, dict]: + data = json.loads((FIX / name).read_text()) + return data["_model_id"], data["_response"] + + +# -------------------------------------------------------------------------- +# Real fixtures +# -------------------------------------------------------------------------- +def test_plain_flash() -> None: + """Plain call to gemini-2.5-flash: input/output/reasoning all populated.""" + model_id, resp = _load("01_plain_flash.json") + u = extract_gemini_native(resp, model_id=model_id) + assert u.input == 7 + assert u.output == 23 + # Gemini 2.5 emits thoughts even without explicit thinking config + assert u.reasoning == 442 + assert u.tool_calls == 0 + assert u.cache_read == 0 + assert u.api == "native" + assert u.provider == "gemini" + + +def test_tool_use_counts_function_calls() -> None: + """A function_call in candidates[0].content.parts[] increments tool_calls.""" + model_id, resp = _load("02_tool_use.json") + u = extract_gemini_native(resp, model_id=model_id) + assert u.input == 49 + assert u.output == 15 + assert u.tool_calls == 1 + + +def test_streaming_final_chunk_carries_usage() -> None: + """The streaming wrapper grabs usage from the last chunk that has it.""" + model_id, resp = _load("03_streaming.json") + chunks = resp["chunks"] + final = next((c for c in reversed(chunks) if c.get("usage_metadata")), None) + assert final is not None + u = extract_gemini_native(final, model_id=model_id) + assert u.input == 14 + assert u.output == 9 + assert u.reasoning == 29 + + +def test_thinking_mode_populates_reasoning() -> None: + """Gemini 2.5 with explicit thinking_config emits a large thoughts_token_count.""" + model_id, resp = _load("04_thinking.json") + u = 
extract_gemini_native(resp, model_id=model_id) + assert u.input == 27 + assert u.output == 1003 + assert u.reasoning == 1546 + # Math check: candidates + thoughts + prompt = total (additive, not subset) + assert u.input + u.output + u.reasoning == 2576 # matches usage_metadata.total_token_count + + +def test_multi_turn() -> None: + model_id, resp = _load("05_multi_turn.json") + u = extract_gemini_native(resp, model_id=model_id) + assert u.input == 22 + assert u.output == 25 + + +# -------------------------------------------------------------------------- +# Synthetic — edge cases the fixtures didn't cover (no real audio/image test traffic) +# -------------------------------------------------------------------------- +def test_audio_input_from_modality_details() -> None: + """Multimodal AUDIO input lives in usage_metadata.prompt_tokens_details[modality=AUDIO].""" + resp = { + "usage_metadata": { + "prompt_token_count": 1000, + "candidates_token_count": 50, + "prompt_tokens_details": [ + {"modality": "TEXT", "token_count": 200}, + {"modality": "AUDIO", "token_count": 800}, + ], + } + } + u = extract_gemini_native(resp, model_id="gemini-2.5-flash") + assert u.input == 1000 + assert u.audio_input == 800 + assert u.image_input == 0 + + +def test_image_input_from_modality_details() -> None: + resp = { + "usage_metadata": { + "prompt_token_count": 500, + "candidates_token_count": 50, + "prompt_tokens_details": [ + {"modality": "TEXT", "token_count": 300}, + {"modality": "IMAGE", "token_count": 200}, + ], + } + } + u = extract_gemini_native(resp, model_id="gemini-2.5-flash") + assert u.image_input == 200 + + +def test_audio_output_from_modality_details() -> None: + """Audio output (e.g. 
TTS-capable model) lives in candidates_tokens_details[modality=AUDIO].""" + resp = { + "usage_metadata": { + "prompt_token_count": 50, + "candidates_token_count": 1500, + "candidates_tokens_details": [ + {"modality": "AUDIO", "token_count": 1500}, + ], + } + } + u = extract_gemini_native(resp, model_id="gemini-2.5-flash-audio") + assert u.audio_output == 1500 + + +def test_cached_content_token_count() -> None: + """When CachedContent API has been primed, cached_content_token_count fires.""" + resp = { + "usage_metadata": { + "prompt_token_count": 5000, + "candidates_token_count": 30, + "cached_content_token_count": 4800, + } + } + u = extract_gemini_native(resp, model_id="gemini-2.5-flash") + assert u.cache_read == 4800 + + +def test_multiple_function_calls_counted() -> None: + resp = { + "usage_metadata": {"prompt_token_count": 10, "candidates_token_count": 20}, + "candidates": [ + { + "content": { + "parts": [ + {"text": "..."}, + {"function_call": {"name": "fn1"}}, + {"function_call": {"name": "fn2"}}, + {"function_call": {"name": "fn3"}}, + ] + } + } + ], + } + u = extract_gemini_native(resp, model_id="gemini-2.5-flash") + assert u.tool_calls == 3 + + +def test_handles_pydantic_via_model_dump() -> None: + class FakePydantic: + def model_dump(self) -> dict: + return { + "model_version": "gemini-2.5-flash", + "candidates": [ + {"content": {"parts": [{"function_call": {"name": "x"}}]}} + ], + "usage_metadata": { + "prompt_token_count": 10, + "candidates_token_count": 20, + "thoughts_token_count": 5, + }, + } + + u = extract_gemini_native(FakePydantic(), model_id="gemini-2.5-flash") + assert u.input == 10 + assert u.output == 20 + assert u.reasoning == 5 + assert u.tool_calls == 1 + assert u.api == "native" + + +def test_no_usage_metadata_returns_zeros() -> None: + u = extract_gemini_native({}, model_id="gemini-2.5-flash") + assert u.input == 0 + assert u.output == 0 + assert not u.nonzero_numeric() + + +def test_survives_non_dict_usage_metadata() -> None: + assert 
extract_gemini_native({"usage_metadata": True}, model_id="x").input == 0 + assert extract_gemini_native({"usage_metadata": "bogus"}, model_id="x").output == 0 + assert extract_gemini_native(None, model_id="x").input == 0 + + +def test_unknown_usage_field_lands_in_extras() -> None: + """If Google adds a new top-level usage field, drift detection picks it up.""" + resp = { + "usage_metadata": { + "prompt_token_count": 10, + "candidates_token_count": 20, + "future_field_xyz": "novel", + } + } + u = extract_gemini_native(resp, model_id="gemini-2.5-flash") + assert u.extras.get("future_field_xyz") == "novel" + + +def test_traffic_type_lands_in_known_fields_not_extras() -> None: + """traffic_type is a known metadata field; it shouldn't leak into extras.""" + resp = { + "usage_metadata": { + "prompt_token_count": 10, + "candidates_token_count": 20, + "traffic_type": "PAID", + } + } + u = extract_gemini_native(resp, model_id="gemini-2.5-flash") + assert "traffic_type" not in u.extras diff --git a/tests/unit/test_wrapper_gemini.py b/tests/unit/test_wrapper_gemini.py new file mode 100644 index 0000000..a234bfd --- /dev/null +++ b/tests/unit/test_wrapper_gemini.py @@ -0,0 +1,214 @@ +"""Gemini wrapper tests — fake client, no live API.""" + +from __future__ import annotations + +from typing import Any + +from lago_agent_sdk import LagoSDK + + +# ---------------------------------------------------------------------- +# Fake google-genai client mimicking genai.Client.models surface area +# ---------------------------------------------------------------------- +class FakePydanticResponse: + def __init__(self, payload: dict): + self._payload = payload + + def model_dump(self) -> dict: + return self._payload + + +class FakeStreamChunk: + def __init__(self, payload: dict): + self._payload = payload + + def model_dump(self) -> dict: + return self._payload + + +class FakeModels: + def __init__(self) -> None: + self.generate_calls = 0 + self.stream_calls = 0 + + def 
generate_content(self, **kwargs: Any) -> Any: + self.generate_calls += 1 + assert "extra_lago" not in kwargs + return FakePydanticResponse( + { + "model_version": kwargs.get("model", "gemini-2.5-flash"), + "candidates": [ + {"content": {"parts": [{"text": "hi"}]}, "finish_reason": "STOP"} + ], + "usage_metadata": { + "prompt_token_count": 7, + "candidates_token_count": 23, + "thoughts_token_count": 0, + "total_token_count": 30, + }, + } + ) + + def generate_content_stream(self, **kwargs: Any) -> Any: + self.stream_calls += 1 + assert "extra_lago" not in kwargs + chunks = [ + FakeStreamChunk( + { + "candidates": [{"content": {"parts": [{"text": "hi"}]}}], + "usage_metadata": None, # intermediate chunks don't carry usage + } + ), + FakeStreamChunk( + { + "candidates": [{"content": {"parts": [{"text": "."}]}, "finish_reason": "STOP"}], + "usage_metadata": { + "prompt_token_count": 9, + "candidates_token_count": 4, + "thoughts_token_count": 0, + "total_token_count": 13, + }, + } + ), + ] + return iter(chunks) + + +class FakeGeminiClient: + """Mimics `from google import genai; genai.Client(api_key=...)`.""" + + __module__ = "google.genai.client" + + def __init__(self) -> None: + self.models = FakeModels() + # No .aio in this fake — tests cover the sync path only + + +# ---------------------------------------------------------------------- +# Helpers (same pattern as Bedrock/Mistral wrapper tests) +# ---------------------------------------------------------------------- +def _make_sdk(default_sub: str = "sub_test") -> tuple[LagoSDK, list]: + received: list = [] + sdk = LagoSDK(api_key="dummy", default_subscription_id=default_sub) + sdk._queue._sender = lambda b: received.append(list(b)) # type: ignore[attr-defined] + return sdk, received + + +# ---------------------------------------------------------------------- +# Tests +# ---------------------------------------------------------------------- +def test_wrap_generate_content_emits_input_and_output() -> None: + sdk, 
received = _make_sdk() + fake = FakeGeminiClient() + client = sdk.wrap(fake) + resp = client.models.generate_content(model="gemini-2.5-flash", contents="hi") + assert resp.model_dump()["usage_metadata"]["prompt_token_count"] == 7 + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + flat = [e for batch in received for e in batch] + by_code = {e["code"]: int(float(e["properties"]["value"])) for e in flat} + assert by_code["llm_input_tokens"] == 7 + assert by_code["llm_output_tokens"] == 23 + + +def test_wrap_strips_extra_lago_kwarg_and_uses_per_call_sub() -> None: + sdk, received = _make_sdk("sub_default") + fake = FakeGeminiClient() + client = sdk.wrap(fake) + client.models.generate_content( + model="gemini-2.5-flash", + contents="hi", + extra_lago={"subscription": "sub_per_call", "dimensions": {"feature": "X"}}, + ) + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + flat = [e for batch in received for e in batch] + assert all(e["external_subscription_id"] == "sub_per_call" for e in flat) + assert flat[0]["properties"]["feature"] == "X" + + +def test_wrap_double_wrap_is_idempotent() -> None: + sdk, received = _make_sdk() + fake = FakeGeminiClient() + sdk.wrap(fake) + sdk.wrap(fake) + sdk.wrap(fake) + fake.models.generate_content(model="gemini-2.5-flash", contents="hi") + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + flat = [e for batch in received for e in batch] + # 2 events from 1 call (no triple-wrap = no 6 events) + assert len(flat) == 2 + assert fake.models.generate_calls == 1 + + +def test_wrap_generate_content_stream_captures_usage_from_final_chunk() -> None: + sdk, received = _make_sdk() + fake = FakeGeminiClient() + client = sdk.wrap(fake) + chunks = list(client.models.generate_content_stream(model="gemini-2.5-flash", contents="hi")) + assert len(chunks) == 2 + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + flat = [e for batch in received for e in batch] + by_code = {e["code"]: 
int(float(e["properties"]["value"])) for e in flat} + assert by_code["llm_input_tokens"] == 9 + assert by_code["llm_output_tokens"] == 4 + + +def test_wrap_thinking_emits_reasoning_separately() -> None: + """Gemini 2.5 emits thoughts_token_count → llm_reasoning_tokens event.""" + sdk, received = _make_sdk() + + class ThinkingModels: + def generate_content(self, **kwargs): + return FakePydanticResponse( + { + "usage_metadata": { + "prompt_token_count": 10, + "candidates_token_count": 50, + "thoughts_token_count": 200, + } + } + ) + + class ThinkingClient: + __module__ = "google.genai.client" + + def __init__(self): + self.models = ThinkingModels() + + client = sdk.wrap(ThinkingClient()) + client.models.generate_content(model="gemini-2.5-flash", contents="hi") + assert sdk.flush(timeout=2.0) + sdk.shutdown(timeout=1.0) + flat = [e for batch in received for e in batch] + by_code = {e["code"]: int(float(e["properties"]["value"])) for e in flat} + assert by_code["llm_input_tokens"] == 10 + assert by_code["llm_output_tokens"] == 50 + assert by_code["llm_reasoning_tokens"] == 200 + + +def test_wrap_instrumentation_failure_does_not_break_call() -> None: + """Adapter failure must not propagate to the customer's call.""" + sdk, _ = _make_sdk() + + class BadResp: + def model_dump(self): + raise RuntimeError("boom") + + class BadModels: + def generate_content(self, **_kw): + return BadResp() + + class BadClient: + __module__ = "google.genai.client" + + def __init__(self): + self.models = BadModels() + + client = sdk.wrap(BadClient()) + # Must not raise even though our adapter will crash on this response + resp = client.models.generate_content(model="x", contents="hi") + assert resp is not None + sdk.shutdown(timeout=1.0) From a5b4511608893c69c5609aa92eb1b20ae4f717a5 Mon Sep 17 00:00:00 2001 From: Anass Date: Fri, 29 May 2026 14:42:16 +0200 Subject: [PATCH 5/5] Apply ruff format to OpenAI + Gemini files CI runs `ruff format --check` which was failing because earlier dev only 
ran `ruff check` (linter) locally, not the formatter. Auto-formatting restores whitespace consistency in: - src/lago_agent_sdk/adapters/gemini_native.py - src/lago_agent_sdk/wrappers/openai.py - tests/unit/adapters/fixtures/capture_openai.py - tests/unit/adapters/test_gemini_native.py - tests/unit/test_wrapper_gemini.py No functional changes. --- src/lago_agent_sdk/adapters/gemini_native.py | 8 ++++---- src/lago_agent_sdk/wrappers/openai.py | 4 +++- tests/unit/adapters/fixtures/capture_openai.py | 3 +-- tests/unit/adapters/test_gemini_native.py | 4 +--- tests/unit/test_wrapper_gemini.py | 4 +--- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/lago_agent_sdk/adapters/gemini_native.py b/src/lago_agent_sdk/adapters/gemini_native.py index 8cb0f06..f3bdc96 100644 --- a/src/lago_agent_sdk/adapters/gemini_native.py +++ b/src/lago_agent_sdk/adapters/gemini_native.py @@ -74,9 +74,7 @@ def _to_dict(obj: Any) -> dict[str, Any]: return {} -def _modality_token_count( - details: list[dict[str, Any]] | Any, modality: str -) -> int: +def _modality_token_count(details: list[dict[str, Any]] | Any, modality: str) -> int: """Sum token_count from a list of {modality, token_count} entries matching the given modality.""" if not isinstance(details, list): return 0 @@ -128,7 +126,9 @@ def extract_gemini_native(response: Any, model_id: str = "") -> CanonicalUsage: audio_output=_modality_token_count(candidates_details, "AUDIO"), image_input=_modality_token_count(prompt_details, "IMAGE"), tool_calls=_count_tool_calls(resp), - model=model_id or (resp.get("model_version") if isinstance(resp.get("model_version"), str) else "") or "", + model=model_id + or (resp.get("model_version") if isinstance(resp.get("model_version"), str) else "") + or "", provider="gemini", api="native", extras=extras, diff --git a/src/lago_agent_sdk/wrappers/openai.py b/src/lago_agent_sdk/wrappers/openai.py index 1864986..f015154 100644 --- a/src/lago_agent_sdk/wrappers/openai.py +++ 
b/src/lago_agent_sdk/wrappers/openai.py @@ -165,7 +165,9 @@ async def _wrap_async_stream(src: AsyncIterator[Any]) -> AsyncIterator[Any]: original_chat_create = getattr(completions, "create", None) if original_chat_create is not None: completions.create = ( - _make_async_create(original_chat_create) if is_async else _make_sync_create(original_chat_create) + _make_async_create(original_chat_create) + if is_async + else _make_sync_create(original_chat_create) ) # ------------------------------------------------------------------ diff --git a/tests/unit/adapters/fixtures/capture_openai.py b/tests/unit/adapters/fixtures/capture_openai.py index 5bcdd25..ed7f8bf 100644 --- a/tests/unit/adapters/fixtures/capture_openai.py +++ b/tests/unit/adapters/fixtures/capture_openai.py @@ -88,8 +88,7 @@ def main() -> int: # ----- 3. Cache hit attempt — long prompt sent twice (OpenAI auto-caches >1024 tokens) ----- print("\n[3] cache attempt — long prompt, call 1 then call 2") long_prompt = ( - "You are an extremely thorough expert tutor. Answer concisely and cite reasoning step by step. " - * 200 + "You are an extremely thorough expert tutor. Answer concisely and cite reasoning step by step. 
" * 200 ) msgs = [ {"role": "system", "content": long_prompt}, diff --git a/tests/unit/adapters/test_gemini_native.py b/tests/unit/adapters/test_gemini_native.py index d98c1c6..ffaab30 100644 --- a/tests/unit/adapters/test_gemini_native.py +++ b/tests/unit/adapters/test_gemini_native.py @@ -160,9 +160,7 @@ class FakePydantic: def model_dump(self) -> dict: return { "model_version": "gemini-2.5-flash", - "candidates": [ - {"content": {"parts": [{"function_call": {"name": "x"}}]}} - ], + "candidates": [{"content": {"parts": [{"function_call": {"name": "x"}}]}}], "usage_metadata": { "prompt_token_count": 10, "candidates_token_count": 20, diff --git a/tests/unit/test_wrapper_gemini.py b/tests/unit/test_wrapper_gemini.py index a234bfd..b1f84f5 100644 --- a/tests/unit/test_wrapper_gemini.py +++ b/tests/unit/test_wrapper_gemini.py @@ -37,9 +37,7 @@ def generate_content(self, **kwargs: Any) -> Any: return FakePydanticResponse( { "model_version": kwargs.get("model", "gemini-2.5-flash"), - "candidates": [ - {"content": {"parts": [{"text": "hi"}]}, "finish_reason": "STOP"} - ], + "candidates": [{"content": {"parts": [{"text": "hi"}]}, "finish_reason": "STOP"}], "usage_metadata": { "prompt_token_count": 7, "candidates_token_count": 23,