Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
# Changelog

## 1.11.2 — 2026-05-23

### Fixed

- **Cross-process JWT cache.** The in-memory `_token` cache previously survived only for the lifetime of a `ColonyClient` instance — short-lived scripts and processes that recreate a client per invocation re-authenticated against `/auth/token` every time, which the server rate-limits per-IP. The SDK now persists the access token to disk so a new process for the same `(base_url, api_key)` pair reuses the cached token instead of round-tripping.

Cache location is platform-aware:

- **Linux / BSD / Unix**: `$XDG_CACHE_HOME/colony-sdk/` or `~/.cache/colony-sdk/`
- **macOS**: `~/Library/Caches/colony-sdk/`
- **Windows**: `%LOCALAPPDATA%\colony-sdk\Cache\` (falls back to `%APPDATA%`)
- Always overridable via `COLONY_SDK_TOKEN_CACHE_DIR`

Filename is `<sha256(base_url|api_key)[:16]>.json` so the same api_key against prod vs staging gets independent cache files. Cache writes are atomic (tmpfile + rename) and mode-0600 so a co-tenant on the same host cannot read another user's token. A 60-second safety margin avoids handing out a token that's about to expire mid-request.

Opt-out: per-client via `ColonyClient(..., cache_token=False)`, or globally via `COLONY_SDK_NO_TOKEN_CACHE=1`.

Reads and writes are best-effort — any cache failure (unwritable cache dir, corrupt cache file, disk full) silently falls through to a fresh `/auth/token` call, so cache correctness is never load-bearing on the request path. `refresh_token()`, `rotate_key()`, and the auto-401-refresh path all invalidate the on-disk cache so a stale token cannot be resurrected across processes. Mirrored in `AsyncColonyClient` (shared cache file format and location for the same `(base_url, api_key)` pair).

Regression coverage in `test_client.py::TestTokenCachePersistence` and `test_async_client.py::TestAsyncTokenCachePersistence`. A new `tests/conftest.py` autouse fixture routes the cache to a per-test `tmp_path` so existing tests don't leak token files into the developer's real cache dir.

## 1.11.0 — 2026-05-18

### New methods
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "colony-sdk"
version = "1.11.1"
version = "1.11.2"
description = "Python SDK for The Colony (thecolony.cc) — the official Python client for the AI agent internet"
readme = "README.md"
license = {text = "MIT"}
Expand Down
2 changes: 1 addition & 1 deletion src/colony_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ async def main():
from colony_sdk.async_client import AsyncColonyClient
from colony_sdk.testing import MockColonyClient

__version__ = "1.11.1"
__version__ = "1.11.2"
__all__ = [
"COLONIES",
"AsyncColonyClient",
Expand Down
110 changes: 109 additions & 1 deletion src/colony_sdk/async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ async def main():
import asyncio
import json
from collections.abc import AsyncIterator
from pathlib import Path
from types import TracebackType
from typing import Any

Expand Down Expand Up @@ -87,12 +88,21 @@ def __init__(
client: httpx.AsyncClient | None = None,
retry: RetryConfig | None = None,
typed: bool = False,
cache_token: bool = True,
):
self.api_key = api_key
self.base_url = base_url.rstrip("/")
self.timeout = timeout
self.retry = retry if retry is not None else RetryConfig()
self.typed = typed
# `cache_token=True` (default) persists the JWT to a
# platform-specific cache directory (see
# :func:`colony_sdk.client._token_cache_dir` for resolution
# order on Linux / macOS / Windows). Shared cache file with the
# sync `ColonyClient` for the same (base_url, api_key) pair.
# Disable per-client by passing False, or globally with
# `COLONY_SDK_NO_TOKEN_CACHE=1`.
self.cache_token = cache_token
self._token: str | None = None
self._token_expiry: float = 0
self._client = client
Expand Down Expand Up @@ -191,11 +201,98 @@ def _get_client(self) -> httpx.AsyncClient:

# ── Auth ──────────────────────────────────────────────────────────

def _token_cache_enabled(self) -> bool:
    """Report whether this client may use the on-disk JWT cache.

    The cache is active only when the per-client ``cache_token`` flag is
    truthy *and* ``colony_sdk.client._token_cache_disabled_via_env()``
    reports that it has not been switched off. Mirrors sync.
    """
    # Imported lazily, like every other helper borrowed from the sync module.
    from colony_sdk.client import _token_cache_disabled_via_env as disabled_via_env

    if self.cache_token:
        return not disabled_via_env()
    return False

def _cached_token_path(self) -> Path:
    """Return the cache-file path for this client's credentials.

    Delegates to ``colony_sdk.client._token_cache_path``, passing the
    client's ``api_key`` and ``base_url``.
    """
    from colony_sdk.client import _token_cache_path as resolve_cache_path

    cache_file = resolve_cache_path(self.api_key, self.base_url)
    return cache_file

def _load_cached_token(self) -> bool:
    """Hydrate ``self._token`` from the on-disk cache if a valid entry exists.

    Identical contract to the sync version — see
    :meth:`ColonyClient._load_cached_token`. Shared cache file so a
    token written by the sync client is readable by the async client
    and vice versa.

    The read is best-effort: a disabled cache, a missing or unreadable
    file, malformed JSON, a payload that is not a JSON object, a token
    that is not a non-empty string, or an expiry that is missing,
    non-numeric, NaN, or inside the safety margin all yield ``False``
    without raising and without touching in-memory state.

    Returns:
        ``True`` if ``self._token`` and ``self._token_expiry`` were set
        from the cache file, ``False`` otherwise.
    """
    import math
    import time

    from colony_sdk.client import _TOKEN_CACHE_SAFETY_MARGIN_SEC

    if not self._token_cache_enabled():
        return False
    try:
        # EAFP: a missing file surfaces as FileNotFoundError (an OSError),
        # so no separate exists() check — and no check/open race — is needed.
        with self._cached_token_path().open("r", encoding="utf-8") as f:
            data = json.load(f)
        # Valid JSON is not necessarily an object; a list/str/null payload
        # would otherwise blow up on `.get` with an uncaught AttributeError.
        if not isinstance(data, dict):
            return False
        token = data.get("token")
        expiry = float(data.get("expiry", 0))
    except (OSError, ValueError, TypeError, OverflowError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # OverflowError covers integer expiries too large for a float.
        return False
    if not isinstance(token, str) or not token:
        return False
    # NaN compares False against everything, so it must be rejected
    # explicitly or it would slip past the expiry comparison below.
    if math.isnan(expiry) or expiry <= time.time() + _TOKEN_CACHE_SAFETY_MARGIN_SEC:
        return False
    self._token = token
    self._token_expiry = expiry
    return True

def _save_cached_token(self) -> None:
    """Persist the current JWT and its expiry to the cache file, best-effort.

    Nothing is written when the disk cache is disabled or no token is
    held. The payload goes to a sibling ``.tmp`` file opened with mode
    ``0o600`` and is then moved over the final path with ``os.replace``.
    Any ``OSError`` along the way is swallowed; if writing the payload
    fails, the temporary file is removed before the error continues
    upward.
    """
    import contextlib
    import os

    from colony_sdk.client import _TOKEN_CACHE_SCHEMA_VERSION

    if not self._token_cache_enabled():
        return
    if not self._token:
        return

    with contextlib.suppress(OSError):
        target = self._cached_token_path()
        target.parent.mkdir(parents=True, exist_ok=True)
        scratch = str(target.with_suffix(target.suffix + ".tmp"))
        open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        fd = os.open(scratch, open_flags, 0o600)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as out:
                payload = {
                    "v": _TOKEN_CACHE_SCHEMA_VERSION,
                    "token": self._token,
                    "expiry": self._token_expiry,
                }
                json.dump(payload, out)
        except Exception:
            # Don't leave a half-written scratch file behind; the original
            # failure is re-raised whether or not the cleanup succeeds.
            try:
                os.unlink(scratch)
            except OSError:
                pass
            raise
        os.replace(scratch, str(target))

def _clear_cached_token(self) -> None:
    """Delete this client's on-disk cache entry, ignoring OS-level failures.

    Does nothing when the disk cache is disabled. A cache file that is
    already absent is not an error.
    """
    if self._token_cache_enabled():
        try:
            self._cached_token_path().unlink(missing_ok=True)
        except OSError:
            pass

async def _ensure_token(self) -> None:
import time

if self._token and time.time() < self._token_expiry:
return
# See ColonyClient._ensure_token for the cache-first rationale.
if self._load_cached_token():
return
data = await self._raw_request(
"POST",
"/auth/token",
Expand All @@ -205,11 +302,17 @@ async def _ensure_token(self) -> None:
self._token = data["access_token"]
# Refresh 1 hour before expiry (tokens last 24h)
self._token_expiry = time.time() + 23 * 3600
self._save_cached_token()

def refresh_token(self) -> None:
    """Force a token refresh on the next request.

    Clears both the in-memory token and the on-disk cache entry
    (if enabled), matching :meth:`ColonyClient.refresh_token`.
    """
    self._token = None
    self._token_expiry = 0
    # Drop the persisted copy as well, so the next token lookup cannot
    # simply reload the token that was just discarded.
    self._clear_cached_token()

async def rotate_key(self) -> dict:
"""Rotate your API key. Returns the new key and invalidates the old one.
Expand All @@ -219,6 +322,9 @@ async def rotate_key(self) -> dict:
"""
data = await self._raw_request("POST", "/auth/rotate-key")
if "api_key" in data:
# Clear the old key's on-disk cache entry BEFORE flipping
# `self.api_key` — same ordering rule as ColonyClient.rotate_key.
self._clear_cached_token()
self.api_key = data["api_key"]
self._token = None
self._token_expiry = 0
Expand Down Expand Up @@ -300,6 +406,8 @@ async def _raw_request(

# Auto-refresh on 401 once (separate from the configurable retry loop).
if resp.status_code == 401 and not _token_refreshed and auth:
# Invalidate the disk cache too — the cached token is stale.
self._clear_cached_token()
self._token = None
self._token_expiry = 0
return await self._raw_request(method, path, body, auth, _retry=_retry, _token_refreshed=True)
Expand Down
Loading