Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 37 additions & 17 deletions baseten/client/_inference.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from __future__ import annotations

from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any

import httpx

import baseten.client.inferenceapi
from baseten.client._user_agent import with_user_agent


@dataclass(frozen=True)
Expand All @@ -19,6 +21,9 @@ class InferenceClientOptions:
api_key: str
"""API key for authentication."""

headers: Mapping[str, str] | None = None
"""Additional headers to send on every request."""

model_id: str | None = None
"""Model ID. Mutually exclusive with *chain_id*."""

Expand Down Expand Up @@ -84,48 +89,56 @@ def __init__(
self,
*,
api_key: str,
headers: Mapping[str, str] | None = None,
model_id: str | None = None,
chain_id: str | None = None,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why you can specify IDs of two differently typed inference targets for one client.

My reasoning is that a user must know and in advance decide whether they want to invoke a chain or a model, and if that's so, isn't it cleaner to also select the respective inference client type to init?
This hinges now on the assumption that both chains and models always have exact semantics and types for invoking.

Was the goal here more about impl/code sharing or about not bothering the user with two distinct interfaces/types? But would the latter be even good, rather than obfuscating/misleading?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also think the "exactly one must be provided"-semantics is both awkward for the code we have to write for checking this at runtime.
But also client side there is no linter/type check that helps enforcing this properly before runtime.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My reasoning is that a user must know and in advance decide whether they want to invoke a chain or a model, and if that's so, isn't it cleaner to also select the respective inference client type to init?

Cleaner to whom, us and our type checker, or the user? Which looks cleaner to you: InferenceClient(api_key=my_api_key, chain_id=my_chain_id) or InferenceClient(api_key=my_api_key, ref=ChainReference(chain_id=my_chain_id)). It is very clearly the former. However, if it is of great concern to have kwarg mutual exclusivity in the type system, we can leverage @overload to prevent both together (though this can get messy as it grows).

Was the goal here more about impl/code sharing or about not bothering the user with two distinct interfaces/types? But would the latter be even good, rather than obfuscating/misleading?

It was more to match the API/spec surface. So we have https://docs.baseten.co/reference/inference-api/, not separate APIs. The only difference is in the construction. It is more misleading to have some places at Baseten share an API surface under a common concept (inference API in this case) and some split.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I understand your reasoning. InferenceClient(api_key=my_api_key, chain_id=my_chain_id) does look clean, but I was thinking about InferenceClient(api_key=my_api_key) or InferenceClient(api_key=my_api_key, chain_id=my_chain_id, model_id=...).
@overload is actually the ideal solution here, I think you should add that.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do that no prob (will do on successive PR)

environment: str | None = None,
base_url_override: str | None = None,
http_client: httpx.Client | None = None,
http_client_override: httpx.Client | None = None,
close_http_client_on_close: bool | None = None,
) -> None:
"""Create a new synchronous inference client.

Args:
api_key: API key for authentication.
headers: Additional headers to send on every request.
model_id: Model ID. Mutually exclusive with *chain_id*.
chain_id: Chain ID. Mutually exclusive with *model_id*.
environment: Environment name for regional routing (e.g. ``"production"``).
base_url_override: Override the computed base URL. When set,
*model_id*, *chain_id*, and *environment* are ignored.
http_client: Pre-configured httpx client. When provided, the
caller is responsible for setting base URL and auth headers.
http_client_override: Pre-configured httpx client. When provided,
the caller is responsible for setting base URL and all
headers.
close_http_client_on_close: Whether :meth:`close` should close
the underlying HTTP client. Defaults to ``True`` when the
client is created internally, ``False`` when *http_client*
is provided.
client is created internally, ``False`` when
*http_client_override* is provided.
"""
self._options = InferenceClientOptions(
api_key=api_key,
headers=headers,
model_id=model_id,
chain_id=chain_id,
environment=environment,
base_url_override=base_url_override,
)
if http_client is None:
if http_client_override is None:
request_headers: dict[str, str] = {**(headers or {})}
# Empty api_key is an advanced opt-out from sending Authorization.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the use case for this? Testing/dev only?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To give more full freedom. Technically forcing auth headers on people with 0 workaround can be bad practice for those that have reverse proxies (that only set header on the way upstream). I just didn't want to make API key optional for this rare use case, but it does happen in the wild for API clients like these.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, so this is against the "DUMMY_KEY" value that you'd otherwise need to set.

Though this does stand in tension with fail loud, early and actionable. Setting DUMMY_KEY must be done with clear intent and understanding. Forgetting an API key could be a common mistake. If that happens, the question is how clear would it surface to users that this is the problem and what they need to do to fix it.

Copy link
Copy Markdown
Collaborator Author

@cretz cretz May 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this is against the "DUMMY_KEY" value that you'd otherwise need to set.

We were actually force-overriding until this point, there was nothing you could do short of providing your own HTTP client instance

Forgetting an API key could be a common mistake

This is why I don't accept None as a type. But yeah, absent key or bad key have the same server-side auth fail upon invocation. I could accept api_key: str | IDoNotWantAuthHeaderSentinel type of thing, but not even trying to broadcast this via type system (note empty string behavior not in docstring).

if api_key != "":
request_headers["Authorization"] = f"Api-Key {api_key}"
self._http_client = httpx.Client(
base_url=self._options.base_url,
headers={"Authorization": f"Api-Key {api_key}"},
headers=with_user_agent(request_headers),
)
self.close_http_client_on_close = (
True
if close_http_client_on_close is None
else close_http_client_on_close
)
else:
self._http_client = http_client
self._http_client = http_client_override
self.close_http_client_on_close = (
False
if close_http_client_on_close is None
Expand Down Expand Up @@ -191,49 +204,56 @@ def __init__(
self,
*,
api_key: str,
headers: Mapping[str, str] | None = None,
model_id: str | None = None,
chain_id: str | None = None,
environment: str | None = None,
base_url_override: str | None = None,
http_client: httpx.AsyncClient | None = None,
http_client_override: httpx.AsyncClient | None = None,
close_http_client_on_close: bool | None = None,
) -> None:
"""Create a new asynchronous inference client.

Args:
api_key: API key for authentication.
headers: Additional headers to send on every request.
model_id: Model ID. Mutually exclusive with *chain_id*.
chain_id: Chain ID. Mutually exclusive with *model_id*.
environment: Environment name for regional routing (e.g. ``"production"``).
base_url_override: Override the computed base URL. When set,
*model_id*, *chain_id*, and *environment* are ignored.
http_client: Pre-configured httpx async client. When provided,
the caller is responsible for setting base URL and auth
headers.
http_client_override: Pre-configured httpx async client. When
provided, the caller is responsible for setting base URL
and all headers.
close_http_client_on_close: Whether :meth:`close` should close
the underlying HTTP client. Defaults to ``True`` when the
client is created internally, ``False`` when *http_client*
is provided.
client is created internally, ``False`` when
*http_client_override* is provided.
"""
self._options = InferenceClientOptions(
api_key=api_key,
headers=headers,
model_id=model_id,
chain_id=chain_id,
environment=environment,
base_url_override=base_url_override,
)
if http_client is None:
if http_client_override is None:
request_headers: dict[str, str] = {**(headers or {})}
# Empty api_key is an advanced opt-out from sending Authorization.
if api_key != "":
request_headers["Authorization"] = f"Api-Key {api_key}"
self._http_client = httpx.AsyncClient(
base_url=self._options.base_url,
headers={"Authorization": f"Api-Key {api_key}"},
headers=with_user_agent(request_headers),
)
self.close_http_client_on_close = (
True
if close_http_client_on_close is None
else close_http_client_on_close
)
else:
self._http_client = http_client
self._http_client = http_client_override
self.close_http_client_on_close = (
False
if close_http_client_on_close is None
Expand Down
56 changes: 37 additions & 19 deletions baseten/client/_management.py
Comment thread
marius-baseten marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from __future__ import annotations

from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any

import httpx

import baseten.client.managementapi
from baseten.client._user_agent import with_user_agent


@dataclass(frozen=True)
Expand All @@ -19,6 +21,9 @@ class ManagementClientOptions:
api_key: str
"""API key for authentication."""

headers: Mapping[str, str] | None = None
"""Additional headers to send on every request."""

base_url_override: str | None = None
"""Explicit base URL override, or ``None`` to use the default."""

Expand Down Expand Up @@ -46,38 +51,45 @@ def __init__(
self,
*,
api_key: str,
headers: Mapping[str, str] | None = None,
base_url_override: str | None = None,
http_client: httpx.Client | None = None,
http_client_override: httpx.Client | None = None,
close_http_client_on_close: bool | None = None,
) -> None:
"""Create a new synchronous management client.

Args:
api_key: API key for authentication.
headers: Additional headers to send on every request.
base_url_override: Override the default base URL. When ``None``,
:meth:`default_base_url` is used.
http_client: Pre-configured httpx client. When provided, the
caller is responsible for setting base URL and auth headers.
http_client_override: Pre-configured httpx client. When provided,
the caller is responsible for setting base URL and all
headers.
close_http_client_on_close: Whether :meth:`close` should close
the underlying HTTP client. Defaults to ``True`` when the
client is created internally, ``False`` when *http_client*
is provided.
client is created internally, ``False`` when
*http_client_override* is provided.
"""
self._options = ManagementClientOptions(
api_key=api_key, base_url_override=base_url_override
api_key=api_key, headers=headers, base_url_override=base_url_override
)
if http_client is None:
if http_client_override is None:
request_headers: dict[str, str] = {**(headers or {})}
# Empty api_key is an advanced opt-out from sending Authorization.
if api_key != "":
request_headers["Authorization"] = f"Api-Key {api_key}"
self._http_client = httpx.Client(
base_url=self._options.base_url,
headers={"Authorization": f"Api-Key {api_key}"},
headers=with_user_agent(request_headers),
)
self.close_http_client_on_close = (
True
if close_http_client_on_close is None
else close_http_client_on_close
)
else:
self._http_client = http_client
self._http_client = http_client_override
self.close_http_client_on_close = (
False
if close_http_client_on_close is None
Expand Down Expand Up @@ -132,39 +144,45 @@ def __init__(
self,
*,
api_key: str,
headers: Mapping[str, str] | None = None,
base_url_override: str | None = None,
http_client: httpx.AsyncClient | None = None,
http_client_override: httpx.AsyncClient | None = None,
close_http_client_on_close: bool | None = None,
) -> None:
"""Create a new asynchronous management client.

Args:
api_key: API key for authentication.
headers: Additional headers to send on every request.
base_url_override: Override the default base URL. When ``None``,
:meth:`default_base_url` is used.
http_client: Pre-configured httpx async client. When provided,
the caller is responsible for setting base URL and auth
headers.
http_client_override: Pre-configured httpx async client. When
provided, the caller is responsible for setting base URL
and all headers.
close_http_client_on_close: Whether :meth:`close` should close
the underlying HTTP client. Defaults to ``True`` when the
client is created internally, ``False`` when *http_client*
is provided.
client is created internally, ``False`` when
*http_client_override* is provided.
"""
self._options = ManagementClientOptions(
api_key=api_key, base_url_override=base_url_override
api_key=api_key, headers=headers, base_url_override=base_url_override
)
if http_client is None:
if http_client_override is None:
request_headers: dict[str, str] = {**(headers or {})}
# Empty api_key is an advanced opt-out from sending Authorization.
if api_key != "":
request_headers["Authorization"] = f"Api-Key {api_key}"
self._http_client = httpx.AsyncClient(
base_url=self._options.base_url,
headers={"Authorization": f"Api-Key {api_key}"},
headers=with_user_agent(request_headers),
)
self.close_http_client_on_close = (
True
if close_http_client_on_close is None
else close_http_client_on_close
)
else:
self._http_client = http_client
self._http_client = http_client_override
self.close_http_client_on_close = (
False
if close_http_client_on_close is None
Expand Down
25 changes: 25 additions & 0 deletions baseten/client/_user_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import platform
from collections.abc import Mapping
from importlib.metadata import PackageNotFoundError, version


def _package_version() -> str:
try:
return version("baseten")
except PackageNotFoundError:
return "dev"


def user_agent_header() -> str:
"""Build a User-Agent value like ``baseten-python/0.9.0 (Python/3.13.2; Linux)``."""
return (
f"baseten-python/{_package_version()} "
Comment thread
marius-baseten marked this conversation as resolved.
f"(Python/{platform.python_version()}; {platform.system()})"
)


def with_user_agent(headers: Mapping[str, str]) -> dict[str, str]:
"""Return a copy of ``headers`` with our User-Agent set, unless one is already present."""
if any(key.lower() == "user-agent" for key in headers):
return dict(headers)
return {**headers, "User-Agent": user_agent_header()}
Loading
Loading