#!/usr/bin/env python3
"""Release metadata consistency check (run by CI from the repository root).

Verifies that:

* the compat shim's version equals the root package version,
* the compat shim depends on ``agora-agents`` with the expected range, and
* the release workflow still contains the steps needed to publish a
  GitHub release.

Exits with status 1 and a message on stderr at the first failed check.
All paths are resolved relative to the current working directory.
"""

import re
import sys
from pathlib import Path
from typing import NoReturn

ROOT_PYPROJECT = "pyproject.toml"
COMPAT_PYPROJECT = "compat/agora-agent-server-sdk/pyproject.toml"
RELEASE_WORKFLOW = ".github/workflows/release.yml"

# (substring that must appear in the release workflow, failure message)
REQUIRED_WORKFLOW_MARKERS = (
    ("contents: write", "release workflow must have contents: write so it can create GitHub releases"),
    ("gh release create", "release workflow must create a GitHub release when one does not exist"),
    ("gh release edit", "release workflow must update an existing GitHub release"),
    ("release_notes.md", "release workflow must generate and use a release notes file"),
)


def fail(message: str) -> NoReturn:
    """Print *message* to stderr and exit with status 1."""
    print(message, file=sys.stderr)
    raise SystemExit(1)


def _read_text(path: str) -> str:
    """Return the UTF-8 contents of *path*, failing cleanly if it is unreadable."""
    try:
        # Explicit encoding: the default depends on the runner's locale.
        return Path(path).read_text(encoding="utf-8")
    except OSError as exc:
        fail(f"cannot read {path}: {exc}")


def read_version(path: str) -> str:
    """Return the first ``version = "..."`` value in *path*, without a leading ``v``."""
    match = re.search(r'^version\s*=\s*"v?([^"]+)"', _read_text(path), re.M)
    if not match:
        fail(f"version not found in {path}")
    return match.group(1)


def read_compat_dependency(path: str) -> str:
    """Return the version constraint declared for ``agora-agents`` in *path*."""
    match = re.search(r'^agora-agents\s*=\s*"([^"]+)"', _read_text(path), re.M)
    if not match:
        fail(f"agora-agents dependency not found in {path}")
    return match.group(1)


def expected_compat_dependency(root_version: str) -> str:
    """Return the constraint the compat shim must declare for *root_version*.

    The upper bound is the next major version, so the range stays satisfiable
    after a major bump (``2.1.1`` -> ``>=2.1.1,<3.0.0``).
    """
    major = re.match(r"([0-9]+)(?:\.|$)", root_version)
    if not major:
        fail(f"cannot derive major version from {root_version!r}")
    return f">={root_version},<{int(major.group(1)) + 1}.0.0"


def main() -> None:
    """Run every release metadata check; exit non-zero on the first failure."""
    root_version = read_version(ROOT_PYPROJECT)
    compat_version = read_version(COMPAT_PYPROJECT)
    compat_dependency = read_compat_dependency(COMPAT_PYPROJECT)

    if compat_version != root_version:
        fail(f"Compat package version ({compat_version}) must match root package version ({root_version}).")

    expected_dependency = expected_compat_dependency(root_version)
    if compat_dependency != expected_dependency:
        fail(f"Compat package dependency on agora-agents ({compat_dependency}) must be {expected_dependency}.")

    release_workflow = _read_text(RELEASE_WORKFLOW)
    for marker, message in REQUIRED_WORKFLOW_MARKERS:
        if marker not in release_workflow:
            fail(message)

    print("Release metadata and workflow checks passed.")


if __name__ == "__main__":
    main()
typing.Optional[int] = None, skip_vendor_validation: bool = False, + skip_vendor_validation_categories: typing.Optional[typing.AbstractSet[str]] = None, + allow_missing_vendor_categories: typing.Optional[typing.AbstractSet[str]] = None, ) -> StartAgentsRequestProperties: # Validate the MLLM + enabled-avatar combination BEFORE generating the # RTC token so callers get a clear, actionable error first (matches the @@ -895,19 +898,49 @@ def to_properties( base_kwargs["mllm"] = mllm_config return StartAgentsRequestProperties(**base_kwargs) - base_kwargs["asr"] = self._resolve_asr_config() + if skip_vendor_validation: + warnings.warn( + "skip_vendor_validation is deprecated and will be removed in a future release. " + "Use skip_vendor_validation_categories and allow_missing_vendor_categories instead.", + DeprecationWarning, + stacklevel=2, + ) + + skip_categories = set(skip_vendor_validation_categories or ()) + allow_missing_categories = set(allow_missing_vendor_categories or ()) + if skip_vendor_validation: + skip_categories.update({"asr", "llm", "tts"}) + allow_missing_categories.update({"asr", "llm", "tts"}) + + skip_asr_validation = skip_vendor_validation or "asr" in skip_categories + skip_llm_validation = skip_vendor_validation or "llm" in skip_categories + skip_tts_validation = skip_vendor_validation or "tts" in skip_categories + allow_missing_asr = "asr" in allow_missing_categories + allow_missing_llm = "llm" in allow_missing_categories + allow_missing_tts = "tts" in allow_missing_categories + + if not skip_asr_validation and (self._stt is not None or not allow_missing_asr): + base_kwargs["asr"] = self._resolve_asr_config() base_kwargs["turn_detection"] = self._resolve_turn_detection_config() if skip_vendor_validation: return StartAgentsRequestProperties(**base_kwargs) - if self._tts is None: + if self._tts is None and not (skip_tts_validation or allow_missing_tts): raise ValueError("TTS configuration is required. 
Use with_tts() to set it.") - if self._llm is None: + if self._llm is None and not (skip_llm_validation or allow_missing_llm): raise ValueError("LLM configuration is required. Use with_llm() to set it.") - llm_config = dict(self._llm) + if self._llm is not None and not skip_llm_validation: + base_kwargs["llm"] = self._resolve_llm_config() + if self._tts is not None and not skip_tts_validation: + base_kwargs["tts"] = self._tts + + return StartAgentsRequestProperties(**base_kwargs) + + def _resolve_llm_config(self) -> typing.Dict[str, typing.Any]: + llm_config = dict(self._llm or {}) # Agent-level fields take priority over the vendor's defaults. # This matches the TS SDK where agent-level values override vendor config. if self._instructions is not None: @@ -920,11 +953,7 @@ def to_properties( llm_config["failure_message"] = self._failure_message if self._max_history is not None: llm_config["max_history"] = self._max_history - - base_kwargs["llm"] = llm_config - base_kwargs["tts"] = self._tts - - return StartAgentsRequestProperties(**base_kwargs) + return llm_config def _resolve_asr_config(self) -> typing.Dict[str, typing.Any]: asr_config = dict(self._stt or {}) diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py index 5c866ac..dbff562 100644 --- a/src/agora_agent/agentkit/agent_session.py +++ b/src/agora_agent/agentkit/agent_session.py @@ -27,7 +27,14 @@ validate_avatar_config, validate_tts_sample_rate, ) -from .presets import resolve_session_presets +from .presets import ( + get_preset_category, + infer_asr_preset, + infer_llm_preset, + infer_tts_preset, + normalize_preset_input, + resolve_session_presets, +) from .token import generate_convo_ai_token, _parse_numeric_uid @@ -294,7 +301,8 @@ def _is_mllm_mode(self) -> bool: def _build_start_properties( self, token_opts: typing.Dict[str, typing.Any], - skip_vendor_validation: bool, + skip_vendor_validation_categories: typing.AbstractSet[str], + 
def _vendor_validation_categories(
    self,
    pipeline_id: typing.Optional[str],
) -> typing.Tuple[typing.Set[str], typing.Set[str]]:
    """Work out which vendor categories to skip and which may be absent.

    Returns ``(skip_categories, allow_missing_categories)``:

    * a truthy ``pipeline_id`` lets all of ``asr``/``llm``/``tts`` be absent;
    * every comma-separated entry of the session preset that
      ``get_preset_category`` maps to a category is added to both sets;
    * a component for which the matching ``infer_*_preset`` helper returns a
      truthy value is added to the skip set only.
    """
    explicit = normalize_preset_input(self._preset)
    named_by_preset: typing.Set[str] = set()
    if explicit:
        named_by_preset = {
            kind
            for kind in map(get_preset_category, explicit.split(","))
            if kind is not None
        }

    may_be_absent: typing.Set[str] = set(named_by_preset)
    if pipeline_id:
        may_be_absent |= {"asr", "llm", "tts"}

    skipped: typing.Set[str] = set(named_by_preset)
    # Attributes are read lazily, one per iteration, in asr -> llm -> tts order.
    inference_table = (
        ("asr", infer_asr_preset, "stt"),
        ("llm", infer_llm_preset, "llm"),
        ("tts", infer_tts_preset, "tts"),
    )
    for kind, infer, attribute in inference_table:
        if infer(getattr(self._agent, attribute)):
            skipped.add(kind)

    return skipped, may_be_absent
def _normalize_model_name(value: typing.Any) -> typing.Optional[str]:
    """Return *value* stripped and lower-cased, or ``None``.

    ``None`` is returned when *value* is not a string or when nothing is
    left after stripping surrounding whitespace.
    """
    cleaned = value.strip().lower() if isinstance(value, str) else ""
    return cleaned or None
next_value = {k: v for k, v in value.items() if v is not None} return next_value or None diff --git a/src/agora_agent/core/client_wrapper.py b/src/agora_agent/core/client_wrapper.py index acd9073..a8efe07 100644 --- a/src/agora_agent/core/client_wrapper.py +++ b/src/agora_agent/core/client_wrapper.py @@ -26,10 +26,10 @@ def __init__( def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { - "User-Agent": "agora-agents/v2.1.0", + "User-Agent": "agora-agents/v2.1.1", "X-Fern-Language": "Python", "X-Fern-SDK-Name": "agora-agents", - "X-Fern-SDK-Version": "v2.1.0", + "X-Fern-SDK-Version": "v2.1.1", **(self.get_custom_headers() or {}), } headers["Authorization"] = httpx.BasicAuth(self._get_username(), self._get_password())._auth_header diff --git a/tests/custom/test_pipeline_id.py b/tests/custom/test_pipeline_id.py index c6c8c8f..2e5453b 100644 --- a/tests/custom/test_pipeline_id.py +++ b/tests/custom/test_pipeline_id.py @@ -1,6 +1,6 @@ import pytest -from agora_agent import Agent +from agora_agent import Agent, OpenAI, OpenAITTS def dump(value): @@ -85,6 +85,75 @@ def test_agent_pipeline_id_skips_missing_vendor_validation() -> None: call = start_agent(Agent(name="support", pipeline_id="studio-pipeline-id")) assert call["pipeline_id"] == "studio-pipeline-id" + properties = dump(call["properties"]) + assert "asr" not in properties + assert "llm" not in properties + assert "tts" not in properties + + +def test_pipeline_id_allows_single_llm_override_without_tts_or_asr() -> None: + agent = Agent(name="support", pipeline_id="studio-pipeline-id").with_llm( + OpenAI( + api_key="openai-key", + base_url="https://api.openai.com/v1/chat/completions", + model="gpt-4o", + ) + ) + + call = start_agent(agent) + + assert call["pipeline_id"] == "studio-pipeline-id" + properties = dump(call["properties"]) + assert "asr" not in properties + assert "tts" not in properties + assert properties["llm"]["api_key"] == "openai-key" + assert 
class StartResponse:
    """Stand-in for the start-call response; exposes only ``agent_id``."""

    agent_id = "agent-id"


class FakeAgentsClient:
    """Synchronous agents client that records each ``start`` call."""

    def __init__(self):
        self.calls = []

    def start(self, appid, **kwargs):
        """Record the call as one dict (``appid`` plus kwargs) and return a response."""
        recorded = {"appid": appid}
        recorded.update(kwargs)
        self.calls.append(recorded)
        return StartResponse()


class FakeAsyncAgentsClient:
    """Awaitable counterpart of ``FakeAgentsClient``."""

    def __init__(self):
        self.calls = []

    async def start(self, appid, **kwargs):
        """Record the call as one dict (``appid`` plus kwargs) and return a response."""
        recorded = {"appid": appid}
        recorded.update(kwargs)
        self.calls.append(recorded)
        return StartResponse()


class FakeClient:
    """Client wrapper exposing fixed app credentials and the given agents client."""

    app_id = "appid"
    app_certificate = None

    def __init__(self, agents):
        self.agents = agents
remote_uids=["2"], + ).start() + + assert len(agents.calls) == 1 + call = agents.calls[0] + assert call["preset"] == "openai_gpt_4o_mini,minimax_speech_2_8_turbo" + + properties = dump(call["properties"]) + assert properties["tts"]["vendor"] == "minimax" + assert properties["tts"]["params"] == {"voice_setting": {"voice_id": "English_captivating_female1"}} + + +@pytest.mark.asyncio +async def test_async_session_start_infers_presets_for_managed_tts_and_llm() -> None: + agent = ( + Agent(name="support") + .with_llm(OpenAI(model="gpt-4o-mini")) + .with_tts(OpenAITTS(voice="alloy")) + ) + + agents = FakeAsyncAgentsClient() + client = FakeClient(agents) + await agent.create_async_session( + client=client, + channel="channel", + token="token", + agent_uid="1", + remote_uids=["2"], + ).start() + + assert len(agents.calls) == 1 + call = agents.calls[0] + assert call["preset"] == "openai_gpt_4o_mini,openai_tts_1" + + properties = dump(call["properties"]) + assert properties["tts"]["vendor"] == "openai" + assert properties["tts"]["params"] == {"voice": "alloy"} + + +def test_session_start_infers_managed_asr_without_skipping_llm_or_tts_validation() -> None: + agent = ( + Agent(name="support") + .with_stt(DeepgramSTT(model="nova-3", language="en-US")) + .with_llm(OpenAI(model="gpt-4o-mini")) + .with_tts(OpenAITTS(voice="alloy")) + ) + + agents = FakeAgentsClient() + client = FakeClient(agents) + agent.create_session( + client=client, + channel="channel", + token="token", + agent_uid="1", + remote_uids=["2"], + ).start() + + assert len(agents.calls) == 1 + call = agents.calls[0] + assert call["preset"] == "deepgram_nova_3,openai_gpt_4o_mini,openai_tts_1" + + properties = dump(call["properties"]) + assert properties["asr"]["vendor"] == "deepgram" + assert properties["asr"]["params"] == {"language": "en-US"} + assert properties["llm"]["style"] == "openai" + assert properties["tts"]["vendor"] == "openai" + + +def test_explicit_asr_preset_still_requires_tts_and_llm() -> None: + 
agent = Agent(name="support") + + agents = FakeAgentsClient() + client = FakeClient(agents) + + with pytest.raises(ValueError, match="TTS configuration is required"): + agent.create_session( + client=client, + channel="channel", + token="token", + agent_uid="1", + remote_uids=["2"], + preset="deepgram_nova_3", + ).start() + + assert agents.calls == [] + + +def test_managed_llm_inference_still_requires_tts() -> None: + agent = Agent(name="support").with_llm(OpenAI(model="gpt-4o-mini")) + + agents = FakeAgentsClient() + client = FakeClient(agents) + + with pytest.raises(ValueError, match="TTS configuration is required"): + agent.create_session( + client=client, + channel="channel", + token="token", + agent_uid="1", + remote_uids=["2"], + ).start() + + assert agents.calls == [] + + +def test_explicit_llm_preset_still_requires_tts() -> None: + agent = Agent(name="support") + + agents = FakeAgentsClient() + client = FakeClient(agents) + + with pytest.raises(ValueError, match="TTS configuration is required"): + agent.create_session( + client=client, + channel="channel", + token="token", + agent_uid="1", + remote_uids=["2"], + preset="openai_gpt_4o_mini", + ).start() + + assert agents.calls == [] + + +def test_managed_tts_inference_still_requires_llm() -> None: + agent = Agent(name="support").with_tts( + MiniMaxTTS(model="speech_2_8_turbo", voice_id="English_captivating_female1") + ) + + agents = FakeAgentsClient() + client = FakeClient(agents) + + with pytest.raises(ValueError, match="LLM configuration is required"): + agent.create_session( + client=client, + channel="channel", + token="token", + agent_uid="1", + remote_uids=["2"], + ).start() + + assert agents.calls == [] + + +def test_explicit_tts_preset_still_requires_llm() -> None: + agent = Agent(name="support") + + agents = FakeAgentsClient() + client = FakeClient(agents) + + with pytest.raises(ValueError, match="LLM configuration is required"): + agent.create_session( + client=client, + channel="channel", + 
token="token", + agent_uid="1", + remote_uids=["2"], + preset="openai_tts_1", + ).start() + + assert agents.calls == [] + + +def test_minimax_speech_02_turbo_requires_byok() -> None: + with pytest.raises(ValueError, match="MiniMaxTTS requires key unless using a supported Agora-managed model"): + MiniMaxTTS(model="speech-02-turbo", voice_id="English_captivating_female1") diff --git a/tests/custom/test_tts_vendors.py b/tests/custom/test_tts_vendors.py index 9499eca..bdd9482 100644 --- a/tests/custom/test_tts_vendors.py +++ b/tests/custom/test_tts_vendors.py @@ -114,5 +114,5 @@ def test_tts_managed_mode_validation_matches_core_shapes() -> None: with pytest.raises(Exception, match="OpenAITTS requires api_key"): OpenAITTS(voice="coral", model="tts-1-hd") - with pytest.raises(Exception, match="MiniMaxTTS requires key"): - MiniMaxTTS(model="speech-02-turbo") + with pytest.raises(Exception, match="MiniMaxTTS requires key unless using a supported Agora-managed model"): + MiniMaxTTS(model="unsupported-model")