From 97423158d5aeed58663f22c5aa8fe3a3a99ea852 Mon Sep 17 00:00:00 2001
From: Amrit Krishnan <amrit110@gmail.com>
Date: Wed, 25 Feb 2026 09:39:10 -0500
Subject: [PATCH] Add google_api_key setting, use it for Google search, add config tests

---
 .../aieng/agent_evals/configs.py              |   4 +
 .../aieng/agent_evals/tools/search.py         |   4 +-
 .../tests/aieng/agent_evals/test_configs.py   | 231 ++++++++++++++++++
 implementations/basics/01_why_evals.ipynb     |  12 +-
 4 files changed, 238 insertions(+), 13 deletions(-)
 create mode 100644 aieng-eval-agents/tests/aieng/agent_evals/test_configs.py

diff --git a/aieng-eval-agents/aieng/agent_evals/configs.py b/aieng-eval-agents/aieng/agent_evals/configs.py
index 61f7542..ab25b4e 100644
--- a/aieng-eval-agents/aieng/agent_evals/configs.py
+++ b/aieng-eval-agents/aieng/agent_evals/configs.py
@@ -96,6 +96,10 @@ class Configs(BaseSettings):
         validation_alias=AliasChoices("OPENAI_API_KEY", "GEMINI_API_KEY", "GOOGLE_API_KEY"),
         description="API key for OpenAI-compatible API (accepts OPENAI_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY).",
     )
+    google_api_key: SecretStr = Field(
+        validation_alias=AliasChoices("GEMINI_API_KEY", "GOOGLE_API_KEY"),
+        description="API key for Google/Gemini API (accepts GEMINI_API_KEY or GOOGLE_API_KEY).",
+    )
     default_planner_model: str = Field(
         default="gemini-2.5-pro",
         description="Model name for planning/complex reasoning tasks.",
diff --git a/aieng-eval-agents/aieng/agent_evals/tools/search.py b/aieng-eval-agents/aieng/agent_evals/tools/search.py
index 686568e..f97add0 100644
--- a/aieng-eval-agents/aieng/agent_evals/tools/search.py
+++ b/aieng-eval-agents/aieng/agent_evals/tools/search.py
@@ -303,7 +303,7 @@ async def google_search(query: str, model: str | None = None) -> dict[str, Any]:
         model = config.default_worker_model
 
     return await _google_search_async(
-        query, model=model, temperature=config.default_temperature, api_key=config.openai_api_key.get_secret_value()
+        query, model=model, temperature=config.default_temperature, api_key=config.google_api_key.get_secret_value()
     )
 
 
@@ -369,7 +369,7 @@ async def google_search(query: str) -> dict[str, Any]:
             - **error** (str): Error message (error case only)
         """
         return await _google_search_async(
-            query, model=model, temperature=temperature, api_key=config.openai_api_key.get_secret_value()
+            query, model=model, temperature=temperature, api_key=config.google_api_key.get_secret_value()
         )
 
     return FunctionTool(func=google_search)
diff --git a/aieng-eval-agents/tests/aieng/agent_evals/test_configs.py b/aieng-eval-agents/tests/aieng/agent_evals/test_configs.py
new file mode 100644
index 0000000..cbef1d7
--- /dev/null
+++ b/aieng-eval-agents/tests/aieng/agent_evals/test_configs.py
@@ -0,0 +1,231 @@
+"""Tests for Configs and DatabaseConfig configuration models."""
+
+import os
+
+import pytest
+from aieng.agent_evals.configs import Configs, DatabaseConfig
+from pydantic import SecretStr, ValidationError
+
+
+def make_configs() -> Configs:
+    """Build a ``Configs`` instance without loading any .env file.
+
+    ``_env_file=None`` is a special init override that pydantic-settings
+    accepts but its generated type stubs do not declare, so Pyright reports
+    a false positive; this wrapper keeps the required ignore in one place.
+    """
+    return Configs(_env_file=None)  # type: ignore[call-arg]
+
+
+class TestDatabaseConfig:
+    """Tests for DatabaseConfig and its build_uri() method."""
+
+    def test_build_uri_sqlite(self):
+        """SQLite URI with only driver and database is valid."""
+        config = DatabaseConfig(driver="sqlite", database="/tmp/test.db")
+        assert config.build_uri() == "sqlite:////tmp/test.db"
+
+    def test_build_uri_postgresql_with_credentials(self):
+        """PostgreSQL URI includes host, port, username, and password."""
+        config = DatabaseConfig(
+            driver="postgresql",
+            username="user",
+            password=SecretStr("secret"),
+            host="localhost",
+            port=5432,
+            database="mydb",
+        )
+        assert config.build_uri() == "postgresql://user:secret@localhost:5432/mydb"
+
+    def test_build_uri_includes_query_params(self):
+        """Query parameters appear in the rendered URI."""
+        config = DatabaseConfig(driver="sqlite", database="/tmp/test.db", query={"mode": "ro"})
+        assert "mode=ro" in config.build_uri()
+
+    def test_build_uri_escapes_special_password_chars(self):
+        """Special characters in the password are URL-encoded, not exposed verbatim."""
+        config = DatabaseConfig(
+            driver="postgresql",
+            username="user",
+            password=SecretStr("p@ss/word"),
+            host="localhost",
+            port=5432,
+            database="db",
+        )
+        uri = config.build_uri()
+        assert "p@ss/word" not in uri  # raw password must never appear
+        assert "p%40ss%2fword" in uri.lower()  # encoded form present, i.e. not masked or dropped
+
+    def test_optional_fields_default_to_none(self):
+        """username, host, password, port, and database all default to None."""
+        config = DatabaseConfig(driver="sqlite")
+        assert config.username is None
+        assert config.host is None
+        assert config.password is None
+        assert config.port is None
+        assert config.database is None
+
+    def test_query_defaults_to_empty_dict(self):
+        """Query field defaults to an empty dict."""
+        assert DatabaseConfig(driver="sqlite").query == {}
+
+
+class TestConfigsDefaults:
+    """Tests for default field values in Configs."""
+
+    @pytest.fixture(autouse=True)
+    def _required_env(self, monkeypatch):
+        """Swap os.environ for a dict holding only the required API-key variables."""
+        monkeypatch.setattr(os, "environ", {"OPENAI_API_KEY": "test-openai-key", "GEMINI_API_KEY": "test-google-key"})
+
+    def test_default_worker_model(self):
+        """default_worker_model is gemini-2.5-flash."""
+        assert make_configs().default_worker_model == "gemini-2.5-flash"
+
+    def test_default_planner_model(self):
+        """default_planner_model is gemini-2.5-pro."""
+        assert make_configs().default_planner_model == "gemini-2.5-pro"
+
+    def test_default_evaluator_model(self):
+        """default_evaluator_model is gemini-2.5-pro."""
+        assert make_configs().default_evaluator_model == "gemini-2.5-pro"
+
+    def test_default_temperature(self):
+        """default_temperature is 1.0."""
+        assert make_configs().default_temperature == 1.0
+
+    def test_default_evaluator_temperature(self):
+        """default_evaluator_temperature is 0.0."""
+        assert make_configs().default_evaluator_temperature == 0.0
+
+    def test_default_openai_base_url(self):
+        """openai_base_url defaults to a URL containing googleapis.com."""
+        assert "googleapis.com" in make_configs().openai_base_url
+
+    def test_optional_fields_default_none(self):
+        """Optional database, Langfuse, and E2B fields default to None."""
+        config = make_configs()
+        assert config.aml_db is None
+        assert config.report_generation_db is None
+        assert config.langfuse_public_key is None
+        assert config.langfuse_secret_key is None
+        assert config.e2b_api_key is None
+
+
+class TestGoogleApiKey:
+    """Tests for the google_api_key field and its env var aliases."""
+
+    @pytest.fixture(autouse=True)
+    def _required_env(self, monkeypatch):
+        """Replace os.environ with only OPENAI_API_KEY; each test adds the Google key it needs."""
+        monkeypatch.setattr(os, "environ", {"OPENAI_API_KEY": "test-openai-key"})
+
+    def test_loaded_from_gemini_api_key(self, monkeypatch):
+        """google_api_key is populated from GEMINI_API_KEY."""
+        monkeypatch.setenv("GEMINI_API_KEY", "my-gemini-key")
+        assert make_configs().google_api_key.get_secret_value() == "my-gemini-key"
+
+    def test_loaded_from_google_api_key(self, monkeypatch):
+        """google_api_key is populated from GOOGLE_API_KEY."""
+        monkeypatch.setenv("GOOGLE_API_KEY", "my-google-key")
+        assert make_configs().google_api_key.get_secret_value() == "my-google-key"
+
+    def test_gemini_api_key_takes_priority_over_google_api_key(self, monkeypatch):
+        """GEMINI_API_KEY takes priority over GOOGLE_API_KEY when both are set."""
+        monkeypatch.setenv("GEMINI_API_KEY", "gemini-key")
+        monkeypatch.setenv("GOOGLE_API_KEY", "google-key")
+        config = make_configs()
+        assert config.google_api_key.get_secret_value() == "gemini-key"  # GEMINI_API_KEY is listed first
+
+    def test_secret_value_not_exposed_in_repr(self, monkeypatch):
+        """SecretStr does not leak the raw key in repr or str."""
+        monkeypatch.setenv("GEMINI_API_KEY", "super-secret-key")
+        key = make_configs().google_api_key
+        assert "super-secret-key" not in repr(key)
+        assert "super-secret-key" not in str(key)
+
+
+class TestOpenAiApiKeyAliases:
+    """Tests for openai_api_key env var aliases."""
+
+    @pytest.fixture(autouse=True)
+    def _clear_google_env(self, monkeypatch):  # NOTE: empties the whole environment, not only Google keys
+        monkeypatch.setattr(os, "environ", {})  # each test then sets just the variables it exercises
+
+    def test_loaded_from_openai_api_key(self, monkeypatch):
+        """openai_api_key is loaded from OPENAI_API_KEY when it is set."""
+        monkeypatch.setenv("OPENAI_API_KEY", "my-openai-key")
+        monkeypatch.setenv("GEMINI_API_KEY", "test-google-key")  # also set, so OPENAI_API_KEY must win
+        config = make_configs()
+        assert config.openai_api_key.get_secret_value() == "my-openai-key"
+
+    def test_loaded_from_gemini_api_key(self, monkeypatch):
+        """openai_api_key falls back to GEMINI_API_KEY when OPENAI_API_KEY is absent."""
+        monkeypatch.setenv("GEMINI_API_KEY", "my-gemini-key")
+        config = make_configs()
+        assert config.openai_api_key.get_secret_value() == "my-gemini-key"
+
+    def test_loaded_from_google_api_key(self, monkeypatch):
+        """openai_api_key falls back to GOOGLE_API_KEY as the last alias."""
+        monkeypatch.setenv("GOOGLE_API_KEY", "my-google-key")
+        config = make_configs()
+        assert config.openai_api_key.get_secret_value() == "my-google-key"
+
+    def test_missing_raises_validation_error(self):
+        """Configs raises ValidationError when no API key env var is set."""
+        with pytest.raises(ValidationError):
+            make_configs()  # environment is empty here, so no alias can be resolved
+
+
+class TestConfigsValidators:
+    """Tests for prefix/pattern validation of the Langfuse and E2B key fields."""
+
+    @pytest.fixture(autouse=True)
+    def _required_env(self, monkeypatch):  # isolate the environment to just the required API keys
+        monkeypatch.setattr(os, "environ", {"OPENAI_API_KEY": "test-openai-key", "GEMINI_API_KEY": "test-google-key"})
+
+    def test_langfuse_secret_key_valid(self, monkeypatch):
+        """langfuse_secret_key accepts values starting with 'sk-lf-'."""
+        monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-valid-secret")
+        config = make_configs()
+        assert config.langfuse_secret_key is not None
+        assert config.langfuse_secret_key.get_secret_value() == "sk-lf-valid-secret"
+
+    def test_langfuse_secret_key_invalid_prefix_raises(self, monkeypatch):
+        """langfuse_secret_key rejects values not starting with 'sk-lf-'."""
+        monkeypatch.setenv("LANGFUSE_SECRET_KEY", "invalid-secret")
+        with pytest.raises(ValidationError, match="sk-lf-"):
+            make_configs()
+
+    def test_langfuse_secret_key_none_is_allowed(self):
+        """langfuse_secret_key accepts None (key not configured)."""
+        assert make_configs().langfuse_secret_key is None
+
+    def test_e2b_api_key_valid(self, monkeypatch):
+        """e2b_api_key accepts values starting with 'e2b_'."""
+        monkeypatch.setenv("E2B_API_KEY", "e2b_valid_key")
+        config = make_configs()
+        assert config.e2b_api_key is not None
+        assert config.e2b_api_key.get_secret_value() == "e2b_valid_key"
+
+    def test_e2b_api_key_invalid_prefix_raises(self, monkeypatch):
+        """e2b_api_key rejects values not starting with 'e2b_'."""
+        monkeypatch.setenv("E2B_API_KEY", "invalid_key")
+        with pytest.raises(ValidationError, match="e2b_"):
+            make_configs()
+
+    def test_e2b_api_key_none_is_allowed(self):
+        """e2b_api_key accepts None (key not configured)."""
+        assert make_configs().e2b_api_key is None
+
+    def test_langfuse_public_key_valid_pattern(self, monkeypatch):
+        """langfuse_public_key accepts values matching 'pk-lf-*'."""
+        monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "pk-lf-abc123")
+        config = make_configs()
+        assert config.langfuse_public_key == "pk-lf-abc123"  # compared directly: no get_secret_value() here
+
+    def test_langfuse_public_key_invalid_pattern_raises(self, monkeypatch):
+        """langfuse_public_key rejects values not matching 'pk-lf-*'."""
+        monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "invalid-key")
+        with pytest.raises(ValidationError):
+            make_configs()
diff --git a/implementations/basics/01_why_evals.ipynb b/implementations/basics/01_why_evals.ipynb
index 4544954..ccce621 100644
--- a/implementations/basics/01_why_evals.ipynb
+++ b/implementations/basics/01_why_evals.ipynb
@@ -122,17 +122,7 @@
    "cell_type": "markdown",
    "id": "summary",
    "metadata": {},
-   "source": [
-    "## Summary\n",
-    "\n",
-    "1. **Compounding errors** — 10-step agents need task-level evaluation, not just per-answer accuracy\n",
-    "2. **Four dimensions** — outcome, tool quality, reasoning, cost-performance\n",
-    "3. **Three grader types** — code-based (fast, deterministic), model-based (flexible, nuanced), human (gold-standard, for calibration)\n",
-    "4. **Capability benchmarks** — hard problems as hills to climb; progress is measured, not estimated\n",
-    "5. **Closed loop** — failure patterns map to prompt, tool, behavior, and reasoning changes\n",
-    "\n",
-    "**Next:** In Notebook 02, we'll use the shared evaluation harness to run these ideas in code."
-   ]
+   "source": "## Summary\n\n1. **Compounding errors** — 10-step agents need task-level evaluation, not just per-answer accuracy\n2. **Four dimensions** — outcome, tool quality, reasoning, cost-performance\n3. **Three grader types** — code-based (fast, deterministic), model-based (flexible, nuanced), human (gold-standard, for calibration)\n4. **Capability benchmarks** — hard problems as hills to climb; progress is measured, not estimated\n5. **Closed loop** — failure patterns map to prompt, tool, behavior, and reasoning changes\n\n**Next:** In Notebook 02, we'll use the shared evaluation harness to run these ideas in code.\n\n---\n\n**Further Reading:** [Evaluating Agents — Google ADK Documentation](https://google.github.io/adk-docs/evaluate/) — official guide covering built-in evaluation tools for ADK-based agents, including test-file formats, metrics, and running evaluations with the CLI."
   }
  ],
  "metadata": {