Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/datacommons-mcp/.env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ DC_TYPE=base
# Path to directory containing markdown file overrides for server instructions and/or tool descriptions.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't realize this was in the agent-toolkit repo. Shouldn't it be in the website env.sample, in the MCP server options section?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree. We should add it here: https://github.com/datacommonsorg/website/blob/master/custom_dc/env.list.sample#L49

As part of your verification and documentation work, would you mind adding it there?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do right now! Thanks!

# Supports partial overrides: only create files for the specific instructions or tools you want to replace.
# The system will fall back to package defaults for any file not found here.
# Supports both local filesystem paths and Google Cloud Storage paths (e.g., gs://bucket/path).
#
# Expected structure inside this directory:
# - server.md
Expand Down
14 changes: 7 additions & 7 deletions packages/datacommons-mcp/datacommons_mcp/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

MCP_SERVER_NAME = "DC MCP Server"
DEFAULT_INSTRUCTIONS_PACKAGE = "datacommons_mcp.instructions"
SERVER_INSTRUCTION_FILE = "server.md"
SERVER_INSTRUCTIONS_FILE = "server.md"


class DCApp:
Expand Down Expand Up @@ -62,15 +62,15 @@ def __init__(self) -> None:
raise

# Load Server Instructions
server_instructions = self._load_instruction(SERVER_INSTRUCTION_FILE)
server_instructions = self._load_instructions(SERVER_INSTRUCTIONS_FILE)

self.mcp = FastMCP(
MCP_SERVER_NAME,
version=__version__,
instructions=server_instructions,
)

def _load_instruction(self, filename: str) -> str:
def _load_instructions(self, filename: str) -> str:
"""
Loads markdown content.
Priority:
Expand All @@ -82,13 +82,13 @@ def _load_instruction(self, filename: str) -> str:
content = read_external_content(self.settings.instructions_dir, filename)
if content is not None:
logger.info(
"Loaded custom instruction for %s from %s",
"Loaded custom instructions for %s from %s",
filename,
self.settings.instructions_dir,
)
return content
logger.debug(
"Custom instruction file %s not found in %s, falling back to default.",
"Custom instructions file %s not found in %s, falling back to default.",
filename,
self.settings.instructions_dir,
)
Expand All @@ -101,9 +101,9 @@ def register_tool(self, func: Callable[..., Any], instruction_file: str) -> None

Args:
func: The tool function to register.
instruction_file: Path to instruction file relative to instructions dir.
instruction_file: Path to instructions file relative to instructions dir.
"""
description = self._load_instruction(instruction_file)
description = self._load_instructions(instruction_file)
if not description:
logger.warning(
"No description found for tool %s from file %s",
Expand Down
70 changes: 55 additions & 15 deletions packages/datacommons-mcp/datacommons_mcp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@

import importlib.resources
import logging
from functools import cache
from pathlib import Path
from typing import TYPE_CHECKING

import requests

if TYPE_CHECKING:
from google.cloud import storage
from datacommons_client.models.observation import Observation

from datacommons_mcp.data_models.observations import DateRange, ObservationDate
Expand Down Expand Up @@ -93,33 +98,68 @@ def filter_by_date(
return filtered_list


def read_external_content(base_path: str, filename: str) -> str | None:
"""Reads content from an external location (currently only local paths).
@cache
def _get_gcs_client() -> "storage.Client":
"""Returns a cached GCS client instance."""
# Local import to avoid loading the module unless GCS is required
from google.cloud import storage

Args:
base_path: The base directory to look in.
filename: The name of the file to read (relative to base_path). Can include
subdirectories (e.g. "tools/search_indicators.md").
return storage.Client()

Returns:
The content of the file as a string, or None if the file does not exist
or cannot be read.

Example:
>>> content = read_external_content("/path/to/instructions", "server.md")
"""
# TODO(keyurs): Add support for GCS if needed. This is useful for Custom DCs deployed in the cloud.
def _read_local_content(path: Path) -> str | None:
"""Reads content from a local file path."""
try:
path = Path(base_path) / filename
if path.exists() and path.is_file():
return path.read_text(encoding="utf-8")
except Exception as e:
logger.warning("Failed to read local file %s: %s", path, e)
return None


def _read_gcs_content(uri: str) -> str | None:
"""Reads content from a GCS blob URI."""
from google.cloud import storage
from google.cloud.exceptions import NotFound

try:
client = _get_gcs_client()
# Create the blob object directly from the URI
blob = storage.Blob.from_string(uri, client=client)
return blob.download_as_text(encoding="utf-8")
except NotFound:
logger.warning(
"Failed to read external instruction %s from %s: %s", filename, base_path, e
"GCS blob %s not found. Falling back to default.",
uri,
)
return None
except Exception as e:
logger.warning(
"Failed to read GCS blob %s: %s",
uri,
e,
)
return None


def read_external_content(base_path: str, filename: str) -> str | None:
    """Fetch an override file from a local directory or a GCS location.

    Args:
        base_path: Where to look: a local directory path, or a GCS location
            of the form ``gs://bucket/prefix``.
        filename: Name of the file to read, relative to ``base_path``.

    Returns:
        The file's text, or None when the underlying local/GCS reader
        returns None (file missing or unreadable).
    """
    is_gcs = base_path.startswith("gs://")
    if not is_gcs:
        return _read_local_content(Path(base_path) / filename)

    # Trim trailing slashes so the joined URI has a single separator.
    bucket_root = base_path.rstrip("/")
    return _read_gcs_content(f"{bucket_root}/{filename}")


def read_package_content(package: str, filename: str) -> str:
"""Reads content from the package resources.

Expand Down
1 change: 1 addition & 0 deletions packages/datacommons-mcp/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"pydantic-settings",
"python-dateutil>=2.9.0.post0",
"python-dotenv>=1.1.1",
"google-cloud-storage",
]
urls = {Homepage = "https://github.com/datacommonsorg/agent-toolkit"}
license = {file = "LICENSE"}
Expand Down
4 changes: 2 additions & 2 deletions packages/datacommons-mcp/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def test_load_instruction_tool_override(mock_settings, tmp_path, create_test_fil
from datacommons_mcp.app import DCApp

app = DCApp()
content = app._load_instruction("tools/test_tool.md")
content = app._load_instructions("tools/test_tool.md")
assert content == "Custom Tool Instructions"


Expand All @@ -114,7 +114,7 @@ def test_load_instruction_fallback(mock_settings, tmp_path):
app = DCApp()

# Should fall back to default package resource (server.md exists in package)
content = app._load_instruction("server.md")
content = app._load_instructions("server.md")
assert "Data Commons" in content


Expand Down
65 changes: 65 additions & 0 deletions packages/datacommons-mcp/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@
# limitations under the License.


from unittest.mock import MagicMock, patch

import pytest
import requests
from datacommons_client.models.observation import Observation
from datacommons_mcp.data_models.observations import DateRange
from datacommons_mcp.exceptions import APIKeyValidationError, InvalidAPIKeyError
from datacommons_mcp.utils import (
VALIDATION_API_PATH,
_get_gcs_client,
filter_by_date,
read_external_content,
read_package_content,
Expand Down Expand Up @@ -85,6 +88,22 @@ def test_validate_api_key_network_error(self, requests_mock):


class TestReadContent:
@pytest.fixture
def mock_gcs(self):
    """Patch the GCS client class and ``Blob.from_string`` with mocks.

    Yields:
        A ``(client, from_string, blob)`` tuple of mocks. The blob's
        ``download_as_text`` returns "gcs content" unless a test
        overrides it.
    """
    with (
        patch("google.cloud.storage.Client") as client_cls,
        patch("google.cloud.storage.Blob.from_string") as blob_factory,
    ):
        # Reset the cached client so the next call builds one from the
        # patched class rather than reusing an earlier instance.
        _get_gcs_client.cache_clear()

        fake_client = MagicMock()
        client_cls.return_value = fake_client

        fake_blob = MagicMock()
        blob_factory.return_value = fake_blob
        fake_blob.download_as_text.return_value = "gcs content"

        yield fake_client, blob_factory, fake_blob

def test_read_external_content_success(self, tmp_path, create_test_file):
create_test_file("test.md", "content")
assert read_external_content(str(tmp_path), "test.md") == "content"
Expand All @@ -97,6 +116,52 @@ def test_read_external_content_subdir(self, tmp_path, create_test_file):
def test_read_external_content_missing(self, tmp_path):
assert read_external_content(str(tmp_path), "missing.md") is None

def test_read_external_content_gcs_success(self, mock_gcs):
    """Reads ``test.md`` under a bucket-plus-prefix base path via mocked GCS."""
    gcs_client, blob_from_string, gcs_blob = mock_gcs
    gcs_blob.download_as_text.return_value = "custom content 1"

    result = read_external_content("gs://my-bucket/path", "test.md")

    assert result == "custom content 1"
    # The prefix and file name must be joined into a single blob URI.
    expected_uri = "gs://my-bucket/path/test.md"
    blob_from_string.assert_called_once_with(expected_uri, client=gcs_client)

def test_read_external_content_gcs_success_no_prefix(self, mock_gcs):
    """Reads ``test.md`` from a bare-bucket base path (no prefix) via mocked GCS."""
    gcs_client, blob_from_string, gcs_blob = mock_gcs
    gcs_blob.download_as_text.return_value = "custom content 2"

    result = read_external_content("gs://my-bucket", "test.md")

    assert result == "custom content 2"
    # With no prefix, the file name sits directly under the bucket root.
    expected_uri = "gs://my-bucket/test.md"
    blob_from_string.assert_called_once_with(expected_uri, client=gcs_client)

def test_read_external_content_gcs_not_found(self, mock_gcs):
    """Returns None when the mocked blob download raises ``NotFound``."""
    from google.cloud.exceptions import NotFound

    gcs_client, blob_from_string, gcs_blob = mock_gcs
    gcs_blob.download_as_text.side_effect = NotFound("Blob not found")

    result = read_external_content("gs://my-bucket", "test.md")

    assert result is None
    # The lookup must still have targeted the expected blob URI.
    expected_uri = "gs://my-bucket/test.md"
    blob_from_string.assert_called_once_with(expected_uri, client=gcs_client)

def test_read_external_content_gcs_failure(self, mock_gcs):
    """Returns None when the mocked blob download raises a generic exception."""
    gcs_client, blob_from_string, gcs_blob = mock_gcs
    gcs_blob.download_as_text.side_effect = Exception("GCS error")

    result = read_external_content("gs://my-bucket", "test.md")

    assert result is None
    # The lookup must still have targeted the expected blob URI.
    expected_uri = "gs://my-bucket/test.md"
    blob_from_string.assert_called_once_with(expected_uri, client=gcs_client)

def test_read_package_content_success(self):
# Read actual content from the package
content = read_package_content("datacommons_mcp.instructions", "server.md")
Expand Down
4 changes: 3 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading