Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 60 additions & 5 deletions src/specify_cli/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,6 +1702,44 @@ def _open_url(self, url: str, timeout: int = 10):
from specify_cli.authentication.http import open_url
return open_url(url, timeout)

def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None:
"""Validate a parsed catalog payload's shape.

Applied to both network-fetched and cache-loaded payloads so a
once-poisoned cache (older spec-kit version, manual edit, upstream
served a bad payload before the network-side guards were added)
cannot re-crash ``_get_merged_extensions`` on subsequent calls.

Checking only key presence would let a payload like
``{"extensions": []}`` or ``{"extensions": null}`` slip through
here and then crash with ``AttributeError: 'list' object has no
attribute 'items'`` deep inside ``_get_merged_extensions``. The
sibling integration catalog reader already guards both the root
object and the nested mapping (see ``integrations/catalog.py``);
the extension catalog must stay consistent so a malformed payload
surfaces as the user-facing ``Invalid catalog format`` error
instead of a raw Python traceback.

Args:
catalog_data: Parsed JSON payload from the catalog source.
url: Source URL — used in the error message so the user can
tell which catalog in a multi-catalog stack is malformed.

Raises:
ExtensionError: If the payload's shape is invalid.
"""
if not isinstance(catalog_data, dict):
raise ExtensionError(
f"Invalid catalog format from {url}: expected a JSON object"
)
if "schema_version" not in catalog_data or "extensions" not in catalog_data:
raise ExtensionError(f"Invalid catalog format from {url}")
if not isinstance(catalog_data.get("extensions"), dict):
raise ExtensionError(
f"Invalid catalog format from {url}: "
"'extensions' must be a JSON object"
)

def get_active_catalogs(self) -> List[CatalogEntry]:
"""Get the ordered list of active catalogs.

Expand Down Expand Up @@ -1827,20 +1865,27 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False
# If metadata is invalid or missing expected fields, treat cache as invalid
pass

# Use cache if valid
# Use cache if valid. A previously-cached payload must clear the
# same shape checks as a freshly-fetched one — otherwise a once-
# poisoned cache (older spec-kit version, manual edit, upstream
# served a bad payload before the network-side guards were added)
# would re-crash on every invocation despite the cache being
# "valid" by age. If validation fails on the cached read, fall
# through to the network fetch path so the cache gets refreshed.
if is_valid:
try:
return json.loads(cache_file.read_text())
except json.JSONDecodeError:
cached_data = json.loads(cache_file.read_text())
self._validate_catalog_payload(cached_data, entry.url)
return cached_data
except (json.JSONDecodeError, ExtensionError):
pass

# Fetch from network
try:
with self._open_url(entry.url, timeout=10) as response:
catalog_data = json.loads(response.read())

if "schema_version" not in catalog_data or "extensions" not in catalog_data:
raise ExtensionError(f"Invalid catalog format from {entry.url}")
self._validate_catalog_payload(catalog_data, entry.url)

# Save to cache
self.cache_dir.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -1895,6 +1940,16 @@ def _get_merged_extensions(self, force_refresh: bool = False) -> List[Dict[str,
continue

for ext_id, ext_data in catalog_data.get("extensions", {}).items():
# Per-entry guard: ``_fetch_single_catalog`` already validates
# that ``catalog_data["extensions"]`` is a mapping, but it
# does not (and should not) validate every entry shape there
# — one malformed entry shouldn't poison an otherwise valid
# catalog. Skip non-mapping entries here so a payload like
# ``{"extensions": {"foo": [], "bar": {...}}}`` still merges
# the valid entries without crashing on ``**ext_data``.
# Mirrors ``integrations/catalog.py:245``.
if not isinstance(ext_data, dict):
continue
if ext_id not in merged: # Higher-priority catalog wins
merged[ext_id] = {
**ext_data,
Expand Down
71 changes: 64 additions & 7 deletions src/specify_cli/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1860,6 +1860,48 @@ def _open_url(self, url: str, timeout: int = 10):
from specify_cli.authentication.http import open_url
return open_url(url, timeout)

def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None:
"""Validate a parsed preset-catalog payload's shape.

Applied to both network-fetched and cache-loaded payloads so a
once-poisoned cache (older spec-kit version, manual edit, upstream
served a bad payload before the network-side guards were added)
cannot re-crash ``_get_merged_packs`` on subsequent calls.

Checking only key presence would let a payload like
``{"presets": []}`` or ``{"presets": null}`` slip through here and
then crash with ``AttributeError: 'list' object has no attribute
'items'`` deep inside ``_get_merged_packs``. The sibling
integration catalog reader already guards both the root object and
the nested mapping (see ``integrations/catalog.py``); the preset
catalog must stay consistent so a malformed payload surfaces as
the user-facing ``Invalid preset catalog format`` error instead of
a raw Python traceback.

Args:
catalog_data: Parsed JSON payload from the catalog source.
url: Source URL — used in the error message so the user can
tell which catalog in a multi-catalog stack is malformed.

Raises:
PresetError: If the payload's shape is invalid.
"""
if not isinstance(catalog_data, dict):
raise PresetError(
f"Invalid preset catalog format from {url}: "
"expected a JSON object"
)
if (
"schema_version" not in catalog_data
or "presets" not in catalog_data
):
raise PresetError("Invalid preset catalog format")
if not isinstance(catalog_data.get("presets"), dict):
raise PresetError(
f"Invalid preset catalog format from {url}: "
"'presets' must be a JSON object"
)

def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]:
"""Load catalog stack configuration from a YAML file.

Expand Down Expand Up @@ -2035,21 +2077,25 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool =
"""
cache_file, metadata_file = self._get_cache_paths(entry.url)

# Use cache if valid. A previously-cached payload must clear the
# same shape checks as a freshly-fetched one — otherwise a once-
# poisoned cache would re-crash on every invocation despite the
# cache being "valid" by age. If validation fails on the cached
# read, fall through to the network fetch path so the cache gets
# refreshed.
if not force_refresh and self._is_url_cache_valid(entry.url):
try:
return json.loads(cache_file.read_text())
except json.JSONDecodeError:
cached_data = json.loads(cache_file.read_text())
self._validate_catalog_payload(cached_data, entry.url)
return cached_data
except (json.JSONDecodeError, PresetError):
pass

try:
with self._open_url(entry.url, timeout=10) as response:
catalog_data = json.loads(response.read())

if (
"schema_version" not in catalog_data
or "presets" not in catalog_data
):
raise PresetError("Invalid preset catalog format")
self._validate_catalog_payload(catalog_data, entry.url)

self.cache_dir.mkdir(parents=True, exist_ok=True)
cache_file.write_text(json.dumps(catalog_data, indent=2))
Expand Down Expand Up @@ -2083,6 +2129,17 @@ def _get_merged_packs(self, force_refresh: bool = False) -> Dict[str, Dict[str,
try:
data = self._fetch_single_catalog(entry, force_refresh)
for pack_id, pack_data in data.get("presets", {}).items():
# Per-entry guard: ``_fetch_single_catalog`` already
# validates that ``data["presets"]`` is a mapping, but it
# does not (and should not) validate every entry shape
# there — one malformed entry shouldn't poison an
# otherwise valid catalog. Skip non-mapping entries here
# so a payload like ``{"presets": {"foo": [], "bar":
# {...}}}`` still merges the valid entries without
# crashing on ``**pack_data``. Mirrors
# ``integrations/catalog.py:245``.
if not isinstance(pack_data, dict):
continue
pack_data_with_catalog = {**pack_data, "_catalog_name": entry.name, "_install_allowed": entry.install_allowed}
merged[pack_id] = pack_data_with_catalog
except PresetError:
Expand Down
157 changes: 157 additions & 0 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2577,6 +2577,163 @@ def fake_open(req, timeout=None):

assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken"

@pytest.mark.parametrize(
"payload",
[
# Root is not a JSON object.
[],
"oops",
42,
None,
# Root is fine but ``extensions`` is the wrong type.
{"schema_version": "1.0", "extensions": []},
{"schema_version": "1.0", "extensions": "oops"},
{"schema_version": "1.0", "extensions": None},
{"schema_version": "1.0", "extensions": 42},
],
)
def test_fetch_single_catalog_rejects_malformed_payload(self, temp_dir, payload):
"""Malformed catalog payloads raise ExtensionError, not AttributeError.

Without this guard, a payload like ``{"extensions": []}`` would pass the
key-presence check and then crash with ``AttributeError: 'list' object
has no attribute 'items'`` deep inside ``_get_merged_extensions``. The
sibling integration catalog reader already validates both the root
object and the nested mapping (see ``integrations/catalog.py``); the
extension catalog must stay consistent.
"""
from unittest.mock import patch, MagicMock

catalog = self._make_catalog(temp_dir)

mock_response = MagicMock()
mock_response.read.return_value = json.dumps(payload).encode()
mock_response.__enter__ = lambda s: s
mock_response.__exit__ = MagicMock(return_value=False)

entry = CatalogEntry(
url="https://example.com/catalog.json",
name="default",
priority=1,
install_allowed=True,
)

with patch.object(catalog, "_open_url", return_value=mock_response):
with pytest.raises(ExtensionError, match="Invalid catalog format"):
catalog._fetch_single_catalog(entry, force_refresh=True)

@pytest.mark.parametrize(
"cached_payload",
[
[],
"oops",
42,
None,
{"schema_version": "1.0", "extensions": []},
{"schema_version": "1.0", "extensions": "oops"},
{"schema_version": "1.0", "extensions": None},
],
)
def test_fetch_single_catalog_rejects_malformed_cached_payload(
self, temp_dir, cached_payload
):
"""A poisoned cache silently falls back to the network instead of
crashing — cached payloads pass through the same shape validation
as freshly-fetched ones.

Without this, a cache poisoned by an older spec-kit version (or a
manual edit, or an upstream that briefly served a bad payload
before the network guards landed) would re-crash every invocation
of ``_get_merged_extensions`` despite the cache being "valid" by
age. The recovery contract is: if the cached payload fails
validation, drop it and refetch — never propagate
``AttributeError`` to the caller.
"""
from unittest.mock import patch, MagicMock

catalog = self._make_catalog(temp_dir)

# Poison the default-URL cache. ``DEFAULT_CATALOG_URL`` is the
# branch that goes through ``is_cache_valid()`` (the non-default
# branch uses per-URL hashed cache files but the same code path
# below).
catalog.cache_dir.mkdir(parents=True, exist_ok=True)
catalog.cache_file.write_text(json.dumps(cached_payload))
catalog.cache_metadata_file.write_text(
json.dumps(
{
"cached_at": datetime.now(timezone.utc).isoformat(),
"catalog_url": ExtensionCatalog.DEFAULT_CATALOG_URL,
}
)
)

# Network refetch returns a valid payload so the recovery path
# can complete.
valid = {
"schema_version": "1.0",
"extensions": {"foo": {"name": "Foo", "version": "1.0.0"}},
}
mock_response = MagicMock()
mock_response.read.return_value = json.dumps(valid).encode()
mock_response.__enter__ = lambda s: s
mock_response.__exit__ = MagicMock(return_value=False)

entry = CatalogEntry(
url=ExtensionCatalog.DEFAULT_CATALOG_URL,
name="default",
priority=1,
install_allowed=True,
)

with patch.object(catalog, "_open_url", return_value=mock_response):
result = catalog._fetch_single_catalog(entry, force_refresh=False)

# The poisoned cache was discarded and the network payload returned.
assert result == valid

def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir):
"""Per-entry guard: one malformed entry shouldn't poison the merge.

``_fetch_single_catalog`` validates that ``extensions`` is a mapping,
but it doesn't (and shouldn't) validate every entry inside it — a
single bad entry in an otherwise-valid catalog should be skipped, not
crash the whole resolve path. Mirrors the per-entry skip in
``integrations/catalog.py``: a malformed entry returns no error,
valid entries continue to merge normally.
"""
from unittest.mock import patch, MagicMock

catalog = self._make_catalog(temp_dir)
# Mix of valid entry, list-shaped entry, and string-shaped entry.
payload = {
"schema_version": "1.0",
"extensions": {
"good": {"name": "Good", "version": "1.0.0"},
"bad-list": [],
"bad-str": "oops",
},
}
mock_response = MagicMock()
mock_response.read.return_value = json.dumps(payload).encode()
mock_response.__enter__ = lambda s: s
mock_response.__exit__ = MagicMock(return_value=False)

entry = CatalogEntry(
url="https://example.com/catalog.json",
name="default",
priority=1,
install_allowed=True,
)

with patch.object(catalog, "_open_url", return_value=mock_response), \
patch.object(catalog, "get_active_catalogs", return_value=[entry]):
merged = catalog._get_merged_extensions(force_refresh=True)

# Only the well-formed entry survives; the two malformed entries are
# silently dropped rather than raising or crashing.
assert [ext["id"] for ext in merged] == ["good"]

def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch):
"""download_extension passes Authorization header when a provider is configured."""
from unittest.mock import patch, MagicMock
Expand Down
Loading