From d023600420865f58e4ead3eba089e9aa2c175dae Mon Sep 17 00:00:00 2001 From: Quratulain-bilal Date: Mon, 18 May 2026 20:58:22 +0500 Subject: [PATCH 1/3] fix(catalogs): validate extension and preset catalog payload shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ExtensionCatalog._fetch_single_catalog` and `PresetCatalog._fetch_single_catalog` only check that the `extensions` / `presets` key is *present* in the parsed catalog JSON. They don't check that the value is a JSON object, and they don't check that the root is a JSON object at all. A malformed (or compromised) upstream catalog returning: {"schema_version": "1.0", "extensions": []} passes both `"extensions" not in catalog_data` and the subsequent `response.read()` JSON parse, gets cached on disk, and then crashes deep inside `_get_merged_extensions` (resp. `_get_merged_packs`) with: AttributeError: 'list' object has no attribute 'items' instead of the existing user-facing `ExtensionError("Invalid catalog format from ")` / `PresetError("Invalid preset catalog format")` that the surrounding code is clearly trying to produce. The sibling integration-catalog reader already validates this — see `src/specify_cli/integrations/catalog.py` where the fetch path explicitly checks both `isinstance(catalog_data, dict)` and `isinstance(catalog_data.get("integrations"), dict)` before returning. This change mirrors that pattern in the extension and preset readers so the three catalog fetchers stay consistent and a malformed upstream surfaces as the user-facing error instead of a raw Python traceback. Adds parametrized regression tests covering: - root payload is not a JSON object (list, str, int, null) - root is a dict but `extensions` / `presets` value is the wrong type (list, str, null, int) All eight bad-payload shapes now raise the expected catalog error. --- src/specify_cli/extensions.py | 19 +++++++++++++++ src/specify_cli/presets.py | 20 ++++++++++++++++ tests/test_extensions.py | 45 +++++++++++++++++++++++++++++++++++ tests/test_presets.py | 45 +++++++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 871503f0ae..5ad6303838 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1839,8 +1839,27 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) + # Validate payload shape before iteration. Checking only key + # presence would let a payload like ``{"extensions": []}`` or + # ``{"extensions": null}`` slip through here and then crash with + # ``AttributeError: 'list' object has no attribute 'items'`` deep + # inside ``_get_merged_extensions``. The sibling integration + # catalog reader already guards both the root object and the + # nested mapping (see ``integrations/catalog.py``); the extension + # catalog must stay consistent so a malformed upstream surfaces as + # the user-facing ``Invalid catalog format`` error instead of a + # raw Python traceback. + if not isinstance(catalog_data, dict): + raise ExtensionError( + f"Invalid catalog format from {entry.url}: expected a JSON object" + ) if "schema_version" not in catalog_data or "extensions" not in catalog_data: raise ExtensionError(f"Invalid catalog format from {entry.url}") + if not isinstance(catalog_data.get("extensions"), dict): + raise ExtensionError( + f"Invalid catalog format from {entry.url}: " + "'extensions' must be a JSON object" + ) # Save to cache self.cache_dir.mkdir(parents=True, exist_ok=True) diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index fd9d4745f1..523b50726a 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -2045,11 +2045,31 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) + # Validate payload shape before iteration. Checking only key + # presence would let a payload like ``{"presets": []}`` or + # ``{"presets": null}`` slip through here and then crash with + # ``AttributeError: 'list' object has no attribute 'items'`` deep + # inside ``_get_merged_packs``. The sibling integration catalog + # reader already guards both the root object and the nested + # mapping (see ``integrations/catalog.py``); the preset catalog + # must stay consistent so a malformed upstream surfaces as the + # user-facing ``Invalid preset catalog format`` error instead of + # a raw Python traceback. + if not isinstance(catalog_data, dict): + raise PresetError( + f"Invalid preset catalog format from {entry.url}: " + "expected a JSON object" + ) if ( "schema_version" not in catalog_data or "presets" not in catalog_data ): raise PresetError("Invalid preset catalog format") + if not isinstance(catalog_data.get("presets"), dict): + raise PresetError( + f"Invalid preset catalog format from {entry.url}: " + "'presets' must be a JSON object" + ) self.cache_dir.mkdir(parents=True, exist_ok=True) cache_file.write_text(json.dumps(catalog_data, indent=2)) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 153388a541..f1ce74c33a 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2577,6 +2577,51 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + @pytest.mark.parametrize( + "payload", + [ + # Root is not a JSON object. + [], + "oops", + 42, + None, + # Root is fine but ``extensions`` is the wrong type. + {"schema_version": "1.0", "extensions": []}, + {"schema_version": "1.0", "extensions": "oops"}, + {"schema_version": "1.0", "extensions": None}, + {"schema_version": "1.0", "extensions": 42}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_payload(self, temp_dir, payload): + """Malformed catalog payloads raise ExtensionError, not AttributeError. + + Without this guard, a payload like ``{"extensions": []}`` would pass the + key-presence check and then crash with ``AttributeError: 'list' object + has no attribute 'items'`` deep inside ``_get_merged_extensions``. The + sibling integration catalog reader already validates both the root + object and the nested mapping (see ``integrations/catalog.py``); the + extension catalog must stay consistent. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = CatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(ExtensionError, match="Invalid catalog format"): + catalog._fetch_single_catalog(entry, force_refresh=True) + def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header when a provider is configured.""" from unittest.mock import patch, MagicMock diff --git a/tests/test_presets.py b/tests/test_presets.py index 8fa700fa77..1928840d96 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1514,6 +1514,51 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + @pytest.mark.parametrize( + "payload", + [ + # Root is not a JSON object. + [], + "oops", + 42, + None, + # Root is fine but ``presets`` is the wrong type. + {"schema_version": "1.0", "presets": []}, + {"schema_version": "1.0", "presets": "oops"}, + {"schema_version": "1.0", "presets": None}, + {"schema_version": "1.0", "presets": 42}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_payload(self, project_dir, payload): + """Malformed catalog payloads raise PresetError, not AttributeError. + + Without this guard, a payload like ``{"presets": []}`` would pass the + key-presence check and then crash with ``AttributeError: 'list' object + has no attribute 'items'`` deep inside ``_get_merged_packs``. The + sibling integration catalog reader already validates both the root + object and the nested mapping (see ``integrations/catalog.py``); the + preset catalog must stay consistent. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = PresetCatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(PresetError, match="Invalid preset catalog format"): + catalog._fetch_single_catalog(entry, force_refresh=True) + def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): """download_pack passes Authorization header when configured.""" from unittest.mock import patch, MagicMock From df3823f711eec103aca816adfbb98b0dce5499c5 Mon Sep 17 00:00:00 2001 From: Quratulain-bilal Date: Thu, 21 May 2026 07:35:40 +0500 Subject: [PATCH 2/3] fix(catalogs): skip non-mapping entries during extension and preset merge Addresses Copilot review feedback on this PR. `_fetch_single_catalog` now validates that the ``extensions`` / ``presets`` value is a mapping, but it doesn't (and shouldn't) validate every entry inside that mapping. A payload like: {"schema_version": "1.0", "extensions": {"good": {...}, "bad": []}} passes the fetch-level guard, then later crashes inside ``_get_merged_extensions`` (resp. ``_get_merged_packs``) at ``{**ext_data, ...}`` with ``TypeError: 'list' object is not a mapping``. The sibling integration-catalog reader at ``src/specify_cli/integrations/catalog.py:245`` handles this with a per-entry ``isinstance(integ_data, dict)`` skip during merge, so one malformed entry doesn't poison an otherwise valid catalog. This change mirrors that pattern in the extension and preset mergers and adds regression tests asserting that valid entries continue to merge while malformed siblings are silently dropped. --- src/specify_cli/extensions.py | 10 +++++++++ src/specify_cli/presets.py | 11 +++++++++ tests/test_extensions.py | 42 +++++++++++++++++++++++++++++++++++ tests/test_presets.py | 41 ++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 5ad6303838..19782aacb5 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1914,6 +1914,16 @@ def _get_merged_extensions(self, force_refresh: bool = False) -> List[Dict[str, continue for ext_id, ext_data in catalog_data.get("extensions", {}).items(): + # Per-entry guard: ``_fetch_single_catalog`` already validates + # that ``catalog_data["extensions"]`` is a mapping, but it + # does not (and should not) validate every entry shape there + # — one malformed entry shouldn't poison an otherwise valid + # catalog. Skip non-mapping entries here so a payload like + # ``{"extensions": {"foo": [], "bar": {...}}}`` still merges + # the valid entries without crashing on ``**ext_data``. + # Mirrors ``integrations/catalog.py:245``. + if not isinstance(ext_data, dict): + continue if ext_id not in merged: # Higher-priority catalog wins merged[ext_id] = { **ext_data, diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 523b50726a..39611cd8b7 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -2103,6 +2103,17 @@ def _get_merged_packs(self, force_refresh: bool = False) -> Dict[str, Dict[str, try: data = self._fetch_single_catalog(entry, force_refresh) for pack_id, pack_data in data.get("presets", {}).items(): + # Per-entry guard: ``_fetch_single_catalog`` already + # validates that ``data["presets"]`` is a mapping, but it + # does not (and should not) validate every entry shape + # there — one malformed entry shouldn't poison an + # otherwise valid catalog. Skip non-mapping entries here + # so a payload like ``{"presets": {"foo": [], "bar": + # {...}}}`` still merges the valid entries without + # crashing on ``**pack_data``. Mirrors + # ``integrations/catalog.py:245``. + if not isinstance(pack_data, dict): + continue pack_data_with_catalog = {**pack_data, "_catalog_name": entry.name, "_install_allowed": entry.install_allowed} merged[pack_id] = pack_data_with_catalog except PresetError: diff --git a/tests/test_extensions.py b/tests/test_extensions.py index f1ce74c33a..db8c080d7e 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2622,6 +2622,48 @@ def test_fetch_single_catalog_rejects_malformed_payload(self, temp_dir, payload) with pytest.raises(ExtensionError, match="Invalid catalog format"): catalog._fetch_single_catalog(entry, force_refresh=True) + def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir): + """Per-entry guard: one malformed entry shouldn't poison the merge. + + ``_fetch_single_catalog`` validates that ``extensions`` is a mapping, + but it doesn't (and shouldn't) validate every entry inside it — a + single bad entry in an otherwise-valid catalog should be skipped, not + crash the whole resolve path. Mirrors the per-entry skip in + ``integrations/catalog.py``: a malformed entry returns no error, + valid entries continue to merge normally. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + # Mix of valid entry, list-shaped entry, and string-shaped entry. + payload = { + "schema_version": "1.0", + "extensions": { + "good": {"name": "Good", "version": "1.0.0"}, + "bad-list": [], + "bad-str": "oops", + }, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = CatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response), \ + patch.object(catalog, "get_active_catalogs", return_value=[entry]): + merged = catalog._get_merged_extensions(force_refresh=True) + + # Only the well-formed entry survives; the two malformed entries are + # silently dropped rather than raising or crashing. + assert [ext["id"] for ext in merged] == ["good"] + def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header when a provider is configured.""" from unittest.mock import patch, MagicMock diff --git a/tests/test_presets.py b/tests/test_presets.py index 1928840d96..514365d1a5 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1559,6 +1559,47 @@ def test_fetch_single_catalog_rejects_malformed_payload(self, project_dir, paylo with pytest.raises(PresetError, match="Invalid preset catalog format"): catalog._fetch_single_catalog(entry, force_refresh=True) + def test_get_merged_packs_skips_non_mapping_entries(self, project_dir): + """Per-entry guard: one malformed entry shouldn't poison the merge. + + ``_fetch_single_catalog`` validates that ``presets`` is a mapping, + but it doesn't (and shouldn't) validate every entry inside it — a + single bad entry in an otherwise-valid catalog should be skipped, + not crash the whole resolve path. Mirrors the per-entry skip in + ``integrations/catalog.py``: a malformed entry returns no error, + valid entries continue to merge normally. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + payload = { + "schema_version": "1.0", + "presets": { + "good": {"name": "Good", "version": "1.0.0"}, + "bad-list": [], + "bad-str": "oops", + }, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = PresetCatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response), \ + patch.object(catalog, "get_active_catalogs", return_value=[entry]): + merged = catalog._get_merged_packs(force_refresh=True) + + # Only the well-formed entry survives; the two malformed entries are + # silently dropped rather than raising or crashing. + assert list(merged.keys()) == ["good"] + def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): """download_pack passes Authorization header when configured.""" from unittest.mock import patch, MagicMock From 964324a8efa96bc0b54c7c4e1eaeb5f4c41402a1 Mon Sep 17 00:00:00 2001 From: Quratulain-bilal Date: Sat, 23 May 2026 16:07:23 +0500 Subject: [PATCH 3/3] fix(catalogs): validate cached extension and preset payload shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Copilot review feedback on this PR (round 2). The earlier commits in this branch added payload-shape validation on the network fetch path. The cache-hit path still returned ``json.loads(cache_file.read_text())`` directly without re-checking the shape, so a cache poisoned by an older spec-kit version (or a manual edit, or an upstream that briefly served a bad payload before the network guards landed) would re-crash every invocation of ``_get_merged_extensions`` / ``_get_merged_packs`` with ``AttributeError: 'list' object has no attribute 'items'`` despite the cache being "valid" by age. Extracts the shape validation into ``_validate_catalog_payload`` on both ``ExtensionCatalog`` and ``PresetCatalog``, and calls it from both the cache-load and network-fetch branches of ``_fetch_single_catalog``. If the cached payload fails validation, the cache read is treated like a ``json.JSONDecodeError`` — the cached value is discarded and the function falls through to the network fetch, which refreshes the cache with a clean payload on success. Never propagates ``AttributeError`` to the caller. Regression tests parametrize the four root-bad-type variants plus three ``extensions``/``presets``-bad-type variants per file, asserting that a poisoned cache silently recovers via network refetch and returns the freshly-fetched payload. --- src/specify_cli/extensions.py | 74 +++++++++++++++++++++----------- src/specify_cli/presets.py | 80 +++++++++++++++++++++++------------ tests/test_extensions.py | 70 ++++++++++++++++++++++++++++++ tests/test_presets.py | 71 +++++++++++++++++++++++++++++++ 4 files changed, 244 insertions(+), 51 deletions(-) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 19782aacb5..de67dfcdd8 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1702,6 +1702,44 @@ def _open_url(self, url: str, timeout: int = 10): from specify_cli.authentication.http import open_url return open_url(url, timeout) + def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None: + """Validate a parsed catalog payload's shape. + + Applied to both network-fetched and cache-loaded payloads so a + once-poisoned cache (older spec-kit version, manual edit, upstream + served a bad payload before the network-side guards were added) + cannot re-crash ``_get_merged_extensions`` on subsequent calls. + + Checking only key presence would let a payload like + ``{"extensions": []}`` or ``{"extensions": null}`` slip through + here and then crash with ``AttributeError: 'list' object has no + attribute 'items'`` deep inside ``_get_merged_extensions``. The + sibling integration catalog reader already guards both the root + object and the nested mapping (see ``integrations/catalog.py``); + the extension catalog must stay consistent so a malformed payload + surfaces as the user-facing ``Invalid catalog format`` error + instead of a raw Python traceback. + + Args: + catalog_data: Parsed JSON payload from the catalog source. + url: Source URL — used in the error message so the user can + tell which catalog in a multi-catalog stack is malformed. + + Raises: + ExtensionError: If the payload's shape is invalid. + """ + if not isinstance(catalog_data, dict): + raise ExtensionError( + f"Invalid catalog format from {url}: expected a JSON object" + ) + if "schema_version" not in catalog_data or "extensions" not in catalog_data: + raise ExtensionError(f"Invalid catalog format from {url}") + if not isinstance(catalog_data.get("extensions"), dict): + raise ExtensionError( + f"Invalid catalog format from {url}: " + "'extensions' must be a JSON object" + ) + def get_active_catalogs(self) -> List[CatalogEntry]: """Get the ordered list of active catalogs. @@ -1827,11 +1865,19 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False # If metadata is invalid or missing expected fields, treat cache as invalid pass - # Use cache if valid + # Use cache if valid. A previously-cached payload must clear the + # same shape checks as a freshly-fetched one — otherwise a once- + # poisoned cache (older spec-kit version, manual edit, upstream + # served a bad payload before the network-side guards were added) + # would re-crash on every invocation despite the cache being + # "valid" by age. If validation fails on the cached read, fall + # through to the network fetch path so the cache gets refreshed. if is_valid: try: - return json.loads(cache_file.read_text()) - except json.JSONDecodeError: + cached_data = json.loads(cache_file.read_text()) + self._validate_catalog_payload(cached_data, entry.url) + return cached_data + except (json.JSONDecodeError, ExtensionError): pass # Fetch from network @@ -1839,27 +1885,7 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) - # Validate payload shape before iteration. Checking only key - # presence would let a payload like ``{"extensions": []}`` or - # ``{"extensions": null}`` slip through here and then crash with - # ``AttributeError: 'list' object has no attribute 'items'`` deep - # inside ``_get_merged_extensions``. The sibling integration - # catalog reader already guards both the root object and the - # nested mapping (see ``integrations/catalog.py``); the extension - # catalog must stay consistent so a malformed upstream surfaces as - # the user-facing ``Invalid catalog format`` error instead of a - # raw Python traceback. - if not isinstance(catalog_data, dict): - raise ExtensionError( - f"Invalid catalog format from {entry.url}: expected a JSON object" - ) - if "schema_version" not in catalog_data or "extensions" not in catalog_data: - raise ExtensionError(f"Invalid catalog format from {entry.url}") - if not isinstance(catalog_data.get("extensions"), dict): - raise ExtensionError( - f"Invalid catalog format from {entry.url}: " - "'extensions' must be a JSON object" - ) + self._validate_catalog_payload(catalog_data, entry.url) # Save to cache self.cache_dir.mkdir(parents=True, exist_ok=True) diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 39611cd8b7..89fd033187 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -1860,6 +1860,48 @@ def _open_url(self, url: str, timeout: int = 10): from specify_cli.authentication.http import open_url return open_url(url, timeout) + def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None: + """Validate a parsed preset-catalog payload's shape. + + Applied to both network-fetched and cache-loaded payloads so a + once-poisoned cache (older spec-kit version, manual edit, upstream + served a bad payload before the network-side guards were added) + cannot re-crash ``_get_merged_packs`` on subsequent calls. + + Checking only key presence would let a payload like + ``{"presets": []}`` or ``{"presets": null}`` slip through here and + then crash with ``AttributeError: 'list' object has no attribute + 'items'`` deep inside ``_get_merged_packs``. The sibling + integration catalog reader already guards both the root object and + the nested mapping (see ``integrations/catalog.py``); the preset + catalog must stay consistent so a malformed payload surfaces as + the user-facing ``Invalid preset catalog format`` error instead of + a raw Python traceback. + + Args: + catalog_data: Parsed JSON payload from the catalog source. + url: Source URL — used in the error message so the user can + tell which catalog in a multi-catalog stack is malformed. + + Raises: + PresetError: If the payload's shape is invalid. + """ + if not isinstance(catalog_data, dict): + raise PresetError( + f"Invalid preset catalog format from {url}: " + "expected a JSON object" + ) + if ( + "schema_version" not in catalog_data + or "presets" not in catalog_data + ): + raise PresetError("Invalid preset catalog format") + if not isinstance(catalog_data.get("presets"), dict): + raise PresetError( + f"Invalid preset catalog format from {url}: " + "'presets' must be a JSON object" + ) + def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]: """Load catalog stack configuration from a YAML file. @@ -2035,41 +2077,25 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = """ cache_file, metadata_file = self._get_cache_paths(entry.url) + # Use cache if valid. A previously-cached payload must clear the + # same shape checks as a freshly-fetched one — otherwise a once- + # poisoned cache would re-crash on every invocation despite the + # cache being "valid" by age. If validation fails on the cached + # read, fall through to the network fetch path so the cache gets + # refreshed. if not force_refresh and self._is_url_cache_valid(entry.url): try: - return json.loads(cache_file.read_text()) - except json.JSONDecodeError: + cached_data = json.loads(cache_file.read_text()) + self._validate_catalog_payload(cached_data, entry.url) + return cached_data + except (json.JSONDecodeError, PresetError): pass try: with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) - # Validate payload shape before iteration. Checking only key - # presence would let a payload like ``{"presets": []}`` or - # ``{"presets": null}`` slip through here and then crash with - # ``AttributeError: 'list' object has no attribute 'items'`` deep - # inside ``_get_merged_packs``. The sibling integration catalog - # reader already guards both the root object and the nested - # mapping (see ``integrations/catalog.py``); the preset catalog - # must stay consistent so a malformed upstream surfaces as the - # user-facing ``Invalid preset catalog format`` error instead of - # a raw Python traceback. - if not isinstance(catalog_data, dict): - raise PresetError( - f"Invalid preset catalog format from {entry.url}: " - "expected a JSON object" - ) - if ( - "schema_version" not in catalog_data - or "presets" not in catalog_data - ): - raise PresetError("Invalid preset catalog format") - if not isinstance(catalog_data.get("presets"), dict): - raise PresetError( - f"Invalid preset catalog format from {entry.url}: " - "'presets' must be a JSON object" - ) + self._validate_catalog_payload(catalog_data, entry.url) self.cache_dir.mkdir(parents=True, exist_ok=True) cache_file.write_text(json.dumps(catalog_data, indent=2)) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index db8c080d7e..4e89663a3d 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2622,6 +2622,76 @@ def test_fetch_single_catalog_rejects_malformed_payload(self, temp_dir, payload) with pytest.raises(ExtensionError, match="Invalid catalog format"): catalog._fetch_single_catalog(entry, force_refresh=True) + @pytest.mark.parametrize( + "cached_payload", + [ + [], + "oops", + 42, + None, + {"schema_version": "1.0", "extensions": []}, + {"schema_version": "1.0", "extensions": "oops"}, + {"schema_version": "1.0", "extensions": None}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_cached_payload( + self, temp_dir, cached_payload + ): + """A poisoned cache silently falls back to the network instead of + crashing — cached payloads pass through the same shape validation + as freshly-fetched ones. + + Without this, a cache poisoned by an older spec-kit version (or a + manual edit, or an upstream that briefly served a bad payload + before the network guards landed) would re-crash every invocation + of ``_get_merged_extensions`` despite the cache being "valid" by + age. The recovery contract is: if the cached payload fails + validation, drop it and refetch — never propagate + ``AttributeError`` to the caller. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + + # Poison the default-URL cache. ``DEFAULT_CATALOG_URL`` is the + # branch that goes through ``is_cache_valid()`` (the non-default + # branch uses per-URL hashed cache files but the same code path + # below). + catalog.cache_dir.mkdir(parents=True, exist_ok=True) + catalog.cache_file.write_text(json.dumps(cached_payload)) + catalog.cache_metadata_file.write_text( + json.dumps( + { + "cached_at": datetime.now(timezone.utc).isoformat(), + "catalog_url": ExtensionCatalog.DEFAULT_CATALOG_URL, + } + ) + ) + + # Network refetch returns a valid payload so the recovery path + # can complete. + valid = { + "schema_version": "1.0", + "extensions": {"foo": {"name": "Foo", "version": "1.0.0"}}, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(valid).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = CatalogEntry( + url=ExtensionCatalog.DEFAULT_CATALOG_URL, + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog._fetch_single_catalog(entry, force_refresh=False) + + # The poisoned cache was discarded and the network payload returned. + assert result == valid + def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir): """Per-entry guard: one malformed entry shouldn't poison the merge. diff --git a/tests/test_presets.py b/tests/test_presets.py index 514365d1a5..5322d1454d 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1559,6 +1559,77 @@ def test_fetch_single_catalog_rejects_malformed_payload(self, project_dir, paylo with pytest.raises(PresetError, match="Invalid preset catalog format"): catalog._fetch_single_catalog(entry, force_refresh=True) + @pytest.mark.parametrize( + "cached_payload", + [ + [], + "oops", + 42, + None, + {"schema_version": "1.0", "presets": []}, + {"schema_version": "1.0", "presets": "oops"}, + {"schema_version": "1.0", "presets": None}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_cached_payload( + self, project_dir, cached_payload + ): + """A poisoned cache silently falls back to the network instead of + crashing — cached payloads pass through the same shape validation + as freshly-fetched ones. + + Without this, a cache poisoned by an older spec-kit version (or a + manual edit, or an upstream that briefly served a bad payload + before the network guards landed) would re-crash every invocation + of ``_get_merged_packs`` despite the cache being "valid" by age. + The recovery contract is: if the cached payload fails validation, + drop it and refetch — never propagate ``AttributeError`` to the + caller. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + + # Poison the default-URL cache. ``DEFAULT_CATALOG_URL`` and + # non-default URLs both flow through the same cache-load branch. + cache_file, metadata_file = catalog._get_cache_paths( + catalog.DEFAULT_CATALOG_URL + ) + cache_file.parent.mkdir(parents=True, exist_ok=True) + cache_file.write_text(json.dumps(cached_payload)) + metadata_file.write_text( + json.dumps( + { + "cached_at": datetime.now(timezone.utc).isoformat(), + "catalog_url": catalog.DEFAULT_CATALOG_URL, + } + ) + ) + + # Network refetch returns a valid payload so the recovery path + # can complete. + valid = { + "schema_version": "1.0", + "presets": {"foo": {"name": "Foo", "version": "1.0.0"}}, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(valid).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = PresetCatalogEntry( + url=catalog.DEFAULT_CATALOG_URL, + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog._fetch_single_catalog(entry, force_refresh=False) + + # The poisoned cache was discarded and the network payload returned. + assert result == valid + def test_get_merged_packs_skips_non_mapping_entries(self, project_dir): """Per-entry guard: one malformed entry shouldn't poison the merge.