From 3a444d1eea38e6f4cc33057fcd75aa9ac96e7e97 Mon Sep 17 00:00:00 2001 From: John McCall Date: Thu, 11 Jun 2026 16:03:54 -0400 Subject: [PATCH 1/3] chore: deprecate overture_releases.yaml, migrate fetch script to STAC - Replace fetch-releases-from-s3.py with fetch_releases_from_stac.py driven by https://stac.overturemaps.org/catalog.json instead of S3 listing - Drop obstore dependency from requirements.txt - Add deprecation header to overture_releases.yaml (kept for existing consumers) - Add index.html landing page for labs.overturemaps.org/data (was 404) - Add unit tests (19) for fetch_releases_from_stac.py - Add test.yml CI workflow (runs on push + PR) - Update Python 3.11 -> 3.12 in both workflows Published bundle before/after: releases.json S3 listing (~30 entries) -> STAC catalog (~2 active releases) latest.ddb unchanged latest.dbb unchanged (legacy alias, now marked deprecated) registry-manifest unchanged overture_releases.yaml unchanged data, gains deprecation comment index.html new: directory landing page (was 404) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/workflows/build-manifests.yml | 11 +- .github/workflows/test.yml | 35 ++++ .gitignore | 3 + index.html | 193 +++++++++++++++++++ overture_releases.yaml | 3 + utils/fetch-releases-from-s3.py | 89 --------- utils/fetch_releases_from_stac.py | 76 ++++++++ utils/requirements-test.txt | 2 + utils/requirements.txt | 1 - utils/tests/__init__.py | 0 utils/tests/test_fetch_releases_from_stac.py | 168 ++++++++++++++++ 11 files changed, 486 insertions(+), 95 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 .gitignore create mode 100644 index.html delete mode 100644 utils/fetch-releases-from-s3.py create mode 100644 utils/fetch_releases_from_stac.py create mode 100644 utils/requirements-test.txt create mode 100644 utils/tests/__init__.py create mode 100644 utils/tests/test_fetch_releases_from_stac.py diff --git 
a/.github/workflows/build-manifests.yml b/.github/workflows/build-manifests.yml index 51a9d8a..67cd9cc 100644 --- a/.github/workflows/build-manifests.yml +++ b/.github/workflows/build-manifests.yml @@ -1,4 +1,4 @@ -name: Fetch releases from S3 +name: Build and publish releases artifacts on: push: @@ -33,10 +33,10 @@ jobs: with: persist-credentials: false - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: - python-version: "3.11" + python-version: "3.12" - name: Install dependencies run: | @@ -44,15 +44,16 @@ jobs: cd utils pip install -r requirements.txt - - name: Build releases.json and latest.dbb + - name: Build releases.json and latest.ddb run: | cd utils - python3 fetch-releases-from-s3.py + python3 fetch_releases_from_stac.py python3 simple-registry-manifest.py - name: Copy output to publish directory run: | mkdir publish + cp index.html publish/ cp utils/releases.json publish/ cp utils/registry-manifest.json publish/ cp utils/latest.ddb publish/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..2688aeb --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,35 @@ +name: Test utils + +on: + push: + branches: main + pull_request: + +permissions: + contents: read + +jobs: + test: + name: Run unit tests + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Python 3.12 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + cd utils + pip install -r requirements-test.txt + + - name: Run tests + run: | + cd utils + python -m pytest tests/ -v diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..41c33c8 --- /dev/null +++ b/.gitignore @@ 
-0,0 +1,3 @@ + +__pycache__/ +*.pyc diff --git a/index.html b/index.html new file mode 100644 index 0000000..664210a --- /dev/null +++ b/index.html @@ -0,0 +1,193 @@ + + + + + + Overture Maps Data + + + + + + + +
+

Overture Maps Foundation

+

labs.overturemaps.org/data: published release artifacts

+
+ +
+

Available Files

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
File | Description
releases.json (deprecated) | Release list with latest pointer, supplanted by STAC (SpatioTemporal Asset Catalog)
latest.ddb | DuckDB database with views pointing to the latest Overture release on S3
latest.dbb (deprecated) | Alias for latest.ddb (legacy filename)
registry-manifest.json | Parquet file manifest for the Overture GERS registry
overture_releases.yaml (deprecated) | Historical release list, no longer maintained. Use STAC (SpatioTemporal Asset Catalog) instead
+ +

+ For authoritative release discovery, use the + STAC (SpatioTemporal Asset Catalog). + Full documentation at docs.overturemaps.org. +

+
+ + + + + diff --git a/overture_releases.yaml b/overture_releases.yaml index 05da21c..fcca45c 100644 --- a/overture_releases.yaml +++ b/overture_releases.yaml @@ -1,3 +1,6 @@ +# DEPRECATED: This file is no longer maintained and will be removed in a future release. +# Use the Overture STAC catalog for authoritative release discovery: +# https://stac.overturemaps.org/catalog.json - schema: "1.16.0" release: "2026-04-15.0" - schema: "1.16.0" diff --git a/utils/fetch-releases-from-s3.py b/utils/fetch-releases-from-s3.py deleted file mode 100644 index 56d3ee2..0000000 --- a/utils/fetch-releases-from-s3.py +++ /dev/null @@ -1,89 +0,0 @@ -import duckdb, json -from obstore.store import S3Store - -store = S3Store("overturemaps-us-west-2", region="us-west-2", skip_signature=True) - -releases = store.list_with_delimiter("release/") - -output = {} - -for idx, release in enumerate(sorted(releases.get("common_prefixes"), reverse=True)): - path = release.split("/")[1] - if idx == 0: - output["latest"] = path - output["releases"] = [] - output["releases"].append(path) - - print(f" - {path}") - -with open("releases.json", "w") as output_file: - output_file.write(json.dumps(output, indent=4)) - -conn = duckdb.connect("latest.ddb") - -conn.sql( - f""" -INSTALL spatial; -LOAD spatial; - -CREATE OR REPLACE VIEW address AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=addresses/type=address/*.parquet') -); - -CREATE OR REPLACE VIEW bathymetry AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=bathymetry/*.parquet') -); - -CREATE OR REPLACE VIEW building AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=buildings/type=building/*.parquet') -); - -CREATE OR REPLACE VIEW building_part AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=buildings/type=building_part/*.parquet') -); - 
-CREATE OR REPLACE VIEW connector AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=transportation/type=connector/*.parquet') -); - -CREATE OR REPLACE VIEW division AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division/*.parquet') -); - -CREATE OR REPLACE VIEW division_area AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division_area/*.parquet') -); - -CREATE OR REPLACE VIEW division_boundary AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division_boundary/*.parquet') -); - -CREATE OR REPLACE VIEW infrastructure AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=infrastructure/*.parquet') -); - -CREATE OR REPLACE VIEW land AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land/*.parquet') -); - -CREATE OR REPLACE VIEW land_cover AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land_cover/*.parquet') -); - -CREATE OR REPLACE VIEW land_use AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land_use/*.parquet') -); - -CREATE OR REPLACE VIEW place AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=places/type=place/*.parquet') -); - -CREATE OR REPLACE VIEW segment AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=transportation/type=segment/*.parquet') -); - -CREATE OR REPLACE VIEW water AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=water/*.parquet') -); -""" -) diff --git a/utils/fetch_releases_from_stac.py 
b/utils/fetch_releases_from_stac.py new file mode 100644 index 0000000..295153a --- /dev/null +++ b/utils/fetch_releases_from_stac.py @@ -0,0 +1,76 @@ +import json +import urllib.request + +import duckdb + +STAC_CATALOG = "https://stac.overturemaps.org/catalog.json" +S3_BASE = "s3://overturemaps-us-west-2/release" + +VIEWS = [ + ("address", "addresses", "address"), + ("bathymetry", "base", "bathymetry"), + ("building", "buildings", "building"), + ("building_part", "buildings", "building_part"), + ("connector", "transportation", "connector"), + ("division", "divisions", "division"), + ("division_area", "divisions", "division_area"), + ("division_boundary", "divisions", "division_boundary"), + ("infrastructure", "base", "infrastructure"), + ("land", "base", "land"), + ("land_cover", "base", "land_cover"), + ("land_use", "base", "land_use"), + ("place", "places", "place"), + ("segment", "transportation", "segment"), + ("water", "base", "water"), +] + + +def fetch_catalog(url: str) -> dict: + with urllib.request.urlopen(url) as response: + return json.loads(response.read()) + + +def parse_releases(catalog: dict) -> dict: + latest = catalog["latest"] + releases = sorted( + [ + link["href"].split("/")[1] + for link in catalog["links"] + if link["rel"] == "child" + ], + reverse=True, + ) + return {"latest": latest, "releases": releases} + + +def build_views_sql(latest: str, s3_base: str = S3_BASE) -> str: + stmts = ["INSTALL spatial;", "LOAD spatial;"] + for view_name, theme, type_ in VIEWS: + path = f"{s3_base}/{latest}/theme={theme}/type={type_}/*.parquet" + stmts.append( + f"CREATE OR REPLACE VIEW {view_name} AS (\n" + f" SELECT * FROM read_parquet('{path}')\n);" + ) + return "\n\n".join(stmts) + + +def create_duckdb_views(db_path: str, latest: str, s3_base: str = S3_BASE) -> None: + conn = duckdb.connect(db_path) + conn.sql(build_views_sql(latest, s3_base)) + + +def main(): + catalog = fetch_catalog(STAC_CATALOG) + output = parse_releases(catalog) + + for release in 
output["releases"]: + print(f" - {release}") + + with open("releases.json", "w") as f: + f.write(json.dumps(output, indent=4)) + + create_duckdb_views("latest.ddb", output["latest"]) + + +if __name__ == "__main__": + main() diff --git a/utils/requirements-test.txt b/utils/requirements-test.txt new file mode 100644 index 0000000..13f6026 --- /dev/null +++ b/utils/requirements-test.txt @@ -0,0 +1,2 @@ +-r requirements.txt +pytest>=8.0.0 diff --git a/utils/requirements.txt b/utils/requirements.txt index 2718dd1..7c43c30 100644 --- a/utils/requirements.txt +++ b/utils/requirements.txt @@ -1,3 +1,2 @@ -obstore>=0.7.0 duckdb==1.3.2 pyarrow>=20.0.0 diff --git a/utils/tests/__init__.py b/utils/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/tests/test_fetch_releases_from_stac.py b/utils/tests/test_fetch_releases_from_stac.py new file mode 100644 index 0000000..e053f3d --- /dev/null +++ b/utils/tests/test_fetch_releases_from_stac.py @@ -0,0 +1,168 @@ +import json +import os +import sys +import tempfile +from io import BytesIO +from unittest.mock import MagicMock, patch + +import duckdb +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from fetch_releases_from_stac import ( + VIEWS, + build_views_sql, + create_duckdb_views, + fetch_catalog, + parse_releases, +) + +SAMPLE_CATALOG = { + "type": "Catalog", + "id": "Overture Releases", + "stac_version": "1.1.0", + "description": "All Overture Releases", + "links": [ + {"rel": "root", "href": "./catalog.json", "type": "application/json"}, + { + "rel": "child", + "href": "./2026-05-20.0/catalog.json", + "type": "application/json", + "latest": True, + }, + { + "rel": "child", + "href": "./2026-04-15.0/catalog.json", + "type": "application/json", + }, + ], + "latest": "2026-05-20.0", +} + + +class TestFetchCatalog: + def test_returns_parsed_json(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = 
MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response): + result = fetch_catalog("https://stac.overturemaps.org/catalog.json") + + assert result == SAMPLE_CATALOG + + def test_uses_provided_url(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: + fetch_catalog("https://custom.example.com/catalog.json") + mock_open.assert_called_once_with("https://custom.example.com/catalog.json") + + +class TestParseReleases: + def test_extracts_latest(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["latest"] == "2026-05-20.0" + + def test_extracts_child_releases(self): + result = parse_releases(SAMPLE_CATALOG) + assert "2026-05-20.0" in result["releases"] + assert "2026-04-15.0" in result["releases"] + + def test_excludes_root_link(self): + result = parse_releases(SAMPLE_CATALOG) + # root link href is "./catalog.json" — split("/")[1] would be "catalog.json" + # but more importantly rel="root" should be excluded + assert "catalog.json" not in result["releases"] + assert len(result["releases"]) == 2 + + def test_releases_sorted_descending(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["releases"] == sorted(result["releases"], reverse=True) + + def test_returns_dict_with_expected_keys(self): + result = parse_releases(SAMPLE_CATALOG) + assert set(result.keys()) == {"latest", "releases"} + + def test_empty_links(self): + catalog = {**SAMPLE_CATALOG, "links": [], "latest": "2026-05-20.0"} + result = parse_releases(catalog) + assert result["latest"] == "2026-05-20.0" + assert result["releases"] == [] + + def test_single_release(self): + catalog = { + **SAMPLE_CATALOG, + "links": [ + {"rel": "child", 
"href": "./2026-05-20.0/catalog.json"}, + ], + "latest": "2026-05-20.0", + } + result = parse_releases(catalog) + assert result["releases"] == ["2026-05-20.0"] + + +class TestBuildViewsSql: + def test_contains_install_spatial(self): + sql = build_views_sql("2026-05-20.0") + assert "INSTALL spatial" in sql + + def test_contains_load_spatial(self): + sql = build_views_sql("2026-05-20.0") + assert "LOAD spatial" in sql + + def test_all_views_present(self): + sql = build_views_sql("2026-05-20.0") + for view_name, _, _ in VIEWS: + assert f"CREATE OR REPLACE VIEW {view_name}" in sql + + def test_latest_release_in_paths(self): + release = "2026-05-20.0" + sql = build_views_sql(release) + assert release in sql + + def test_custom_s3_base(self): + sql = build_views_sql("2026-05-20.0", s3_base="s3://my-bucket/release") + assert "s3://my-bucket/release" in sql + + def test_correct_theme_type_mapping(self): + sql = build_views_sql("2026-05-20.0") + assert "theme=addresses/type=address" in sql + assert "theme=buildings/type=building_part" in sql + assert "theme=transportation/type=segment" in sql + assert "theme=divisions/type=division_boundary" in sql + + def test_view_count_matches_views_constant(self): + sql = build_views_sql("2026-05-20.0") + count = sql.count("CREATE OR REPLACE VIEW") + assert count == len(VIEWS) + + +class TestCreateDuckdbViews: + def test_creates_all_views(self): + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", "2026-05-20.0") + mock_conn.sql.assert_called_once() + sql_arg = mock_conn.sql.call_args[0][0] + for view_name, _, _ in VIEWS: + assert f"CREATE OR REPLACE VIEW {view_name}" in sql_arg + + def test_views_reference_correct_release(self): + release = "2026-05-20.0" + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", release) + sql_arg = mock_conn.sql.call_args[0][0] + assert release in sql_arg + + def 
test_connects_to_provided_path(self): + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn) as mock_connect: + create_duckdb_views("some/path/latest.ddb", "2026-05-20.0") + mock_connect.assert_called_once_with("some/path/latest.ddb") From 8a213d9b51bcd043af758d859740e7ec3cfcb121 Mon Sep 17 00:00:00 2001 From: John McCall Date: Thu, 11 Jun 2026 16:08:00 -0400 Subject: [PATCH 2/3] fix: add concurrency setting to test workflow Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/workflows/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2688aeb..37795b6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,6 +8,10 @@ on: permissions: contents: read +concurrency: + group: "test-${{ github.ref }}" + cancel-in-progress: true + jobs: test: name: Run unit tests From 5c98db902e2a9c69cd23cebc417b969241e40b60 Mon Sep 17 00:00:00 2001 From: John McCall Date: Thu, 11 Jun 2026 16:23:41 -0400 Subject: [PATCH 3/3] fix: address PR review feedback - Add 30s timeout + User-Agent header to fetch_catalog - Robust href parsing via urlparse (handles absolute + relative hrefs) - Close DuckDB connection in finally block - Remove unused test imports (tempfile, BytesIO, duckdb, pytest) - Update test_uses_provided_url to assert on Request.full_url + timeout - Add tests for absolute href and no-./-prefix relative href Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- utils/fetch_releases_from_stac.py | 19 +- utils/tests/test_fetch_releases_from_stac.py | 367 ++++++++++--------- 2 files changed, 214 insertions(+), 172 deletions(-) diff --git a/utils/fetch_releases_from_stac.py b/utils/fetch_releases_from_stac.py index 295153a..a7cf5a0 100644 --- a/utils/fetch_releases_from_stac.py +++ b/utils/fetch_releases_from_stac.py @@ -1,10 +1,12 @@ import json import 
urllib.request +from urllib.parse import urlparse import duckdb STAC_CATALOG = "https://stac.overturemaps.org/catalog.json" S3_BASE = "s3://overturemaps-us-west-2/release" +_USER_AGENT = "overturemaps-data/1.0" VIEWS = [ ("address", "addresses", "address"), @@ -25,16 +27,22 @@ ] -def fetch_catalog(url: str) -> dict: - with urllib.request.urlopen(url) as response: +def fetch_catalog(url: str, timeout: int = 30) -> dict: + req = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT}) + with urllib.request.urlopen(req, timeout=timeout) as response: return json.loads(response.read()) +def _release_id_from_href(href: str) -> str: + parts = [p for p in urlparse(href).path.split("/") if p and p != "."] + return parts[0] + + def parse_releases(catalog: dict) -> dict: latest = catalog["latest"] releases = sorted( [ - link["href"].split("/")[1] + _release_id_from_href(link["href"]) for link in catalog["links"] if link["rel"] == "child" ], @@ -56,7 +64,10 @@ def build_views_sql(latest: str, s3_base: str = S3_BASE) -> str: def create_duckdb_views(db_path: str, latest: str, s3_base: str = S3_BASE) -> None: conn = duckdb.connect(db_path) - conn.sql(build_views_sql(latest, s3_base)) + try: + conn.sql(build_views_sql(latest, s3_base)) + finally: + conn.close() def main(): diff --git a/utils/tests/test_fetch_releases_from_stac.py b/utils/tests/test_fetch_releases_from_stac.py index e053f3d..e482282 100644 --- a/utils/tests/test_fetch_releases_from_stac.py +++ b/utils/tests/test_fetch_releases_from_stac.py @@ -1,168 +1,199 @@ -import json -import os -import sys -import tempfile -from io import BytesIO -from unittest.mock import MagicMock, patch - -import duckdb -import pytest - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - -from fetch_releases_from_stac import ( - VIEWS, - build_views_sql, - create_duckdb_views, - fetch_catalog, - parse_releases, -) - -SAMPLE_CATALOG = { - "type": "Catalog", - "id": "Overture Releases", - "stac_version": "1.1.0", - 
"description": "All Overture Releases", - "links": [ - {"rel": "root", "href": "./catalog.json", "type": "application/json"}, - { - "rel": "child", - "href": "./2026-05-20.0/catalog.json", - "type": "application/json", - "latest": True, - }, - { - "rel": "child", - "href": "./2026-04-15.0/catalog.json", - "type": "application/json", - }, - ], - "latest": "2026-05-20.0", -} - - -class TestFetchCatalog: - def test_returns_parsed_json(self): - mock_response = MagicMock() - mock_response.__enter__ = lambda s: s - mock_response.__exit__ = MagicMock(return_value=False) - mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() - - with patch("urllib.request.urlopen", return_value=mock_response): - result = fetch_catalog("https://stac.overturemaps.org/catalog.json") - - assert result == SAMPLE_CATALOG - - def test_uses_provided_url(self): - mock_response = MagicMock() - mock_response.__enter__ = lambda s: s - mock_response.__exit__ = MagicMock(return_value=False) - mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() - - with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: - fetch_catalog("https://custom.example.com/catalog.json") - mock_open.assert_called_once_with("https://custom.example.com/catalog.json") - - -class TestParseReleases: - def test_extracts_latest(self): - result = parse_releases(SAMPLE_CATALOG) - assert result["latest"] == "2026-05-20.0" - - def test_extracts_child_releases(self): - result = parse_releases(SAMPLE_CATALOG) - assert "2026-05-20.0" in result["releases"] - assert "2026-04-15.0" in result["releases"] - - def test_excludes_root_link(self): - result = parse_releases(SAMPLE_CATALOG) - # root link href is "./catalog.json" — split("/")[1] would be "catalog.json" - # but more importantly rel="root" should be excluded - assert "catalog.json" not in result["releases"] - assert len(result["releases"]) == 2 - - def test_releases_sorted_descending(self): - result = parse_releases(SAMPLE_CATALOG) - 
assert result["releases"] == sorted(result["releases"], reverse=True) - - def test_returns_dict_with_expected_keys(self): - result = parse_releases(SAMPLE_CATALOG) - assert set(result.keys()) == {"latest", "releases"} - - def test_empty_links(self): - catalog = {**SAMPLE_CATALOG, "links": [], "latest": "2026-05-20.0"} - result = parse_releases(catalog) - assert result["latest"] == "2026-05-20.0" - assert result["releases"] == [] - - def test_single_release(self): - catalog = { - **SAMPLE_CATALOG, - "links": [ - {"rel": "child", "href": "./2026-05-20.0/catalog.json"}, - ], - "latest": "2026-05-20.0", - } - result = parse_releases(catalog) - assert result["releases"] == ["2026-05-20.0"] - - -class TestBuildViewsSql: - def test_contains_install_spatial(self): - sql = build_views_sql("2026-05-20.0") - assert "INSTALL spatial" in sql - - def test_contains_load_spatial(self): - sql = build_views_sql("2026-05-20.0") - assert "LOAD spatial" in sql - - def test_all_views_present(self): - sql = build_views_sql("2026-05-20.0") - for view_name, _, _ in VIEWS: - assert f"CREATE OR REPLACE VIEW {view_name}" in sql - - def test_latest_release_in_paths(self): - release = "2026-05-20.0" - sql = build_views_sql(release) - assert release in sql - - def test_custom_s3_base(self): - sql = build_views_sql("2026-05-20.0", s3_base="s3://my-bucket/release") - assert "s3://my-bucket/release" in sql - - def test_correct_theme_type_mapping(self): - sql = build_views_sql("2026-05-20.0") - assert "theme=addresses/type=address" in sql - assert "theme=buildings/type=building_part" in sql - assert "theme=transportation/type=segment" in sql - assert "theme=divisions/type=division_boundary" in sql - - def test_view_count_matches_views_constant(self): - sql = build_views_sql("2026-05-20.0") - count = sql.count("CREATE OR REPLACE VIEW") - assert count == len(VIEWS) - - -class TestCreateDuckdbViews: - def test_creates_all_views(self): - mock_conn = MagicMock() - with patch("duckdb.connect", 
return_value=mock_conn): - create_duckdb_views(":memory:", "2026-05-20.0") - mock_conn.sql.assert_called_once() - sql_arg = mock_conn.sql.call_args[0][0] - for view_name, _, _ in VIEWS: - assert f"CREATE OR REPLACE VIEW {view_name}" in sql_arg - - def test_views_reference_correct_release(self): - release = "2026-05-20.0" - mock_conn = MagicMock() - with patch("duckdb.connect", return_value=mock_conn): - create_duckdb_views(":memory:", release) - sql_arg = mock_conn.sql.call_args[0][0] - assert release in sql_arg - - def test_connects_to_provided_path(self): - mock_conn = MagicMock() - with patch("duckdb.connect", return_value=mock_conn) as mock_connect: - create_duckdb_views("some/path/latest.ddb", "2026-05-20.0") - mock_connect.assert_called_once_with("some/path/latest.ddb") +import json +import sys +import os +from unittest.mock import MagicMock, patch + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from fetch_releases_from_stac import ( + VIEWS, + build_views_sql, + create_duckdb_views, + fetch_catalog, + parse_releases, +) + +SAMPLE_CATALOG = { + "type": "Catalog", + "id": "Overture Releases", + "stac_version": "1.1.0", + "description": "All Overture Releases", + "links": [ + {"rel": "root", "href": "./catalog.json", "type": "application/json"}, + { + "rel": "child", + "href": "./2026-05-20.0/catalog.json", + "type": "application/json", + "latest": True, + }, + { + "rel": "child", + "href": "./2026-04-15.0/catalog.json", + "type": "application/json", + }, + ], + "latest": "2026-05-20.0", +} + + +class TestFetchCatalog: + def test_returns_parsed_json(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response): + result = fetch_catalog("https://stac.overturemaps.org/catalog.json") + + assert result == SAMPLE_CATALOG + + def 
test_uses_provided_url(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: + fetch_catalog("https://custom.example.com/catalog.json") + req = mock_open.call_args[0][0] + assert req.full_url == "https://custom.example.com/catalog.json" + + def test_applies_timeout(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: + fetch_catalog("https://stac.overturemaps.org/catalog.json", timeout=10) + assert mock_open.call_args[1]["timeout"] == 10 + + +class TestParseReleases: + def test_extracts_latest(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["latest"] == "2026-05-20.0" + + def test_extracts_child_releases(self): + result = parse_releases(SAMPLE_CATALOG) + assert "2026-05-20.0" in result["releases"] + assert "2026-04-15.0" in result["releases"] + + def test_excludes_root_link(self): + result = parse_releases(SAMPLE_CATALOG) + # root link href is "./catalog.json" — split("/")[1] would be "catalog.json" + # but more importantly rel="root" should be excluded + assert "catalog.json" not in result["releases"] + assert len(result["releases"]) == 2 + + def test_releases_sorted_descending(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["releases"] == sorted(result["releases"], reverse=True) + + def test_returns_dict_with_expected_keys(self): + result = parse_releases(SAMPLE_CATALOG) + assert set(result.keys()) == {"latest", "releases"} + + def test_empty_links(self): + catalog = {**SAMPLE_CATALOG, "links": [], "latest": "2026-05-20.0"} + result = parse_releases(catalog) + assert 
result["latest"] == "2026-05-20.0" + assert result["releases"] == [] + + def test_single_release(self): + catalog = { + **SAMPLE_CATALOG, + "links": [ + {"rel": "child", "href": "./2026-05-20.0/catalog.json"}, + ], + "latest": "2026-05-20.0", + } + result = parse_releases(catalog) + assert result["releases"] == ["2026-05-20.0"] + + def test_absolute_href_parsed_correctly(self): + catalog = { + **SAMPLE_CATALOG, + "links": [ + { + "rel": "child", + "href": "https://stac.overturemaps.org/2026-05-20.0/catalog.json", + }, + ], + "latest": "2026-05-20.0", + } + result = parse_releases(catalog) + assert result["releases"] == ["2026-05-20.0"] + + def test_relative_href_without_dotslash_parsed_correctly(self): + catalog = { + **SAMPLE_CATALOG, + "links": [ + {"rel": "child", "href": "2026-05-20.0/catalog.json"}, + ], + "latest": "2026-05-20.0", + } + result = parse_releases(catalog) + assert result["releases"] == ["2026-05-20.0"] + + +class TestBuildViewsSql: + def test_contains_install_spatial(self): + sql = build_views_sql("2026-05-20.0") + assert "INSTALL spatial" in sql + + def test_contains_load_spatial(self): + sql = build_views_sql("2026-05-20.0") + assert "LOAD spatial" in sql + + def test_all_views_present(self): + sql = build_views_sql("2026-05-20.0") + for view_name, _, _ in VIEWS: + assert f"CREATE OR REPLACE VIEW {view_name}" in sql + + def test_latest_release_in_paths(self): + release = "2026-05-20.0" + sql = build_views_sql(release) + assert release in sql + + def test_custom_s3_base(self): + sql = build_views_sql("2026-05-20.0", s3_base="s3://my-bucket/release") + assert "s3://my-bucket/release" in sql + + def test_correct_theme_type_mapping(self): + sql = build_views_sql("2026-05-20.0") + assert "theme=addresses/type=address" in sql + assert "theme=buildings/type=building_part" in sql + assert "theme=transportation/type=segment" in sql + assert "theme=divisions/type=division_boundary" in sql + + def test_view_count_matches_views_constant(self): + sql =
build_views_sql("2026-05-20.0") + count = sql.count("CREATE OR REPLACE VIEW") + assert count == len(VIEWS) + + +class TestCreateDuckdbViews: + def test_creates_all_views(self): + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", "2026-05-20.0") + mock_conn.sql.assert_called_once() + sql_arg = mock_conn.sql.call_args[0][0] + for view_name, _, _ in VIEWS: + assert f"CREATE OR REPLACE VIEW {view_name}" in sql_arg + + def test_views_reference_correct_release(self): + release = "2026-05-20.0" + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", release) + sql_arg = mock_conn.sql.call_args[0][0] + assert release in sql_arg + + def test_closes_connection(self): + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", "2026-05-20.0") + mock_conn.close.assert_called_once() +