diff --git a/.github/workflows/build-manifests.yml b/.github/workflows/build-manifests.yml index 51a9d8a..67cd9cc 100644 --- a/.github/workflows/build-manifests.yml +++ b/.github/workflows/build-manifests.yml @@ -1,4 +1,4 @@ -name: Fetch releases from S3 +name: Build and publish releases artifacts on: push: @@ -33,10 +33,10 @@ jobs: with: persist-credentials: false - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: - python-version: "3.11" + python-version: "3.12" - name: Install dependencies run: | @@ -44,15 +44,16 @@ jobs: cd utils pip install -r requirements.txt - - name: Build releases.json and latest.dbb + - name: Build releases.json and latest.ddb run: | cd utils - python3 fetch-releases-from-s3.py + python3 fetch_releases_from_stac.py python3 simple-registry-manifest.py - name: Copy output to publish directory run: | mkdir publish + cp index.html publish/ cp utils/releases.json publish/ cp utils/registry-manifest.json publish/ cp utils/latest.ddb publish/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..37795b6 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,39 @@ +name: Test utils + +on: + push: + branches: main + pull_request: + +permissions: + contents: read + +concurrency: + group: "test-${{ github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: Run unit tests + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Python 3.12 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + cd utils + pip install -r requirements-test.txt + + - name: Run tests + run: | + cd utils + python -m pytest tests/ -v diff --git a/.gitignore 
b/.gitignore new file mode 100644 index 0000000..41c33c8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ + +__pycache__/ +*.pyc diff --git a/index.html b/index.html new file mode 100644 index 0000000..664210a --- /dev/null +++ b/index.html @@ -0,0 +1,193 @@ + + + + + + Overture Maps Data + + + + + + + +
+

Overture Maps Foundation

+

labs.overturemaps.org/data: published release artifacts

+
+ +
+

Available Files

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
File | Description
releases.json (deprecated) | Release list with latest pointer, supplanted by STAC (SpatioTemporal Asset Catalog)
latest.ddb | DuckDB database with views pointing to the latest Overture release on S3
latest.dbb (deprecated) | Alias for latest.ddb (legacy filename)
registry-manifest.json | Parquet file manifest for the Overture GERS registry
overture_releases.yaml (deprecated) | Historical release list, no longer maintained. Use STAC (SpatioTemporal Asset Catalog) instead
+ +

+ For authoritative release discovery, use the + STAC (SpatioTemporal Asset Catalog). + Full documentation at docs.overturemaps.org. +

+
+ + + + + diff --git a/overture_releases.yaml b/overture_releases.yaml index 05da21c..fcca45c 100644 --- a/overture_releases.yaml +++ b/overture_releases.yaml @@ -1,3 +1,6 @@ +# DEPRECATED: This file is no longer maintained and will be removed in a future release. +# Use the Overture STAC catalog for authoritative release discovery: +# https://stac.overturemaps.org/catalog.json - schema: "1.16.0" release: "2026-04-15.0" - schema: "1.16.0" diff --git a/utils/fetch-releases-from-s3.py b/utils/fetch-releases-from-s3.py deleted file mode 100644 index 56d3ee2..0000000 --- a/utils/fetch-releases-from-s3.py +++ /dev/null @@ -1,89 +0,0 @@ -import duckdb, json -from obstore.store import S3Store - -store = S3Store("overturemaps-us-west-2", region="us-west-2", skip_signature=True) - -releases = store.list_with_delimiter("release/") - -output = {} - -for idx, release in enumerate(sorted(releases.get("common_prefixes"), reverse=True)): - path = release.split("/")[1] - if idx == 0: - output["latest"] = path - output["releases"] = [] - output["releases"].append(path) - - print(f" - {path}") - -with open("releases.json", "w") as output_file: - output_file.write(json.dumps(output, indent=4)) - -conn = duckdb.connect("latest.ddb") - -conn.sql( - f""" -INSTALL spatial; -LOAD spatial; - -CREATE OR REPLACE VIEW address AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=addresses/type=address/*.parquet') -); - -CREATE OR REPLACE VIEW bathymetry AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=bathymetry/*.parquet') -); - -CREATE OR REPLACE VIEW building AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=buildings/type=building/*.parquet') -); - -CREATE OR REPLACE VIEW building_part AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=buildings/type=building_part/*.parquet') -); - 
-CREATE OR REPLACE VIEW connector AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=transportation/type=connector/*.parquet') -); - -CREATE OR REPLACE VIEW division AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division/*.parquet') -); - -CREATE OR REPLACE VIEW division_area AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division_area/*.parquet') -); - -CREATE OR REPLACE VIEW division_boundary AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division_boundary/*.parquet') -); - -CREATE OR REPLACE VIEW infrastructure AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=infrastructure/*.parquet') -); - -CREATE OR REPLACE VIEW land AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land/*.parquet') -); - -CREATE OR REPLACE VIEW land_cover AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land_cover/*.parquet') -); - -CREATE OR REPLACE VIEW land_use AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land_use/*.parquet') -); - -CREATE OR REPLACE VIEW place AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=places/type=place/*.parquet') -); - -CREATE OR REPLACE VIEW segment AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=transportation/type=segment/*.parquet') -); - -CREATE OR REPLACE VIEW water AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=water/*.parquet') -); -""" -) diff --git a/utils/fetch_releases_from_stac.py 
b/utils/fetch_releases_from_stac.py
new file mode 100644
index 0000000..a7cf5a0
--- /dev/null
+++ b/utils/fetch_releases_from_stac.py
@@ -0,0 +1,123 @@
+import json
+import urllib.request
+from urllib.parse import urlparse
+
+import duckdb
+
+STAC_CATALOG = "https://stac.overturemaps.org/catalog.json"
+S3_BASE = "s3://overturemaps-us-west-2/release"
+_USER_AGENT = "overturemaps-data/1.0"
+
+# (view name, theme, type) triples: build_views_sql() emits one view per entry,
+# reading the matching theme=/type= prefix of the chosen release.
+VIEWS = [
+    ("address", "addresses", "address"),
+    ("bathymetry", "base", "bathymetry"),
+    ("building", "buildings", "building"),
+    ("building_part", "buildings", "building_part"),
+    ("connector", "transportation", "connector"),
+    ("division", "divisions", "division"),
+    ("division_area", "divisions", "division_area"),
+    ("division_boundary", "divisions", "division_boundary"),
+    ("infrastructure", "base", "infrastructure"),
+    ("land", "base", "land"),
+    ("land_cover", "base", "land_cover"),
+    ("land_use", "base", "land_use"),
+    ("place", "places", "place"),
+    ("segment", "transportation", "segment"),
+    ("water", "base", "water"),
+]
+
+
+def fetch_catalog(url: str, timeout: int = 30) -> dict:
+    """Download *url* and return its body parsed as JSON.
+
+    The request carries the module's User-Agent header and is abandoned
+    after *timeout* seconds.
+    """
+    req = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT})
+    with urllib.request.urlopen(req, timeout=timeout) as response:
+        return json.loads(response.read())
+
+
+def _release_id_from_href(href: str) -> str:
+    """Return the first path segment of *href*, skipping "." segments.
+
+    Handles "./<id>/catalog.json", "<id>/catalog.json" and absolute URLs.
+
+    Raises:
+        ValueError: if *href* contains no usable path segment.
+    """
+    parts = [p for p in urlparse(href).path.split("/") if p and p != "."]
+    if not parts:
+        # Name the offending href instead of failing with a bare IndexError.
+        raise ValueError(f"cannot derive a release id from href {href!r}")
+    return parts[0]
+
+
+def parse_releases(catalog: dict) -> dict:
+    """Summarise *catalog* as {"latest": ..., "releases": [...]}.
+
+    "latest" is copied from the catalog's top-level "latest" key; "releases"
+    lists the id of every rel="child" link, sorted in descending order.
+    """
+    latest = catalog["latest"]
+    releases = sorted(
+        (
+            _release_id_from_href(link["href"])
+            for link in catalog["links"]
+            if link["rel"] == "child"
+        ),
+        reverse=True,
+    )
+    return {"latest": latest, "releases": releases}
+
+
+def build_views_sql(latest: str, s3_base: str = S3_BASE) -> str:
+    """Return the SQL script that (re)creates one view per VIEWS entry.
+
+    The script installs and loads the spatial extension, then defines each
+    view as a read_parquet() over its path below *s3_base*/*latest*.
+    """
+    stmts = ["INSTALL spatial;", "LOAD spatial;"]
+    for view_name, theme, type_ in VIEWS:
+        path = f"{s3_base}/{latest}/theme={theme}/type={type_}/*.parquet"
+        # main() takes *latest* from a remote catalog: double any single
+        # quote so the value cannot terminate the SQL string literal.
+        path = path.replace("'", "''")
+        stmts.append(
+            f"CREATE OR REPLACE VIEW {view_name} AS (\n"
+            f" SELECT * FROM read_parquet('{path}')\n);"
+        )
+    return "\n\n".join(stmts)
+
+
+def create_duckdb_views(db_path: str, latest: str, s3_base: str = S3_BASE) -> None:
+    """Open the DuckDB database at *db_path* and run the views script in it.
+
+    The connection is closed even when running the script raises.
+    """
+    conn = duckdb.connect(db_path)
+    try:
+        conn.sql(build_views_sql(latest, s3_base))
+    finally:
+        conn.close()
+
+
+def main() -> None:
+    """Write releases.json and latest.ddb from the catalog at STAC_CATALOG."""
+    catalog = fetch_catalog(STAC_CATALOG)
+    output = parse_releases(catalog)
+
+    for release in output["releases"]:
+        print(f" - {release}")
+
+    # Explicit encoding so the output does not depend on the runner's locale.
+    with open("releases.json", "w", encoding="utf-8") as f:
+        f.write(json.dumps(output, indent=4))
+
+    create_duckdb_views("latest.ddb", output["latest"])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/requirements-test.txt b/utils/requirements-test.txt
new file mode 100644
index 0000000..13f6026
--- /dev/null
+++ b/utils/requirements-test.txt
@@ -0,0 +1,2 @@
+-r requirements.txt
+pytest>=8.0.0
diff --git a/utils/requirements.txt b/utils/requirements.txt
index 2718dd1..7c43c30 100644
--- a/utils/requirements.txt
+++ b/utils/requirements.txt
@@ -1,3 +1,2 @@
-obstore>=0.7.0
 duckdb==1.3.2
 pyarrow>=20.0.0
diff --git a/utils/tests/__init__.py b/utils/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/tests/test_fetch_releases_from_stac.py b/utils/tests/test_fetch_releases_from_stac.py
new file mode 100644
index 0000000..e482282
--- /dev/null
+++ b/utils/tests/test_fetch_releases_from_stac.py
@@ -0,0 +1,199 @@
+import json
+import sys
+import os
+from unittest.mock import MagicMock, patch
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from fetch_releases_from_stac import (
+    VIEWS,
+    build_views_sql,
+    create_duckdb_views,
+    fetch_catalog,
+    parse_releases,
+)
+
+SAMPLE_CATALOG = {
+    "type": "Catalog",
+    "id": "Overture Releases",
+    "stac_version": "1.1.0",
+    "description": "All Overture Releases",
+    "links": [
+        {"rel": "root", "href": "./catalog.json", "type": "application/json"},
+        {
+            "rel": "child",
+            "href": "./2026-05-20.0/catalog.json",
+            "type": "application/json",
+            "latest": True,
+        },
+        {
+            "rel": 
"child", + "href": "./2026-04-15.0/catalog.json", + "type": "application/json", + }, + ], + "latest": "2026-05-20.0", +} + + +class TestFetchCatalog: + def test_returns_parsed_json(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response): + result = fetch_catalog("https://stac.overturemaps.org/catalog.json") + + assert result == SAMPLE_CATALOG + + def test_uses_provided_url(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: + fetch_catalog("https://custom.example.com/catalog.json") + req = mock_open.call_args[0][0] + assert req.full_url == "https://custom.example.com/catalog.json" + + def test_applies_timeout(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: + fetch_catalog("https://stac.overturemaps.org/catalog.json", timeout=10) + assert mock_open.call_args[1]["timeout"] == 10 + + +class TestParseReleases: + def test_extracts_latest(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["latest"] == "2026-05-20.0" + + def test_extracts_child_releases(self): + result = parse_releases(SAMPLE_CATALOG) + assert "2026-05-20.0" in result["releases"] + assert "2026-04-15.0" in result["releases"] + + def test_excludes_root_link(self): + result = parse_releases(SAMPLE_CATALOG) + # root link href is "./catalog.json" — split("/")[1] would be "catalog.json" + # but more importantly 
rel="root" should be excluded
+        assert "catalog.json" not in result["releases"]
+        assert len(result["releases"]) == 2
+
+    def test_releases_sorted_descending(self):
+        result = parse_releases(SAMPLE_CATALOG)
+        assert result["releases"] == sorted(result["releases"], reverse=True)
+
+    def test_returns_dict_with_expected_keys(self):
+        result = parse_releases(SAMPLE_CATALOG)
+        assert set(result.keys()) == {"latest", "releases"}
+
+    def test_empty_links(self):
+        catalog = {**SAMPLE_CATALOG, "links": [], "latest": "2026-05-20.0"}
+        result = parse_releases(catalog)
+        assert result["latest"] == "2026-05-20.0"
+        assert result["releases"] == []
+
+    def test_single_release(self):
+        catalog = {
+            **SAMPLE_CATALOG,
+            "links": [
+                {"rel": "child", "href": "./2026-05-20.0/catalog.json"},
+            ],
+            "latest": "2026-05-20.0",
+        }
+        result = parse_releases(catalog)
+        assert result["releases"] == ["2026-05-20.0"]
+
+    def test_absolute_href_parsed_correctly(self):
+        catalog = {
+            **SAMPLE_CATALOG,
+            "links": [
+                {
+                    "rel": "child",
+                    "href": "https://stac.overturemaps.org/2026-05-20.0/catalog.json",
+                },
+            ],
+            "latest": "2026-05-20.0",
+        }
+        result = parse_releases(catalog)
+        assert result["releases"] == ["2026-05-20.0"]
+
+    def test_relative_href_without_dotslash_parsed_correctly(self):
+        catalog = {
+            **SAMPLE_CATALOG,
+            "links": [
+                {"rel": "child", "href": "2026-05-20.0/catalog.json"},
+            ],
+            "latest": "2026-05-20.0",
+        }
+        result = parse_releases(catalog)
+        assert result["releases"] == ["2026-05-20.0"]
+
+
+class TestBuildViewsSql:
+    def test_contains_install_spatial(self):
+        sql = build_views_sql("2026-05-20.0")
+        assert "INSTALL spatial" in sql
+
+    def test_contains_load_spatial(self):
+        sql = build_views_sql("2026-05-20.0")
+        assert "LOAD spatial" in sql
+
+    def test_all_views_present(self):
+        sql = build_views_sql("2026-05-20.0")
+        for view_name, _, _ in VIEWS:
+            assert f"CREATE OR REPLACE VIEW {view_name}" in sql
+
+    def test_latest_release_in_paths(self):
+        release = "2026-05-20.0"
+        sql = build_views_sql(release)
+        assert release in sql
+
+    def test_custom_s3_base(self):
+        sql = build_views_sql("2026-05-20.0", s3_base="s3://my-bucket/release")
+        assert "s3://my-bucket/release" in sql
+
+    def test_correct_theme_type_mapping(self):
+        sql = build_views_sql("2026-05-20.0")
+        assert "theme=addresses/type=address" in sql
+        assert "theme=buildings/type=building_part" in sql
+        assert "theme=transportation/type=segment" in sql
+        assert "theme=divisions/type=division_boundary" in sql
+
+    def test_view_count_matches_views_constant(self):
+        sql = build_views_sql("2026-05-20.0")
+        count = sql.count("CREATE OR REPLACE VIEW")
+        assert count == len(VIEWS)
+
+
+class TestCreateDuckdbViews:
+    def test_creates_all_views(self):
+        mock_conn = MagicMock()
+        with patch("duckdb.connect", return_value=mock_conn):
+            create_duckdb_views(":memory:", "2026-05-20.0")
+        mock_conn.sql.assert_called_once()
+        sql_arg = mock_conn.sql.call_args[0][0]
+        for view_name, _, _ in VIEWS:
+            assert f"CREATE OR REPLACE VIEW {view_name}" in sql_arg
+
+    def test_views_reference_correct_release(self):
+        release = "2026-05-20.0"
+        mock_conn = MagicMock()
+        with patch("duckdb.connect", return_value=mock_conn):
+            create_duckdb_views(":memory:", release)
+        sql_arg = mock_conn.sql.call_args[0][0]
+        assert release in sql_arg
+
+    def test_closes_connection(self):
+        mock_conn = MagicMock()
+        with patch("duckdb.connect", return_value=mock_conn):
+            create_duckdb_views(":memory:", "2026-05-20.0")
+        mock_conn.close.assert_called_once()