diff --git a/.github/workflows/python-ci-polaris.yml b/.github/workflows/python-ci-polaris.yml new file mode 100644 index 0000000000..06989539eb --- /dev/null +++ b/.github/workflows/python-ci-polaris.yml @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+name: "Python CI - Polaris"
+
+on:
+  push:
+    branches:
+      - 'main'
+  pull_request:
+    paths:
+      - 'pyiceberg/**'
+      - 'tests/**'
+      - 'dev/docker-compose-polaris.yml'
+      - 'dev/provision_polaris.py'
+      - '.github/workflows/python-ci-polaris.yml'
+      - 'Makefile'
+      - 'pyproject.toml'
+      - 'uv.lock'
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+  polaris-integration-test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+      - name: Install UV
+        uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
+      - name: Install system dependencies
+        run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
+      - name: Install
+        run: make install
+      - name: Run Polaris integration tests
+        run: make test-polaris
+      - name: Show debug logs
+        if: ${{ failure() }}
+        run: docker compose -f dev/docker-compose-polaris.yml logs
diff --git a/.gitignore b/.gitignore
index ef8c522482..e32278d18b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,3 +47,5 @@ htmlcov
 pyiceberg/avro/decoder_fast.c
 pyiceberg/avro/*.html
 pyiceberg/avro/*.so
+
+dev/polaris_creds.env
diff --git a/Makefile b/Makefile
index 032832c8b0..bd83a43a52 100644
--- a/Makefile
+++ b/Makefile
@@ -108,6 +108,8 @@ test: ## Run all unit tests (excluding integration)
 
 test-integration: test-integration-setup test-integration-exec test-integration-cleanup ## Run integration tests
+
+test-polaris: test-polaris-setup test-polaris-exec test-polaris-cleanup ## Run Polaris integration tests
 
 test-integration-setup: install ## Start Docker services for integration tests
 	docker compose -f dev/docker-compose-integration.yml kill
 	docker compose -f dev/docker-compose-integration.yml rm -f
@@ -123,6 +125,38 @@ test-integration-cleanup: ## Clean up integration test environment
 	fi
 	$(CLEANUP_COMMAND)
 
+test-polaris-setup: install ## Start Docker services for Polaris integration tests
+	docker compose -f dev/docker-compose-polaris.yml kill
+	docker compose -f dev/docker-compose-polaris.yml rm -f
+	docker compose -f dev/docker-compose-polaris.yml up -d --build --wait
+	uv run $(PYTHON_ARG) python dev/provision_polaris.py > dev/polaris_creds.env
+
+# Skip test_update_namespace_properties: Polaris triggers a CommitConflictException when updates and removals are in the same request.
+test-polaris-exec: ## Run Polaris integration tests
+	@. ./dev/polaris_creds.env && \
+		PYICEBERG_TEST_CATALOG="polaris" \
+		PYICEBERG_CATALOG__POLARIS__TYPE="rest" \
+		PYICEBERG_CATALOG__POLARIS__URI="http://localhost:8181/api/catalog" \
+		PYICEBERG_CATALOG__POLARIS__OAUTH2_SERVER_URI="http://localhost:8181/api/catalog/v1/oauth/tokens" \
+		PYICEBERG_CATALOG__POLARIS__CREDENTIAL="$$CLIENT_ID:$$CLIENT_SECRET" \
+		PYICEBERG_CATALOG__POLARIS__SCOPE="PRINCIPAL_ROLE:ALL" \
+		PYICEBERG_CATALOG__POLARIS__WAREHOUSE="polaris" \
+		PYICEBERG_CATALOG__POLARIS__HEADER__X_ICEBERG_ACCESS_DELEGATION="vended-credentials" \
+		PYICEBERG_CATALOG__POLARIS__HEADER__REALM="POLARIS" \
+		PYICEBERG_CATALOG__POLARIS__S3__ENDPOINT="http://localhost:9000" \
+		PYICEBERG_CATALOG__POLARIS__S3__ACCESS_KEY_ID="admin" \
+		PYICEBERG_CATALOG__POLARIS__S3__SECRET_ACCESS_KEY="password" \
+		PYICEBERG_CATALOG__POLARIS__S3__REGION="us-east-1" \
+		$(TEST_RUNNER) pytest tests/integration/test_catalog.py -k "rest_test_catalog and not test_update_namespace_properties" $(PYTEST_ARGS)
+
+
+test-polaris-cleanup: ## Clean up Polaris integration test environment
+	@[ "${KEEP_COMPOSE}" = "1" ] || { \
+		echo "Cleaning up Polaris Docker containers..."; \
+		docker compose -f dev/docker-compose-polaris.yml down -v --remove-orphans --timeout 0 2>/dev/null || true; \
+		rm -f dev/polaris_creds.env; \
+	}
+
 test-integration-rebuild: ## Rebuild integration Docker services from scratch
 	docker compose -f dev/docker-compose-integration.yml kill
 	docker compose -f dev/docker-compose-integration.yml rm -f
diff --git a/dev/docker-compose-polaris.yml b/dev/docker-compose-polaris.yml
new file mode 100644
index 0000000000..260dbf2024
--- /dev/null
+++ b/dev/docker-compose-polaris.yml
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+services:
+  polaris:
+    image: apache/polaris:latest
+    container_name: pyiceberg-polaris
+    networks:
+      iceberg_net:
+    ports:
+      - "8181:8181"
+      - "8182:8182"
+    environment:
+      - POLARIS_BOOTSTRAP_CREDENTIALS=POLARIS,root,s3cr3t
+      - polaris.features."ALLOW_INSECURE_STORAGE_TYPES"=true
+      - polaris.features."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3"]
+      - polaris.features."ALLOW_OVERLAPPING_CATALOG_URLS"=true
+      - polaris.readiness.ignore-severe-issues=true
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8182/q/health"]  # -f: HTTP error responses (>= 400) fail the check
+      interval: 10s
+      timeout: 10s
+      retries: 5
+  minio:
+    image: minio/minio
+    container_name: pyiceberg-polaris-minio
+    networks:
+      iceberg_net:
+        aliases:
+          - warehouse.minio
+    ports:
+      - "9001:9001"
+      - "9000:9000"
+    environment:
+      - MINIO_ROOT_USER=admin
+      - MINIO_ROOT_PASSWORD=password
+      - MINIO_DOMAIN=minio
+    command: ["server", "/data", "--console-address", ":9001"]
+  mc:
+    image: minio/mc
+    container_name: pyiceberg-polaris-mc
+    networks:
+      iceberg_net:
+    depends_on:
+      - minio
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+    entrypoint: >
+      /bin/sh -c "
+      until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
+      /usr/bin/mc mb minio/warehouse;
+      /usr/bin/mc anonymous set public minio/warehouse;
+      tail -f /dev/null
+      "
+
+networks:
+  iceberg_net:
diff --git a/dev/provision_polaris.py b/dev/provision_polaris.py
new file mode 100644
index 0000000000..ed2ddf2e08
--- /dev/null
+++ b/dev/provision_polaris.py
@@ -0,0 +1,169 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Provision the local Polaris instance used by the Polaris integration tests.
+
+On success the only stdout output is a ``CLIENT_ID=...`` line and a
+``CLIENT_SECRET=...`` line, so the output can be redirected into an env file.
+"""
+
+import requests
+
+POLARIS_URL = "http://localhost:8181/api/management/v1"
+POLARIS_TOKEN_URL = "http://localhost:8181/api/catalog/v1/oauth/tokens"
+# Upper bound, in seconds, for each HTTP call so an unresponsive server fails the run instead of hanging it.
+REQUEST_TIMEOUT_SECONDS = 30
+
+
+def get_token(client_id: str, client_secret: str) -> str:
+    """Exchange client credentials for an OAuth access token in the POLARIS realm.
+
+    Raises:
+        requests.HTTPError: If the token endpoint answers with an error status.
+    """
+    response = requests.post(
+        POLARIS_TOKEN_URL,
+        data={
+            "grant_type": "client_credentials",
+            "client_id": client_id,
+            "client_secret": client_secret,
+            "scope": "PRINCIPAL_ROLE:ALL",
+        },
+        headers={"realm": "POLARIS"},
+        timeout=REQUEST_TIMEOUT_SECONDS,
+    )
+    response.raise_for_status()
+    return response.json()["access_token"]
+
+
+def _create_if_absent(url: str, headers: dict[str, str], payload: dict) -> None:
+    """POST ``payload`` to ``url``; a 409 Conflict (entity already exists) counts as success.
+
+    Raises:
+        requests.HTTPError: For any error status other than 409.
+    """
+    response = requests.post(url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
+    if response.status_code != 409:
+        response.raise_for_status()
+
+
+def provision() -> None:
+    """Create the principal, roles, catalog and grants, then print the principal's credentials.
+
+    Raises:
+        requests.HTTPError: If a management API call fails (409 on create calls is tolerated).
+    """
+    # Initial authentication with root credentials
+    token = get_token("root", "s3cr3t")
+    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json", "realm": "POLARIS"}
+
+    # 1. Create Principal
+    principal_name = "pyiceberg_principal"
+    principal_resp = requests.post(
+        f"{POLARIS_URL}/principals",
+        headers=headers,
+        json={"name": principal_name, "type": "PRINCIPAL"},
+        timeout=REQUEST_TIMEOUT_SECONDS,
+    )
+    if principal_resp.status_code == 409:
+        # Principal already exists: rotate its credentials so a usable secret is returned.
+        principal_resp = requests.post(
+            f"{POLARIS_URL}/principals/{principal_name}/rotate-credentials",
+            headers=headers,
+            timeout=REQUEST_TIMEOUT_SECONDS,
+        )
+    principal_resp.raise_for_status()
+    principal_data = principal_resp.json()
+    client_id = principal_data["credentials"]["clientId"]
+    client_secret = principal_data["credentials"]["clientSecret"]
+
+    # 2. Assign service_admin role to our principal
+    requests.put(
+        f"{POLARIS_URL}/principals/{principal_name}/principal-roles",
+        headers=headers,
+        json={"principalRole": {"name": "service_admin"}},
+        timeout=REQUEST_TIMEOUT_SECONDS,
+    ).raise_for_status()
+
+    # 3. Create Principal Role for catalog access (an existing role is fine)
+    role_name = "pyiceberg_role"
+    _create_if_absent(f"{POLARIS_URL}/principal-roles", headers, {"principalRole": {"name": role_name}})
+
+    # 4. Link Principal to Principal Role
+    requests.put(
+        f"{POLARIS_URL}/principals/{principal_name}/principal-roles",
+        headers=headers,
+        json={"principalRole": {"name": role_name}},
+        timeout=REQUEST_TIMEOUT_SECONDS,
+    ).raise_for_status()
+
+    # 5. Create Catalog (an existing catalog is fine)
+    catalog_name = "polaris"
+    _create_if_absent(
+        f"{POLARIS_URL}/catalogs",
+        headers,
+        {
+            "catalog": {
+                "name": catalog_name,
+                "type": "INTERNAL",
+                "readOnly": False,
+                "properties": {
+                    "default-base-location": "s3://warehouse/polaris/",
+                    "polaris.config.drop-with-purge.enabled": "true",
+                },
+                "storageConfigInfo": {
+                    "storageType": "S3",
+                    "allowedLocations": ["s3://warehouse/polaris/"],
+                    "region": "us-east-1",
+                    "endpoint": "http://minio:9000",
+                },
+            }
+        },
+    )
+
+    # 6. Link catalog_admin role to our principal role
+    requests.put(
+        f"{POLARIS_URL}/principal-roles/{role_name}/catalog-roles/{catalog_name}",
+        headers=headers,
+        json={"catalogRole": {"name": "catalog_admin"}},
+        timeout=REQUEST_TIMEOUT_SECONDS,
+    ).raise_for_status()
+
+    # 7. Grant explicit privileges to catalog_admin role for this catalog
+    for privilege in [
+        "CATALOG_MANAGE_CONTENT",
+        "CATALOG_MANAGE_METADATA",
+        "TABLE_CREATE",
+        "TABLE_WRITE_DATA",
+        "TABLE_LIST",
+        "NAMESPACE_CREATE",
+        "NAMESPACE_LIST",
+    ]:
+        requests.put(
+            f"{POLARIS_URL}/catalogs/{catalog_name}/catalog-roles/catalog_admin/grants",
+            headers=headers,
+            json={"grant": {"type": "catalog", "privilege": privilege}},
+            timeout=REQUEST_TIMEOUT_SECONDS,
+        ).raise_for_status()
+
+    # Print credentials for use in CI
+    print(f"CLIENT_ID={client_id}")
+    print(f"CLIENT_SECRET={client_secret}")
+
+
+if __name__ == "__main__":
+    provision()