From 5dd986b230dae6334cef4c8eecbbba844d333f3c Mon Sep 17 00:00:00 2001 From: Yoo HoJun Date: Tue, 19 May 2026 17:01:01 +0900 Subject: [PATCH 01/18] feat: support flag parameters without valuesAllow flags to work with only key names, omitting values. --- .gitignore | 1 + src/workflow/CommandExecutor.py | 28 +++++++++++-- src/workflow/ParameterManager.py | 70 ++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 227f773..1525129 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ gdpr_consent/node_modules/ *~ .streamlit/secrets.toml docs/superpowers/ +.venv/ \ No newline at end of file diff --git a/src/workflow/CommandExecutor.py b/src/workflow/CommandExecutor.py index 86f265b..b15a193 100644 --- a/src/workflow/CommandExecutor.py +++ b/src/workflow/CommandExecutor.py @@ -5,7 +5,7 @@ import threading from pathlib import Path from .Logger import Logger -from .ParameterManager import ParameterManager +from .ParameterManager import ParameterManager, bool_param_paths_from_param_xml_ini import sys import importlib.util import json @@ -216,7 +216,7 @@ def read_stderr(): stdout_thread.join() stderr_thread.join() - def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> bool: + def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}, tool_instance_name: str = None) -> bool: """ Constructs and executes commands for the specified tool OpenMS TOPP tool based on the given input and output configurations. Ensures that all input/output file lists @@ -234,6 +234,10 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> b tool (str): The executable name or path of the tool. input_output (dict): A dictionary specifying the input/output parameter names (as key) and their corresponding file paths (as value). custom_params (dict): A dictionary of custom parameters to pass to the tool. 
+ tool_instance_name (str, optional): Key for ``params.json`` when it differs + from ``tool`` (e.g. multiple instances). Defaults to ``tool``. + Custom parameters whose keys appear in the tool's ParamXML ``type="bool"`` + entries are passed as valueless CLI flags (``-name`` only when enabled). Returns: bool: True if all commands succeeded, False if any failed. @@ -261,8 +265,15 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> b commands = [] - # Load parameters for non-defaults params = self.parameter_manager.get_parameters_from_json() + + topp_tool_ini_path = Path(self.parameter_manager.ini_dir, f"{tool}.ini") + # Keys of type="bool" in the .ini: TOPP treats these as on/off flags (omit value when off) + topp_bool_flag_param_keys = ( + bool_param_paths_from_param_xml_ini(topp_tool_ini_path, tool) + if topp_tool_ini_path.exists() + else set() + ) # Construct commands for each process for i in range(n_processes): command = [tool] @@ -284,6 +295,16 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> b # Add non-default TOPP tool parameters if tool in params.keys(): for k, v in params[tool].items(): + + if k in topp_bool_flag_param_keys and v != "": + # CLI flag: include "-k" only when enabled + if isinstance(v, str): + is_enabled = v.lower() == "true" + else: + is_enabled = bool(v) + if is_enabled: + command += [f"-{k}"] + continue command += [f"-{k}"] # Skip only empty strings (pass flag with no value) # Note: 0 and 0.0 are valid values, so use explicit check @@ -295,6 +316,7 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> b # Add custom parameters for k, v in custom_params.items(): command += [f"-{k}"] + # Skip only empty strings (pass flag with no value) # Note: 0 and 0.0 are valid values, so use explicit check if v != "" and v is not None: diff --git a/src/workflow/ParameterManager.py b/src/workflow/ParameterManager.py index b0c3626..b8f490e 100644 --- 
a/src/workflow/ParameterManager.py +++ b/src/workflow/ParameterManager.py @@ -3,8 +3,52 @@ import shutil import subprocess import streamlit as st +import xml.etree.ElementTree as ET from pathlib import Path + +def bool_param_paths_from_param_xml_ini(ini_path: Path, tool_stem: str) -> set[str]: + """ + Return short parameter paths for every ``<ITEM type="bool">`` in a ParamXML .ini file. + + Paths match the suffix after ``Tool:1:`` in pyOpenMS (e.g. ``algorithm:epd:masstrace_snr_filtering``). + """ + try: + root = ET.parse(ini_path).getroot() + except (ET.ParseError, OSError): + return set() + + def local_tag(el: ET.Element) -> str: + t = el.tag + return t.rsplit("}", 1)[-1] if isinstance(t, str) and "}" in t else str(t) + + out: set[str] = set() + + def walk(el: ET.Element, parts: tuple[str, ...]) -> None: + for ch in el: + lt = local_tag(ch) + if lt == "NODE": + nm = ch.get("name") or "" + walk(ch, parts + (nm,)) + elif lt == "ITEM" and (ch.get("type") or "").lower() == "bool": + nm = ch.get("name") or "" + segs = [p for p in parts if p] + if nm: + segs.append(nm) + if not segs: + continue + # Strip tool root NODE name and instance NODE "1" (not part of pyOpenMS short keys) + while segs and segs[0] in (tool_stem, "1"): + segs.pop(0) + if segs: + out.add(":".join(segs)) + + for ch in root: + if local_tag(ch) == "NODE": + walk(ch, ()) + return out + + class ParameterManager: """ Manages the parameters for a workflow, including saving parameters to a JSON file, @@ -29,6 +73,29 @@ def __init__(self, workflow_dir: Path, workflow_name: str = None): # Store workflow name for preset loading; default to directory stem if not provided self.workflow_name = workflow_name or workflow_dir.stem + def bool_pairs_session_key(self) -> str: + """Session state key holding a set of (tool name, param path) for bool TOPP params.""" + return f"{self.ini_dir.parent.stem}-topp-bool-pairs" + + def get_bool_param_pairs(self) -> set: + """Return the cached set of (tool, param path) bool params; empty set if
none.""" + return st.session_state.get(self.bool_pairs_session_key(), set()) + + def _merge_bool_params_from_ini(self, tool: str) -> None: + """Load tool.ini (XML) and merge type=bool parameter paths into session_state.""" + ini_path = Path(self.ini_dir, f"{tool}.ini") + if not ini_path.exists(): + return + try: + sk = self.bool_pairs_session_key() + if sk not in st.session_state: + st.session_state[sk] = set() + for short in bool_param_paths_from_param_xml_ini(ini_path, tool): + st.session_state[sk].add((tool, short)) + except RuntimeError: + # No Streamlit session (e.g. plain `python` import) + pass + def create_ini(self, tool: str) -> bool: """ Create an ini file for a TOPP tool if it doesn't exist. @@ -41,11 +108,14 @@ def create_ini(self, tool: str) -> bool: """ ini_path = Path(self.ini_dir, tool + ".ini") if ini_path.exists(): + self._merge_bool_params_from_ini(tool) return True try: subprocess.call([tool, "-write_ini", str(ini_path)]) except FileNotFoundError: return False + if ini_path.exists(): + self._merge_bool_params_from_ini(tool) return ini_path.exists() def save_parameters(self) -> None: From b0e682efefe9f36b38012496096a699bffa2a984 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 15:26:32 +0000 Subject: [PATCH 02/18] ci(docker): publish multi-arch (amd64 + arm64) images to GHCR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror FLASHApp's split-build / manifest-merge approach so both linux/amd64 and linux/arm64 are published for the full and simple variants. Existing `-full` / `-simple` / `latest` tags become multi-arch manifests — k8s overlays, docker-compose users, and direct `docker pull` callers transparently get the right arch. 
Dockerfile.arm (delta from Dockerfile): - aarch64 miniforge installer - conditional THIRDPARTY/Linux/aarch64 copy (some OpenMS releases ship an empty/missing aarch64 dir) - pruned thirdparty PATH to tools that actually have ARM builds: LuciPHOr2, MSGFPlus, ThermoRawFileParser, Comet, Percolator, Sage Dockerfile_simple.arm (delta from Dockerfile_simple): - aarch64 miniforge installer only — pyOpenMS ships aarch64 wheels on PyPI, so `pip install -r requirements.txt` works as-is The shared docker/entrypoint.sh is reused as-is on ARM: its apptainer/read-only-root handling is arch-neutral and worth keeping. Base stays ubuntu:22.04 (Redis 6.0 predates the ARM64-COW-BUG warning, so no `--ignore-warnings` flag needed). Workflow changes (build-and-test.yml): - `build` renamed `build-amd64`; per-arch tags carry `-amd64`. - New `build-arm64` job runs on `ubuntu-24.04-arm`, builds the `.arm` Dockerfiles for both variants, ends with a pull-back + /_stcore/health probe on push events. - New `create-manifest` job stitches `<ref>-<variant>-amd64` + `<ref>-<variant>-arm64` into multi-arch `<ref>-<variant>` and `latest` manifests. - test-apptainer / test-nginx / test-traefik / publish-apptainer keep consuming the amd64 artifact only. SIF publishing stays amd64-only this iteration. - PRs build both arches (registry cache keeps warm runs cheap) but don't push; manifest creation also skipped on PRs. Branch-protection note: the `build` required check is renamed to `build-amd64`. Admins should update protected-branch rules and add `build-arm64` / `create-manifest` if those should also be required. 
--- .github/workflows/build-and-test.yml | 185 +++++++++++++++++++++-- Dockerfile.arm | 211 +++++++++++++++++++++++++++ Dockerfile_simple.arm | 127 ++++++++++++++++ 3 files changed, 513 insertions(+), 10 deletions(-) create mode 100644 Dockerfile.arm create mode 100644 Dockerfile_simple.arm diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 6ed2406..5ca863a 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -38,7 +38,10 @@ jobs: kubectl kustomize k8s/overlays/prod/ | \ kubeconform -summary -strict -kubernetes-version 1.28.0 -skip IngressRoute - build: + build-amd64: + # amd64 path. Produces per-arch tags `--amd64`; the + # multi-arch manifest under `-` (and `latest`) is stitched + # together in `create-manifest` once the sibling `build-arm64` succeeds. needs: lint-manifests runs-on: ubuntu-latest permissions: @@ -75,22 +78,23 @@ jobs: with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | - type=ref,event=branch,suffix=-${{ matrix.variant }} - type=ref,event=tag,suffix=-${{ matrix.variant }} - type=sha,prefix=,suffix=-${{ matrix.variant }} - type=raw,value=latest,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} + type=ref,event=branch,suffix=-${{ matrix.variant }}-amd64 + type=ref,event=tag,suffix=-${{ matrix.variant }}-amd64 + type=sha,prefix=,suffix=-${{ matrix.variant }}-amd64 + type=raw,value=latest-amd64,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} - name: Build and conditionally push uses: docker/build-push-action@v5 with: context: . 
file: ${{ matrix.dockerfile }} + platforms: linux/amd64 load: true push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}/cache:${{ matrix.variant }} - cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref={0}/{1}/cache:{2},mode=max', env.REGISTRY, env.IMAGE_NAME_LC, matrix.variant) || '' }} + cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}/cache:${{ matrix.variant }}-amd64 + cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref={0}/{1}/cache:{2}-amd64,mode=max', env.REGISTRY, env.IMAGE_NAME_LC, matrix.variant) || '' }} build-args: | GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} @@ -111,13 +115,174 @@ jobs: path: /tmp/image.tar retention-days: 1 + build-arm64: + # arm64 path. Runs on a native ARM64 runner (no QEMU). Produces per-arch + # tags `--arm64`; gets merged into the multi-arch manifest + # under `-` by the `create-manifest` job below. The build + # uses a separate `Dockerfile.arm` / `Dockerfile_simple.arm` that swaps + # the miniforge installer to aarch64 and (for the full variant) guards + # the THIRDPARTY/Linux/aarch64 copy. Apptainer/nginx/traefik integration + # tests still run only on the amd64 artifact — those gates do not need + # arch duplication right now (HPC consumers of the SIF are amd64). + needs: lint-manifests + runs-on: ubuntu-24.04-arm + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + include: + - variant: full + dockerfile: Dockerfile.arm + - variant: simple + dockerfile: Dockerfile_simple.arm + steps: + - name: Free disk space + # OpenMS source build needs ~25 GB of scratch space; the ARM runner + # image is tighter than the AMD one out of the box. Mirrors what + # FLASHApp's publish-docker-images.yml does at the top of its ARM job. 
+ run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + sudo apt-get clean + df -h + + - uses: actions/checkout@v4 + + - name: Compute lowercase image name (OCI refs must be lowercase) + run: echo "IMAGE_NAME_LC=${IMAGE_NAME,,}" >> "$GITHUB_ENV" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch,suffix=-${{ matrix.variant }}-arm64 + type=ref,event=tag,suffix=-${{ matrix.variant }}-arm64 + type=sha,prefix=,suffix=-${{ matrix.variant }}-arm64 + type=raw,value=latest-arm64,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} + + - name: Build and conditionally push + uses: docker/build-push-action@v5 + with: + context: . + file: ${{ matrix.dockerfile }} + platforms: linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}/cache:${{ matrix.variant }}-arm64 + cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref={0}/{1}/cache:{2}-arm64,mode=max', env.REGISTRY, env.IMAGE_NAME_LC, matrix.variant) || '' }} + provenance: false + build-args: | + GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} + + - name: Smoke test the just-pushed arm64 image + # PRs build (validates Dockerfile.arm parses + compiles) but don't + # push, so there's nothing to pull back on PR events. 
On push/tag, + # pull the just-published image and verify /_stcore/health to catch + # entrypoint regressions that wouldn't surface in the build itself. + if: github.event_name != 'pull_request' + run: | + set -euo pipefail + IMAGE_REF="${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:${{ github.sha }}-${{ matrix.variant }}-arm64" + echo "Smoke-testing $IMAGE_REF" + docker pull "$IMAGE_REF" + docker run -d --rm --name smoketest -p 8501:8501 "$IMAGE_REF" + for i in $(seq 1 90); do + if curl -fsSo /dev/null --max-time 2 http://127.0.0.1:8501/_stcore/health; then + echo "Streamlit healthy after ${i} attempts" + docker stop smoketest + exit 0 + fi + sleep 2 + done + echo "ERROR: /_stcore/health never returned 200" + docker logs smoketest || true + docker stop smoketest || true + exit 1 + + create-manifest: + # Stitch the per-arch tags into multi-arch manifest lists. The manifest + # tags reuse the OLD scheme (`-`, `latest`) so existing + # consumers (k8s overlays, docker-compose users, `docker pull` callers) + # keep working transparently — docker now auto-selects the right arch + # on pull. PRs don't push per-arch tags, so there's nothing to merge. + needs: [build-amd64, build-arm64] + if: github.event_name != 'pull_request' + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + variant: [full, simple] + steps: + - name: Compute lowercase image name + run: echo "IMAGE_NAME_LC=${IMAGE_NAME,,}" >> "$GITHUB_ENV" + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Compute manifest tags + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + # NB: no -amd64/-arm64 suffix here. These are the multi-arch + # manifest names; they must match the pre-arm64 tag scheme so + # `:main-full`, `:v1.0.0-full`, `:latest` continue to resolve. 
+ tags: | + type=ref,event=branch,suffix=-${{ matrix.variant }} + type=ref,event=tag,suffix=-${{ matrix.variant }} + type=sha,prefix=,suffix=-${{ matrix.variant }} + type=raw,value=latest,enable=${{ matrix.variant == 'full' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} + + - name: Create and push multi-arch manifests + # Iterate over manifest tags (newline-separated from metadata-action) + # and merge the matching `-amd64` / `-arm64` per-arch tags into each. + # `--amend` makes the step idempotent across workflow_dispatch reruns. + # `docker manifest push` accepts only one ref per invocation, hence + # the loop. + run: | + set -euo pipefail + while IFS= read -r manifest_tag; do + [ -z "$manifest_tag" ] && continue + amd_tag="${manifest_tag}-amd64" + arm_tag="${manifest_tag}-arm64" + echo "Creating manifest ${manifest_tag} from:" + echo " amd: ${amd_tag}" + echo " arm: ${arm_tag}" + docker manifest create "$manifest_tag" \ + --amend "$amd_tag" \ + --amend "$arm_tag" + docker manifest push "$manifest_tag" + done <<< "${{ steps.meta.outputs.tags }}" + test-apptainer: # Apptainer/Singularity is the dominant container runtime on HPC clusters. # It mounts the root filesystem read-only and runs as the host user's UID # (not root inside the image). The entrypoint must tolerate both: this job # exercises that contract by running the built image under apptainer and # waiting for the streamlit /_stcore/health endpoint to come up. 
- needs: build + needs: build-amd64 runs-on: ubuntu-latest strategy: fail-fast: false @@ -335,7 +500,7 @@ jobs: done <<< "${{ steps.meta.outputs.tags }}" test-nginx: - needs: build + needs: build-amd64 runs-on: ubuntu-latest strategy: fail-fast: false @@ -422,7 +587,7 @@ jobs: done test-traefik: - needs: build + needs: build-amd64 runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/Dockerfile.arm b/Dockerfile.arm new file mode 100644 index 0000000..8571bc5 --- /dev/null +++ b/Dockerfile.arm @@ -0,0 +1,211 @@ +# This Dockerfile builds OpenMS, the TOPP tools, pyOpenMS and thidparty tools. +# It also adds a basic streamlit server that serves a pyOpenMS-based app. +# hints: +# build image and give it a name (here: streamlitapp) with: docker build -f Dockerfile.arm --no-cache -t streamlitapp:latest-arm64 --build-arg GITHUB_TOKEN= . 2>&1 | tee build.log +# check if image was build: docker image ls +# run container: docker run -p 8501:8501 streamlitappsimple:latest +# debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell +# prune unused images/etc. to free disc space (e.g. might be needed on gitpod). Use with care.: docker system prune --all --force + +FROM ubuntu:22.04 AS setup-build-system +ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git +ARG OPENMS_BRANCH=release/3.5.0 +ARG PORT=8501 +# Streamlit app GitHub user name (to download artifact from). +ARG GITHUB_USER=OpenMS +# Streamlit app GitHub repository name (to download artifact from). +ARG GITHUB_REPO=streamlit-template + +USER root + +# Install required Ubuntu packages. 
+RUN apt-get -y update +RUN apt-get install -y --no-install-recommends --no-install-suggests g++ autoconf automake patch libtool make git gpg wget ca-certificates curl jq libgtk2.0-dev openjdk-8-jdk cron +RUN update-ca-certificates +RUN apt-get install -y --no-install-recommends --no-install-suggests libsvm-dev libeigen3-dev coinor-libcbc-dev libglpk-dev libzip-dev zlib1g-dev libxerces-c-dev libbz2-dev libomp-dev libhdf5-dev +RUN apt-get install -y --no-install-recommends --no-install-suggests libboost-date-time1.74-dev \ + libboost-iostreams1.74-dev \ + libboost-regex1.74-dev \ + libboost-math1.74-dev \ + libboost-random1.74-dev +RUN apt-get install -y --no-install-recommends --no-install-suggests qt6-base-dev libqt6svg6-dev libqt6opengl6-dev libqt6openglwidgets6 libgl-dev + +# Install Github CLI +RUN (type -p wget >/dev/null || (apt-get update && apt-get install wget -y)) \ + && mkdir -p -m 755 /etc/apt/keyrings \ + && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \ + && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ + && apt-get update \ + && apt-get install gh -y + +# Download and install miniforge. +ENV PATH="/root/miniforge3/bin:${PATH}" +RUN wget -q \ + https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh \ + && bash Miniforge3-Linux-aarch64.sh -b \ + && rm -f Miniforge3-Linux-aarch64.sh +RUN mamba --version + +# Make /root traversable so the entrypoint can `source +# /root/miniforge3/bin/activate ...` when the container runs as a non-root +# user (apptainer/singularity maps the host UID into the container; the +# default ubuntu /root is 0700 which would block path traversal). 
+x only, +# not +r, so the directory listing remains private. +RUN chmod o+x /root + +# Setup mamba environment. +RUN mamba create -n streamlit-env python=3.10 +RUN echo "mamba activate streamlit-env" >> ~/.bashrc +SHELL ["/bin/bash", "--rcfile", "~/.bashrc"] +SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] + +# Install up-to-date cmake via mamba and packages for pyOpenMS build. +RUN mamba install cmake +RUN pip install --upgrade pip && python -m pip install -U setuptools nose cython "autowrap<=0.24" pandas numpy pytest + +# Clone OpenMS branch and the associated contrib+thirdparties+pyOpenMS-doc submodules. +RUN git clone --recursive --depth=1 -b ${OPENMS_BRANCH} --single-branch ${OPENMS_REPO} && cd /OpenMS + +# Pull Linux compatible third-party dependencies and store them in directory thirdparty. +WORKDIR /OpenMS +RUN mkdir /thirdparty && \ + git submodule update --init THIRDPARTY && \ + cp -r THIRDPARTY/All/* /thirdparty && \ + if [ -d "THIRDPARTY/Linux/aarch64" ]; then \ + cp -r THIRDPARTY/Linux/aarch64/* /thirdparty; \ + fi && \ + chmod -R +x /thirdparty +ENV PATH="/thirdparty/LuciPHOr2:/thirdparty/MSGFPlus:/thirdparty/ThermoRawFileParser:/thirdparty/Comet:/thirdparty/Percolator:/thirdparty/Sage:${PATH}" + +# Build OpenMS and pyOpenMS. +FROM setup-build-system AS compile-openms +WORKDIR / + +# Set up build directory. +RUN mkdir /openms-build +WORKDIR /openms-build + +# Configure. +RUN /bin/bash -c "cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF -DPYOPENMS=ON ../OpenMS -DPY_MEMLEAK_DISABLE=On" + +# Build TOPP tools and clean up. +RUN make -j4 TOPP +RUN rm -rf src doc CMakeFiles + +# Build pyOpenMS wheels and install via pip. 
+RUN make -j4 pyopenms +WORKDIR /openms-build/pyOpenMS +RUN pip install dist/*.whl + +# Install other dependencies (excluding pyopenms) +COPY requirements.txt ./requirements.txt +RUN grep -Ev '^pyopenms([=<>!~].*)?$' requirements.txt > requirements_cleaned.txt && mv requirements_cleaned.txt requirements.txt +RUN pip install -r requirements.txt + +WORKDIR / +RUN mkdir openms + +# Copy TOPP tools bin directory, add to PATH. +RUN cp -r openms-build/bin /openms/bin +ENV PATH="/openms/bin/:${PATH}" + +# Copy OpenMS lib directory, add to LD_LIBRARY_PATH. +RUN cp -r openms-build/lib /openms/lib +ENV LD_LIBRARY_PATH="/openms/lib/:${LD_LIBRARY_PATH}" + +# Copy share folder, set OPENMS_DATA_PATH, remove source directory. +RUN cp -r OpenMS/share/OpenMS /openms/share +RUN rm -rf OpenMS +ENV OPENMS_DATA_PATH="/openms/share/" + +# Remove build directory. +RUN rm -rf openms-build + +# Prepare and run streamlit app. +FROM compile-openms AS run-app + +# Install Redis server for job queue and nginx for load balancing. +# Redis data lives under $RUNTIME_DIR at runtime (see entrypoint.sh) so no +# /var/lib/redis setup is needed - that path is not writable under Apptainer. +RUN apt-get update && apt-get install -y --no-install-recommends redis-server nginx \ + && rm -rf /var/lib/apt/lists/* + +# Create Redis data directory. Default 0755 root-owned is enough: the docker +# entrypoint runs as root (can write regardless of mode), and the apptainer +# entrypoint relocates Redis state to /tmp/openms-runtime-* so this dir is +# never written under apptainer. +RUN mkdir -p /var/lib/redis + +# Pre-create bind-mount targets so apptainer/singularity has a real attach +# point. Docker auto-creates missing `-v` targets, but singularity uses a +# read-only underlay and silently ignores `:rw` when the target isn't a +# real directory in the SIF — writes then fail with EROFS even though the +# host bind path is writable. 
Pre-creating these directories costs one +# inode each and changes nothing in docker mode (the user's volume mount +# shadows them). +RUN mkdir -p /workspaces-streamlit-template /mounted-data + +# Create workdir and copy over all streamlit related files/folders. + +# note: specifying folder with slash as suffix and repeating the folder name seems important to preserve directory structure +WORKDIR /app +COPY assets/ /app/assets +COPY content/ /app/content +COPY docs/ /app/docs +COPY example-data/ /app/example-data +COPY gdpr_consent/ /app/gdpr_consent +COPY hooks/ /app/hooks +COPY src/ /app/src +COPY utils/ /app/utils +COPY app.py /app/app.py +COPY settings.json /app/settings.json +COPY default-parameters.json /app/default-parameters.json +COPY presets.json /app/presets.json + +# For streamlit configuration +COPY .streamlit/ /app/.streamlit/ +COPY clean-up-workspaces.py /app/clean-up-workspaces.py + +# add cron job to the crontab +RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab - + +# Set default worker count (can be overridden via environment variable) +ENV RQ_WORKER_COUNT=1 +ENV REDIS_URL=redis://localhost:6379/0 + +# Number of Streamlit server instances for load balancing (default: 1 = no load balancer) +# Set to >1 to enable nginx load balancer with multiple Streamlit instances +ENV STREAMLIT_SERVER_COUNT=1 + +# Install the apptainer-compatible entrypoint that starts cron (when the root +# FS is writable), Redis, RQ workers, optional nginx load balancer, and the +# Streamlit server. The script falls back to /tmp paths under apptainer. 
+COPY docker/entrypoint.sh /app/entrypoint.sh +RUN chmod +x /app/entrypoint.sh + +# Patch Analytics +RUN mamba run -n streamlit-env python hooks/hook-analytics.py + +# Set Online Deployment +RUN jq '.online_deployment = true' settings.json > tmp.json && mv tmp.json settings.json + +# Point the in-app mounted-drive browser at the conventional bind-mount path. +# The browser only renders when this directory exists at runtime, i.e. when +# the user starts the container with `-v /host/path:/mounted-data`. +RUN jq '.local_data_dir = "/mounted-data"' settings.json > tmp.json && mv tmp.json settings.json + +# Download latest OpenMS App executable as a ZIP file. +# ARG declared here (not at the top) — otherwise the per-run token busts the cache. +ARG GITHUB_TOKEN +RUN if [ -n "$GITHUB_TOKEN" ]; then \ + echo "GITHUB_TOKEN is set, proceeding to download the release asset..."; \ + gh release download -R ${GITHUB_USER}/${GITHUB_REPO} -p "OpenMS-App.zip" -D /app; \ + else \ + echo "GITHUB_TOKEN is not set, skipping the release asset download."; \ + fi + + +# Run app as container entrypoint. +EXPOSE $PORT +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/Dockerfile_simple.arm b/Dockerfile_simple.arm new file mode 100644 index 0000000..be57317 --- /dev/null +++ b/Dockerfile_simple.arm @@ -0,0 +1,127 @@ +# This Dockerfile creates a container with pyOpenMS +# It also adds a basic streamlit server that serves a pyOpenMS-based app. +# hints: +# build image with: docker build -f Dockerfile_simple.arm --no-cache -t streamlitapp:latest-arm64 --build-arg GITHUB_TOKEN=<your-github-token> . 2>&1 | tee build.log +# check if image was built: docker image ls +# run container: docker run -p 8501:8501 streamlitapp:latest-arm64 +# debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell +# prune unused images/etc. to free disc space (e.g. might be needed on gitpod). 
Use with care.: docker system prune --all --force + +FROM ubuntu:22.04 AS stage1 +ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git +ARG OPENMS_BRANCH=develop +ARG PORT=8501 +# Streamlit app GitHub user name (to download artifact from). +ARG GITHUB_USER=OpenMS +# Streamlit app GitHub repository name (to download artifact from). +ARG GITHUB_REPO=streamlit-template + + +# Step 1: set up a sane build system +USER root + +RUN apt-get -y update +# note: streamlit in docker needs libgtk2.0-dev (see https://yugdamor.medium.com/importerror-libgthread-2-0-so-0-cannot-open-shared-object-file-no-such-file-or-directory-895b94a7827b) +RUN apt-get install -y --no-install-recommends --no-install-suggests wget ca-certificates libgtk2.0-dev curl jq cron nginx +RUN update-ca-certificates + +# Install Github CLI +RUN (type -p wget >/dev/null || (apt-get update && apt-get install wget -y)) \ + && mkdir -p -m 755 /etc/apt/keyrings \ + && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \ + && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ + && apt-get update \ + && apt-get install gh -y + +# Download and install miniforge. +ENV PATH="/root/miniforge3/bin:${PATH}" +RUN wget -q \ + https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh \ + && bash Miniforge3-Linux-aarch64.sh -b \ + && rm -f Miniforge3-Linux-aarch64.sh +RUN mamba --version + +# Make /root traversable so the entrypoint can `source +# /root/miniforge3/bin/activate ...` when the container runs as a non-root +# user (apptainer/singularity maps the host UID into the container; the +# default ubuntu /root is 0700 which would block path traversal). 
+x only, +# not +r, so the directory listing remains private. +RUN chmod o+x /root + +# Setup mamba environment. +RUN mamba create -n streamlit-env python=3.10 +RUN echo "mamba activate streamlit-env" >> ~/.bashrc +SHELL ["/bin/bash", "--rcfile", "~/.bashrc"] +SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] + +#################################### install streamlit +# install packages +COPY requirements.txt requirements.txt +RUN mamba install pip +RUN python -m pip install --upgrade pip +RUN python -m pip install -r requirements.txt + +# Pre-create bind-mount targets so apptainer/singularity has a real attach +# point. Docker auto-creates missing `-v` targets, but singularity uses a +# read-only underlay and silently ignores `:rw` when the target isn't a +# real directory in the SIF — writes then fail with EROFS even though the +# host bind path is writable. +RUN mkdir -p /workspaces-streamlit-template /mounted-data + +# create workdir and copy over all streamlit related files/folders +WORKDIR /app +# note: specifying folder with slash as suffix and repeating the folder name seems important to preserve directory structure +WORKDIR /app +COPY assets/ /app/assets +COPY content/ /app/content +COPY docs/ /app/docs +COPY example-data/ /app/example-data +COPY gdpr_consent/ /app/gdpr_consent +COPY hooks/ /app/hooks +COPY src/ /app/src +COPY utils/ /app/utils +COPY app.py /app/app.py +COPY settings.json /app/settings.json +COPY default-parameters.json /app/default-parameters.json +COPY presets.json /app/presets.json + +# For streamlit configuration +COPY .streamlit/ /app/.streamlit/ + +COPY clean-up-workspaces.py /app/clean-up-workspaces.py + +# add cron job to the crontab +RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab - + +# Number of Streamlit server instances for load balancing (default: 1 = no load balancer) +# Set to >1 to enable nginx load balancer with 
multiple Streamlit instances +ENV STREAMLIT_SERVER_COUNT=1 + +# Install the apptainer-compatible entrypoint (shared with the full image). +# The script auto-skips the Redis/RQ section when redis-server is not +# installed, so it works equally well in the simple variant. +COPY docker/entrypoint.sh /app/entrypoint.sh +RUN chmod +x /app/entrypoint.sh + +# Patch Analytics +RUN mamba run -n streamlit-env python hooks/hook-analytics.py + +# Set Online Deployment +RUN jq '.online_deployment = true' settings.json > tmp.json && mv tmp.json settings.json + +# Download latest OpenMS App executable as a ZIP file. +# ARG declared here (not at the top) — otherwise the per-run token busts the cache. +ARG GITHUB_TOKEN +RUN if [ -n "$GITHUB_TOKEN" ]; then \ + echo "GITHUB_TOKEN is set, proceeding to download the release asset..."; \ + gh release download -R ${GITHUB_USER}/${GITHUB_REPO} -p "OpenMS-App.zip" -D /app; \ + else \ + echo "GITHUB_TOKEN is not set, skipping the release asset download."; \ + fi + +# make sure that mamba environment is used +SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] + +EXPOSE $PORT +ENTRYPOINT ["/app/entrypoint.sh"] From d32c1b9cbc97c4dbf5e2111273416bb6b0e6b6d1 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 19:43:35 +0000 Subject: [PATCH 03/18] ci(docker): extend apptainer/nginx/traefik tests to cover arm64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the apptainer/nginx/traefik integration tests only ran against the amd64 artifact, so the arm64 image was validated solely by its build succeeding plus a post-push /_stcore/health probe. Now all three integration matrices fan out over arch=[amd64, arm64] with a matrix-driven runs-on, exercising the read-only-root apptainer contract and both kind-based ingress paths on a native ARM runner too. Changes: - `build-amd64` artifact renamed from `openms-streamlit-<variant>-image` to `openms-streamlit-<variant>-amd64-image` for symmetry.
- `build-arm64` now also `load: true`'s the built image, retags to the kind-friendly `openms-streamlit:test`, saves it as a tar, and uploads it as `openms-streamlit-<variant>-arm64-image`. The post-push pull-back smoke test is removed — the new apptainer/nginx/traefik runs subsume it and avoid the slow GHCR pull. - `test-apptainer`, `test-nginx`, `test-traefik` matrices switched from `variant: [full, simple]` to an `include:` list with {variant, arch, runner} tuples; `runs-on: ${{ matrix.runner }}` selects `ubuntu-latest` for amd64 and `ubuntu-24.04-arm` for arm64. Artifact download names get `${{ matrix.arch }}` interpolated. - SIF upload at the tail of `test-apptainer` gated on `matrix.arch == 'amd64'`: arm64 still runs the full apptainer contract end-to-end, but only amd64 produces the SIF that `publish-apptainer` ships to GHCR (HPC SIF consumers are amd64). Note on `publish-apptainer`: it stays on `needs: test-apptainer`, which now waits for the arm64 matrix entries too — meaning an arm64 apptainer regression will block amd64 SIF publishing. Conservative on purpose; happy to decouple via separate jobs if that turns out to be too strict in practice. --- .github/workflows/build-and-test.yml | 115 +++++++++++++++++---------- 1 file changed, 74 insertions(+), 41 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 5ca863a..012291f 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -111,7 +111,7 @@ jobs: - name: Upload image artifact uses: actions/upload-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-image + name: openms-streamlit-${{ matrix.variant }}-amd64-image path: /tmp/image.tar retention-days: 1 @@ -121,9 +121,9 @@ jobs: # under `-` by the `create-manifest` job below.
The build # uses a separate `Dockerfile.arm` / `Dockerfile_simple.arm` that swaps # the miniforge installer to aarch64 and (for the full variant) guards - # the THIRDPARTY/Linux/aarch64 copy. Apptainer/nginx/traefik integration - # tests still run only on the amd64 artifact — those gates do not need - # arch duplication right now (HPC consumers of the SIF are amd64). + # the THIRDPARTY/Linux/aarch64 copy. The built image is also uploaded as + # an artifact so the apptainer / nginx / traefik integration jobs can + # exercise the ARM image on a native ARM runner (matrix arch=arm64). needs: lint-manifests runs-on: ubuntu-24.04-arm permissions: @@ -180,6 +180,7 @@ jobs: context: . file: ${{ matrix.dockerfile }} platforms: linux/arm64 + load: true push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} @@ -189,30 +190,22 @@ jobs: build-args: | GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} - - name: Smoke test the just-pushed arm64 image - # PRs build (validates Dockerfile.arm parses + compiles) but don't - # push, so there's nothing to pull back on PR events. On push/tag, - # pull the just-published image and verify /_stcore/health to catch - # entrypoint regressions that wouldn't surface in the build itself. 
- if: github.event_name != 'pull_request' + - name: Retag for kind (stable local tag) run: | - set -euo pipefail - IMAGE_REF="${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:${{ github.sha }}-${{ matrix.variant }}-arm64" - echo "Smoke-testing $IMAGE_REF" - docker pull "$IMAGE_REF" - docker run -d --rm --name smoketest -p 8501:8501 "$IMAGE_REF" - for i in $(seq 1 90); do - if curl -fsSo /dev/null --max-time 2 http://127.0.0.1:8501/_stcore/health; then - echo "Streamlit healthy after ${i} attempts" - docker stop smoketest - exit 0 - fi - sleep 2 - done - echo "ERROR: /_stcore/health never returned 200" - docker logs smoketest || true - docker stop smoketest || true - exit 1 + # load:true above loaded all meta-action tags into local docker. + # Retag the first one to the stable name the kustomize overlay expects. + FIRST_TAG=$(printf '%s\n' "${{ steps.meta.outputs.tags }}" | head -n 1) + docker tag "$FIRST_TAG" openms-streamlit:test + + - name: Save image as tar + run: docker save openms-streamlit:test -o /tmp/image.tar + + - name: Upload image artifact + uses: actions/upload-artifact@v4 + with: + name: openms-streamlit-${{ matrix.variant }}-arm64-image + path: /tmp/image.tar + retention-days: 1 create-manifest: # Stitch the per-arch tags into multi-arch manifest lists. The manifest @@ -282,19 +275,31 @@ jobs: # (not root inside the image). The entrypoint must tolerate both: this job # exercises that contract by running the built image under apptainer and # waiting for the streamlit /_stcore/health endpoint to come up. 
- needs: build-amd64 - runs-on: ubuntu-latest + needs: [build-amd64, build-arm64] + runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: - variant: [full, simple] + include: + - variant: full + arch: amd64 + runner: ubuntu-latest + - variant: full + arch: arm64 + runner: ubuntu-24.04-arm + - variant: simple + arch: amd64 + runner: ubuntu-latest + - variant: simple + arch: arm64 + runner: ubuntu-24.04-arm steps: - uses: actions/checkout@v4 - name: Download image artifact uses: actions/download-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-image + name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - name: Install apptainer @@ -424,8 +429,12 @@ jobs: if: always() run: apptainer instance stop openms-test || true - - name: Upload validated SIF artifact (push events only) - if: success() && github.event_name != 'pull_request' + - name: Upload validated SIF artifact (amd64 push events only) + # SIF publishing stays amd64-only this iteration (HPC consumers of + # the SIF are amd64). The arm64 matrix entry still exercises the + # full apptainer contract end-to-end; it just doesn't upload the + # resulting SIF for downstream publishing. 
+ if: success() && github.event_name != 'pull_request' && matrix.arch == 'amd64' uses: actions/upload-artifact@v4 with: name: openms-streamlit-${{ matrix.variant }}-sif @@ -500,19 +509,31 @@ jobs: done <<< "${{ steps.meta.outputs.tags }}" test-nginx: - needs: build-amd64 - runs-on: ubuntu-latest + needs: [build-amd64, build-arm64] + runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: - variant: [full, simple] + include: + - variant: full + arch: amd64 + runner: ubuntu-latest + - variant: full + arch: arm64 + runner: ubuntu-24.04-arm + - variant: simple + arch: amd64 + runner: ubuntu-latest + - variant: simple + arch: arm64 + runner: ubuntu-24.04-arm steps: - uses: actions/checkout@v4 - name: Download image artifact uses: actions/download-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-image + name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - name: Load image into local docker @@ -587,19 +608,31 @@ jobs: done test-traefik: - needs: build-amd64 - runs-on: ubuntu-latest + needs: [build-amd64, build-arm64] + runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: - variant: [full, simple] + include: + - variant: full + arch: amd64 + runner: ubuntu-latest + - variant: full + arch: arm64 + runner: ubuntu-24.04-arm + - variant: simple + arch: amd64 + runner: ubuntu-latest + - variant: simple + arch: arm64 + runner: ubuntu-24.04-arm steps: - uses: actions/checkout@v4 - name: Download image artifact uses: actions/download-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-image + name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - name: Load image into local docker From 1d73b6726e8286c27cc3771bfaf63fb691352458 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:07:21 +0000 Subject: [PATCH 04/18] fix(arm): use two-pass cmake so TOPP links against system libstdc++ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit The ARM build of `make -j4 TOPP` failed at the link step with /usr/bin/ld: /root/miniforge3/lib/libyaml-cpp.so.0.8: undefined reference to `std::ios_base_library_init()@GLIBCXX_3.4.32' The conda-forge libyaml-cpp wheel for aarch64 is built against GLIBCXX_3.4.32 (gcc 13+), but Ubuntu 22.04's system g++ ships with an older libstdc++. Running cmake inside the mamba shell lets it discover /root/miniforge3/lib first, so the conda-forge yaml-cpp gets linked into every TOPP binary and breaks. amd64 happens to work because the conda-forge amd64 yaml-cpp build is older. Fix mirrors FLASHApp's Dockerfile.arm: configure OpenMS in two cmake passes — pass 1 under plain `/bin/bash` with `-DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3` so cmake resolves C++ deps from the system tree (libyaml-cpp from contrib, boost from apt, etc.); pass 2 under `mamba run` with `-DPYOPENMS=ON` so the Python bindings still find conda-forge Python / Cython / NumPy. The IGNORE_PREFIX_PATH flag is repeated on pass 2 to keep the cached C++ link command unchanged. Only Dockerfile.arm changes; Dockerfile (amd64) keeps its single-pass cmake to avoid disturbing the working x86 path. --- Dockerfile.arm | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Dockerfile.arm b/Dockerfile.arm index 8571bc5..2e9a6ab 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -86,8 +86,21 @@ WORKDIR / RUN mkdir /openms-build WORKDIR /openms-build -# Configure. -RUN /bin/bash -c "cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF -DPYOPENMS=ON ../OpenMS -DPY_MEMLEAK_DISABLE=On" +# Configure (two-pass — mirrors FLASHApp.arm). +# Pass 1 runs under plain bash so cmake does NOT search /root/miniforge3 +# when resolving C++ system dependencies. On ARM the conda-forge build of +# libyaml-cpp.so.0.8 is linked against a newer libstdc++ (GLIBCXX_3.4.32, +# i.e. 
gcc 13+) than ubuntu:22.04's system g++ ships, so letting cmake +# pick the miniforge yaml-cpp makes TOPP linking fail with +# undefined reference to `std::ios_base_library_init()@GLIBCXX_3.4.32` +# amd64 happens to work because its conda-forge yaml-cpp build is older. +# Pass 2 re-runs cmake inside the mamba env with PYOPENMS=ON so the Python +# bindings can find the conda-forge Python/Cython/NumPy; CMAKE_IGNORE_PREFIX_PATH +# keeps the C++ link command unchanged from pass 1. +SHELL ["/bin/bash", "-c"] +RUN cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF ../OpenMS +SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] +RUN cmake -DPYOPENMS=ON -DPY_MEMLEAK_DISABLE=On -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 . # Build TOPP tools and clean up. RUN make -j4 TOPP From f11bc99fd559bc79ed54e3849a2a7f0be4487f0c Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:13:37 +0000 Subject: [PATCH 05/18] fix(arm): install cmake via apt so pass-1 cmake is on plain bash PATH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two-pass cmake split from 1d73b67 runs pass 1 under `SHELL ["/bin/bash", "-c"]`, but the only cmake on the image is the one from `mamba install cmake` at /root/miniforge3/envs/streamlit-env/bin/cmake — not on plain bash's PATH. Result: exit 127 (command not found) the moment pass 1 invokes cmake. FLASHApp.arm sidesteps this by installing cmake via apt; do the same here (just append `cmake` to the existing apt-get install line). The mamba cmake install stays, so pass 2 under the mamba shell continues to use the conda-forge cmake exactly as it did before. Ubuntu 22.04 ships cmake 3.22, comfortably above OpenMS 3.5's 3.15 floor. 
--- Dockerfile.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.arm b/Dockerfile.arm index 2e9a6ab..e0bbd0f 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -20,7 +20,7 @@ USER root # Install required Ubuntu packages. RUN apt-get -y update -RUN apt-get install -y --no-install-recommends --no-install-suggests g++ autoconf automake patch libtool make git gpg wget ca-certificates curl jq libgtk2.0-dev openjdk-8-jdk cron +RUN apt-get install -y --no-install-recommends --no-install-suggests g++ autoconf automake patch libtool make git gpg wget ca-certificates curl jq libgtk2.0-dev openjdk-8-jdk cron cmake RUN update-ca-certificates RUN apt-get install -y --no-install-recommends --no-install-suggests libsvm-dev libeigen3-dev coinor-libcbc-dev libglpk-dev libzip-dev zlib1g-dev libxerces-c-dev libbz2-dev libomp-dev libhdf5-dev RUN apt-get install -y --no-install-recommends --no-install-suggests libboost-date-time1.74-dev \ From 5185c3e0ddd8ade74fd6264ea26d0a07aa077508 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:24:52 +0000 Subject: [PATCH 06/18] fix(arm): call mamba cmake by full path in pass 1 (apt 3.22 is too old) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix (install cmake via apt) didn't actually help: OpenMS 3.5's CMakeLists.txt requires cmake >= 3.24, and Ubuntu 22.04's apt cmake is 3.22.1, which fails configure with CMake Error at src/openms/extern/CMakeLists.txt:11 (cmake_minimum_required): CMake 3.24 or higher is required. You are running version 3.22.1 That's exactly why the existing x86 Dockerfile installs cmake via mamba (the conda-forge build is 3.30+). FLASHApp.arm escapes this by using ubuntu:24.04 (apt cmake 3.28); we stay on 22.04 to minimize churn vs. the working x86 Dockerfile. Fix: in pass 1, call the mamba-env cmake by its full path `/root/miniforge3/envs/streamlit-env/bin/cmake`. 
The plain-bash SHELL is still in effect, so cmake doesn't pick up any conda-forge environment side effects, and CMAKE_IGNORE_PREFIX_PATH keeps it from auto-discovering miniforge libraries during find_package. The cmake binary itself runs against miniforge's libstdc++, but that's a runtime detail of cmake — it doesn't leak into the configured project's link command. The apt cmake addition from f11bc99 is now redundant but harmless; leaving it in place to keep this diff focused. --- Dockerfile.arm | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile.arm b/Dockerfile.arm index e0bbd0f..53d99d9 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -94,11 +94,15 @@ WORKDIR /openms-build # pick the miniforge yaml-cpp makes TOPP linking fail with # undefined reference to `std::ios_base_library_init()@GLIBCXX_3.4.32` # amd64 happens to work because its conda-forge yaml-cpp build is older. +# We call the mamba-env cmake by full path so we get a version >= 3.24 +# (OpenMS 3.5's floor); ubuntu:22.04's apt cmake is 3.22 which is too old. +# CMAKE_IGNORE_PREFIX_PATH keeps cmake from auto-discovering miniforge libs +# even though the binary itself lives there. # Pass 2 re-runs cmake inside the mamba env with PYOPENMS=ON so the Python # bindings can find the conda-forge Python/Cython/NumPy; CMAKE_IGNORE_PREFIX_PATH # keeps the C++ link command unchanged from pass 1. 
SHELL ["/bin/bash", "-c"] -RUN cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF ../OpenMS +RUN /root/miniforge3/envs/streamlit-env/bin/cmake -DCMAKE_BUILD_TYPE='Release' -DCMAKE_PREFIX_PATH='/OpenMS/contrib-build/;/usr/;/usr/local' -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 -DHAS_XSERVER=OFF -DBOOST_USE_STATIC=OFF ../OpenMS SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"] RUN cmake -DPYOPENMS=ON -DPY_MEMLEAK_DISABLE=On -DCMAKE_IGNORE_PREFIX_PATH=/root/miniforge3 . From 0bab3ae16d64dbc5bfa5b7248c77f711b2348263 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:31:53 +0000 Subject: [PATCH 07/18] ci: free disk space at the start of each integration test job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two failures in the previous run (test-traefik full, test-nginx simple) ended with the runner reporting "No space left on device" while flushing its diagnostic log. ubuntu-latest starts with ~14 GB free; downloading the full image artifact (5-8 GB), loading it into docker (decompressed, larger), pulling kind's node image, then loading the OCI tar into the kind cluster easily exceeds that budget. Mirror the cleanup already used by `build-arm64`: drop the runner's preinstalled dotnet / android SDK / ghc / hostedtoolcache to recover ~30 GB. Same step now runs at the top of test-apptainer, test-nginx, and test-traefik on both amd64 (ubuntu-latest) and arm64 (ubuntu-24.04-arm) matrix entries — the arm runner is at least as tight as amd64. 
--- .github/workflows/build-and-test.yml | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 012291f..ae256dc 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -296,6 +296,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Free disk space + # ubuntu-latest has ~14 GB free; the full image (5-8 GB) plus kind + # node image plus loading the OCI tar into both docker and kind can + # exhaust it. The arm runner is even tighter. Same incantation as + # `build-arm64`'s "Free disk space" step. + run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + sudo apt-get clean + df -h + - name: Download image artifact uses: actions/download-artifact@v4 with: @@ -530,6 +540,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Free disk space + # ubuntu-latest has ~14 GB free; the full image (5-8 GB) plus kind + # node image plus loading the OCI tar into both docker and kind can + # exhaust it. The arm runner is even tighter. Same incantation as + # `build-arm64`'s "Free disk space" step. + run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + sudo apt-get clean + df -h + - name: Download image artifact uses: actions/download-artifact@v4 with: @@ -629,6 +649,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Free disk space + # ubuntu-latest has ~14 GB free; the full image (5-8 GB) plus kind + # node image plus loading the OCI tar into both docker and kind can + # exhaust it. The arm runner is even tighter. Same incantation as + # `build-arm64`'s "Free disk space" step. 
+ run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + sudo apt-get clean + df -h + - name: Download image artifact uses: actions/download-artifact@v4 with: From 4790d46f8a835788ea5104fa1a49e61187020455 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 20:59:34 +0000 Subject: [PATCH 08/18] fix(arm): keep CMakeFiles/ between make TOPP and make pyopenms After the two-pass cmake configure landed in 5185c3e, the next attempt got past `make -j4 TOPP` (the link error is fixed) but failed fast in `make -j4 pyopenms` with: CMake Error: Not a file: /openms-build/CMakeFiles/VerifyGlobs.cmake CMake Error: Error processing file: /openms-build/CMakeFiles/VerifyGlobs.cmake make: *** [Makefile:11553: cmake_check_build_system] Error 1 `VerifyGlobs.cmake` is generated by cmake for `file(GLOB CONFIGURE_DEPENDS ...)` targets and is consulted by `cmake_check_build_system` at the top of every subsequent `make` invocation. The intermediate cleanup line RUN rm -rf src doc CMakeFiles deleted it, which is fine on the x86 single-pass build (different cmake codepath when PYOPENMS=ON is set in the initial configure, no VerifyGlobs.cmake generated) but breaks the ARM two-pass build. Stop deleting CMakeFiles/ between `make TOPP` and `make pyopenms`. We still drop `src/` and `doc/` for disk savings; keeping CMakeFiles costs only a few hundred MB on the intermediate layer. --- Dockerfile.arm | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Dockerfile.arm b/Dockerfile.arm index 53d99d9..1765980 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -108,7 +108,16 @@ RUN cmake -DPYOPENMS=ON -DPY_MEMLEAK_DISABLE=On -DCMAKE_IGNORE_PREFIX_PATH=/root # Build TOPP tools and clean up. RUN make -j4 TOPP -RUN rm -rf src doc CMakeFiles +# NOTE: do NOT delete CMakeFiles/ here. 
The two-pass cmake configure used +# above generates CMakeFiles/VerifyGlobs.cmake for the pyOpenMS targets' +# CONFIGURE_DEPENDS globs; the next `make -j4 pyopenms` runs +# `cmake_check_build_system` which fails fast if VerifyGlobs.cmake is gone: +# CMake Error: Not a file: /openms-build/CMakeFiles/VerifyGlobs.cmake +# The x86 single-pass build seems to avoid generating that file (different +# cmake codepath when PYOPENMS is set during the initial configure), which +# is why it can still `rm -rf CMakeFiles` here. CMakeFiles/ adds a few +# hundred MB to the intermediate layer — acceptable. +RUN rm -rf src doc # Build pyOpenMS wheels and install via pip. RUN make -j4 pyopenms From c7fdf00070e0d3613c3f12c69cf130f84b299a69 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 25 May 2026 22:12:35 +0000 Subject: [PATCH 09/18] =?UTF-8?q?ci(apptainer):=20drop=20arm64=20=E2=80=94?= =?UTF-8?q?=20no=20upstream=20aarch64=20.deb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eWaterCycle/setup-apptainer@v2 installs apptainer from the upstream .deb asset on the GitHub release. Upstream apptainer only publishes amd64 .debs (verified: every v1.3.x release lists only `apptainer_<version>_amd64.deb`, no _arm64 / _aarch64 variant). On the ubuntu-24.04-arm runner the action's `apt-get install ./apptainer_*.deb` fails with sudo exit code 100 because the package can't be resolved. Building apptainer from source on the ARM runner would add ~15 minutes and a maintenance surface (Go toolchain, suid configuration) for limited value — HPC SIF consumers remain amd64. Revert test-apptainer to amd64-only and document why. test-nginx and test-traefik still exercise the ARM image via kind, which gives us functional ARM coverage at the docker-runtime level even without apptainer.
Side cleanups now that arm64 is gone from this matrix: - artifact name back to a literal `*-amd64-image` (no matrix.arch) - SIF upload gate drops the `matrix.arch == 'amd64'` check --- .github/workflows/build-and-test.yml | 37 +++++++++++----------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index ae256dc..d915583 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -275,24 +275,21 @@ jobs: # (not root inside the image). The entrypoint must tolerate both: this job # exercises that contract by running the built image under apptainer and # waiting for the streamlit /_stcore/health endpoint to come up. - needs: [build-amd64, build-arm64] - runs-on: ${{ matrix.runner }} + # + # amd64 only: upstream apptainer does NOT publish arm64 .deb assets + # (https://github.com/apptainer/apptainer/releases — every release lists + # only `apptainer_<version>_amd64.deb`), so eWaterCycle/setup-apptainer fails + # on ubuntu-24.04-arm with "sudo exit code 100" when its + # `apt-get install ./apptainer_*.deb` resolves a non-existent package. + # Building apptainer from source on the arm runner would add ~15 min and + # significant maintenance surface for limited value (HPC SIF consumers + # remain amd64). Re-evaluate if upstream starts publishing arm64 builds.
+ needs: build-amd64 + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - include: - - variant: full - arch: amd64 - runner: ubuntu-latest - - variant: full - arch: arm64 - runner: ubuntu-24.04-arm - - variant: simple - arch: amd64 - runner: ubuntu-latest - - variant: simple - arch: arm64 - runner: ubuntu-24.04-arm + variant: [full, simple] steps: - uses: actions/checkout@v4 @@ -309,7 +306,7 @@ jobs: - name: Download image artifact uses: actions/download-artifact@v4 with: - name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image + name: openms-streamlit-${{ matrix.variant }}-amd64-image path: /tmp - name: Install apptainer @@ -439,12 +436,8 @@ jobs: if: always() run: apptainer instance stop openms-test || true - - name: Upload validated SIF artifact (amd64 push events only) - # SIF publishing stays amd64-only this iteration (HPC consumers of - # the SIF are amd64). The arm64 matrix entry still exercises the - # full apptainer contract end-to-end; it just doesn't upload the - # resulting SIF for downstream publishing. - if: success() && github.event_name != 'pull_request' && matrix.arch == 'amd64' + - name: Upload validated SIF artifact (push events only) + if: success() && github.event_name != 'pull_request' uses: actions/upload-artifact@v4 with: name: openms-streamlit-${{ matrix.variant }}-sif From f466229e60e41209f50a5d7841e8ab721f08893c Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 07:03:16 +0000 Subject: [PATCH 10/18] fix(ci): keep /opt/hostedtoolcache when freeing disk space kind/kubectl/helm setup actions fail with "Cache directory '/opt/hostedtoolcache' does not exist". Drop just dotnet/android/ghc (~34 GB) and leave the tool cache in place. 
--- .github/workflows/build-and-test.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index d915583..b2c3b31 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -143,7 +143,9 @@ jobs: # image is tighter than the AMD one out of the box. Mirrors what # FLASHApp's publish-docker-images.yml does at the top of its ARM job. run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl + # cache binaries there and fail if the directory is missing. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true sudo apt-get clean df -h @@ -299,7 +301,9 @@ jobs: # exhaust it. The arm runner is even tighter. Same incantation as # `build-arm64`'s "Free disk space" step. run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl + # cache binaries there and fail if the directory is missing. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true sudo apt-get clean df -h @@ -539,7 +543,9 @@ jobs: # exhaust it. The arm runner is even tighter. Same incantation as # `build-arm64`'s "Free disk space" step. run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl + # cache binaries there and fail if the directory is missing. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true sudo apt-get clean df -h @@ -648,7 +654,9 @@ jobs: # exhaust it. The arm runner is even tighter. Same incantation as # `build-arm64`'s "Free disk space" step. 
run: | - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache || true + # Keep /opt/hostedtoolcache: helm/kind-action and setup-kubectl + # cache binaries there and fail if the directory is missing. + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc || true sudo apt-get clean df -h From f0d1db16c36d71c987f1e2f11160402f63e43803 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 08:20:45 +0000 Subject: [PATCH 11/18] ci: dump cluster state on test-nginx/test-traefik failure curl exit-22 doesn't tell us whether the pod, service, or ingress is the broken link. Dump pods/logs/ingress/controller logs on failure so the next run surfaces the actual cause. --- .github/workflows/build-and-test.yml | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b2c3b31..8fb5450 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -626,6 +626,27 @@ jobs: echo "$host -> 200 OK" done + - name: Dump cluster state on failure + if: failure() + run: | + echo "=== nodes ===" + kubectl get nodes -o wide || true + echo "=== pods (all namespaces) ===" + kubectl get pods -A -o wide || true + echo "=== app pods describe ===" + kubectl describe pod -n openms -l app=${SLUG} || true + echo "=== app pod logs ===" + kubectl logs -n openms -l app=${SLUG} --tail=200 --all-containers --prefix || true + echo "=== app pod previous logs (if crashed) ===" + kubectl logs -n openms -l app=${SLUG} --tail=200 --all-containers --prefix --previous || true + echo "=== ingress ===" + kubectl get ingress -A -o wide || true + kubectl describe ingress -n openms || true + echo "=== services + endpoints ===" + kubectl get svc,endpoints -n openms || true + echo "=== ingress-nginx controller logs ===" + kubectl logs -n ingress-nginx -l app.kubernetes.io/component=controller --tail=200 || true + test-traefik: needs: 
[build-amd64, build-arm64] runs-on: ${{ matrix.runner }} @@ -741,3 +762,23 @@ jobs: echo "" echo "$host -> 200 OK" done + + - name: Dump cluster state on failure + if: failure() + run: | + echo "=== nodes ===" + kubectl get nodes -o wide || true + echo "=== pods (all namespaces) ===" + kubectl get pods -A -o wide || true + echo "=== app pods describe ===" + kubectl describe pod -n openms -l app=${SLUG} || true + echo "=== app pod logs ===" + kubectl logs -n openms -l app=${SLUG} --tail=200 --all-containers --prefix || true + echo "=== app pod previous logs (if crashed) ===" + kubectl logs -n openms -l app=${SLUG} --tail=200 --all-containers --prefix --previous || true + echo "=== traefik ingressroute ===" + kubectl get ingressroute -A -o yaml || true + echo "=== services + endpoints ===" + kubectl get svc,endpoints -n openms || true + echo "=== traefik controller logs ===" + kubectl logs -n traefik -l app.kubernetes.io/name=traefik --tail=200 || true From 7aadb0656d89e0611bd80fa287f5c3dae681829b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 11:48:30 +0000 Subject: [PATCH 12/18] fix(ci): load images into kind via image-archive, drop docker load \`docker load\` + \`kind load docker-image\` keeps the image in both host docker AND each kind node's containerd. With a 5-8 GB image and two kind nodes that's ~25 GB of duplicated storage, which trips the "no space left on device" error in kind's ctr import. Switch to \`kind load image-archive\` so the tar streams directly into each node, and rm the tar after to reclaim /tmp. 
--- .github/workflows/build-and-test.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 8fb5450..7890072 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -555,9 +555,6 @@ jobs: name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - - name: Load image into local docker - run: docker load -i /tmp/image.tar - - name: Create kind cluster uses: helm/kind-action@v1 with: @@ -565,7 +562,13 @@ jobs: config: .github/kind-config.yaml - name: Load image into kind cluster - run: kind load docker-image openms-streamlit:test --name test-cluster + # Use `kind load image-archive` (not docker-image) so we never store + # the image in host docker. Saves ~5-8 GB on /var/lib/docker. Delete + # the tar afterwards to free the same again on /tmp — the image is + # now in both kind nodes' containerd, which is enough. + run: | + kind load image-archive /tmp/image.tar --name test-cluster + rm -f /tmp/image.tar - name: Install nginx ingress controller run: | @@ -687,9 +690,6 @@ jobs: name: openms-streamlit-${{ matrix.variant }}-${{ matrix.arch }}-image path: /tmp - - name: Load image into local docker - run: docker load -i /tmp/image.tar - - name: Create kind cluster uses: helm/kind-action@v1 with: @@ -697,7 +697,13 @@ jobs: config: .github/kind-config.yaml - name: Load image into kind cluster - run: kind load docker-image openms-streamlit:test --name traefik-test + # Use `kind load image-archive` (not docker-image) so we never store + # the image in host docker. Saves ~5-8 GB on /var/lib/docker. Delete + # the tar afterwards to free the same again on /tmp — the image is + # now in both kind nodes' containerd, which is enough. 
+ run: | + kind load image-archive /tmp/image.tar --name traefik-test + rm -f /tmp/image.tar - name: Set up Helm uses: azure/setup-helm@v4 From 611902101e1eba9e41080b055ec668711e009f24 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 12:04:23 +0000 Subject: [PATCH 13/18] ci: re-trigger workflow run From 88dcdb03a40dccf0e1c6b23ddc4cc994118b6fc2 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 13:18:35 +0000 Subject: [PATCH 14/18] ci: re-trigger workflow run after outage From 39d6d25935fe8e193ba96b2c0d15aa0bbee286b6 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 26 May 2026 17:57:30 +0000 Subject: [PATCH 15/18] fix(ci): match kind image to what the kustomize overlay references 503s in test-nginx/test-traefik traced to two issues: 1. The prod overlay maps openms-streamlit -> ghcr.io/openms/streamlit-template:main-full, but the build job was re-tagging the local image as openms-streamlit:test. Rendered manifests pointed at the registry name; kind only had :test loaded; pods stayed ErrImagePull. Retag as :main-full so kind has exactly the ref the manifests use. 2. Three of the four pod specs declare imagePullPolicy: Always; the existing sed only rewrote IfNotPresent. With Always and no registry creds in kind, pods loop on ImagePullBackOff. Extend the sed to catch both. --- .github/workflows/build-and-test.yml | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 7890072..91b2473 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -98,15 +98,17 @@ jobs: build-args: | GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} - - name: Retag for kind (stable local tag) + - name: Retag for kind (image name the kustomize overlay points at) run: | - # load:true above loaded all meta-action tags into local docker. - # Retag the first one to the stable name the kustomize overlay expects. 
+ # The prod overlay sets `newName: ghcr.io/openms/streamlit-template`, + # `newTag: main-full`. The rendered manifests reference that exact + # ref, so we need it loaded into kind under that name. Tag invariant + # across branches/variants so the test always works. FIRST_TAG=$(printf '%s\n' "${{ steps.meta.outputs.tags }}" | head -n 1) - docker tag "$FIRST_TAG" openms-streamlit:test + docker tag "$FIRST_TAG" ghcr.io/openms/streamlit-template:main-full - name: Save image as tar - run: docker save openms-streamlit:test -o /tmp/image.tar + run: docker save ghcr.io/openms/streamlit-template:main-full -o /tmp/image.tar - name: Upload image artifact uses: actions/upload-artifact@v4 @@ -192,15 +194,17 @@ jobs: build-args: | GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} - - name: Retag for kind (stable local tag) + - name: Retag for kind (image name the kustomize overlay points at) run: | - # load:true above loaded all meta-action tags into local docker. - # Retag the first one to the stable name the kustomize overlay expects. + # The prod overlay sets `newName: ghcr.io/openms/streamlit-template`, + # `newTag: main-full`. The rendered manifests reference that exact + # ref, so we need it loaded into kind under that name. Tag invariant + # across branches/variants so the test always works. 
FIRST_TAG=$(printf '%s\n' "${{ steps.meta.outputs.tags }}" | head -n 1) - docker tag "$FIRST_TAG" openms-streamlit:test + docker tag "$FIRST_TAG" ghcr.io/openms/streamlit-template:main-full - name: Save image as tar - run: docker save openms-streamlit:test -o /tmp/image.tar + run: docker save ghcr.io/openms/streamlit-template:main-full -o /tmp/image.tar - name: Upload image artifact uses: actions/upload-artifact@v4 @@ -580,7 +584,7 @@ jobs: # Filter out Traefik IngressRoute (kind cluster uses nginx) and force imagePullPolicy=Never kubectl kustomize k8s/overlays/prod/ | \ yq 'select(.kind != "IngressRoute")' | \ - sed 's|imagePullPolicy: IfNotPresent|imagePullPolicy: Never|g' | \ + sed -E 's|imagePullPolicy: (IfNotPresent\|Always)|imagePullPolicy: Never|g' | \ sed 's|storageClassName: cinder-csi|storageClassName: standard|g' > /tmp/manifests.yaml for i in 1 2 3 4 5; do if kubectl apply -f /tmp/manifests.yaml; then @@ -720,7 +724,7 @@ jobs: - name: Deploy with Kustomize (full manifests, no filter) run: | kubectl kustomize k8s/overlays/prod/ | \ - sed 's|imagePullPolicy: IfNotPresent|imagePullPolicy: Never|g' | \ + sed -E 's|imagePullPolicy: (IfNotPresent\|Always)|imagePullPolicy: Never|g' | \ sed 's|storageClassName: cinder-csi|storageClassName: standard|g' > /tmp/manifests.yaml for i in 1 2 3 4 5; do if kubectl apply -f /tmp/manifests.yaml; then From 0ad89fa41b2f134665b487a68c725ce6c29f67e6 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 27 May 2026 16:06:51 +0000 Subject: [PATCH 16/18] fix(ci): disable provenance on build-amd64 to keep pushes single-manifest create-manifest fails with "ghcr.io/openms/streamlit-template:main-full-amd64 is a manifest list" because docker/build-push-action v5 adds a provenance attestation by default, which buildx packs as a manifest list (image + attestation entries). docker manifest create rejects manifest lists as components. 
build-arm64 already sets provenance: false for the same reason; mirror that on the amd64 path so both per-arch tags are flat image manifests that can be merged into the multi-arch manifest. --- .github/workflows/build-and-test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 91b2473..3b4055b 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -93,6 +93,11 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + # provenance/attestations turn the pushed tag into a manifest list, + # which the create-manifest job's `docker manifest create` then + # refuses ("is a manifest list"). Keep the push as a single-platform + # image manifest — same as the build-arm64 job. + provenance: false cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}/cache:${{ matrix.variant }}-amd64 cache-to: ${{ github.event_name != 'pull_request' && format('type=registry,ref={0}/{1}/cache:{2}-amd64,mode=max', env.REGISTRY, env.IMAGE_NAME_LC, matrix.variant) || '' }} build-args: | From a703e052c46746fdaf6a03dcbf95a1c49db14956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20David=20M=C3=BCller?= <57191390+t0mdavid-m@users.noreply.github.com> Date: Wed, 17 Jun 2026 14:54:02 +0200 Subject: [PATCH 17/18] Merge pull request #396 from OpenMS/claude/laughing-fermat-ppiw0s Add configurable legal links (Impressum, Privacy, Terms) --- README.md | 21 +++++ gdpr_consent/dist/bundle.js | 2 +- gdpr_consent/src/main.ts | 13 +++ settings.json | 5 ++ src/common/captcha_.py | 11 ++- src/common/common.py | 48 ++++++++++- tests/test_legal_links.py | 160 ++++++++++++++++++++++++++++++++++++ 7 files changed, 255 insertions(+), 5 deletions(-) create mode 100644 tests/test_legal_links.py diff --git a/README.md b/README.md index 09cec89..5a1db4b 100644 --- a/README.md +++ b/README.md @@ 
-164,6 +164,27 @@ nginx config, PID files — to `/tmp/openms-runtime-$$`, which is always writable inside an apptainer container. The workspace cleanup cron job is skipped in this mode; rerun `clean-up-workspaces.py` manually if needed. +## ⚖️ Legal pages (Impressum, Privacy Policy, Terms of Use) + +Every page shows **Impressum**, **Privacy Policy** and **Terms of Use** links at +the bottom of the sidebar, and the GDPR consent banner links to the privacy +policy. By default these point to the centrally maintained official OpenMS pages +(`https://openms.de/impressum`, `/privacy`, `/terms`). + +If you self-host a fork, override them in `settings.json` — an Impressum must +name the **actual operator**, not OpenMS: + +```json +"legal_links": { + "impressum": "https://your-domain.example/impressum", + "privacy": "https://your-domain.example/privacy", + "terms": "https://your-domain.example/terms" +} +``` + +Any link you omit falls back to its OpenMS default. The `privacy` URL is reused +for the consent banner's privacy-policy link, so consent and policy stay in sync. + ## Documentation Documentation for **users** and **developers** is included as pages in [this template app](https://abi-services.cs.uni-tuebingen.de/streamlit-template/), indicated by the 📖 icon. diff --git a/gdpr_consent/dist/bundle.js b/gdpr_consent/dist/bundle.js index 8614457..0a48bfd 100644 --- a/gdpr_consent/dist/bundle.js +++ b/gdpr_consent/dist/bundle.js @@ -235,7 +235,7 @@ eval("__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpac /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => { "use strict"; -eval("__webpack_require__.r(__webpack_exports__);\n/* harmony import */ var streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! 
streamlit-component-lib */ \"./node_modules/streamlit-component-lib/dist/index.js\");\nvar __awaiter = (undefined && undefined.__awaiter) || function (thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n};\nvar __generator = (undefined && undefined.__generator) || function (thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? 
y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\n }\n};\n\n// Defines the configuration for Klaro\nvar klaroConfig = {\n mustConsent: true,\n acceptAll: true,\n services: []\n};\n// This will make klaroConfig globally accessible\nwindow.klaroConfig = klaroConfig;\n// Function to safely access the Klaro manager\nfunction getKlaroManager() {\n var _a;\n return ((_a = window.klaro) === null || _a === void 0 ? void 0 : _a.getManager) ? 
window.klaro.getManager() : null;\n}\n// Waits until Klaro Manager is available\nfunction waitForKlaroManager() {\n return __awaiter(this, arguments, void 0, function (maxWaitTime, interval) {\n var startTime, klaroManager;\n if (maxWaitTime === void 0) { maxWaitTime = 5000; }\n if (interval === void 0) { interval = 100; }\n return __generator(this, function (_a) {\n switch (_a.label) {\n case 0:\n startTime = Date.now();\n _a.label = 1;\n case 1:\n if (!(Date.now() - startTime < maxWaitTime)) return [3 /*break*/, 3];\n klaroManager = getKlaroManager();\n if (klaroManager) {\n return [2 /*return*/, klaroManager];\n }\n return [4 /*yield*/, new Promise(function (resolve) { return setTimeout(resolve, interval); })];\n case 2:\n _a.sent();\n return [3 /*break*/, 1];\n case 3: throw new Error(\"Klaro manager did not become available within the allowed time.\");\n }\n });\n });\n}\n// Helper function to handle unknown errors\nfunction handleError(error) {\n if (error instanceof Error) {\n console.error(\"Error:\", error.message);\n }\n else {\n console.error(\"Unknown error:\", error);\n }\n}\n// Tracking was accepted\nfunction callback() {\n return __awaiter(this, void 0, void 0, function () {\n var manager, return_vals, _i, _a, service, error_1;\n return __generator(this, function (_b) {\n switch (_b.label) {\n case 0:\n _b.trys.push([0, 2, , 3]);\n return [4 /*yield*/, waitForKlaroManager()];\n case 1:\n manager = _b.sent();\n if (manager.confirmed) {\n return_vals = {};\n for (_i = 0, _a = klaroConfig.services; _i < _a.length; _i++) {\n service = _a[_i];\n return_vals[service.name] = manager.getConsent(service.name);\n }\n streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentValue(return_vals);\n }\n return [3 /*break*/, 3];\n case 2:\n error_1 = _b.sent();\n handleError(error_1);\n return [3 /*break*/, 3];\n case 3: return [2 /*return*/];\n }\n });\n });\n}\n// Stores if the component has been rendered before\nvar rendered = false;\nfunction 
onRender(event) {\n // Klaro does not work if embedded multiple times\n if (rendered) {\n return;\n }\n rendered = true;\n var data = event.detail;\n if (data.args['google_analytics']) {\n klaroConfig.services.push({\n name: 'google-analytics',\n cookies: [\n /^_ga(_.*)?/ // we delete the Google Analytics cookies if the user declines its use\n ],\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['piwik_pro']) {\n klaroConfig.services.push({\n name: 'piwik-pro',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['matomo']) {\n klaroConfig.services.push({\n name: 'matomo',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n // Create a new script element\n var script = document.createElement('script');\n // Set the necessary attributes\n script.defer = true;\n script.type = 'application/javascript';\n script.src = 'https://cdn.kiprotect.com/klaro/v0.7/klaro.js';\n // Set the klaro config\n script.setAttribute('data-config', 'klaroConfig');\n // Append the script to the head or body\n document.head.appendChild(script);\n}\n// Attach our `onRender` handler to Streamlit's render event.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.events.addEventListener(streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.RENDER_EVENT, onRender);\n// Tell Streamlit we're ready to start receiving data. We won't get our\n// first RENDER_EVENT until we call this function.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentReady();\n// Finally, tell Streamlit to update the initial height.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setFrameHeight(1000);\n\n\n//# sourceURL=webpack://gdpr_consent/./src/main.ts?"); +eval("__webpack_require__.r(__webpack_exports__);\n/* harmony import */ var streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! 
streamlit-component-lib */ \"./node_modules/streamlit-component-lib/dist/index.js\");\nvar __awaiter = (undefined && undefined.__awaiter) || function (thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n};\nvar __generator = (undefined && undefined.__generator) || function (thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? 
y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\n }\n};\n\n// Defines the configuration for Klaro\nvar klaroConfig = {\n mustConsent: true,\n acceptAll: true,\n services: []\n};\n// This will make klaroConfig globally accessible\nwindow.klaroConfig = klaroConfig;\n// Function to safely access the Klaro manager\nfunction getKlaroManager() {\n var _a;\n return ((_a = window.klaro) === null || _a === void 0 ? void 0 : _a.getManager) ? 
window.klaro.getManager() : null;\n}\n// Waits until Klaro Manager is available\nfunction waitForKlaroManager() {\n return __awaiter(this, arguments, void 0, function (maxWaitTime, interval) {\n var startTime, klaroManager;\n if (maxWaitTime === void 0) { maxWaitTime = 5000; }\n if (interval === void 0) { interval = 100; }\n return __generator(this, function (_a) {\n switch (_a.label) {\n case 0:\n startTime = Date.now();\n _a.label = 1;\n case 1:\n if (!(Date.now() - startTime < maxWaitTime)) return [3 /*break*/, 3];\n klaroManager = getKlaroManager();\n if (klaroManager) {\n return [2 /*return*/, klaroManager];\n }\n return [4 /*yield*/, new Promise(function (resolve) { return setTimeout(resolve, interval); })];\n case 2:\n _a.sent();\n return [3 /*break*/, 1];\n case 3: throw new Error(\"Klaro manager did not become available within the allowed time.\");\n }\n });\n });\n}\n// Helper function to handle unknown errors\nfunction handleError(error) {\n if (error instanceof Error) {\n console.error(\"Error:\", error.message);\n }\n else {\n console.error(\"Unknown error:\", error);\n }\n}\n// Tracking was accepted\nfunction callback() {\n return __awaiter(this, void 0, void 0, function () {\n var manager, return_vals, _i, _a, service, error_1;\n return __generator(this, function (_b) {\n switch (_b.label) {\n case 0:\n _b.trys.push([0, 2, , 3]);\n return [4 /*yield*/, waitForKlaroManager()];\n case 1:\n manager = _b.sent();\n if (manager.confirmed) {\n return_vals = {};\n for (_i = 0, _a = klaroConfig.services; _i < _a.length; _i++) {\n service = _a[_i];\n return_vals[service.name] = manager.getConsent(service.name);\n }\n streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentValue(return_vals);\n }\n return [3 /*break*/, 3];\n case 2:\n error_1 = _b.sent();\n handleError(error_1);\n return [3 /*break*/, 3];\n case 3: return [2 /*return*/];\n }\n });\n });\n}\n// Stores if the component has been rendered before\nvar rendered = false;\nfunction 
onRender(event) {\n // Klaro does not work if embedded multiple times\n if (rendered) {\n return;\n }\n rendered = true;\n var data = event.detail;\n if (data.args['google_analytics']) {\n klaroConfig.services.push({\n name: 'google-analytics',\n cookies: [\n /^_ga(_.*)?/ // we delete the Google Analytics cookies if the user declines its use\n ],\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['piwik_pro']) {\n klaroConfig.services.push({\n name: 'piwik-pro',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['matomo']) {\n klaroConfig.services.push({\n name: 'matomo',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n // Link the consent banner to the privacy policy. Setting privacyPolicyUrl\n // on the 'zz' fallback language makes Klaro render its default\n // \"To learn more, please read our privacy policy.\" text with the URL,\n // regardless of the browser locale.\n if (data.args['privacy_policy']) {\n klaroConfig.translations = {\n zz: {\n privacyPolicyUrl: data.args['privacy_policy']\n }\n };\n }\n // Create a new script element\n var script = document.createElement('script');\n // Set the necessary attributes\n script.defer = true;\n script.type = 'application/javascript';\n script.src = 'https://cdn.kiprotect.com/klaro/v0.7/klaro.js';\n // Set the klaro config\n script.setAttribute('data-config', 'klaroConfig');\n // Append the script to the head or body\n document.head.appendChild(script);\n}\n// Attach our `onRender` handler to Streamlit's render event.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.events.addEventListener(streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.RENDER_EVENT, onRender);\n// Tell Streamlit we're ready to start receiving data. 
We won't get our\n// first RENDER_EVENT until we call this function.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentReady();\n// Finally, tell Streamlit to update the initial height.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setFrameHeight(1000);\n\n\n//# sourceURL=webpack://gdpr_consent/./src/main.ts?"); /***/ }), diff --git a/gdpr_consent/src/main.ts b/gdpr_consent/src/main.ts index 059fef8..408f4a4 100644 --- a/gdpr_consent/src/main.ts +++ b/gdpr_consent/src/main.ts @@ -14,6 +14,7 @@ let klaroConfig: { mustConsent: boolean; acceptAll: boolean; services: Service[]; + translations?: Record; } = { mustConsent: true, acceptAll: true, @@ -125,6 +126,18 @@ function onRender(event: Event): void { ) } + // Link the consent banner to the privacy policy. Setting privacyPolicyUrl + // on the 'zz' fallback language makes Klaro render its default + // "To learn more, please read our privacy policy." text with the URL, + // regardless of the browser locale. 
+ if (data.args['privacy_policy']) { + klaroConfig.translations = { + zz: { + privacyPolicyUrl: data.args['privacy_policy'] + } + } + } + // Create a new script element var script = document.createElement('script') diff --git a/settings.json b/settings.json index 0adf0b6..60424cb 100644 --- a/settings.json +++ b/settings.json @@ -3,6 +3,11 @@ "github-user": "OpenMS", "version": "1.1.1", "repository-name": "streamlit-template", + "legal_links": { + "impressum": "https://openms.de/impressum", + "privacy": "https://openms.de/privacy", + "terms": "https://openms.de/terms" + }, "analytics": { "google-analytics": { "enabled": false, diff --git a/src/common/captcha_.py b/src/common/captcha_.py index 498b133..282e124 100644 --- a/src/common/captcha_.py +++ b/src/common/captcha_.py @@ -186,7 +186,7 @@ def add_page(main_script_path_str: str, page_name: str) -> None: # define the function for the captcha control -def captcha_control(): +def captcha_control(privacy_policy_url: str = ""): """ Control and verification of a CAPTCHA to ensure the user is not a robot. @@ -199,6 +199,10 @@ def captcha_control(): The CAPTCHA text is generated as a session state and should not change during refreshes. + Args: + privacy_policy_url (str, optional): URL shown as the privacy policy link + in the GDPR consent banner. Defaults to "". + Returns: None """ @@ -214,7 +218,10 @@ def captcha_control(): with st.spinner(): # Ask for consent st.session_state.tracking_consent = consent_component( - google_analytics=ga, piwik_pro=pp, matomo=mt + google_analytics=ga, + piwik_pro=pp, + matomo=mt, + privacy_policy=privacy_policy_url, ) if st.session_state.tracking_consent is None: # No response by user yet diff --git a/src/common/common.py b/src/common/common.py index 643a224..c048064 100644 --- a/src/common/common.py +++ b/src/common/common.py @@ -31,6 +31,37 @@ # Detect system platform OS_PLATFORM = sys.platform +# Default legal/GDPR page links. 
These point to the centrally maintained +# official OpenMS pages. Forks that self-host should override them via the +# "legal_links" key in settings.json (an Impressum must name the actual +# operator). The defaults live here too — not only in settings.json — so that +# downstream apps built from an older settings.json without a "legal_links" +# key still inherit working legal links by default. +DEFAULT_LEGAL_LINKS = { + "impressum": "https://openms.de/impressum", + "privacy": "https://openms.de/privacy", + "terms": "https://openms.de/terms", +} + + +def get_legal_links() -> dict[str, str]: + """ + Return the legal page URLs (Impressum, Privacy Policy, Terms of Use). + + Values from the "legal_links" object in settings.json override the + built-in OpenMS defaults. Empty override values are ignored so a blank + entry can't erase a default. + + Returns: + dict[str, str]: Mapping of "impressum", "privacy" and "terms" to URLs. + """ + overrides = ( + st.session_state.settings.get("legal_links", {}) + if "settings" in st.session_state + else {} + ) + return {**DEFAULT_LEGAL_LINKS, **{k: v for k, v in overrides.items() if v}} + def is_safe_workspace_name(name: str) -> bool: """ @@ -519,7 +550,7 @@ def page_setup(page: str = "") -> dict[str, Any]: # Render the sidebar params = render_sidebar(page) - captcha_control() + captcha_control(privacy_policy_url=get_legal_links()["privacy"]) # If run in hosted mode, show captcha as long as it has not been solved # if not "local" in sys.argv: @@ -532,7 +563,7 @@ def page_setup(page: str = "") -> dict[str, Any]: "controllo" in params.keys() and params["controllo"] == False ): # Apply captcha by calling the captcha_control function - captcha_control() + captcha_control(privacy_policy_url=get_legal_links()["privacy"]) return params @@ -764,6 +795,19 @@ def change_workspace(): f'
{app_name}
Version: {version_info}
', unsafe_allow_html=True, ) + + # Legal links (Impressum, Privacy Policy, Terms of Use), shown on every + # page. URLs are configurable via "legal_links" in settings.json. + links = get_legal_links() + st.markdown( + '
' + f'Impressum · ' + f'Privacy Policy · ' + f'Terms of Use' + "
", + unsafe_allow_html=True, + ) return params diff --git a/tests/test_legal_links.py b/tests/test_legal_links.py new file mode 100644 index 0000000..a38e201 --- /dev/null +++ b/tests/test_legal_links.py @@ -0,0 +1,160 @@ +""" +Tests for get_legal_links() in src/common/common.py. + +get_legal_links() resolves the Impressum / Privacy Policy / Terms of Use URLs +shown in the sidebar footer (on every page) and the privacy-policy link wired +into the GDPR consent banner. It merges the optional "legal_links" object from +settings.json over the built-in official-OpenMS defaults so that: + + * apps built from a settings.json without a "legal_links" key still inherit + working legal links by default, + * a self-hosting fork can override any or all of the three URLs, + * an empty/blank override value never erases a default. + +Streamlit (and the other heavy runtime deps pulled in by common.py) are mocked +before import so the helper can be unit-tested without a running Streamlit app, +mirroring tests/test_parameter_presets.py. +""" +import os +import sys +from unittest.mock import MagicMock + +# Add project root to path for imports +PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(PROJECT_ROOT) + + +class FakeSessionState(dict): + """Minimal stand-in for Streamlit's SessionState. + + Supports both attribute access (``state.settings``) and item/membership + access (``"settings" in state``), exactly like the real SessionState that + common.py relies on. + """ + + def __getattr__(self, name): + try: + return self[name] + except KeyError as exc: + raise AttributeError(name) from exc + + def __setattr__(self, name, value): + self[name] = value + + +# Mock streamlit (with a SessionState-like session_state) and the other heavy +# imports pulled in by src/common/common.py, so importing get_legal_links here +# doesn't require a running Streamlit app context. 
+# +# IMPORTANT: these mocks are installed into sys.modules only for the duration of +# the import below and then restored, so they don't leak into other test modules +# (e.g. the AppTest-based tests that need the real `streamlit` package). This +# mirrors the pattern in tests/test_parameter_presets.py. +mock_streamlit = MagicMock() +mock_streamlit.session_state = FakeSessionState() + +_MOCKED_MODULES = { + "streamlit": mock_streamlit, + "streamlit.components": MagicMock(), + "streamlit.components.v1": MagicMock(), + "streamlit.source_util": MagicMock(), + "pandas": MagicMock(), + "psutil": MagicMock(), + # Local submodules with their own heavy deps (e.g. the captcha image library). + "src.common.captcha_": MagicMock(), + "src.common.admin": MagicMock(), +} +_saved_modules = {name: sys.modules.get(name) for name in _MOCKED_MODULES} +sys.modules.update(_MOCKED_MODULES) + +# Force a FRESH import of src.common.common under the streamlit mock, even if an +# earlier test module (e.g. test_gui.py) already imported the real-streamlit-bound +# version. Save whatever was cached first so we can restore it afterwards. +_saved_common = sys.modules.pop("src.common.common", None) + +from src.common.common import get_legal_links, DEFAULT_LEGAL_LINKS # noqa: E402 + +# Restore the real modules (or remove ones that weren't present) so that other +# test modules get the genuine packages. +for _name, _orig in _saved_modules.items(): + if _orig is None: + sys.modules.pop(_name, None) + else: + sys.modules[_name] = _orig +# Restore the original cached common module (the real-streamlit-bound one, if +# any) so AppTest-based test modules keep getting the genuine package. +# get_legal_links keeps working: it holds a reference to the freshly-imported +# mock-bound module's globals (and the same `mock_streamlit` object the tests +# mutate). 
+if _saved_common is None: + sys.modules.pop("src.common.common", None) +else: + sys.modules["src.common.common"] = _saved_common + + +def setup_function(_): + """Reset session_state before each test for isolation.""" + mock_streamlit.session_state = FakeSessionState() + + +def test_defaults_point_to_openms(): + """The built-in defaults are the official OpenMS pages.""" + assert DEFAULT_LEGAL_LINKS == { + "impressum": "https://openms.de/impressum", + "privacy": "https://openms.de/privacy", + "terms": "https://openms.de/terms", + } + + +def test_defaults_when_settings_not_loaded(): + """No settings loaded at all -> defaults, no crash.""" + mock_streamlit.session_state = FakeSessionState() + assert get_legal_links() == DEFAULT_LEGAL_LINKS + + +def test_defaults_when_no_legal_links_key(): + """settings present but without 'legal_links' -> all OpenMS defaults.""" + mock_streamlit.session_state = FakeSessionState({"settings": {}}) + assert get_legal_links() == DEFAULT_LEGAL_LINKS + + +def test_overrides_replace_defaults(): + """A fork's custom legal_links replace every default.""" + mock_streamlit.session_state = FakeSessionState( + { + "settings": { + "legal_links": { + "impressum": "https://acme.example/impressum", + "privacy": "https://acme.example/privacy", + "terms": "https://acme.example/terms", + } + } + } + ) + assert get_legal_links() == { + "impressum": "https://acme.example/impressum", + "privacy": "https://acme.example/privacy", + "terms": "https://acme.example/terms", + } + + +def test_partial_override_keeps_other_defaults(): + """Overriding only one link leaves the others at their OpenMS default.""" + mock_streamlit.session_state = FakeSessionState( + {"settings": {"legal_links": {"impressum": "https://acme.example/impressum"}}} + ) + links = get_legal_links() + assert links["impressum"] == "https://acme.example/impressum" + assert links["privacy"] == DEFAULT_LEGAL_LINKS["privacy"] + assert links["terms"] == DEFAULT_LEGAL_LINKS["terms"] + + +def 
test_empty_or_none_override_falls_back_to_default(): + """A blank/None override must not erase the default for that key.""" + mock_streamlit.session_state = FakeSessionState( + {"settings": {"legal_links": {"privacy": "", "impressum": None}}} + ) + links = get_legal_links() + assert links["privacy"] == DEFAULT_LEGAL_LINKS["privacy"] + assert links["impressum"] == DEFAULT_LEGAL_LINKS["impressum"] + assert links["terms"] == DEFAULT_LEGAL_LINKS["terms"] From 76a793e6ff71e273b3d74e4944eabe1b9f4c3892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20David=20M=C3=BCller?= <57191390+t0mdavid-m@users.noreply.github.com> Date: Wed, 17 Jun 2026 17:39:44 +0200 Subject: [PATCH 18/18] fix traefik ingress --- k8s/base/traefik-ingressroute.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/k8s/base/traefik-ingressroute.yaml b/k8s/base/traefik-ingressroute.yaml index b202891..857314f 100644 --- a/k8s/base/traefik-ingressroute.yaml +++ b/k8s/base/traefik-ingressroute.yaml @@ -4,7 +4,8 @@ metadata: name: streamlit-traefik spec: entryPoints: - - web + - web # 301-redirects to websecure + - websecure routes: - match: PathPrefix(`/`) kind: Rule @@ -16,3 +17,4 @@ spec: name: stroute httpOnly: true sameSite: lax + secure: true # only send the affinity cookie over HTTPS \ No newline at end of file