diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9a519ed..03c1d75 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,10 @@ on: pull_request: branches: [main] +# Least privilege by default; no job needs write access. +permissions: + contents: read + concurrency: group: ci-${{ github.ref }} cancel-in-progress: true @@ -17,12 +21,12 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.11", "3.12"] + python: ["3.11", "3.12", "3.13"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 # hatch-vcs needs git history - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: ${{ matrix.python }} cache: pip @@ -40,26 +44,45 @@ jobs: run: pytest -m "not slow and not gpu" --cov=cortex_score --cov-report=xml - name: Upload coverage if: matrix.python == '3.11' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: coverage-xml path: coverage.xml packaging: - name: build + twine check + name: build + twine check + wheel smoke runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.11" - - run: | + - name: Build + metadata check + run: | python -m pip install --upgrade pip build twine python -m build python -m twine check dist/* - - uses: actions/upload-artifact@v4 + - name: Install built wheel and smoke-test (no torch) + run: | + python -m pip install dist/*.whl + cd "$RUNNER_TEMP" # import the installed wheel, not the source tree + python -c " + import sys + import numpy as np + import cortex_score + assert 'torch' not in sys.modules + from cortex_score import 
score_from_predictions + r = score_from_predictions(np.random.randn(4, 20484).astype('float32'), model_revision='ci-wheel') + assert len(r.networks) == 5 + print('wheel smoke OK', cortex_score.__version__) + " + - name: Wheel-content packaging tests + run: | + pip install -e ".[dev]" + pytest tests/integration/test_packaging.py -m slow + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: dist path: dist/ @@ -68,10 +91,10 @@ jobs: name: importable with no torch runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.11" - name: Install base only (no [gpu-deps]) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d564db4..36ffad5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,28 +5,31 @@ on: tags: - "v*.*.*" +# Default to read-only; the publish job opts into id-token below. 
+permissions: + contents: read + jobs: build-and-publish: name: build + publish to PyPI runs-on: ubuntu-latest permissions: - id-token: write # trusted publishing + id-token: write # trusted publishing (OIDC); no API token stored contents: read environment: name: pypi url: https://pypi.org/p/cortex-score steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 # hatch-vcs reads the tag history - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.11" - name: Install build tooling - run: | - python -m pip install --upgrade pip build twine + run: python -m pip install --upgrade pip build twine - name: Build sdist + wheel run: python -m build @@ -34,5 +37,22 @@ jobs: - name: Verify metadata run: python -m twine check dist/* + - name: Smoke-test the built wheel before publishing + run: | + python -m pip install dist/*.whl + cd "$RUNNER_TEMP" # import the installed wheel, not the source tree + python -c " + import sys + import numpy as np + import cortex_score + assert 'torch' not in sys.modules + from cortex_score import score_from_predictions + r = score_from_predictions(np.random.randn(4, 20484).astype('float32'), model_revision='release-smoke') + assert len(r.networks) == 5 + print('release wheel smoke OK', cortex_score.__version__) + " + - name: Publish to PyPI via trusted publishing - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0 + with: + attestations: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 470f437..310929a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ This project follows [Semantic Versioning](https://semver.org/). ## [Unreleased] +## [0.1.0] - 2026-06-10 + +First public release. 
The CPU-only postprocessing tier +(`score_from_predictions` / `score_from_prediction_bundle`) and the +`ScoreResult` JSON contract (`SCHEMA_VERSION = "1.0"`) are stable. + ### Added - Initial package scaffold (Epics 0-8): - Bundled atlas data (Schaefer-400 + Yeo-17 + 5-network rollup) with @@ -20,16 +26,36 @@ This project follows [Semantic Versioning](https://semver.org/). ergonomic, requires explicit scientific assumptions. - `score(video_path, runner=None)` — full pipeline. - `CortexScorer` class for batch reuse. - - Two-tier cache: prediction cache + score cache, atomic writes, - cache_manifest.json, invalidation matrix. + - Two-tier cache infrastructure (prediction cache + score cache, atomic + writes, cache_manifest.json, invalidation matrix). NOTE: this is + plumbing for a future release — the scoring path does not read or write + it yet, so `cache info` reports empty until caching is wired in v0.1.1. - Typer CLI under `[cli]` extra: `doctor`, `score`, `from-predictions`, `schema`, `cache info`, `cache clear`. + +### Fixed (pre-release hardening) +- `result_id` is now the SHA-256 of the result's own canonical JSON (with + `result_id` blanked), so it is reproducible from the serialized artifact. + The previous hand-built hash payload disagreed with the serialized form + (`+00:00` vs `Z` datetimes), making the documented audit hash unverifiable. +- MZ3 scalar-overlay export now emits the real NiiVue format (uint16 magic + `0x5A4D`, `attr=8`/isSCALAR, 16-byte header, gzip). The earlier port wrote + a header NiiVue rejected at the magic check. +- `score_from_predictions` rejects 1-D/non-finite inputs and unsupported + meshes with clear errors at the boundary (`UnsupportedMeshError`) instead + of an opaque `IndexError` / a raw traceback through the CLI. +- Version is read from installed distribution metadata + (`importlib.metadata`); the build no longer ships a generated `_version.py`. + +### Removed +- The no-op `score --no-cache` flag (it silently did nothing). 
It will + return when the cache is wired into the scoring path. - TRIBE v2 adapter under `[gpu-deps]` extra (TRIBE itself installed from `requirements/tribev2-gpu.txt`, pinned to commit `34f52344e5ba96660fac877393e1954e399d3ef3`). - - 118-test suite at 88.66% coverage with property tests, schema - snapshot, cache invalidation matrix, packaging smoke, and - import-without-GPU gate. + - 135-test suite at ~90% coverage with property tests, schema + snapshot, cache invalidation matrix, packaging smoke, MZ3 format + round-trip, result_id verifiability, and import-without-GPU gate. ### Notes - The package source is MIT-licensed. The bundled atlases ship under diff --git a/CITATION.cff b/CITATION.cff index 3910cd4..5e35d7f 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,10 +2,14 @@ cff-version: 1.2.0 title: "cortex-score" message: "If you use this software, please cite both this package and the underlying TRIBE v2 brain-encoding model." type: software +version: "0.1.0" +date-released: "2026-06-10" +license: MIT authors: - given-names: Madhav family-names: Chauhan repository-code: "https://github.com/madhavcodez/cortex-score" +url: "https://github.com/madhavcodez/cortex-score" abstract: >- cortex-score packages the TRIBE v2 brain-encoding pipeline as an installable Python library that summarizes predicted cortical responses for any video @@ -17,7 +21,6 @@ keywords: - neuroimaging - video-analysis - tribe -license: MIT references: - type: software title: "TRIBE v2" diff --git a/README.md b/README.md index 74d179a..4eefa60 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ a model trained on fMRI scans of people watching videos. > viewer. Treat them as a creative signal, not a clinical one. I built this to make a brain-encoding model usable from a few lines of Python. -It's a pre-release (v0.1) — not on PyPI yet. +It's an early release (v0.1): the CPU-only postprocessing tier is stable, and the +JSON output contract is locked behind `SCHEMA_VERSION = "1.0"`. 
**Useful for:** @@ -115,7 +116,7 @@ sequenceDiagram RUN-->>API: PredictionBundle API->>POST: aggregate → Yeo-17 → z-score POST-->>SCH: 5 networks + metrics - SCH->>SCH: result_id = sha256(body) + SCH->>SCH: result_id = sha256(canonical JSON, id blanked) SCH-->>U: ScoreResult JSON ``` @@ -211,6 +212,7 @@ class MyRunner: | `MissingOptionalDependencyError` | `[gpu-deps]` / `tribev2` missing when `score()` is called without a runner | | `MissingExternalToolError` | `ffmpeg` / `uvx` absent on PATH at TRIBE-load time | | `IncompatiblePredictionShapeError` | `preds.shape[1]` doesn't match the mesh's vertex count | +| `UnsupportedMeshError` | A `mesh=` other than `fsaverage5` was requested (subclasses `ValueError` and `CortexScoreError`) | | `AtlasMismatchError` | Bundled atlas SHA-256 disagrees with `data/manifest.json` (corrupted wheel) | ## Output @@ -220,7 +222,7 @@ Every score is a self-describing JSON object. The contract is locked behind | Field | What it gives you | |---|---| -| `result_id` | SHA-256 of the payload — a stable id for caches, audit logs, dedup | +| `result_id` | SHA-256 of the result's own canonical JSON (with `result_id` blanked) — reproducible from the JSON alone, so anyone can re-verify it; a stable id for caches, audit logs, dedup | | `provenance.model_revision` | Which TRIBE v2 commit produced the numbers | | `atlas.*_sha256` | Fingerprints of the exact Schaefer / Yeo / network-group data used | | `normalization.scope` | `within_video` by default — two clips aren't comparable on the same axis unless you opt into a reference distribution | @@ -272,7 +274,7 @@ embedded in `network_groups.json` and exposed via `NetworkScore.color`. 
- **platformdirs** — XDG / `%LOCALAPPDATA%` cache dirs, override via `CORTEX_SCORE_CACHE_DIR` - **Bundled atlas** — Schaefer 2018 + Yeo 2011 on fsaverage5, SHA-256 fingerprinted (~337 KB) - **Encoder** — TRIBE v2 @ `34f52344` (Llama 3.2-3B + V-JEPA2 + W2V-BERT), pinned to commit -- **Tests** — `pytest` + `hypothesis`, 125 tests at 87.95% coverage, `ruff` + `mypy --strict` clean +- **Tests** — `pytest` + `hypothesis`, 135 tests at ~90% coverage, `ruff` + `mypy --strict` clean ## Licenses diff --git a/docs/install-gpu.md b/docs/install-gpu.md new file mode 100644 index 0000000..8728ac8 --- /dev/null +++ b/docs/install-gpu.md @@ -0,0 +1,65 @@ +# GPU install (full `score()` pipeline) + +The base `cortex-score` install is **CPU-only** — it covers +`score_from_predictions` / `score_from_prediction_bundle`, which take a +prediction tensor you already have. To run the full `score("clip.mp4")` +pipeline you also need TRIBE v2 and its GPU stack. + +TRIBE v2 is **not** a declared dependency of `cortex-score`: PyPI rejects +published metadata that contains direct-URL (Git) dependencies, so it must +be installed separately. `cortex-score[gpu-deps]` declares the *compatible +environment* (torch / transformers / moviepy versions), and the pinned +TRIBE commit is installed from a requirements file. + +## Requirements + +- A CUDA-capable GPU (TRIBE v2 weights are ~12 GB; plan for ≥16 GB VRAM). +- `ffmpeg` and `uvx` (from [uv](https://github.com/astral-sh/uv)) on `PATH` — + TRIBE's preprocessing shells out to them. +- A Hugging Face account with access to the gated Llama 3.2-3B weights. + +## Install + +```bash +# 1. Base package + the TRIBE-compatible GPU dependency matrix +pip install "cortex-score[gpu-deps]" + +# 2. TRIBE v2 itself, pinned to the tested commit +pip install -r requirements/tribev2-gpu.txt + +# 3. External tools (example: Debian/Ubuntu) +sudo apt-get install -y ffmpeg +curl -LsSf https://astral.sh/uv/install.sh | sh # provides `uvx` + +# 4. 
Authenticate for the gated model weights +huggingface-cli login +``` + +`requirements/tribev2-gpu.txt` pins TRIBE v2 to commit +`34f52344e5ba96660fac877393e1954e399d3ef3`, which matches +`cortex_score.runners.tribev2.TRIBEV2_PINNED_REVISION`. Bumping one +requires bumping the other and re-running the GPU smoke test. + +## Verify + +```bash +cortex-score doctor +``` + +`doctor` reports Python, `cortex-score`, torch (+ CUDA), tribev2, ffmpeg, +uvx, the Hugging Face token, and the cache directory. Each row reads `ok` or +names what to install; resolve every non-`ok` row before you run `score()`. + +## Run + +```python +from cortex_score import score + +result = score("clip.mp4") +result.save("clip.score.json") +``` + +> TRIBE v2 is licensed **CC-BY-NC-4.0**. Scores produced through the full +> `score()` path inherit the non-commercial restriction; it is emitted as a +> runtime warning on first load and recorded in every +> `ScoreResult.license_restrictions`. diff --git a/pyproject.toml b/pyproject.toml index dde9e47..f3fe126 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["hatchling>=1.21", "hatch-vcs>=0.4"] +requires = ["hatchling>=1.27", "hatch-vcs>=0.4"] build-backend = "hatchling.build" [project] @@ -7,7 +7,8 @@ name = "cortex-score" description = "Score any video for predicted cortical engagement across 5 brain networks (visual, language, faces, attention, motion). Built on Meta FAIR's TRIBE v2 encoding model." readme = "README.md" requires-python = ">=3.11" -license = { file = "LICENSE" } +license = "MIT" +license-files = ["LICENSE", "LICENSE-THIRD-PARTY.md"] authors = [{ name = "Madhav Chauhan" }] keywords = [ "fmri", @@ -19,14 +20,17 @@ keywords = [ "yeo", "cortical-response", ] +# License metadata uses the PEP 639 SPDX expression (`license = "MIT"` +# above); the deprecated `License ::` trove classifier is intentionally +# omitted so PyPI does not flag a conflicting/duplicate license. 
classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Medical Science Apps.", @@ -87,12 +91,14 @@ Issues = "https://github.com/madhavcodez/cortex-score/issues" # ---- Hatch build configuration --------------------------------------- +# hatch-vcs computes the version from git tags and writes it into the +# built distribution metadata. We deliberately do NOT use the vcs +# build hook to generate a `_version.py` source file: the runtime reads +# the version back from installed metadata (see cortex_score/version.py), +# so there is one authority and no generated artifact in the wheel. [tool.hatch.version] source = "vcs" -[tool.hatch.build.hooks.vcs] -version-file = "src/cortex_score/_version.py" - [tool.hatch.build.targets.wheel] packages = ["src/cortex_score"] @@ -127,7 +133,6 @@ include = [ [tool.ruff] line-length = 100 target-version = "py311" -extend-exclude = ["src/cortex_score/_version.py"] [tool.ruff.lint] select = [ @@ -194,8 +199,7 @@ filterwarnings = ["error", "ignore::DeprecationWarning"] branch = true source = ["cortex_score"] omit = [ - "src/cortex_score/_version.py", - "src/cortex_score/runners/tribev2.py", # GPU-only; covered by @pytest.mark.gpu + "src/cortex_score/runners/tribev2.py", # GPU-only; exercised by @pytest.mark.gpu ] [tool.coverage.report] diff --git a/src/cortex_score/__init__.py b/src/cortex_score/__init__.py index 206a522..410cbfe 100644 --- a/src/cortex_score/__init__.py +++ b/src/cortex_score/__init__.py @@ -29,8 +29,6 @@ from __future__ import annotations -from cortex_score._version import __version__ - # High-level API. 
score_from_predictions / score_from_prediction_bundle # stay CPU-only; score() lazily resolves a runner. from cortex_score.api import ( @@ -48,6 +46,7 @@ MissingOptionalDependencyError, ModelLicenseError, PreprocessingWarning, + UnsupportedMeshError, ) # Schemas and processing modules are pure NumPy / Pydantic — safe to @@ -63,6 +62,7 @@ SegmentMeta, TimingMeta, ) +from cortex_score.version import __version__ # Semantic grouping (Version > API > Schemas > Exceptions) is more # readable in docs than alphabetical, so the RUF022 sort hint is @@ -92,6 +92,7 @@ "MissingOptionalDependencyError", "MissingExternalToolError", "IncompatiblePredictionShapeError", + "UnsupportedMeshError", "AtlasMismatchError", "ModelLicenseError", "PreprocessingWarning", diff --git a/src/cortex_score/api.py b/src/cortex_score/api.py index 36ec2f4..976c781 100644 --- a/src/cortex_score/api.py +++ b/src/cortex_score/api.py @@ -17,11 +17,10 @@ from __future__ import annotations -import datetime as _dt import hashlib from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Literal +from typing import Literal import numpy as np import numpy.typing as npt @@ -31,7 +30,7 @@ load_schaefer400, load_yeo17, ) -from cortex_score.exceptions import MissingOptionalDependencyError +from cortex_score.exceptions import MissingOptionalDependencyError, UnsupportedMeshError from cortex_score.processing.aggregate import aggregate_to_rois from cortex_score.processing.networks import build_network_summary from cortex_score.processing.normalize import DEFAULT_EPS, zscore_within_atlas @@ -201,38 +200,14 @@ def score_from_prediction_bundle( license_restrictions = default_tribev2_license_restrictions() - created_at = utc_now() - # IMPORTANT: result_id is the audit identity of this ScoreResult. To - # remain stable across releases, it MUST cover every JSON-output - # field including framing strings. 
Building the body from the same - # constants ScoreResult uses for serialization keeps the hash from - # drifting if framing copy ever changes. (Bug caught in code review.) - from cortex_score.schemas import ( - FRAMING_DISCLAIMER, - FRAMING_PRIMARY, - FRAMING_SCIENTIFIC, - ) - - body: dict[str, Any] = { - "schema_version": "1.0", - "framing": FRAMING_PRIMARY, - "framing_scientific": FRAMING_SCIENTIFIC, - "framing_disclaimer": FRAMING_DISCLAIMER, - "input": input_meta.model_dump(mode="json"), - "timing": timing.model_dump(mode="json"), - "normalization": normalization.model_dump(mode="json"), - "atlas": atlas.model_dump(mode="json"), - "provenance": provenance.model_dump(mode="json"), - "license_restrictions": [r.model_dump(mode="json") for r in license_restrictions], - "warnings": [w.model_dump(mode="json") for w in config.warnings], - "networks": [n.model_dump(mode="json") for n in networks], - "created_at": created_at.isoformat(), - } - result_id = compute_result_id(body) - - return ScoreResult( - result_id=result_id, - created_at=created_at, + # Build the result with an empty result_id, then stamp the audit hash + # computed from the result's OWN canonical serialization. This makes + # result_id cover exactly the fields ScoreResult serializes (framing, + # schema_version, created_at, ...) and removes the drift risk of + # hand-rebuilding a parallel payload dict. See compute_result_id(). + draft = ScoreResult( + result_id="", + created_at=utc_now(), input=input_meta, timing=timing, normalization=normalization, @@ -242,6 +217,7 @@ def score_from_prediction_bundle( warnings=config.warnings, networks=networks, ) + return draft.model_copy(update={"result_id": compute_result_id(draft)}) def score_from_predictions( @@ -283,11 +259,7 @@ def score_from_predictions( A ``ScoreResult``. """ if mesh != "fsaverage5": - msg = ( - f"mesh='{mesh}' is not supported in v0.1; only 'fsaverage5' is " - f"shipped. Future versions will gate this via an explicit Mesh enum." 
- ) - raise ValueError(msg) + raise UnsupportedMeshError(mesh=mesh, supported=("fsaverage5",)) # Validate at the public boundary so callers see a clear error # before reaching PredictionBundle.__post_init__. @@ -300,6 +272,16 @@ def score_from_predictions( ) preds_f32 = coerce_float32(preds) + # Shape-check before reading shape[1] so a 1-D/0-D array fails with a + # clear ValueError at this boundary instead of an opaque IndexError + # inside PredictionBundle construction. + if preds_f32.ndim != 2: + msg = ( + f"score_from_predictions(): preds must be 2D (T, V); got shape " + f"{preds_f32.shape}. If you have a 1D time-mean array, reshape to (1, V)." + ) + raise ValueError(msg) + bundle = PredictionBundle( vertex_predictions=preds_f32, mesh="fsaverage5", @@ -503,8 +485,3 @@ def _sha256_file(path: Path, *, chunk_size: int = 1 << 20) -> str: "score_from_prediction_bundle", "score_from_predictions", ] - - -# Silence unused-import warnings for re-exports that are imported only -# by cortex_score.__init__ via this module's namespace. -_ = _dt diff --git a/src/cortex_score/atlas.py b/src/cortex_score/atlas.py index 00880a8..c803d29 100644 --- a/src/cortex_score/atlas.py +++ b/src/cortex_score/atlas.py @@ -39,15 +39,18 @@ class AtlasAssets: name: Stable atlas identifier (e.g. ``"schaefer400"`` or ``"yeo17"``). n_parcels: Number of parcels (excluding the medial wall). vertex_to_parcel: shape ``(V,)`` int64. Parcel id per fsaverage5 - vertex; ``-1`` for medial wall. - parcel_labels: Length-``n_parcels`` list of human-readable labels. + vertex; ``-1`` for medial wall. Marked read-only (the loaders + are ``lru_cache``-d, so an in-place mutation would corrupt every + subsequent use for the process lifetime). + parcel_labels: Length-``n_parcels`` tuple of human-readable labels + (a tuple, not a list, so the frozen dataclass is fully immutable). sha256: SHA-256 of the underlying ``.npy`` file (for provenance). 
""" name: str n_parcels: int vertex_to_parcel: npt.NDArray[np.int64] - parcel_labels: list[str] + parcel_labels: tuple[str, ...] sha256: str @@ -82,6 +85,18 @@ def _load_npy(filename: str) -> tuple[np.ndarray, str]: return arr, hashlib.sha256(raw).hexdigest() +def _frozen_int64(arr: np.ndarray) -> npt.NDArray[np.int64]: + """Return ``arr`` as a read-only int64 array. + + The atlas loaders are ``lru_cache``-d, so the returned array lives for + the process lifetime. Making it read-only turns an accidental in-place + write into an immediate error instead of silent, global corruption. + """ + out: npt.NDArray[np.int64] = arr.astype(np.int64, copy=False) + out.flags.writeable = False + return out + + def _load_labels(filename: str) -> list[str]: raw = json.loads(_read_text(filename)) if not isinstance(raw, list): @@ -173,8 +188,8 @@ def load_schaefer400() -> AtlasAssets: return AtlasAssets( name="schaefer400", n_parcels=n_parcels, - vertex_to_parcel=arr.astype(np.int64, copy=False), - parcel_labels=labels, + vertex_to_parcel=_frozen_int64(arr), + parcel_labels=tuple(labels), sha256=sha, ) @@ -194,8 +209,8 @@ def load_yeo17() -> AtlasAssets: return AtlasAssets( name="yeo17", n_parcels=n_parcels, - vertex_to_parcel=arr.astype(np.int64, copy=False), - parcel_labels=labels, + vertex_to_parcel=_frozen_int64(arr), + parcel_labels=tuple(labels), sha256=sha, ) @@ -212,4 +227,4 @@ def load_schaefer400_to_yeo17() -> npt.NDArray[np.int64]: f"expected (n_schaefer={load_schaefer400().n_parcels},)" ) raise AtlasMismatchError(msg) - return arr.astype(np.int64, copy=False) + return _frozen_int64(arr) diff --git a/src/cortex_score/cli.py b/src/cortex_score/cli.py index 9ffcc32..6656b38 100644 --- a/src/cortex_score/cli.py +++ b/src/cortex_score/cli.py @@ -185,10 +185,8 @@ def score( None, "--output-dir", help="Directory for per-video JSON (required for batch)." 
), compact: bool = typer.Option(False, "--compact", help="Serialize without indent."), - no_cache: bool = typer.Option(False, "--no-cache", help="Skip cache for this run."), ) -> None: """Full pipeline: video(s) -> ScoreResult JSON.""" - _ = no_cache # cache integration ships in v0.1.1 if len(videos) > 1 and output_dir is None: _log("multiple inputs require --output-dir") raise typer.Exit(code=2) @@ -207,7 +205,12 @@ def score( except MissingOptionalDependencyError as exc: _log(str(exc)) raise typer.Exit(code=3) from exc - except CortexScoreError as exc: + except (CortexScoreError, FileNotFoundError, ValueError) as exc: + # FileNotFoundError (missing video) and ValueError (bad input) + # are not CortexScoreError subclasses; catch them here so the + # user gets a clean stderr message + exit 1 instead of a raw + # Python traceback. MissingOptionalDependencyError (above) keeps + # its dedicated exit code 3. _log(f"error: {exc}") raise typer.Exit(code=1) from exc @@ -241,7 +244,10 @@ def from_predictions( model_revision=model_revision, source="npy", ) - except CortexScoreError as exc: + except (CortexScoreError, ValueError) as exc: + # Unsupported --mesh, bad --tr, or a non-2D / non-finite tensor + # surface as ValueError at the boundary; keep them off the + # traceback path. (UnsupportedMeshError is both, caught here.) _log(f"error: {exc}") raise typer.Exit(code=1) from exc diff --git a/src/cortex_score/exceptions.py b/src/cortex_score/exceptions.py index 9dd7e1f..791c3f2 100644 --- a/src/cortex_score/exceptions.py +++ b/src/cortex_score/exceptions.py @@ -66,6 +66,23 @@ def __init__( self.mesh = mesh +class UnsupportedMeshError(CortexScoreError, ValueError): + """Raised when a caller requests a cortical mesh this version doesn't ship. 
+ + Subclasses ``ValueError`` (so existing ``except ValueError`` blocks keep + working) *and* ``CortexScoreError`` (so the documented ``except + CortexScoreError`` boundary — used by the CLI — catches it instead of + leaking a raw traceback). + """ + + def __init__(self, mesh: str, supported: tuple[str, ...]) -> None: + supported_str = ", ".join(repr(m) for m in supported) + msg = f"mesh={mesh!r} is not supported in this version; supported: {supported_str}." + super().__init__(msg) + self.mesh = mesh + self.supported = supported + + class AtlasMismatchError(CortexScoreError, ValueError): """Raised when atlas vertex/parcel assignments are internally inconsistent. @@ -99,5 +116,9 @@ class PreprocessingWarning(UserWarning): could affect interpretation — letterboxing, aspect-ratio resampling, or significant frame-rate downsampling. - Surfaces in ``ScoreResult.warnings`` as well. + This is a Python ``warnings`` category, distinct from the + ``ScoreWarning`` Pydantic model recorded in ``ScoreResult.warnings``. + Code that emits this warning and also wants it in the result artifact + is responsible for appending a corresponding ``ScoreWarning`` via + ``ScoreConfig.warnings`` — there is no automatic bridge between the two. """ diff --git a/src/cortex_score/export/niivue.py b/src/cortex_score/export/niivue.py index 5cc1bd1..17db2a5 100644 --- a/src/cortex_score/export/niivue.py +++ b/src/cortex_score/export/niivue.py @@ -1,33 +1,49 @@ """Niivue-compatible MZ3 scalar overlay writer. -Direct port of Cortexia's -``apps/worker/src/clipcortex_worker/pipeline/postprocess.py::write_mz3_scalar_overlay``. -The MZ3 format is a small zlib-compressed header + float32 scalars per -vertex; no external dependency is required to write it. +Writes a per-vertex scalar overlay in the MZ3 format that NiiVue +(and surf-ice) read. 
The MZ3 layout is a fixed 16-byte little-endian +header followed by the float32 scalar payload, gzip-compressed as a +whole; no external dependency is required to write it. + +Header (16 bytes, `` Path: - """Write a Niivue-compatible MZ3 scalar overlay. + """Write a NiiVue-compatible MZ3 scalar overlay. Args: vertex_data: shape ``(V,)``, float32-compatible. One scalar per @@ -46,12 +62,15 @@ def write_mz3_scalar_overlay( scalars = np.ascontiguousarray(vertex_data, dtype=np.float32) header = struct.pack( - "= n_yeo: + msg = ( + f"network group definitions reference Yeo index {max_index} but " + f"z_yeo_preds has only {n_yeo} columns; atlas / network_groups mismatch" + ) + raise AtlasMismatchError(msg) + summaries: list[dict[str, Any]] = [] - for group in _network_groups_cached(): - indices = [idx for idx in group.yeo_indices if idx < n_yeo] - if not indices: - energy_ts = np.zeros(z_yeo_preds.shape[0], dtype=np.float32) - mean_z_ts = np.zeros(z_yeo_preds.shape[0], dtype=np.float32) - else: - group_ts = z_yeo_preds[:, indices] - energy_ts = np.abs(group_ts).mean(axis=1).astype(np.float32) - mean_z_ts = group_ts.mean(axis=1).astype(np.float32) + for group in groups: + group_ts = z_yeo_preds[:, list(group.yeo_indices)] + energy_ts = np.abs(group_ts).mean(axis=1).astype(np.float32) + mean_z_ts = group_ts.mean(axis=1).astype(np.float32) summaries.append( { diff --git a/src/cortex_score/processing/validate.py b/src/cortex_score/processing/validate.py index 7a09781..49e9e8a 100644 --- a/src/cortex_score/processing/validate.py +++ b/src/cortex_score/processing/validate.py @@ -32,7 +32,8 @@ def validate_predictions_against_mesh( Raises: IncompatiblePredictionShapeError: if ``preds`` is not 2D, has zero timesteps, or vertex count does not match. - ValueError: on totally malformed input (zero dimensions). + ValueError: on totally malformed input (zero dimensions) or on + non-finite values (NaN / inf). 
""" if preds.ndim != 2: msg = ( @@ -46,6 +47,17 @@ def validate_predictions_against_mesh( msg = f"predictions must have at least 1 timestep; got T={t}" raise ValueError(msg) + # Corrupted encoder output (NaN/inf) would otherwise sail through + # normalization and produce a schema-valid but numerically garbage + # ScoreResult. Fail loudly at the boundary instead. + if not np.isfinite(preds).all(): + n_bad = int((~np.isfinite(preds)).sum()) + msg = ( + f"predictions contain {n_bad} non-finite value(s) (NaN or inf). " + "Check the upstream encoder output before scoring." + ) + raise ValueError(msg) + if v != expected_n_vertices: raise IncompatiblePredictionShapeError( expected_n_vertices=expected_n_vertices, diff --git a/src/cortex_score/runners/tribev2.py b/src/cortex_score/runners/tribev2.py index 5428984..b46f54a 100644 --- a/src/cortex_score/runners/tribev2.py +++ b/src/cortex_score/runners/tribev2.py @@ -36,6 +36,7 @@ from cortex_score.exceptions import ( MissingExternalToolError, MissingOptionalDependencyError, + PreprocessingWarning, ) from cortex_score.schemas import PredictionBundle, SegmentMeta @@ -214,6 +215,13 @@ def _parse_segments(segments_obj: object) -> tuple[SegmentMeta, ...]: start_col = cols.get("start") or cols.get("start_s") or cols.get("t_start") end_col = cols.get("end") or cols.get("end_s") or cols.get("t_end") if start_col is None or end_col is None: + warnings.warn( + "TRIBE segments DataFrame has unrecognized columns " + f"{list(segments_obj.columns)!r}; segment timing will be " + "absent from the PredictionBundle.", + PreprocessingWarning, + stacklevel=2, + ) return () for i, row in segments_obj.reset_index(drop=True).iterrows(): out.append( @@ -257,5 +265,14 @@ def _pick(d: dict[str, object], *keys: str, default: float = 0.0) -> float: end = _pick(d, "end", "end_s", "t_end") out.append(SegmentMeta(index=i, start_s=start, end_s=end)) return tuple(out) - except TypeError: - return () + except TypeError as exc: + # Unexpected element type 
from a future TRIBE change: keep whatever + # parsed cleanly and surface the rest rather than silently dropping + # all segments with no diagnostic. + warnings.warn( + f"TRIBE segments object raised {exc!r} during parsing; " + f"returning {len(out)} segment(s) parsed before the error.", + PreprocessingWarning, + stacklevel=2, + ) + return tuple(out) diff --git a/src/cortex_score/schemas.py b/src/cortex_score/schemas.py index a6d74be..6084d50 100644 --- a/src/cortex_score/schemas.py +++ b/src/cortex_score/schemas.py @@ -25,7 +25,7 @@ import sys from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Literal +from typing import Literal import numpy as np import numpy.typing as npt @@ -37,12 +37,13 @@ model_validator, ) +from cortex_score.processing.metrics import METRICS_VERSION +from cortex_score.processing.networks import NetworkId + # _CORTEX_SCORE_VERSION is a module-private constant (SCREAMING_SNAKE_CASE) # aliasing the dunder __version__. N812 misreads "lowercase -> non-lowercase" # but the destination IS a constant; the alias is intentional. -from cortex_score._version import __version__ as _CORTEX_SCORE_VERSION # noqa: N812 -from cortex_score.processing.metrics import METRICS_VERSION -from cortex_score.processing.networks import NetworkId +from cortex_score.version import __version__ as _CORTEX_SCORE_VERSION # noqa: N812 SCHEMA_VERSION: str = "1.0" """Top-level schema version. Bump on any breaking JSON contract change.""" @@ -224,7 +225,8 @@ class InputMeta(_StrictModel): ) content_sha256: str | None = Field( default=None, - description="SHA-256 of the input bytes, if a file was provided.", + pattern=r"^[0-9a-fA-F]{64}$", + description="SHA-256 (hex) of the input bytes, if a file was provided.", ) duration_s: float | None = Field( default=None, @@ -281,19 +283,16 @@ class AtlasMeta(_StrictModel): description="Bundled atlas identifier (e.g. 'schaefer2018-400-yeo17-fsaverage5')." 
) atlas_sha256: str = Field( - min_length=64, - max_length=64, - description="SHA-256 of the Schaefer-400 vertex .npy used.", + pattern=r"^[0-9a-f]{64}$", + description="SHA-256 (lowercase hex) of the Schaefer-400 vertex .npy used.", ) yeo_atlas_sha256: str = Field( - min_length=64, - max_length=64, - description="SHA-256 of the Yeo-17 vertex .npy used.", + pattern=r"^[0-9a-f]{64}$", + description="SHA-256 (lowercase hex) of the Yeo-17 vertex .npy used.", ) network_groups_sha256: str = Field( - min_length=64, - max_length=64, - description="SHA-256 of network_groups.json used.", + pattern=r"^[0-9a-f]{64}$", + description="SHA-256 (lowercase hex) of network_groups.json used.", ) network_group_source: str = Field( description="Group definition source id (e.g. 'cortexia-network-groups-v1')." @@ -307,21 +306,22 @@ class ProvenanceMeta(_StrictModel): input bytes. """ - cortex_score_version: str + cortex_score_version: str = Field(min_length=1) schema_version: str = SCHEMA_VERSION metrics_version: str = METRICS_VERSION serialization_version: str = SERIALIZATION_VERSION - model_id: str - model_revision: str + model_id: str = Field(min_length=1) + model_revision: str = Field(min_length=1) tribev2_package_version: str | None = None runner: str = Field( + min_length=1, description=( "Fully qualified class name of the runner that produced the " "predictions (e.g. 'cortex_score.runners.tribev2.TribeV2Runner') " "or 'external' when score_from_predictions was used directly." - ) + ), ) - python_version: str + python_version: str = Field(min_length=1) torch_version: str | None = None cuda_available: bool | None = None device: str | None = None @@ -357,23 +357,23 @@ class NetworkScore(_StrictModel): energy_timeseries: tuple[float, ...] mean_z_timeseries: tuple[float, ...] 
group_definition_sha256: str = Field( - min_length=64, - max_length=64, - description="SHA-256 of the network_groups.json that defined this group.", + pattern=r"^[0-9a-f]{64}$", + description="SHA-256 (lowercase hex) of the network_groups.json that defined this group.", ) - @field_validator("yeo_labels") - @classmethod - def _labels_match_indices( - cls, - v: tuple[str, ...], - info: Any, - ) -> tuple[str, ...]: - idx = info.data.get("yeo_indices") if hasattr(info, "data") else None - if idx is not None and len(idx) != len(v): - msg = f"yeo_labels length {len(v)} does not match yeo_indices length {len(idx)}" + @model_validator(mode="after") + def _labels_match_indices(self) -> NetworkScore: + # model_validator(mode="after") sees both fields as typed + # attributes — no Pydantic field-ordering dependency, no dict + # lookup, no dead hasattr guard. Matches the cross-field pattern + # used by NormalizationMeta and ScoreResult in this module. + if len(self.yeo_labels) != len(self.yeo_indices): + msg = ( + f"yeo_labels length {len(self.yeo_labels)} does not match " + f"yeo_indices length {len(self.yeo_indices)}" + ) raise ValueError(msg) - return v + return self class ScoreResult(_StrictModel): @@ -387,8 +387,10 @@ class ScoreResult(_StrictModel): schema_version: str = SCHEMA_VERSION result_id: str = Field( description=( - "SHA-256 of the canonical JSON of this result with result_id " - "itself set to the empty string. Stable hash for audit logs." + "SHA-256 audit identity. Computed as the hash of this result's " + "model_dump(mode='json') with result_id set to '', re-serialized " + "with sorted keys and compact separators. Reproducible from the " + "JSON alone, so consumers can verify it. See compute_result_id()." 
), ) created_at: _dt.datetime = Field( @@ -474,11 +476,31 @@ def save(self, path: str | Path) -> Path: # --------------------------------------------------------------------- -def compute_result_id(payload_without_id: dict[str, Any]) -> str: - """Stable SHA-256 of a result payload with ``result_id`` cleared.""" - payload = dict(payload_without_id) - payload["result_id"] = "" - raw = json.dumps(payload, sort_keys=True, separators=(",", ":"), default=str) +def compute_result_id(result: ScoreResult) -> str: + """Stable SHA-256 audit identity of a ``ScoreResult``. + + Defined as the SHA-256 of the result's *own* canonical JSON + serialization with ``result_id`` blanked to the empty string: + + 1. ``result.model_dump(mode="json")`` — the exact field set and value + encoding the model serializes (so the hash can never drift away + from ``ScoreResult``'s real fields; the previous implementation + rebuilt a parallel dict by hand and silently disagreed with the + serialized artifact, e.g. ``+00:00`` vs ``Z`` datetimes). + 2. ``result_id`` set to ``""``. + 3. ``json.dumps(..., sort_keys=True, separators=(",", ":"))`` — a + canonical, key-order-independent, whitespace-free encoding. + + Recomputing this over any serialized ``ScoreResult`` (after blanking + ``result_id``) reproduces the id, so downstream consumers can verify + the audit hash from the JSON alone. No ``default=`` fallback is used: + every value is already JSON-native after ``model_dump(mode="json")``, + so a non-serializable value is a real bug that should raise loudly + rather than be silently stringified. 
+ """ + cleared = result.model_copy(update={"result_id": ""}) + payload = cleared.model_dump(mode="json") + raw = json.dumps(payload, sort_keys=True, separators=(",", ":")) return hashlib.sha256(raw.encode("utf-8")).hexdigest() @@ -525,10 +547,6 @@ def utc_now() -> _dt.datetime: return _dt.datetime.now(_dt.UTC) -def python_version_string() -> str: - return platform.python_version() - - def _detect_torch_environment() -> tuple[str | None, bool | None, str | None]: """Return (torch_version, cuda_available, device) without forcing an import.""" if "torch" not in sys.modules: diff --git a/src/cortex_score/version.py b/src/cortex_score/version.py new file mode 100644 index 0000000..4f3f602 --- /dev/null +++ b/src/cortex_score/version.py @@ -0,0 +1,23 @@ +"""Single source of truth for the runtime version. + +``hatch-vcs`` (``[tool.hatch.version] source = "vcs"`` in ``pyproject.toml``) +computes the version from git tags and bakes it into the installed +distribution metadata at build/install time. We read it back at runtime +via ``importlib.metadata`` so there is exactly one authority and no +generated ``_version.py`` artifact shipped in the wheel. + +The fallback only triggers for a raw source tree that was never installed +(``importlib.metadata`` has no distribution to read). It is a +self-identifying sentinel — never a plausible-looking real version — so a +``ScoreResult`` produced from an uninstalled checkout cannot silently +claim a wrong provenance version. 
+""" + +from __future__ import annotations + +from importlib import metadata + +try: + __version__ = metadata.version("cortex-score") +except metadata.PackageNotFoundError: # pragma: no cover - uninstalled source tree + __version__ = "0.0.0+unknown" diff --git a/tests/fixtures/schema_v1.json b/tests/fixtures/schema_v1.json index 3019220..6f5fa5c 100644 --- a/tests/fixtures/schema_v1.json +++ b/tests/fixtures/schema_v1.json @@ -4,18 +4,6 @@ "additionalProperties": false, "description": "Atlas provenance \u2014 pins the bundled data this score was computed against.", "properties": { - "atlas_sha256": { - "description": "SHA-256 of the Schaefer-400 vertex .npy used.", - "maxLength": 64, - "minLength": 64, - "title": "Atlas Sha256", - "type": "string" - }, - "atlas_version": { - "description": "Bundled atlas identifier (e.g. 'schaefer2018-400-yeo17-fsaverage5').", - "title": "Atlas Version", - "type": "string" - }, "mesh": { "const": "fsaverage5", "title": "Mesh", @@ -26,24 +14,33 @@ "title": "N Vertices", "type": "integer" }, - "network_group_source": { - "description": "Group definition source id (e.g. 'cortexia-network-groups-v1').", - "title": "Network Group Source", + "atlas_version": { + "description": "Bundled atlas identifier (e.g. 
'schaefer2018-400-yeo17-fsaverage5').", + "title": "Atlas Version", "type": "string" }, - "network_groups_sha256": { - "description": "SHA-256 of network_groups.json used.", - "maxLength": 64, - "minLength": 64, - "title": "Network Groups Sha256", + "atlas_sha256": { + "description": "SHA-256 (lowercase hex) of the Schaefer-400 vertex .npy used.", + "pattern": "^[0-9a-f]{64}$", + "title": "Atlas Sha256", "type": "string" }, "yeo_atlas_sha256": { - "description": "SHA-256 of the Yeo-17 vertex .npy used.", - "maxLength": 64, - "minLength": 64, + "description": "SHA-256 (lowercase hex) of the Yeo-17 vertex .npy used.", + "pattern": "^[0-9a-f]{64}$", "title": "Yeo Atlas Sha256", "type": "string" + }, + "network_groups_sha256": { + "description": "SHA-256 (lowercase hex) of network_groups.json used.", + "pattern": "^[0-9a-f]{64}$", + "title": "Network Groups Sha256", + "type": "string" + }, + "network_group_source": { + "description": "Group definition source id (e.g. 'cortexia-network-groups-v1').", + "title": "Network Group Source", + "type": "string" } }, "required": [ @@ -62,6 +59,19 @@ "additionalProperties": false, "description": "Where the prediction came from on disk / wire.\n\n``filename`` is always safe to share \u2014 it's just the basename of the\ninput video. ``absolute_path`` is opt-in: by default it is None, so\nshared ScoreResult JSON does not leak the user's filesystem layout\nor home-directory username. To populate it, pass\n``ScoreConfig(include_absolute_path=True)`` or construct the\n``InputMeta`` explicitly.", "properties": { + "filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Basename of the input video (e.g. 'clip.mp4'). 
Safe to share.", + "title": "Filename" + }, "absolute_path": { "anyOf": [ { @@ -78,6 +88,7 @@ "content_sha256": { "anyOf": [ { + "pattern": "^[0-9a-fA-F]{64}$", "type": "string" }, { @@ -85,7 +96,7 @@ } ], "default": null, - "description": "SHA-256 of the input bytes, if a file was provided.", + "description": "SHA-256 (hex) of the input bytes, if a file was provided.", "title": "Content Sha256" }, "duration_s": { @@ -102,19 +113,6 @@ "description": "Video duration in seconds.", "title": "Duration S" }, - "filename": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Basename of the input video (e.g. 'clip.mp4'). Safe to share.", - "title": "Filename" - }, "fps": { "anyOf": [ { @@ -176,29 +174,6 @@ "additionalProperties": false, "description": "One of the five 5-network rollup entries.", "properties": { - "color": { - "pattern": "^#[0-9A-Fa-f]{6}$", - "title": "Color", - "type": "string" - }, - "description": { - "title": "Description", - "type": "string" - }, - "energy_timeseries": { - "items": { - "type": "number" - }, - "title": "Energy Timeseries", - "type": "array" - }, - "group_definition_sha256": { - "description": "SHA-256 of the network_groups.json that defined this group.", - "maxLength": 64, - "minLength": 64, - "title": "Group Definition Sha256", - "type": "string" - }, "id": { "enum": [ "visual", @@ -214,36 +189,58 @@ "title": "Label", "type": "string" }, - "mean_energy": { - "minimum": 0, - "title": "Mean Energy", - "type": "number" + "description": { + "title": "Description", + "type": "string" }, - "mean_z_timeseries": { + "color": { + "pattern": "^#[0-9A-Fa-f]{6}$", + "title": "Color", + "type": "string" + }, + "yeo_indices": { "items": { - "type": "number" + "type": "integer" }, - "title": "Mean Z Timeseries", + "title": "Yeo Indices", "type": "array" }, + "yeo_labels": { + "items": { + "type": "string" + }, + "title": "Yeo Labels", + "type": "array" + }, + "mean_energy": { + "minimum": 0, 
+ "title": "Mean Energy", + "type": "number" + }, "peak_energy": { "minimum": 0, "title": "Peak Energy", "type": "number" }, - "yeo_indices": { + "energy_timeseries": { "items": { - "type": "integer" + "type": "number" }, - "title": "Yeo Indices", + "title": "Energy Timeseries", "type": "array" }, - "yeo_labels": { + "mean_z_timeseries": { "items": { - "type": "string" + "type": "number" }, - "title": "Yeo Labels", + "title": "Mean Z Timeseries", "type": "array" + }, + "group_definition_sha256": { + "description": "SHA-256 (lowercase hex) of the network_groups.json that defined this group.", + "pattern": "^[0-9a-f]{64}$", + "title": "Group Definition Sha256", + "type": "string" } }, "required": [ @@ -266,18 +263,27 @@ "additionalProperties": false, "description": "How the per-network z-scores were computed.", "properties": { - "epsilon": { - "description": "Ridge added to the std denominator to avoid division by zero.", - "exclusiveMinimum": 0, - "title": "Epsilon", - "type": "number" - }, "method": { "const": "zscore", "default": "zscore", "title": "Method", "type": "string" }, + "scope": { + "default": "within_video", + "enum": [ + "within_video", + "reference_distribution" + ], + "title": "Scope", + "type": "string" + }, + "epsilon": { + "description": "Ridge added to the std denominator to avoid division by zero.", + "exclusiveMinimum": 0, + "title": "Epsilon", + "type": "number" + }, "reference_id": { "anyOf": [ { @@ -290,15 +296,6 @@ "default": null, "description": "Identifier of the cross-clip reference distribution used when scope='reference_distribution'. 
None for within-video.", "title": "Reference Id" - }, - "scope": { - "default": "within_video", - "enum": [ - "within_video", - "reference_distribution" - ], - "title": "Scope", - "type": "string" } }, "required": [ @@ -312,63 +309,56 @@ "description": "Software-side provenance.\n\nThese fields are enough to reproduce a score offline given the same\ninput bytes.", "properties": { "cortex_score_version": { + "minLength": 1, "title": "Cortex Score Version", "type": "string" }, - "cuda_available": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Cuda Available" - }, - "device": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Device" + "schema_version": { + "default": "1.0", + "title": "Schema Version", + "type": "string" }, "metrics_version": { "default": "1.0", "title": "Metrics Version", "type": "string" }, + "serialization_version": { + "default": "1.0", + "title": "Serialization Version", + "type": "string" + }, "model_id": { + "minLength": 1, "title": "Model Id", "type": "string" }, "model_revision": { + "minLength": 1, "title": "Model Revision", "type": "string" }, - "python_version": { - "title": "Python Version", - "type": "string" + "tribev2_package_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Tribev2 Package Version" }, "runner": { "description": "Fully qualified class name of the runner that produced the predictions (e.g. 
'cortex_score.runners.tribev2.TribeV2Runner') or 'external' when score_from_predictions was used directly.", + "minLength": 1, "title": "Runner", "type": "string" }, - "schema_version": { - "default": "1.0", - "title": "Schema Version", - "type": "string" - }, - "serialization_version": { - "default": "1.0", - "title": "Serialization Version", + "python_version": { + "minLength": 1, + "title": "Python Version", "type": "string" }, "torch_version": { @@ -383,7 +373,19 @@ "default": null, "title": "Torch Version" }, - "tribev2_package_version": { + "cuda_available": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cuda Available" + }, + "device": { "anyOf": [ { "type": "string" @@ -393,7 +395,7 @@ } ], "default": null, - "title": "Tribev2 Package Version" + "title": "Device" } }, "required": [ @@ -440,6 +442,12 @@ "additionalProperties": false, "description": "TRIBE-side timing constants used for this prediction.", "properties": { + "tr_seconds": { + "description": "Effective TR (seconds per segment).", + "exclusiveMinimum": 0, + "title": "Tr Seconds", + "type": "number" + }, "hrf_lag_seconds": { "description": "HRF lag in seconds.", "minimum": 0, @@ -451,12 +459,6 @@ "minimum": 1, "title": "N Segments", "type": "integer" - }, - "tr_seconds": { - "description": "Effective TR (seconds per segment).", - "exclusiveMinimum": 0, - "title": "Tr Seconds", - "type": "number" } }, "required": [ @@ -471,8 +473,15 @@ "additionalProperties": false, "description": "Top-level result \u2014 the JSON contract of cortex-score.\n\nFields are intentionally verbose: this artifact may be archived,\naudited, cited, or fed into another model years from now, so being\nself-describing matters more than being terse.", "properties": { - "atlas": { - "$ref": "#/$defs/AtlasMeta" + "schema_version": { + "default": "1.0", + "title": "Schema Version", + "type": "string" + }, + "result_id": { + "description": "SHA-256 audit identity. 
Computed as the hash of this result's model_dump(mode='json') with result_id set to '', re-serialized with sorted keys and compact separators. Reproducible from the JSON alone, so consumers can verify it. See compute_result_id().", + "title": "Result Id", + "type": "string" }, "created_at": { "description": "UTC ISO-8601 timestamp at which the score was computed.", @@ -485,52 +494,38 @@ "title": "Framing", "type": "string" }, - "framing_disclaimer": { - "default": "cortex-score does not measure real viewer engagement. It summarizes predicted fMRI-like responses from a pretrained brain-encoding model for an average subject.", - "title": "Framing Disclaimer", - "type": "string" - }, "framing_scientific": { "default": "cortex-score summarizes TRIBE v2 predicted cortical responses for any video across five Cortexia-defined network groups.", "title": "Framing Scientific", "type": "string" }, + "framing_disclaimer": { + "default": "cortex-score does not measure real viewer engagement. It summarizes predicted fMRI-like responses from a pretrained brain-encoding model for an average subject.", + "title": "Framing Disclaimer", + "type": "string" + }, "input": { "$ref": "#/$defs/InputMeta" }, - "license_restrictions": { - "default": [], - "items": { - "$ref": "#/$defs/LicenseRestriction" - }, - "title": "License Restrictions", - "type": "array" - }, - "networks": { - "items": { - "$ref": "#/$defs/NetworkScore" - }, - "title": "Networks", - "type": "array" + "timing": { + "$ref": "#/$defs/TimingMeta" }, "normalization": { "$ref": "#/$defs/NormalizationMeta" }, + "atlas": { + "$ref": "#/$defs/AtlasMeta" + }, "provenance": { "$ref": "#/$defs/ProvenanceMeta" }, - "result_id": { - "description": "SHA-256 of the canonical JSON of this result with result_id itself set to the empty string. 
Stable hash for audit logs.", - "title": "Result Id", - "type": "string" - }, - "schema_version": { - "default": "1.0", - "title": "Schema Version", - "type": "string" - }, - "timing": { - "$ref": "#/$defs/TimingMeta" + "license_restrictions": { + "default": [], + "items": { + "$ref": "#/$defs/LicenseRestriction" + }, + "title": "License Restrictions", + "type": "array" }, "warnings": { "default": [], @@ -539,6 +534,13 @@ }, "title": "Warnings", "type": "array" + }, + "networks": { + "items": { + "$ref": "#/$defs/NetworkScore" + }, + "title": "Networks", + "type": "array" } }, "required": [ diff --git a/tests/integration/test_cli.py b/tests/integration/test_cli.py index 9c48855..a48a805 100644 --- a/tests/integration/test_cli.py +++ b/tests/integration/test_cli.py @@ -93,3 +93,30 @@ def test_score_requires_output_dir_for_multiple_inputs(tmp_path: Path) -> None: result = runner.invoke(app, ["score", str(v1), str(v2)]) assert result.exit_code == 2 assert "--output-dir" in result.output or "output-dir" in (result.stderr or "") + + +def test_score_missing_file_exits_cleanly(tmp_path: Path) -> None: + """A missing video must exit 1 with a clean message, not a traceback.""" + result = runner.invoke(app, ["score", str(tmp_path / "nope.mp4")]) + assert result.exit_code == 1 + assert "not found" in result.output + # No leaked Python traceback. 
+ assert "Traceback" not in result.output + + +def test_from_predictions_unsupported_mesh_exits_cleanly(tmp_path: Path) -> None: + preds_path = tmp_path / "p.npy" + np.save(preds_path, np.random.default_rng(2).standard_normal((3, 20484)).astype(np.float32)) + result = runner.invoke(app, ["from-predictions", str(preds_path), "--mesh", "fsaverage6"]) + assert result.exit_code == 1 + assert "not supported" in result.output + assert "Traceback" not in result.output + + +def test_from_predictions_non_2d_npy_exits_cleanly(tmp_path: Path) -> None: + preds_path = tmp_path / "bad.npy" + np.save(preds_path, np.zeros(20484, dtype=np.float32)) # 1-D + result = runner.invoke(app, ["from-predictions", str(preds_path)]) + assert result.exit_code == 1 + assert "2D" in result.output + assert "Traceback" not in result.output diff --git a/tests/integration/test_score_from_predictions.py b/tests/integration/test_score_from_predictions.py index 9eb4f03..c020804 100644 --- a/tests/integration/test_score_from_predictions.py +++ b/tests/integration/test_score_from_predictions.py @@ -5,6 +5,7 @@ import json import numpy as np +import pytest from cortex_score import ( PredictionBundle, @@ -13,6 +14,7 @@ score_from_predictions, ) from cortex_score.api import ScoreConfig +from cortex_score.exceptions import CortexScoreError, UnsupportedMeshError from cortex_score.schemas import ( FRAMING_DISCLAIMER, FRAMING_PRIMARY, @@ -144,6 +146,30 @@ def test_score_config_normalization_scope_propagates() -> None: assert result.normalization.reference_id == "cortexia-v1-68clip" +def test_score_from_predictions_rejects_1d_input() -> None: + """A 1-D array must fail with a clear ValueError at the boundary, not + an opaque IndexError inside PredictionBundle construction.""" + with pytest.raises(ValueError, match="must be 2D"): + score_from_predictions(np.zeros(20484, dtype=np.float32), model_revision="test") + + +def test_score_from_predictions_rejects_unsupported_mesh() -> None: + with 
pytest.raises(UnsupportedMeshError, match="not supported"): + score_from_predictions(_synthetic_preds(), mesh="fsaverage6", model_revision="test") + # The error must be catchable both as a ValueError and via the + # documented `except CortexScoreError` boundary. + assert issubclass(UnsupportedMeshError, ValueError) + assert issubclass(UnsupportedMeshError, CortexScoreError) + + +def test_score_from_predictions_rejects_non_finite() -> None: + bad = _synthetic_preds() + bad[0, 0] = np.nan + bad[1, 1] = np.inf + with pytest.raises(ValueError, match="non-finite"): + score_from_predictions(bad, model_revision="test") + + def test_emitted_json_contains_top_level_provenance_fields() -> None: """Reviewer-required: every result must carry full provenance.""" result = score_from_predictions(_synthetic_preds(), model_revision="test") diff --git a/tests/unit/test_export_niivue.py b/tests/unit/test_export_niivue.py index 05edefa..83cece9 100644 --- a/tests/unit/test_export_niivue.py +++ b/tests/unit/test_export_niivue.py @@ -1,9 +1,15 @@ -"""Unit tests for the MZ3 scalar overlay writer.""" +"""Unit tests for the MZ3 scalar overlay writer. + +These validate the bytes against the *real* MZ3 format NiiVue reads +(uint16 magic 23117 at offset 0, attr bitfield at offset 2 with +isSCALAR=8, 16-byte header, gzip-compressed), not a self-referential +constant — so a regression to the old broken layout fails here. 
+""" from __future__ import annotations +import gzip import struct -import zlib from pathlib import Path import numpy as np @@ -11,7 +17,15 @@ from cortex_score.export.niivue import write_mz3_scalar_overlay -_MZ3_MAGIC = 0x4D5A_0003 +_MZ3_MAGIC = 0x5A4D # 23117, ASCII "MZ" +_MZ3_ATTR_ISSCALAR = 8 + + +def _decode_header(path: Path) -> tuple[int, int, int, int, int, np.ndarray]: + raw = gzip.decompress(path.read_bytes()) + magic, attr, nface, nvert, nskip = struct.unpack(" None: @@ -21,27 +35,45 @@ def test_writes_file(tmp_path: Path) -> None: assert out.stat().st_size > 0 -def test_payload_has_magic_after_decompression(tmp_path: Path) -> None: +def test_payload_is_gzip_with_niivue_magic(tmp_path: Path) -> None: data = np.zeros(100, dtype=np.float32) out = write_mz3_scalar_overlay(data, tmp_path / "x.mz3") - raw = zlib.decompress(out.read_bytes()) - magic = struct.unpack(" None: +def test_scalar_count_and_values_round_trip(tmp_path: Path) -> None: n = 250 data = np.linspace(-1.0, 1.0, n, dtype=np.float32) out = write_mz3_scalar_overlay(data, tmp_path / "x.mz3") - raw = zlib.decompress(out.read_bytes()) - # Header layout: None: + """Same input -> identical bytes (mtime pinned), so overlays are + content-addressable and reproducible.""" + data = np.linspace(0.0, 5.0, 64, dtype=np.float32) + a = write_mz3_scalar_overlay(data, tmp_path / "a.mz3").read_bytes() + b = write_mz3_scalar_overlay(data, tmp_path / "b.mz3").read_bytes() + assert a == b + + +def test_casts_non_float32_input(tmp_path: Path) -> None: + data = np.arange(10, dtype=np.float64) + out = write_mz3_scalar_overlay(data, tmp_path / "x.mz3") + _magic, _attr, _nface, nvert, _nskip, scalars = _decode_header(out) + assert nvert == 10 + assert scalars.dtype == np.float32 + np.testing.assert_array_equal(scalars, data.astype(np.float32)) + + def test_rejects_2d_input(tmp_path: Path) -> None: with pytest.raises(ValueError, match="must be 1D"): write_mz3_scalar_overlay(np.zeros((10, 10), dtype=np.float32), tmp_path 
/ "x.mz3") diff --git a/tests/unit/test_networks.py b/tests/unit/test_networks.py index 9b676ed..0854773 100644 --- a/tests/unit/test_networks.py +++ b/tests/unit/test_networks.py @@ -66,3 +66,13 @@ def test_build_network_summary_constant_input() -> None: def test_build_network_summary_shape_validation() -> None: with pytest.raises(ValueError, match="must be 2D"): build_network_summary(np.zeros(17, dtype=np.float32)) + + +def test_build_network_summary_out_of_range_yeo_index_raises() -> None: + """A too-narrow array (fewer columns than the groups reference) must + raise AtlasMismatchError instead of silently zero-filling networks.""" + from cortex_score.exceptions import AtlasMismatchError + + z = np.zeros((4, 5), dtype=np.float32) # groups reference indices up to 16 + with pytest.raises(AtlasMismatchError, match="mismatch"): + build_network_summary(z) diff --git a/tests/unit/test_schemas.py b/tests/unit/test_schemas.py index a321a01..01958c4 100644 --- a/tests/unit/test_schemas.py +++ b/tests/unit/test_schemas.py @@ -197,16 +197,43 @@ def test_score_result_framing_defaults_are_baked_in() -> None: assert r.framing_disclaimer == FRAMING_DISCLAIMER -def test_compute_result_id_is_stable() -> None: - payload = { - "schema_version": "1.0", - "input": {"path": None, "content_sha256": None}, - "result_id": "should-be-overwritten", - } - a = compute_result_id(payload) - b = compute_result_id(payload) +def _full_result(result_id: str = "z" * 64) -> ScoreResult: + timing = TimingMeta(tr_seconds=1.0, hrf_lag_seconds=5.0, n_segments=2) + networks = tuple(_net(i, 2) for i in ("visual", "language", "faces", "attention", "motion")) + return ScoreResult( + result_id=result_id, + created_at=_dt.datetime(2026, 1, 1, tzinfo=_dt.UTC), + input=InputMeta(), + timing=timing, + normalization=NormalizationMeta(epsilon=1e-6), + atlas=_atlas(), + provenance=build_provenance(model_id="x", model_revision="y", runner="external"), + networks=networks, + ) + + +def 
test_compute_result_id_is_stable_and_blank_invariant() -> None: + result = _full_result() + a = compute_result_id(result) + b = compute_result_id(result) assert a == b assert len(a) == 64 + # result_id is blanked before hashing, so the stored value can't + # influence the hash. + assert compute_result_id(result.model_copy(update={"result_id": "x" * 64})) == a + + +def test_result_id_is_verifiable_from_serialized_json() -> None: + """The documented contract: re-hashing a serialized result (with + result_id blanked) reproduces result_id. This is the round-trip the + old hand-built-dict implementation silently broke (Z vs +00:00).""" + result = _full_result().model_copy(update={"result_id": ""}) + stamped = result.model_copy(update={"result_id": compute_result_id(result)}) + + rebuilt = ScoreResult.model_validate_json(stamped.to_json()) + assert rebuilt.result_id == stamped.result_id + # Recompute from the deserialized object: must match. + assert compute_result_id(rebuilt) == stamped.result_id def test_segment_meta_is_frozen() -> None: