diff --git a/artifact-package-integrity-gate/README.md b/artifact-package-integrity-gate/README.md new file mode 100644 index 0000000..1aafc9f --- /dev/null +++ b/artifact-package-integrity-gate/README.md @@ -0,0 +1,26 @@ +# Artifact Package Integrity Gate + +This module adds a Scientific Data & Code Hosting slice for reviewer-ready research artifact packages. It is self-contained, dependency-free, and synthetic-data-only so reviewers can validate it without credentials, cloud storage, or a running platform. + +It covers the issue #14 requirements by validating: + +- datasets, notebooks, code, figures, media, and model artifacts with deterministic type classification +- metadata-aware preview plans for tabular data, notebooks, figures, JSON, model files, and deferred large payloads +- DataCite, JSON-LD, and schema.org package metadata completeness +- FAIR access requirements, reusable licenses, reviewer access evidence, version hashes, and persistent links +- pinned executable environments and rerun commands that only reference hosted artifacts +- stable export packets with package and source digests for DOI/API/archive workflows + +## Local Validation + +```sh +node artifact-package-integrity-gate/test.js +node artifact-package-integrity-gate/demo.js +``` + +## Demo Evidence + +- [demo.mp4](demo.mp4) shows the problem, implementation scope, acceptance behavior, and validation commands. +- [demo.svg](demo.svg) provides a static reviewer dashboard preview. +- [requirements-map.md](requirements-map.md) maps the implementation to issue #14. +- [acceptance-notes.md](acceptance-notes.md) lists the reviewer checks. 
diff --git a/artifact-package-integrity-gate/acceptance-notes.md b/artifact-package-integrity-gate/acceptance-notes.md new file mode 100644 index 0000000..98c2bea --- /dev/null +++ b/artifact-package-integrity-gate/acceptance-notes.md @@ -0,0 +1,24 @@ +# Acceptance Notes + +## What This Adds + +The `artifact-package-integrity-gate` module gives SCIBASE a deterministic validation layer for hosted scientific data and code packages before they are exposed through persistent links, DOI metadata, reviewer packets, or rerun buttons. + +## Why It Is Distinct + +This is not a broad storage sketch or another simple FAIR manifest. It focuses on the package boundary where reviewers need to know whether every artifact is hashed, previewable, licensed, metadata-complete, access-controlled, and connected to a pinned executable environment. + +## Reviewer Checks + +1. Run `node artifact-package-integrity-gate/test.js`. +2. Run `node artifact-package-integrity-gate/demo.js`. +3. Confirm the passing package reports `packageReady: true`. +4. Confirm the broken package test catches missing sha256 hashes, incomplete DataCite fields, unpinned runtimes, and commands that reference missing hosted inputs. +5. Inspect `demo.svg` or `demo.mp4` for the reviewer-facing workflow summary. + +## Payout Conditions Covered + +- Issue #14 has a live Algora bounty route. +- The PR body includes `/claim #14`. +- The module includes a short demo video artifact. +- The implementation is dependency-free and locally verifiable. 
"use strict";

// Demo driver: evaluates one synthetic, fully valid artifact package and
// prints the reviewer dashboard, the per-artifact preview plan, and the
// export packet to stdout.

const { evaluateArtifactPackage } = require("./index");

const PACKAGE_TITLE = "Ocean sensor reproducibility package";

/** Builds a 64-character placeholder digest out of a single repeated character. */
const fakeDigest = (character) => character.repeat(64);

// Package-level metadata in the three flavours the gate inspects.
const packageMetadata = {
  datacite: {
    identifier: "10.5555/scibase.ocean-sensor.2026",
    creators: ["C. Oceanographer", "D. Data Steward"],
    titles: [PACKAGE_TITLE],
    publisher: "SCIBASE.AI",
    publicationYear: "2026",
    resourceType: "Dataset and software",
  },
  jsonLd: {
    "@context": "https://schema.org",
    "@type": "Dataset",
    name: PACKAGE_TITLE,
  },
  schemaOrg: {
    "@type": "Dataset",
    name: PACKAGE_TITLE,
  },
};

// A second-version dataset that records the hash of its predecessor.
const sensorReadings = {
  id: "sensor-readings",
  path: "data/sensor-readings.parquet",
  bytes: 18_000_000,
  hash: fakeDigest("1"),
  license: "CC-BY-4.0",
  access: "public",
  version: 2,
  previousVersionHash: fakeDigest("2"),
  metadata: { title: "Sensor readings", creators: ["C. Oceanographer"], keywords: ["ocean", "sensor"] },
};

const calibrationNotebook = {
  id: "calibration-notebook",
  path: "notebooks/calibration.ipynb",
  bytes: 900_000,
  hash: fakeDigest("3"),
  license: "MIT",
  access: "public",
  metadata: { title: "Calibration notebook", creators: ["D. Data Steward"], keywords: ["calibration"] },
};

// Restricted figure: carries both a justification and a reviewer access window.
const temperatureMap = {
  id: "temperature-map",
  path: "figures/temperature-map.png",
  bytes: 850_000,
  hash: fakeDigest("4"),
  license: "CC-BY-4.0",
  access: "restricted",
  accessJustification: "review-only embargo before journal supplement release",
  reviewerAccessWindow: "2026-05-17/2026-06-17",
  metadata: { title: "Temperature anomaly map", creators: ["D. Data Steward"], keywords: ["figure"] },
};

// One digest-pinned runtime whose single command reads only hosted artifacts.
const pythonReproducer = {
  id: "python-reproducer",
  name: "Pinned Python notebook runner",
  image: `ghcr.io/scibase/ocean-runner@sha256:${fakeDigest("5")}`,
  runtimes: ["python", "jupyter"],
  trigger: "run-analysis-button",
  commands: [
    {
      id: "reproduce-calibration",
      label: "Reproduce calibration",
      command: "python scripts/run_notebook.py notebooks/calibration.ipynb",
      inputs: ["data/sensor-readings.parquet", "notebooks/calibration.ipynb"],
      outputs: ["figures/temperature-map.png"],
    },
  ],
};

const packageInput = {
  generatedAt: "2026-05-17T12:00:00.000Z",
  project: { id: "proj-ocean-sensor-2026", title: PACKAGE_TITLE },
  metadata: packageMetadata,
  artifacts: [sensorReadings, calibrationNotebook, temperatureMap],
  environments: [pythonReproducer],
};

const { dashboard, artifacts: artifactReports, exportPacket } = evaluateArtifactPackage(packageInput);

/** Pretty-prints a value as two-space-indented JSON. */
const printJson = (value) => console.log(JSON.stringify(value, null, 2));

console.log("Artifact package integrity demo");
printJson(dashboard);

console.log("Preview plan:");
artifactReports.forEach(({ path: artifactPath, preview, classification }) => {
  console.log(`- ${artifactPath}: ${preview.previewKind} (${classification.category})`);
});

console.log("Export packet:");
printJson(exportPacket);
/**
 * Artifact package integrity gate.
 *
 * Validates a research artifact package (artifacts, package-level metadata,
 * and executable environments) and returns a reviewer dashboard, a sorted
 * findings list, and an export packet with deterministic digests.
 *
 * The definitions below expect the Node.js `crypto` and `path` modules to be
 * in scope under those names.
 */

/**
 * Default validation policy. Callers can override individual keys through
 * `input.policy` or `options.policy` (see `evaluateArtifactPackage`).
 */
const DEFAULT_POLICY = {
  maxInlinePreviewBytes: 25 * 1024 * 1024,
  largeArtifactBytes: 2 * 1024 * 1024 * 1024,
  requiredMetadataFields: ["identifier", "creators", "titles", "publisher", "publicationYear", "resourceType"],
  requiredArtifactMetadataFields: ["title", "creators", "keywords"],
  acceptedLicenses: ["CC-BY-4.0", "CC0-1.0", "MIT", "Apache-2.0", "BSD-3-Clause"],
  minimumFairScore: 80,
  persistentLinkBase: "https://scibase.ai/artifacts",
};

/** Lower-cased file extension -> artifact category, preview kind, and metadata kind. */
const TYPE_BY_EXTENSION = {
  ".csv": { category: "dataset", previewKind: "tabular-preview", metadataKind: "Dataset" },
  ".tsv": { category: "dataset", previewKind: "tabular-preview", metadataKind: "Dataset" },
  ".xlsx": { category: "dataset", previewKind: "spreadsheet-preview", metadataKind: "Dataset" },
  ".json": { category: "dataset", previewKind: "json-tree-preview", metadataKind: "Dataset" },
  ".parquet": { category: "dataset", previewKind: "schema-preview", metadataKind: "Dataset" },
  ".py": { category: "code", previewKind: "code-viewer", metadataKind: "SoftwareSourceCode" },
  ".r": { category: "code", previewKind: "code-viewer", metadataKind: "SoftwareSourceCode" },
  ".jl": { category: "code", previewKind: "code-viewer", metadataKind: "SoftwareSourceCode" },
  ".ipynb": { category: "notebook", previewKind: "notebook-render", metadataKind: "SoftwareSourceCode" },
  ".png": { category: "figure", previewKind: "image-thumbnail", metadataKind: "ImageObject" },
  ".jpg": { category: "figure", previewKind: "image-thumbnail", metadataKind: "ImageObject" },
  ".jpeg": { category: "figure", previewKind: "image-thumbnail", metadataKind: "ImageObject" },
  ".svg": { category: "figure", previewKind: "image-thumbnail", metadataKind: "ImageObject" },
  ".mp4": { category: "media", previewKind: "media-thumbnail", metadataKind: "VideoObject" },
  ".h5": { category: "model", previewKind: "model-summary", metadataKind: "DataDownload" },
  ".onnx": { category: "model", previewKind: "model-summary", metadataKind: "DataDownload" },
  ".pt": { category: "model", previewKind: "model-summary", metadataKind: "DataDownload" },
};

/** Preview kind used for artifact types that have no preview at all. */
const DOWNLOAD_ONLY = "download-only";

/**
 * Matches a runtime image pinned by a complete sha256 digest: either a bare
 * `sha256:<64 hex>` value or a reference ending in `@sha256:<64 hex>`.
 */
const PINNED_IMAGE_PATTERN = /(?:^|@)sha256:[a-f0-9]{64}$/i;

/**
 * Recursively rebuilds a value with object keys in sorted order so that
 * serialising it does not depend on key insertion order. Array order is kept.
 *
 * @param {*} value - JSON-like value.
 * @returns {*} Copy of `value` with sorted object keys.
 */
function canonicalize(value) {
  if (Array.isArray(value)) {
    return value.map(canonicalize);
  }
  if (value && typeof value === "object") {
    return Object.keys(value)
      .sort()
      .reduce((result, key) => {
        result[key] = canonicalize(value[key]);
        return result;
      }, {});
  }
  return value;
}

/**
 * Hex sha256 digest of the canonical JSON form of `value`.
 *
 * @param {*} value - JSON-like value.
 * @returns {string} 64-character lowercase hex digest.
 */
function stableDigest(value) {
  // JSON.stringify(undefined) yields undefined, which Hash#update rejects;
  // hash it like null instead of throwing.
  const serialized = JSON.stringify(canonicalize(value)) ?? "null";
  return crypto.createHash("sha256").update(serialized).digest("hex");
}

/**
 * Turns a list into a sorted array of strings with falsy entries removed.
 *
 * @param {*} value - Candidate list.
 * @returns {string[]} Sorted strings; empty when `value` is not an array.
 */
function normalizeList(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter(Boolean).map(String).sort();
}

/**
 * Coerces a value to an array of objects. Non-array input becomes an empty
 * list; non-object entries become `{}` so they surface as "missing field"
 * findings instead of throwing a TypeError.
 *
 * @param {*} value - Candidate list of records.
 * @returns {object[]} List of objects.
 */
function asRecords(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.map((entry) => (entry && typeof entry === "object" ? entry : {}));
}

/**
 * Classifies an artifact by the extension of its `path` (or `name` when no
 * path is set). Unknown or missing extensions fall back to a download-only
 * supplement.
 *
 * @param {{path?: string, name?: string}} artifact - Artifact record.
 * @returns {{extension: string, category: string, previewKind: string, metadataKind: string, executable: boolean}}
 */
function classifyArtifact(artifact) {
  const record = artifact ?? {};
  const extension = path.extname(String(record.path || record.name || "")).toLowerCase();
  const known = TYPE_BY_EXTENSION[extension] || {
    category: "supplement",
    previewKind: DOWNLOAD_ONLY,
    metadataKind: "CreativeWork",
  };

  return {
    extension: extension || "none",
    category: known.category,
    previewKind: known.previewKind,
    metadataKind: known.metadataKind,
    executable: ["code", "notebook"].includes(known.category),
  };
}

/** @returns {boolean} True when `value` is a 64-character hex string. */
function isSha256(value) {
  return typeof value === "string" && /^[a-f0-9]{64}$/i.test(value);
}

/**
 * Checks that a runtime image is pinned by a complete sha256 digest. A
 * reference that merely contains `@sha256:` with an empty or truncated digest
 * is rejected.
 *
 * @param {*} value - Image reference.
 * @returns {boolean} True when the reference carries a full digest.
 */
function hasPinnedRuntimeImage(value) {
  return typeof value === "string" && PINNED_IMAGE_PATTERN.test(value);
}

/** @returns {boolean} True for non-empty arrays and for anything except undefined, null, and "". */
function hasValue(value) {
  if (Array.isArray(value)) {
    return value.length > 0;
  }
  return value !== undefined && value !== null && value !== "";
}

/** Builds one finding record. */
function makeFinding(target, severity, message, action) {
  return { target, severity, message, action };
}

/** @returns {number} Sort weight for a severity; unknown severities weigh 0. */
function severityWeight(severity) {
  return { blocker: 4, high: 3, medium: 2, low: 1 }[severity] || 0;
}

/** Orders findings by descending severity, then by `target:message`. */
function compareFindings(a, b) {
  const severityDelta = severityWeight(b.severity) - severityWeight(a.severity);
  if (severityDelta !== 0) {
    return severityDelta;
  }
  return `${a.target}:${a.message}`.localeCompare(`${b.target}:${b.message}`);
}

/**
 * Validates one artifact and derives its preview plan.
 *
 * @param {object} artifact - Artifact record from the package input.
 * @param {object} policy - Effective policy (see `DEFAULT_POLICY`).
 * @returns {object} Report with normalised fields, `classification`,
 *   `preview`, and the `findings` raised for this artifact.
 */
function validateArtifact(artifact, policy) {
  const classification = classifyArtifact(artifact);
  const findings = [];
  const metadata = artifact.metadata || {};
  // Fall back to a fixed label so a finding never carries an undefined target.
  const target = artifact.id || artifact.path || "artifact";
  const bytes = artifact.bytes || 0;
  const version = artifact.version || 1;

  if (!artifact.id) {
    findings.push(makeFinding(artifact.path || "artifact", "blocker", "artifact lacks stable id", "assign a UUID or repository-local artifact id"));
  }

  if (!artifact.path) {
    findings.push(makeFinding(artifact.id || "artifact", "blocker", "artifact lacks repository path", "record the hosted path before export"));
  }

  if (!isSha256(artifact.hash)) {
    findings.push(makeFinding(target, "blocker", "artifact lacks sha256 content hash", "store a deterministic sha256 digest"));
  }

  for (const field of policy.requiredArtifactMetadataFields) {
    if (!hasValue(metadata[field])) {
      findings.push(makeFinding(target, "high", `artifact metadata missing ${field}`, "complete reviewer-facing metadata"));
    }
  }

  if (!policy.acceptedLicenses.includes(artifact.license)) {
    findings.push(makeFinding(target, "high", "artifact license is missing or unsupported", "attach a reusable license or restrict export"));
  }

  if (artifact.access === "restricted" && (!artifact.accessJustification || !artifact.reviewerAccessWindow)) {
    findings.push(
      makeFinding(target, "high", "restricted artifact lacks reviewer access evidence", "record access justification and review window")
    );
  }

  if (bytes > policy.largeArtifactBytes && artifact.storageTier !== "object-storage") {
    findings.push(
      makeFinding(target, "medium", "large artifact is not routed to object storage", "move large payload to object storage with a persistent link")
    );
  }

  if (version > 1 && !artifact.previousVersionHash) {
    findings.push(
      makeFinding(target, "medium", "versioned artifact lacks previous version hash", "link the prior artifact hash for diff and rollback")
    );
  }

  // A type without any preview stays download-only whatever its size;
  // relabelling it "deferred-preview" would make the dashboard count it as
  // previewable.
  const hasPreview = classification.previewKind !== DOWNLOAD_ONLY;
  const inlinePreview = hasPreview && bytes <= policy.maxInlinePreviewBytes;
  let previewKind = DOWNLOAD_ONLY;
  let reason = "no preview is available for this artifact type";
  if (inlinePreview) {
    previewKind = classification.previewKind;
    reason = "safe for metadata-aware preview";
  } else if (hasPreview) {
    previewKind = "deferred-preview";
    reason = "preview generated asynchronously for large payload";
  }

  const preview = {
    artifactId: artifact.id,
    path: artifact.path,
    category: classification.category,
    previewKind,
    metadataKind: classification.metadataKind,
    inlinePreview,
    reason,
  };

  return {
    id: artifact.id,
    path: artifact.path,
    hash: artifact.hash,
    license: artifact.license,
    access: artifact.access || "public",
    bytes,
    version,
    classification,
    preview,
    findings,
  };
}

/**
 * Validates package-level DataCite, JSON-LD, and schema.org metadata.
 *
 * The score is the percentage of passed checks: one check per required
 * DataCite field plus four more (JSON-LD `@context` and `@type`, schema.org
 * `@type` and `name`). Each of those four is counted on its own, so numerator
 * and denominator agree and a package with no metadata scores 0. Findings
 * remain one per standard.
 *
 * @param {object} metadata - `{ datacite, jsonLd, schemaOrg }`.
 * @param {object} policy - Effective policy.
 * @returns {{datacite: object, jsonLd: object, schemaOrg: object, score: number, findings: object[]}}
 */
function validateMetadata(metadata, policy) {
  const findings = [];
  const source = metadata || {};
  const datacite = source.datacite || {};
  const jsonLd = source.jsonLd || {};
  const schemaOrg = source.schemaOrg || {};
  let failedChecks = 0;

  for (const field of policy.requiredMetadataFields) {
    if (!hasValue(datacite[field])) {
      failedChecks += 1;
      findings.push(makeFinding("datacite", "high", `DataCite metadata missing ${field}`, "complete the required DataCite field"));
    }
  }

  const missingJsonLdKeys = ["@context", "@type"].filter((key) => !jsonLd[key]);
  if (missingJsonLdKeys.length > 0) {
    failedChecks += missingJsonLdKeys.length;
    findings.push(makeFinding("json-ld", "high", "JSON-LD context or type is missing", "publish machine-readable JSON-LD"));
  }

  const missingSchemaOrgKeys = ["@type", "name"].filter((key) => !schemaOrg[key]);
  if (missingSchemaOrgKeys.length > 0) {
    failedChecks += missingSchemaOrgKeys.length;
    findings.push(makeFinding("schema.org", "high", "schema.org type or name is missing", "publish discoverable schema.org markup"));
  }

  const totalChecks = policy.requiredMetadataFields.length + 4;
  const score = Math.max(0, Math.round(((totalChecks - failedChecks) / totalChecks) * 100));

  return {
    datacite,
    jsonLd,
    schemaOrg,
    score,
    findings,
  };
}

/**
 * Validates executable environments and their rerun commands.
 *
 * @param {object[]} environments - Environment records.
 * @param {Set<string>} artifactPaths - Paths of artifacts hosted in the package.
 * @returns {{plans: object[], findings: object[]}} One plan per environment
 *   plus every finding raised. A command is `eligible` only when all of its
 *   inputs are hosted and the image is digest-pinned; a plan is `ready` when
 *   it raised no blocker or high finding.
 */
function validateEnvironments(environments, artifactPaths) {
  const findings = [];
  const plans = [];

  for (const environment of environments) {
    const envFindings = [];
    const envTarget = environment.id || environment.name || "environment";
    const pinned = hasPinnedRuntimeImage(environment.image);
    const runtimes = normalizeList(environment.runtimes);

    if (!environment.id) {
      envFindings.push(makeFinding(environment.name || "environment", "blocker", "environment lacks stable id", "assign a stable runtime id"));
    }
    if (!pinned) {
      envFindings.push(makeFinding(envTarget, "high", "runtime image is not pinned by digest", "pin Docker or OCI image with sha256 digest"));
    }
    if (runtimes.length === 0) {
      envFindings.push(makeFinding(envTarget, "medium", "runtime stack is not declared", "declare Python, R, Julia, or model runtime"));
    }

    const commands = asRecords(environment.commands);
    if (commands.length === 0) {
      envFindings.push(makeFinding(envTarget, "medium", "environment has no executable commands", "add rerun or reproduce commands"));
    }

    const runnableCommands = commands.map((command) => {
      const inputs = normalizeList(command.inputs);
      const missingInputs = inputs.filter((input) => !artifactPaths.has(input));
      if (missingInputs.length > 0) {
        envFindings.push(
          makeFinding(
            `${envTarget}:${command.id || command.label || "command"}`,
            "blocker",
            `command references missing inputs: ${missingInputs.join(", ")}`,
            "attach every command input as a hosted artifact"
          )
        );
      }

      return {
        id: command.id,
        label: command.label,
        command: command.command,
        inputs,
        outputs: normalizeList(command.outputs),
        eligible: missingInputs.length === 0 && pinned,
      };
    });

    plans.push({
      id: environment.id,
      name: environment.name,
      image: environment.image,
      runtimes,
      trigger: environment.trigger || "manual",
      commands: runnableCommands,
      ready: !envFindings.some((finding) => finding.severity === "blocker" || finding.severity === "high"),
    });
    findings.push(...envFindings);
  }

  if (environments.length === 0) {
    findings.push(makeFinding("environments", "high", "no executable environment is registered", "add a pinned runtime with at least one rerun command"));
  }

  return { plans, findings };
}

/**
 * Raises one blocker per artifact id or path that is shared by several
 * artifacts. Persistent links are keyed by id (falling back to path), so a
 * shared key would make two artifacts resolve to the same link.
 *
 * @param {object[]} artifactReports - Reports from `validateArtifact`.
 * @returns {object[]} Duplicate findings; empty when every key is unique.
 */
function findDuplicateArtifacts(artifactReports) {
  const findings = [];
  const actions = {
    id: "give every artifact a unique id",
    path: "host every artifact at a unique path",
  };

  for (const field of Object.keys(actions)) {
    const seen = new Set();
    const reported = new Set();
    for (const report of artifactReports) {
      const key = report[field];
      if (!hasValue(key)) {
        continue;
      }
      if (seen.has(key) && !reported.has(key)) {
        reported.add(key);
        findings.push(makeFinding(String(key), "blocker", `artifact ${field} is used by more than one artifact`, actions[field]));
      }
      seen.add(key);
    }
  }

  return findings;
}

/**
 * Builds one persistent link per artifact, keyed by id or, failing that, path.
 *
 * @param {object[]} artifacts - Artifact reports.
 * @param {string} baseUrl - Link base; trailing slashes are ignored.
 * @returns {{artifactId: *, path: *, href: (string|null), access: string}[]}
 *   `href` is null when the artifact has neither id nor path, rather than a
 *   link ending in "undefined".
 */
function buildPersistentLinks(artifacts, baseUrl) {
  const base = baseUrl.replace(/\/+$/, "");
  return artifacts.map((artifact) => {
    const key = artifact.id || artifact.path;
    return {
      artifactId: artifact.id,
      path: artifact.path,
      href: key ? `${base}/${encodeURIComponent(key)}` : null,
      access: artifact.access,
    };
  });
}

/**
 * Evaluates a whole artifact package.
 *
 * @param {object} [input] - Package input: `artifacts`, `environments`,
 *   `metadata`, and optionally `project`, `generatedAt`, `policy`. Missing
 *   or null input is treated as an empty package.
 * @param {object} [options] - `options.policy` overrides `input.policy`,
 *   which overrides `DEFAULT_POLICY` (shallow merge, key by key).
 * @returns {{dashboard: object, artifacts: object[], metadata: object, environments: object[], findings: object[], exportPacket: object}}
 *   `dashboard.packageReady` is true only when there are no blocker or high
 *   findings and the metadata score reaches `policy.minimumFairScore`.
 */
function evaluateArtifactPackage(input = {}, options = {}) {
  const source = input ?? {};
  const policy = { ...DEFAULT_POLICY, ...(source.policy || {}), ...(options?.policy || {}) };
  const artifacts = asRecords(source.artifacts);
  const environments = asRecords(source.environments);
  const metadata = source.metadata || {};

  const artifactReports = artifacts.map((artifact) => validateArtifact(artifact, policy));
  const artifactPaths = new Set(artifactReports.map((artifact) => artifact.path).filter(Boolean));
  const metadataReport = validateMetadata(metadata, policy);
  const environmentReport = validateEnvironments(environments, artifactPaths);

  const findings = [
    ...artifactReports.flatMap((artifact) => artifact.findings),
    ...findDuplicateArtifacts(artifactReports),
    ...metadataReport.findings,
    ...environmentReport.findings,
  ].sort(compareFindings);

  const blockerCount = findings.filter((finding) => finding.severity === "blocker").length;
  const highCount = findings.filter((finding) => finding.severity === "high").length;
  const previewableArtifacts = artifactReports.filter((artifact) => artifact.preview.previewKind !== DOWNLOAD_ONLY).length;
  const runnableCommands = environmentReport.plans.flatMap((plan) => plan.commands).filter((command) => command.eligible).length;
  const categoryCounts = artifactReports.reduce((counts, artifact) => {
    counts[artifact.classification.category] = (counts[artifact.classification.category] || 0) + 1;
    return counts;
  }, {});

  const sourceDigest = stableDigest({ artifacts, metadata, environments });
  const exportPacket = {
    scope: "scientific-artifact-package-integrity",
    projectId: source.project && source.project.id ? source.project.id : "unassigned-project",
    // Epoch fallback keeps the packet free of wall-clock time when the
    // caller does not supply a timestamp.
    generatedAt: source.generatedAt || new Date(0).toISOString(),
    sourceDigest,
    packageDigest: stableDigest({
      sourceDigest,
      previewPlan: artifactReports.map((artifact) => artifact.preview),
      metadataScore: metadataReport.score,
      environmentPlan: environmentReport.plans,
    }),
    persistentLinks: buildPersistentLinks(artifactReports, policy.persistentLinkBase),
  };

  return {
    dashboard: {
      artifacts: artifactReports.length,
      categories: categoryCounts,
      previewableArtifacts,
      executableEnvironments: environmentReport.plans.length,
      runnableCommands,
      metadataScore: metadataReport.score,
      blockers: blockerCount,
      highRiskFindings: highCount,
      packageReady: blockerCount === 0 && highCount === 0 && metadataReport.score >= policy.minimumFairScore,
    },
    // Per-artifact findings are already part of the merged list above.
    artifacts: artifactReports.map(({ findings: _findings, ...artifact }) => artifact),
    metadata: metadataReport,
    environments: environmentReport.plans,
    findings,
    exportPacket,
  };
}
+ +## Metadata-Aware Previews + +- Preview plans cover tabular files, spreadsheet files, JSON trees, notebooks, code viewers, image/media thumbnails, and model summaries. +- Oversized files receive asynchronous preview guidance instead of unsafe inline rendering. + +## Structured Metadata And Standards + +- `evaluateArtifactPackage` validates DataCite required fields, JSON-LD context/type, and schema.org type/name. +- Artifact-level metadata checks enforce titles, creators, and keywords. +- The export packet contains deterministic source and package digests for DOI/API/archive workflows. + +## FAIR Compliance + +- License checks require reusable licenses or explicit restriction handling. +- Restricted artifacts must include access justification and a reviewer access window. +- Persistent links are generated for each artifact in the package. +- Versioned artifacts require prior hashes for diff and rollback evidence. + +## Executable Environments + +- Runtime plans require pinned container images with sha256 digests. +- Rerun commands are eligible only when their declared inputs exist as hosted artifacts. +- The dashboard exposes runnable command counts and package readiness. + +## Reviewer Verification + +- `test.js` covers successful package readiness, file classification, metadata checks, persistent link export, broken-package blockers, and stable digest canonicalization. +- `demo.js` prints a complete synthetic package dashboard, preview plan, and export packet. 
"use strict";

// Self-contained assertion script for the artifact package integrity gate.
// Run it with `node artifact-package-integrity-gate/test.js`; it throws on
// the first failed assertion and prints a confirmation line otherwise.

const assert = require("node:assert/strict");
const { classifyArtifact, evaluateArtifactPackage, stableDigest } = require("./index");

/** Builds a 64-character hex placeholder digest from one repeated character. */
const sha = (character) => character.repeat(64);

const HASH_A = sha("a");
const HASH_B = sha("b");
const HASH_C = sha("c");
const HASH_D = sha("d");

const PACKAGE_TITLE = "Microbiome replication package";

/** A complete package that should pass every check. */
function buildValidPackage() {
  const rawCounts = {
    id: "art-raw-counts",
    path: "data/raw-counts.csv",
    bytes: 1024,
    hash: HASH_A,
    license: "CC-BY-4.0",
    access: "public",
    version: 2,
    previousVersionHash: HASH_B,
    metadata: { title: "Raw abundance counts", creators: ["A. Researcher"], keywords: ["microbiome", "counts"] },
  };
  const notebook = {
    id: "art-notebook",
    path: "notebooks/reproduce.ipynb",
    bytes: 2048,
    hash: HASH_C,
    license: "MIT",
    access: "public",
    metadata: { title: "Reproduction notebook", creators: ["B. Analyst"], keywords: ["notebook"] },
  };
  const figure = {
    id: "art-figure",
    path: "figures/alpha-diversity.png",
    bytes: 4096,
    hash: HASH_D,
    license: "CC-BY-4.0",
    access: "restricted",
    accessJustification: "contains unpublished cohort label in thumbnail metadata",
    reviewerAccessWindow: "2026-05-17/2026-06-17",
    metadata: { title: "Alpha diversity preview", creators: ["B. Analyst"], keywords: ["figure"] },
  };

  return {
    generatedAt: "2026-05-17T12:00:00.000Z",
    project: { id: "proj-microbiome-2026", title: PACKAGE_TITLE },
    metadata: {
      datacite: {
        identifier: "10.5555/scibase.microbiome.2026",
        creators: ["A. Researcher", "B. Analyst"],
        titles: [PACKAGE_TITLE],
        publisher: "SCIBASE.AI",
        publicationYear: "2026",
        resourceType: "Dataset and software",
      },
      jsonLd: {
        "@context": "https://schema.org",
        "@type": "Dataset",
        name: PACKAGE_TITLE,
      },
      schemaOrg: {
        "@type": "Dataset",
        name: PACKAGE_TITLE,
      },
    },
    artifacts: [rawCounts, notebook, figure],
    environments: [
      {
        id: "env-python",
        name: "Pinned Python reproducer",
        image: `ghcr.io/scibase/reproducer@sha256:${sha("e")}`,
        runtimes: ["python"],
        trigger: "run-analysis-button",
        commands: [
          {
            id: "rerun-notebook",
            label: "Rerun notebook",
            command: "python scripts/run_notebook.py notebooks/reproduce.ipynb",
            inputs: ["data/raw-counts.csv", "notebooks/reproduce.ipynb"],
            outputs: ["figures/alpha-diversity.png"],
          },
        ],
      },
    ],
  };
}

/** A package with a bad hash, sparse metadata, an unpinned image, and a dangling input. */
function buildBrokenPackage() {
  return {
    generatedAt: "2026-05-17T12:00:00.000Z",
    project: { id: "proj-broken" },
    metadata: {
      datacite: {
        identifier: "10.5555/missing-fields",
      },
      jsonLd: { name: "Missing type" },
      schemaOrg: {},
    },
    artifacts: [
      {
        id: "art-private",
        path: "data/participant-export.csv",
        bytes: 4 * 1024 * 1024 * 1024,
        hash: "not-a-hash",
        access: "restricted",
        version: 3,
        metadata: { title: "Participant export" },
      },
    ],
    environments: [
      {
        id: "env-unpinned",
        name: "Unpinned runner",
        image: "python:3.12",
        runtimes: [],
        commands: [
          {
            id: "rerun",
            label: "Rerun",
            command: "python analysis.py data/missing.csv",
            inputs: ["data/missing.csv"],
          },
        ],
      },
    ],
  };
}

/** True when at least one finding message contains `fragment`. */
const mentions = (findings, fragment) => findings.some(({ message }) => message.includes(fragment));

// --- Valid package: classification, dashboard counters, export packet ---
const validPackage = buildValidPackage();
const result = evaluateArtifactPackage(validPackage);
const { dashboard, exportPacket } = result;

assert.strictEqual(classifyArtifact({ path: "data/results.parquet" }).category, "dataset");
assert.strictEqual(classifyArtifact({ path: "analysis/model.onnx" }).previewKind, "model-summary");
assert.strictEqual(dashboard.artifacts, 3);
assert.strictEqual(dashboard.categories.dataset, 1);
assert.strictEqual(dashboard.categories.notebook, 1);
assert.strictEqual(dashboard.categories.figure, 1);
assert.strictEqual(dashboard.previewableArtifacts, 3);
assert.strictEqual(dashboard.runnableCommands, 1);
assert.strictEqual(dashboard.blockers, 0);
assert.strictEqual(dashboard.highRiskFindings, 0);
assert.strictEqual(dashboard.packageReady, true);
assert.match(exportPacket.packageDigest, /^[a-f0-9]{64}$/);
assert.strictEqual(exportPacket.persistentLinks.length, 3);

const notebookReport = result.artifacts.find(({ id }) => id === "art-notebook");
assert.ok(notebookReport.classification.executable);

// --- Broken package: must be rejected with the expected findings ---
const brokenPackage = buildBrokenPackage();
const broken = evaluateArtifactPackage(brokenPackage);

assert.strictEqual(broken.dashboard.packageReady, false);
assert.ok(broken.dashboard.blockers >= 2);
assert.ok(mentions(broken.findings, "missing inputs"));
assert.ok(mentions(broken.findings, "sha256"));
assert.ok(mentions(broken.findings, "DataCite"));

const [brokenEnvironment] = broken.environments;
assert.strictEqual(brokenEnvironment.commands[0].eligible, false);

// --- Digest must not depend on object key order ---
const digestA = stableDigest({ b: 2, a: [3, 1] });
const digestB = stableDigest({ a: [3, 1], b: 2 });
assert.strictEqual(digestA, digestB);

console.log("artifact package integrity gate tests passed");