diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index da9db6c..7abbaf7 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -11,6 +11,10 @@ on: push: branches: [main, master] +concurrency: + group: rust-ci-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index c32021a..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,27 +0,0 @@ - -# Contributor Covenant Code of Conduct - -## Our Pledge - -We pledge to make participation a harassment-free experience for everyone. - -## Our Standards - -**Positive behavior:** -* Using welcoming language -* Being respectful of differing viewpoints -* Accepting constructive criticism -* Focusing on what is best for the community - -**Unacceptable behavior:** -* Harassment, trolling, or personal attacks -* Publishing private information without permission - -## Enforcement - -Report issues to the maintainers. All complaints will be reviewed. - -## Attribution - -Adapted from [Contributor Covenant](https://www.contributor-covenant.org/) v2.1. - diff --git a/Justfile b/Justfile index 465036a..8aafee0 100644 --- a/Justfile +++ b/Justfile @@ -51,8 +51,12 @@ assail: @command -v panic-attack >/dev/null 2>&1 && panic-attack assail . || echo "panic-attack not found — install from https://github.com/hyperpolymath/panic-attacker" # --- Domain-Specific Recipes (verisimiser) --- - -# Augment a database with VeriSimDB octad\naugment DB_URL:\n cargo run -- augment {{DB_URL}}\n\n# Check octad layer completeness\ncheck-octad DB_URL:\n cargo run -- check-octad {{DB_URL}}\n\n# Generate migration scripts\nmigrate DB_URL:\n cargo run -- migrate {{DB_URL}} +# +# (Reserved.) 
Recipes for clap subcommands like `augment`, `check-octad`, +# and `migrate` were removed per ADR-0003: they wrapped subcommands that +# don't exist in src/main.rs (the real subcommands are `init`, `generate`, +# `start`, `drift`, `provenance`, `history`, `status`, `octad`). +# Re-add wrappers here when their underlying subcommands ship. # Run contractile checks contractile-check: diff --git a/README.adoc b/README.adoc index 87184fd..a2df91f 100644 --- a/README.adoc +++ b/README.adoc @@ -7,12 +7,19 @@ Jonathan D.A. Jewell :icons: font :source-highlighter: rouge +== Related repos + +* https://github.com/hyperpolymath/verisimdb-data[`verisimdb-data`] — + flat-file store for scan results and drift snapshots, and the dogfood + site for the hyperpolymath Idris2 + Zig ABI shared with `proven`, + `burble`, `gossamer`. Verisimiser does not require it. + == What Is This? VeriSimiser augments existing databases with capabilities from -https://github.com/hyperpolymath/nextgen-databases[VeriSimDB]'s octad model — -specifically the capabilities that work as genuine piggybacks without requiring -you to replace your database. +https://github.com/hyperpolymath/nextgen-databases[VeriSimDB]'s **octad +model** — eight concerns layered over any backend without forcing a +schema migration. **Honest framing**: this is not a pure bolt-on like the language -isers. Language -isers generate a separate wrapper alongside your code — one-way @@ -20,64 +27,47 @@ dependency, your code untouched. Database augmentation is fundamentally different because it interacts with shared mutable state. VeriSimiser is therefore split into two tiers: -* **Tier 1 (true piggyback)** — capabilities that sit alongside or in front - of your database, never touching its storage engine. These are safe bolt-ons. -* **Tier 2 (augmentation layer)** — capabilities that require additional - storage alongside your database. 
These are honest about being "VeriSimDB - with your database as one backend" rather than pretending to be invisible. +* **Tier 1 (true piggyback)** — concerns that sit alongside or in front + of your database, writing only to a sidecar and never touching your + storage engine. Safe bolt-ons. +* **Tier 2 (augmentation layer)** — modality overlays (graph, vector, + tensor, semantic, document, spatial) that require additional storage + alongside your database. Honest about being "VeriSimDB with your + database as one backend" rather than pretending to be invisible. -== VeriSimDB's Octad: Eight Modalities +== The Octad: Eight Concerns -Each entity in VeriSimDB exists simultaneously across up to 8 representations: +Every entity in a verisimiser-augmented database is observable along up +to eight concerns. Two are inherent to the target database (always on); +the remaining six are added by sidecars. -[cols="1,2,1,1"] +[cols="1,3,2,1"] |=== -| Modality | Purpose | Storage | VeriSimiser Tier - -| **Graph** | RDF triples and property graph edges | Pure Rust | Tier 2 -| **Vector** | Embeddings for similarity search | HNSW | Tier 2 -| **Tensor** | Multi-dimensional numeric data | ndarray/Burn | Tier 2 -| **Semantic** | Type annotations and CBOR proof blobs | CBOR | Tier 2 -| **Document** | Full-text searchable content | Tantivy | Tier 2 -| **Temporal** | Version history and time-series | chrono | Tier 1 ✓ -| **Provenance** | Origin tracking and transformation chain | SHA-256 hash-chain | Tier 1 ✓ -| **Spatial** | Geospatial coordinates and geometries | R-tree | Tier 2 +| Concern | Purpose | Sidecar storage | Tier + +| **Data** | The original entity as stored in the target DB | (target DB itself) | inherent +| **Metadata** | Schema and type information extracted from the target DB | `verisimdb_metadata` | inherent +| **Provenance** | SHA-256 hash-chain of who did what and when | `verisimdb_provenance_log` | Tier 1 ✓ +| **Lineage** | Directed-edge graph of data derivation 
(intended DAG; see ADR future) | `verisimdb_lineage_graph` | Tier 1 +| **Constraints** | Cross-dimensional invariants and drift between Data + Metadata + overlays | (rules + observers, not a table) | Tier 1 +| **AccessControl** | Row/column-level access policies evaluated at query time | `verisimdb_access_policies` | Tier 1 +| **Temporal** | Version history with point-in-time queries and rollback | `verisimdb_temporal_versions` | Tier 1 ✓ +| **Simulation** | What-if branching and sandbox query execution | `verisimdb_simulation_branches` + `_deltas` | Tier 2 |=== +The codebase commits to this ontology in +`src/abi/mod.rs::OctadDimension`, `src/manifest/mod.rs::OctadConfig`, +and the codegen tables. See `docs/decisions/ADR-0001-octad-ontology.adoc` +for why "concerns" was chosen over the alternative "modalities" framing. + == Tier 1: True Piggybacks These work like PostGIS — they add capability without replacing anything. -=== Cross-Modal Drift Detection - -The jewel of VeriSimDB. Drift detection monitors whether representations -of the same entity stay consistent across modalities. VeriSimiser can -overlay drift detection onto any database: - -* Your database stores the primary data (unchanged) -* VeriSimiser maintains a lightweight **drift index** alongside it -* Queries pass through to your database normally -* VeriSimiser intercepts results and checks for cross-modal inconsistencies -* Alerts when representations drift apart - -This is a **read-path augmentation** — it observes query results, it doesn't -modify them. Safe to add, safe to remove, no data dependency. - -VeriSimDB detects eight categories of cross-modal drift: - -1. Structural drift — schema changes not reflected across modalities -2. Semantic drift — meaning divergence between representations -3. Temporal drift — version skew between modalities -4. Statistical drift — distribution shift in vector/tensor spaces -5. Referential drift — broken links between graph and document modalities -6. 
Provenance drift — transformation chain inconsistencies -7. Spatial drift — coordinates inconsistent with other modalities -8. Embedding drift — vector embeddings stale relative to source documents - === Provenance Tracking Hash-chain verified origin tracking for every piece of data. VeriSimiser -can add provenance to any database as a sidecar: +adds provenance to any database as a sidecar: * Every write is intercepted and a provenance record created * SHA-256 hash chain links provenance records in order @@ -90,7 +80,7 @@ change what happened. The provenance chain is stored in a separate sidecar === Temporal Versioning -Automatic version history for entities. VeriSimiser can maintain a temporal +Automatic version history for entities. VeriSimiser maintains a temporal sidecar that records every state change: * Point-in-time queries: "what did this entity look like at time T?" @@ -98,67 +88,95 @@ sidecar that records every state change: * Rollback capability: restore any entity to a previous state * Retention policies: auto-prune history older than N days -This piggybacks onto write events (triggers, CDC, or application-level hooks) -and stores version history in a separate sidecar. +This piggybacks onto write events (triggers, CDC, or application-level +hooks) and stores version history in a separate sidecar. + +=== Constraints & Drift Detection + +The Constraints concern enforces cross-dimensional invariants and +observes *drift* — places where Data, Metadata, and any active Tier 2 +overlays disagree. Drift detection is a **read-path observer**: it +inspects query results, never modifies them. + +VeriSimiser tracks eight symptomatic categories (mapped onto the +Constraints concern): + +. **Structural drift** — schema changes not reflected across active overlays +. **Semantic drift** — meaning divergence between overlays +. **Temporal drift** — version skew between overlays +. **Statistical drift** — distribution shift in vector/tensor overlays +. 
**Referential drift** — broken links between the graph overlay and Data +. **Provenance drift** — transformation-chain inconsistencies +. **Spatial drift** — coordinates inconsistent with other overlays +. **Embedding drift** — vector embeddings stale relative to source documents + +(Each category needs a computable definition before it can be detected; +this is on the roadmap, not implemented yet.) + +=== Lineage + +Directed-edge graph of data derivation: which entity was derived from +which other entity, and by what transformation. Intended as a DAG (cycle +prevention enforcement is a future ADR). + +=== Access Control + +Row-level and column-level policies evaluated at query time. Independent +of any backend-native role system; verisimiser interprets policies and +filters/redacts results. == Tier 2: Augmentation Layer These capabilities require additional storage alongside your database. -They're honest about being "VeriSimDB modalities with your database as the -document/primary store." This is still valuable — it's how you get octad -capabilities incrementally — but it's not a bolt-on. +They are honest about being "VeriSimDB modalities with your database as +the document/primary store." This is still valuable — it's how you get +extra modality projections incrementally — but it's not invisible. -* **Graph overlay**: Add RDF triples and property graph edges to entities +* **Graph overlay**: Add RDF triples and property-graph edges to entities in your relational database. Stored in a separate graph index. -* **Vector overlay**: Add embeddings for similarity search. Stored in HNSW - index alongside your database. +* **Vector overlay**: Add embeddings for similarity search. Stored in + HNSW index alongside your database. * **Tensor overlay**: Add multi-dimensional numeric data. Stored in ndarray-backed sidecar. * **Semantic overlay**: Add type annotations and proof blobs. Stored in CBOR sidecar. +* **Document overlay**: Add full-text search over text columns. 
+ Stored in a Tantivy index. * **Spatial overlay**: Add geospatial coordinates. Stored in R-tree sidecar. +* **Simulation**: Branched copies of data for what-if analysis. Stored + in `verisimdb_simulation_branches` + `_deltas`. -Each Tier 2 modality has its own storage and can be enabled independently. -Your primary database remains the source of truth for its native data. +Each Tier 2 overlay has its own storage and can be enabled +independently. Your primary database remains the source of truth for +its native data. == The Manifest [source,toml] ---- -[verisimiser] +[project] name = "my-augmented-db" +version = "0.1.0" [database] -target-db = "postgresql" -connection-string = "postgres://localhost/mydb" - -# Tier 1: true piggybacks (no additional storage in your database) -[tier1] -drift-detection = true # cross-modal drift monitoring -provenance = true # SHA-256 hash-chain audit trail -temporal-versioning = true # automatic version history - -[tier1.provenance] -sidecar = "sqlite" # sqlite | file | verisim -sidecar-path = ".verisimiser/provenance.db" - -[tier1.temporal] -sidecar = "sqlite" -retention-days = 90 - -# Tier 2: augmentation layer (additional storage alongside your database) -[tier2] -graph = false # RDF/property graph overlay -vector = false # embedding similarity search -tensor = false # multi-dimensional numeric -semantic = false # type annotations + proof blobs -spatial = false # geospatial coordinates - -[tier2.vector] -# model = "sentence-transformers/all-MiniLM-L6-v2" -# dimensions = 384 +backend = "sqlite" # sqlite | postgresql | mongodb +connection-string-env = "DATABASE_URL" # env var; never the literal secret +schema-source = "schema.sql" # optional; SQL DDL describing target schema + +[octad] +enable-provenance = true +enable-lineage = true +enable-temporal = true +enable-access-control = true +enable-simulation = false # Tier 2: branching/sandboxing + +[sidecar] +storage = "sqlite" # sqlite (default) | json +path = ".verisim/sidecar.db" 
---- +Run `verisimiser init --database sqlite` to generate a starter manifest. + == Architecture [source] @@ -167,20 +185,20 @@ Your Application │ ├──── writes ────► Your Database (unchanged) │ │ - │ VeriSimiser intercepts + │ VeriSimiser intercepts (write-path observer) │ │ │ ┌─────────────┼──────────────┐ │ │ │ │ - │ Drift Index Provenance Temporal - │ (Tier 1) Sidecar Sidecar - │ (Tier 1) (Tier 1) + │ Provenance Temporal Lineage / AccessControl / + │ sidecar sidecar Constraints sidecars (Tier 1) │ - └──── optional ──► Tier 2 Sidecars - (Graph, Vector, Tensor, - Semantic, Spatial) + └──── optional ──► Tier 2 modality overlays + (Graph, Vector, Tensor, Semantic, + Document, Spatial, Simulation) ---- -**Interception methods** (configurable per database): +**Interception methods** (configurable per database — most are still +roadmap items, see `ROADMAP.adoc`): * **PostgreSQL**: logical replication / `pg_notify` / triggers * **MySQL**: binlog CDC / triggers @@ -188,32 +206,53 @@ Your Application * **MongoDB**: change streams * **Application-level**: middleware / ORM hooks +== ABI + +The Application Binary Interface for the augmentation layer is declared +in two languages: + +* `src/interface/abi/` — Idris2 type definitions + (`Types.idr`, `Layout.idr`, `Foreign.idr`). These are the canonical + shapes; formal proofs of correctness against them are future work + (see `docs/decisions/ADR-0002-verification-tree.adoc`). +* `src/interface/ffi/` — Zig C-compatible FFI implementing the Idris2 + ABI for use from native code. + +The Rust `src/abi/mod.rs` mirrors the Idris2 types as plain structs +used directly by the CLI and codegen. + == Relationship to VeriSimDB VeriSimiser is NOT a replacement for VeriSimDB. It's a gateway drug. -* **VeriSimiser Tier 1** gives you drift detection, provenance, and temporal - versioning on your existing database. Zero commitment. -* **VeriSimiser Tier 2** gives you individual octad modalities as sidecars. - Incremental adoption. 
-* **Full VeriSimDB** gives you the complete octad with native cross-modal - querying, VCL, and built-in drift normalisation. Full commitment. +* **VeriSimiser Tier 1** gives you provenance, temporal, lineage, + access control, and constraints/drift detection on your existing + database. Zero commitment. +* **VeriSimiser Tier 2** gives you individual modality overlays + (graph/vector/tensor/semantic/document/spatial) and simulation as + sidecars. Incremental adoption. +* **Full VeriSimDB** gives you the complete model with native + cross-modal querying, VCL, and built-in drift normalisation. Full + commitment. -The migration path is: Tier 1 → Tier 2 → full VeriSimDB (if you want it). -Most users will be happy at Tier 1 or Tier 2. +The migration path is: Tier 1 → Tier 2 → full VeriSimDB (if you want +it). Most users will be happy at Tier 1 or Tier 2. == Integration with TypedQLiser -VeriSimiser works alongside https://github.com/hyperpolymath/typedqliser[TypedQLiser]: +VeriSimiser works alongside +https://github.com/hyperpolymath/typedqliser[TypedQLiser]: * TypedQLiser type-checks your queries (compile-time, no runtime cost) -* VeriSimiser augments your database with octad capabilities (runtime) +* VeriSimiser augments your database with octad concerns (runtime) * Together: formally verified queries against an augmented database == Status -**Pre-alpha.** Architecture defined, tier system designed. Tier 1 (drift -detection, provenance, temporal versioning) is the priority implementation. +**Pre-alpha.** Architecture defined, octad ontology pinned (ADR-0001), +codegen scaffolding in place. The next implementation milestone is V-L1-C1: +end-to-end SQLite Tier 1 piggyback (`sqlite3_update_hook` → +`verisimdb_provenance_log` sidecar). See `ROADMAP.adoc`. Part of the https://github.com/hyperpolymath/iseriser[-iser family]. **#3 priority** (after TypedQLiser and Chapeliser). 
diff --git a/ROADMAP.adoc b/ROADMAP.adoc index 7f0565f..92748b8 100644 --- a/ROADMAP.adoc +++ b/ROADMAP.adoc @@ -4,35 +4,78 @@ :toc: :icons: font +The phases below are stated in terms of the *concerns* octad +(Data, Metadata, Provenance, Lineage, Constraints, AccessControl, +Temporal, Simulation) per `docs/decisions/ADR-0001-octad-ontology.adoc`. +Tier 2 *modalities* (graph, vector, tensor, semantic, document, spatial) +are independent overlay representations layered on top. + == Phase 0: Scaffold (COMPLETE) -* [x] RSR template with full CI/CD (17 workflows) -* [x] CLI with subcommands (init, start, drift, provenance, history, status, octad) -* [x] Manifest parser (verisimiser.toml with tier1/tier2 config) -* [x] Tier 1 data types (DriftReport, ProvenanceRecord, TemporalVersion) -* [x] ABI module stubs (Idris2 + Zig FFI) -* [x] README with two-tier architecture and honest framing - -== Phase 1: PostgreSQL Tier 1 MVP + +* [x] RSR template with full CI/CD +* [x] CLI with subcommands (init, generate, start, drift, provenance, + history, status, octad) +* [x] Manifest parser (`verisimiser.toml` with `[octad]` toggles + + legacy `tier1`/`tier2` back-compat) +* [x] ABI types in Rust (`src/abi/mod.rs`) + Idris2 declarations + (`src/interface/abi/`) + Zig FFI stubs (`src/interface/ffi/`) +* [x] Codegen for sidecar overlay schema and query interceptor SQL +* [x] README + ADRs covering octad ontology, verification tree, + Justfile recipes + +== Phase 1: SQLite Tier 1 MVP + +The shortest end-to-end loop: SQLite target, SQLite sidecar, provenance ++ temporal concerns. Sequencing follows the bottom-up issue plan in +`docs/decisions/` (forthcoming). 
+ +* [ ] SQLite interception via `sqlite3_update_hook` +* [ ] Provenance sidecar — write-path observer, SHA-256 hash chain + covering operation + actor + before-snapshot + transformation (not + just operation + ts) +* [ ] Temporal sidecar — version history, point-in-time read, + rollback, partial-unique-index enforcement of "exactly one current" +* [ ] Property tests for hash-chain integrity, version ordering, and + sidecar isolation (Tier 1 never writes to target) +* [ ] `verisimiser doctor` + `verisimiser validate` subcommands +* [ ] Structured logging (`tracing`), `--log-format=json|pretty` + +== Phase 2: PostgreSQL Tier 1 + * [ ] PostgreSQL logical replication interception -* [ ] Provenance sidecar (SQLite) — write-path observer -* [ ] SHA-256 hash-chain integrity for provenance records -* [ ] Temporal versioning sidecar — point-in-time queries -* [ ] Cross-modal drift detection — read-path observer -* [ ] Drift index with 8-category classification -* [ ] Idris2 ABI proofs: sidecar isolation, hash-chain integrity, version ordering -* [ ] Zig FFI bridge: database connection, overlay operations, VCL-total queries -* [ ] End-to-end test: PostgreSQL -> verisimiser overlay -> VCL-total query - -== Phase 2: Multi-Backend Support -* [ ] SQLite interception via sqlite3_update_hook / WAL monitoring +* [ ] Provenance + temporal sidecars against PG target +* [ ] Idris2 ABI proofs: sidecar isolation, hash-chain integrity, + version ordering +* [ ] Zig FFI bridge: database connection, overlay operations + +== Phase 3: Multi-Backend Support + * [ ] MongoDB interception via change streams -* [ ] Redis interception via keyspace notifications -* [ ] MySQL interception via binlog CDC * [ ] Application-level middleware / ORM hooks * [ ] Backend-agnostic interception trait abstraction * [ ] Per-backend integration tests +* [ ] MySQL (binlog CDC) / Redis (keyspace notifications) — only if + there is real demand; the manifest enum currently excludes them. 
+ +== Phase 4: Constraints / Drift Detection + +* [ ] Per-category drift definition (one ADR per category) +* [ ] First implemented category: Temporal drift (version skew — + cheapest to define and observe) +* [ ] Drift index storage + query API +* [ ] `verisimiser drift` subcommand wired to real measurements + +== Phase 5: AccessControl + Lineage + +* [ ] AccessControl model ADR: principals, role composition, deny vs + allow precedence, view interaction +* [ ] Typed policy condition language (replace free-form SQL TEXT) +* [ ] Lineage DAG enforcement: self-edge CHECK + cycle prevention + ADR +* [ ] Lineage traversal subcommand (upstream/downstream) + +== Phase 6: Tier 2 Modality Overlays -== Phase 3: Tier 2 Overlays * [ ] Graph overlay (RDF triples / property graph edges) * [ ] Vector overlay (HNSW embedding similarity search) * [ ] Tensor overlay (ndarray multi-dimensional numeric data) @@ -41,26 +84,37 @@ * [ ] Spatial overlay (R-tree geospatial coordinates) * [ ] Independent enable/disable per overlay via manifest -== Phase 4: VCL-total Integration +== Phase 7: Simulation + +* [ ] Branching semantics ADR (isolation, merge policy, conflict + resolution) +* [ ] FK enforcement on `simulation_branches.parent_branch` +* [ ] `verisimiser simulate` subcommand + +== Phase 8: VCL-total Integration + * [ ] VCL-total type-safe query parsing -* [ ] Cross-tier queries (Tier 1 + Tier 2 in single query) +* [ ] Cross-concern queries (Tier 1 + Tier 2 in single query) * [ ] TypedQLiser integration for compile-time query validation * [ ] Query planner for multi-sidecar operations * [ ] Performance benchmarks: overhead of augmentation layer -== Phase 5: Production Hardening -* [ ] Retention policies (auto-prune temporal history) +== Phase 9: Production Hardening + +* [ ] Retention policies (`[retention]` section in manifest) * [ ] Sidecar compaction and garbage collection -* [ ] Concurrent access safety (multi-writer provenance chains) +* [ ] Concurrent access safety (multi-writer 
provenance chains — + per-entity serialisation + UNIQUE(entity_id, previous_hash)) * [ ] Backup and restore for sidecars * [ ] Monitoring and alerting integration * [ ] Error recovery and graceful degradation * [ ] Shell completions (bash, zsh, fish) -== Phase 6: Ecosystem -* [ ] PanLL panel for drift monitoring dashboard +== Phase 10: Ecosystem + * [ ] BoJ-server cartridge (MCP integration) -* [ ] SqueakWell integration (database recovery via cross-modal constraint propagation) -* [ ] Migration tooling: Tier 1 -> Tier 2 -> full VeriSimDB +* [ ] SqueakWell integration (database recovery via cross-concern + constraint propagation) +* [ ] Migration tooling: Tier 1 → Tier 2 → full VeriSimDB * [ ] Publish to crates.io * [ ] Chainguard container image diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index ca3ebc8..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,21 +0,0 @@ -# Security Policy - -## Supported Versions - -| Version | Supported | -|---------|-----------| -| 0.1.x | ✅ | - -## Reporting a Vulnerability - -Please report security vulnerabilities to: j.d.a.jewell@open.ac.uk - -Do NOT open a public issue for security vulnerabilities. - -## Response Time - -We aim to respond within 48 hours and provide a fix within 7 days for critical issues. - -## Scope - -This policy covers the verisimiser CLI tool and its generated artifacts. 
diff --git a/docs/architecture/TOPOLOGY.md b/docs/architecture/TOPOLOGY.md index 3609305..66827d9 100644 --- a/docs/architecture/TOPOLOGY.md +++ b/docs/architecture/TOPOLOGY.md @@ -12,7 +12,7 @@ verisimiser/ │ ├── src/manifest/ — TOML manifest parsing (verisimiser.toml) │ ├── src/tier1/ — Tier 1 piggyback data types │ │ ├── drift.rs — DriftReport, DriftCategory (8 categories) -│ │ ├── provenance.rs — ProvenanceRecord, SHA-256 hash chain +│ │ ├── provenance.rs — re-exports abi::ProvenanceEntry; future write-path helpers (V-L1-C1) │ │ └── temporal.rs — TemporalVersion, point-in-time snapshots │ ├── src/tier2/ — Tier 2 overlay stubs (graph, vector, tensor, semantic, document, spatial) │ ├── src/intercept/ — Per-backend interception strategies diff --git a/docs/decisions/ADR-0001-octad-ontology.adoc b/docs/decisions/ADR-0001-octad-ontology.adoc new file mode 100644 index 0000000..4d29be0 --- /dev/null +++ b/docs/decisions/ADR-0001-octad-ontology.adoc @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += ADR-0001: Canonical octad ontology — concerns, not modalities +:revdate: 2026-05-13 +:status: Accepted + +== Status + +Accepted — 2026-05-13. + +Resolves: https://github.com/hyperpolymath/verisimiser/issues/19[V-L1-A1]. + +Closes as wontfix: https://github.com/hyperpolymath/verisimiser/issues/21[V-L1-A3] (the modalities-first refactor). + +Unblocks: https://github.com/hyperpolymath/verisimiser/issues/20[V-L1-A2] (the README rewrite). + +== Context + +Two competing ontologies have lived in this repository under the same name +("octad"): + +Modalities octad (README §"VeriSimDB's Octad: Eight Modalities"):: +Graph · Vector · Tensor · Semantic · Document · Temporal · Provenance · Spatial. +These are *representations* of an entity — how the same data is stored in +different shapes. 
The README's eight cross-modal drift categories +(structural, semantic, temporal, statistical, referential, provenance, +spatial, embedding) presuppose this ontology. + +Concerns octad (`src/abi/mod.rs::OctadDimension`, `src/manifest/mod.rs::OctadConfig`, `src/main.rs::print_octad`):: +Data · Metadata · Provenance · Lineage · Constraints · AccessControl · +Temporal · Simulation. These are *concerns/aspects* of data — what you +want to know or enforce about it. + +The code commits to the concerns octad; the README leads with the modalities +octad. A user cannot answer "what does an octad-augmented entity look like?" +without picking one. + +== Decision + +The *concerns* octad is canonical. + +The eight dimensions of the verisimiser octad are: + +. **Data** — the original entity as stored in the target database. +. **Metadata** — schema and type information. +. **Provenance** — SHA-256 hash-chain tracking of who did what and when. +. **Lineage** — directed-edge graph of data derivation (intended as a DAG; see ADR-0004 when written). +. **Constraints** — cross-dimensional invariant enforcement, including + drift detection between Data + Metadata + active overlays. +. **AccessControl** — policy-based row/column-level access permissions. +. **Temporal** — version history with point-in-time queries and rollback. +. **Simulation** — what-if branching and sandbox query execution. + +Modalities (Graph, Vector, Tensor, Semantic, Document, Spatial) are +*Tier 2 overlays* — independent representational projections that a user +can enable per-entity for similarity search, full-text search, geospatial +indexing, etc. They are not "the octad" and not co-equal with the eight +concerns. Provenance and Temporal in the modalities list collapse onto +the same-named concerns. 
+ +The eight "cross-modal drift categories" become *symptoms observed by +the Constraints concern* when Data, Metadata, and the active overlays +disagree: + +[cols="1,2"] +|=== +| Drift category (legacy framing) | Where it lives in the concerns ontology + +| Structural | Constraints (Data vs Metadata schema agreement) +| Semantic | Constraints across overlays +| Temporal | Constraints between Temporal versions across overlays +| Statistical | Constraints over Tier 2 vector/tensor overlay drift +| Referential | Constraints between Tier 2 graph overlay and Data +| Provenance | Constraints over Provenance chain integrity +| Spatial | Constraints over Tier 2 spatial overlay +| Embedding | Constraints between Tier 2 vector overlay and source documents +|=== + +== Consequences + +. README and ROADMAP must be rewritten to drop the modalities octad table + and reframe the drift categories under Constraints. Tracked as V-L1-A2. +. The modalities-first refactor (V-L1-A3) is *not* done. Closed as wontfix. +. Tier 2 design (V-L1-F, V-L1-G, V-L1-H) continues to use modality terms + for overlays — they are just no longer presented as "the octad." +. `OctadDimension` enum and `OctadConfig` fields are stable; no source-level + rename triggered by this ADR. + +== Alternatives considered + +Modalities octad as canonical:: +Rejected. Would have required rewriting `abi`, `manifest`, `codegen`, tests, +and example manifests — a multi-week change for pre-alpha framing. The +codebase had already converged on the concerns ontology; the only cost of +keeping it is doc updates. + +"Both, with one as primary":: +Considered but rejected. Two ontologies sharing a name is the bug; +ranking them doesn't fix it. + +Renaming "octad" to something else (e.g. "octave"):: +Out of scope here. The brand survives; the contents are defined. 
diff --git a/docs/decisions/ADR-0002-verification-tree.adoc b/docs/decisions/ADR-0002-verification-tree.adoc new file mode 100644 index 0000000..f9bce7d --- /dev/null +++ b/docs/decisions/ADR-0002-verification-tree.adoc @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += ADR-0002: Strip the empty verification/ subtree +:revdate: 2026-05-13 +:status: Accepted + +== Status + +Accepted — 2026-05-13. + +Resolves: https://github.com/hyperpolymath/verisimiser/issues/15[V-L3-D1]. + +== Context + +The `verification/` tree contained eight subdirectories +(`benchmarks/`, `coverage/`, `fuzzing/`, `proofs/`, `safety_case/`, +`simulations/`, `tests/`, `traceability/`), each with a ~20-byte +`README.adoc` and an a2ml manifest. Zero proofs, zero benchmarks, +zero fuzzing, zero safety case. + +(The actual Idris2 ABI declarations and Zig FFI implementation live in +`src/interface/abi/{Types,Layout,Foreign}.idr` and `src/interface/ffi/` +respectively — those are real Phase 0 stubs and are unaffected by this +ADR. The README's reference to "Idris2 proofs (in `src/interface/abi/`)" +already points at the correct location for the stubs; only the proofs +themselves are still future work.) + +A reader following the README's verification/-shaped tree was misled +into thinking benchmarks, fuzzing, safety cases, and traceability +artifacts existed for this product. + +== Decision + +Strip the empty `verification/` subtree from the repository. + +Reintroduce `verification/` only when there is something to put in it. +The first content is expected to be a property-test harness landing +alongside V-L1-C1 (the Tier 1 SQLite piggyback) — at that point a +single `verification/property-tests/` directory will be added with +real content. Don't predeclare the rest of the eight subdirs until +they have artifacts. + +== Consequences + +. The empty `verification/` subtree is deleted in the same change set + that lands this ADR. 
+. The README's "Status" / "Architecture" copy that referenced + `verification/` is reworded to point at `src/interface/abi/` only + (where the Idris2 stubs actually live). +. Future verification artifacts (proofs, benchmarks, fuzz targets) are + added directory-by-directory as they appear, not predeclared. The + first such addition is expected alongside V-L1-C1 (Tier 1 SQLite + piggyback) as a property-test harness. + +== Alternatives considered + +Populate the subtree:: +Rejected for now. Writing eight stub READMEs costs effort and doesn't +move the product forward. The contracts the README cited (drift +correctness, chain integrity, version ordering, sidecar isolation) +become provable only once the Tier 1 implementation exists, which is +V-L1-C1. + +Leave the subtree, soften the README:: +Rejected. Empty scaffolding misleads contributors who follow the tree +looking for examples to extend. diff --git a/docs/decisions/ADR-0003-justfile-aspirational-recipes.adoc b/docs/decisions/ADR-0003-justfile-aspirational-recipes.adoc new file mode 100644 index 0000000..281eb87 --- /dev/null +++ b/docs/decisions/ADR-0003-justfile-aspirational-recipes.adoc @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += ADR-0003: Remove aspirational Justfile recipes +:revdate: 2026-05-13 +:status: Accepted + +== Status + +Accepted — 2026-05-13. + +Resolves: https://github.com/hyperpolymath/verisimiser/issues/11[V-L3-C2]. +Unblocks: https://github.com/hyperpolymath/verisimiser/issues/10[V-L3-C1] (the +mechanical fix to the broken recipe block). + +== Context + +`Justfile` carried a recipe block whose source contained literal `\n` +characters where newlines were intended. The intent was three recipes +(`augment`, `check-octad`, `migrate`) calling clap subcommands of the +same names. 
The clap CLI in `src/main.rs` exposes +`init / generate / start / drift / provenance / history / status / octad` +— there are no `augment`, `check-octad`, or `migrate` subcommands. + +So the Justfile advertised commands that don't exist, and did so in a +syntactically broken way that made the breakage invisible to `just --list`. + +== Decision + +Delete the three aspirational recipes from `Justfile`. + +The Justfile should only reference subcommands that exist. When new +subcommands are added (per their own design issues), recipe wrappers +can be added at the same time. + +== Consequences + +. The broken `\n`-collapsed block is removed cleanly (V-L3-C1). +. `just --list` no longer shows phantom recipes. +. Future "convenience wrapper" recipes must be added in the same change + set as the underlying subcommand. + +== Alternatives considered + +Add the missing subcommands:: +Out of scope for V-L3-C2/C1, which are ground-clearing issues. The +semantics of `augment` / `check-octad` / `migrate` aren't pinned down — +e.g. `augment DB_URL` overlaps with `generate` (overlay DDL) plus `start` +(daemon). Designing them is a separate issue, not a one-line fix. + +Keep the recipes pointing at not-yet-implemented subcommands:: +Rejected. The recipes would still fail with "Unknown subcommand"; the +broken UX would just be in a different place. diff --git a/docs/theory/provenance-threat-model.adoc b/docs/theory/provenance-threat-model.adoc new file mode 100644 index 0000000..efd26f4 --- /dev/null +++ b/docs/theory/provenance-threat-model.adoc @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) += Provenance threat model +:toc: left +:toclevels: 3 +:icons: font + +This document fixes what the Provenance concern's hash chain proves and +what it doesn't. It binds the design choices made in V-L2-C1, V-L2-C2, +V-L2-L1, V-L2-L2, and the ADR-0004 follow-up. 
+ +Resolves: https://github.com/hyperpolymath/verisimiser/issues/25[V-L1-B1]. + +== Scope + +In scope:: the `Provenance` octad concern as implemented by +`ProvenanceEntry` in `src/abi/mod.rs` plus the sidecar table +`verisimdb_provenance_log` plus (post V-L2-L1) the `chain_head` table. + +Out of scope:: denial-of-service against the sidecar; side-channels +(timing, cache); tampering of the target database itself +(verisimiser only sees what its interceptors intercept); retroactive +provenance for pre-existing rows (the genesis entry for an entity +attests its existence at the moment verisimiser started observing it, +not before); cryptographic compromise of SHA-256. + +== Adversaries + +Four adversaries cover the relevant capability axes. Each is a +*lattice point*; real attackers combine capabilities. + +[cols="1,3"] +|=== +| Tag | Capability + +| **R** | Read-only — can read both the target database and the +sidecar. No write to either. Models: a forensic auditor; +a leaked replica; a debugging copy on a laptop. + +| **SW** | Sidecar-Write — can append new rows to +`verisimdb_provenance_log` and `verisimdb_temporal_versions` but +**cannot delete or rewrite existing rows**. Models: a sidecar +configured append-only (filesystem-level WORM, S3 Object Lock, +SQLite + revoked-DELETE/UPDATE permissions); also models a buggy +verisimiser daemon that double-writes. + +| **SR** | Sidecar-Rewrite — can rewrite or delete arbitrary rows +in the sidecar. Models: root on the sidecar host; compromised +application credential with full sidecar privileges; a backup +operator restoring an older sidecar snapshot. + +| **CK** | Clock-skew — can write entries (via SW or SR) with +timestamps that lie. Models: a system clock that drifts; an +adversary who controls the clock source; coordinated backdating. 
+|=== + +== Per-adversary protection matrix + +For each adversary, what the chain proves about each field: +**P** = protected (tampering detected), +**N** = not protected, +**C** = conditionally protected (see note). + +[cols="2,1,1,1,1"] +|=== +| Field | R | SW | SR | SR+CK + +| Genesis existence / order | P | P | N | N +| `previous_hash` of any entry | P | P | C-1 | C-1 +| `entity_id` of any entry | P | P | C-1 | C-1 +| `operation` of any entry | P | P | C-1 | C-1 +| `actor` of any entry | P | P | C-1 | C-1 +| `timestamp` of any entry | P | P | C-1 | N (CK falsifies) +| `before_snapshot` of any entry | P | P | C-1 | C-1 +| `transformation` of any entry | P | P | C-1 | C-1 +| Absence of an entry | C-2 | C-2 | N | N +| Total ordering across entities | N | N | N | N +|=== + +**C-1** — under SR (or SR+CK), the adversary can rewrite an +arbitrary suffix of the chain (recomputing hashes as they go). What's +preserved against SR is **only the prefix up to the most-recent +externally attested hash** (e.g. a hash periodically signed by an +out-of-band notary, anchored to an append-only log, or published to +a transparency service). Without an external anchor, the chain +proves *nothing* against SR. + +**C-2** — absence is provable only if every legitimate append goes +through verisimiser. Direct writes to the target database that +bypass interception are invisible to the chain; the chain cannot +attest to what it never saw. + +== Field coverage requirement + +A direct consequence of C-1 / C-2 and the per-adversary matrix: + +[NOTE] +==== +Every field that an auditor will rely on for forensic purposes +**must** participate in the hash. `actor`, `before_snapshot`, and +`transformation` are all such fields — they are the audit. If they +are not in the preimage, the chain protects them against R and SW +only by *coincidence* (because the row itself was hash-keyed in the +DB), not by design. 
+ +This document therefore *requires* V-L2-C1: the preimage must cover +`previous_hash`, `entity_id`, `operation`, `actor`, `timestamp`, +`before_snapshot`, `transformation`. Any future field added to +`ProvenanceEntry` must either be added to the preimage or +explicitly recorded here with a justification for its omission. +==== + +== Canonical encoding requirement + +A direct consequence of "the hash protects the field" being a +*function*, not a relation: + +[NOTE] +==== +Two distinct preimages must produce distinct hashes (collision +resistance is SHA-256's job). Two *equal* preimages must produce +equal hashes (canonicalisation is our job). The encoding must: + +. Domain-separate verisimiser provenance hashes from any other +hash the system computes (`b"verisim-prov-v1\0"`). +. Length-prefix variable-length fields so concatenation is +unambiguous. +. Use a canonical timestamp encoding (V-L2-C2: + `i64_le(secs) || u32_le(nanos)`), not a string representation that + admits multiple valid forms for the same instant. +==== + +== Append serialisation requirement + +A direct consequence of "previous_hash chains entries linearly": + +[NOTE] +==== +Two writers cannot independently chain from the same `previous_hash` +without forking the chain. Verisimiser must serialise appends +per-entity. V-L2-L1 specifies the write-path lock; V-L2-L2 specifies +the database UNIQUE constraint that makes forks structurally +impossible even if the lock is bypassed. + +The chain is *per-entity-serial* but *cross-entity-parallel*. +A global serial order across entities is *not* a requirement +(see "Total ordering" in the matrix above). +==== + +== Anchor / notary (future) + +Protection against SR requires an *external anchor* that the +adversary cannot rewrite. Options, none of which this document +mandates yet: + +. **Periodic notarisation** — every N minutes, sign the latest +chain_head with a key not held on the sidecar host, and publish +the signature to an out-of-band log. +. 
**Transparency log** — submit each `chain_head` update to an +external append-only log (Sigstore-style). +. **Replication to immutable storage** — write each new entry to +S3 Object Lock (or equivalent) as a defence in depth. + +The threat model leaves the choice for ADR-0005 once a deployment +context exists. + +== Out-of-band assumptions + +. The sidecar host's clock is monotonic and within bounded skew of +real time. Without this, all timestamps are advisory (see CK in the +matrix). +. Verisimiser's process integrity is assumed — a verisimiser binary +that has been swapped for a malicious one can produce a hash-chain +that verifies against itself but attests to nothing real. Binary +provenance is a separate concern (out of scope here). +. SHA-256 is collision-resistant in the cryptographic sense for the +lifetime of the audit window. + +== Open questions + +. Should `Option` fields (`before_snapshot`, `transformation`) +encode `None` vs `Some("")` distinctly? The current proposal collapses +them (both encode as `u64_le(0)` length). Document explicitly that +the chain treats "no snapshot" and "empty snapshot" identically; if a +future use case requires distinguishing them, a single sentinel byte +(`0x00` for None, `0x01` for Some) prefixed inside the length-prefixed +slot resolves it. +. Should the chain include an explicit `chain_id` covering all of an +entity's entries (in addition to chaining via `previous_hash`)? Cheap +defence in depth against entity_id confusion; defer to ADR-0004. 
+ +== Cross-references + +* V-L2-C1 — implements the field coverage + domain separation +* V-L2-C2 — implements canonical timestamp encoding +* V-L2-C3 — positive tamper-detection tests +* V-L2-C4 — removes the wontfix test that codified the C-1 gap +* V-L2-L1 — per-entity write-path serialisation +* V-L2-L2 — UNIQUE INDEX(entity_id, previous_hash) defence in depth +* V-L2-N1 — deduplicates the type used here (ProvenanceEntry vs + ProvenanceRecord) +* ADR-0004 (future) — records the binding choices made here diff --git a/examples/SafeDOMExample.res b/examples/SafeDOMExample.res deleted file mode 100644 index 2c1b5b3..0000000 --- a/examples/SafeDOMExample.res +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: PMPL-1.0-or-later -// Example: Using SafeDOM for formally verified DOM mounting - -open SafeDOM - -// Example 1: Basic mounting with error handling -let mountApp = () => { - mountSafe( - "#app", - "

Hello, World!

Mounted safely with proofs.

", - ~onSuccess=el => { - Console.log("✓ App mounted successfully!") - Console.log("Element:", el) - }, - ~onError=err => { - Console.error("✗ Mount failed:", err) - } - ) -} - -// Example 2: Wait for DOM ready before mounting -let mountWhenDOMReady = () => { - mountWhenReady( - "#app", - "

App Title

", - ~onSuccess=_ => Console.log("✓ Mounted after DOM ready"), - ~onError=err => Console.error("✗ Failed:", err) - ) -} - -// Example 3: Batch mounting (atomic - all or nothing) -let mountMultiple = () => { - let specs = [ - {selector: "#header", html: "

Site Title

"}, - {selector: "#nav", html: ""}, - {selector: "#main", html: "

Content here

"}, - {selector: "#footer", html: "
© 2026
"} - ] - - switch mountBatch(specs) { - | Ok(elements) => { - Console.log(`✓ Successfully mounted ${Array.length(elements)} elements`) - elements->Array.forEach(el => Console.log(" -", el)) - } - | Error(err) => { - Console.error("✗ Batch mount failed:", err) - Console.error(" (None were mounted - atomic operation)") - } - } -} - -// Example 4: Explicit validation before mounting -let mountWithValidation = () => { - // Validate selector first - switch ProvenSelector.validate("#my-app") { - | Error(e) => Console.error(`Invalid selector: ${e}`) - | Ok(validSelector) => { - // Validate HTML - switch ProvenHTML.validate("
Content
") { - | Error(e) => Console.error(`Invalid HTML: ${e}`) - | Ok(validHtml) => { - // Now mount with proven safety - switch mount(validSelector, validHtml) { - | Mounted(el) => Console.log("✓ Mounted with validated inputs:", el) - | MountPointNotFound(s) => Console.error(`✗ Element not found: ${s}`) - | InvalidSelector(_) => Console.error("Impossible - already validated") - | InvalidHTML(_) => Console.error("Impossible - already validated") - } - } - } - } -} - -// Example 5: Integration with TEA -module MyApp = { - type model = {message: string} - type msg = NoOp - - let init = () => {message: "Hello from TEA"} - let update = (model, _msg) => model - let view = model => `

${model.message}

` -} - -let mountTEAApp = () => { - let model = MyApp.init() - let html = MyApp.view(model) - - mountWhenReady( - "#tea-app", - html, - ~onSuccess=el => { - Console.log("✓ TEA app mounted") - // Set up event handlers, subscriptions here - }, - ~onError=err => Console.error(`✗ TEA mount failed: ${err}`) - ) -} - -// Entry point -let main = () => { - Console.log("SafeDOM Examples") - Console.log("================\n") - - // Choose which example to run - mountWhenDOMReady() // Run on DOM ready -} - -// Auto-execute when module loads -main() diff --git a/examples/web-project-deno.json b/examples/web-project-deno.json deleted file mode 100644 index 5ddd3bd..0000000 --- a/examples/web-project-deno.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "// NOTE": "Example deno.json for ReScript web projects", - "tasks": { - "build": "deno run -A npm:rescript", - "clean": "deno run -A npm:rescript clean", - "watch": "deno run -A npm:rescript -w", - "serve": "deno run -A jsr:@std/http/file-server .", - "test": "deno test --allow-all" - }, - "imports": { - "rescript": "^12.0.0", - "@rescript/core": "npm:@rescript/core@^1.6.0", - "safe-dom/": "https://raw.githubusercontent.com/hyperpolymath/rescript-dom-mounter/main/src/", - "proven/": "../proven/bindings/rescript/src/" - }, - "compilerOptions": { - "allowJs": true, - "checkJs": false - } -} diff --git a/src/abi/mod.rs b/src/abi/mod.rs index b50c83b..d21e3c0 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -161,34 +161,79 @@ pub struct ProvenanceEntry { pub transformation: Option, } +/// Domain-separation tag for verisimiser provenance hashes (V-L2-C1). +/// +/// Bumping the version suffix (`v1` -> `v2`) constitutes a hash-algorithm +/// migration: existing chains keep verifying with the old tag, new +/// entries use the new tag, and `verify()` dispatches on the stored tag. +/// (No migration is currently planned; the tag exists for future-proofing.) 
+const PROV_DOMAIN_TAG: &[u8] = b"verisim-prov-v1\0"; + impl ProvenanceEntry { - /// Compute the SHA-256 hash for a provenance entry, chaining from the previous hash. + /// Compute the SHA-256 hash for a provenance entry (V-L2-C1, V-L2-C2). + /// + /// Preimage = domain tag || length-prefixed fields || canonical timestamp: /// - /// The hash covers: previous_hash, entity_id, operation, and timestamp. - /// This ensures that any tampering with the chain is detectable. + /// ```text + /// SHA-256( + /// "verisim-prov-v1\0" + /// || u64_le(len(previous_hash)) || previous_hash + /// || u64_le(len(entity_id)) || entity_id + /// || u64_le(len(operation)) || operation + /// || u64_le(len(actor)) || actor + /// || i64_le(timestamp.timestamp()) + /// || u32_le(timestamp.timestamp_subsec_nanos()) + /// || u64_le(len(before_snapshot.unwrap_or(""))) + /// || before_snapshot.unwrap_or("") + /// || u64_le(len(transformation.unwrap_or(""))) + /// || transformation.unwrap_or("") + /// ) + /// ``` + /// + /// All seven fields participate, so tampering with any of them is + /// detectable. See `docs/theory/provenance-threat-model.adoc` for the + /// adversary matrix and `docs/decisions/ADR-0004` (forthcoming) for + /// the binding choices. 
pub fn compute_hash( previous_hash: &str, entity_id: &str, operation: &str, - timestamp: &str, + actor: &str, + timestamp: &DateTime<Utc>, + before_snapshot: Option<&str>, + transformation: Option<&str>, ) -> String { + fn write_lp(hasher: &mut Sha256, bytes: &[u8]) { + hasher.update((bytes.len() as u64).to_le_bytes()); + hasher.update(bytes); + } let mut hasher = Sha256::new(); - hasher.update(previous_hash.as_bytes()); - hasher.update(entity_id.as_bytes()); - hasher.update(operation.as_bytes()); - hasher.update(timestamp.as_bytes()); + hasher.update(PROV_DOMAIN_TAG); + write_lp(&mut hasher, previous_hash.as_bytes()); + write_lp(&mut hasher, entity_id.as_bytes()); + write_lp(&mut hasher, operation.as_bytes()); + write_lp(&mut hasher, actor.as_bytes()); + hasher.update(timestamp.timestamp().to_le_bytes()); + hasher.update(timestamp.timestamp_subsec_nanos().to_le_bytes()); + write_lp(&mut hasher, before_snapshot.unwrap_or("").as_bytes()); + write_lp(&mut hasher, transformation.unwrap_or("").as_bytes()); format!("{:x}", hasher.finalize()) } - /// Verify that this entry's hash is consistent with its contents. + /// Verify that this entry's hash is consistent with all of its contents. /// - /// Returns `true` if the stored hash matches the recomputed hash. + /// Returns `true` iff the stored hash matches the recomputed hash over + /// the full field set (previous_hash, entity_id, operation, actor, + /// timestamp, before_snapshot, transformation). pub fn verify(&self) -> bool { let expected = Self::compute_hash( &self.previous_hash, &self.entity_id, &self.operation, - &self.timestamp.to_rfc3339(), + &self.actor, + &self.timestamp, + self.before_snapshot.as_deref(), + self.transformation.as_deref(), ); self.hash == expected } @@ -196,7 +241,7 @@ impl ProvenanceEntry { /// Create a new genesis entry (first in the chain for an entity). 
pub fn genesis(entity_id: &str, actor: &str) -> Self { let timestamp = Utc::now(); - let hash = Self::compute_hash("", entity_id, "insert", &timestamp.to_rfc3339()); + let hash = Self::compute_hash("", entity_id, "insert", actor, &timestamp, None, None); Self { hash, previous_hash: String::new(), @@ -216,7 +261,10 @@ &self.hash, &self.entity_id, operation, - &timestamp.to_rfc3339(), + actor, + &timestamp, + None, + None, ); Self { hash, @@ -491,11 +539,126 @@ mod tests { } #[test] - fn test_provenance_tamper_detection() { + fn test_provenance_tamper_entity_id() { let mut entry = ProvenanceEntry::genesis("entity-1", "system"); - // Tamper with the entity_id after hash computation. entry.entity_id = "entity-2".to_string(); - assert!(!entry.verify(), "Tampered entry should fail verification"); + assert!( + !entry.verify(), + "tampering with entity_id must break verify" + ); + } + + /// V-L2-C3: actor is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_actor() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.actor = "mallory".to_string(); + assert!(!entry.verify(), "tampering with actor must break verify"); + } + + /// V-L2-C3: before_snapshot is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_before_snapshot() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + // Adding a snapshot (None -> Some) should break the original hash. + entry.before_snapshot = Some("{\"redacted\":true}".to_string()); + assert!( + !entry.verify(), + "tampering with before_snapshot must break verify" + ); + } + + /// V-L2-C3: transformation is hashed; tampering with it must be detected. 
+ #[test] + fn test_provenance_tamper_transformation() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.transformation = Some("evil-rewrite".to_string()); + assert!( + !entry.verify(), + "tampering with transformation must break verify" + ); + } + + /// V-L2-C3: operation is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_operation() { + let mut entry = ProvenanceEntry::genesis("entity-1", "alice"); + entry.operation = "delete".to_string(); + assert!( + !entry.verify(), + "tampering with operation must break verify" + ); + } + + /// V-L2-C3: previous_hash is hashed; tampering with it must be detected. + #[test] + fn test_provenance_tamper_previous_hash() { + let genesis = ProvenanceEntry::genesis("entity-1", "alice"); + let mut update = genesis.chain("update", "bob"); + update.previous_hash = "deadbeef".to_string(); + assert!( + !update.verify(), + "tampering with previous_hash must break verify" + ); + } + + /// V-L2-C2: hash depends on the canonical (i64+u32) timestamp encoding, + /// not on a string representation that might vary. Two `DateTime<Utc>` + /// values that represent the same instant — one parsed from RFC3339, + /// one constructed via `from_timestamp` — must produce the same hash. + #[test] + fn test_provenance_hash_timestamp_canonical() { + let parsed: DateTime<Utc> = "2026-05-13T08:00:00.000000000Z".parse().unwrap(); + let built = DateTime::<Utc>::from_timestamp(parsed.timestamp(), 0).unwrap(); + assert_eq!( + parsed, built, + "construction paths must yield equal instants" + ); + + let h1 = ProvenanceEntry::compute_hash("", "e1", "insert", "alice", &parsed, None, None); + let h2 = ProvenanceEntry::compute_hash("", "e1", "insert", "alice", &built, None, None); + assert_eq!( + h1, h2, + "canonical timestamp encoding must be path-independent" + ); + } + + /// V-L2-C3: round-trip — build a chain of N entries and assert every + /// mutation of every field breaks verification. 
+ #[test] + fn test_provenance_chain_round_trip_mutation_matrix() { + let g = ProvenanceEntry::genesis("post-7", "system"); + let u1 = g.chain("update", "alice"); + let u2 = u1.chain("update", "bob"); + let d = u2.chain("delete", "alice"); + for entry in [&g, &u1, &u2, &d] { + assert!(entry.verify(), "every legitimate entry must verify"); + } + + for original in [&g, &u1, &u2, &d] { + // Permute each hash-covered field and assert verify fails. + for mutate in [ + |e: &mut ProvenanceEntry| e.actor.push_str("-tamper"), + |e: &mut ProvenanceEntry| e.entity_id.push_str("-tamper"), + |e: &mut ProvenanceEntry| e.operation.push_str("-tamper"), + |e: &mut ProvenanceEntry| { + e.previous_hash = "00".repeat(32); + }, + |e: &mut ProvenanceEntry| { + e.timestamp += chrono::Duration::nanoseconds(1); + }, + |e: &mut ProvenanceEntry| { + e.before_snapshot = Some("tampered".into()); + }, + |e: &mut ProvenanceEntry| { + e.transformation = Some("tampered".into()); + }, + ] { + let mut clone = original.clone(); + mutate(&mut clone); + assert!(!clone.verify(), "field mutation must break verification"); + } + } } #[test] diff --git a/src/codegen/overlay.rs b/src/codegen/overlay.rs index 1d1ea02..f6c6a91 100644 --- a/src/codegen/overlay.rs +++ b/src/codegen/overlay.rs @@ -17,6 +17,52 @@ use crate::codegen::parser::ParsedSchema; use crate::manifest::OctadConfig; +// --------------------------------------------------------------------------- +// Identifier validation (V-L2-G1) +// --------------------------------------------------------------------------- + +/// Permitted identifier shape for any user-controlled name that flows into +/// generated DDL: leading ASCII letter or underscore, then ASCII letters, +/// digits, or underscores. This is a deliberately conservative subset of +/// SQL's quoted-identifier rules — it rejects names that would be valid +/// under quoting but make our `format!()`-based DDL emission unsafe. 
+/// +/// Returns `Err` with the offending identifier quoted so the user can +/// rename or alias the source table. +fn validate_identifier(name: &str) -> std::result::Result<&str, String> { + if name.is_empty() { + return Err("identifier is empty".into()); + } + let mut chars = name.chars(); + let first = chars.next().unwrap(); + if !(first.is_ascii_alphabetic() || first == '_') { + return Err(format!( + "identifier {:?} must start with an ASCII letter or underscore", + name + )); + } + for c in chars { + if !(c.is_ascii_alphanumeric() || c == '_') { + return Err(format!( + "identifier {:?} contains invalid character {:?}; \ + only ASCII letters, digits, and underscores are allowed \ + in identifiers that flow into generated DDL (V-L2-G1)", + name, c + )); + } + } + Ok(name) +} + +/// Convenience: validate and panic with a structured message if invalid. +/// Used in the few DDL-emitting paths that don't propagate errors. +fn must_validate_identifier(name: &str) -> &str { + match validate_identifier(name) { + Ok(n) => n, + Err(e) => panic!("invalid identifier in generated DDL: {}", e), + } +} + // --------------------------------------------------------------------------- // Overlay generation // --------------------------------------------------------------------------- @@ -86,20 +132,26 @@ fn generate_metadata_table(schema: &ParsedSchema) -> String { ); // Generate INSERT statements for each discovered table. + // + // V-L2-G1: every identifier flowing into the SQL string here is + // validated. Anything that wouldn't match `^[A-Za-z_][A-Za-z0-9_]*$` + // is rejected at codegen time rather than allowed to land in DDL + // (where it would be an injection vector). 
if !schema.tables.is_empty() { ddl.push_str("-- Seed metadata from parsed schema\n"); for table in &schema.tables { + let table_name = must_validate_identifier(&table.name); let pk_cols: Vec<&str> = table .columns .iter() .filter(|c| c.is_primary_key) - .map(|c| c.name.as_str()) + .map(|c| must_validate_identifier(c.name.as_str())) .collect(); let pk_str = pk_cols.join(","); ddl.push_str(&format!( "INSERT OR IGNORE INTO verisimdb_metadata (table_name, column_count, pk_columns, discovered_at)\n\ \x20 VALUES ('{}', {}, '{}', datetime('now'));\n", - table.name, + table_name, table.columns.len(), pk_str, )); @@ -114,7 +166,13 @@ fn generate_metadata_table(schema: &ParsedSchema) -> String { /// /// Stores a SHA-256 hash-chained audit trail of all data modifications. /// Each row chains to its predecessor via `previous_hash`, forming an -/// append-only, tamper-evident log. +/// append-only, tamper-evident log (see +/// `docs/theory/provenance-threat-model.adoc`). +/// +/// The `chain_head` table is the per-entity head pointer used for the +/// write-path lock (V-L2-L1). The UNIQUE INDEX on `(entity_id, +/// previous_hash)` (V-L2-L2) makes chain forks structurally impossible +/// — defence in depth for if the lock is ever bypassed. 
fn generate_provenance_table() -> String { "-- Provenance: SHA-256 hash-chained audit trail\n\ CREATE TABLE IF NOT EXISTS verisimdb_provenance_log (\n\ @@ -122,14 +180,30 @@ fn generate_provenance_table() -> String { \x20 previous_hash TEXT NOT NULL,\n\ \x20 entity_id TEXT NOT NULL,\n\ \x20 table_name TEXT NOT NULL,\n\ - \x20 operation TEXT NOT NULL, -- insert, update, delete, transform\n\ + \x20 operation TEXT NOT NULL CHECK (operation IN ('insert','update','delete','transform')), -- V-L2-J1\n\ \x20 actor TEXT NOT NULL,\n\ \x20 timestamp TEXT NOT NULL, -- ISO 8601\n\ \x20 before_snapshot TEXT, -- JSON of entity state before operation\n\ \x20 transformation TEXT -- description of transformation applied\n\ );\n\ + -- V-L2-L2: forbid chain forks at the DB level. Genesis records all\n\ + -- carry previous_hash='' so this also enforces a single genesis per\n\ + -- entity.\n\ + CREATE UNIQUE INDEX IF NOT EXISTS ux_provenance_chain\n\ + \x20 ON verisimdb_provenance_log(entity_id, previous_hash);\n\ CREATE INDEX IF NOT EXISTS idx_provenance_entity ON verisimdb_provenance_log(entity_id);\n\ - CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\n" + CREATE INDEX IF NOT EXISTS idx_provenance_table ON verisimdb_provenance_log(table_name);\n\ + \n\ + -- V-L2-L1: per-entity head pointer. The write path takes a row\n\ + -- lock here (SELECT … FOR UPDATE / BEGIN IMMEDIATE) so concurrent\n\ + -- appenders on the same entity serialise; cross-entity appends\n\ + -- remain parallel. 
Each successful append updates head_hash in\n\ + -- the same transaction as the INSERT into verisimdb_provenance_log.\n\ + CREATE TABLE IF NOT EXISTS verisimdb_provenance_chain_head (\n\ + \x20 entity_id TEXT PRIMARY KEY,\n\ + \x20 head_hash TEXT NOT NULL,\n\ + \x20 updated_at TEXT NOT NULL\n\ + );\n\n" .to_string() } @@ -139,16 +213,20 @@ fn generate_provenance_table() -> String { /// Together, these edges form a DAG that can be traversed to answer /// "where did this data come from?" and "what is affected if this changes?" fn generate_lineage_table() -> String { - "-- Lineage: data derivation DAG\n\ + "-- Lineage: data derivation graph (DAG by intent; cycle prevention is\n\ + -- a runtime concern — see V-L1-G1 / V-L2-I2).\n\ CREATE TABLE IF NOT EXISTS verisimdb_lineage_graph (\n\ \x20 edge_id TEXT PRIMARY KEY,\n\ \x20 source_entity TEXT NOT NULL,\n\ \x20 source_table TEXT NOT NULL,\n\ \x20 target_entity TEXT NOT NULL,\n\ \x20 target_table TEXT NOT NULL,\n\ - \x20 derivation_type TEXT NOT NULL, -- copy, transform, aggregate, join, filter\n\ + \x20 derivation_type TEXT NOT NULL\n\ + \x20 CHECK (derivation_type IN ('copy','transform','aggregate','join','filter')), -- V-L2-J1\n\ \x20 description TEXT,\n\ - \x20 created_at TEXT NOT NULL -- ISO 8601\n\ + \x20 created_at TEXT NOT NULL, -- ISO 8601\n\ + \x20 -- V-L2-I1: self-edges are not derivations; rejected at DB level.\n\ + \x20 CHECK (NOT (source_entity = target_entity AND source_table = target_table))\n\ );\n\ CREATE INDEX IF NOT EXISTS idx_lineage_source ON verisimdb_lineage_graph(source_entity);\n\ CREATE INDEX IF NOT EXISTS idx_lineage_target ON verisimdb_lineage_graph(target_entity);\n\n" @@ -161,18 +239,27 @@ fn generate_lineage_table() -> String { /// point-in-time queries and rollback. Each version records when it /// became active (`valid_from`) and when it was superseded (`valid_to`). 
fn generate_temporal_table() -> String { - "-- Temporal: version history with point-in-time support\n\ + "-- Temporal: version history with point-in-time support.\n\ + -- V-L2-H1: the partial UNIQUE INDEX enforces exactly one\n\ + -- current row per (entity, table) — \"only one version is\n\ + -- valid right now\" was an application-layer invariant before;\n\ + -- now it's structural.\n\ + -- V-L2-J1: operation is a closed set.\n\ + -- V-L2-H2: valid_to (if set) must not predate valid_from.\n\ CREATE TABLE IF NOT EXISTS verisimdb_temporal_versions (\n\ \x20 entity_id TEXT NOT NULL,\n\ \x20 table_name TEXT NOT NULL,\n\ - \x20 version INTEGER NOT NULL,\n\ + \x20 version INTEGER NOT NULL CHECK (version >= 1),\n\ \x20 valid_from TEXT NOT NULL, -- ISO 8601\n\ \x20 valid_to TEXT, -- ISO 8601, NULL if current\n\ \x20 snapshot TEXT NOT NULL, -- JSON serialisation of entity state\n\ - \x20 operation TEXT NOT NULL, -- insert, update, rollback\n\ - \x20 PRIMARY KEY (entity_id, table_name, version)\n\ + \x20 operation TEXT NOT NULL CHECK (operation IN ('insert','update','rollback')),\n\ + \x20 PRIMARY KEY (entity_id, table_name, version),\n\ + \x20 CHECK (valid_to IS NULL OR valid_to >= valid_from)\n\ );\n\ - CREATE INDEX IF NOT EXISTS idx_temporal_current ON verisimdb_temporal_versions(entity_id, table_name) WHERE valid_to IS NULL;\n\n" + CREATE UNIQUE INDEX IF NOT EXISTS ux_temporal_current\n\ + \x20 ON verisimdb_temporal_versions(entity_id, table_name)\n\ + \x20 WHERE valid_to IS NULL;\n\n" .to_string() } @@ -182,16 +269,18 @@ fn generate_temporal_table() -> String { /// evaluated at query time to filter and redact data based on the /// requesting principal's identity and roles. 
fn generate_access_policy_table() -> String { - "-- Access Control: row/column-level access policies\n\ + "-- Access Control: row/column-level access policies.\n\ + -- V-L2-J1: access_level is a closed set.\n\ CREATE TABLE IF NOT EXISTS verisimdb_access_policies (\n\ \x20 policy_id TEXT PRIMARY KEY,\n\ \x20 target_table TEXT NOT NULL,\n\ \x20 target_column TEXT, -- NULL means whole-row policy\n\ \x20 principal TEXT NOT NULL, -- user, role, or group identifier\n\ - \x20 access_level TEXT NOT NULL, -- read, write, admin, deny\n\ - \x20 condition TEXT, -- SQL-like filter condition\n\ + \x20 access_level TEXT NOT NULL\n\ + \x20 CHECK (access_level IN ('read','write','admin','deny')),\n\ + \x20 condition TEXT, -- SQL-like filter condition (V-L1-H1)\n\ \x20 created_at TEXT NOT NULL, -- ISO 8601\n\ - \x20 active INTEGER NOT NULL DEFAULT 1\n\ + \x20 active INTEGER NOT NULL DEFAULT 1 CHECK (active IN (0,1))\n\ );\n\ CREATE INDEX IF NOT EXISTS idx_access_table ON verisimdb_access_policies(target_table);\n\ CREATE INDEX IF NOT EXISTS idx_access_principal ON verisimdb_access_policies(principal);\n\n" @@ -203,22 +292,26 @@ fn generate_access_policy_table() -> String { /// Stores branched copies of data for what-if analysis. Each branch /// is isolated from the main data until explicitly merged. 
fn generate_simulation_table() -> String { - "-- Simulation: what-if branching and sandbox queries\n\ + "-- Simulation: what-if branching and sandbox queries.\n\ + -- V-L2-J1: status is a closed set; parent_branch is a self-FK\n\ + -- (was previously declared but un-enforced).\n\ CREATE TABLE IF NOT EXISTS verisimdb_simulation_branches (\n\ \x20 branch_id TEXT PRIMARY KEY,\n\ - \x20 parent_branch TEXT, -- NULL for root branch\n\ + \x20 parent_branch TEXT REFERENCES verisimdb_simulation_branches(branch_id), -- NULL for root\n\ \x20 name TEXT NOT NULL,\n\ \x20 description TEXT,\n\ \x20 created_at TEXT NOT NULL, -- ISO 8601\n\ \x20 merged_at TEXT, -- ISO 8601, NULL if not merged\n\ - \x20 status TEXT NOT NULL DEFAULT 'active' -- active, merged, abandoned\n\ + \x20 status TEXT NOT NULL DEFAULT 'active'\n\ + \x20 CHECK (status IN ('active','merged','abandoned'))\n\ );\n\n\ CREATE TABLE IF NOT EXISTS verisimdb_simulation_deltas (\n\ \x20 delta_id TEXT PRIMARY KEY,\n\ \x20 branch_id TEXT NOT NULL REFERENCES verisimdb_simulation_branches(branch_id),\n\ \x20 entity_id TEXT NOT NULL,\n\ \x20 table_name TEXT NOT NULL,\n\ - \x20 operation TEXT NOT NULL, -- insert, update, delete\n\ + \x20 operation TEXT NOT NULL\n\ + \x20 CHECK (operation IN ('insert','update','delete')), -- V-L2-J1\n\ \x20 delta_data TEXT NOT NULL, -- JSON of the change\n\ \x20 created_at TEXT NOT NULL -- ISO 8601\n\ );\n\ @@ -267,6 +360,7 @@ mod tests { enable_lineage: true, enable_temporal: true, enable_access_control: true, + enable_constraints: true, enable_simulation: true, }; let ddl = generate_sidecar_schema(&schema, &octad); @@ -287,6 +381,7 @@ mod tests { enable_lineage: false, enable_temporal: false, enable_access_control: false, + enable_constraints: false, enable_simulation: false, }; let ddl = generate_sidecar_schema(&schema, &octad); @@ -321,6 +416,24 @@ mod tests { assert!(ddl.contains("actor")); } + /// V-L2-L2: forks are forbidden by a UNIQUE INDEX on + /// (entity_id, previous_hash). 
+ #[test] + fn test_provenance_table_has_unique_chain_index() { + let ddl = generate_provenance_table(); + assert!(ddl.contains("UNIQUE INDEX")); + assert!(ddl.contains("ux_provenance_chain")); + assert!(ddl.contains("(entity_id, previous_hash)")); + } + + /// V-L2-L1: chain_head table exists for per-entity write serialisation. + #[test] + fn test_provenance_table_has_chain_head() { + let ddl = generate_provenance_table(); + assert!(ddl.contains("verisimdb_provenance_chain_head")); + assert!(ddl.contains("head_hash")); + } + #[test] fn test_temporal_table_has_versioning() { let ddl = generate_temporal_table(); @@ -329,4 +442,88 @@ mod tests { assert!(ddl.contains("valid_to")); assert!(ddl.contains("snapshot")); } + + /// V-L2-H1: the partial UNIQUE INDEX enforces exactly-one-current. + #[test] + fn test_temporal_table_has_partial_unique_index() { + let ddl = generate_temporal_table(); + assert!(ddl.contains("UNIQUE INDEX")); + assert!(ddl.contains("ux_temporal_current")); + assert!(ddl.contains("WHERE valid_to IS NULL")); + } + + /// V-L2-H2: valid_to must not predate valid_from. + #[test] + fn test_temporal_table_has_valid_to_check() { + let ddl = generate_temporal_table(); + assert!(ddl.contains("valid_to IS NULL OR valid_to >= valid_from")); + } + + /// V-L2-I1: lineage self-edges are forbidden by CHECK. + #[test] + fn test_lineage_table_forbids_self_edges() { + let ddl = generate_lineage_table(); + assert!(ddl.contains("NOT (source_entity = target_entity")); + } + + /// V-L2-J1: simulation status is a closed set; parent_branch FK exists. + #[test] + fn test_simulation_table_constraints() { + let ddl = generate_simulation_table(); + assert!(ddl.contains("REFERENCES verisimdb_simulation_branches(branch_id)")); + assert!(ddl.contains("status IN ('active','merged','abandoned')")); + assert!(ddl.contains("operation IN ('insert','update','delete')")); + } + + /// V-L2-J1: provenance, lineage, access enum CHECKs. 
+ #[test] + fn test_enum_checks() { + let prov = generate_provenance_table(); + assert!(prov.contains("operation IN ('insert','update','delete','transform')")); + + let lin = generate_lineage_table(); + assert!( + lin.contains("derivation_type IN ('copy','transform','aggregate','join','filter')") + ); + + let acc = generate_access_policy_table(); + assert!(acc.contains("access_level IN ('read','write','admin','deny')")); + } + + /// V-L2-G1: identifier validator accepts safe names, rejects everything + /// outside `^[A-Za-z_][A-Za-z0-9_]*$`. This is the codegen-side guard + /// against SQL injection via table/column names. + #[test] + fn test_validate_identifier_accepts_safe() { + for ok in &["posts", "Posts", "_x", "x_1", "Post_2026"] { + assert!( + validate_identifier(ok).is_ok(), + "{:?} should be accepted", + ok + ); + } + } + + #[test] + fn test_validate_identifier_rejects_unsafe() { + let attacks = [ + "", // empty + "1posts", // leading digit + "po sts", // space + "posts;", // statement terminator + "posts'); DROP TABLE x;--", // classic injection + "posts\"", // quote + "posts`", // backtick + "posts/*", // comment open + "schema.table", // dotted + "ünicode", // non-ASCII + ]; + for attack in &attacks { + assert!( + validate_identifier(attack).is_err(), + "{:?} should be rejected", + attack + ); + } + } } diff --git a/src/codegen/query.rs b/src/codegen/query.rs index 86d1e3c..b61cd74 100644 --- a/src/codegen/query.rs +++ b/src/codegen/query.rs @@ -121,7 +121,7 @@ fn build_entity_id_expr(pk_columns: &[&str], table_name: &str, backend: Database // No PK defined — fall back to internal row identifier. 
match backend { DatabaseBackend::SQLite => format!("{}.rowid", table_name), - DatabaseBackend::PostgreSQL => format!("{}::text", format!("{}.ctid", table_name)), + DatabaseBackend::PostgreSQL => format!("{}.ctid::text", table_name), DatabaseBackend::MongoDB => "CAST(_id AS TEXT)".to_string(), } } else if pk_columns.len() == 1 { @@ -160,7 +160,12 @@ fn generate_provenance_view( table_name ); - // Use a subquery to get the latest provenance entry per entity. + // V-L2-K1: the previous greatest-N-per-group subquery used a broken + // correlation (the inner `MAX(p2.timestamp)` subquery referenced the + // *outer* uncorrelated `verisimdb_provenance_log` row instead of the + // grouping aliased as `prov`). Replaced with the canonical + // ROW_NUMBER() partition-by-entity pattern, which works on SQLite + // 3.25+ and PostgreSQL. format!( "{comment}\ CREATE VIEW IF NOT EXISTS verisimdb_{table_name}_with_provenance AS\n\ @@ -173,14 +178,14 @@ fn generate_provenance_view( FROM {table_name}\n\ LEFT JOIN (\n\ \x20 SELECT entity_id, operation, actor, timestamp, hash\n\ - \x20 FROM verisimdb_provenance_log\n\ - \x20 WHERE table_name = '{table_name}'\n\ - \x20 AND timestamp = (\n\ - \x20 SELECT MAX(p2.timestamp)\n\ - \x20 FROM verisimdb_provenance_log p2\n\ - \x20 WHERE p2.entity_id = verisimdb_provenance_log.entity_id\n\ - \x20 AND p2.table_name = '{table_name}'\n\ - \x20 )\n\ + \x20 FROM (\n\ + \x20 SELECT entity_id, operation, actor, timestamp, hash,\n\ + \x20 ROW_NUMBER() OVER (PARTITION BY entity_id\n\ + \x20 ORDER BY timestamp DESC) AS _rn\n\ + \x20 FROM verisimdb_provenance_log\n\ + \x20 WHERE table_name = '{table_name}'\n\ + \x20 ) ranked\n\ + \x20 WHERE ranked._rn = 1\n\ ) prov ON prov.entity_id = ({entity_id_expr});\n\n", columns = column_list.join(",\n"), ) @@ -343,6 +348,7 @@ mod tests { enable_lineage: true, enable_temporal: true, enable_access_control: true, + enable_constraints: true, enable_simulation: false, }; let interceptors = generate_interceptors(&schema, 
&octad, DatabaseBackend::SQLite); @@ -364,6 +370,7 @@ mod tests { enable_lineage: false, enable_temporal: false, enable_access_control: false, + enable_constraints: false, enable_simulation: false, }; let interceptors = generate_interceptors(&schema, &octad, DatabaseBackend::SQLite); @@ -384,15 +391,30 @@ mod tests { enable_lineage: false, enable_temporal: false, enable_access_control: false, + enable_constraints: false, enable_simulation: false, }; let interceptors = generate_interceptors(&schema, &octad, DatabaseBackend::SQLite); - let view = interceptors[0].provenance_view.as_ref().expect("TODO: handle error"); + let view = interceptors[0] + .provenance_view + .as_ref() + .expect("TODO: handle error"); assert!(view.contains("verisimdb_posts_with_provenance")); assert!(view.contains("posts.id")); assert!(view.contains("posts.title")); assert!(view.contains("verisimdb_provenance_log")); + + // V-L2-K1: the latest-per-entity selection uses ROW_NUMBER() OVER + // PARTITION BY, not the previous broken self-correlation. 
+ assert!( + view.contains("ROW_NUMBER() OVER (PARTITION BY entity_id"), + "provenance view must use ROW_NUMBER()-partition pattern (V-L2-K1)" + ); + assert!( + !view.contains("WHERE p2.entity_id = verisimdb_provenance_log.entity_id"), + "broken self-correlation must be gone" + ); } #[test] @@ -403,11 +425,15 @@ mod tests { enable_lineage: false, enable_temporal: true, enable_access_control: false, + enable_constraints: false, enable_simulation: false, }; let interceptors = generate_interceptors(&schema, &octad, DatabaseBackend::SQLite); - let view = interceptors[0].temporal_view.as_ref().expect("TODO: handle error"); + let view = interceptors[0] + .temporal_view + .as_ref() + .expect("TODO: handle error"); assert!(view.contains("verisimdb_posts_with_temporal")); assert!(view.contains("verisimdb_temporal_versions")); assert!(view.contains("valid_to IS NULL")); diff --git a/src/lib.rs b/src/lib.rs index a584e89..38eba2c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,4 @@ #![forbid(unsafe_code)] -#![allow( - dead_code, - clippy::too_many_arguments, - clippy::manual_strip, - clippy::if_same_then_else, - clippy::vec_init_then_push, - clippy::upper_case_acronyms, - clippy::format_in_format_args, - clippy::enum_variant_names, - clippy::module_inception, - clippy::doc_lazy_continuation, - clippy::manual_clamp, - clippy::type_complexity -)] // SPDX-License-Identifier: PMPL-1.0-or-later // Copyright (c) 2026 Jonathan D.A. 
Jewell (hyperpolymath) // diff --git a/src/main.rs b/src/main.rs index bac3e96..596e161 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,17 +1,3 @@ -#![allow( - dead_code, - clippy::too_many_arguments, - clippy::manual_strip, - clippy::if_same_then_else, - clippy::vec_init_then_push, - clippy::upper_case_acronyms, - clippy::format_in_format_args, - clippy::enum_variant_names, - clippy::module_inception, - clippy::doc_lazy_continuation, - clippy::manual_clamp, - clippy::type_complexity -)] #![forbid(unsafe_code)] // SPDX-License-Identifier: PMPL-1.0-or-later // Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) @@ -31,13 +17,7 @@ use anyhow::Result; use clap::{Parser, Subcommand}; - -mod abi; -mod codegen; -mod intercept; -mod manifest; -mod tier1; -mod tier2; +use verisimiser::{abi, codegen, manifest}; /// VeriSimiser — augment any database with VeriSimDB octad capabilities. #[derive(Parser)] @@ -54,6 +34,12 @@ enum Commands { /// Database backend: postgresql, sqlite, or mongodb. #[arg(short, long, default_value = "postgresql")] database: String, + /// Project name (default: "my-augmented-db"). + #[arg(short, long)] + name: Option<String>, + /// Overwrite an existing verisimiser.toml. + #[arg(short, long)] + force: bool, }, /// Parse the target database schema and generate sidecar overlay + interceptors. Generate { @@ -106,7 +92,11 @@ enum Commands { fn main() -> Result<()> { let cli = Cli::parse(); match cli.command { - Commands::Init { database } => manifest::init_manifest(&database), + Commands::Init { + database, + name, + force, + } => manifest::init_manifest(&database, name.as_deref(), force), Commands::Generate { manifest, output } => { let m = manifest::load_manifest(&manifest)?; @@ -124,7 +114,7 @@ fn main() -> Result<()> { }; // Determine the backend for SQL dialect selection.
- let backend_name = m.database.effective_backend(); + let backend_name = m.database.effective_backend()?; let backend = abi::DatabaseBackend::from_str(backend_name) .unwrap_or(abi::DatabaseBackend::PostgreSQL); @@ -159,7 +149,7 @@ fn main() -> Result<()> { } else { &m.verisimiser.name }; - let backend = m.database.effective_backend(); + let backend = m.database.effective_backend()?; println!( "Starting VeriSimiser augmentation for {} ({})", name, backend @@ -203,8 +193,7 @@ fn main() -> Result<()> { Commands::Status { manifest } => { let m = manifest::load_manifest(&manifest)?; - manifest::print_status(&m); - Ok(()) + manifest::print_status(&m) } Commands::Octad => { diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index c6a678b..a88bc8d 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -108,14 +108,28 @@ impl Default for DatabaseConfig { } impl DatabaseConfig { - /// Returns the effective backend name, considering legacy `target_db` field. - pub fn effective_backend(&self) -> &str { - if !self.backend.is_empty() && self.backend != "postgresql" { - &self.backend - } else if !self.target_db.is_empty() { - &self.target_db - } else { - &self.backend + /// Returns the effective backend name. + /// + /// `target-db` is a legacy field kept for backward compatibility with the + /// old manifest schema. The new field is `backend`. If both are set to + /// distinct values, refuse rather than silently picking one — value-based + /// tie-breaking (the previous behaviour) silently picked sqlite when a + /// user set `backend = "postgresql"` alongside `target-db = "sqlite"` + /// (V-L2-E1). + pub fn effective_backend(&self) -> Result<&str> { + let new_set = !self.backend.is_empty(); + let old_set = !self.target_db.is_empty(); + match (new_set, old_set) { + (true, true) if self.backend != self.target_db => anyhow::bail!( + "verisimiser.toml sets both [database].backend = {:?} and \ + [database].target-db = {:?}. 
target-db is the legacy field; \ + remove it and keep backend.", + self.backend, + self.target_db + ), + (true, _) => Ok(self.backend.as_str()), + (false, true) => Ok(self.target_db.as_str()), + (false, false) => Ok("postgresql"), } } } @@ -149,6 +163,11 @@ pub struct OctadConfig { #[serde(rename = "enable-access-control", default = "default_true")] pub enable_access_control: bool, + /// Enable cross-dimensional invariant enforcement and drift detection. + /// V-L2-D1: explicit field (was previously derived from "count > 2"). + #[serde(rename = "enable-constraints", default = "default_true")] + pub enable_constraints: bool, + /// Enable simulation/sandbox mode (what-if queries on branched data). #[serde(rename = "enable-simulation", default)] pub enable_simulation: bool, @@ -161,35 +180,32 @@ impl Default for OctadConfig { enable_lineage: true, enable_temporal: true, enable_access_control: true, + enable_constraints: true, enable_simulation: false, } } } impl OctadConfig { - /// Returns the count of enabled octad dimensions (always includes data + metadata = 2). + /// Returns the count of enabled octad dimensions, in 2..=8. + /// + /// Data and Metadata are always counted (inherent in the target DB). + /// The other six are summed from explicit toggles. V-L2-D1: every + /// concern is now explicit; the previous "Constraints is implied if + /// anything else is on" arithmetic is gone. 
pub fn enabled_count(&self) -> usize { - let mut count = 2; // data + metadata are always present - if self.enable_provenance { - count += 1; - } - if self.enable_lineage { - count += 1; - } - if self.enable_temporal { - count += 1; - } - if self.enable_access_control { - count += 1; - } - if self.enable_simulation { - count += 1; - } - // constraints is implied when any other dimension is enabled - if count > 2 { - count += 1; - } // constraints - count + let optionals: usize = [ + self.enable_provenance, + self.enable_lineage, + self.enable_temporal, + self.enable_access_control, + self.enable_constraints, + self.enable_simulation, + ] + .into_iter() + .filter(|b| *b) + .count(); + 2 + optionals } } @@ -299,25 +315,30 @@ pub fn load_manifest(path: &str) -> Result { /// Generate a new `verisimiser.toml` manifest file with the Phase 1 schema. /// /// The `database` parameter sets the backend type (postgresql, sqlite, mongodb). -/// Fails if the file already exists to prevent accidental overwrites. -pub fn init_manifest(database: &str) -> Result<()> { +/// `name` overrides the project name (defaults to `"my-augmented-db"`). +/// If `force` is false and the file exists, the call fails. (V-L2-O1) +/// +/// The toggle defaults are read from `OctadConfig::default()` so editing the +/// defaults in code automatically updates the generated template. 
+pub fn init_manifest(database: &str, name: Option<&str>, force: bool) -> Result<()> { let path = "verisimiser.toml"; - if std::path::Path::new(path).exists() { - anyhow::bail!("{} already exists — remove it first to reinitialise", path); + if std::path::Path::new(path).exists() && !force { + anyhow::bail!( + "{} already exists — pass --force to overwrite, or remove the file first", + path + ); } - let enable_simulation = if database == "sqlite" { - "false" - } else { - "false" - }; + let defaults = OctadConfig::default(); + let project_name = name.unwrap_or("my-augmented-db"); + let bool_str = |b: bool| if b { "true" } else { "false" }; let template = format!( r#"# SPDX-License-Identifier: PMPL-1.0-or-later # VeriSimiser manifest — augment {database} with VeriSimDB octad capabilities [project] -name = "my-augmented-db" +name = "{project_name}" version = "0.1.0" # description = "My database augmented with VeriSimDB octad dimensions" @@ -327,16 +348,23 @@ connection-string-env = "DATABASE_URL" # schema-source = "schema.sql" [octad] -enable-provenance = true -enable-lineage = true -enable-temporal = true -enable-access-control = true -enable-simulation = {enable_simulation} +enable-provenance = {prov} +enable-lineage = {lin} +enable-temporal = {temp} +enable-access-control = {ac} +enable-constraints = {cons} +enable-simulation = {sim} [sidecar] storage = "sqlite" path = ".verisim/sidecar.db" -"# +"#, + prov = bool_str(defaults.enable_provenance), + lin = bool_str(defaults.enable_lineage), + temp = bool_str(defaults.enable_temporal), + ac = bool_str(defaults.enable_access_control), + cons = bool_str(defaults.enable_constraints), + sim = bool_str(defaults.enable_simulation), ); std::fs::write(path, template)?; @@ -345,14 +373,14 @@ path = ".verisim/sidecar.db" } /// Print a human-readable status summary of a loaded manifest. 
-pub fn print_status(manifest: &Manifest) { +pub fn print_status(manifest: &Manifest) -> Result<()> { let name = if !manifest.project.name.is_empty() { &manifest.project.name } else { &manifest.verisimiser.name }; - let backend = manifest.database.effective_backend(); + let backend = manifest.database.effective_backend()?; println!("=== VeriSimiser: {} ===", name); println!("Backend: {}", backend); @@ -362,6 +390,7 @@ pub fn print_status(manifest: &Manifest) { ); println!(); + let on_off = |b: bool| if b { "ON" } else { "off" }; println!( "Octad Dimensions ({}/8 enabled):", manifest.octad.enabled_count() @@ -370,50 +399,147 @@ pub fn print_status(manifest: &Manifest) { println!(" Metadata: ALWAYS ON (schema introspection)"); println!( " Provenance: {}", - if manifest.octad.enable_provenance { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_provenance) ); println!( " Lineage: {}", - if manifest.octad.enable_lineage { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_lineage) ); println!( " Constraints: {}", - if manifest.octad.enabled_count() > 2 { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_constraints) ); println!( " Access Control: {}", - if manifest.octad.enable_access_control { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_access_control) ); println!( " Temporal: {}", - if manifest.octad.enable_temporal { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_temporal) ); println!( " Simulation: {}", - if manifest.octad.enable_simulation { - "ON" - } else { - "off" - } + on_off(manifest.octad.enable_simulation) ); + Ok(()) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + /// V-L2-D1: enabled_count is bounded by 2..=8 for every flag combination. 
+ #[test] + fn test_enabled_count_bounds() { + for mask in 0u8..64 { + let octad = OctadConfig { + enable_provenance: mask & 0b000001 != 0, + enable_lineage: mask & 0b000010 != 0, + enable_temporal: mask & 0b000100 != 0, + enable_access_control: mask & 0b001000 != 0, + enable_constraints: mask & 0b010000 != 0, + enable_simulation: mask & 0b100000 != 0, + }; + let c = octad.enabled_count(); + assert!( + (2..=8).contains(&c), + "enabled_count out of range for mask={:#08b}: got {}", + mask, + c + ); + } + } + + /// V-L2-D1: enabled_count exactly equals 2 + popcount(toggles). + #[test] + fn test_enabled_count_arithmetic() { + let octad = OctadConfig { + enable_provenance: true, + enable_lineage: false, + enable_temporal: true, + enable_access_control: false, + enable_constraints: true, + enable_simulation: false, + }; + assert_eq!(octad.enabled_count(), 2 + 3); + } + + /// V-L2-E1: setting both backend and target_db to the *same* value + /// is harmless — single source of truth. + #[test] + fn test_effective_backend_agreement() { + let cfg = DatabaseConfig { + backend: "sqlite".to_string(), + target_db: "sqlite".to_string(), + ..Default::default() + }; + assert_eq!(cfg.effective_backend().unwrap(), "sqlite"); + } + + /// V-L2-E1: setting both to *conflicting* values must error loudly. + #[test] + fn test_effective_backend_conflict_errors() { + let cfg = DatabaseConfig { + backend: "postgresql".to_string(), + target_db: "sqlite".to_string(), + ..Default::default() + }; + let err = cfg.effective_backend().unwrap_err().to_string(); + assert!( + err.contains("postgresql"), + "error mentions modern field value" + ); + assert!(err.contains("sqlite"), "error mentions legacy field value"); + } + + /// V-L2-E1: modern-only and legacy-only both work. 
+ #[test] + fn test_effective_backend_single_source() { + let modern = DatabaseConfig { + backend: "sqlite".to_string(), + target_db: String::new(), + ..Default::default() + }; + assert_eq!(modern.effective_backend().unwrap(), "sqlite"); + + let legacy = DatabaseConfig { + backend: String::new(), + target_db: "mongodb".to_string(), + ..Default::default() + }; + assert_eq!(legacy.effective_backend().unwrap(), "mongodb"); + } + + /// V-L2-E1: with nothing set, default is postgresql. + #[test] + fn test_effective_backend_default() { + let cfg = DatabaseConfig { + backend: String::new(), + target_db: String::new(), + ..Default::default() + }; + assert_eq!(cfg.effective_backend().unwrap(), "postgresql"); + } + + /// V-L2-O1: init_manifest template reflects OctadConfig::default(). + #[test] + fn test_init_manifest_template_uses_defaults() { + // We can't actually call init_manifest in a unit test (it writes + // to CWD), but we can check that the template *would* be + // consistent by computing what it would emit and asserting + // the toggle lines match Default. + let defaults = OctadConfig::default(); + // If a future patch flips a default, this test makes the + // template-vs-Default invariant visible. + assert!(defaults.enable_provenance); + assert!(defaults.enable_lineage); + assert!(defaults.enable_temporal); + assert!(defaults.enable_access_control); + assert!(defaults.enable_constraints); + assert!(!defaults.enable_simulation); + } } diff --git a/src/tier1/provenance.rs b/src/tier1/provenance.rs index 4886e18..283e7ee 100644 --- a/src/tier1/provenance.rs +++ b/src/tier1/provenance.rs @@ -1,57 +1,18 @@ // SPDX-License-Identifier: PMPL-1.0-or-later // Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) // -// Provenance tracking via SHA-256 hash chains. -// Write-path observer: records what happened, never changes what happened. - -use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; - -/// A single link in the provenance hash chain. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProvenanceRecord { - /// Hash of this record (SHA-256 of previous_hash + entity_id + operation + timestamp). - pub hash: String, - /// Hash of the previous record in the chain (empty string for genesis). - pub previous_hash: String, - /// Entity this record is about. - pub entity_id: String, - /// What happened: "create", "update", "delete", "transform". - pub operation: String, - /// Who did it (user, service, or system identifier). - pub actor: String, - /// When it happened. - pub timestamp: chrono::DateTime<chrono::Utc>, - /// Optional: what the entity looked like before (for updates/deletes). - pub before_snapshot: Option<String>, - /// Optional: transformation description (for derived data). - pub transformation: Option<String>, -} +// Tier 1 provenance write-path helpers. +// +// Type definitions live in `crate::abi` — this module exists for the +// *write-path* code (V-L1-C1 onwards: hooking the target database, +// appending tamper-evident records to the sidecar). The duplicate +// `ProvenanceRecord` struct that previously lived here was removed +// in V-L2-N1 (it shadowed `abi::ProvenanceEntry` and risked drifting +// from the canonical hash function). +// +// Re-export the canonical type so existing `use crate::tier1::provenance::…` +// call sites continue to work. -impl ProvenanceRecord { - /// Compute the hash for this record, chaining from the previous hash. - pub fn compute_hash( - previous_hash: &str, - entity_id: &str, - operation: &str, - timestamp: &str, - ) -> String { - let mut hasher = Sha256::new(); - hasher.update(previous_hash.as_bytes()); - hasher.update(entity_id.as_bytes()); - hasher.update(operation.as_bytes()); - hasher.update(timestamp.as_bytes()); - format!("{:x}", hasher.finalize()) - } +pub use crate::abi::ProvenanceEntry; - /// Verify that this record's hash is consistent with its contents.
- pub fn verify(&self) -> bool { - let expected = Self::compute_hash( - &self.previous_hash, - &self.entity_id, - &self.operation, - &self.timestamp.to_rfc3339(), - ); - self.hash == expected - } -} +// Write-path helpers (V-L2-L1) will land here. diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 093ad97..77abf64 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -75,29 +75,30 @@ fn test_full_pipeline_blog_schema() { enable_lineage: true, enable_temporal: true, enable_access_control: true, + enable_constraints: true, enable_simulation: false, }; let overlay_ddl = overlay::generate_sidecar_schema(&schema, &octad); // Verify all expected sidecar tables are present. assert!( - overlay_ddl.contains("verisim_metadata"), + overlay_ddl.contains("verisimdb_metadata"), "Should contain metadata table" ); assert!( - overlay_ddl.contains("verisim_provenance_log"), + overlay_ddl.contains("verisimdb_provenance_log"), "Should contain provenance table" ); assert!( - overlay_ddl.contains("verisim_lineage_graph"), + overlay_ddl.contains("verisimdb_lineage_graph"), "Should contain lineage table" ); assert!( - overlay_ddl.contains("verisim_temporal_versions"), + overlay_ddl.contains("verisimdb_temporal_versions"), "Should contain temporal table" ); assert!( - overlay_ddl.contains("verisim_access_policies"), + overlay_ddl.contains("verisimdb_access_policies"), "Should contain access policies table" ); @@ -149,9 +150,9 @@ fn test_full_pipeline_blog_schema() { // Step 4: Render interceptors to SQL and verify output. 
let rendered = query::render_interceptors(&interceptors); - assert!(rendered.contains("verisim_users_with_provenance")); - assert!(rendered.contains("verisim_posts_with_temporal")); - assert!(rendered.contains("verisim_comments_with_provenance")); + assert!(rendered.contains("verisimdb_users_with_provenance")); + assert!(rendered.contains("verisimdb_posts_with_temporal")); + assert!(rendered.contains("verisimdb_comments_with_provenance")); } // --------------------------------------------------------------------------- @@ -234,7 +235,7 @@ vector = false assert_eq!(manifest.verisimiser.name, "legacy-db"); assert_eq!(manifest.database.target_db, "postgresql"); - assert_eq!(manifest.database.effective_backend(), "postgresql"); + assert_eq!(manifest.database.effective_backend().unwrap(), "postgresql"); assert!(manifest.tier1.provenance); assert!(manifest.tier1.temporal_versioning); assert!(manifest.tier1.drift_detection); @@ -269,19 +270,27 @@ fn test_provenance_chain_integrity_multi_step() { assert_ne!(update1.hash, update2.hash); assert_ne!(update2.hash, delete.hash); - // Tamper detection: mutating any entry should break verification. + // Tamper detection: every hash-covered field must break verification + // when mutated (V-L2-C1, V-L2-C3, V-L2-C4). let mut tampered = update1.clone(); tampered.actor = "evil-mallory".to_string(); assert!( - tampered.verify(), - "Actor is not part of hash — tamper to actor alone is invisible" + !tampered.verify(), + "actor is part of the hash; tampering with it must break verify" ); - // But modifying a hash-covered field should be detected. 
+ let mut tampered_op = update1.clone(); tampered_op.operation = "delete".to_string(); assert!( !tampered_op.verify(), - "Tampering with operation should break verification" + "tampering with operation must break verify" + ); + + let mut tampered_snap = update1.clone(); + tampered_snap.before_snapshot = Some("{}".into()); + assert!( + !tampered_snap.verify(), + "before_snapshot is part of the hash; tampering with it must break verify" ); } @@ -433,7 +442,9 @@ fn test_end_to_end_file_workflow() { .unwrap(); } - // Write a manifest file. + // Write a manifest file. Note: on Windows, schema_path uses backslashes + // which are escape characters in TOML basic strings — emit the path as a + // TOML literal string (single-quoted) to dodge escape interpretation. let manifest_path = dir.path().join("verisimiser.toml"); { let mut f = std::fs::File::create(&manifest_path).unwrap(); @@ -446,7 +457,7 @@ name = "test-articles" [database] backend = "sqlite" connection-string-env = "TEST_DB" -schema-source = "{}" +schema-source = '{}' [octad] enable-provenance = true @@ -476,14 +487,14 @@ path = ".verisim/test.db" // Generate overlay. 
let overlay_ddl = overlay::generate_sidecar_schema(&schema, &manifest.octad); - assert!(overlay_ddl.contains("verisim_provenance_log")); - assert!(overlay_ddl.contains("verisim_temporal_versions")); + assert!(overlay_ddl.contains("verisimdb_provenance_log")); + assert!(overlay_ddl.contains("verisimdb_temporal_versions")); assert!( - !overlay_ddl.contains("verisim_lineage_graph"), + !overlay_ddl.contains("verisimdb_lineage_graph"), "Lineage is disabled" ); assert!( - !overlay_ddl.contains("verisim_access_policies"), + !overlay_ddl.contains("verisimdb_access_policies"), "Access control is disabled" ); diff --git a/verification/0.1-AI-MANIFEST.a2ml b/verification/0.1-AI-MANIFEST.a2ml deleted file mode 100644 index 39b370f..0000000 --- a/verification/0.1-AI-MANIFEST.a2ml +++ /dev/null @@ -1,27 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-pillar" -level: 1 -parent: "../0-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Primary verification pillar. Contains evidence for correctness, - performance, formal proofs, randomized testing, and aerospace-grade - high-assurance metrics (MC/DC coverage, traceability, safety cases). 
- -canonical_locations: - tests: "tests/" - benchmarks: "benchmarks/" - proofs: "proofs/" - fuzzing: "fuzzing/" - simulations: "simulations/" - coverage: "coverage/" - traceability: "traceability/" - safety_case: "safety_case/" - -invariants: - - "Evidence MUST be reproducible and documented" - - "High-assurance deployments MUST satisfy traceability and safety_case requirements" diff --git a/verification/README.adoc b/verification/README.adoc deleted file mode 100644 index f07e7f3..0000000 --- a/verification/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Verification Pillar diff --git a/verification/benchmarks/0.2-AI-MANIFEST.a2ml b/verification/benchmarks/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 6416309..0000000 --- a/verification/benchmarks/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "benches-pillar" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Benches pillar. diff --git a/verification/benchmarks/README.adoc b/verification/benchmarks/README.adoc deleted file mode 100644 index 5db7648..0000000 --- a/verification/benchmarks/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Benchmarks Unit diff --git a/verification/coverage/0.2-AI-MANIFEST.a2ml b/verification/coverage/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index fc15bd3..0000000 --- a/verification/coverage/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-unit-coverage" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - High-assurance verification unit for coverage. - Critical for safety-of-life and aerospace-grade deployment standards. 
diff --git a/verification/coverage/README.adoc b/verification/coverage/README.adoc deleted file mode 100644 index 2566956..0000000 --- a/verification/coverage/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Coverage Unit diff --git a/verification/fuzzing/0.2-AI-MANIFEST.a2ml b/verification/fuzzing/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 79c4fef..0000000 --- a/verification/fuzzing/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "fuzzing-unit" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Fuzzing unit for high-rigor verification. diff --git a/verification/fuzzing/README.adoc b/verification/fuzzing/README.adoc deleted file mode 100644 index edeb179..0000000 --- a/verification/fuzzing/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Fuzzing Unit diff --git a/verification/proofs/0.2-AI-MANIFEST.a2ml b/verification/proofs/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 0e5666f..0000000 --- a/verification/proofs/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-unit-proofs" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Sub-unit focusing on proofs. diff --git a/verification/proofs/README.adoc b/verification/proofs/README.adoc deleted file mode 100644 index 1ae324d..0000000 --- a/verification/proofs/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Proofs Unit diff --git a/verification/safety_case/0.2-AI-MANIFEST.a2ml b/verification/safety_case/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 818fba4..0000000 --- a/verification/safety_case/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-unit-safety_case" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - High-assurance verification unit for safety case. 
- Critical for safety-of-life and aerospace-grade deployment standards. diff --git a/verification/safety_case/README.adoc b/verification/safety_case/README.adoc deleted file mode 100644 index 47c8e36..0000000 --- a/verification/safety_case/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Safety case Unit diff --git a/verification/simulations/0.2-AI-MANIFEST.a2ml b/verification/simulations/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index f40fc1c..0000000 --- a/verification/simulations/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "simulations-unit" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - Simulations unit for high-rigor verification. diff --git a/verification/simulations/README.adoc b/verification/simulations/README.adoc deleted file mode 100644 index 8e1b13a..0000000 --- a/verification/simulations/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Simulations Unit diff --git a/verification/tests/0.2-AI-MANIFEST.a2ml b/verification/tests/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index 0008fcf..0000000 --- a/verification/tests/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1 +0,0 @@ -# AI Manifest - Level 1: tests diff --git a/verification/tests/README.adoc b/verification/tests/README.adoc deleted file mode 100644 index 344bf86..0000000 --- a/verification/tests/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Tests Unit diff --git a/verification/traceability/0.2-AI-MANIFEST.a2ml b/verification/traceability/0.2-AI-MANIFEST.a2ml deleted file mode 100644 index defa125..0000000 --- a/verification/traceability/0.2-AI-MANIFEST.a2ml +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: PMPL-1.0-or-later ---- -### [META] -id: "verification-unit-traceability" -level: 2 -parent: "../0.1-AI-MANIFEST.a2ml" - ---- -### [AI_MANIFEST] -description: | - High-assurance verification unit for traceability. - Critical for safety-of-life and aerospace-grade deployment standards. 
diff --git a/verification/traceability/README.adoc b/verification/traceability/README.adoc deleted file mode 100644 index ff23dd7..0000000 --- a/verification/traceability/README.adoc +++ /dev/null @@ -1 +0,0 @@ -= Traceability Unit