From 297e0437f75ff7f42dd94f9a6b2e0cf14cc2f8a2 Mon Sep 17 00:00:00 2001 From: hyperpolymath Date: Sat, 16 May 2026 16:13:14 +0100 Subject: [PATCH] fix(#26): unify ProvenanceRecord/ProvenanceEntry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Canonical type: `abi::ProvenanceEntry`. Rationale — `abi::ProvenanceEntry` is the richer, persistence-boundary type: it carries the full chain API (`genesis` / `chain` / `verify` / domain-tagged length-prefixed `compute_hash`), is the type the SQLite write path (`append_provenance` / `verify_chain`) and the Idris2 ABI / Zig FFI bridge are written against, and is the type the threat-model doc names as the implementation. `tier1::provenance::ProvenanceRecord` was a byte-for-byte duplicate struct (identical 8 fields) whose `compute_hash`/`verify` had already been reduced to thin shims that just delegated to `ProvenanceEntry`. It was orphaned — nothing in the tree constructed it. No field divergence: the two structs had identical fields and field order, so this is a pure dedup, not a unification of diverged shapes. No `From`/alias impls were needed. Changes: - Deleted `tier1::provenance::ProvenanceRecord` struct + impl block (the duplicate `compute_hash`/`verify` shims). - Replaced with `pub use crate::abi::ProvenanceEntry;` so any external caller of `tier1::provenance::*` resolves to the single canonical definition. `tier1/provenance.rs` now holds only write-path logic (`append_provenance`, `verify_chain`, `SIDECAR_DDL`, `init_sidecar_schema`) — no type definitions. - Updated stale references in `docs/architecture/TOPOLOGY.md` and `ROADMAP.adoc` from `ProvenanceRecord` to `ProvenanceEntry`. Refs migrated: 0 code call sites (the tier1 type was orphaned; only its own self-references plus 2 documentation mentions). `grep -r ProvenanceRecord src/` now returns zero hits. On-disk / JSON stability: unchanged. 
`ProvenanceEntry` and the deleted `ProvenanceRecord` had identical fields; the deleted shims already computed the canonical hash via `ProvenanceEntry::compute_hash`. No serde field names, SQL column names, hash preimage, or DB schema are touched — provenance integrity is preserved bit-for-bit. Build/test: `cargo build` clean (only the pre-existing unrelated `RetentionConfig` unused-import warning in gc.rs). `cargo test` green: 87 lib + 9 integration + 2 sqlite-e2e tests, 0 failed. No offline-cache failures. Acceptance: - [x] `grep -r ProvenanceRecord src/` returns zero hits - [x] `cargo build` clean, `cargo test` green - [x] `tier1/provenance.rs` contains only write-path logic, no type defs Unblocks #31/#32 (they touch the same provenance types — there is now exactly one type and one `compute_hash` to evolve). Co-Authored-By: Claude Opus 4.7 --- ROADMAP.adoc | 2 +- docs/architecture/TOPOLOGY.md | 2 +- src/tier1/provenance.rs | 71 +++++++---------------------------- 3 files changed, 16 insertions(+), 59 deletions(-) diff --git a/ROADMAP.adoc b/ROADMAP.adoc index 5295b2f..a900176 100644 --- a/ROADMAP.adoc +++ b/ROADMAP.adoc @@ -9,7 +9,7 @@ * [x] CLI with subcommands (init, start, drift, provenance, history, status, octad, doctor, gc, validate, version) * [x] Manifest parser (verisimiser.toml with `[tier1]`/`[tier2]`/`[retention]` config) * [x] Concerns octad fixed canonical (ADR-0004): Data, Metadata, Provenance, Lineage, Constraints, AccessControl, Temporal, Simulation -* [x] Tier 1 data types per concern (ProvenanceRecord, TemporalVersion, DriftReport, AccessPredicate) +* [x] Tier 1 data types per concern (ProvenanceEntry, TemporalVersion, DriftReport, AccessPredicate) * [x] ABI module stubs (Idris2 + Zig FFI) and domain-tagged `compute_hash` * [x] Drift categories pinned (ADR-0003): input / distance / threshold per category * [x] README rewritten around concerns octad (V-L1-A2, #20) diff --git a/docs/architecture/TOPOLOGY.md b/docs/architecture/TOPOLOGY.md index 
3609305..4d9deb3 100644 --- a/docs/architecture/TOPOLOGY.md +++ b/docs/architecture/TOPOLOGY.md @@ -12,7 +12,7 @@ verisimiser/ │ ├── src/manifest/ — TOML manifest parsing (verisimiser.toml) │ ├── src/tier1/ — Tier 1 piggyback data types │ │ ├── drift.rs — DriftReport, DriftCategory (8 categories) -│ │ ├── provenance.rs — ProvenanceRecord, SHA-256 hash chain +│ │ ├── provenance.rs — SHA-256 hash-chain write path (canonical ProvenanceEntry lives in src/abi/) │ │ └── temporal.rs — TemporalVersion, point-in-time snapshots │ ├── src/tier2/ — Tier 2 overlay stubs (graph, vector, tensor, semantic, document, spatial) │ ├── src/intercept/ — Per-backend interception strategies diff --git a/src/tier1/provenance.rs b/src/tier1/provenance.rs index fc457b1..fc0c49e 100644 --- a/src/tier1/provenance.rs +++ b/src/tier1/provenance.rs @@ -14,69 +14,26 @@ // `abi::ProvenanceEntry::compute_hash` (domain-tagged + length-prefixed // — see ADR-0002 / #27); this module just persists the entries. -use crate::abi::ProvenanceEntry; use chrono::{DateTime, Utc}; use rusqlite::{params, Connection, TransactionBehavior}; -use serde::{Deserialize, Serialize}; // ========================================================================= -// Public re-export: the canonical entry shape +// Canonical entry shape // ========================================================================= -/// A single link in the provenance hash chain. Mirrors -/// `abi::ProvenanceEntry` 1:1 — kept here for backward compatibility -/// with code that imported `tier1::provenance::ProvenanceRecord`. New -/// callers should prefer the canonical type in `abi`. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProvenanceRecord { - pub hash: String, - pub previous_hash: String, - pub entity_id: String, - pub operation: String, - pub actor: String, - pub timestamp: DateTime<Utc>, - pub before_snapshot: Option<String>, - pub transformation: Option<String>, -} - -impl ProvenanceRecord { - /// Backward-compat shim. 
Computes the canonical hash via - /// `abi::ProvenanceEntry::compute_hash` rather than the older - /// string-based form. - pub fn compute_hash( - previous_hash: &str, - entity_id: &str, - operation: &str, - actor: &str, - timestamp: &DateTime<Utc>, - before_snapshot: Option<&str>, - transformation: Option<&str>, - ) -> String { - ProvenanceEntry::compute_hash( - previous_hash, - entity_id, - operation, - actor, - timestamp, - before_snapshot, - transformation, - ) - } - - /// Verify that this record's stored hash matches a fresh recompute. - pub fn verify(&self) -> bool { - let expected = Self::compute_hash( - &self.previous_hash, - &self.entity_id, - &self.operation, - &self.actor, - &self.timestamp, - self.before_snapshot.as_deref(), - self.transformation.as_deref(), - ); - self.hash == expected - } -} +// The provenance entry type is defined once, in `crate::abi`. It is the +// canonical representation used across the Rust CLI, the Idris2 ABI +// proofs, and the Zig FFI bridge, and it is the type persisted at the +// SQLite boundary by `append_provenance` below. +// +// This module previously carried a byte-for-byte duplicate struct +// (same fields, its own `compute_hash`/`verify`) under a different +// name. It was orphaned — nothing constructed it — and a second copy of +// the hash function is an integrity hazard: a future change to one +// `compute_hash` would silently leave the other broken (#26). The +// duplicate has been deleted; the canonical type is re-exported here so +// `tier1::provenance::ProvenanceEntry` resolves to the one definition. +pub use crate::abi::ProvenanceEntry; // ========================================================================= // SQLite sidecar schema