Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/ppvm-tableau/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,7 @@ harness = false
[[bench]]
name = "measure-all"
harness = false

[[bench]]
name = "rot2-apply"
harness = false
71 changes: 71 additions & 0 deletions crates/ppvm-tableau/benches/rot2-apply.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// SPDX-FileCopyrightText: 2026 The PPVM Authors
// SPDX-License-Identifier: Apache-2.0

//! Two-qubit-rotation (`rotate_2`) throughput benchmark.
//!
//! `rotate_2` (RXX/RYY/RZZ) is the only caller of the "apply" coefficient
//! accumulation (`compute_coefficients_after_pauli_apply`). The headline
//! `stim-circuits` bench is T-gate heavy and never hits this path, so this bench
//! exercises it directly: a branchy brickwork of non-Clifford two-qubit
//! rotations whose coefficient vector grows into the thousands, making the
//! per-`rotate_2` apply cost dominate.
//!
//! Matches the cultivation config (`ByteFxHashF64<8>, usize`) so the workload is
//! representative of a real branchy run.

use std::f64::consts::PI;
use std::time::Duration;

use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use ppvm_pauli_sum::config::indexmap::ByteFxHashF64;
use ppvm_tableau::prelude::*;

/// Tableau instantiation exercised by this benchmark: a `GeneralizedTableau`
/// parameterised with `ByteFxHashF64<8>` and `usize` (the combination the module
/// docs describe as the cultivation config).
type Tab = GeneralizedTableau<ByteFxHashF64<8>, usize>;

/// Builds the benchmark workload: a brickwork of two-qubit rotations on `n`
/// qubits, repeated for `layers` layers.
///
/// The circuit is:
/// 1. a Hadamard on every even-indexed qubit;
/// 2. per layer, `rxx` then `ryy` on each pair `(a, a + 1)` with even `a`,
///    followed by `rzz` then `rxx` on each pair with odd `a`;
/// 3. after every even-numbered layer (0, 2, ...), a Hadamard on every
///    odd-indexed qubit.
///
/// The tableau is created with `new_with_seed(n, 1e-10, 1)`; presumably the
/// second argument is a coefficient cutoff and the third an RNG seed — confirm
/// against `GeneralizedTableau::new_with_seed`.
fn rot2_brickwork(n: usize, layers: usize) -> Tab {
    // Exclusive upper bound for the left qubit of a pair `(a, a + 1)`.
    // Saturating so that `n == 0` produces empty ranges instead of underflow.
    let left_end = n.saturating_sub(1);

    let mut state: Tab = GeneralizedTableau::new_with_seed(n, 1e-10, 1);

    for even in (0..n).step_by(2) {
        state.h(even);
    }

    for depth in 0..layers {
        // Pairs starting on an even qubit.
        for left in (0..left_end).step_by(2) {
            let right = left + 1;
            state.rxx(left, right, 0.3 * PI);
            state.ryy(left, right, 0.4 * PI);
        }

        // Pairs starting on an odd qubit.
        for left in (1..left_end).step_by(2) {
            let right = left + 1;
            state.rzz(left, right, 0.25 * PI);
            state.rxx(left, right, 0.15 * PI);
        }

        // Only even-numbered layers are followed by the odd-qubit Hadamards.
        if depth % 2 != 0 {
            continue;
        }
        for odd in (1..n).step_by(2) {
            state.h(odd);
        }
    }

    state
}

/// Registers the `rot2-apply` Criterion group.
///
/// One benchmark is registered per `(qubits, layers)` shape. Each benchmark id
/// has the form `n{qubits}_l{layers}_m{coefficients}`, where the coefficient
/// count is read from a circuit built once up front, outside the timed
/// closure. The timed closure rebuilds the whole brickwork on every iteration.
pub fn rot2_apply_benchmarks(c: &mut Criterion) {
    // (qubits, layers): each grows the coefficient vector to a different scale.
    const SHAPES: [(usize, usize); 3] = [(8, 4), (10, 4), (12, 3)];

    let mut group = c.benchmark_group("rot2-apply");

    for shape in &SHAPES {
        let (n, layers) = *shape;

        // Untimed build, used only to label the benchmark with the final size.
        let m = rot2_brickwork(n, layers).coefficients.len();
        let id = BenchmarkId::from_parameter(format!("n{n}_l{layers}_m{m}"));

        group.bench_with_input(id, shape, |bencher, &(qubits, depth)| {
            bencher.iter(|| rot2_brickwork(qubits, depth))
        });
    }

    group.finish();
}

// Criterion entry point for this bench target. The group runs
// `rot2_apply_benchmarks` with a 500 ms warm-up, a 2 s measurement window and
// 30 samples per benchmark instead of Criterion's defaults.
criterion_group! {
name = benches;
config = Criterion::default()
.warm_up_time(Duration::from_millis(500))
.measurement_time(Duration::from_secs(2))
.sample_size(30);
targets = rot2_apply_benchmarks
}
// Generates `main` for the bench binary (the Cargo target sets `harness = false`).
criterion_main!(benches);
48 changes: 29 additions & 19 deletions crates/ppvm-tableau/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

use std::{fmt::Debug, marker::PhantomData};

// Only the rayon branch-coefficient helpers coalesce into a map now; the apply
// path and the default-feature branch path both use flat-Vec sort-merge/relabel.
#[cfg(feature = "rayon")]
use fxhash::FxHashMap as HashMap;

use bitvec::array::BitArray;
Expand Down Expand Up @@ -465,23 +468,33 @@ where
branch_coefficients_seq(items.iter().copied(), 2 * items.len(), params)
}

/// Sequential accumulation of apply coefficients.
/// Sequential relabel of apply coefficients.
///
/// Pauli application sends every branch to `branch_index = idx ^ stab_anticomm_bits`.
/// XOR by a fixed constant is a bijection, so distinct input indices always map
/// to distinct branch indices: unlike the T-gate branch split (which emits two
/// streams that genuinely collide), the apply path produces no index collisions
/// at all. A per-index coalesce can therefore never merge two entries — the
/// `entry()`-keyed map was pure overhead (hash every key + table allocation) for
/// what is a straight relabel. We instead build a flat `Vec` in one sequential,
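/// prefetch-friendly pass and let the caller apply the magnitude cutoff. (The
/// returned keys are unique only if the input indices are unique: the bijection
/// preserves distinctness but cannot create it. The `Vec` backing relies on that
/// invariant, whereas the old map would have coalesced any duplicate inputs.)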
fn apply_coefficients_seq<I, CoeffType>(
items: impl IntoIterator<Item = (Complex<CoeffType>, I)>,
capacity: usize,
stab_anticomm_bits: I,
destab_anticomm_bits: I,
odd_phase_mask: I,
phase_decomp: u8,
) -> HashMap<I, Complex<CoeffType>>
) -> Vec<(I, Complex<CoeffType>)>
where
I: TableauIndex,
CoeffType: One + Zero + Clone + num::Num,
Complex<CoeffType>:
std::ops::Mul<Output = Complex<CoeffType>> + std::ops::AddAssign + From<Complex64> + Copy,
{
let mut map: HashMap<I, Complex<CoeffType>> =
HashMap::with_capacity_and_hasher(capacity, Default::default());
let mut out: Vec<(I, Complex<CoeffType>)> = Vec::with_capacity(capacity);
for (coeff, idx) in items {
debug_assert!(
!(coeff.re == CoeffType::zero() && coeff.im == CoeffType::zero()),
Expand All @@ -497,23 +510,27 @@ where
let branch_phase = (branch_phase_contribution + phase_decomp) % 4;
let phase_factor: Complex<CoeffType> =
COMPLEX_PHASE_CONVERSION[branch_phase as usize].into();
let branch_coefficient = phase_factor * coeff;
*map.entry(branch_index).or_insert(Complex::zero()) += branch_coefficient;
out.push((branch_index, phase_factor * coeff));
}
map
out
}

/// Accumulate coefficients for pauli application. When the coefficient count
/// exceeds `RAYON_COEFF_THRESHOLD`, uses parallel map/collect followed by
/// sequential accumulation. Below the threshold, falls back to sequential.
/// Relabel coefficients for pauli application. When the coefficient count
/// exceeds `RAYON_COEFF_THRESHOLD`, the per-element relabel runs as a parallel
/// map; below the threshold it falls back to the sequential relabel.
///
/// Because the relabel `idx ^ stab_anticomm_bits` is a bijection (see
/// [`apply_coefficients_seq`]), the parallel map's output already has unique
/// keys — there is nothing to coalesce, so the result is collected straight into
/// a flat `Vec` with no sequential fold afterwards.
#[cfg(feature = "rayon")]
fn apply_coefficients_parallel<I, CoeffType>(
items: &[(Complex<CoeffType>, I)],
stab_anticomm_bits: I,
destab_anticomm_bits: I,
odd_phase_mask: I,
phase_decomp: u8,
) -> HashMap<I, Complex<CoeffType>>
) -> Vec<(I, Complex<CoeffType>)>
where
I: TableauIndex + Send + Sync,
CoeffType: One + Zero + Clone + Send + Sync + num::Num,
Expand All @@ -523,7 +540,7 @@ where
if items.len() >= RAYON_COEFF_THRESHOLD {
use rayon::prelude::*;

let pairs: Vec<(I, Complex<CoeffType>)> = items
return items
.par_iter()
.map(|&(coeff, idx)| {
let branch_index = idx ^ stab_anticomm_bits;
Expand All @@ -539,13 +556,6 @@ where
(branch_index, phase_factor * coeff)
})
.collect();

let mut map: HashMap<I, Complex<CoeffType>> =
HashMap::with_capacity_and_hasher(pairs.len(), Default::default());
for (branch_idx, branch_coeff) in pairs {
*map.entry(branch_idx).or_insert(Complex::zero()) += branch_coeff;
}
return map;
}

apply_coefficients_seq(
Expand Down
95 changes: 95 additions & 0 deletions crates/ppvm-tableau/tests/apply_path.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// SPDX-FileCopyrightText: 2026 The PPVM Authors
// SPDX-License-Identifier: Apache-2.0

//! Differential lock for the two-qubit-rotation (`rotate_2`) coefficient path.
//!
//! `rotate_2` (RXX/RYY/RZZ) is the *only* caller of
//! `compute_coefficients_after_pauli_apply`, i.e. the "apply" coefficient
//! accumulation in `data.rs`. That accumulation relabels every branch index by
//! a fixed `idx ^ stab_anticomm_bits`, which is a bijection — so the keys never
//! collide and the coalescing container can never actually merge two entries.
//!
//! This test pins the measured-bit record of a branchy RXX/RYY/RZZ brickwork
//! over many seeds to an FNV-1a digest, so that swapping the apply
//! accumulation's storage strategy (hash coalesce → direct relabel) is proven
//! to leave every measurement outcome bit-identical.

use ppvm_pauli_sum::config::indexmap::ByteFxHashF64;
use ppvm_tableau::prelude::*;

/// Tableau instantiation under test: a `GeneralizedTableau` parameterised with
/// `ByteFxHashF64<8>` and `usize`.
type Tab = GeneralizedTableau<ByteFxHashF64<8>, usize>;

/// 64-bit FNV-1a offset basis: the initial state of the running digest.
const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
/// 64-bit FNV prime: the multiplier applied after each byte is XORed in.
const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

/// Advances a 64-bit FNV-1a digest by one byte.
///
/// XORs `byte` into the low bits of the running state `h`, then multiplies by
/// `FNV_PRIME` with wrapping (mod 2^64) arithmetic, and returns the new state.
fn fnv1a_update(h: u64, byte: u8) -> u64 {
    let mixed = h ^ u64::from(byte);
    mixed.wrapping_mul(FNV_PRIME)
}

/// Constructs the fixed two-qubit-rotation circuit used by the digest test.
///
/// Starting from `GeneralizedTableau::new_with_seed(n, 1e-10, 1)`, the circuit
/// applies a Hadamard to every even-indexed qubit and then runs `layers`
/// brickwork layers. Each layer applies `rxx`/`ryy` to the even-aligned
/// neighbour pairs and `rzz`/`rxx` to the odd-aligned ones, all at fixed
/// fractions of π. Even-numbered layers are followed by a Hadamard on every
/// odd-indexed qubit.
///
/// No randomness is drawn here explicitly; the function takes no inputs other
/// than `n` and `layers`.
fn build_rot2_brickwork(n: usize, layers: usize) -> Tab {
    use std::f64::consts::PI;

    let mut tableau: Tab = GeneralizedTableau::new_with_seed(n, 1e-10, 1);

    // One past the last valid left qubit of a neighbour pair; saturating so
    // that an empty register yields empty ranges.
    let pair_limit = n.saturating_sub(1);

    for qubit in (0..n).step_by(2) {
        tableau.h(qubit);
    }

    for layer_idx in 0..layers {
        // even brickwork pairs
        for lo in (0..pair_limit).step_by(2) {
            let hi = lo + 1;
            tableau.rxx(lo, hi, 0.3 * PI);
            tableau.ryy(lo, hi, 0.4 * PI);
        }

        // odd brickwork pairs
        for lo in (1..pair_limit).step_by(2) {
            let hi = lo + 1;
            tableau.rzz(lo, hi, 0.25 * PI);
            tableau.rxx(lo, hi, 0.15 * PI);
        }

        // Hadamards on the odd qubits follow even-numbered layers only.
        let is_even_layer = layer_idx % 2 == 0;
        if is_even_layer {
            for qubit in (1..n).step_by(2) {
                tableau.h(qubit);
            }
        }
    }

    tableau
}

/// Digests the measurement record of `tab` across `seeds` forks.
///
/// For each seed in `0..seeds`, `tab` is forked with `Some(seed)` and qubits
/// `0..n` are measured in ascending order. Every outcome is cast to a byte and
/// folded into a single FNV-1a state that starts at `FNV_OFFSET` and is carried
/// across all forks, so the result depends on the order of seeds and qubits.
///
/// # Panics
///
/// Panics with "no lost qubits in this circuit" if any `measure` call returns
/// an error/empty value.
fn measure_record_digest(tab: &Tab, n: usize, seeds: u64) -> u64 {
    (0..seeds).fold(FNV_OFFSET, |digest, seed| {
        let mut shot = tab.fork(Some(seed));
        (0..n).fold(digest, |acc, qubit| {
            let outcome = shot
                .measure(qubit)
                .expect("no lost qubits in this circuit");
            fnv1a_update(acc, outcome as u8)
        })
    })
}

/// Pins the measured-bit record of an 8-qubit, 3-layer brickwork (256 forked
/// seeds) to a golden FNV-1a digest.
#[test]
fn rot2_apply_path_measurement_digest_is_stable() {
    // Golden digest captured on the hash-coalesce apply path (pre-refactor).
    // The direct-relabel apply path must reproduce it bit-for-bit.
    const GOLDEN_DIGEST: u64 = 0x2401_e08e_70e6_ecc8;
    const QUBITS: usize = 8;
    const LAYERS: usize = 3;
    const SEEDS: u64 = 256;

    let tab = build_rot2_brickwork(QUBITS, LAYERS);

    // The circuit must actually branch, or it wouldn't exercise the apply path.
    let branch_count = tab.coefficients.len();
    assert!(
        branch_count > 8,
        "expected a branchy superposition, got {} coefficients",
        branch_count
    );

    let digest = measure_record_digest(&tab, QUBITS, SEEDS);
    println!("rot2_apply_path digest = {digest:#018x}");

    assert_eq!(
        digest, GOLDEN_DIGEST,
        "measurement record changed — apply-path refactor is not behaviour-preserving"
    );
}
Loading