diff --git a/Cargo.toml b/Cargo.toml
index 2d00641..0cf1cec 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -114,6 +114,7 @@ harness = false
 [[bench]]
 name = "sla"
 harness = false
+required-features = ["viz"]
 
 [workspace]
 members = ["pathmap-derive"]
diff --git a/benches/binary_keys.rs b/benches/binary_keys.rs
index 2d8ded7..8f3c1e0 100644
--- a/benches/binary_keys.rs
+++ b/benches/binary_keys.rs
@@ -77,6 +77,22 @@ fn binary_val_count_bench(bencher: Bencher, n: u64) {
     assert_eq!(sink, n as usize);
 }
 
+#[divan::bench(args = [125, 250, 500, 1000, 2000, 4000])]
+fn binary_goat_val_count_bench(bencher: Bencher, n: u64) {
+
+    let keys = make_keys(n as usize, 1);
+
+    let mut map: PathMap<u64> = PathMap::new();
+    for i in 0..n { map.set_val_at(&keys[i as usize], i); }
+
+    //Benchmark the time taken to count the number of values in the map
+    let mut sink = 0;
+    bencher.bench_local(|| {
+        *black_box(&mut sink) = map.goat_val_count()
+    });
+    assert_eq!(sink, n as usize);
+}
+
 #[divan::bench(args = [50, 100, 200, 400, 800, 1600])]
 fn binary_drop_head(bencher: Bencher, n: u64) {
 
diff --git a/benches/cities.rs b/benches/cities.rs
index 231cb6e..cc5cbc9 100644
--- a/benches/cities.rs
+++ b/benches/cities.rs
@@ -168,6 +168,25 @@ fn cities_val_count(bencher: Bencher) {
     assert_eq!(sink, unique_count);
 }
 
+#[divan::bench()]
+fn cities_goat_val_count(bencher: Bencher) {
+
+    let pairs = read_data();
+    let mut map = PathMap::new();
+    let mut unique_count = 0;
+    for (k, v) in pairs.iter() {
+        if map.set_val_at(k, *v).is_none() {
+            unique_count += 1;
+        }
+    }
+
+    let mut sink = 0;
+    bencher.bench_local(|| {
+        *black_box(&mut sink) = map.goat_val_count();
+    });
+    assert_eq!(sink, unique_count);
+}
+
 #[cfg(feature="arena_compact")]
 #[divan::bench()]
 fn cities_val_count_act(bencher: Bencher) {
diff --git a/benches/shakespeare.rs b/benches/shakespeare.rs
index 2040ba5..5528f73 100644
--- a/benches/shakespeare.rs
+++ b/benches/shakespeare.rs
@@ -113,6 +113,25 @@ fn shakespeare_words_val_count(bencher: Bencher) {
     assert_eq!(sink, unique_count);
 }
 
+#[divan::bench()]
+fn shakespeare_words_goat_val_count(bencher: Bencher) {
+
+    let strings = read_data(true);
+    let mut map = PathMap::new();
+    let mut unique_count = 0;
+    for (v, k) in strings.iter().enumerate() {
+        if map.set_val_at(k, v).is_none() {
+            unique_count += 1;
+        }
+    }
+
+    let mut sink = 0;
+    bencher.bench_local(|| {
+        *black_box(&mut sink) = map.goat_val_count();
+    });
+    assert_eq!(sink, unique_count);
+}
+
 #[divan::bench()]
 fn shakespeare_sentences_insert(bencher: Bencher) {
 
@@ -168,6 +187,25 @@ fn shakespeare_sentences_val_count(bencher: Bencher) {
     assert_eq!(sink, unique_count);
 }
 
+#[divan::bench()]
+fn shakespeare_sentences_goat_val_count(bencher: Bencher) {
+
+    let strings = read_data(false);
+    let mut map = PathMap::new();
+    let mut unique_count = 0;
+    for (v, k) in strings.iter().enumerate() {
+        if map.set_val_at(k, v).is_none() {
+            unique_count += 1;
+        }
+    }
+
+    let mut sink = 0;
+    bencher.bench_local(|| {
+        *black_box(&mut sink) = map.goat_val_count();
+    });
+    assert_eq!(sink, unique_count);
+}
+
 #[cfg(feature="arena_compact")]
 #[divan::bench()]
 fn shakespeare_sentences_val_count_act(bencher: Bencher) {
diff --git a/benches/sla.rs b/benches/sla.rs
index 5d84dbe..92d89f0 100644
--- a/benches/sla.rs
+++ b/benches/sla.rs
@@ -388,7 +388,7 @@ fn tipover_attention_weave() {
     // let res = rtq.vF_mut().merkleize();
     // println!("{:?}", res.hash);
     let t0 = Instant::now();
-    println!("{:?} {:?}", rtq.vF().read_zipper().into_cata_cached(morphisms::alg::hash), t0.elapsed().as_micros());
+    // println!("{:?} {:?}", rtq.vF().read_zipper().into_cata_cached(morphisms::alg::hash), t0.elapsed().as_micros());
     return;
 
     // rtk.vF_mut().merkleize();
diff --git a/benches/sparse_keys.rs b/benches/sparse_keys.rs
index 8489314..42c4d42 100644
--- a/benches/sparse_keys.rs
+++ b/benches/sparse_keys.rs
@@ -92,6 +92,26 @@ fn sparse_val_count_bench(bencher: Bencher, n: u64) {
     assert_eq!(sink, n as usize);
 }
 
+#[divan::bench(args = [125, 250, 500, 1000, 2000, 4000])]
+fn sparse_goat_val_count_bench(bencher: Bencher, n: u64) {
+
+    let mut r = StdRng::seed_from_u64(1);
+    let keys: Vec<Vec<u8>> = (0..n).into_iter().map(|_| {
+        let len = (r.random::<u8>() % 18) + 3; //length between 3 and 20 chars
+        (0..len).into_iter().map(|_| r.random::<u8>()).collect()
+    }).collect();
+
+    let mut map: PathMap<u64> = PathMap::new();
+    for i in 0..n { map.set_val_at(&keys[i as usize], i); }
+
+    //Benchmark the time taken to count the number of values in the map
+    let mut sink = 0;
+    bencher.bench_local(|| {
+        *black_box(&mut sink) = map.goat_val_count()
+    });
+    assert_eq!(sink, n as usize);
+}
+
 #[divan::bench(args = [50, 100, 200, 400, 800, 1600])]
 fn binary_drop_head(bencher: Bencher, n: u64) {
 
diff --git a/benches/superdense_keys.rs b/benches/superdense_keys.rs
index 597954d..34a09ce 100644
--- a/benches/superdense_keys.rs
+++ b/benches/superdense_keys.rs
@@ -253,6 +253,21 @@ fn superdense_val_count_bench(bencher: Bencher, n: u64) {
     assert_eq!(sink, n as usize);
 }
 
+#[divan::bench(sample_size = 1, args = [100, 200, 400, 800, 1600, 3200, 20_000])]
+fn superdense_goat_val_count_bench(bencher: Bencher, n: u64) {
+
+    let mut map: PathMap<u64> = PathMap::new();
+    for i in 0..n { map.set_val_at(prefix_key(&i), i); }
+
+    //Benchmark the time taken to count the number of values in the map
+    let mut sink = 0;
+    bencher.bench_local(|| {
+        *black_box(&mut sink) = map.goat_val_count()
+    });
+    assert_eq!(sink, n as usize);
+}
+
+
 #[cfg(feature="arena_compact")]
 #[divan::bench(sample_size = 1, args = [100, 200, 400, 800, 1600, 3200, 20_000])]
 fn superdense_val_count_bench_act(bencher: Bencher, n: u64) {
diff --git a/src/dense_byte_node.rs b/src/dense_byte_node.rs
index dd846a2..defaccf 100644
--- a/src/dense_byte_node.rs
+++ b/src/dense_byte_node.rs
@@ -29,7 +29,7 @@ pub struct ByteNode {
     #[cfg(feature = "nightly")]
     values: Vec,
     #[cfg(not(feature = "nightly"))]
-    values: Vec,
+    pub(crate) values: Vec,
     alloc: A,
 }
 
@@ -991,10 +991,18 @@ impl> TrieNode
             t + cf.has_val() as usize + cf.rec().map(|r| val_count_below_node(r, cache)).unwrap_or(0)
         });
     }
-    fn node_goat_val_count(&self) -> usize {
+/*    fn node_goat_val_count(&self) -> usize {
         return self.values.iter().rfold(0, |t, cf| {
-            t + cf.has_val() as usize
+            t + cf.has_val() as usize + cf.rec().map(|r| r.as_tagged().node_goat_val_count()).unwrap_or(0)
         });
+    }*/
+    #[inline]
+    fn node_goat_val_count(&self) -> usize {
+        let mut result = 0;
+        for cf in self.values.iter() {
+            result += cf.has_val() as usize
+        }
+        result
     }
     fn node_child_iter_start(&self) -> (u64, Option<&TrieNodeODRc>) {
         for (pos, cf) in self.values.iter().enumerate() {
diff --git a/src/line_list_node.rs b/src/line_list_node.rs
index 040f714..6612031 100644
--- a/src/line_list_node.rs
+++ b/src/line_list_node.rs
@@ -403,7 +403,7 @@ impl LineListNode {
         }
     }
     #[inline]
-    unsafe fn child_in_slot<const SLOT: usize>(&self) -> &TrieNodeODRc<V, A> {
+    pub(crate) unsafe fn child_in_slot<const SLOT: usize>(&self) -> &TrieNodeODRc<V, A> {
         match SLOT {
             0 => unsafe{ &*self.val_or_child0.child },
             1 => unsafe{ &*self.val_or_child1.child },
@@ -419,7 +419,7 @@ impl LineListNode {
         }
     }
     #[inline]
-    unsafe fn val_in_slot<const SLOT: usize>(&self) -> &V {
+    pub(crate) unsafe fn val_in_slot<const SLOT: usize>(&self) -> &V {
         match SLOT {
             0 => unsafe{ &**self.val_or_child0.val },
             1 => unsafe{ &**self.val_or_child1.val },
@@ -1986,6 +1986,25 @@ impl TrieNode for LineListNode
         }
         result
     }
+/*    #[inline]
+    fn node_goat_val_count(&self) -> usize {
+        let mut result = 0;
+        if self.is_used_value_0() {
+            result += 1;
+        }
+        if self.is_used_value_1() {
+            result += 1;
+        }
+        if self.is_used_child_0() {
+            let child_node = unsafe{ self.child_in_slot::<0>() };
+            result += child_node.as_tagged().node_goat_val_count();
+        }
+        if self.is_used_child_1() {
+            let child_node = unsafe{ self.child_in_slot::<1>() };
+            result += child_node.as_tagged().node_goat_val_count();
+        }
+        result
+    }*/
     #[inline]
     fn node_goat_val_count(&self) -> usize {
         //Here are 3 alternative implementations. They're basically the same in perf, with a slight edge to the
diff --git a/src/trie_map.rs b/src/trie_map.rs
index 3c5b0f3..5d21186 100644
--- a/src/trie_map.rs
+++ b/src/trie_map.rs
@@ -511,9 +511,25 @@ impl PathMap {
         let root_val = unsafe{ &*self.root_val.get() }.is_some() as usize;
         match self.root() {
             Some(root) => {
-                traverse_physical(root,
-                    |node, ctx: usize| { ctx + node.node_goat_val_count() },
-                    |ctx, child_ctx| { ctx + child_ctx },
+                // root.as_tagged().node_goat_val_count() + root_val
+                // traverse_physical(root,
+                //     |node, ctx: usize| { ctx + node.node_goat_val_count() },
+                //     |ctx, child_ctx| { ctx + child_ctx },
+                // ) + root_val
+
+                // traverse_split_cata(
+                //     root,
+                //     |v, _| { 1usize },
+                //     |_, w, _| { 1 + w },
+                //     |bm, ws: &mut [usize], _| { ws.iter().sum() }
+                // ) + root_val
+                // Adam: this doesn't need to be called "traverse_osplit_cata" or be exposed under this interface; it can just live in morphisms
+                traverse_osplit_cata(
+                    root,
+                    |v, _| { 1usize }, // on leaf values
+                    |_, w, _| { 1 + w }, // on values amongst a path
+                    |bm, w: usize, _, total| { *total += w }, // on merging children into a node
+                    |bm, total: usize, _| { total } // finalizing a node
                 ) + root_val
             },
             None => root_val
diff --git a/src/trie_node.rs b/src/trie_node.rs
index 111bd5e..0687c4f 100644
--- a/src/trie_node.rs
+++ b/src/trie_node.rs
@@ -7,7 +7,7 @@ use dyn_clone::*;
 use local_or_heap::LocalOrHeap;
 use arrayvec::ArrayVec;
 
-use crate::utils::ByteMask;
+use crate::utils::{BitMask, ByteMask};
 use crate::alloc::Allocator;
 use crate::dense_byte_node::*;
 use crate::ring::*;
@@ -2422,16 +2422,147 @@ fn traverse_physical_children_internal(node: TaggedNode
 {
     let mut ctx = Ctx::default();
-    let (mut tok, mut child) = node.node_child_iter_start();
-    while let Some(child_node) = child {
-        let child_ctx = traverse_physical_internal(child_node, node_f, fold_f, cache);
-        ctx = fold_f(ctx, child_ctx);
-        (tok, child) = node.node_child_iter_next(tok);
+    match node {
+        TaggedNodeRef::DenseByteNode(n) => {
+            for cf in n.values.iter() {
+                if let Some(rec) = cf.rec() {
+                    let child_ctx = traverse_physical_internal(rec, node_f, fold_f, cache);
+                    ctx = fold_f(ctx, child_ctx);
+                }
+            }
+        }
+        TaggedNodeRef::LineListNode(n) => {
+            if n.is_used_child_0() {
+                let child_node = unsafe{ n.child_in_slot::<0>() };
+                let child_ctx = traverse_physical_internal(child_node, node_f, fold_f, cache);
+                ctx = fold_f(ctx, child_ctx);
+            }
+            if n.is_used_child_1() {
+                let child_node = unsafe{ n.child_in_slot::<1>() };
+                let child_ctx = traverse_physical_internal(child_node, node_f, fold_f, cache);
+                ctx = fold_f(ctx, child_ctx);
+            }
+        }
+        TaggedNodeRef::CellByteNode(_) => { todo!() }
+        TaggedNodeRef::TinyRefNode(_) => { todo!() }
+        TaggedNodeRef::EmptyNode => { todo!() }
     }
     node_f(node, ctx)
 }
 
+// This experiment is still OK, but the `&mut [W]` is awkward to instantiate if you don't actually have
+/*pub fn traverse_split_cata<'a, A : Allocator, V : TrieValue, W, MapF, CollapseF, AlgF>(node: &TrieNodeODRc<V, A>, mut map_f: MapF, mut collapse_f: CollapseF, alg_f: AlgF) -> W
+where
+    MapF: Copy + FnMut(&V, &[u8]) -> W + 'a,
+    CollapseF: Copy + FnMut(&V, W, &[u8]) -> W + 'a,
+    AlgF: Copy + Fn(&ByteMask, &mut [W], &[u8]) -> W + 'a,
+{
+    match node.as_tagged() {
+        TaggedNodeRef::DenseByteNode(n) => {
+            let mut ws = [const { std::mem::MaybeUninit::<W>::uninit() }; 256];
+            // let mut ws: Vec<std::mem::MaybeUninit<W>> = Vec::with_capacity(n.mask.count_bits());
+            // unsafe { ws.set_len(n.mask.count_bits()) };
+            let mut c = 0;
+            for cf in n.values.iter() {
+                if let Some(rec) = cf.rec() {
+                    let w = traverse_split_cata(rec, map_f, collapse_f, alg_f);
+                    if let Some(v) = cf.val() {
+                        ws[c].write(collapse_f(v, w, &[]));
+                    } else {
+                        ws[c].write(w);
+                    }
+                } else if let Some(v) = cf.val() {
+                    ws[c].write(map_f(v, &[]));
+                }
+                c += 1;
+            }
+            alg_f(&n.mask, unsafe { std::mem::transmute(&mut ws[..c]) }, &[])
+        }
+        TaggedNodeRef::LineListNode(n) => {
+            // let mut ws = vec![];
+            // if n.is_used_value_0() {
+            //     ws.append(map_f(unsafe { n.val_in_slot::<0>() }, &[]));
+            // }
+            // if n.is_used_value_1() {
+            //     ws.append(map_f(unsafe { n.val_in_slot::<1>() }, &[]));
+            // }
+            // if n.is_used_child_0() {
+            //     let child_node = unsafe{ n.child_in_slot::<0>() };
+            //     let child_ctx = traverse_split_cata(child_node, map_f, collapse_f, alg_f);
+            //
+            // }
+            // if n.is_used_child_1() {
+            //     let child_node = unsafe{ n.child_in_slot::<1>() };
+            //     let child_ctx = traverse_physical_internal(child_node, node_f, fold_f, cache);
+            //     ctx = fold_f(ctx, child_ctx);
+            // }
+            alg_f(&ByteMask::new(), &mut [], &[])
+        }
+        TaggedNodeRef::CellByteNode(_) => { todo!() }
+        TaggedNodeRef::TinyRefNode(_) => { todo!() }
+        TaggedNodeRef::EmptyNode => { todo!() }
+    }
+}
+*/
+
+// Adam: This seems to be a winner, though it needs some work, the split alg gives us the opportunity to nicely compose the different calls for the different node types without introducing overhead
+pub fn traverse_osplit_cata<'a, A : Allocator, V : TrieValue, Alg : Default, W, MapF, CollapseF, InAlgF, OutAlgF>(node: &TrieNodeODRc<V, A>, mut map_f: MapF, mut collapse_f: CollapseF, in_alg_f: InAlgF, out_alg_f: OutAlgF) -> W
+where
+    MapF: Copy + FnMut(&V, &[u8]) -> W + 'a,
+    CollapseF: Copy + FnMut(&V, W, &[u8]) -> W + 'a,
+    InAlgF: Copy + Fn(&ByteMask, W, &[u8], &mut Alg),
+    OutAlgF: Copy + Fn(&ByteMask, Alg, &[u8]) -> W + 'a,
+{
+    match node.as_tagged() {
+        TaggedNodeRef::DenseByteNode(n) => {
+            let mut ws = Some(Alg::default());
+            for cf in n.values.iter() {
+                if let Some(rec) = cf.rec() {
+                    let w = traverse_osplit_cata(rec, map_f, collapse_f, in_alg_f, out_alg_f);
+                    if let Some(v) = cf.val() {
+                        in_alg_f(&n.mask, collapse_f(v, w, &[]), &[], unsafe { ws.as_mut().unwrap_unchecked() });
+                    } else {
+                        in_alg_f(&n.mask, w, &[], unsafe { ws.as_mut().unwrap_unchecked() });
+                    }
+                } else if let Some(v) = cf.val() {
+                    in_alg_f(&n.mask, map_f(v, &[]), &[], unsafe { ws.as_mut().unwrap_unchecked() });
+                }
+            }
+            out_alg_f(&n.mask, unsafe { std::mem::take(&mut ws).unwrap_unchecked() }, &[])
+        }
+        TaggedNodeRef::LineListNode(n) => {
+            // Adam: I skimped out on the collapse logic here, I assume there are some built-in LineListNode functions I can use for prefixes, or another way to organize the branching based on the mask directly
+            let mut ws = Some(Alg::default());
+
+            if n.is_used_value_0() {
+                in_alg_f(&ByteMask::new(), map_f(unsafe { n.val_in_slot::<0>() }, &[]), &[], unsafe { ws.as_mut().unwrap_unchecked() });
+            }
+            if n.is_used_value_1() {
+                in_alg_f(&ByteMask::new(), map_f(unsafe { n.val_in_slot::<1>() }, &[]), &[], unsafe { ws.as_mut().unwrap_unchecked() });
+            }
+            if n.is_used_child_0() {
+                let child_node = unsafe{ n.child_in_slot::<0>() };
+                let w = traverse_osplit_cata(child_node, map_f, collapse_f, in_alg_f, out_alg_f);
+                in_alg_f(&ByteMask::new(), w, &[], unsafe { ws.as_mut().unwrap_unchecked() });
+
+            }
+            if n.is_used_child_1() {
+                let child_node = unsafe{ n.child_in_slot::<1>() };
+                let w = traverse_osplit_cata(child_node, map_f, collapse_f, in_alg_f, out_alg_f);
+                in_alg_f(&ByteMask::new(), w, &[], unsafe { ws.as_mut().unwrap_unchecked() });
+            }
+
+            out_alg_f(&ByteMask::new(), unsafe { std::mem::take(&mut ws).unwrap_unchecked() }, &[])
+        }
+        TaggedNodeRef::CellByteNode(_) => { todo!() }
+        TaggedNodeRef::TinyRefNode(_) => { todo!() }
+        TaggedNodeRef::EmptyNode => {
+            out_alg_f(&ByteMask::new(), Alg::default(), &[])
+        }
+    }
+}
+
 /// Internal function to walk a mut TrieNodeODRc ref along a path
 ///
 /// If `stop_early` is `true`, this function will return the parent node of the path and will never return
@@ -2483,6 +2614,9 @@ pub(crate) fn make_cell_node(node: &mut Tr
 // module come from the visibility of the trait it is derived on. In this case, `TrieNode`
 //Credit to QuineDot for his ideas on this pattern here: https://users.rust-lang.org/t/inferred-lifetime-for-dyn-trait/112116/7
 pub(crate) use opaque_dyn_rc_trie_node::TrieNodeODRc;
+use crate::morphisms::SplitCata;
+use crate::TrieValue;
+
 #[cfg(not(feature = "slim_ptrs"))]
 mod opaque_dyn_rc_trie_node {
     use std::sync::Arc;
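
Note (reviewer sketch, not part of the patch): the four closures handed to `traverse_osplit_cata` in `goat_val_count` play distinct roles: map a leaf value, collapse a value that sits above a subtree, fold one child's result into a per-node accumulator, and finalize that accumulator into the node's result. The standalone sketch below reproduces that callback shape on a toy trie, specialized to the usize-counting case. `Node` and `count_vals` here are hypothetical stand-ins, not pathmap types, and the node layout is simplified compared to the real DenseByteNode/LineListNode arms.

// Toy trie: a node optionally stores a value and has any number of children.
struct Node<V> {
    val: Option<V>,
    children: Vec<Node<V>>,
}

// Same callback roles as traverse_osplit_cata, with W = usize and Alg = usize.
fn count_vals<V>(
    node: &Node<V>,
    map_f: impl Fn(&V) -> usize + Copy,             // on leaf values
    collapse_f: impl Fn(&V, usize) -> usize + Copy, // on values amongst a path
    in_alg_f: impl Fn(usize, &mut usize) + Copy,    // merge one child's result into the node accumulator
    out_alg_f: impl Fn(usize) -> usize + Copy,      // finalize the node accumulator
) -> usize {
    let mut acc = 0usize; // plays the role of Alg::default()
    for child in &node.children {
        let w = count_vals(child, map_f, collapse_f, in_alg_f, out_alg_f);
        in_alg_f(w, &mut acc);
    }
    let below = out_alg_f(acc);
    match &node.val {
        Some(v) if node.children.is_empty() => map_f(v),
        Some(v) => collapse_f(v, below),
        None => below,
    }
}

fn main() {
    // A root value plus two leaf values: expect a count of 3.
    let trie = Node { val: Some(7u64), children: vec![
        Node { val: Some(8u64), children: vec![] },
        Node { val: Some(9u64), children: vec![] },
    ]};
    let n = count_vals(
        &trie,
        |_: &u64| 1usize,                          // on leaf values
        |_: &u64, w: usize| 1 + w,                 // on values amongst a path
        |w: usize, total: &mut usize| *total += w, // on merging children into a node
        |total: usize| total,                      // finalizing a node
    );
    assert_eq!(n, 3);
}

With these closures the sketch returns the total number of stored values, which is what the `goat_val_count` call in trie_map.rs computes over the real node types (plus the root value).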