diff --git a/chain/src/txhashset.rs b/chain/src/txhashset.rs index 3cc28ee04..348fc66e8 100644 --- a/chain/src/txhashset.rs +++ b/chain/src/txhashset.rs @@ -869,7 +869,10 @@ impl<'a> Extension<'a> { debug!( LOGGER, - "txhashset: validated the output|rproof|kernel mmrs, took {}s", + "txhashset: validated the output {}, rproof {}, kernel {} mmrs, took {}s", + self.output_pmmr.unpruned_size(), + self.rproof_pmmr.unpruned_size(), + self.kernel_pmmr.unpruned_size(), now.elapsed().as_secs(), ); @@ -1224,6 +1227,9 @@ fn input_pos_to_rewind( return Ok(bitmap); } + // + // TODO - rework this loop to use Bitmap::fast_or() on a vec of bitmaps. + // loop { if current == block_header.hash() { break; diff --git a/core/src/core/pmmr.rs b/core/src/core/pmmr.rs index 03c01c22a..810b0edc1 100644 --- a/core/src/core/pmmr.rs +++ b/core/src/core/pmmr.rs @@ -594,6 +594,9 @@ pub fn peak_map_height(mut pos: u64) -> (u64, u64) { /// are built. pub fn bintree_postorder_height(num: u64) -> u64 { + if num == 0 { + return 0; + } peak_map_height(num - 1).1 } @@ -669,7 +672,13 @@ pub fn family_branch(pos: u64, last_pos: u64) -> Vec<(u64, u64)> { branch } -/// Gets the position of the rightmost node (i.e. leaf) relative to the current -fn bintree_rightmost(num: u64) -> u64 { +/// Gets the position of the rightmost node (i.e. leaf) beneath the provided subtree root. +pub fn bintree_rightmost(num: u64) -> u64 { num - bintree_postorder_height(num) } + +/// Gets the position of the leftmost node (i.e. leaf) beneath the provided subtree root. +pub fn bintree_leftmost(num: u64) -> u64 { + let height = bintree_postorder_height(num); + num + 2 - (2 << height) +} diff --git a/core/tests/pmmr.rs b/core/tests/pmmr.rs index 5a1c0e568..e294aacfc 100644 --- a/core/tests/pmmr.rs +++ b/core/tests/pmmr.rs @@ -56,6 +56,32 @@ fn first_100_mmr_heights() { } } +// The pos of the rightmost leaf for the provided MMR size (last leaf in subtree). 
+#[test] +fn test_bintree_rightmost() { + assert_eq!(pmmr::bintree_rightmost(0), 0); + assert_eq!(pmmr::bintree_rightmost(1), 1); + assert_eq!(pmmr::bintree_rightmost(2), 2); + assert_eq!(pmmr::bintree_rightmost(3), 2); + assert_eq!(pmmr::bintree_rightmost(4), 4); + assert_eq!(pmmr::bintree_rightmost(5), 5); + assert_eq!(pmmr::bintree_rightmost(6), 5); + assert_eq!(pmmr::bintree_rightmost(7), 5); +} + +// The pos of the leftmost leaf for the provided MMR size (first leaf in subtree). +#[test] +fn test_bintree_leftmost() { + assert_eq!(pmmr::bintree_leftmost(0), 0); + assert_eq!(pmmr::bintree_leftmost(1), 1); + assert_eq!(pmmr::bintree_leftmost(2), 2); + assert_eq!(pmmr::bintree_leftmost(3), 1); + assert_eq!(pmmr::bintree_leftmost(4), 4); + assert_eq!(pmmr::bintree_leftmost(5), 5); + assert_eq!(pmmr::bintree_leftmost(6), 4); + assert_eq!(pmmr::bintree_leftmost(7), 1); +} + #[test] fn test_n_leaves() { // make sure we handle an empty MMR correctly diff --git a/store/src/pmmr.rs b/store/src/pmmr.rs index 1d70ba6a7..369aece3c 100644 --- a/store/src/pmmr.rs +++ b/store/src/pmmr.rs @@ -157,11 +157,7 @@ where } /// Rewind the PMMR backend to the given position. - fn rewind( - &mut self, - position: u64, - rewind_rm_pos: &Bitmap, - ) -> Result<(), String> { + fn rewind(&mut self, position: u64, rewind_rm_pos: &Bitmap) -> Result<(), String> { // First rewind the leaf_set with the necessary added and removed positions. 
if self.prunable { self.leaf_set.rewind(position, rewind_rm_pos); @@ -224,9 +220,8 @@ where pub fn new( data_dir: String, prunable: bool, - header: Option<&BlockHeader> + header: Option<&BlockHeader>, ) -> io::Result> { - let hash_file = AppendOnlyFile::open(format!("{}/{}", data_dir, PMMR_HASH_FILE))?; let data_file = AppendOnlyFile::open(format!("{}/{}", data_dir, PMMR_DATA_FILE))?; @@ -239,8 +234,8 @@ where LeafSet::copy_snapshot(leaf_set_path.clone(), leaf_snapshot_path.clone())?; } - let prune_list = PruneList::open(format!("{}/{}", data_dir, PMMR_PRUN_FILE))?; let leaf_set = LeafSet::open(leaf_set_path.clone())?; + let prune_list = PruneList::open(format!("{}/{}", data_dir, PMMR_PRUN_FILE))?; Ok(PMMRBackend { data_dir, @@ -303,6 +298,8 @@ where format!("Could not write to log data storage, disk full? {:?}", e), )); } + + // Flush the leaf_set to disk. self.leaf_set.flush()?; Ok(()) @@ -421,18 +418,12 @@ where Ok(true) } - fn pos_to_rm( - &self, - cutoff_pos: u64, - rewind_rm_pos: &Bitmap, - ) -> (Bitmap, Bitmap) { + fn pos_to_rm(&self, cutoff_pos: u64, rewind_rm_pos: &Bitmap) -> (Bitmap, Bitmap) { let mut expanded = Bitmap::create(); - let leaf_pos_to_rm = self.leaf_set.removed_pre_cutoff( - cutoff_pos, - rewind_rm_pos, - &self.prune_list, - ); + let leaf_pos_to_rm = + self.leaf_set + .removed_pre_cutoff(cutoff_pos, rewind_rm_pos, &self.prune_list); for x in leaf_pos_to_rm.iter() { expanded.add(x); diff --git a/store/src/prune_list.rs b/store/src/prune_list.rs index 5e1b27d3a..331d6230c 100644 --- a/store/src/prune_list.rs +++ b/store/src/prune_list.rs @@ -13,8 +13,6 @@ // limitations under the License. //! The Grin "Prune List" implementation. -//! Currently implemented as a vec of u64 positions. -//! *Soon* to be implemented as a compact bitmap. //! //! Maintains a set of pruned root node positions that define the pruned //! and compacted "gaps" in the MMR data and hash files. 
@@ -29,7 +27,9 @@ use std::path::Path; use croaring::Bitmap; -use core::core::pmmr::{bintree_postorder_height, family, is_leaf, path}; +use core::core::pmmr::{bintree_postorder_height, family, path}; + +use util::LOGGER; /// Maintains a list of previously pruned nodes in PMMR, compacting the list as /// parents get pruned and allowing checking whether a leaf is pruned. Given @@ -46,6 +46,10 @@ pub struct PruneList { path: Option<String>, /// Bitmap representing pruned root node positions. bitmap: Bitmap, + /// Bitmap representing all pruned node positions (everything under the pruned roots). + pruned_cache: Bitmap, + shift_cache: Vec<u64>, + leaf_shift_cache: Vec<u64>, } unsafe impl Send for PruneList {} @@ -57,6 +61,9 @@ impl PruneList { PruneList { path: None, bitmap: Bitmap::create(), + pruned_cache: Bitmap::create(), + shift_cache: vec![], + leaf_shift_cache: vec![], } } @@ -72,31 +79,42 @@ impl PruneList { Bitmap::create() }; - Ok(PruneList { + let mut prune_list = PruneList { path: Some(path.clone()), bitmap, - }) + pruned_cache: Bitmap::create(), + shift_cache: vec![], + leaf_shift_cache: vec![], + }; + + // Now build the shift and pruned caches from the bitmap we read from disk. 
+ prune_list.init_caches(); + + if !prune_list.bitmap.is_empty() { + debug!(LOGGER, "prune_list: bitmap {} pos ({} bytes), pruned_cache {} pos ({} bytes), shift_cache {}, leaf_shift_cache {}", + prune_list.bitmap.cardinality(), + prune_list.bitmap.get_serialized_size_in_bytes(), + prune_list.pruned_cache.cardinality(), + prune_list.pruned_cache.get_serialized_size_in_bytes(), + prune_list.shift_cache.len(), + prune_list.leaf_shift_cache.len(), + ); + } + + Ok(prune_list) } - fn clear_leaves(&mut self) { - let mut leaf_pos = Bitmap::create(); - for x in self.bitmap.iter() { - if is_leaf(x as u64) { - leaf_pos.add(x); - } - } - self.bitmap.andnot_inplace(&leaf_pos); + fn init_caches(&mut self) { + self.build_shift_cache(); + self.build_leaf_shift_cache(); + self.build_pruned_cache(); } /// Save the prune_list to disk. /// Clears out leaf pos before saving to disk /// as we track these via the leaf_set. pub fn flush(&mut self) -> io::Result<()> { - // First clear any leaf pos from the prune_list (these are tracked via the - // leaf_set). - self.clear_leaves(); - - // Now run the optimization step on the bitmap. + // Run the optimization step on the bitmap. self.bitmap.run_optimize(); // TODO - consider writing this to disk in a tmp file and then renaming? @@ -108,12 +126,16 @@ impl PruneList { file.flush()?; } + // Rebuild our "shift caches" here as we are flushing changes to disk + // and the contents of our prune_list has likely changed. + self.init_caches(); + Ok(()) } /// Return the total shift from all entries in the prune_list. pub fn get_total_shift(&self) -> u64 { - self.get_shift(self.bitmap.maximum() as u64 + 1) + self.get_shift(self.bitmap.maximum() as u64) } /// Computes by how many positions a node at pos should be shifted given the @@ -121,46 +143,87 @@ impl PruneList { /// Note: the node at pos may be pruned and may be compacted away itself and /// the caller needs to be aware of this. 
pub fn get_shift(&self, pos: u64) -> u64 { - let pruned = self.pruned_lte(pos); + if self.bitmap.is_empty() { + return 0; + } - // skip by the number of leaf nodes pruned in the preceeding subtrees - // which just 2^height - // except in the case of height==0 - // (where we want to treat the pruned tree as 0 leaves) - pruned - .iter() - .map(|n| { - let height = bintree_postorder_height(*n); - // height 0, 1 node, offset 0 = 0 + 0 - // height 1, 3 nodes, offset 2 = 1 + 1 - // height 2, 7 nodes, offset 6 = 3 + 3 - // height 3, 15 nodes, offset 14 = 7 + 7 + let idx = self.bitmap.rank(pos as u32); + if idx == 0 { + return 0; + } + + if idx > self.shift_cache.len() as u64 { + self.shift_cache[self.shift_cache.len() - 1] + } else { + self.shift_cache[idx as usize - 1] + } + } + + fn build_shift_cache(&mut self) { + if self.bitmap.is_empty() { + return; + } + + self.shift_cache.clear(); + for pos in self.bitmap.iter() { + let pos = pos as u64; + let prev_shift = self.get_shift(pos - 1); + + let curr_shift = if self.is_pruned_root(pos) { + let height = bintree_postorder_height(pos); 2 * ((1 << height) - 1) - }) - .sum() + } else { + 0 + }; + + self.shift_cache.push(prev_shift + curr_shift); + } } /// As above, but only returning the number of leaf nodes to skip for a /// given leaf. Helpful if, for instance, data for each leaf is being stored /// separately in a continuous flat-file. 
pub fn get_leaf_shift(&self, pos: u64) -> u64 { - let pruned = self.pruned_lte(pos); + if self.bitmap.is_empty() { + return 0; + } - // skip by the number of leaf nodes pruned in the preceeding subtrees - // which just 2^height - // except in the case of height==0 - // (where we want to treat the pruned tree as 0 leaves) - pruned - .iter() - .map(|&n| { - let height = bintree_postorder_height(n); + let idx = self.bitmap.rank(pos as u32); + if idx == 0 { + return 0; + } + + if idx > self.leaf_shift_cache.len() as u64 { + self.leaf_shift_cache[self.leaf_shift_cache.len() - 1] + } else { + self.leaf_shift_cache[idx as usize - 1] + } + } + + fn build_leaf_shift_cache(&mut self) { + if self.bitmap.is_empty() { + return; + } + + self.leaf_shift_cache.clear(); + + for pos in self.bitmap.iter() { + let pos = pos as u64; + let prev_shift = self.get_leaf_shift(pos - 1); + + let curr_shift = if self.is_pruned_root(pos) { + let height = bintree_postorder_height(pos); if height == 0 { 0 } else { 1 << height } - }) - .sum() + } else { + 0 + }; + + self.leaf_shift_cache.push(prev_shift + curr_shift); + } } /// Push the node at the provided position in the prune list. Compacts the @@ -171,13 +234,13 @@ impl PruneList { loop { let (parent, sibling) = family(current); - if self.bitmap.contains(sibling as u32) { + if self.bitmap.contains(sibling as u32) || self.pruned_cache.contains(sibling as u32) { + self.pruned_cache.add(current as u32); self.bitmap.remove(sibling as u32); current = parent; } else { - if !self.is_pruned(current) { - self.bitmap.add(current as u32); - } + self.pruned_cache.add(current as u32); + self.bitmap.add(current as u32); break; } } @@ -198,31 +261,29 @@ impl PruneList { self.bitmap.to_vec().into_iter().map(|x| x as u64).collect() } - /// Checks if the specified position has been pruned, - /// either directly (pos contained in the prune list itself) - /// or indirectly (pos is beneath a pruned root). + /// Is the pos pruned? 
+ /// Assumes the pruned_cache is fully built and up to date. pub fn is_pruned(&self, pos: u64) -> bool { - if self.is_empty() { - return false; - } + self.pruned_cache.contains(pos as u32) + } - let path = path(pos, self.bitmap.maximum() as u64); - path.into_iter().any(|x| self.bitmap.contains(x as u32)) + fn build_pruned_cache(&mut self) { + if self.bitmap.is_empty() { + return; + } + self.pruned_cache = Bitmap::create_with_capacity(self.bitmap.maximum()); + for pos in 1..(self.bitmap.maximum() + 1) { + let path = path(pos as u64, self.bitmap.maximum() as u64); + let pruned = path.into_iter().any(|x| self.bitmap.contains(x as u32)); + if pruned { + self.pruned_cache.add(pos as u32) + } + } + self.pruned_cache.run_optimize(); } /// Is the specified position a root of a pruned subtree? pub fn is_pruned_root(&self, pos: u64) -> bool { self.bitmap.contains(pos as u32) } - - fn pruned_lte(&self, pos: u64) -> Vec<u64> { - let mut res = vec![]; - for x in self.bitmap.iter() { - if x > pos as u32 { - break; - } - res.push(x as u64); - } - res - } } diff --git a/store/tests/prune_list.rs b/store/tests/prune_list.rs index c14f1aa3b..d6330b34d 100644 --- a/store/tests/prune_list.rs +++ b/store/tests/prune_list.rs @@ -26,14 +26,18 @@ fn test_is_pruned() { assert_eq!(pl.is_pruned(3), false); pl.add(2); - assert_eq!(pl.len(), 1); - assert_eq!(pl.to_vec(), [2]); + pl.flush().unwrap(); + + assert_eq!(pl.to_vec(), vec![2]); assert_eq!(pl.is_pruned(1), false); assert_eq!(pl.is_pruned(2), true); assert_eq!(pl.is_pruned(3), false); assert_eq!(pl.is_pruned(4), false); + pl.add(2); pl.add(1); + pl.flush().unwrap(); + assert_eq!(pl.len(), 1); assert_eq!(pl.to_vec(), [3]); assert_eq!(pl.is_pruned(1), true); @@ -42,6 +46,11 @@ fn test_is_pruned() { assert_eq!(pl.is_pruned(4), false); pl.add(4); + + // Flushing the prune_list no longer removes individual leaf positions, + // so the leaf at pos 4 is still in the prune_list after this flush. 
+ pl.flush().unwrap(); + assert_eq!(pl.len(), 2); assert_eq!(pl.to_vec(), [3, 4]); assert_eq!(pl.is_pruned(1), true); @@ -49,17 +58,6 @@ fn test_is_pruned() { assert_eq!(pl.is_pruned(3), true); assert_eq!(pl.is_pruned(4), true); assert_eq!(pl.is_pruned(5), false); - - // Flushing the prune_list removes any individual leaf positions. - // This assumes we will track these outside the prune_list via the leaf_set. - pl.flush().unwrap(); - assert_eq!(pl.len(), 1); - assert_eq!(pl.to_vec(), [3]); - assert_eq!(pl.is_pruned(1), true); - assert_eq!(pl.is_pruned(2), true); - assert_eq!(pl.is_pruned(3), true); - assert_eq!(pl.is_pruned(4), false); - assert_eq!(pl.is_pruned(5), false); } #[test] @@ -77,7 +75,9 @@ fn test_get_leaf_shift() { // leaves will not shift shift anything // we only start shifting after pruning a parent pl.add(1); - assert_eq!(pl.len(), 1); + pl.flush().unwrap(); + + assert_eq!(pl.to_vec(), vec![1]); assert_eq!(pl.get_leaf_shift(1), 0); assert_eq!(pl.get_leaf_shift(2), 0); assert_eq!(pl.get_leaf_shift(3), 0); @@ -85,7 +85,10 @@ fn test_get_leaf_shift() { // now add the sibling leaf pos (pos 1 and pos 2) which will prune the parent // at pos 3 this in turn will "leaf shift" the leaf at pos 3 by 2 + pl.add(1); pl.add(2); + pl.flush().unwrap(); + assert_eq!(pl.len(), 1); assert_eq!(pl.get_leaf_shift(1), 0); assert_eq!(pl.get_leaf_shift(2), 0); @@ -97,6 +100,8 @@ fn test_get_leaf_shift() { // leaf offset of subsequent pos will be 2 // 00100120 pl.add(4); + pl.flush().unwrap(); + assert_eq!(pl.len(), 2); assert_eq!(pl.to_vec(), [3, 4]); assert_eq!(pl.get_leaf_shift(1), 0); @@ -112,7 +117,10 @@ fn test_get_leaf_shift() { // the two smaller subtrees (pos 3 and pos 6) are rolled up to larger subtree // (pos 7) the leaf offset is now 4 to cover entire subtree containing first // 4 leaves 00100120 + pl.add(4); pl.add(5); + pl.flush().unwrap(); + assert_eq!(pl.len(), 1); assert_eq!(pl.to_vec(), [7]); assert_eq!(pl.get_leaf_shift(1), 0); @@ -132,6 +140,8 @@ fn 
test_get_leaf_shift() { pl.add(11); pl.add(12); pl.add(4); + pl.flush().unwrap(); + assert_eq!(pl.len(), 2); assert_eq!(pl.to_vec(), [6, 13]); assert_eq!(pl.get_leaf_shift(2), 0); @@ -154,12 +164,17 @@ fn test_get_shift() { // pruning only a leaf node does not shift any subsequent pos // we will only start shifting when a parent can be pruned pl.add(1); + pl.flush().unwrap(); + assert_eq!(pl.to_vec(), [1]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); assert_eq!(pl.get_shift(3), 0); + pl.add(1); pl.add(2); + pl.flush().unwrap(); + assert_eq!(pl.to_vec(), [3]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); @@ -171,6 +186,8 @@ fn test_get_shift() { // pos 3 is not a leaf and is already in prune list // prune it and check we are still consistent pl.add(3); + pl.flush().unwrap(); + assert_eq!(pl.to_vec(), [3]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); @@ -180,6 +197,8 @@ fn test_get_shift() { assert_eq!(pl.get_shift(6), 2); pl.add(4); + pl.flush().unwrap(); + assert_eq!(pl.to_vec(), [3, 4]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); @@ -188,7 +207,10 @@ fn test_get_shift() { assert_eq!(pl.get_shift(5), 2); assert_eq!(pl.get_shift(6), 2); + pl.add(4); pl.add(5); + pl.flush().unwrap(); + assert_eq!(pl.to_vec(), [7]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); @@ -204,6 +226,8 @@ fn test_get_shift() { for x in 6..1000 { pl.add(x); } + pl.flush().unwrap(); + // and check we shift by a large number (hopefully the correct number...) assert_eq!(pl.get_shift(1010), 996); @@ -212,6 +236,8 @@ fn test_get_shift() { pl.add(8); pl.add(5); pl.add(4); + pl.flush().unwrap(); + assert_eq!(pl.to_vec(), [6, 10]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0);