From 3583028781ba3b2641e9147a02e4b28090d04187 Mon Sep 17 00:00:00 2001 From: Antioch Peverell Date: Wed, 24 Feb 2021 17:02:03 +0000 Subject: [PATCH] Prune list iterators (#3574) * wip * use range beneath subtree for efficient is_pruned check * various iterators over the prune list * improved prune list iter and subtree handling * use take_while so unpruned iterators are not infinite --- store/src/prune_list.rs | 73 ++++++++++++++++++++-- store/tests/prune_list.rs | 128 ++++++++++++++++++++++++++++++++------ 2 files changed, 176 insertions(+), 25 deletions(-) diff --git a/store/src/prune_list.rs b/store/src/prune_list.rs index b5c3ebe6d..9e4f084a9 100644 --- a/store/src/prune_list.rs +++ b/store/src/prune_list.rs @@ -21,8 +21,11 @@ //! must be shifted the appropriate amount when reading from the hash and data //! files. -use std::io::{self, Write}; use std::path::{Path, PathBuf}; +use std::{ + io::{self, Write}, + ops::Range, +}; use croaring::Bitmap; use grin_core::core::pmmr; @@ -256,11 +259,6 @@ impl PruneList { self.bitmap.is_empty() } - /// Convert the prune_list to a vec of pos. - pub fn to_vec(&self) -> Vec { - self.bitmap.iter().map(|x| x as u64).collect() - } - /// A pos is pruned if it is a pruned root directly or if it is /// beneath the "next" pruned subtree. /// We only need to consider the "next" subtree due to the append-only MMR structure. @@ -283,4 +281,67 @@ impl PruneList { assert!(pos > 0, "prune list 1-indexed, 0 not valid pos"); self.bitmap.contains(pos as u32) } + + /// Iterator over the entries in the prune list (pruned roots). + pub fn iter(&self) -> impl Iterator + '_ { + self.bitmap.iter().map(|x| x as u64) + } + + /// Iterator over the pruned "bintree range" for each pruned root. + pub fn pruned_bintree_range_iter(&self) -> impl Iterator> + '_ { + self.iter().map(|x| pmmr::bintree_range(x)) + } + + /// Iterator over all pos that are *not* pruned based on current prune_list. + pub fn unpruned_iter(&self, cutoff_pos: u64) -> impl Iterator + '_ { + UnprunedIterator::new(self.pruned_bintree_range_iter()) + .take_while(move |x| *x <= cutoff_pos) + } + + /// Iterator over all leaf pos that are *not* pruned based on current prune_list. + /// Note this is not necessarily the same as the "leaf_set" as an output + /// can be spent but not yet pruned. + pub fn unpruned_leaf_iter(&self, cutoff_pos: u64) -> impl Iterator + '_ { + self.unpruned_iter(cutoff_pos).filter(|x| pmmr::is_leaf(*x)) + } +} + +struct UnprunedIterator { + inner: I, + current_excl_range: Option>, + current_pos: u64, +} + +impl>> UnprunedIterator { + fn new(mut inner: I) -> UnprunedIterator { + let current_excl_range = inner.next(); + UnprunedIterator { + inner, + current_excl_range, + current_pos: 1, + } + } +} + +impl>> Iterator for UnprunedIterator { + type Item = u64; + + fn next(&mut self) -> Option { + if let Some(range) = &self.current_excl_range { + if self.current_pos < range.start { + let next = self.current_pos; + self.current_pos += 1; + Some(next) + } else { + // skip the entire excluded range, moving to next excluded range as necessary + self.current_pos = range.end; + self.current_excl_range = self.inner.next(); + self.next() + } + } else { + let next = self.current_pos; + self.current_pos += 1; + Some(next) + } + } } diff --git a/store/tests/prune_list.rs b/store/tests/prune_list.rs index 89b5972e9..b56bf349f 100644 --- a/store/tests/prune_list.rs +++ b/store/tests/prune_list.rs @@ -42,9 +42,8 @@ fn test_is_pruned() { assert_eq!(pl.is_pruned(3), false); pl.add(2); - pl.flush().unwrap(); - assert_eq!(pl.to_vec(), vec![2]); + assert_eq!(pl.iter().collect::>(), [2]); assert_eq!(pl.is_pruned(1), false); assert_eq!(pl.is_pruned(2), true); assert_eq!(pl.is_pruned(3), false); @@ -52,10 +51,9 @@ fn test_is_pruned() { pl.add(2); pl.add(1); - pl.flush().unwrap(); assert_eq!(pl.len(), 1); - assert_eq!(pl.to_vec(), [3]); + assert_eq!(pl.iter().collect::>(), [3]); assert_eq!(pl.is_pruned(1), true); assert_eq!(pl.is_pruned(2), true); assert_eq!(pl.is_pruned(3), true); @@ -63,17 +61,24 @@ fn test_is_pruned() { pl.add(4); - // Flushing the prune_list removes any individual leaf positions. - // This assumes we will track these outside the prune_list via the leaf_set. - pl.flush().unwrap(); - assert_eq!(pl.len(), 2); - assert_eq!(pl.to_vec(), [3, 4]); + assert_eq!(pl.iter().collect::>(), [3, 4]); assert_eq!(pl.is_pruned(1), true); assert_eq!(pl.is_pruned(2), true); assert_eq!(pl.is_pruned(3), true); assert_eq!(pl.is_pruned(4), true); assert_eq!(pl.is_pruned(5), false); + + // Test some poorly organized (out of order, overlapping) pruning. + let mut pl = PruneList::empty(); + pl.add(2); + pl.add(4); + pl.add(3); + assert_eq!(pl.iter().collect::>(), [3, 4]); + + // now add a higher level pruned root clearing out the subtree. + pl.add(7); + assert_eq!(pl.iter().collect::>(), [7]); } #[test] @@ -93,7 +98,7 @@ fn test_get_leaf_shift() { pl.add(1); pl.flush().unwrap(); - assert_eq!(pl.to_vec(), vec![1]); + assert_eq!(pl.iter().collect::>(), [1]); assert_eq!(pl.get_leaf_shift(1), 0); assert_eq!(pl.get_leaf_shift(2), 0); assert_eq!(pl.get_leaf_shift(3), 0); @@ -119,7 +124,7 @@ fn test_get_leaf_shift() { pl.flush().unwrap(); assert_eq!(pl.len(), 2); - assert_eq!(pl.to_vec(), [3, 4]); + assert_eq!(pl.iter().collect::>(), [3, 4]); assert_eq!(pl.get_leaf_shift(1), 0); assert_eq!(pl.get_leaf_shift(2), 0); assert_eq!(pl.get_leaf_shift(3), 2); @@ -138,7 +143,7 @@ fn test_get_leaf_shift() { pl.flush().unwrap(); assert_eq!(pl.len(), 1); - assert_eq!(pl.to_vec(), [7]); + assert_eq!(pl.iter().collect::>(), [7]); assert_eq!(pl.get_leaf_shift(1), 0); assert_eq!(pl.get_leaf_shift(2), 0); assert_eq!(pl.get_leaf_shift(3), 0); @@ -159,7 +164,7 @@ fn test_get_leaf_shift() { pl.flush().unwrap(); assert_eq!(pl.len(), 2); - assert_eq!(pl.to_vec(), [6, 13]); + assert_eq!(pl.iter().collect::>(), [6, 13]); assert_eq!(pl.get_leaf_shift(2), 0); assert_eq!(pl.get_leaf_shift(4), 0); assert_eq!(pl.get_leaf_shift(8), 2); @@ -182,7 +187,7 @@ fn test_get_shift() { pl.add(1); pl.flush().unwrap(); - assert_eq!(pl.to_vec(), [1]); + assert_eq!(pl.iter().collect::>(), [1]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); assert_eq!(pl.get_shift(3), 0); @@ -191,7 +196,7 @@ fn test_get_shift() { pl.add(2); pl.flush().unwrap(); - assert_eq!(pl.to_vec(), [3]); + assert_eq!(pl.iter().collect::>(), [3]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); assert_eq!(pl.get_shift(3), 2); @@ -204,7 +209,7 @@ fn test_get_shift() { pl.add(3); pl.flush().unwrap(); - assert_eq!(pl.to_vec(), [3]); + assert_eq!(pl.iter().collect::>(), [3]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); assert_eq!(pl.get_shift(3), 2); @@ -215,7 +220,7 @@ fn test_get_shift() { pl.add(4); pl.flush().unwrap(); - assert_eq!(pl.to_vec(), [3, 4]); + assert_eq!(pl.iter().collect::>(), [3, 4]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); assert_eq!(pl.get_shift(3), 2); @@ -227,7 +232,7 @@ fn test_get_shift() { pl.add(5); pl.flush().unwrap(); - assert_eq!(pl.to_vec(), [7]); + assert_eq!(pl.iter().collect::>(), [7]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); assert_eq!(pl.get_shift(3), 0); @@ -254,7 +259,7 @@ fn test_get_shift() { pl.add(4); pl.flush().unwrap(); - assert_eq!(pl.to_vec(), [6, 10]); + assert_eq!(pl.iter().collect::>(), [6, 10]); assert_eq!(pl.get_shift(1), 0); assert_eq!(pl.get_shift(2), 0); assert_eq!(pl.get_shift(3), 0); @@ -268,3 +273,88 @@ fn test_get_shift() { assert_eq!(pl.get_shift(11), 4); assert_eq!(pl.get_shift(12), 4); } + +#[test] +pub fn test_iter() { + let mut pl = PruneList::empty(); + pl.add(1); + pl.add(2); + pl.add(4); + assert_eq!(pl.iter().collect::>(), [3, 4]); + + let mut pl = PruneList::empty(); + pl.add(1); + pl.add(2); + pl.add(5); + assert_eq!(pl.iter().collect::>(), [3, 5]); +} + +#[test] +pub fn test_pruned_bintree_range_iter() { + let mut pl = PruneList::empty(); + pl.add(1); + pl.add(2); + pl.add(4); + assert_eq!( + pl.pruned_bintree_range_iter().collect::>(), + [1..4, 4..5] + ); + + let mut pl = PruneList::empty(); + pl.add(1); + pl.add(2); + pl.add(5); + assert_eq!( + pl.pruned_bintree_range_iter().collect::>(), + [1..4, 5..6] + ); +} + +#[test] +pub fn test_unpruned_iter() { + let pl = PruneList::empty(); + assert_eq!(pl.unpruned_iter(5).collect::>(), [1, 2, 3, 4, 5]); + + let mut pl = PruneList::empty(); + pl.add(2); + assert_eq!(pl.iter().collect::>(), [2]); + assert_eq!(pl.pruned_bintree_range_iter().collect::>(), [2..3]); + assert_eq!(pl.unpruned_iter(4).collect::>(), [1, 3, 4]); + + let mut pl = PruneList::empty(); + pl.add(2); + pl.add(4); + pl.add(5); + assert_eq!(pl.iter().collect::>(), [2, 6]); + assert_eq!( + pl.pruned_bintree_range_iter().collect::>(), + [2..3, 4..7] + ); + assert_eq!(pl.unpruned_iter(9).collect::>(), [1, 3, 7, 8, 9]); +} + +#[test] +fn test_unpruned_leaf_iter() { + let pl = PruneList::empty(); + assert_eq!( + pl.unpruned_leaf_iter(8).collect::>(), + [1, 2, 4, 5, 8] + ); + + let mut pl = PruneList::empty(); + pl.add(2); + assert_eq!(pl.iter().collect::>(), [2]); + assert_eq!(pl.pruned_bintree_range_iter().collect::>(), [2..3]); + assert_eq!(pl.unpruned_leaf_iter(5).collect::>(), [1, 4, 5]); + + let mut pl = PruneList::empty(); + pl.add(2); + pl.add(4); + pl.add(5); + assert_eq!(pl.iter().collect::>(), [2, 6]); + assert_eq!( + pl.pruned_bintree_range_iter().collect::>(), + [2..3, 4..7] + ); + assert_eq!(pl.unpruned_leaf_iter(9).collect::>(), [1, 8, 9]); +}