cache get_shift() and get_leaf_shift() in prune_list (#1495)

* cache shift and leaf_shift values in prune_list for fast lookup later

* rustfmt

* fixup core tests
This commit is contained in:
Antioch Peverell 2018-09-12 08:19:05 +01:00 committed by GitHub
parent 3eacc06a97
commit 07eefc4d6b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 222 additions and 103 deletions

View file

@ -869,7 +869,10 @@ impl<'a> Extension<'a> {
debug!(
LOGGER,
"txhashset: validated the output|rproof|kernel mmrs, took {}s",
"txhashset: validated the output {}, rproof {}, kernel {} mmrs, took {}s",
self.output_pmmr.unpruned_size(),
self.rproof_pmmr.unpruned_size(),
self.kernel_pmmr.unpruned_size(),
now.elapsed().as_secs(),
);
@ -1224,6 +1227,9 @@ fn input_pos_to_rewind(
return Ok(bitmap);
}
//
// TODO - rework this loop to use Bitmap::fast_or() on a vec of bitmaps.
//
loop {
if current == block_header.hash() {
break;

View file

@ -594,6 +594,9 @@ pub fn peak_map_height(mut pos: u64) -> (u64, u64) {
/// are built.
pub fn bintree_postorder_height(num: u64) -> u64 {
    match num {
        // Guard: without it `num - 1` below would underflow for 0.
        0 => 0,
        // The height is the second element of the pair returned by
        // `peak_map_height` for the preceding index.
        pos => peak_map_height(pos - 1).1,
    }
}
@ -669,7 +672,13 @@ pub fn family_branch(pos: u64, last_pos: u64) -> Vec<(u64, u64)> {
branch
}
/// Gets the position of the rightmost node (i.e. leaf) relative to the current
fn bintree_rightmost(num: u64) -> u64 {
/// Returns the position of the rightmost node (i.e. the last leaf) found
/// beneath the subtree rooted at `num`.
pub fn bintree_rightmost(num: u64) -> u64 {
    // Stepping back by the root's height from the root position gives the
    // rightmost leaf; a leaf (height 0) is its own rightmost node.
    let height = bintree_postorder_height(num);
    num - height
}
/// Gets the position of the leftmost node (i.e. the first leaf) beneath the
/// provided subtree root.
///
/// A leaf (height 0) is its own leftmost node, and position 0 maps to 0.
pub fn bintree_leftmost(num: u64) -> u64 {
    let height = bintree_postorder_height(num);
    // A subtree of height h holds 2^(h+1) - 1 nodes and the root is the last
    // of them, so the first one sits at num - (2^(h+1) - 1) + 1.
    // `2 << height` is 2^(h+1).
    num + 2 - (2 << height)
}

View file

@ -56,6 +56,32 @@ fn first_100_mmr_heights() {
}
}
// Checks the pos of the rightmost (last) leaf beneath each subtree root pos.
#[test]
fn test_bintree_rightmost() {
    // (subtree root pos, expected rightmost leaf pos)
    let cases: [(u64, u64); 8] = [
        (0, 0),
        (1, 1),
        (2, 2),
        (3, 2),
        (4, 4),
        (5, 5),
        (6, 5),
        (7, 5),
    ];
    for &(root, expected) in cases.iter() {
        assert_eq!(pmmr::bintree_rightmost(root), expected);
    }
}
// Checks the pos of the leftmost (first) leaf beneath each subtree root pos.
#[test]
fn test_bintree_leftmost() {
    // (subtree root pos, expected leftmost leaf pos)
    let cases: [(u64, u64); 8] = [
        (0, 0),
        (1, 1),
        (2, 2),
        (3, 1),
        (4, 4),
        (5, 5),
        (6, 4),
        (7, 1),
    ];
    for &(root, expected) in cases.iter() {
        assert_eq!(pmmr::bintree_leftmost(root), expected);
    }
}
#[test]
fn test_n_leaves() {
// make sure we handle an empty MMR correctly

View file

@ -157,11 +157,7 @@ where
}
/// Rewind the PMMR backend to the given position.
fn rewind(
&mut self,
position: u64,
rewind_rm_pos: &Bitmap,
) -> Result<(), String> {
fn rewind(&mut self, position: u64, rewind_rm_pos: &Bitmap) -> Result<(), String> {
// First rewind the leaf_set with the necessary added and removed positions.
if self.prunable {
self.leaf_set.rewind(position, rewind_rm_pos);
@ -224,9 +220,8 @@ where
pub fn new(
data_dir: String,
prunable: bool,
header: Option<&BlockHeader>
header: Option<&BlockHeader>,
) -> io::Result<PMMRBackend<T>> {
let hash_file = AppendOnlyFile::open(format!("{}/{}", data_dir, PMMR_HASH_FILE))?;
let data_file = AppendOnlyFile::open(format!("{}/{}", data_dir, PMMR_DATA_FILE))?;
@ -239,8 +234,8 @@ where
LeafSet::copy_snapshot(leaf_set_path.clone(), leaf_snapshot_path.clone())?;
}
let prune_list = PruneList::open(format!("{}/{}", data_dir, PMMR_PRUN_FILE))?;
let leaf_set = LeafSet::open(leaf_set_path.clone())?;
let prune_list = PruneList::open(format!("{}/{}", data_dir, PMMR_PRUN_FILE))?;
Ok(PMMRBackend {
data_dir,
@ -303,6 +298,8 @@ where
format!("Could not write to log data storage, disk full? {:?}", e),
));
}
// Flush the leaf_set to disk.
self.leaf_set.flush()?;
Ok(())
@ -421,18 +418,12 @@ where
Ok(true)
}
fn pos_to_rm(
&self,
cutoff_pos: u64,
rewind_rm_pos: &Bitmap,
) -> (Bitmap, Bitmap) {
fn pos_to_rm(&self, cutoff_pos: u64, rewind_rm_pos: &Bitmap) -> (Bitmap, Bitmap) {
let mut expanded = Bitmap::create();
let leaf_pos_to_rm = self.leaf_set.removed_pre_cutoff(
cutoff_pos,
rewind_rm_pos,
&self.prune_list,
);
let leaf_pos_to_rm =
self.leaf_set
.removed_pre_cutoff(cutoff_pos, rewind_rm_pos, &self.prune_list);
for x in leaf_pos_to_rm.iter() {
expanded.add(x);

View file

@ -13,8 +13,6 @@
// limitations under the License.
//! The Grin "Prune List" implementation.
//! Currently implemented as a vec of u64 positions.
//! *Soon* to be implemented as a compact bitmap.
//!
//! Maintains a set of pruned root node positions that define the pruned
//! and compacted "gaps" in the MMR data and hash files.
@ -29,7 +27,9 @@ use std::path::Path;
use croaring::Bitmap;
use core::core::pmmr::{bintree_postorder_height, family, is_leaf, path};
use core::core::pmmr::{bintree_postorder_height, family, path};
use util::LOGGER;
/// Maintains a list of previously pruned nodes in PMMR, compacting the list as
/// parents get pruned and allowing checking whether a leaf is pruned. Given
@ -46,6 +46,10 @@ pub struct PruneList {
path: Option<String>,
/// Bitmap representing pruned root node positions.
bitmap: Bitmap,
/// Bitmap representing all pruned node positions (everything under the pruned roots).
pruned_cache: Bitmap,
shift_cache: Vec<u64>,
leaf_shift_cache: Vec<u64>,
}
unsafe impl Send for PruneList {}
@ -57,6 +61,9 @@ impl PruneList {
PruneList {
path: None,
bitmap: Bitmap::create(),
pruned_cache: Bitmap::create(),
shift_cache: vec![],
leaf_shift_cache: vec![],
}
}
@ -72,31 +79,42 @@ impl PruneList {
Bitmap::create()
};
Ok(PruneList {
let mut prune_list = PruneList {
path: Some(path.clone()),
bitmap,
})
pruned_cache: Bitmap::create(),
shift_cache: vec![],
leaf_shift_cache: vec![],
};
// Now build the shift and pruned caches from the bitmap we read from disk.
prune_list.init_caches();
if !prune_list.bitmap.is_empty() {
debug!(LOGGER, "prune_list: bitmap {} pos ({} bytes), pruned_cache {} pos ({} bytes), shift_cache {}, leaf_shift_cache {}",
prune_list.bitmap.cardinality(),
prune_list.bitmap.get_serialized_size_in_bytes(),
prune_list.pruned_cache.cardinality(),
prune_list.pruned_cache.get_serialized_size_in_bytes(),
prune_list.shift_cache.len(),
prune_list.leaf_shift_cache.len(),
);
}
Ok(prune_list)
}
fn clear_leaves(&mut self) {
let mut leaf_pos = Bitmap::create();
for x in self.bitmap.iter() {
if is_leaf(x as u64) {
leaf_pos.add(x);
}
}
self.bitmap.andnot_inplace(&leaf_pos);
/// Rebuilds every derived cache (shift, leaf shift and pruned positions)
/// from the current contents of the prune list.
fn init_caches(&mut self) {
// Shift values served by get_shift().
self.build_shift_cache();
// Leaf shift values served by get_leaf_shift().
self.build_leaf_shift_cache();
// Pruned positions served by is_pruned().
self.build_pruned_cache();
}
/// Save the prune_list to disk.
/// Clears out leaf pos before saving to disk
/// as we track these via the leaf_set.
pub fn flush(&mut self) -> io::Result<()> {
// First clear any leaf pos from the prune_list (these are tracked via the
// leaf_set).
self.clear_leaves();
// Now run the optimization step on the bitmap.
// Run the optimization step on the bitmap.
self.bitmap.run_optimize();
// TODO - consider writing this to disk in a tmp file and then renaming?
@ -108,12 +126,16 @@ impl PruneList {
file.flush()?;
}
// Rebuild our "shift caches" here as we are flushing changes to disk
// and the contents of our prune_list has likely changed.
self.init_caches();
Ok(())
}
/// Return the total shift from all entries in the prune_list.
pub fn get_total_shift(&self) -> u64 {
self.get_shift(self.bitmap.maximum() as u64 + 1)
self.get_shift(self.bitmap.maximum() as u64)
}
/// Computes by how many positions a node at pos should be shifted given the
@ -121,46 +143,87 @@ impl PruneList {
/// Note: the node at pos may be pruned and may be compacted away itself and
/// the caller needs to be aware of this.
pub fn get_shift(&self, pos: u64) -> u64 {
let pruned = self.pruned_lte(pos);
if self.bitmap.is_empty() {
return 0;
}
// skip by the number of leaf nodes pruned in the preceding subtrees
// which is just 2^height
// except in the case of height==0
// (where we want to treat the pruned tree as 0 leaves)
pruned
.iter()
.map(|n| {
let height = bintree_postorder_height(*n);
// height 0, 1 node, offset 0 = 0 + 0
// height 1, 3 nodes, offset 2 = 1 + 1
// height 2, 7 nodes, offset 6 = 3 + 3
// height 3, 15 nodes, offset 14 = 7 + 7
let idx = self.bitmap.rank(pos as u32);
if idx == 0 {
return 0;
}
if idx > self.shift_cache.len() as u64 {
self.shift_cache[self.shift_cache.len() - 1]
} else {
self.shift_cache[idx as usize - 1]
}
}
fn build_shift_cache(&mut self) {
if self.bitmap.is_empty() {
return;
}
self.shift_cache.clear();
for pos in self.bitmap.iter() {
let pos = pos as u64;
let prev_shift = self.get_shift(pos - 1);
let curr_shift = if self.is_pruned_root(pos) {
let height = bintree_postorder_height(pos);
2 * ((1 << height) - 1)
})
.sum()
} else {
0
};
self.shift_cache.push(prev_shift + curr_shift);
}
}
/// As above, but only returning the number of leaf nodes to skip for a
/// given leaf. Helpful if, for instance, data for each leaf is being stored
/// separately in a continuous flat-file.
pub fn get_leaf_shift(&self, pos: u64) -> u64 {
let pruned = self.pruned_lte(pos);
if self.bitmap.is_empty() {
return 0;
}
// skip by the number of leaf nodes pruned in the preceding subtrees
// which is just 2^height
// except in the case of height==0
// (where we want to treat the pruned tree as 0 leaves)
pruned
.iter()
.map(|&n| {
let height = bintree_postorder_height(n);
let idx = self.bitmap.rank(pos as u32);
if idx == 0 {
return 0;
}
if idx > self.leaf_shift_cache.len() as u64 {
self.leaf_shift_cache[self.leaf_shift_cache.len() - 1]
} else {
self.leaf_shift_cache[idx as usize - 1]
}
}
fn build_leaf_shift_cache(&mut self) {
if self.bitmap.is_empty() {
return;
}
self.leaf_shift_cache.clear();
for pos in self.bitmap.iter() {
let pos = pos as u64;
let prev_shift = self.get_leaf_shift(pos - 1);
let curr_shift = if self.is_pruned_root(pos) {
let height = bintree_postorder_height(pos);
if height == 0 {
0
} else {
1 << height
}
})
.sum()
} else {
0
};
self.leaf_shift_cache.push(prev_shift + curr_shift);
}
}
/// Push the node at the provided position in the prune list. Compacts the
@ -171,13 +234,13 @@ impl PruneList {
loop {
let (parent, sibling) = family(current);
if self.bitmap.contains(sibling as u32) {
if self.bitmap.contains(sibling as u32) || self.pruned_cache.contains(sibling as u32) {
self.pruned_cache.add(current as u32);
self.bitmap.remove(sibling as u32);
current = parent;
} else {
if !self.is_pruned(current) {
self.bitmap.add(current as u32);
}
self.pruned_cache.add(current as u32);
self.bitmap.add(current as u32);
break;
}
}
@ -198,31 +261,29 @@ impl PruneList {
self.bitmap.to_vec().into_iter().map(|x| x as u64).collect()
}
/// Checks if the specified position has been pruned,
/// either directly (pos contained in the prune list itself)
/// or indirectly (pos is beneath a pruned root).
/// Reports whether `pos` has been pruned.
/// An empty prune list never reports a pruned position; otherwise this is a
/// plain lookup in `pruned_cache`, so the answer is only as fresh as that
/// cache.
pub fn is_pruned(&self, pos: u64) -> bool {
    !self.is_empty() && self.pruned_cache.contains(pos as u32)
}
let path = path(pos, self.bitmap.maximum() as u64);
path.into_iter().any(|x| self.bitmap.contains(x as u32))
/// Rebuilds `pruned_cache` from the pruned roots held in `bitmap`.
///
/// Every position from 1 up to the highest pruned root is marked as pruned
/// when any node returned by `path(pos, max)` is present in `bitmap`.
/// Nothing is done when `bitmap` is empty.
fn build_pruned_cache(&mut self) {
    if self.bitmap.is_empty() {
        return;
    }
    // The bitmap is not modified below, so look its maximum up once rather
    // than on every loop iteration.
    let max_pos = self.bitmap.maximum();
    self.pruned_cache = Bitmap::create_with_capacity(max_pos);
    for pos in 1..(max_pos + 1) {
        let is_pruned = path(pos as u64, max_pos as u64)
            .into_iter()
            .any(|x| self.bitmap.contains(x as u32));
        if is_pruned {
            self.pruned_cache.add(pos as u32);
        }
    }
    // Compact the freshly built bitmap.
    self.pruned_cache.run_optimize();
}
/// Reports whether `pos` is itself recorded as the root of a pruned subtree,
/// i.e. whether it is present in the root bitmap.
pub fn is_pruned_root(&self, pos: u64) -> bool {
    let root = pos as u32;
    self.bitmap.contains(root)
}
// Collects, in iteration order, the bitmap entries up to and including `pos`,
// stopping at the first entry that exceeds it.
fn pruned_lte(&self, pos: u64) -> Vec<u64> {
    let limit = pos as u32;
    self.bitmap
        .iter()
        .take_while(|&x| x <= limit)
        .map(|x| x as u64)
        .collect()
}
}

View file

@ -26,14 +26,18 @@ fn test_is_pruned() {
assert_eq!(pl.is_pruned(3), false);
pl.add(2);
assert_eq!(pl.len(), 1);
assert_eq!(pl.to_vec(), [2]);
pl.flush().unwrap();
assert_eq!(pl.to_vec(), vec![2]);
assert_eq!(pl.is_pruned(1), false);
assert_eq!(pl.is_pruned(2), true);
assert_eq!(pl.is_pruned(3), false);
assert_eq!(pl.is_pruned(4), false);
pl.add(2);
pl.add(1);
pl.flush().unwrap();
assert_eq!(pl.len(), 1);
assert_eq!(pl.to_vec(), [3]);
assert_eq!(pl.is_pruned(1), true);
@ -42,6 +46,11 @@ fn test_is_pruned() {
assert_eq!(pl.is_pruned(4), false);
pl.add(4);
// Flush the prune_list so its caches are rebuilt after the add.
// The leaf pos just added (4) stays in the prune_list across the flush.
pl.flush().unwrap();
assert_eq!(pl.len(), 2);
assert_eq!(pl.to_vec(), [3, 4]);
assert_eq!(pl.is_pruned(1), true);
@ -49,17 +58,6 @@ fn test_is_pruned() {
assert_eq!(pl.is_pruned(3), true);
assert_eq!(pl.is_pruned(4), true);
assert_eq!(pl.is_pruned(5), false);
// Flushing the prune_list removes any individual leaf positions.
// This assumes we will track these outside the prune_list via the leaf_set.
pl.flush().unwrap();
assert_eq!(pl.len(), 1);
assert_eq!(pl.to_vec(), [3]);
assert_eq!(pl.is_pruned(1), true);
assert_eq!(pl.is_pruned(2), true);
assert_eq!(pl.is_pruned(3), true);
assert_eq!(pl.is_pruned(4), false);
assert_eq!(pl.is_pruned(5), false);
}
#[test]
@ -77,7 +75,9 @@ fn test_get_leaf_shift() {
// leaves will not shift anything
// we only start shifting after pruning a parent
pl.add(1);
assert_eq!(pl.len(), 1);
pl.flush().unwrap();
assert_eq!(pl.to_vec(), vec![1]);
assert_eq!(pl.get_leaf_shift(1), 0);
assert_eq!(pl.get_leaf_shift(2), 0);
assert_eq!(pl.get_leaf_shift(3), 0);
@ -85,7 +85,10 @@ fn test_get_leaf_shift() {
// now add the sibling leaf pos (pos 1 and pos 2) which will prune the parent
// at pos 3 this in turn will "leaf shift" the leaf at pos 3 by 2
pl.add(1);
pl.add(2);
pl.flush().unwrap();
assert_eq!(pl.len(), 1);
assert_eq!(pl.get_leaf_shift(1), 0);
assert_eq!(pl.get_leaf_shift(2), 0);
@ -97,6 +100,8 @@ fn test_get_leaf_shift() {
// leaf offset of subsequent pos will be 2
// 00100120
pl.add(4);
pl.flush().unwrap();
assert_eq!(pl.len(), 2);
assert_eq!(pl.to_vec(), [3, 4]);
assert_eq!(pl.get_leaf_shift(1), 0);
@ -112,7 +117,10 @@ fn test_get_leaf_shift() {
// the two smaller subtrees (pos 3 and pos 6) are rolled up to larger subtree
// (pos 7) the leaf offset is now 4 to cover entire subtree containing first
// 4 leaves 00100120
pl.add(4);
pl.add(5);
pl.flush().unwrap();
assert_eq!(pl.len(), 1);
assert_eq!(pl.to_vec(), [7]);
assert_eq!(pl.get_leaf_shift(1), 0);
@ -132,6 +140,8 @@ fn test_get_leaf_shift() {
pl.add(11);
pl.add(12);
pl.add(4);
pl.flush().unwrap();
assert_eq!(pl.len(), 2);
assert_eq!(pl.to_vec(), [6, 13]);
assert_eq!(pl.get_leaf_shift(2), 0);
@ -154,12 +164,17 @@ fn test_get_shift() {
// pruning only a leaf node does not shift any subsequent pos
// we will only start shifting when a parent can be pruned
pl.add(1);
pl.flush().unwrap();
assert_eq!(pl.to_vec(), [1]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
assert_eq!(pl.get_shift(3), 0);
pl.add(1);
pl.add(2);
pl.flush().unwrap();
assert_eq!(pl.to_vec(), [3]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
@ -171,6 +186,8 @@ fn test_get_shift() {
// pos 3 is not a leaf and is already in prune list
// prune it and check we are still consistent
pl.add(3);
pl.flush().unwrap();
assert_eq!(pl.to_vec(), [3]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
@ -180,6 +197,8 @@ fn test_get_shift() {
assert_eq!(pl.get_shift(6), 2);
pl.add(4);
pl.flush().unwrap();
assert_eq!(pl.to_vec(), [3, 4]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
@ -188,7 +207,10 @@ fn test_get_shift() {
assert_eq!(pl.get_shift(5), 2);
assert_eq!(pl.get_shift(6), 2);
pl.add(4);
pl.add(5);
pl.flush().unwrap();
assert_eq!(pl.to_vec(), [7]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
@ -204,6 +226,8 @@ fn test_get_shift() {
for x in 6..1000 {
pl.add(x);
}
pl.flush().unwrap();
// and check we shift by a large number (hopefully the correct number...)
assert_eq!(pl.get_shift(1010), 996);
@ -212,6 +236,8 @@ fn test_get_shift() {
pl.add(8);
pl.add(5);
pl.add(4);
pl.flush().unwrap();
assert_eq!(pl.to_vec(), [6, 10]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);