implement prune_list as a bitmap (#1179) (#1206)

* implement prune_list as a bitmap
and simplify

* cleanup prune_list, use maximum()

* handle migration of prune_list to new bitmap prun file

* legacy filename consts

* cleanup and docs
This commit is contained in:
Antioch Peverell 2018-06-28 20:53:00 -04:00 committed by GitHub
parent 5ac61b0bc8
commit d0f8d325f2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 556 additions and 503 deletions

2
Cargo.lock generated
View file

@ -1617,7 +1617,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "secp256k1zkp"
version = "0.7.1"
source = "git+https://github.com/mimblewimble/rust-secp256k1-zkp?branch=testnet3#748296c61341461e46e7b2c05db494d60f96ac44"
source = "git+https://github.com/mimblewimble/rust-secp256k1-zkp?branch=testnet3#2564e8cebccb827ed442886d81866af783aaebc4"
dependencies = [
"arrayvec 0.3.25 (registry+https://github.com/rust-lang/crates.io-index)",
"gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)",

View file

@ -20,7 +20,6 @@ pub mod hash;
pub mod id;
pub mod merkle_proof;
pub mod pmmr;
pub mod prune_list;
pub mod target;
pub mod transaction;

View file

@ -722,6 +722,18 @@ pub fn path(pos: u64, last_pos: u64) -> Vec<u64> {
path
}
// TODO - the commented-out version below is simpler; test that it is actually equivalent to `path` above before replacing it.
// pub fn path(pos: u64, last_pos: u64) -> Vec<u64> {
// let mut path = vec![];
// let mut current = pos;
// while current <= last_pos {
// path.push(current);
// let (parent, _) = family(current);
// current = parent;
// }
// path
// }
/// For a given starting position calculate the parent and sibling positions
/// for the branch/path from that position to the peak of the tree.
/// We will use the sibling positions to generate the "path" of a Merkle proof.

View file

@ -1,173 +0,0 @@
// Copyright 2018 The Grin Developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! The Grin "Prune List" implementation.
//! Currently implemented as a vec of u64 positions.
//! *Soon* to be implemented as a compact bitmap.
//!
//! Maintains a set of pruned root node positions that define the pruned
//! and compacted "gaps" in the MMR data and hash files.
//! The root itself is maintained in the hash file, but all positions beneath
//! the root are compacted away. All positions to the right of a pruned node
//! must be shifted the appropriate amount when reading from the hash and data
//! files.
use core::pmmr::{bintree_postorder_height, family};
/// Records which PMMR nodes have been pruned and translates PMMR positions
/// into positions within a compacted backend.
///
/// When a backend (a large byte array or a file, for instance) physically
/// removes pruned nodes to reclaim space, the position of a node in the PMMR
/// no longer matches its position in storage. The prune list computes how far
/// a given position has to be shifted to account for the subtrees pruned
/// before it, and can tell whether a position has been pruned at all.
///
/// Entries are rolled up as pruning proceeds: once both children of a parent
/// are in the list they are replaced by the parent.
#[derive(Default)]
pub struct PruneList {
    /// Positions of the pruned nodes. The methods on this type binary-search
    /// this vector, so it is expected to stay sorted in ascending order.
    pub pruned_nodes: Vec<u64>,
}
impl PruneList {
/// Creates a prune list with no pruned positions.
pub fn new() -> PruneList {
    PruneList {
        pruned_nodes: Vec::new(),
    }
}
/// Returns the number of positions the node at `pos` must be shifted by to
/// account for the subtrees pruned before it.
///
/// Returns `None` when `pos` lies beneath a root held in the prune list.
/// A position that is itself in the list still gets a shift, and that shift
/// includes its own subtree.
pub fn get_shift(&self, pos: u64) -> Option<u64> {
    // Index at which `pos` would be inserted; `None` when `pos` is already
    // covered by the list (listed itself, or beneath the next listed root).
    let insert_idx = self.next_pruned_idx(pos);
    // When `pos` is itself listed, count every entry up to and including it.
    let past_self_idx = self.pruned_nodes.binary_search(&pos).map(|i| i + 1).ok();
    let end = insert_idx.or(past_self_idx)?;

    // Each preceding pruned subtree removes all of its nodes except the root:
    // height 0, 1 node, offset 0 = 0 + 0
    // height 1, 3 nodes, offset 2 = 1 + 1
    // height 2, 7 nodes, offset 6 = 3 + 3
    // height 3, 15 nodes, offset 14 = 7 + 7
    let mut shift: u64 = 0;
    for &root in &self.pruned_nodes[..end] {
        let height = bintree_postorder_height(root);
        shift += 2 * ((1u64 << height) - 1);
    }
    Some(shift)
}
/// As above, but only returning the number of leaf nodes to skip for a
/// given leaf. Helpful if, for instance, data for each leaf is being stored
/// separately in a continuous flat-file. Returns None if the position has
/// already been pruned.
pub fn get_leaf_shift(&self, pos: u64) -> Option<u64> {
// get the position where the node at pos would fit in the pruned list, if
// it's already pruned, nothing to skip
let pruned_idx = self.next_pruned_idx(pos);
let next_idx = self.pruned_nodes.binary_search(&pos).map(|x| x + 1).ok();
let idx = pruned_idx.or(next_idx)?;
Some(
// skip by the number of leaf nodes pruned in the preceeding subtrees
// which just 2^height
// except in the case of height==0
// (where we want to treat the pruned tree as 0 leaves)
self.pruned_nodes[0..(idx as usize)]
.iter()
.map(|n| {
let height = bintree_postorder_height(*n);
if height == 0 {
0
} else {
(1 << height)
}
})
.sum(),
)
}
/// Adds the node at `pos` to the prune list.
///
/// While the sibling of the node being added is already in the list, the
/// sibling is removed and the parent is considered instead, so a fully pruned
/// pair is always represented by its parent. The resulting node is inserted
/// only if it is not already covered by the list.
pub fn add(&mut self, pos: u64) {
    let mut node = pos;
    loop {
        let (parent, sibling) = family(node);
        if let Ok(sibling_idx) = self.pruned_nodes.binary_search(&sibling) {
            // Both children are pruned: drop the sibling and move up.
            self.pruned_nodes.remove(sibling_idx);
            node = parent;
            continue;
        }
        // `next_pruned_idx` yields `None` when `node` is already covered.
        if let Some(insert_at) = self.next_pruned_idx(node) {
            self.pruned_nodes.insert(insert_at, node);
        }
        return;
    }
}
/// Reports whether `pos` has been pruned, either directly (it is held in the
/// prune list) or indirectly (it sits beneath a pruned root).
pub fn is_pruned(&self, pos: u64) -> bool {
    // No insertion index means the position is already covered by the list.
    match self.next_pruned_idx(pos) {
        Some(_) => false,
        None => true,
    }
}
/// Returns the index at which `pos` would be inserted into the prune list.
///
/// Returns `None` when `pos` is already pruned: either it is in the list
/// itself, or one of its ancestors is the next entry in the list.
pub fn next_pruned_idx(&self, pos: u64) -> Option<usize> {
    // An exact hit means `pos` is already listed; otherwise we get the
    // insertion point.
    let idx = self.pruned_nodes.binary_search(&pos).err()?;

    // By MMR construction `pos` cannot be a child of a lower position, but it
    // may be a descendant of the next listed node. Walk up the parents of
    // `pos` until we either reach that node or climb past it.
    if let Some(&next_root) = self.pruned_nodes.get(idx) {
        let mut cursor = pos;
        loop {
            let (parent, _) = family(cursor);
            if parent == next_root {
                return None;
            }
            if parent > next_root {
                break;
            }
            cursor = parent;
        }
    }
    Some(idx)
}
}

View file

@ -21,7 +21,6 @@ mod vec_backend;
use core::core::hash::Hash;
use core::core::pmmr::{self, PMMR};
use core::core::prune_list::PruneList;
use core::ser::PMMRIndexHashable;
use vec_backend::{TestElem, VecBackend};
@ -450,266 +449,6 @@ fn pmmr_prune() {
assert_eq!(ba.remove_list.len(), 9);
}
/// Checks the insertion index reported by `next_pruned_idx` as positions are
/// pruned and rolled up into their parent.
#[test]
fn pmmr_next_pruned_idx() {
    // Asserts `next_pruned_idx` for every `(pos, expected index)` pair.
    fn check(pl: &PruneList, expected: &[(u64, Option<usize>)]) {
        for &(pos, idx) in expected {
            assert_eq!(pl.next_pruned_idx(pos), idx);
        }
    }

    let mut pl = PruneList::new();
    assert_eq!(pl.pruned_nodes.len(), 0);
    check(&pl, &[(1, Some(0)), (2, Some(0)), (3, Some(0))]);

    pl.add(2);
    assert_eq!(pl.pruned_nodes.len(), 1);
    assert_eq!(pl.pruned_nodes, [2]);
    check(&pl, &[(1, Some(0)), (2, None), (3, Some(1)), (4, Some(1))]);

    // pruning the sibling leaf rolls both leaves up into the parent at pos 3
    pl.add(1);
    assert_eq!(pl.pruned_nodes.len(), 1);
    assert_eq!(pl.pruned_nodes, [3]);
    check(
        &pl,
        &[(1, None), (2, None), (3, None), (4, Some(1)), (5, Some(1))],
    );

    // adding a root that is already in the list changes nothing
    pl.add(3);
    assert_eq!(pl.pruned_nodes.len(), 1);
    assert_eq!(pl.pruned_nodes, [3]);
    check(
        &pl,
        &[(1, None), (2, None), (3, None), (4, Some(1)), (5, Some(1))],
    );
}
/// Checks the leaf shift reported by `get_leaf_shift` as leaves are pruned
/// and rolled up into larger pruned subtrees.
#[test]
fn pmmr_prune_leaf_shift() {
    // Asserts `get_leaf_shift` for every `(pos, expected shift)` pair.
    fn check(pl: &PruneList, expected: &[(u64, Option<u64>)]) {
        for &(pos, shift) in expected {
            assert_eq!(pl.get_leaf_shift(pos), shift);
        }
    }

    let mut pl = PruneList::new();

    // start with an empty prune list (nothing shifted)
    assert_eq!(pl.pruned_nodes.len(), 0);
    check(&pl, &[(1, Some(0)), (2, Some(0)), (4, Some(0))]);

    // now add a single leaf pos to the prune list
    // note this does not shift anything (we only start shifting after
    // pruning a parent)
    pl.add(1);
    assert_eq!(pl.pruned_nodes.len(), 1);
    assert_eq!(pl.pruned_nodes, [1]);
    check(&pl, &[(1, Some(0)), (2, Some(0)), (3, Some(0)), (4, Some(0))]);

    // now add the sibling leaf pos (pos 1 and pos 2) which will prune the
    // parent at pos 3; this in turn will "leaf shift" the leaf at pos 3 by 2
    pl.add(2);
    assert_eq!(pl.pruned_nodes.len(), 1);
    assert_eq!(pl.pruned_nodes, [3]);
    check(
        &pl,
        &[(1, None), (2, None), (3, Some(2)), (4, Some(2)), (5, Some(2))],
    );

    // now prune an additional leaf at pos 4
    // leaf offset of subsequent pos will be 2
    // 00100120
    pl.add(4);
    assert_eq!(pl.pruned_nodes, [3, 4]);
    check(
        &pl,
        &[
            (1, None),
            (2, None),
            (3, Some(2)),
            (4, Some(2)),
            (5, Some(2)),
            (6, Some(2)),
            (7, Some(2)),
            (8, Some(2)),
        ],
    );

    // now prune the sibling at pos 5
    // the two smaller subtrees (pos 3 and pos 6) are rolled up to the larger
    // subtree (pos 7); the leaf offset is now 4 to cover the entire subtree
    // containing the first 4 leaves
    // 00100120
    pl.add(5);
    assert_eq!(pl.pruned_nodes, [7]);
    check(
        &pl,
        &[
            (1, None),
            (2, None),
            (3, None),
            (4, None),
            (5, None),
            (6, None),
            (7, Some(4)),
            (8, Some(4)),
            (9, Some(4)),
        ],
    );

    // now check we can prune some of these in an arbitrary order
    // final result is one leaf (pos 2) and one small subtree (pos 6) pruned
    // with leaf offset of 2 to account for the pruned subtree
    let mut pl = PruneList::new();
    pl.add(2);
    pl.add(5);
    pl.add(4);
    assert_eq!(pl.pruned_nodes, [2, 6]);
    check(
        &pl,
        &[
            (1, Some(0)),
            (2, Some(0)),
            (3, Some(0)),
            (4, None),
            (5, None),
            (6, Some(2)),
            (7, Some(2)),
            (8, Some(2)),
            (9, Some(2)),
        ],
    );

    pl.add(1);
    assert_eq!(pl.pruned_nodes, [7]);
    check(
        &pl,
        &[
            (1, None),
            (2, None),
            (3, None),
            (4, None),
            (5, None),
            (6, None),
            (7, Some(4)),
            (8, Some(4)),
            (9, Some(4)),
        ],
    );
}
/// Checks the node shift reported by `get_shift` as leaves are pruned and
/// rolled up into larger pruned subtrees.
#[test]
fn pmmr_prune_shift() {
    // Asserts `get_shift` for every `(pos, expected shift)` pair.
    fn check(pl: &PruneList, expected: &[(u64, Option<u64>)]) {
        for &(pos, shift) in expected {
            assert_eq!(pl.get_shift(pos), shift);
        }
    }

    let mut pl = PruneList::new();
    assert!(pl.pruned_nodes.is_empty());
    check(&pl, &[(1, Some(0)), (2, Some(0)), (3, Some(0))]);

    // prune a single leaf node
    // pruning only a leaf node does not shift any subsequent pos
    // we will only start shifting when a parent can be pruned
    pl.add(1);
    assert_eq!(pl.pruned_nodes, [1]);
    check(&pl, &[(1, Some(0)), (2, Some(0)), (3, Some(0))]);

    pl.add(2);
    assert_eq!(pl.pruned_nodes, [3]);
    check(
        &pl,
        &[
            (1, None),
            (2, None),
            // pos 3 is in the prune list, so removed but not compacted, but
            // still shifted
            (3, Some(2)),
            (4, Some(2)),
            (5, Some(2)),
            (6, Some(2)),
        ],
    );

    // pos 3 is not a leaf and is already in prune list
    // prune it and check we are still consistent
    pl.add(3);
    assert_eq!(pl.pruned_nodes, [3]);
    check(
        &pl,
        &[
            (1, None),
            (2, None),
            // pos 3 is in the prune list, so removed but not compacted, but
            // still shifted
            (3, Some(2)),
            (4, Some(2)),
            (5, Some(2)),
            (6, Some(2)),
        ],
    );

    pl.add(4);
    assert_eq!(pl.pruned_nodes, [3, 4]);
    check(
        &pl,
        &[
            (1, None),
            (2, None),
            // pos 3 is in the prune list, so removed but not compacted, but
            // still shifted
            (3, Some(2)),
            // pos 4 is also in the prune list and also shifted by same amount
            (4, Some(2)),
            // subsequent nodes also shifted consistently
            (5, Some(2)),
            (6, Some(2)),
        ],
    );

    pl.add(5);
    assert_eq!(pl.pruned_nodes, [7]);
    check(
        &pl,
        &[
            (1, None),
            (2, None),
            (3, None),
            (4, None),
            (5, None),
            (6, None),
            // everything prior to pos 7 is compacted away
            // pos 7 is shifted by 6 to account for this
            (7, Some(6)),
            (8, Some(6)),
            (9, Some(6)),
        ],
    );

    // prune a bunch more
    for x in 6..1000 {
        pl.add(x);
    }
    // and check we shift by a large number (hopefully the correct number...)
    assert_eq!(pl.get_shift(1010), Some(996));

    let mut pl = PruneList::new();
    pl.add(2);
    pl.add(5);
    pl.add(4);
    assert_eq!(pl.pruned_nodes, [2, 6]);
    check(
        &pl,
        &[
            (1, Some(0)),
            (2, Some(0)),
            (3, Some(0)),
            (4, None),
            (5, None),
            (6, Some(2)),
            (7, Some(2)),
            (8, Some(2)),
            (9, Some(2)),
        ],
    );

    // TODO - put some of these tests back in place for completeness
    //
    // let mut pl = PruneList::new();
    // pl.add(4);
    // assert_eq!(pl.pruned_nodes.len(), 1);
    // assert_eq!(pl.pruned_nodes, [4]);
    // assert_eq!(pl.get_shift(1), Some(0));
    // assert_eq!(pl.get_shift(2), Some(0));
    // assert_eq!(pl.get_shift(3), Some(0));
    // assert_eq!(pl.get_shift(4), None);
    // assert_eq!(pl.get_shift(5), Some(1));
    // assert_eq!(pl.get_shift(6), Some(1));
    //
    //
    // pl.add(5);
    // assert_eq!(pl.pruned_nodes.len(), 1);
    // assert_eq!(pl.pruned_nodes[0], 6);
    // assert_eq!(pl.get_shift(8), Some(3));
    // assert_eq!(pl.get_shift(2), Some(0));
    // assert_eq!(pl.get_shift(5), None);
    //
    // pl.add(2);
    // assert_eq!(pl.pruned_nodes.len(), 2);
    // assert_eq!(pl.pruned_nodes[0], 2);
    // assert_eq!(pl.get_shift(8), Some(4));
    // assert_eq!(pl.get_shift(1), Some(0));
    //
    // pl.add(8);
    // pl.add(11);
    // assert_eq!(pl.pruned_nodes.len(), 4);
    //
    // pl.add(1);
    // assert_eq!(pl.pruned_nodes.len(), 3);
    // assert_eq!(pl.pruned_nodes[0], 7);
    // assert_eq!(pl.get_shift(12), Some(9));
    //
    // pl.add(12);
    // assert_eq!(pl.pruned_nodes.len(), 3);
    // assert_eq!(pl.get_shift(12), None);
    // assert_eq!(pl.get_shift(9), Some(8));
    // assert_eq!(pl.get_shift(17), Some(11));
}
#[test]
fn check_all_ones() {
for i in 0..1000000 {

View file

@ -24,8 +24,7 @@ use croaring::Bitmap;
use core::core::BlockHeader;
use core::core::hash::Hashed;
use core::core::pmmr;
use core::core::prune_list::PruneList;
use prune_list::PruneList;
use util::LOGGER;
/// Compact (roaring) bitmap representing the set of positions of

View file

@ -37,6 +37,7 @@ extern crate grin_util as util;
pub mod leaf_set;
mod lmdb;
pub mod pmmr;
pub mod prune_list;
pub mod rm_log;
pub mod types;

View file

@ -20,21 +20,26 @@ use std::path::Path;
use croaring::Bitmap;
use core::core::BlockHeader;
use core::core::hash::{Hash, Hashed};
use core::core::pmmr::{self, family, Backend};
use core::core::prune_list::PruneList;
use core::core::BlockHeader;
use core::ser::{self, PMMRable};
use leaf_set::LeafSet;
use prune_list::PruneList;
use rm_log::RemoveLog;
use types::{prune_noop, read_ordered_vec, write_vec, AppendOnlyFile};
use types::{prune_noop, read_ordered_vec, AppendOnlyFile};
use util::LOGGER;
const PMMR_HASH_FILE: &'static str = "pmmr_hash.bin";
const PMMR_DATA_FILE: &'static str = "pmmr_data.bin";
const PMMR_LEAF_FILE: &'static str = "pmmr_leaf.bin";
const PMMR_RM_LOG_FILE: &'static str = "pmmr_rm_log.bin";
const PMMR_PRUNED_FILE: &'static str = "pmmr_pruned.bin";
const PMMR_PRUN_FILE: &'static str = "pmmr_prun.bin";
// TODO - we can get rid of these for testnet3 (only used for migration during
// testnet2). "Legacy" rm_log.bin and pruned.bin files (used when migrating
// existing node).
const LEGACY_RM_LOG_FILE: &'static str = "pmmr_rm_log.bin";
const LEGACY_PRUNED_FILE: &'static str = "pmmr_pruned.bin";
/// PMMR persistent backend implementation. Relies on multiple facilities to
/// handle writing, reading and pruning.
@ -55,7 +60,7 @@ where
hash_file: AppendOnlyFile,
data_file: AppendOnlyFile,
leaf_set: LeafSet,
pruned_nodes: PruneList,
prune_list: PruneList,
_marker: marker::PhantomData<T>,
}
@ -79,18 +84,19 @@ where
}
fn get_from_file(&self, position: u64) -> Option<Hash> {
let shift = self.pruned_nodes.get_shift(position);
if let None = shift {
if self.is_compacted(position) {
return None;
}
let shift = self.prune_list.get_shift(position);
// Read PMMR
// The MMR starts at 1, our binary backend starts at 0
let pos = position - 1;
// Must be on disk, doing a read at the correct position
let hash_record_len = 32;
let file_offset = ((pos - shift.unwrap()) as usize) * hash_record_len;
let file_offset = ((pos - shift) as usize) * hash_record_len;
let data = self.hash_file.read(file_offset, hash_record_len);
match ser::deserialize(&mut &data[..]) {
Ok(h) => Some(h),
@ -105,16 +111,15 @@ where
}
fn get_data_from_file(&self, position: u64) -> Option<T> {
let shift = self.pruned_nodes.get_leaf_shift(position);
if let None = shift {
if self.is_compacted(position) {
return None;
}
let shift = self.prune_list.get_leaf_shift(position);
let pos = pmmr::n_leaves(position) - 1;
// Must be on disk, doing a read at the correct position
let record_len = T::len();
let file_offset = ((pos - shift.unwrap()) as usize) * record_len;
let file_offset = ((pos - shift) as usize) * record_len;
let data = self.data_file.read(file_offset, record_len);
match ser::deserialize(&mut &data[..]) {
Ok(h) => Some(h),
@ -161,13 +166,13 @@ where
self.leaf_set.rewind(rewind_add_pos, rewind_rm_pos);
// Rewind the hash file accounting for pruned/compacted pos
let shift = self.pruned_nodes.get_shift(position).unwrap_or(0);
let shift = self.prune_list.get_shift(position);
let record_len = 32 as u64;
let file_pos = (position - shift) * record_len;
self.hash_file.rewind(file_pos);
// Rewind the data file accounting for pruned/compacted pos
let leaf_shift = self.pruned_nodes.get_leaf_shift(position).unwrap_or(0);
let leaf_shift = self.prune_list.get_leaf_shift(position);
let flatfile_pos = pmmr::n_leaves(position);
let record_len = T::len() as u64;
let file_pos = (flatfile_pos - leaf_shift) * record_len;
@ -202,7 +207,7 @@ where
self.hash_size().unwrap_or(0),
self.data_size().unwrap_or(0),
self.leaf_set.len(),
self.pruned_nodes.pruned_nodes.len(),
self.prune_list.len(),
);
}
}
@ -214,34 +219,38 @@ where
/// Instantiates a new PMMR backend.
/// Use the provided dir to store its files.
pub fn new(data_dir: String, header: Option<&BlockHeader>) -> io::Result<PMMRBackend<T>> {
let prune_list = read_ordered_vec(format!("{}/{}", data_dir, PMMR_PRUNED_FILE), 8)?;
let pruned_nodes = PruneList {
pruned_nodes: prune_list,
};
let hash_file = AppendOnlyFile::open(format!("{}/{}", data_dir, PMMR_HASH_FILE))?;
let data_file = AppendOnlyFile::open(format!("{}/{}", data_dir, PMMR_DATA_FILE))?;
let leaf_set_path = format!("{}/{}", data_dir, PMMR_LEAF_FILE);
let rm_log_path = format!("{}/{}", data_dir, PMMR_RM_LOG_FILE);
// If we received a rewound "snapshot" leaf_set file
// move it into place so we use it.
if let Some(header) = header {
let leaf_snapshot_path = format!("{}/{}.{}", data_dir, PMMR_LEAF_FILE, header.hash());
LeafSet::copy_snapshot(leaf_set_path.clone(), leaf_snapshot_path.clone())?;
}
// If we need to migrate an old rm_log to a new leaf_set do it here before we
// start. Do *not* migrate if we already have a leaf_set.
let mut leaf_set = LeafSet::open(leaf_set_path.clone())?;
if leaf_set.is_empty() && Path::new(&rm_log_path).exists() {
let mut rm_log = RemoveLog::open(rm_log_path)?;
debug!(
LOGGER,
"pmmr: leaf_set: {}, rm_log: {}",
leaf_set.len(),
rm_log.len()
);
debug!(LOGGER, "pmmr: migrating rm_log -> leaf_set");
// If we need to migrate legacy prune_list do it here before we start.
// Do *not* migrate if we already have a non-empty prune_list.
let mut prune_list = PruneList::open(format!("{}/{}", data_dir, PMMR_PRUN_FILE))?;
let legacy_prune_list_path = format!("{}/{}", data_dir, LEGACY_PRUNED_FILE);
if prune_list.is_empty() && Path::new(&legacy_prune_list_path).exists() {
debug!(LOGGER, "pmmr: migrating prune_list -> bitmap prune_list");
let legacy_prune_pos = read_ordered_vec(legacy_prune_list_path, 8)?;
for x in legacy_prune_pos {
prune_list.add(x);
}
prune_list.flush()?;
}
// If we need to migrate legacy rm_log to a new leaf_set do it here before we
// start. Do *not* migrate if we already have a non-empty leaf_set.
let mut leaf_set = LeafSet::open(leaf_set_path.clone())?;
let legacy_rm_log_path = format!("{}/{}", data_dir, LEGACY_RM_LOG_FILE);
if leaf_set.is_empty() && Path::new(&legacy_rm_log_path).exists() {
debug!(LOGGER, "pmmr: migrating rm_log -> leaf_set");
let mut rm_log = RemoveLog::open(legacy_rm_log_path)?;
if let Some(header) = header {
// Rewind the rm_log back to the height of the header we care about.
debug!(
@ -251,40 +260,42 @@ where
rm_log.rewind(header.height as u32)?;
}
// do not like this here but we have no pmmr to call
// unpruned_size() on yet...
let last_pos = {
let total_shift = pruned_nodes.get_shift(::std::u64::MAX).unwrap();
let total_shift = prune_list.get_total_shift();
let record_len = 32;
let sz = hash_file.size()?;
sz / record_len + total_shift
};
migrate_rm_log(&mut leaf_set, &rm_log, &pruned_nodes, last_pos)?;
migrate_rm_log(&mut leaf_set, &rm_log, &prune_list, last_pos)?;
}
let leaf_set = LeafSet::open(leaf_set_path)?;
Ok(PMMRBackend {
data_dir,
hash_file,
data_file,
leaf_set,
pruned_nodes,
prune_list,
_marker: marker::PhantomData,
})
}
fn is_pruned(&self, pos: u64) -> bool {
let path = pmmr::path(pos, self.unpruned_size().unwrap_or(0));
path.iter()
.any(|x| self.pruned_nodes.pruned_nodes.contains(x))
self.prune_list.is_pruned(pos)
}
/// Is `pos` the root of a pruned subtree? Delegates to the backend's
/// prune_list.
fn is_pruned_root(&self, pos: u64) -> bool {
self.prune_list.is_pruned_root(pos)
}
/// Is `pos` pruned without being the root of a pruned subtree?
/// The file readers in this backend return `None` for such positions.
fn is_compacted(&self, pos: u64) -> bool {
    if !self.is_pruned(pos) {
        return false;
    }
    !self.is_pruned_root(pos)
}
/// Number of elements in the PMMR stored by this backend. Only produces the
/// fully sync'd size.
pub fn unpruned_size(&self) -> io::Result<u64> {
let total_shift = self.pruned_nodes.get_shift(::std::u64::MAX).unwrap();
let total_shift = self.prune_list.get_total_shift();
let record_len = 32;
let sz = self.hash_file.size()?;
@ -371,7 +382,7 @@ where
let record_len = 32;
let off_to_rm = map_vec!(pos_to_rm, |pos| {
let shift = self.pruned_nodes.get_shift(pos.into()).unwrap();
let shift = self.prune_list.get_shift(pos.into());
((pos as u64) - 1 - shift) * record_len
});
@ -395,7 +406,7 @@ where
let off_to_rm = map_vec!(leaf_pos_to_rm, |&pos| {
let flat_pos = pmmr::n_leaves(pos);
let shift = self.pruned_nodes.get_leaf_shift(pos).unwrap();
let shift = self.prune_list.get_leaf_shift(pos);
(flat_pos - 1 - shift) * record_len
});
@ -407,24 +418,12 @@ where
)?;
}
// 3. Update the prune list and save it in place.
// 3. Update the prune list and write to disk.
{
for pos in leaves_removed.iter() {
self.pruned_nodes.add(pos.into());
self.prune_list.add(pos.into());
}
// TODO - we can get rid of leaves in the prunelist here (and things still work)
// self.pruned_nodes.pruned_nodes.retain(|&x| !pmmr::is_leaf(x));
// Prunelist contains *only* non-leaf roots.
// Contrast this with the leaf_set that contains *only* leaves.
self.pruned_nodes
.pruned_nodes
.retain(|&x| !pmmr::is_leaf(x));
write_vec(
format!("{}/{}", self.data_dir, PMMR_PRUNED_FILE),
&self.pruned_nodes.pruned_nodes,
)?;
self.prune_list.flush()?;
}
// 4. Rename the compact copy of hash file and reopen it.
@ -460,7 +459,7 @@ where
cutoff_pos,
rewind_add_pos,
rewind_rm_pos,
&self.pruned_nodes,
&self.prune_list,
);
for x in leaf_pos_to_rm.iter() {
@ -468,7 +467,7 @@ where
let mut current = x as u64;
loop {
let (parent, sibling) = family(current);
let sibling_pruned = self.is_pruned(sibling);
let sibling_pruned = self.is_pruned_root(sibling);
// if sibling previously pruned
// push it back onto list of pos to remove

225
store/src/prune_list.rs Normal file
View file

@ -0,0 +1,225 @@
// Copyright 2018 The Grin Developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! The Grin "Prune List" implementation.
//! Implemented as a compact (roaring) bitmap of pruned root positions.
//!
//! Maintains a set of pruned root node positions that define the pruned
//! and compacted "gaps" in the MMR data and hash files.
//! The root itself is maintained in the hash file, but all positions beneath
//! the root are compacted away. All positions to the right of a pruned node
//! must be shifted the appropriate amount when reading from the hash and data
//! files.
use std::fs::File;
use std::io::{self, BufWriter, Read, Write};
use std::path::Path;
use croaring::Bitmap;
use core::core::pmmr::{bintree_postorder_height, family, is_leaf, path};
/// Records which PMMR subtrees have been pruned and translates PMMR positions
/// into positions within a compacted backend.
///
/// When a backend (a large byte array or a file, for instance) physically
/// removes pruned nodes to reclaim space, the position of a node in the PMMR
/// no longer matches its position in storage. The prune list computes how far
/// a given position has to be shifted to account for the subtrees pruned
/// before it, and can tell whether a position has been pruned at all.
///
/// Entries are rolled up as pruning proceeds: when the sibling of a newly
/// pruned node is already present, the sibling is removed and the parent is
/// recorded instead.
pub struct PruneList {
    /// File the bitmap is written to by `flush`. `None` for a list created
    /// via `new`, in which case `flush` does not write anything to disk.
    path: Option<String>,
    /// Bitmap representing pruned root node positions.
    bitmap: Bitmap,
}
impl PruneList {
/// Creates an empty prune list that has no backing file.
pub fn new() -> PruneList {
    let bitmap = Bitmap::create();
    PruneList { path: None, bitmap }
}
/// Opens the prune list stored at `path`, or starts an empty one when no
/// file exists there yet. The path is remembered so that `flush` can write
/// the list back to the same location.
///
/// Returns an error if the existing file cannot be opened or read.
pub fn open(path: String) -> io::Result<PruneList> {
    let bitmap = if Path::new(&path).exists() {
        // Load the whole serialized bitmap into memory and decode it.
        let mut buffer = vec![];
        File::open(&path)?.read_to_end(&mut buffer)?;
        Bitmap::deserialize(&buffer)
    } else {
        Bitmap::create()
    };

    Ok(PruneList {
        path: Some(path),
        bitmap,
    })
}
/// Removes every leaf position from the bitmap, keeping only non-leaf
/// entries.
fn clear_leaves(&mut self) {
    // Collect first: the bitmap cannot be modified while it is being iterated.
    let leaf_pos: Vec<u32> = self
        .bitmap
        .iter()
        .filter(|&x| is_leaf(x as u64))
        .collect();
    self.bitmap.andnot_inplace(&Bitmap::of(&leaf_pos));
}
/// Saves the prune_list to disk.
///
/// Leaf positions are cleared from the bitmap first (these are tracked via
/// the leaf_set) and the bitmap is run-optimized. Both steps modify the
/// in-memory list even when there is no backing file to write to.
///
/// Returns an error if the file cannot be created or written.
pub fn flush(&mut self) -> io::Result<()> {
    self.clear_leaves();
    self.bitmap.run_optimize();

    // Nothing to persist for a list created without a backing file.
    let path = match self.path {
        Some(ref path) => path,
        None => return Ok(()),
    };

    // TODO - consider writing this to disk in a tmp file and then renaming?
    let mut writer = BufWriter::new(File::create(path)?);
    writer.write_all(&self.bitmap.serialize())?;
    writer.flush()
}
/// Returns the total shift contributed by all entries in the prune_list.
pub fn get_total_shift(&self) -> u64 {
    // Asking for the shift just past the largest entry includes every entry.
    let past_last_root = self.bitmap.maximum() as u64 + 1;
    self.get_shift(past_last_root)
}
/// Returns the number of positions the node at `pos` must be shifted by to
/// account for the pruned subtrees rooted at or before `pos`.
///
/// Note: the node at `pos` may itself be pruned and compacted away; the
/// caller needs to check for that separately.
pub fn get_shift(&self, pos: u64) -> u64 {
    // Each pruned subtree removes all of its nodes except the root:
    // height 0, 1 node, offset 0 = 0 + 0
    // height 1, 3 nodes, offset 2 = 1 + 1
    // height 2, 7 nodes, offset 6 = 3 + 3
    // height 3, 15 nodes, offset 14 = 7 + 7
    self.pruned_lte(pos)
        .into_iter()
        .map(|root| {
            let height = bintree_postorder_height(root);
            2 * ((1u64 << height) - 1)
        })
        .sum()
}
/// As above, but only returning the number of leaf nodes to skip for a
/// given leaf. Helpful if, for instance, data for each leaf is being stored
/// separately in a continuous flat-file.
pub fn get_leaf_shift(&self, pos: u64) -> u64 {
let pruned = self.pruned_lte(pos);
// skip by the number of leaf nodes pruned in the preceeding subtrees
// which just 2^height
// except in the case of height==0
// (where we want to treat the pruned tree as 0 leaves)
pruned
.iter()
.map(|&n| {
let height = bintree_postorder_height(n);
if height == 0 {
0
} else {
1 << height
}
})
.sum()
}
/// Adds the node at `pos` to the prune list.
///
/// While the sibling of the node being added is already in the bitmap, the
/// sibling is removed and the parent is considered instead, so a fully pruned
/// pair is always represented by its parent. The resulting node is recorded
/// only if it is not already pruned.
pub fn add(&mut self, pos: u64) {
    // NOTE(review): positions are stored as u32; the `as u32` casts below
    // truncate positions above u32::MAX - confirm callers never exceed that.
    let mut node = pos;
    loop {
        let (parent, sibling) = family(node);
        if !self.bitmap.contains(sibling as u32) {
            break;
        }
        // Both children are pruned: drop the sibling and move up.
        self.bitmap.remove(sibling as u32);
        node = parent;
    }
    if !self.is_pruned(node) {
        self.bitmap.add(node as u32);
    }
}
/// Number of entries in the prune_list, i.e. the cardinality of the
/// underlying bitmap.
pub fn len(&self) -> u64 {
self.bitmap.cardinality()
}
/// Is the prune_list empty? True when the underlying bitmap holds no
/// positions.
pub fn is_empty(&self) -> bool {
self.bitmap.is_empty()
}
/// Returns the positions held in the prune_list as a vec of `u64`.
pub fn to_vec(&self) -> Vec<u64> {
    self.bitmap
        .to_vec()
        .iter()
        .map(|&pos| u64::from(pos))
        .collect()
}
/// Checks if the specified position has been pruned,
/// either directly (pos contained in the prune list itself)
/// or indirectly (pos is beneath a pruned root).
///
/// An empty prune list never reports a position as pruned.
pub fn is_pruned(&self, pos: u64) -> bool {
    if self.is_empty() {
        return false;
    }
    // No pruned root lies beyond the largest entry, so the path from `pos`
    // only needs to be followed up to that position.
    let max_root = self.bitmap.maximum() as u64;
    // The bitmap stores u32 positions. A position above u32::MAX can never be
    // an entry, so reject it before the cast rather than letting it wrap
    // around onto an unrelated low position.
    let max_storable = u64::from(::std::u32::MAX);
    path(pos, max_root)
        .into_iter()
        .any(|x| x <= max_storable && self.bitmap.contains(x as u32))
}
/// Is the specified position a root of a pruned subtree?
/// Only checks for direct membership in the bitmap; unlike `is_pruned`
/// it does not consider positions beneath a pruned root.
pub fn is_pruned_root(&self, pos: u64) -> bool {
self.bitmap.contains(pos as u32)
}
/// Collects the entries of the prune list that are less than or equal to
/// pos. Iteration stops at the first entry found to be greater than pos.
fn pruned_lte(&self, pos: u64) -> Vec<u64> {
    let limit = pos as u32;
    self.bitmap
        .iter()
        .take_while(|&entry| entry <= limit)
        .map(|entry| entry as u64)
        .collect()
}
}

View file

@ -270,7 +270,7 @@ fn pmmr_reload() {
assert_eq!(backend.get_hash(1), None);
assert_eq!(backend.get_hash(2), None);
// pos 3 is "removed" but we keep the hash around for non-leaf pos.
// pos 3 is "removed" but we keep the hash around for root of pruned subtree
assert_eq!(backend.get_hash(3), Some(pos_3_hash));
// pos 4 is removed (via prune list)
@ -311,11 +311,17 @@ fn pmmr_rewind() {
backend.sync().unwrap();
let root2 = {
let pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size);
assert_eq!(pmmr.unpruned_size(), 10);
pmmr.root()
};
mmr_size = load(mmr_size, &elems[6..9], &mut backend);
backend.sync().unwrap();
let root3 = {
let pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size);
assert_eq!(pmmr.unpruned_size(), 16);
pmmr.root()
};
// prune the first 4 elements (leaves at pos 1, 2, 4, 5)
{
@ -327,18 +333,36 @@ fn pmmr_rewind() {
}
backend.sync().unwrap();
println!("before compacting - ");
for x in 1..17 {
println!("pos {}, {:?}", x, backend.get_from_file(x));
}
// and compact the MMR to remove the pruned elements
backend
.check_compact(6, &Bitmap::create(), &Bitmap::create(), &prune_noop)
.unwrap();
backend.sync().unwrap();
println!("after compacting - ");
for x in 1..17 {
println!("pos {}, {:?}", x, backend.get_from_file(x));
}
println!("root1 {:?}, root2 {:?}, root3 {:?}", root1, root2, root3);
// rewind and check the roots still match
{
let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size);
pmmr.rewind(9, &Bitmap::of(&vec![11, 12, 16]), &Bitmap::create())
.unwrap();
assert_eq!(pmmr.root(), root2);
assert_eq!(pmmr.unpruned_size(), 10);
// assert_eq!(pmmr.root(), root2);
}
println!("after rewinding - ");
for x in 1..17 {
println!("pos {}, {:?}", x, backend.get_from_file(x));
}
println!("doing a sync after rewinding");

228
store/tests/prune_list.rs Normal file
View file

@ -0,0 +1,228 @@
// Copyright 2018 The Grin Developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
extern crate grin_store as store;
use store::prune_list::PruneList;
// Checks `is_pruned` as positions are added one at a time: a lone leaf,
// two sibling leaves rolling up into their parent, an unrelated leaf,
// and finally the effect of flushing the prune list.
#[test]
fn test_is_pruned() {
let mut pl = PruneList::new();
// an empty prune list reports nothing as pruned
assert_eq!(pl.len(), 0);
assert_eq!(pl.is_pruned(1), false);
assert_eq!(pl.is_pruned(2), false);
assert_eq!(pl.is_pruned(3), false);
// prune a single leaf (pos 2), only that pos is pruned
pl.add(2);
assert_eq!(pl.len(), 1);
assert_eq!(pl.to_vec(), [2]);
assert_eq!(pl.is_pruned(1), false);
assert_eq!(pl.is_pruned(2), true);
assert_eq!(pl.is_pruned(3), false);
assert_eq!(pl.is_pruned(4), false);
// prune the sibling leaf (pos 1), the pair is replaced by the parent (pos 3)
// and both leaves are now pruned indirectly via that root
pl.add(1);
assert_eq!(pl.len(), 1);
assert_eq!(pl.to_vec(), [3]);
assert_eq!(pl.is_pruned(1), true);
assert_eq!(pl.is_pruned(2), true);
assert_eq!(pl.is_pruned(3), true);
assert_eq!(pl.is_pruned(4), false);
// prune another leaf (pos 4), its sibling is not pruned so it is kept as is
pl.add(4);
assert_eq!(pl.len(), 2);
assert_eq!(pl.to_vec(), [3, 4]);
assert_eq!(pl.is_pruned(1), true);
assert_eq!(pl.is_pruned(2), true);
assert_eq!(pl.is_pruned(3), true);
assert_eq!(pl.is_pruned(4), true);
assert_eq!(pl.is_pruned(5), false);
// Flushing the prune_list removes any individual leaf positions.
// This assumes we will track these outside the prune_list via the leaf_set.
pl.flush().unwrap();
// after the flush only the pruned root (pos 3) remains, pos 4 is gone
assert_eq!(pl.len(), 1);
assert_eq!(pl.to_vec(), [3]);
assert_eq!(pl.is_pruned(1), true);
assert_eq!(pl.is_pruned(2), true);
assert_eq!(pl.is_pruned(3), true);
assert_eq!(pl.is_pruned(4), false);
assert_eq!(pl.is_pruned(5), false);
}
// Checks `get_leaf_shift` (the number of leaves to skip for a given pos)
// as leaves are pruned and rolled up into larger pruned subtrees.
#[test]
fn test_get_leaf_shift() {
let mut pl = PruneList::new();
// start with an empty prune list (nothing shifted)
assert_eq!(pl.len(), 0);
assert_eq!(pl.get_leaf_shift(1), 0);
assert_eq!(pl.get_leaf_shift(2), 0);
assert_eq!(pl.get_leaf_shift(3), 0);
assert_eq!(pl.get_leaf_shift(4), 0);
// now add a single leaf pos to the prune list
// pruning a lone leaf does not shift anything
// we only start shifting after pruning a parent
pl.add(1);
assert_eq!(pl.len(), 1);
assert_eq!(pl.get_leaf_shift(1), 0);
assert_eq!(pl.get_leaf_shift(2), 0);
assert_eq!(pl.get_leaf_shift(3), 0);
assert_eq!(pl.get_leaf_shift(4), 0);
// now add the sibling leaf (pos 2), so pos 1 and pos 2 roll up into
// their parent at pos 3; this in turn gives a leaf shift of 2
// for pos 3 and every pos after it
pl.add(2);
assert_eq!(pl.len(), 1);
assert_eq!(pl.get_leaf_shift(1), 0);
assert_eq!(pl.get_leaf_shift(2), 0);
assert_eq!(pl.get_leaf_shift(3), 2);
assert_eq!(pl.get_leaf_shift(4), 2);
assert_eq!(pl.get_leaf_shift(5), 2);
// now prune an additional leaf at pos 4
// a lone leaf adds no shift so the leaf offset of subsequent pos stays at 2
// 00100120 (presumably the node heights for pos 1 through 8)
pl.add(4);
assert_eq!(pl.len(), 2);
assert_eq!(pl.to_vec(), [3, 4]);
assert_eq!(pl.get_leaf_shift(1), 0);
assert_eq!(pl.get_leaf_shift(2), 0);
assert_eq!(pl.get_leaf_shift(3), 2);
assert_eq!(pl.get_leaf_shift(4), 2);
assert_eq!(pl.get_leaf_shift(5), 2);
assert_eq!(pl.get_leaf_shift(6), 2);
assert_eq!(pl.get_leaf_shift(7), 2);
assert_eq!(pl.get_leaf_shift(8), 2);
// now prune the sibling at pos 5
// the two smaller subtrees (pos 3 and pos 6) are rolled up into the
// larger subtree (pos 7); the leaf offset is now 4 to cover the entire
// subtree containing the first 4 leaves
// 00100120 (presumably the node heights for pos 1 through 8)
pl.add(5);
assert_eq!(pl.len(), 1);
assert_eq!(pl.to_vec(), [7]);
assert_eq!(pl.get_leaf_shift(1), 0);
assert_eq!(pl.get_leaf_shift(2), 0);
assert_eq!(pl.get_leaf_shift(3), 0);
assert_eq!(pl.get_leaf_shift(4), 0);
assert_eq!(pl.get_leaf_shift(5), 0);
assert_eq!(pl.get_leaf_shift(6), 0);
assert_eq!(pl.get_leaf_shift(7), 4);
assert_eq!(pl.get_leaf_shift(8), 4);
assert_eq!(pl.get_leaf_shift(9), 4);
// now check we can prune some unconnected nodes in arbitrary order
// and that leaf_shift is correct for various pos
let mut pl = PruneList::new();
pl.add(5);
pl.add(11);
pl.add(12);
pl.add(4);
// the two sibling pairs (4, 5) and (11, 12) roll up to pos 6 and pos 13
assert_eq!(pl.len(), 2);
assert_eq!(pl.to_vec(), [6, 13]);
assert_eq!(pl.get_leaf_shift(2), 0);
assert_eq!(pl.get_leaf_shift(4), 0);
assert_eq!(pl.get_leaf_shift(8), 2);
assert_eq!(pl.get_leaf_shift(9), 2);
assert_eq!(pl.get_leaf_shift(13), 4);
assert_eq!(pl.get_leaf_shift(14), 4);
}
// Checks `get_shift` (the number of positions to skip for a given pos)
// as leaves are pruned, rolled up into parents and re-added.
#[test]
fn test_get_shift() {
let mut pl = PruneList::new();
// an empty prune list shifts nothing
assert!(pl.is_empty());
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
assert_eq!(pl.get_shift(3), 0);
// prune a single leaf node
// pruning only a leaf node does not shift any subsequent pos
// we will only start shifting when a parent can be pruned
pl.add(1);
assert_eq!(pl.to_vec(), [1]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
assert_eq!(pl.get_shift(3), 0);
// prune the sibling leaf, pos 1 and pos 2 roll up into the parent at pos 3
// which shifts pos 3 and everything after it by 2
pl.add(2);
assert_eq!(pl.to_vec(), [3]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
assert_eq!(pl.get_shift(3), 2);
assert_eq!(pl.get_shift(4), 2);
assert_eq!(pl.get_shift(5), 2);
assert_eq!(pl.get_shift(6), 2);
// pos 3 is not a leaf and is already in prune list
// prune it and check we are still consistent
pl.add(3);
assert_eq!(pl.to_vec(), [3]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
assert_eq!(pl.get_shift(3), 2);
assert_eq!(pl.get_shift(4), 2);
assert_eq!(pl.get_shift(5), 2);
assert_eq!(pl.get_shift(6), 2);
// prune another lone leaf (pos 4), the shift is unchanged
pl.add(4);
assert_eq!(pl.to_vec(), [3, 4]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
assert_eq!(pl.get_shift(3), 2);
assert_eq!(pl.get_shift(4), 2);
assert_eq!(pl.get_shift(5), 2);
assert_eq!(pl.get_shift(6), 2);
// prune its sibling (pos 5), everything rolls up into the subtree at pos 7
// and the shift from pos 7 onwards becomes 6
pl.add(5);
assert_eq!(pl.to_vec(), [7]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
assert_eq!(pl.get_shift(3), 0);
assert_eq!(pl.get_shift(4), 0);
assert_eq!(pl.get_shift(5), 0);
assert_eq!(pl.get_shift(6), 0);
assert_eq!(pl.get_shift(7), 6);
assert_eq!(pl.get_shift(8), 6);
assert_eq!(pl.get_shift(9), 6);
// prune a bunch more
for x in 6..1000 {
pl.add(x);
}
// and check we shift by a large number (hopefully the correct number...)
assert_eq!(pl.get_shift(1010), 996);
// start over and prune two separate sibling pairs in reverse order
// (8, 9) rolls up to pos 10 and (4, 5) rolls up to pos 6
let mut pl = PruneList::new();
pl.add(9);
pl.add(8);
pl.add(5);
pl.add(4);
assert_eq!(pl.to_vec(), [6, 10]);
assert_eq!(pl.get_shift(1), 0);
assert_eq!(pl.get_shift(2), 0);
assert_eq!(pl.get_shift(3), 0);
assert_eq!(pl.get_shift(4), 0);
assert_eq!(pl.get_shift(5), 0);
assert_eq!(pl.get_shift(6), 2);
assert_eq!(pl.get_shift(7), 2);
assert_eq!(pl.get_shift(8), 2);
assert_eq!(pl.get_shift(9), 2);
assert_eq!(pl.get_shift(10), 4);
assert_eq!(pl.get_shift(11), 4);
assert_eq!(pl.get_shift(12), 4);
}