More efficient serialization for bitmap segments (#3492)
* More efficient serialization for bitmap segments
* Rename a const
* Correctly count number of chunks in a segment
* Enum for BitmapBlock (de)ser mode
* Add more segments in test
* Fix duplicate function

This commit is contained in:
parent b3938de8b3
commit 055b684416

3 changed files with 442 additions and 1 deletions
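
In short, the change regroups the 1024-bit leaf chunks of a bitmap segment into `BitmapBlock`s of 2^16 bits and writes each block in one of three modes (raw bytes, set-bit indices, or clear-bit indices) chosen by how full the block is. The standalone sketch below is illustrative only and is not code from this commit; the `Mode` enum and `pick_mode` function are stand-ins for the commit's `BitmapBlockSerialization` enum and the branch inside `BitmapBlock::write`, while `NBITS` and the threshold mirror the constants introduced in the diff that follows.

```rust
// Illustrative sketch (not part of the commit): how a 2^16-bit block would
// pick its serialization mode from its occupancy, mirroring the diff below.
const NBITS: u32 = 1 << 16; // bits per BitmapBlock
const THRESHOLD: u32 = NBITS / 16; // 4096: break-even point between modes

#[derive(Debug, PartialEq)]
enum Mode {
    Raw,      // 8 KiB of raw bitmap bytes
    Positive, // list of set-bit indices (sparse block)
    Negative, // list of clear-bit indices (nearly full block)
}

fn pick_mode(count_pos: u32) -> Mode {
    let count_neg = NBITS - count_pos;
    if count_pos < THRESHOLD {
        Mode::Positive
    } else if count_neg < THRESHOLD {
        Mode::Negative
    } else {
        Mode::Raw
    }
}

fn main() {
    assert_eq!(pick_mode(100), Mode::Positive);
    assert_eq!(pick_mode(NBITS - 5), Mode::Negative);
    assert_eq!(pick_mode(NBITS / 2), Mode::Raw);
}
```
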
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cmp::min;
use std::convert::TryFrom;
use std::time::Instant;

@@ -19,9 +20,11 @@ use bit_vec::BitVec;
use croaring::Bitmap;

use crate::core::core::hash::{DefaultHashable, Hash};
use crate::core::core::pmmr::segment::{Segment, SegmentIdentifier, SegmentProof};
use crate::core::core::pmmr::{self, ReadablePMMR, ReadonlyPMMR, VecBackend, PMMR};
use crate::core::ser::{self, PMMRable, Readable, Reader, Writeable, Writer};
use crate::error::{Error, ErrorKind};
use enum_primitive::FromPrimitive;

/// The "bitmap accumulator" allows us to commit to a specific bitmap by splitting it into
/// fragments and inserting these fragments into an MMR to produce an overall root hash.

@@ -187,7 +190,7 @@ impl BitmapAccumulator {

/// A bitmap "chunk" representing 1024 contiguous bits of the overall bitmap.
/// The first 1024 bits belong in one chunk. The next 1024 bits in the next chunk, etc.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BitmapChunk(BitVec);

impl BitmapChunk {

@@ -242,3 +245,304 @@ impl Readable for BitmapChunk {
        Ok(BitmapChunk::new())
    }
}

/// Alternative representation of a `Segment<BitmapChunk>` with a more compact (de)serialization.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BitmapSegment {
    identifier: SegmentIdentifier,
    blocks: Vec<BitmapBlock>,
    proof: SegmentProof,
}

impl Writeable for BitmapSegment {
    fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
        Writeable::write(&self.identifier, writer)?;
        writer.write_u16(self.blocks.len() as u16)?;
        for block in &self.blocks {
            Writeable::write(block, writer)?;
        }
        Writeable::write(&self.proof, writer)?;
        Ok(())
    }
}

impl Readable for BitmapSegment {
    fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
        let identifier: SegmentIdentifier = Readable::read(reader)?;

        let n_blocks = reader.read_u16()? as usize;
        let mut blocks = Vec::<BitmapBlock>::with_capacity(n_blocks);
        for _ in 0..n_blocks {
            blocks.push(Readable::read(reader)?);
        }
        let proof = Readable::read(reader)?;

        Ok(Self {
            identifier,
            blocks,
            proof,
        })
    }
}

// TODO: this can be sped up with some `unsafe` code
impl From<Segment<BitmapChunk>> for BitmapSegment {
    fn from(segment: Segment<BitmapChunk>) -> Self {
        let (identifier, _, _, _, leaf_data, proof) = segment.parts();

        let mut chunks_left = leaf_data.len();
        let mut blocks =
            Vec::with_capacity((chunks_left + BitmapBlock::NCHUNKS - 1) / BitmapBlock::NCHUNKS);
        while chunks_left > 0 {
            let n_chunks = min(BitmapBlock::NCHUNKS, chunks_left);
            chunks_left = chunks_left.saturating_sub(n_chunks);
            blocks.push(BitmapBlock::new(n_chunks));
        }

        for (chunk_idx, chunk) in leaf_data.into_iter().enumerate() {
            assert_eq!(chunk.0.len(), BitmapChunk::LEN_BITS);
            let block = &mut blocks
                .get_mut(chunk_idx / BitmapBlock::NCHUNKS)
                .unwrap()
                .inner;
            let offset = (chunk_idx % BitmapBlock::NCHUNKS) * BitmapChunk::LEN_BITS;
            for (i, _) in chunk.0.iter().enumerate().filter(|&(_, v)| v) {
                block.set(offset + i, true);
            }
        }

        Self {
            identifier,
            blocks,
            proof,
        }
    }
}

// TODO: this can be sped up with some `unsafe` code
impl From<BitmapSegment> for Segment<BitmapChunk> {
    fn from(segment: BitmapSegment) -> Self {
        let BitmapSegment {
            identifier,
            blocks,
            proof,
        } = segment;

        // Count the number of chunks taking into account that the final block might be smaller
        let n_chunks = blocks.len().saturating_sub(1) * BitmapBlock::NCHUNKS
            + blocks.last().map(|b| b.n_chunks()).unwrap_or(0);
        let mut leaf_pos = Vec::with_capacity(n_chunks);
        let mut chunks = Vec::with_capacity(n_chunks);
        let offset = (1 << identifier.height) * identifier.idx + 1;
        for i in 0..(n_chunks as u64) {
            leaf_pos.push(pmmr::insertion_to_pmmr_index(offset + i));
            chunks.push(BitmapChunk::new());
        }

        for (block_idx, block) in blocks.into_iter().enumerate() {
            assert_eq!(block.inner.len(), BitmapBlock::NBITS as usize);
            let offset = block_idx * BitmapBlock::NCHUNKS;
            for (i, _) in block.inner.iter().enumerate().filter(|&(_, v)| v) {
                chunks
                    .get_mut(offset + i / BitmapChunk::LEN_BITS)
                    .unwrap()
                    .0
                    .set(i % BitmapChunk::LEN_BITS, true);
            }
        }

        Segment::from_parts(identifier, Vec::new(), Vec::new(), leaf_pos, chunks, proof)
    }
}

/// A block of 2^16 bits that provides an efficient (de)serialization
/// depending on the bitmap occupancy.
#[derive(Clone, Debug, PartialEq, Eq)]
struct BitmapBlock {
    inner: BitVec,
}

impl BitmapBlock {
    /// Maximum number of bits in a block
    const NBITS: u32 = 1 << 16;
    /// Maximum number of chunks in a block
    const NCHUNKS: usize = Self::NBITS as usize / BitmapChunk::LEN_BITS;

    fn new(n_chunks: usize) -> Self {
        assert!(n_chunks <= BitmapBlock::NCHUNKS);
        Self {
            inner: BitVec::from_elem(n_chunks * BitmapChunk::LEN_BITS, false),
        }
    }

    fn n_chunks(&self) -> usize {
        let length = self.inner.len();
        assert_eq!(length % BitmapChunk::LEN_BITS, 0);
        let n_chunks = length / BitmapChunk::LEN_BITS;
        assert!(n_chunks <= BitmapBlock::NCHUNKS);
        n_chunks
    }
}

impl Writeable for BitmapBlock {
    fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
        let length = self.inner.len();
        assert!(length <= Self::NBITS as usize);
        assert_eq!(length % BitmapChunk::LEN_BITS, 0);
        writer.write_u8((length / BitmapChunk::LEN_BITS) as u8)?;

        let count_pos = self.inner.iter().filter(|&v| v).count() as u32;
        let count_neg = Self::NBITS - count_pos;
        let threshold = Self::NBITS / 16;
        if count_pos < threshold {
            // Write positive indices
            Writeable::write(&BitmapBlockSerialization::Positive, writer)?;
            writer.write_u16(count_pos as u16)?;
            for (i, _) in self.inner.iter().enumerate().filter(|&(_, v)| v) {
                writer.write_u16(i as u16)?;
            }
        } else if count_neg < threshold {
            // Write negative indices
            Writeable::write(&BitmapBlockSerialization::Negative, writer)?;
            writer.write_u16(count_neg as u16)?;
            for (i, _) in self.inner.iter().enumerate().filter(|&(_, v)| !v) {
                writer.write_u16(i as u16)?;
            }
        } else {
            // Write raw bytes
            Writeable::write(&BitmapBlockSerialization::Raw, writer)?;
            let bytes = self.inner.to_bytes();
            assert_eq!(bytes.len(), Self::NBITS as usize / 8);
            writer.write_fixed_bytes(&bytes)?;
        }

        Ok(())
    }
}

impl Readable for BitmapBlock {
    fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
        let n_chunks = reader.read_u8()?;
        if n_chunks as usize > BitmapBlock::NCHUNKS {
            return Err(ser::Error::TooLargeReadErr);
        }
        let n_bits = n_chunks as usize * BitmapChunk::LEN_BITS;

        let mode = Readable::read(reader)?;
        let inner = match mode {
            BitmapBlockSerialization::Raw => {
                // Raw bytes
                let bytes = reader.read_fixed_bytes(n_bits / 8)?;
                BitVec::from_bytes(&bytes)
            }
            BitmapBlockSerialization::Positive => {
                // Positive indices
                let mut inner = BitVec::from_elem(n_bits, false);
                let n = reader.read_u16()?;
                for _ in 0..n {
                    inner.set(reader.read_u16()? as usize, true);
                }
                inner
            }
            BitmapBlockSerialization::Negative => {
                // Negative indices
                let mut inner = BitVec::from_elem(n_bits, true);
                let n = reader.read_u16()?;
                for _ in 0..n {
                    inner.set(reader.read_u16()? as usize, false);
                }
                inner
            }
        };

        Ok(BitmapBlock { inner })
    }
}

enum_from_primitive! {
    #[derive(Debug, Clone, Copy, PartialEq)]
    #[repr(u8)]
    enum BitmapBlockSerialization {
        Raw = 0,
        Positive = 1,
        Negative = 2,
    }
}

impl Writeable for BitmapBlockSerialization {
    fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
        writer.write_u8(*self as u8)
    }
}

impl Readable for BitmapBlockSerialization {
    fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
        Self::from_u8(reader.read_u8()?).ok_or(ser::Error::CorruptedData)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::ser::{BinReader, BinWriter, ProtocolVersion, Readable, Writeable};
    use byteorder::ReadBytesExt;
    use grin_util::secp::rand::Rng;
    use rand::thread_rng;
    use std::io::Cursor;

    fn test_roundtrip(entries: usize, inverse: bool, encoding: u8, length: usize) {
        let mut rng = thread_rng();
        let mut block = BitmapBlock::new(64);
        if inverse {
            block.inner.negate();
        }

        // Flip `entries` bits in random spots
        let mut count = 0;
        while count < entries {
            let idx = rng.gen_range(0, BitmapBlock::NBITS as usize);
            if block.inner.get(idx).unwrap() == inverse {
                count += 1;
                block.inner.set(idx, !inverse);
            }
        }

        // Serialize
        let mut cursor = Cursor::new(Vec::<u8>::new());
        let mut writer = BinWriter::new(&mut cursor, ProtocolVersion(1));
        Writeable::write(&block, &mut writer).unwrap();

        // Check encoding type and length
        cursor.set_position(1);
        assert_eq!(cursor.read_u8().unwrap(), encoding);
        let actual_length = cursor.get_ref().len();
        assert_eq!(actual_length, length);
        assert!(actual_length <= 2 + BitmapBlock::NBITS as usize / 8);

        // Deserialize
        cursor.set_position(0);
        let mut reader = BinReader::new(&mut cursor, ProtocolVersion(1));
        let block2: BitmapBlock = Readable::read(&mut reader).unwrap();
        assert_eq!(block, block2);
    }

    #[test]
    fn block_ser_roundtrip() {
        let threshold = BitmapBlock::NBITS as usize / 16;
        let entries = thread_rng().gen_range(threshold, 4 * threshold);
        test_roundtrip(entries, false, 0, 2 + BitmapBlock::NBITS as usize / 8);
        test_roundtrip(entries, true, 0, 2 + BitmapBlock::NBITS as usize / 8);
    }

    #[test]
    fn sparse_block_ser_roundtrip() {
        let entries = thread_rng().gen_range(1024, BitmapBlock::NBITS as usize / 16);
        test_roundtrip(entries, false, 1, 4 + 2 * entries);
    }

    #[test]
    fn abundant_block_ser_roundtrip() {
        let entries = thread_rng().gen_range(1024, BitmapBlock::NBITS as usize / 16);
        test_roundtrip(entries, true, 2, 4 + 2 * entries);
    }
}
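
For intuition on the `threshold = Self::NBITS / 16` test in `BitmapBlock::write` above, here is the size arithmetic as a standalone sketch. It is illustrative only and not part of the commit; `raw_size` and `index_size` are hypothetical helpers that approximate the payload of the raw and index encodings (the one-byte chunk count and one-byte mode tag are ignored). Raw mode always costs 2^16 / 8 = 8192 bytes, while index mode costs a 2-byte count plus 2 bytes per index, so listing indices only pays off when fewer than roughly 4096 bits differ from the block's default value.

```rust
// Illustrative only: approximate payload sizes per BitmapBlock, mirroring the
// raw and index encodings in the diff above (mode/length header bytes ignored).
const NBITS: usize = 1 << 16;

fn raw_size() -> usize {
    NBITS / 8 // 8192 bytes of raw bitmap
}

fn index_size(n_indices: usize) -> usize {
    2 + 2 * n_indices // u16 count + one u16 per recorded index
}

fn main() {
    assert_eq!(raw_size(), 8192);
    // At the threshold of NBITS / 16 = 4096 indices the two are about equal...
    assert_eq!(index_size(NBITS / 16), 8194);
    // ...and below it the index encoding wins.
    assert!(index_size(1000) < raw_size());
}
```
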
chain/tests/bitmap_segment.rs (new file, 79 lines added)

@@ -0,0 +1,79 @@
use self::chain::txhashset::{BitmapAccumulator, BitmapSegment};
use self::core::core::pmmr::segment::{Segment, SegmentIdentifier};
use self::core::ser::{BinReader, BinWriter, ProtocolVersion, Readable, Writeable};
use croaring::Bitmap;
use grin_chain as chain;
use grin_core as core;
use grin_util::secp::rand::Rng;
use rand::thread_rng;
use std::io::Cursor;

fn test_roundtrip(entries: usize) {
    let mut rng = thread_rng();

    let identifier = SegmentIdentifier {
        height: 12,
        idx: rng.gen_range(8, 16),
    };
    let block = rng.gen_range(2, 64);

    let mut bitmap = Bitmap::create();
    let block_size = 1 << 16;
    let offset = (1 << identifier.height) * 1024 * identifier.idx + block_size * block;
    let mut count = 0;
    while count < entries {
        let idx = (offset + rng.gen_range(0, block_size)) as u32;
        if !bitmap.contains(idx) {
            count += 1;
            bitmap.add(idx);
        }
    }

    // Add a bunch of segments after the one we are interested in
    let size =
        bitmap.maximum().unwrap() as u64 + (1 << identifier.height) * 1024 * rng.gen_range(0, 64);

    // Construct the accumulator
    let mut accumulator = BitmapAccumulator::new();
    accumulator
        .init(bitmap.iter().map(|v| v as u64), size)
        .unwrap();

    let mmr = accumulator.readonly_pmmr();
    let segment = Segment::from_pmmr(identifier, &mmr, false).unwrap();

    // Convert to `BitmapSegment`
    let bms = BitmapSegment::from(segment.clone());

    // Serialize `BitmapSegment`
    let mut cursor = Cursor::new(Vec::<u8>::new());
    let mut writer = BinWriter::new(&mut cursor, ProtocolVersion(1));
    Writeable::write(&bms, &mut writer).unwrap();

    // Read `BitmapSegment`
    cursor.set_position(0);
    let mut reader = BinReader::new(&mut cursor, ProtocolVersion(1));
    let bms2: BitmapSegment = Readable::read(&mut reader).unwrap();
    assert_eq!(bms, bms2);

    // Convert back to `Segment`
    let segment2 = Segment::from(bms2);
    assert_eq!(segment, segment2);
}

#[test]
fn segment_ser_roundtrip() {
    let threshold = 4096;
    test_roundtrip(thread_rng().gen_range(threshold, 4 * threshold));
}

#[test]
fn sparse_segment_ser_roundtrip() {
    test_roundtrip(thread_rng().gen_range(1024, 4096));
}

#[test]
fn abundant_segment_ser_roundtrip() {
    let max = 1 << 16;
    test_roundtrip(thread_rng().gen_range(max - 4096, max - 1024));
}
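
A note on the geometry behind the constants in the test above (illustrative arithmetic, not part of the commit): a height-12 segment spans 2^12 leaf chunks of 1024 bits each, i.e. 2^22 bits, which is exactly 64 blocks of 2^16 bits, hence the block index drawn from `2..64`. Separately, a full 2^16-bit block also happens to hold 64 chunks of 1024 bits, which is the `NCHUNKS` value used elsewhere.

```rust
// Illustrative arithmetic (not from the commit): segment and block geometry
// behind the constants used in the test above.
fn main() {
    let height = 12u64;
    let chunk_bits = 1024u64; // bits per BitmapChunk
    let block_bits = 1u64 << 16; // bits per BitmapBlock

    let chunks_per_segment = 1u64 << height; // 4096 leaf chunks
    let bits_per_segment = chunks_per_segment * chunk_bits; // 2^22 bits
    let blocks_per_segment = bits_per_segment / block_bits;

    assert_eq!(bits_per_segment, 1 << 22);
    assert_eq!(blocks_per_segment, 64); // hence `rng.gen_range(2, 64)` for the block index
    assert_eq!(block_bits / chunk_bits, 64); // NCHUNKS: 1024-bit chunks per full block
}
```
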
@@ -142,6 +142,64 @@ impl<T> Segment<T> {
        .ok_or_else(|| SegmentError::MissingHash(pos))
    }

    /// Get the identifier associated with this segment
    pub fn identifier(&self) -> SegmentIdentifier {
        self.identifier
    }

    /// Consume the segment and return its parts
    pub fn parts(
        self,
    ) -> (
        SegmentIdentifier,
        Vec<u64>,
        Vec<Hash>,
        Vec<u64>,
        Vec<T>,
        SegmentProof,
    ) {
        (
            self.identifier,
            self.hash_pos,
            self.hashes,
            self.leaf_pos,
            self.leaf_data,
            self.proof,
        )
    }

    /// Construct a segment from its parts
    pub fn from_parts(
        identifier: SegmentIdentifier,
        hash_pos: Vec<u64>,
        hashes: Vec<Hash>,
        leaf_pos: Vec<u64>,
        leaf_data: Vec<T>,
        proof: SegmentProof,
    ) -> Self {
        assert_eq!(hash_pos.len(), hashes.len());
        let mut last_pos = 0;
        for &pos in &hash_pos {
            assert!(pos > last_pos);
            last_pos = pos;
        }
        assert_eq!(leaf_pos.len(), leaf_data.len());
        last_pos = 0;
        for &pos in &leaf_pos {
            assert!(pos > last_pos);
            last_pos = pos;
        }

        Self {
            identifier,
            hash_pos,
            hashes,
            leaf_pos,
            leaf_data,
            proof,
        }
    }

    /// Iterator of all the leaves in the segment
    pub fn leaf_iter(&self) -> impl Iterator<Item = (u64, &T)> + '_ {
        self.leaf_pos.iter().map(|&p| p).zip(&self.leaf_data)
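
The `from_parts` constructor added above asserts that hash and leaf positions are strictly increasing (and that the position and data vectors have matching lengths), which is what the `From` conversions in `bitmap_accumulator.rs` rely on. The tiny sketch below restates that ordering invariant for illustration only; `strictly_increasing` is a hypothetical helper, not part of the commit.

```rust
// Illustrative only: the strictly-increasing position invariant that
// `Segment::from_parts` above enforces with assertions (positions are 1-based).
fn strictly_increasing(pos: &[u64]) -> bool {
    let mut last = 0u64;
    pos.iter().all(|&p| {
        let ok = p > last;
        last = p;
        ok
    })
}

fn main() {
    assert!(strictly_increasing(&[1, 2, 4, 8]));
    assert!(!strictly_increasing(&[1, 2, 2, 8]));
    assert!(!strictly_increasing(&[0, 1])); // positions must be > 0
}
```
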