diff --git a/chain/src/txhashset/bitmap_accumulator.rs b/chain/src/txhashset/bitmap_accumulator.rs index 67fa61aa2..ed87442b4 100644 --- a/chain/src/txhashset/bitmap_accumulator.rs +++ b/chain/src/txhashset/bitmap_accumulator.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::min; use std::convert::TryFrom; use std::time::Instant; @@ -19,9 +20,11 @@ use bit_vec::BitVec; use croaring::Bitmap; use crate::core::core::hash::{DefaultHashable, Hash}; +use crate::core::core::pmmr::segment::{Segment, SegmentIdentifier, SegmentProof}; use crate::core::core::pmmr::{self, ReadablePMMR, ReadonlyPMMR, VecBackend, PMMR}; use crate::core::ser::{self, PMMRable, Readable, Reader, Writeable, Writer}; use crate::error::{Error, ErrorKind}; +use enum_primitive::FromPrimitive; /// The "bitmap accumulator" allows us to commit to a specific bitmap by splitting it into /// fragments and inserting these fragments into an MMR to produce an overall root hash. @@ -187,7 +190,7 @@ impl BitmapAccumulator { /// A bitmap "chunk" representing 1024 contiguous bits of the overall bitmap. /// The first 1024 bits belong in one chunk. The next 1024 bits in the next chunk, etc. 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
 pub struct BitmapChunk(BitVec);
 
 impl BitmapChunk {
@@ -242,3 +245,348 @@ impl Readable for BitmapChunk {
         Ok(BitmapChunk::new())
     }
 }
+
+/// Compact wire representation of a bitmap MMR segment.
+///
+/// Carries the same leaf data as a `Segment<BitmapChunk>`, regrouped into blocks of up to
+/// `BitmapBlock::NCHUNKS` chunks that each choose their own serialization mode.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct BitmapSegment {
+    identifier: SegmentIdentifier,
+    blocks: Vec<BitmapBlock>,
+    proof: SegmentProof,
+}
+
+impl Writeable for BitmapSegment {
+    /// Writes the identifier, the block count as `u16`, every block, then the proof.
+    fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
+        Writeable::write(&self.identifier, writer)?;
+        // NOTE(review): `as u16` truncates silently above 65535 blocks; confirm callers stay below
+        writer.write_u16(self.blocks.len() as u16)?;
+        for block in &self.blocks {
+            Writeable::write(block, writer)?;
+        }
+        Writeable::write(&self.proof, writer)?;
+        Ok(())
+    }
+}
+
+impl Readable for BitmapSegment {
+    /// Reads the identifier, a `u16` block count, that many blocks, then the proof.
+    fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
+        let identifier: SegmentIdentifier = Readable::read(reader)?;
+
+        let n_blocks = reader.read_u16()? as usize;
+        let mut blocks = Vec::<BitmapBlock>::with_capacity(n_blocks);
+        for _ in 0..n_blocks {
+            blocks.push(Readable::read(reader)?);
+        }
+        let proof = Readable::read(reader)?;
+
+        Ok(Self {
+            identifier,
+            blocks,
+            proof,
+        })
+    }
+}
+
+// TODO: this can be sped up with some `unsafe` code
+impl From<Segment<BitmapChunk>> for BitmapSegment {
+    /// Packs the leaf chunks of `segment` into blocks, keeping its identifier and proof.
+    fn from(segment: Segment<BitmapChunk>) -> Self {
+        let (identifier, _, _, _, leaf_data, proof) = segment.parts();
+
+        // Allocate zeroed blocks: full ones first, then a possibly smaller final one
+        let mut chunks_left = leaf_data.len();
+        let mut blocks =
+            Vec::with_capacity((chunks_left + BitmapBlock::NCHUNKS - 1) / BitmapBlock::NCHUNKS);
+        while chunks_left > 0 {
+            let n_chunks = min(BitmapBlock::NCHUNKS, chunks_left);
+            chunks_left = chunks_left.saturating_sub(n_chunks);
+            blocks.push(BitmapBlock::new(n_chunks));
+        }
+
+        // Copy every set bit of each chunk to its slot in the owning block
+        for (chunk_idx, chunk) in leaf_data.into_iter().enumerate() {
+            assert_eq!(chunk.0.len(), BitmapChunk::LEN_BITS);
+            let block = &mut blocks
+                .get_mut(chunk_idx / BitmapBlock::NCHUNKS)
+                .unwrap()
+                .inner;
+            let offset = (chunk_idx % BitmapBlock::NCHUNKS) * BitmapChunk::LEN_BITS;
+            for (i, _) in chunk.0.iter().enumerate().filter(|&(_, v)| v) {
+                block.set(offset + i, true);
+            }
+        }
+
+        Self {
+            identifier,
+            blocks,
+            proof,
+        }
+    }
+}
+
+// TODO: this can be sped up with some `unsafe` code
+impl From<BitmapSegment> for Segment<BitmapChunk> {
+    /// Unpacks the blocks into chunks and rebuilds the leaf positions; no hashes are attached.
+    fn from(segment: BitmapSegment) -> Self {
+        let BitmapSegment {
+            identifier,
+            blocks,
+            proof,
+        } = segment;
+
+        // Count the number of chunks taking into account that the final block might be smaller
+        let n_chunks = blocks.len().saturating_sub(1) * BitmapBlock::NCHUNKS
+            + blocks.last().map(|b| b.n_chunks()).unwrap_or(0);
+        let mut leaf_pos = Vec::with_capacity(n_chunks);
+        let mut chunks = Vec::with_capacity(n_chunks);
+        let offset = (1 << identifier.height) * identifier.idx + 1;
+        for i in 0..(n_chunks as u64) {
+            leaf_pos.push(pmmr::insertion_to_pmmr_index(offset + i));
+            chunks.push(BitmapChunk::new());
+        }
+
+        for (block_idx, block) in blocks.into_iter().enumerate() {
+            // The final block may be partial, so bound the length instead of requiring NBITS
+            assert!(block.inner.len() <= BitmapBlock::NBITS as usize);
+            let offset = block_idx * BitmapBlock::NCHUNKS;
+            for (i, _) in block.inner.iter().enumerate().filter(|&(_, v)| v) {
+                chunks
+                    .get_mut(offset + i / BitmapChunk::LEN_BITS)
+                    .unwrap()
+                    .0
+                    .set(i % BitmapChunk::LEN_BITS, true);
+            }
+        }
+
+        Segment::from_parts(identifier, Vec::new(), Vec::new(), leaf_pos, chunks, proof)
+    }
+}
+
+/// A block of 2^16 bits that provides an efficient (de)serialization
+/// depending on the bitmap occupancy.
+#[derive(Clone, Debug, PartialEq, Eq)]
+struct BitmapBlock {
+    inner: BitVec,
+}
+
+impl BitmapBlock {
+    /// Maximum number of bits in a block
+    const NBITS: u32 = 1 << 16;
+    /// Maximum number of chunks in a block
+    const NCHUNKS: usize = Self::NBITS as usize / BitmapChunk::LEN_BITS;
+
+    /// Creates a zeroed block of `n_chunks` chunks; panics if that exceeds `NCHUNKS`.
+    fn new(n_chunks: usize) -> Self {
+        assert!(n_chunks <= BitmapBlock::NCHUNKS);
+        Self {
+            inner: BitVec::from_elem(n_chunks * BitmapChunk::LEN_BITS, false),
+        }
+    }
+
+    /// Number of chunks in the block; asserts the length is chunk-aligned and within `NCHUNKS`.
+    fn n_chunks(&self) -> usize {
+        let length = self.inner.len();
+        assert_eq!(length % BitmapChunk::LEN_BITS, 0);
+        let n_chunks = length / BitmapChunk::LEN_BITS;
+        assert!(n_chunks <= BitmapBlock::NCHUNKS);
+        n_chunks
+    }
+}
+
+impl Writeable for BitmapBlock {
+    /// Writes the chunk count, the encoding tag, then the payload in that encoding.
+    fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
+        let length = self.inner.len();
+        assert!(length <= Self::NBITS as usize);
+        assert_eq!(length % BitmapChunk::LEN_BITS, 0);
+        writer.write_u8((length / BitmapChunk::LEN_BITS) as u8)?;
+
+        let count_pos = self.inner.iter().filter(|&v| v).count() as u32;
+        // Count unset bits against the real length: a partial block has fewer than NBITS bits
+        let count_neg = length as u32 - count_pos;
+        let threshold = Self::NBITS / 16;
+        if count_pos < threshold {
+            // Write positive indices
+            Writeable::write(&BitmapBlockSerialization::Positive, writer)?;
+            writer.write_u16(count_pos as u16)?;
+            for (i, _) in self.inner.iter().enumerate().filter(|&(_, v)| v) {
+                writer.write_u16(i as u16)?;
+            }
+        } else if count_neg < threshold {
+            // Write negative indices
+            Writeable::write(&BitmapBlockSerialization::Negative, writer)?;
+            writer.write_u16(count_neg as u16)?;
+            for (i, _) in self.inner.iter().enumerate().filter(|&(_, v)| !v) {
+                writer.write_u16(i as u16)?;
+            }
+        } else {
+            // Write raw bytes
+            Writeable::write(&BitmapBlockSerialization::Raw, writer)?;
+            let bytes = self.inner.to_bytes();
+            assert_eq!(bytes.len(), length / 8);
+            writer.write_fixed_bytes(&bytes)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl Readable for BitmapBlock {
+    /// Reads a block, rejecting oversized chunk counts and out-of-range bit indices.
+    fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
+        let n_chunks = reader.read_u8()?;
+        if n_chunks as usize > BitmapBlock::NCHUNKS {
+            return Err(ser::Error::TooLargeReadErr);
+        }
+        let n_bits = n_chunks as usize * BitmapChunk::LEN_BITS;
+
+        let mode = Readable::read(reader)?;
+        let inner = match mode {
+            BitmapBlockSerialization::Raw => {
+                // Raw bytes
+                let bytes = reader.read_fixed_bytes(n_bits / 8)?;
+                BitVec::from_bytes(&bytes)
+            }
+            BitmapBlockSerialization::Positive => {
+                // Positive indices
+                let mut inner = BitVec::from_elem(n_bits, false);
+                let n = reader.read_u16()?;
+                for _ in 0..n {
+                    let idx = reader.read_u16()? as usize;
+                    // `BitVec::set` panics out of range; malformed input must error instead
+                    if idx >= n_bits {
+                        return Err(ser::Error::CorruptedData);
+                    }
+                    inner.set(idx, true);
+                }
+                inner
+            }
+            BitmapBlockSerialization::Negative => {
+                // Negative indices
+                let mut inner = BitVec::from_elem(n_bits, true);
+                let n = reader.read_u16()?;
+                for _ in 0..n {
+                    let idx = reader.read_u16()? as usize;
+                    // Same bounds check as for the positive encoding
+                    if idx >= n_bits {
+                        return Err(ser::Error::CorruptedData);
+                    }
+                    inner.set(idx, false);
+                }
+                inner
+            }
+        };
+
+        Ok(BitmapBlock { inner })
+    }
+}
+
+// Tag byte selecting how the bits of a `BitmapBlock` are encoded
+enum_from_primitive! {
+    #[derive(Debug, Clone, Copy, PartialEq)]
+    #[repr(u8)]
+    enum BitmapBlockSerialization {
+        Raw = 0,
+        Positive = 1,
+        Negative = 2,
+    }
+}
+
+impl Writeable for BitmapBlockSerialization {
+    fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
+        writer.write_u8(*self as u8)
+    }
+}
+
+impl Readable for BitmapBlockSerialization {
+    fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
+        Self::from_u8(reader.read_u8()?).ok_or(ser::Error::CorruptedData)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::core::ser::{BinReader, BinWriter, ProtocolVersion, Readable, Writeable};
+    use byteorder::ReadBytesExt;
+    use grin_util::secp::rand::Rng;
+    use rand::thread_rng;
+    use std::io::Cursor;
+
+    fn test_roundtrip(entries: usize, inverse: bool, encoding: u8, length: usize) {
+        let mut rng = thread_rng();
+        let mut block = BitmapBlock::new(64);
+        if inverse {
+            block.inner.negate();
+        }
+
+        // Flip `entries` bits in random spots
+        let mut count = 0;
+        while count < entries {
+            let idx = rng.gen_range(0, BitmapBlock::NBITS as usize);
+            if block.inner.get(idx).unwrap() == inverse {
+                count += 1;
+                block.inner.set(idx, !inverse);
+            }
+        }
+
+        // Serialize
+        let mut cursor = Cursor::new(Vec::<u8>::new());
+        let mut writer = BinWriter::new(&mut cursor, ProtocolVersion(1));
+        Writeable::write(&block, &mut writer).unwrap();
+
+        // Check encoding type and length
+        cursor.set_position(1);
+        assert_eq!(cursor.read_u8().unwrap(), encoding);
+        let actual_length = cursor.get_ref().len();
+        assert_eq!(actual_length, length);
+        assert!(actual_length <= 2 + BitmapBlock::NBITS as usize / 8);
+
+        // Deserialize
+        cursor.set_position(0);
+        let mut reader = BinReader::new(&mut cursor, ProtocolVersion(1));
+        let block2: BitmapBlock = Readable::read(&mut reader).unwrap();
+        assert_eq!(block, block2);
+    }
+
+    #[test]
+    fn block_ser_roundtrip() {
+        let threshold = BitmapBlock::NBITS as usize / 16;
+        let entries = thread_rng().gen_range(threshold, 4 * threshold);
+        test_roundtrip(entries, false, 0, 2 + BitmapBlock::NBITS as usize / 8);
+        test_roundtrip(entries, true, 0, 2 + BitmapBlock::NBITS as usize / 8);
+    }
+
+    #[test]
+    fn sparse_block_ser_roundtrip() {
+        let entries = thread_rng().gen_range(1024, BitmapBlock::NBITS as usize / 16);
+        test_roundtrip(entries, false, 1, 4 + 2 * entries);
+    }
+
+    #[test]
+    fn abundant_block_ser_roundtrip() {
+        let entries = thread_rng().gen_range(1024, BitmapBlock::NBITS as usize / 16);
+        test_roundtrip(entries, true, 2, 4 + 2 * entries);
+    }
+
+    #[test]
+    fn partial_block_ser_roundtrip() {
+        // A fully set half-size block has no unset bits: negative encoding, zero entries
+        let mut block = BitmapBlock::new(BitmapBlock::NCHUNKS / 2);
+        block.inner.negate();
+
+        let mut cursor = Cursor::new(Vec::<u8>::new());
+        let mut writer = BinWriter::new(&mut cursor, ProtocolVersion(1));
+        Writeable::write(&block, &mut writer).unwrap();
+        assert_eq!(cursor.get_ref().len(), 4);
+
+        cursor.set_position(0);
+        let mut reader = BinReader::new(&mut cursor, ProtocolVersion(1));
+        let block2: BitmapBlock = Readable::read(&mut reader).unwrap();
+        assert_eq!(block, block2);
+    }
+}
diff --git a/chain/tests/bitmap_segment.rs b/chain/tests/bitmap_segment.rs
new file mode 100644
index 000000000..f0695a0b2
--- /dev/null
+++ b/chain/tests/bitmap_segment.rs
@@ -0,0 +1,79 @@
+use self::chain::txhashset::{BitmapAccumulator, BitmapSegment};
+use self::core::core::pmmr::segment::{Segment, SegmentIdentifier};
+use self::core::ser::{BinReader, BinWriter, ProtocolVersion, Readable, Writeable};
+use croaring::Bitmap;
+use grin_chain as chain;
+use grin_core as core;
+use grin_util::secp::rand::Rng;
+use rand::thread_rng;
+use std::io::Cursor;
+
+fn test_roundtrip(entries: usize) {
+    let mut rng = thread_rng();
+
+    let identifier = SegmentIdentifier {
+        height: 12,
+        idx: rng.gen_range(8, 16),
+    };
+    let block = rng.gen_range(2, 64);
+
+    let mut bitmap = Bitmap::create();
+    let block_size = 1 << 16;
+    let offset = (1 << identifier.height) * 1024 * identifier.idx + block_size * block;
+    let mut count = 0;
+    while count < entries {
+        let idx = (offset + rng.gen_range(0, block_size)) as u32;
+        if !bitmap.contains(idx) {
+            count += 1;
+            bitmap.add(idx);
+        }
+    }
+
+    // Add a bunch of segments after the one we are interested in
+    let size =
+        bitmap.maximum().unwrap() as u64 + (1 << identifier.height) * 1024 * rng.gen_range(0, 64);
+
+    // Construct the accumulator
+    let mut accumulator = BitmapAccumulator::new();
+    accumulator
+        .init(bitmap.iter().map(|v| v as u64), size)
+        .unwrap();
+
+    let mmr = accumulator.readonly_pmmr();
+    let segment = Segment::from_pmmr(identifier, &mmr, false).unwrap();
+
+    // Convert to `BitmapSegment`
+    let bms = BitmapSegment::from(segment.clone());
+
+    // Serialize `BitmapSegment`
+    let mut cursor = Cursor::new(Vec::<u8>::new());
+    let mut writer = BinWriter::new(&mut cursor, ProtocolVersion(1));
+    Writeable::write(&bms, &mut writer).unwrap();
+
+    // Read `BitmapSegment`
+    cursor.set_position(0);
+    let mut reader = BinReader::new(&mut cursor, ProtocolVersion(1));
+    let bms2: BitmapSegment = Readable::read(&mut reader).unwrap();
+    assert_eq!(bms, bms2);
+
+    // Convert back to `Segment`
+    let segment2 = Segment::from(bms2);
+    assert_eq!(segment, segment2);
+}
+
+#[test]
+fn segment_ser_roundtrip() {
+    let threshold = 4096;
+    test_roundtrip(thread_rng().gen_range(threshold, 4 * threshold));
+}
+
+#[test]
+fn sparse_segment_ser_roundtrip() {
+    test_roundtrip(thread_rng().gen_range(1024, 4096));
+}
+
+#[test]
+fn abundant_segment_ser_roundtrip() {
+    let max = 1 << 16;
+    test_roundtrip(thread_rng().gen_range(max - 4096, max - 1024));
+}
diff --git a/core/src/core/pmmr/segment.rs b/core/src/core/pmmr/segment.rs
index 4e870817f..33e346972
100644
--- a/core/src/core/pmmr/segment.rs
+++ b/core/src/core/pmmr/segment.rs
@@ -142,6 +142,70 @@ impl<T> Segment<T> {
             .ok_or_else(|| SegmentError::MissingHash(pos))
     }
 
+    /// Get the identifier associated with this segment
+    pub fn identifier(&self) -> SegmentIdentifier {
+        self.identifier
+    }
+
+    /// Consume the segment and return its parts, in the order
+    /// `(identifier, hash_pos, hashes, leaf_pos, leaf_data, proof)`
+    pub fn parts(
+        self,
+    ) -> (
+        SegmentIdentifier,
+        Vec<u64>,
+        Vec<Hash>,
+        Vec<u64>,
+        Vec<T>,
+        SegmentProof,
+    ) {
+        (
+            self.identifier,
+            self.hash_pos,
+            self.hashes,
+            self.leaf_pos,
+            self.leaf_data,
+            self.proof,
+        )
+    }
+
+    /// Construct a segment from its parts
+    ///
+    /// # Panics
+    ///
+    /// Panics if `hash_pos`/`hashes` or `leaf_pos`/`leaf_data` differ in length, or if either
+    /// position list is not strictly increasing starting above zero.
+    pub fn from_parts(
+        identifier: SegmentIdentifier,
+        hash_pos: Vec<u64>,
+        hashes: Vec<Hash>,
+        leaf_pos: Vec<u64>,
+        leaf_data: Vec<T>,
+        proof: SegmentProof,
+    ) -> Self {
+        assert_eq!(hash_pos.len(), hashes.len());
+        let mut last_pos = 0;
+        for &pos in &hash_pos {
+            assert!(pos > last_pos);
+            last_pos = pos;
+        }
+        assert_eq!(leaf_pos.len(), leaf_data.len());
+        last_pos = 0;
+        for &pos in &leaf_pos {
+            assert!(pos > last_pos);
+            last_pos = pos;
+        }
+
+        Self {
+            identifier,
+            hash_pos,
+            hashes,
+            leaf_pos,
+            leaf_data,
+            proof,
+        }
+    }
+
     /// Iterator of all the leaves in the segment
     pub fn leaf_iter(&self) -> impl Iterator<Item = (u64, &T)> + '_ {
         self.leaf_pos.iter().map(|&p| p).zip(&self.leaf_data)