From 7725a05ac129a192fb1f55a48022ae6ba71b018e Mon Sep 17 00:00:00 2001 From: Yeastplume Date: Mon, 6 Dec 2021 16:35:23 +0000 Subject: [PATCH] [SYNC PERFORMANCE] Replace header proof serialisation with more efficient algorithm (#3670) * replace bitvec with more efficient bitpack algorithm * optimise proof_unpack_len * move proof pack length calculation * small refactor * integrate suggestions in #3670 * finish compressing compression function * remove ordering cmp from pack function * remainder fix for new logic * remove println statements * remove ordering import warning --- chain/tests/test_header_perf.rs | 119 ++++++++++++++++++++++++++++++++ core/src/global.rs | 4 +- core/src/pow/types.rs | 89 +++++++++++++----------- 3 files changed, 170 insertions(+), 42 deletions(-) create mode 100644 chain/tests/test_header_perf.rs diff --git a/chain/tests/test_header_perf.rs b/chain/tests/test_header_perf.rs new file mode 100644 index 000000000..f948606b9 --- /dev/null +++ b/chain/tests/test_header_perf.rs @@ -0,0 +1,119 @@ +// Copyright 2021 The Grin Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use grin_chain as chain; +use grin_core as core; +use grin_util as util; + +#[macro_use] +extern crate log; + +use std::sync::Arc; + +use crate::chain::types::{NoopAdapter, Options}; +use crate::core::core::hash::Hashed; +use crate::core::{genesis, global, pow}; + +use self::chain_test_helper::clean_output_dir; + +mod chain_test_helper; + +fn test_header_perf_impl(is_test_chain: bool, src_root_dir: &str, dest_root_dir: &str) { + global::set_local_chain_type(global::ChainTypes::Mainnet); + let mut genesis = genesis::genesis_main(); + + if is_test_chain { + global::set_local_chain_type(global::ChainTypes::AutomatedTesting); + genesis = pow::mine_genesis_block().unwrap(); + } + + { + debug!("Reading Chain, genesis block: {}", genesis.hash()); + let dummy_adapter = Arc::new(NoopAdapter {}); + + // The original chain we're reading from + let src_chain = Arc::new( + chain::Chain::init( + src_root_dir.into(), + dummy_adapter.clone(), + genesis.clone(), + pow::verify_size, + false, + ) + .unwrap(), + ); + + // And the output chain we're writing to + let dest_chain = Arc::new( + chain::Chain::init( + dest_root_dir.into(), + dummy_adapter, + genesis.clone(), + pow::verify_size, + false, + ) + .unwrap(), + ); + + let sh = src_chain.get_header_by_height(0).unwrap(); + debug!("Source Genesis - {}", sh.hash()); + + let dh = dest_chain.get_header_by_height(0).unwrap(); + debug!("Destination Genesis - {}", dh.hash()); + + let horizon_header = src_chain.txhashset_archive_header().unwrap(); + + debug!("Horizon header: {:?}", horizon_header); + + // Copy the headers from source to output in chunks + let dest_sync_head = dest_chain.header_head().unwrap(); + let copy_chunk_size = 1000; + let mut copied_header_index = 1; + let mut src_headers = vec![]; + while copied_header_index <= 100000 { + let h = src_chain.get_header_by_height(copied_header_index).unwrap(); + src_headers.push(h); + copied_header_index += 1; + if copied_header_index % copy_chunk_size == 0 { + debug!( + "Copying 
headers to {} of {}", + copied_header_index, horizon_header.height + ); + dest_chain + .sync_block_headers(&src_headers, dest_sync_head, Options::NONE) + .unwrap(); + src_headers = vec![]; + } + } + if !src_headers.is_empty() { + dest_chain + .sync_block_headers(&src_headers, dest_sync_head, Options::NONE) + .unwrap(); + } + } +} + +#[test] +#[ignore] +// Ignored during CI, but use this to run this test on a real instance of a chain pointed where you like +fn test_header_perf() { + util::init_test_logger(); + // if testing against a real chain, insert location here + // NOTE: Modify to point at your own paths + let src_root_dir = format!("/Users/yeastplume/Projects/grin_project/server/chain_data"); + let dest_root_dir = format!("/Users/yeastplume/Projects/grin_project/server/.chain_data_copy"); + clean_output_dir(&dest_root_dir); + test_header_perf_impl(false, &src_root_dir, &dest_root_dir); + clean_output_dir(&dest_root_dir); +} diff --git a/core/src/global.rs b/core/src/global.rs index 9b67d9369..663d7e4d8 100644 --- a/core/src/global.rs +++ b/core/src/global.rs @@ -25,7 +25,7 @@ use crate::consensus::{ use crate::core::block::HeaderVersion; use crate::pow::{ self, new_cuckaroo_ctx, new_cuckarood_ctx, new_cuckaroom_ctx, new_cuckarooz_ctx, - new_cuckatoo_ctx, no_cuckaroo_ctx, BitVec, PoWContext, + new_cuckatoo_ctx, no_cuckaroo_ctx, PoWContext, Proof, }; use crate::ser::ProtocolVersion; use std::cell::Cell; @@ -488,7 +488,7 @@ where #[inline] pub fn header_size_bytes(edge_bits: u8) -> usize { let size = 2 + 2 * 8 + 5 * 32 + 32 + 2 * 8; - let proof_size = 8 + 4 + 8 + 1 + BitVec::bytes_len(edge_bits as usize * proofsize()); + let proof_size = 8 + 4 + 8 + 1 + Proof::pack_len(edge_bits); size + proof_size } diff --git a/core/src/pow/types.rs b/core/src/pow/types.rs index 89ed9a262..0fa81c1ba 100644 --- a/core/src/pow/types.rs +++ b/core/src/pow/types.rs @@ -23,6 +23,7 @@ use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; /// proof of work within a block 
header. use std::cmp::{max, min}; use std::ops::{Add, Div, Mul, Sub}; +use std::u64; use std::{fmt, iter}; /// Generic trait for a solver/verifier providing common interface into Cuckoo-family PoW @@ -325,8 +326,8 @@ impl ProofOfWork { /// The hash of the `Proof` is the hash of its packed nonces when serializing /// them at their exact bit size. The resulting bit sequence is padded to be /// byte-aligned. We form a PROOFSIZE*edge_bits integer by packing the PROOFSIZE edge -/// indices together, with edge index i occupying bits i * edge_bits through -/// (i+1) * edge_bits - 1, padding it with up to 7 0-bits to a multiple of 8 bits, +/// indices together, with edge index i occupying bits i * edge_bits through +/// (i+1) * edge_bits - 1, padding it with up to 7 0-bits to a multiple of 8 bits, /// writing as a little endian byte array, and hashing with blake2b using 256 bit digest. #[derive(Clone, PartialOrd, PartialEq, Serialize)] @@ -372,6 +373,11 @@ impl Proof { } } + /// Number of bytes required store a proof of given edge bits + pub fn pack_len(bit_width: u8) -> usize { + (bit_width as usize * global::proofsize() + 7) / 8 + } + /// Builds a proof with random POW data, /// needed so that tests that ignore POW /// don't fail due to duplicate hashes @@ -396,6 +402,17 @@ impl Proof { self.nonces.len() } + /// Pack the nonces of the proof to their exact bit size as described above + pub fn pack_nonces(&self) -> Vec { + let mut compressed = vec![0u8; Proof::pack_len(self.edge_bits)]; + pack_bits( + self.edge_bits, + &self.nonces[0..self.nonces.len()], + &mut compressed, + ); + compressed + } + /// Difficulty achieved by this proof with given scaling factor fn scaled_difficulty(&self, scale: u64) -> u64 { let diff = ((scale as u128) << 64) / (max(1, self.hash().to_u64()) as u128); @@ -403,6 +420,34 @@ impl Proof { } } +/// Pack an array of u64s into `compressed` at the specified bit width. 
/// Pack an array of u64s into `compressed` at the specified bit width.
///
/// The output is a little-endian bit string: element `i` occupies bits
/// `i * bit_width` through `(i + 1) * bit_width - 1`, where bit 0 is the least
/// significant bit of `compressed[0]`. Only the low `bit_width` bits of each
/// element are stored, so an out-of-range element cannot spill into its
/// neighbours. For widths above 64 the bits beyond the 64th are written as 0.
///
/// Exactly `ceil(uncompressed.len() * bit_width / 8)` bytes are written,
/// including the zero padding bits of the last byte. Any bytes of `compressed`
/// beyond that length are left untouched.
///
/// # Panics
///
/// Panics if `compressed` is shorter than the number of bytes to be written.
fn pack_bits(bit_width: u8, uncompressed: &[u64], compressed: &mut [u8]) {
    let needed = (uncompressed.len() * usize::from(bit_width) + 7) / 8;
    assert!(
        compressed.len() >= needed,
        "pack_bits: output buffer holds {} bytes but {} are required",
        compressed.len(),
        needed
    );

    // Keeps only the bits of an element that belong in the output
    let mask = if bit_width >= 64 {
        u64::MAX
    } else {
        (1u64 << bit_width) - 1
    };

    // `acc` is a mini buffer of pending bits; `acc_bits` counts how many of its
    // low bits are in use. Between elements `acc_bits` is always below 64, so
    // shifting a 64 bit value by it can never overflow the 128 bit accumulator.
    let mut acc: u128 = 0;
    let mut acc_bits: u32 = 0;
    let mut written = 0;
    for &el in uncompressed {
        acc |= u128::from(el & mask) << acc_bits;
        acc_bits += u32::from(bit_width);
        // Flush every complete 64 bit word to the output
        while acc_bits >= 64 {
            // Truncating cast is intended: it selects the low, completed word
            let word = acc as u64;
            compressed[written..written + 8].copy_from_slice(&word.to_le_bytes());
            written += 8;
            acc >>= 64;
            acc_bits -= 64;
        }
    }

    // Fewer than 64 bits are left: write just the bytes they occupy. This is
    // done even when they are all zero so stale buffer contents are replaced.
    let tail_len = needed - written;
    // Truncating cast is intended: everything above bit 63 is zero here
    let tail_bytes = (acc as u64).to_le_bytes();
    compressed[written..needed].copy_from_slice(&tail_bytes[..tail_len]);
}
by writing whole bytes (or even words) -// in the `BitVec` at once, dealing with the truncation, instead of bits by bits -pub struct BitVec { - bits: Vec, -} - -impl BitVec { - /// Number of bytes required to store the provided number of bits - #[inline] - pub fn bytes_len(bits_len: usize) -> usize { - (bits_len + 7) / 8 - } - - fn new(bits_len: usize) -> BitVec { - BitVec { - bits: vec![0; BitVec::bytes_len(bits_len)], - } - } - - fn set_bit_at(&mut self, pos: usize) { - self.bits[pos / 8] |= 1 << (pos % 8) as u8; + writer.write_fixed_bytes(&self.pack_nonces()) } }