[SYNC PERFORMANCE] Replace header proof serialisation with more efficient algorithm (#3670)

* replace bitvec with more efficient bitpack algorithm

* optimise proof_unpack_len

* move proof pack length calculation

* small refactor

* integrate suggestions in #3670

* finish compressing compression function

* remove ordering cmp from pack function

* remainder fix for new logic

* remove println statements

* remove ordering import warning
This commit is contained in:
Yeastplume 2021-12-06 16:35:23 +00:00 committed by GitHub
parent c6f25e9929
commit 7725a05ac1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 170 additions and 42 deletions

View file

@ -0,0 +1,119 @@
// Copyright 2021 The Grin Developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use grin_chain as chain;
use grin_core as core;
use grin_util as util;
#[macro_use]
extern crate log;
use std::sync::Arc;
use crate::chain::types::{NoopAdapter, Options};
use crate::core::core::hash::Hashed;
use crate::core::{genesis, global, pow};
use self::chain_test_helper::clean_output_dir;
mod chain_test_helper;
/// Copies the first 100,000 headers from an existing chain at `src_root_dir`
/// into a fresh chain at `dest_root_dir`, syncing them in fixed-size chunks.
/// Exercises header (de)serialisation and sync performance end to end.
fn test_header_perf_impl(is_test_chain: bool, src_root_dir: &str, dest_root_dir: &str) {
	global::set_local_chain_type(global::ChainTypes::Mainnet);
	let mut genesis = genesis::genesis_main();
	if is_test_chain {
		global::set_local_chain_type(global::ChainTypes::AutomatedTesting);
		genesis = pow::mine_genesis_block().unwrap();
	}
	{
		debug!("Reading Chain, genesis block: {}", genesis.hash());
		let adapter = Arc::new(NoopAdapter {});
		// The original chain we're reading from
		let src_chain = Arc::new(
			chain::Chain::init(
				src_root_dir.into(),
				adapter.clone(),
				genesis.clone(),
				pow::verify_size,
				false,
			)
			.unwrap(),
		);
		// And the output chain we're writing to
		let dest_chain = Arc::new(
			chain::Chain::init(
				dest_root_dir.into(),
				adapter,
				genesis.clone(),
				pow::verify_size,
				false,
			)
			.unwrap(),
		);
		// Sanity-log both genesis headers so a mismatch is visible up front.
		let src_genesis = src_chain.get_header_by_height(0).unwrap();
		debug!("Source Genesis - {}", src_genesis.hash());
		let dest_genesis = dest_chain.get_header_by_height(0).unwrap();
		debug!("Destination Genesis - {}", dest_genesis.hash());
		let horizon_header = src_chain.txhashset_archive_header().unwrap();
		debug!("Horizon header: {:?}", horizon_header);
		// Copy the headers from source to output in chunks
		let dest_sync_head = dest_chain.header_head().unwrap();
		let chunk_size = 1000;
		let mut batch = vec![];
		for height in 1..=100000 {
			batch.push(src_chain.get_header_by_height(height).unwrap());
			// Flush a chunk every `chunk_size` headers (boundary matches the
			// post-increment index of the original loop, i.e. height + 1).
			if (height + 1) % chunk_size == 0 {
				debug!(
					"Copying headers to {} of {}",
					height + 1,
					horizon_header.height
				);
				dest_chain
					.sync_block_headers(&batch, dest_sync_head, Options::NONE)
					.unwrap();
				batch.clear();
			}
		}
		// Flush any trailing partial chunk.
		if !batch.is_empty() {
			dest_chain
				.sync_block_headers(&batch, dest_sync_head, Options::NONE)
				.unwrap();
		}
	}
}
#[test]
#[ignore]
// Ignored during CI, but use this to run this test on a real instance of a chain pointed where you like
fn test_header_perf() {
	util::init_test_logger();
	// if testing against a real chain, insert location here
	// NOTE: Modify to point at your own paths
	// `format!` with no interpolation is just an allocation; use `to_string`.
	let src_root_dir = "/Users/yeastplume/Projects/grin_project/server/chain_data".to_string();
	let dest_root_dir =
		"/Users/yeastplume/Projects/grin_project/server/.chain_data_copy".to_string();
	clean_output_dir(&dest_root_dir);
	test_header_perf_impl(false, &src_root_dir, &dest_root_dir);
	clean_output_dir(&dest_root_dir);
}

View file

@ -25,7 +25,7 @@ use crate::consensus::{
use crate::core::block::HeaderVersion;
use crate::pow::{
self, new_cuckaroo_ctx, new_cuckarood_ctx, new_cuckaroom_ctx, new_cuckarooz_ctx,
new_cuckatoo_ctx, no_cuckaroo_ctx, BitVec, PoWContext,
new_cuckatoo_ctx, no_cuckaroo_ctx, PoWContext, Proof,
};
use crate::ser::ProtocolVersion;
use std::cell::Cell;
@ -488,7 +488,7 @@ where
#[inline]
pub fn header_size_bytes(edge_bits: u8) -> usize {
let size = 2 + 2 * 8 + 5 * 32 + 32 + 2 * 8;
let proof_size = 8 + 4 + 8 + 1 + BitVec::bytes_len(edge_bits as usize * proofsize());
let proof_size = 8 + 4 + 8 + 1 + Proof::pack_len(edge_bits);
size + proof_size
}

View file

@ -23,6 +23,7 @@ use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
/// proof of work within a block header.
use std::cmp::{max, min};
use std::ops::{Add, Div, Mul, Sub};
use std::u64;
use std::{fmt, iter};
/// Generic trait for a solver/verifier providing common interface into Cuckoo-family PoW
@ -325,8 +326,8 @@ impl ProofOfWork {
/// The hash of the `Proof` is the hash of its packed nonces when serializing
/// them at their exact bit size. The resulting bit sequence is padded to be
/// byte-aligned. We form a PROOFSIZE*edge_bits integer by packing the PROOFSIZE edge
/// indices together, with edge index i occupying bits i * edge_bits through
/// (i+1) * edge_bits - 1, padding it with up to 7 0-bits to a multiple of 8 bits,
/// indices together, with edge index i occupying bits i * edge_bits through
/// (i+1) * edge_bits - 1, padding it with up to 7 0-bits to a multiple of 8 bits,
/// writing as a little endian byte array, and hashing with blake2b using 256 bit digest.
#[derive(Clone, PartialOrd, PartialEq, Serialize)]
@ -372,6 +373,11 @@ impl Proof {
}
}
/// Number of bytes required to store a proof of given edge bits
pub fn pack_len(bit_width: u8) -> usize {
	// Total packed bit count, rounded up to a whole number of bytes.
	let total_bits = bit_width as usize * global::proofsize();
	(total_bits + 7) / 8
}
/// Builds a proof with random POW data,
/// needed so that tests that ignore POW
/// don't fail due to duplicate hashes
@ -396,6 +402,17 @@ impl Proof {
self.nonces.len()
}
/// Pack the nonces of the proof to their exact bit size as described above
pub fn pack_nonces(&self) -> Vec<u8> {
	// Output buffer sized exactly for the packed representation.
	let mut compressed = vec![0u8; Proof::pack_len(self.edge_bits)];
	// `&self.nonces[0..self.nonces.len()]` was a needless full-range slice.
	pack_bits(self.edge_bits, &self.nonces, &mut compressed);
	compressed
}
/// Difficulty achieved by this proof with given scaling factor
fn scaled_difficulty(&self, scale: u64) -> u64 {
let diff = ((scale as u128) << 64) / (max(1, self.hash().to_u64()) as u128);
@ -403,6 +420,34 @@ impl Proof {
}
}
/// Pack an array of u64s into `compressed` at the specified bit width, little
/// endian, with element `i` occupying bits `i * bit_width` through
/// `(i + 1) * bit_width - 1`. Each element must fit in `bit_width` bits and
/// the caller must ensure `compressed` is exactly
/// `(uncompressed.len() * bit_width + 7) / 8` bytes long.
fn pack_bits(bit_width: u8, uncompressed: &[u64], mut compressed: &mut [u8]) {
	let width = u32::from(bit_width);
	// We will use a `u64` as a mini buffer of 64 bits.
	// We accumulate bits in it until capacity, at which point we just copy this
	// mini buffer to compressed.
	let mut mini_buffer = 0u64;
	let mut remaining: u32 = 64;
	for el in uncompressed {
		mini_buffer |= el << (64 - remaining);
		if width < remaining {
			remaining -= width;
		} else {
			// Buffer is full: flush all 64 bits, then seed the next buffer
			// with the high bits of `el` that did not fit.
			compressed[..8].copy_from_slice(&mini_buffer.to_le_bytes());
			compressed = &mut compressed[8..];
			mini_buffer = el >> remaining;
			remaining = 64 + remaining - width;
		}
	}
	// Write out whatever partial bytes remain in the mini buffer. After the
	// loop at most 8 bytes of `compressed` are left (the tail is < 64 bits by
	// construction). Writing the leftover length unconditionally also emits
	// trailing zero bits, so correctness no longer depends on the caller
	// passing a pre-zeroed buffer.
	let remainder = compressed.len();
	if remainder > 0 {
		compressed.copy_from_slice(&mini_buffer.to_le_bytes()[..remainder]);
	}
}
fn extract_bits(bits: &[u8], bit_start: usize, bit_count: usize, read_from: usize) -> u64 {
let mut buf: [u8; 8] = [0; 8];
buf.copy_from_slice(&bits[read_from..read_from + 8]);
@ -448,8 +493,7 @@ impl Readable for Proof {
// prepare nonces and read the right number of bytes
let mut nonces = Vec::with_capacity(global::proofsize());
let nonce_bits = edge_bits as usize;
let bits_len = nonce_bits * global::proofsize();
let bytes_len = BitVec::bytes_len(bits_len);
let bytes_len = Proof::pack_len(edge_bits);
if bytes_len < 8 {
return Err(ser::Error::CorruptedData);
}
@ -475,42 +519,7 @@ impl Writeable for Proof {
if writer.serialization_mode() != ser::SerializationMode::Hash {
writer.write_u8(self.edge_bits)?;
}
let nonce_bits = self.edge_bits as usize;
let mut bitvec = BitVec::new(nonce_bits * global::proofsize());
for (n, nonce) in self.nonces.iter().enumerate() {
for bit in 0..nonce_bits {
if nonce & (1 << bit) != 0 {
bitvec.set_bit_at(n * nonce_bits + (bit as usize))
}
}
}
writer.write_fixed_bytes(&bitvec.bits)?;
Ok(())
}
}
/// A bit vector
// TODO this could likely be optimized by writing whole bytes (or even words)
// in the `BitVec` at once, dealing with the truncation, instead of bits by bits
pub struct BitVec {
bits: Vec<u8>,
}
impl BitVec {
/// Number of bytes required to store the provided number of bits
#[inline]
pub fn bytes_len(bits_len: usize) -> usize {
(bits_len + 7) / 8
}
fn new(bits_len: usize) -> BitVec {
BitVec {
bits: vec![0; BitVec::bytes_len(bits_len)],
}
}
fn set_bit_at(&mut self, pos: usize) {
self.bits[pos / 8] |= 1 << (pos % 8) as u8;
writer.write_fixed_bytes(&self.pack_nonces())
}
}