From 4c081b8f7361f8834f95ec751e446b5d957d7508 Mon Sep 17 00:00:00 2001 From: Antioch Peverell Date: Thu, 13 Feb 2020 10:26:56 +0000 Subject: [PATCH] use backend file when compacting the output pos index (#3226) --- chain/src/chain.rs | 3 --- chain/src/store.rs | 11 +++++++++++ chain/src/txhashset/txhashset.rs | 30 ++++++++++++++++++++++++++++- core/src/core/pmmr/readonly_pmmr.rs | 10 ++++++++++ 4 files changed, 50 insertions(+), 4 deletions(-) diff --git a/chain/src/chain.rs b/chain/src/chain.rs index ce4727d06..15619dda6 100644 --- a/chain/src/chain.rs +++ b/chain/src/chain.rs @@ -1109,9 +1109,6 @@ impl Chain { txhashset.compact(&horizon_header, &mut batch)?; } - // Rebuild our output_pos index in the db based on current UTXO set. - txhashset.rebuild_height_pos_index(&header_pmmr, &mut batch)?; - // If we are not in archival mode remove historical blocks from the db. if !self.archive_mode { self.remove_historical_blocks(&header_pmmr, &mut batch)?; diff --git a/chain/src/store.rs b/chain/src/store.rs index e02a788b3..95024bdfb 100644 --- a/chain/src/store.rs +++ b/chain/src/store.rs @@ -229,6 +229,11 @@ impl<'a> Batch<'a> { Ok(()) } + /// Low level function to delete directly by raw key. + pub fn delete(&self, key: &[u8]) -> Result<(), Error> { + self.db.delete(key) + } + /// Delete a full block. Does not delete any record associated with a block /// header. pub fn delete_block(&self, bh: &Hash) -> Result<(), Error> { @@ -269,6 +274,12 @@ impl<'a> Batch<'a> { ) } + /// Iterator over the output_pos index. + pub fn output_pos_iter(&self) -> Result<SerIterator<(u64, u64)>, Error> { + let key = to_key(COMMIT_POS_HGT_PREFIX, &mut "".to_string().into_bytes()); + self.db.iter(&key) + } + /// Get output_pos from index. /// Note: /// - Original prefix 'COMMIT_POS_PREFIX' is not used for normal case anymore, refer to #2889 for detail. 
diff --git a/chain/src/txhashset/txhashset.rs b/chain/src/txhashset/txhashset.rs index 73a84f7de..f21826e3f 100644 --- a/chain/src/txhashset/txhashset.rs +++ b/chain/src/txhashset/txhashset.rs @@ -383,6 +383,9 @@ impl TxHashSet { .backend .check_compact(horizon_header.output_mmr_size, &rewind_rm_pos)?; + debug!("txhashset: compact height pos index..."); + self.compact_height_pos_index(batch)?; + debug!("txhashset: ... compaction finished"); Ok(()) @@ -390,7 +393,6 @@ impl TxHashSet { /// Rebuild the index of block height & MMR positions to the corresponding UTXOs. /// This is a costly operation performed only when we receive a full new chain state. - /// Note: only called by compact. pub fn rebuild_height_pos_index( &self, header_pmmr: &PMMRHandle<BlockHeader>, @@ -446,6 +448,32 @@ impl TxHashSet { ); Ok(()) } + + fn compact_height_pos_index(&self, batch: &Batch<'_>) -> Result<(), Error> { + let now = Instant::now(); + let output_pmmr = + ReadonlyPMMR::at(&self.output_pmmr_h.backend, self.output_pmmr_h.last_pos); + let last_pos = output_pmmr.unpruned_size(); + + let deleted = batch + .output_pos_iter()? + .filter(|(_, (pos, _))| { + // Note we use get_from_file() here as we want to ensure we have an entry + // in the index for *every* output still in the file, not just the "unspent" + // outputs. This is because we need to support rewind to handle fork/reorg. + // Rewind may "unspend" recently spent, but not yet pruned outputs, and the + // index must be consistent in this situation. 
+ *pos <= last_pos && output_pmmr.get_from_file(*pos).is_none() + }) + .map(|(key, _)| batch.delete(&key)) + .count(); + debug!( + "compact_output_pos_index: deleted {} entries from the index, took {}s", + deleted, + now.elapsed().as_secs(), + ); + Ok(()) + } } /// Starts a new unit of work to extend (or rewind) the chain with additional diff --git a/core/src/core/pmmr/readonly_pmmr.rs b/core/src/core/pmmr/readonly_pmmr.rs index 4a447810f..de2f24495 100644 --- a/core/src/core/pmmr/readonly_pmmr.rs +++ b/core/src/core/pmmr/readonly_pmmr.rs @@ -86,6 +86,16 @@ where } } + /// Get the hash from the underlying MMR file, ignoring the leafset. + /// Some entries may have been removed from the leafset but not yet pruned from the file. + pub fn get_from_file(&self, pos: u64) -> Option<Hash> { + if pos > self.last_pos { + None + } else { + self.backend.get_from_file(pos) + } + } + /// Iterator over current (unpruned, unremoved) leaf positions. pub fn leaf_pos_iter(&self) -> impl Iterator<Item = u64> + '_ { self.backend.leaf_pos_iter()