From 77765796ab76ea78cd39986542de10f4bb4312a4 Mon Sep 17 00:00:00 2001
From: Antioch Peverell <apeverell@protonmail.com>
Date: Wed, 5 Sep 2018 10:51:29 +0100
Subject: [PATCH] improve "check known" steps during process_block() (#1475)

* document what we do during pipe::apply_block()

* rustfmt

* wip

* rustfmt

* wip

* additional check_known_store and check_known_mmr checks in process_block

* rustfmt

* cleanup coinbase maturity check in process_block

* consolidate the "check in store" logic
add TODOs around the 50 block OldBlock logic

* rustfmt

* cleanup
---
 chain/src/pipe.rs      | 272 +++++++++++++++++++++++++++--------------
 chain/src/txhashset.rs |  22 +++-
 core/src/core/pmmr.rs  |  13 +-
 3 files changed, 205 insertions(+), 102 deletions(-)

diff --git a/chain/src/pipe.rs b/chain/src/pipe.rs
index 8fdab929a..4dda0a56a 100644
--- a/chain/src/pipe.rs
+++ b/chain/src/pipe.rs
@@ -57,8 +57,8 @@ pub struct BlockContext {
 
 // Check if this block is the next block *immediately*
 // after our current chain head.
-fn is_next_block(b: &Block, ctx: &mut BlockContext) -> bool {
-	b.header.previous == ctx.head.last_block_h
+fn is_next_block(header: &BlockHeader, ctx: &mut BlockContext) -> bool {
+	header.previous == ctx.head.last_block_h
 }
 
 /// Runs the block processing pipeline, including validation and finding a
@@ -89,40 +89,47 @@ pub fn process_block(
 	let txhashset = ctx.txhashset.clone();
 	let mut txhashset = txhashset.write().unwrap();
 
-	// update head now that we're in the lock
+	// Update head now that we are in the lock.
 	ctx.head = ctx.store.head()?;
 
-	// Check if we have recently processed this block (via block_hashes_cache).
-	check_known(b.hash(), ctx)?;
+	// Fast in-memory checks to avoid re-processing a block we recently processed.
+	{
+		// Check if we have recently processed this block (via ctx chain head).
+		check_known_head(&b.header, ctx)?;
+
+		// Check if we have recently processed this block (via block_hashes_cache).
+		check_known_cache(&b.header, ctx)?;
+
+		// Check if this block is already know due it being in the current set of orphan blocks.
+		check_known_orphans(&b.header, ctx)?;
+	}
 
 	// Check our header itself is actually valid before proceeding any further.
 	validate_header(&b.header, ctx)?;
 
 	// Check if are processing the "next" block relative to the current chain head.
-	if is_next_block(b, ctx) {
+	if is_next_block(&b.header, ctx) {
 		// If this is the "next" block then either -
 		//   * common case where we process blocks sequentially.
 		//   * special case where this is the first fast sync full block
-		// Either way we can proceed.
+		// Either way we can proceed (and we know the block is new and unprocessed).
 	} else {
-		// Check we actually have the previous block in the store.
-		// Note: not just the header but the full block itself.
-		// We cannot assume we can use the chain head for this
-		// as we may be dealing with a fork (with less work currently).
-		match ctx.store.block_exists(&b.header.previous) {
-			Ok(true) => {
-				// We have the previous block in the store, so we can proceed.
-			}
-			Ok(false) => {
-				// We do not have the previous block in the store.
-				// We have not yet processed the previous block so
-				// this block is an orphan (for now).
-				return Err(ErrorKind::Orphan.into());
-			}
-			Err(e) => {
-				return Err(ErrorKind::StoreErr(e, "pipe get previous".to_owned()).into());
-			}
-		}
+		// Check we have *this* block in the store.
+		// Stop if we have processed this block previously (it is in the store).
+		// This is more expensive than the earlier check_known() as we hit the store.
+		check_known_store(&b.header, ctx)?;
+
+		// Check existing MMR (via rewind) to see if this block is known to us already.
+		// This should catch old blocks before we check to see if they appear to be
+		// orphaned due to compacting/pruning on a fast-sync node.
+		// This is more expensive than check_known_store() as we rewind the txhashset.
+		// But we only incur the cost of the rewind if this is an earlier block on the same chain.
+		check_known_mmr(&b.header, ctx, &mut txhashset)?;
+
+		// At this point it looks like this is a new block that we have not yet processed.
+		// Check we have the *previous* block in the store.
+		// If we do not then treat this block as an orphan.
+		check_prev_store(&b.header, ctx)?;
 	}
 
 	// Validate the block itself.
@@ -140,7 +147,7 @@ pub fn process_block(
 		// First we rewind the txhashset extension if necessary
 		// to put it into a consistent state for validating the block.
 		// We can skip this step if the previous header is the latest header we saw.
-		if is_next_block(b, ctx) {
+		if is_next_block(&b.header, ctx) {
 			// No need to rewind if we are processing the next block.
 		} else {
 			// Rewind the re-apply blocks on the forked chain to
@@ -149,6 +156,12 @@ pub fn process_block(
 			rewind_and_apply_fork(b, ctx.store.clone(), extension)?;
 		}
 
+		// Check any coinbase being spent have matured sufficiently.
+		// This needs to be done within the context of a potentially
+		// rewound txhashset extension to reflect chain state prior
+		// to applying the new block.
+		verify_coinbase_maturity(b, &mut extension)?;
+
 		// Apply the block to the txhashset state.
 		// Validate the txhashset roots and sizes against the block header.
 		// Block is invalid if there are any discrepencies.
@@ -156,7 +169,7 @@ pub fn process_block(
 
 		// If applying this block does not increase the work on the chain then
 		// we know we have not yet updated the chain to produce a new chain head.
-		if !block_has_more_work(b, &ctx.head) {
+		if !block_has_more_work(&b.header, &ctx.head) {
 			extension.force_rollback();
 		}
 
@@ -234,19 +247,125 @@ fn check_header_known(bh: Hash, ctx: &mut BlockContext) -> Result<(), Error> {
 	Ok(())
 }
 
-/// Quick in-memory check to fast-reject any block we've already handled
-/// recently. Keeps duplicates from the network in check.
-fn check_known(bh: Hash, ctx: &mut BlockContext) -> Result<(), Error> {
+/// Quick in-memory check to fast-reject any block handled recently.
+/// Keeps duplicates from the network in check.
+/// Checks against the last_block_h and prev_block_h of the chain head.
+fn check_known_head(header: &BlockHeader, ctx: &mut BlockContext) -> Result<(), Error> {
+	let bh = header.hash();
 	if bh == ctx.head.last_block_h || bh == ctx.head.prev_block_h {
-		return Err(ErrorKind::Unfit("already known".to_string()).into());
+		return Err(ErrorKind::Unfit("already known in head".to_string()).into());
 	}
+	Ok(())
+}
+
+/// Quick in-memory check to fast-reject any block handled recently.
+/// Keeps duplicates from the network in check.
+/// Checks against the cache of recently processed block hashes.
+fn check_known_cache(header: &BlockHeader, ctx: &mut BlockContext) -> Result<(), Error> {
 	let cache = ctx.block_hashes_cache.read().unwrap();
-	if cache.contains(&bh) {
+	if cache.contains(&header.hash()) {
 		return Err(ErrorKind::Unfit("already known in cache".to_string()).into());
 	}
-	if ctx.orphans.contains(&bh) {
-		return Err(ErrorKind::Unfit("already known in orphans".to_string()).into());
+	Ok(())
+}
+
+/// Check if this block is in the set of known orphans.
+fn check_known_orphans(header: &BlockHeader, ctx: &mut BlockContext) -> Result<(), Error> {
+	if ctx.orphans.contains(&header.hash()) {
+		Err(ErrorKind::Unfit("already known in orphans".to_string()).into())
+	} else {
+		Ok(())
 	}
+}
+
+// Check if this block is in the store already.
+fn check_known_store(header: &BlockHeader, ctx: &mut BlockContext) -> Result<(), Error> {
+	match ctx.store.block_exists(&header.hash()) {
+		Ok(true) => {
+			if header.height < ctx.head.height.saturating_sub(50) {
+				// TODO - we flag this as an "abusive peer" but only in the case
+				// where we have the full block in our store.
+				// So this is not a particularly exhaustive check.
+				Err(ErrorKind::OldBlock.into())
+			} else {
+				Err(ErrorKind::Unfit("already known in store".to_string()).into())
+			}
+		}
+		Ok(false) => {
+			// Not yet processed this block, we can proceed.
+			Ok(())
+		}
+		Err(e) => {
+			return Err(ErrorKind::StoreErr(e, "pipe get this block".to_owned()).into());
+		}
+	}
+}
+
+// Check we have the *previous* block in the store.
+// Note: not just the header but the full block itself.
+// We cannot assume we can use the chain head for this
+// as we may be dealing with a fork (with less work currently).
+fn check_prev_store(header: &BlockHeader, ctx: &mut BlockContext) -> Result<(), Error> {
+	match ctx.store.block_exists(&header.previous) {
+		Ok(true) => {
+			// We have the previous block in the store, so we can proceed.
+			Ok(())
+		}
+		Ok(false) => {
+			// We do not have the previous block in the store.
+			// We have not yet processed the previous block so
+			// this block is an orphan (for now).
+			Err(ErrorKind::Orphan.into())
+		}
+		Err(e) => Err(ErrorKind::StoreErr(e, "pipe get previous".to_owned()).into()),
+	}
+}
+
+// If we are processing an "old" block then
+// we can quickly check if it already exists
+// on our current longest chain (we have already processes it).
+// First check the header matches via current height index.
+// Then peek directly into the MMRs at the appropriate pos.
+// We can avoid a full rewind in this case.
+fn check_known_mmr(
+	header: &BlockHeader,
+	ctx: &mut BlockContext,
+	write_txhashset: &mut txhashset::TxHashSet,
+) -> Result<(), Error> {
+	// No point checking the MMR if this block is not earlier in the chain.
+	if header.height > ctx.head.height {
+		return Ok(());
+	}
+
+	// Use "header by height" index to look at current most work chain.
+	// Header is not "known if the header differs at the given height.
+	let local_header = ctx.store.get_header_by_height(header.height)?;
+	if local_header.hash() != header.hash() {
+		return Ok(());
+	}
+
+	// Rewind the txhashset to the given block and validate
+	// roots and sizes against the header.
+	// If everything matches then this is a "known" block
+	// and we do not need to spend any more effort
+	txhashset::extending_readonly(write_txhashset, |extension| {
+		extension.rewind(header)?;
+
+		// We want to return an error here (block already known)
+		// if we *successfully validate the MMR roots and sizes.
+		if extension.validate_roots(header).is_ok() && extension.validate_sizes(header).is_ok() {
+			// TODO - determine if block is more than 50 blocks old
+			// and return specific OldBlock error.
+			// Or pull OldBlock (abusive peer) out into separate processing step.
+
+			return Err(ErrorKind::Unfit("already known on most work chain".to_string()).into());
+		}
+
+		// If we get here then we have *not* seen this block before
+		// and we should continue processing the block.
+		Ok(())
+	})?;
+
 	Ok(())
 }
 
@@ -354,72 +473,35 @@ fn validate_header(header: &BlockHeader, ctx: &mut BlockContext) -> Result<(), E
 }
 
 fn validate_block(
-	b: &Block,
+	block: &Block,
 	ctx: &mut BlockContext,
 	verifier_cache: Arc<RwLock<VerifierCache>>,
 ) -> Result<(), Error> {
-	if ctx.store.block_exists(&b.hash())? {
-		if b.header.height < ctx.head.height.saturating_sub(50) {
-			return Err(ErrorKind::OldBlock.into());
-		} else {
-			return Err(ErrorKind::Unfit("already known".to_string()).into());
-		}
-	}
-	let prev = ctx.store.get_block_header(&b.header.previous)?;
-	b.validate(
-		&prev.total_kernel_offset,
-		&prev.total_kernel_sum,
-		verifier_cache,
-	).map_err(|e| ErrorKind::InvalidBlockProof(e))?;
+	let prev = ctx.store.get_block_header(&block.header.previous)?;
+	block
+		.validate(
+			&prev.total_kernel_offset,
+			&prev.total_kernel_sum,
+			verifier_cache,
+		)
+		.map_err(|e| ErrorKind::InvalidBlockProof(e))?;
+	Ok(())
+}
+
+/// Verify the block is not attempting to spend coinbase outputs
+/// before they have sufficiently matured.
+/// Note: requires a txhashset extension.
+fn verify_coinbase_maturity(block: &Block, ext: &mut txhashset::Extension) -> Result<(), Error> {
+	ext.verify_coinbase_maturity(&block.inputs(), block.header.height)?;
 	Ok(())
 }
 
 /// Fully validate the block by applying it to the txhashset extension.
 /// Check both the txhashset roots and sizes are correct after applying the block.
-fn apply_block_to_txhashset(b: &Block, ext: &mut txhashset::Extension) -> Result<(), Error> {
-	// First check we are not attempting to spend any coinbase outputs
-	// before they have matured sufficiently.
-	// TODO - this step is ill-fitting here - where should this live?
-	ext.verify_coinbase_maturity(&b.inputs(), b.header.height)?;
-
-	// apply the new block to the MMR trees and check the new root hashes
-	ext.apply_block(&b)?;
-
-	let roots = ext.roots();
-	if roots.output_root != b.header.output_root
-		|| roots.rproof_root != b.header.range_proof_root
-		|| roots.kernel_root != b.header.kernel_root
-	{
-		ext.dump(false);
-
-		debug!(
-			LOGGER,
-			"validate_block_via_txhashset: output roots - {:?}, {:?}",
-			roots.output_root,
-			b.header.output_root,
-		);
-		debug!(
-			LOGGER,
-			"validate_block_via_txhashset: rproof roots - {:?}, {:?}",
-			roots.rproof_root,
-			b.header.range_proof_root,
-		);
-		debug!(
-			LOGGER,
-			"validate_block_via_txhashset: kernel roots - {:?}, {:?}",
-			roots.kernel_root,
-			b.header.kernel_root,
-		);
-
-		return Err(ErrorKind::InvalidRoot.into());
-	}
-
-	// Check the output and rangeproof MMR sizes here against the header.
-	let (output_mmr_size, _, kernel_mmr_size) = ext.sizes();
-	if b.header.output_mmr_size != output_mmr_size || b.header.kernel_mmr_size != kernel_mmr_size {
-		return Err(ErrorKind::InvalidMMRSize.into());
-	}
-
+fn apply_block_to_txhashset(block: &Block, ext: &mut txhashset::Extension) -> Result<(), Error> {
+	ext.apply_block(block)?;
+	ext.validate_roots(&block.header)?;
+	ext.validate_sizes(&block.header)?;
 	Ok(())
 }
 
@@ -448,7 +530,7 @@ fn add_block_header(bh: &BlockHeader, batch: &mut store::Batch) -> Result<(), Er
 fn update_head(b: &Block, ctx: &BlockContext, batch: &store::Batch) -> Result<Option<Tip>, Error> {
 	// if we made a fork with more work than the head (which should also be true
 	// when extending the head), update it
-	if block_has_more_work(b, &ctx.head) {
+	if block_has_more_work(&b.header, &ctx.head) {
 		// update the block height index
 		batch
 			.setup_height(&b.header, &ctx.head)
@@ -480,8 +562,8 @@ fn update_head(b: &Block, ctx: &BlockContext, batch: &store::Batch) -> Result<Op
 }
 
 // Whether the provided block totals more work than the chain tip
-fn block_has_more_work(b: &Block, tip: &Tip) -> bool {
-	let block_tip = Tip::from_block(&b.header);
+fn block_has_more_work(header: &BlockHeader, tip: &Tip) -> bool {
+	let block_tip = Tip::from_block(header);
 	block_tip.total_difficulty > tip.total_difficulty
 }
 
diff --git a/chain/src/txhashset.rs b/chain/src/txhashset.rs
index 42da28fb3..4654a5cfd 100644
--- a/chain/src/txhashset.rs
+++ b/chain/src/txhashset.rs
@@ -837,9 +837,26 @@ impl<'a> Extension<'a> {
 			|| roots.rproof_root != header.range_proof_root
 			|| roots.kernel_root != header.kernel_root
 		{
-			return Err(ErrorKind::InvalidRoot.into());
+			Err(ErrorKind::InvalidRoot.into())
+		} else {
+			Ok(())
+		}
+	}
+
+	/// Validate the output and kernel MMR sizes against the block header.
+	pub fn validate_sizes(&self, header: &BlockHeader) -> Result<(), Error> {
+		// If we are validating the genesis block then we have no outputs or
+		// kernels. So we are done here.
+		if header.height == 0 {
+			return Ok(());
+		}
+
+		let (output_mmr_size, _, kernel_mmr_size) = self.sizes();
+		if output_mmr_size != header.output_mmr_size || kernel_mmr_size != header.kernel_mmr_size {
+			Err(ErrorKind::InvalidMMRSize.into())
+		} else {
+			Ok(())
 		}
-		Ok(())
 	}
 
 	fn validate_mmrs(&self) -> Result<(), Error> {
@@ -874,6 +891,7 @@ impl<'a> Extension<'a> {
 	) -> Result<((Commitment, Commitment)), Error> {
 		self.validate_mmrs()?;
 		self.validate_roots(header)?;
+		self.validate_sizes(header)?;
 
 		if header.height == 0 {
 			let zero_commit = secp_static::commit_to_zero_value();
diff --git a/core/src/core/pmmr.rs b/core/src/core/pmmr.rs
index 21717bd77..03c01c22a 100644
--- a/core/src/core/pmmr.rs
+++ b/core/src/core/pmmr.rs
@@ -352,7 +352,8 @@ where
 	}
 
 	/// Helper function to get the last N nodes inserted, i.e. the last
-	/// n nodes along the bottom of the tree
+	/// n nodes along the bottom of the tree.
+	/// May return less than n items if the MMR has been pruned/compacted.
 	pub fn get_last_n_insertions(&self, n: u64) -> Vec<(Hash, T)> {
 		let mut return_vec = vec![];
 		let mut last_leaf = self.last_pos;
@@ -361,10 +362,12 @@ where
 				break;
 			}
 			last_leaf = bintree_rightmost(last_leaf);
-			return_vec.push((
-				self.backend.get_hash(last_leaf).unwrap(),
-				self.backend.get_data(last_leaf).unwrap(),
-			));
+
+			if let Some(hash) = self.backend.get_hash(last_leaf) {
+				if let Some(data) = self.backend.get_data(last_leaf) {
+					return_vec.push((hash, data));
+				}
+			}
 			last_leaf -= 1;
 		}
 		return_vec