From 93b648fbc04e69b15b1a33294511dc72bce32bc8 Mon Sep 17 00:00:00 2001
From: Yeastplume
Date: Fri, 27 Apr 2018 14:16:30 +0100
Subject: [PATCH] Split wallet server queries into multiple queries + restore performance (#1013)

* mods to speed up restore a bit

* mods to speed up restore a bit

* performance improvements to wallet restore and split large server queries into multiple
---
 wallet/src/checker.rs | 117 +++++++++++++++++++++++++++++-------------
 wallet/src/restore.rs |  33 ++++++++++--
 2 files changed, 109 insertions(+), 41 deletions(-)

diff --git a/wallet/src/checker.rs b/wallet/src/checker.rs
index 2fcd69f3c..263576885 100644
--- a/wallet/src/checker.rs
+++ b/wallet/src/checker.rs
@@ -82,7 +82,7 @@ fn refresh_missing_block_hashes(config: &WalletConfig, keychain: &Keychain) -> R
 		wallet_outputs.len(),
 	);
 
-	let mut id_params: Vec<String> = wallet_outputs
+	let id_params: Vec<String> = wallet_outputs
 		.keys()
 		.map(|commit| {
 			let id = util::to_hex(commit.as_ref().to_vec());
@@ -92,33 +92,54 @@ fn refresh_missing_block_hashes(config: &WalletConfig, keychain: &Keychain) -> R
 
 	let tip = get_tip_from_node(config)?;
 
-	let height_params = format!("start_height={}&end_height={}", 0, tip.height,);
-	let mut query_params = vec![height_params];
-	query_params.append(&mut id_params);
-
-	let url = format!(
-		"{}/v1/chain/outputs/byheight?{}",
-		config.check_node_api_http_addr,
-		query_params.join("&"),
-	);
-	debug!(LOGGER, "{:?}", url);
+	let max_ids_in_query = 1000;
+	let mut current_index = 0;
 
 	let mut api_blocks: HashMap<pedersen::Commitment, api::BlockHeaderInfo> = HashMap::new();
 	let mut api_merkle_proofs: HashMap<pedersen::Commitment, MerkleProofWrapper> = HashMap::new();
-	match api::client::get::<Vec<api::BlockOutputs>>(url.as_str()) {
-		Ok(blocks) => for block in blocks {
-			for out in block.outputs {
-				api_blocks.insert(out.commit, block.header.clone());
-				if let Some(merkle_proof) = out.merkle_proof {
-					let wrapper = MerkleProofWrapper(merkle_proof);
-					api_merkle_proofs.insert(out.commit, wrapper);
+
+	// Split up into separate requests, to avoid hitting http limits
+	loop {
+		let q = id_params.clone();
+		let mut cur_params: Vec<String> = q.into_iter()
+			.skip(current_index)
+			.take(max_ids_in_query)
+			.collect();
+
+		if cur_params.len() == 0 {
+			break;
+		}
+
+		debug!(LOGGER, "Splitting query: {} ids", cur_params.len());
+
+		current_index = current_index + cur_params.len();
+
+		let height_params = format!("start_height={}&end_height={}", 0, tip.height,);
+		let mut query_params = vec![height_params];
+		query_params.append(&mut cur_params);
+
+		let url = format!(
+			"{}/v1/chain/outputs/byheight?{}",
+			config.check_node_api_http_addr,
+			query_params.join("&"),
+		);
+		debug!(LOGGER, "{:?}", url);
+
+		match api::client::get::<Vec<api::BlockOutputs>>(url.as_str()) {
+			Ok(blocks) => for block in blocks {
+				for out in block.outputs {
+					api_blocks.insert(out.commit, block.header.clone());
+					if let Some(merkle_proof) = out.merkle_proof {
+						let wrapper = MerkleProofWrapper(merkle_proof);
+						api_merkle_proofs.insert(out.commit, wrapper);
+					}
 				}
+			},
+			Err(e) => {
+				// if we got anything other than 200 back from server, bye
+				error!(LOGGER, "Refresh failed... unable to contact node: {}", e);
+				return Err(e).context(ErrorKind::Node)?;
 			}
-		},
-		Err(e) => {
-			// if we got anything other than 200 back from server, bye
-			error!(LOGGER, "Refresh failed... unable to contact node: {}", e);
-			return Err(e).context(ErrorKind::Node)?;
 		}
 	}
 
@@ -177,24 +198,46 @@ fn refresh_output_state(config: &WalletConfig, keychain: &Keychain) -> Result<()
 	// build a map of api outputs by commit so we can look them up efficiently
 	let mut api_outputs: HashMap<pedersen::Commitment, api::Output> = HashMap::new();
 
-	let query_string = query_params.join("&");
+	let max_ids_in_query = 1000;
+	let mut current_index = 0;
 
-	let url = format!(
-		"{}/v1/chain/outputs/byids?{}",
-		config.check_node_api_http_addr, query_string,
-	);
+	// Split up into separate requests, to avoid hitting http limits
+	loop {
+		let q = query_params.clone();
+		let cur_params: Vec<String> = q.into_iter()
+			.skip(current_index)
+			.take(max_ids_in_query)
+			.collect();
 
-	match api::client::get::<Vec<api::Output>>(url.as_str()) {
-		Ok(outputs) => for out in outputs {
-			api_outputs.insert(out.commit.commit(), out);
-		},
-		Err(e) => {
-			// if we got anything other than 200 back from server, don't attempt to refresh
-			// the wallet data after
-			return Err(e).context(ErrorKind::Node)?;
+		if cur_params.len() == 0 {
+			break;
 		}
-	};
 
+		debug!(LOGGER, "Splitting query: {} ids", cur_params.len());
+
+		current_index = current_index + cur_params.len();
+		let query_string = cur_params.join("&");
+
+		let url = format!(
+			"{}/v1/chain/outputs/byids?{}",
+			config.check_node_api_http_addr, query_string,
+		);
+
+		match api::client::get::<Vec<api::Output>>(url.as_str()) {
+			Ok(outputs) => for out in outputs {
+				api_outputs.insert(out.commit.commit(), out);
+			},
+			Err(e) => {
+				// if we got anything other than 200 back from server, don't attempt to refresh
+				// the wallet data after
+				error!(
+					LOGGER,
+					"Error sending wallet refresh request to server: {:?}", e
+				);
+				return Err(e).context(ErrorKind::Node)?;
+			}
+		};
+	}
 	// now for each commit, find the output in the wallet and
 	// the corresponding api output (if it exists)
 	// and refresh it in-place in the wallet.
diff --git a/wallet/src/restore.rs b/wallet/src/restore.rs
index bec4471e0..a44fb46c3 100644
--- a/wallet/src/restore.rs
+++ b/wallet/src/restore.rs
@@ -103,6 +103,7 @@ fn find_outputs_with_key(
 	config: &WalletConfig,
 	keychain: &Keychain,
 	outputs: Vec<api::OutputPrintable>,
+	found_key_index: &mut Vec<u32>,
 ) -> Vec<
 	(
 		pedersen::Commitment,
@@ -151,7 +152,24 @@ fn find_outputs_with_key(
 		}
 		// we have a match, now check through our key iterations to find a partial match
 		let mut found = false;
-		for i in 1..max_derivations {
+
+		let mut start_index = 1;
+
+		// TODO: This assumption only holds with current wallet software assuming
+		// wallet doesn't go back and re-use gaps in its key index, i.e. every
+		// new key index produced is always greater than the previous max key index
+		if let Some(m) = found_key_index.iter().max() {
+			start_index = *m as usize + 1;
+		}
+
+		for i in start_index..max_derivations {
+			// much faster than calling EC functions for each found key
+			// Shouldn't be needed if assumption about wallet key 'gaps' above
+			// holds.. otherwise this is a good optimisation.. perhaps
+			// provide a command line switch
+			/*if found_key_index.contains(&(i as u32)) {
+				continue;
+			}*/
 			let key_id = &keychain.derive_key_id(i as u32).unwrap();
 			if !message.compare_bf_first_8(key_id) {
 				continue;
 			}
@@ -171,7 +189,7 @@
 				LOGGER,
 				"Output found: {:?}, key_index: {:?}", output.commit, i,
 			);
-
+			found_key_index.push(i as u32);
 			// add it to result set here
 			let commit_id = output.commit.0;
 
@@ -242,6 +260,9 @@ pub fn restore(config: &WalletConfig, keychain: &Keychain) -> Result<(), Error>
 
 	let batch_size = 1000;
 	let mut start_index = 1;
+	// Keep a set of keys we've already claimed (cause it's far faster than
+	// deriving a key for each one)
+	let mut found_key_index: Vec<u32> = vec![];
 	// this will start here, then lower as outputs are found, moving backwards on
 	// the chain
 	loop {
@@ -255,8 +276,12 @@ pub fn restore(config: &WalletConfig, keychain: &Keychain) -> Result<(), Error>
 		);
 
 		let _ = WalletData::with_wallet(&config.data_file_dir, |wallet_data| {
-			let result_vec =
-				find_outputs_with_key(config, keychain, output_listing.outputs.clone());
+			let result_vec = find_outputs_with_key(
+				config,
+				keychain,
+				output_listing.outputs.clone(),
+				&mut found_key_index,
+			);
 			if result_vec.len() > 0 {
 				for output in result_vec.clone() {
 					let root_key_id = keychain.root_key_id();
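
Note for reviewers: the two ideas in this patch are (1) splitting a long run of id= query parameters into requests of at most max_ids_in_query entries, so no single URL outgrows common HTTP query-string limits, and (2) resuming each restore key-derivation scan just past the highest key index already matched. The sketch below shows both in isolation under stated assumptions: fetch, query_in_chunks and next_start_index are hypothetical stand-ins rather than Grin wallet functions, and the node address is a placeholder; the real code goes through api::client::get and WalletData as in the diff above.

    // Hypothetical stand-in for api::client::get: one call per HTTP request.
    // A real implementation would perform the GET and deserialize the body.
    fn fetch(url: &str) -> Result<Vec<String>, String> {
        Ok(vec![format!("response for {}", url)])
    }

    // Issue one request per chunk of at most `max_ids_in_query` id parameters,
    // mirroring the loop added to checker.rs.
    fn query_in_chunks(
        node_addr: &str,
        id_params: &[String],
        max_ids_in_query: usize,
    ) -> Result<Vec<String>, String> {
        let mut responses = Vec::new();
        for chunk in id_params.chunks(max_ids_in_query) {
            let url = format!("{}/v1/chain/outputs/byids?{}", node_addr, chunk.join("&"));
            responses.extend(fetch(&url)?);
        }
        Ok(responses)
    }

    // Restore-side shortcut from restore.rs: resume the derivation scan just
    // past the highest key index already claimed, assuming the wallet never
    // re-uses gaps in its key index (the TODO in the patch spells this out).
    fn next_start_index(found_key_index: &[u32]) -> usize {
        found_key_index
            .iter()
            .max()
            .map(|m| *m as usize + 1)
            .unwrap_or(1)
    }

    fn main() {
        let id_params: Vec<String> = (0..2500).map(|i| format!("id={:x}", i)).collect();
        // 2500 ids with a 1000-id limit per request => 3 requests issued.
        let responses = query_in_chunks("http://127.0.0.1:13413", &id_params, 1000).unwrap();
        assert_eq!(responses.len(), 3);

        // With key indices 1, 2 and 5 already found, the next scan starts at 6.
        assert_eq!(next_start_index(&[1, 2, 5]), 6);
        assert_eq!(next_start_index(&[]), 1);
    }

Using slice::chunks here is only a compactness choice for the sketch; the patch itself pages with skip/take over a clone of the full parameter list on every pass, which could be tightened up the same way in a follow-up if it ever shows up in profiles.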