diff --git a/Cargo.lock b/Cargo.lock index f3c756aa6..7b30e5a6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -98,6 +98,26 @@ dependencies = [ "safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "bindgen" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aster 0.41.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cexpr 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "clang-sys 0.15.2 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)", + "env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "quasi 0.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "quasi_codegen 0.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", + "syntex_syntax 0.58.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "bindgen" version = "0.29.1" @@ -244,6 +264,17 @@ dependencies = [ "time 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "clang-sys" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", + "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", + "libloading 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "clang-sys" version = "0.21.2" @@ -297,6 +328,25 @@ dependencies = [ "build_const 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "croaring" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "croaring-sys 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "croaring-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bindgen 0.23.1 (registry+https://github.com/rust-lang/crates.io-index)", + "gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crossbeam-deque" version = "0.2.0" @@ -609,6 +659,7 @@ version = "0.2.0" dependencies = [ "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "croaring 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)", "grin_core 0.2.0", "grin_keychain 0.2.0", @@ -643,6 +694,7 @@ dependencies = [ "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "croaring 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "grin_keychain 0.2.0", "grin_util 0.2.0", "grin_wallet 0.2.0", @@ -741,11 +793,13 @@ name = "grin_store" version = "0.2.0" dependencies = [ "byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "croaring 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)", "grin_core 0.2.0", "grin_util 0.2.0", "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.6.2 (git+https://github.com/danburkert/memmap-rs?tag=0.6.2)", + "rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)", "rocksdb 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)", @@ -933,6 +987,11 @@ name = "itoa" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "itoa" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "itoa" version = "0.4.1" @@ -996,6 +1055,17 @@ dependencies = [ "pkg-config 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "libloading" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "target_build_utils 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "libloading" version = "0.4.3" @@ -1676,6 +1746,11 @@ name = "serde" version = "0.8.23" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "serde" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "serde" version = "1.0.66" @@ -1712,6 +1787,17 @@ dependencies = [ "serde 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "serde_json" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", + "itoa 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 0.9.15 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "serde_json" version = "1.0.13" @@ -1869,6 +1955,16 @@ name = "take_mut" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "target_build_utils" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf 0.7.22 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_codegen 0.7.22 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 0.9.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "term" version = "0.4.6" @@ -2308,6 +2404,7 @@ dependencies = [ "checksum backtrace-sys 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "44585761d6161b0f57afc49482ab6bd067e4edef48c12a152c237eb0203f7661" "checksum base64 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "96434f987501f0ed4eb336a411e0631ecd1afa11574fe148587adc4ff96143c9" "checksum base64 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "229d032f1a99302697f10b27167ae6d03d49d032e6a8e2550e8d3fc13356d2b4" +"checksum bindgen 0.23.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a1c9555b6fec94761d3e6a9c633745949fbda8d355a783cedf9f4b8e6a0c8c77" "checksum bindgen 0.29.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ba610cba0c1727ed837316540068b51349b8268c073906067b7c3948598929bd" "checksum bitflags 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1370e9fc2a6ae53aea8b7a5110edbd08836ed87c88736dfabccade1c2b44bff4" "checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf" @@ -2325,12 +2422,15 @@ dependencies = [ "checksum cexpr 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "42aac45e9567d97474a834efdee3081b3c942b2205be932092f53354ce503d6c" "checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de" "checksum chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7c20ebe0b2b08b0aeddba49c609fe7957ba2e33449882cb186a180bc60682fa9" +"checksum clang-sys 0.15.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f048fc517ae55a21e25d8081764fe5795653ac16c63b45e99755f2a6c0378b31" "checksum clang-sys 0.21.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e414af9726e1d11660801e73ccc7fb81803fb5f49e5903a25b348b2b3b480d2e" "checksum clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f16b89cbb9ee36d87483dc939fe9f1e13c05898d56d7b230a0d4dff033a536" "checksum cmake 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)" = "56d741ea7a69e577f6d06b36b7dff4738f680593dc27a701ffa8506b73ce28bb" "checksum conduit-mime-types 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "95ca30253581af809925ef68c2641cc140d6183f43e12e0af4992d53768bd7b8" "checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e" "checksum crc 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bd5d02c0aac6bd68393ed69e00bbc2457f3e89075c6349db7189618dc4ddc1d7" +"checksum croaring 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6492a6db70229be82255d139a58be62096e7f7b337c3d8559e255705bd7c5ad9" +"checksum croaring-sys 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0d4d04d5775d0eebd8bcc4acca9fe712c756260268f63d4807123e0a55c0f8" "checksum crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f739f8c5363aca78cfb059edf753d8f0d36908c348f3d8d1503f03d8b75d9cf3" "checksum crossbeam-deque 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c1bdc73742c36f7f35ebcda81dbb33a7e0d33757d03a06d9ddca762712ec5ea2" "checksum crossbeam-epoch 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "927121f5407de9956180ff5e936fe3cf4324279280001cd56b669d28ee7e9150" @@ -2372,6 +2472,7 @@ dependencies = [ "checksum isatty 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f2a233726c7bb76995cec749d59582e5664823b7245d4970354408f1d79a7a2" "checksum itertools 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)" = "f58856976b776fedd95533137617a02fb25719f40e7d9b01c7043cd65474f450" "checksum itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ae3088ea4baeceb0284ee9eea42f591226e6beaecf65373e41b38d95a1b8e7a1" +"checksum itoa 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8324a32baf01e2ae060e9de58ed0bc2320c9a2833491ee36cd3b4c414de4db8c" "checksum itoa 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c069bbec61e1ca5a596166e55dfe4773ff745c3d16b700013bcaff9a6df2c682" "checksum jsonrpc-core 8.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ddf83704f4e79979a424d1082dd2c1e52683058056c9280efa19ac5f6bc9033c" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" @@ -2381,6 +2482,7 @@ dependencies = [ "checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef" "checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b" "checksum libgit2-sys 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9cc8a747e1d0254ef5eb71330fcb8fb25b8b8f8dc1981379b7bb06d6f006672e" +"checksum libloading 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0a020ac941774eb37e9d13d418c37b522e76899bfc4e7b1a600d529a53f83a66" "checksum libloading 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fd38073de8f7965d0c17d30546d4bb6da311ab428d1c7a3fc71dff7f9d4979b9" "checksum librocksdb-sys 5.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1b0fa7f4ca5ceff76237db63802cb073d84e64a327e38478e79a669093fb7fa5" "checksum libz-sys 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)" = "87f737ad6cc6fd6eefe3d9dc5412f1573865bded441300904d2f42269e140f16" @@ -2464,10 +2566,12 @@ dependencies = [ "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" "checksum sequence_trie 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c915714ca833b1d4d6b8f6a9d72a3ff632fe45b40a8d184ef79c81bec6327eed" "checksum serde 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)" = "9dad3f759919b92c3068c696c15c3d17238234498bbdcc80f2c469606f948ac8" +"checksum serde 0.9.15 (registry+https://github.com/rust-lang/crates.io-index)" = "34b623917345a631dc9608d5194cc206b3fe6c3554cd1c75b937e55e285254af" "checksum serde 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)" = "e9a2d9a9ac5120e0f768801ca2b58ad6eec929dc9d1d616c162f208869c2ce95" "checksum serde_derive 1.0.35 (registry+https://github.com/rust-lang/crates.io-index)" = "90f1f8f7784452461db5b73dc5097c18f21011fbcc6d1178f1897bfa8e1cb4bd" "checksum serde_derive_internals 0.22.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f9525ada08124ee1a9b8b1e6f3bf035ffff6fc0c96d56ddda98d4506d3533e4" "checksum serde_json 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)" = "67f7d2e9edc3523a9c8ec8cd6ec481b3a27810aafee3e625d311febd3e656b4c" +"checksum serde_json 0.9.10 (registry+https://github.com/rust-lang/crates.io-index)" = "ad8bcf487be7d2e15d3d543f04312de991d631cfe1b43ea0ade69e6a8a5b16a1" "checksum serde_json 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "5c508584d9913df116b91505eec55610a2f5b16e9ed793c46e4d0152872b3e74" "checksum siphasher 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0df90a788073e8d0235a67e50441d47db7c8ad9debd91cbf43736a2a92d36537" "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23" @@ -2488,6 +2592,7 @@ dependencies = [ "checksum syntex_syntax 0.58.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6e0e4dbae163dd98989464c23dd503161b338790640e11537686f2ef0f25c791" "checksum take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b157868d8ac1f56b64604539990685fa7611d8fa9e5476cf0c02cf34d32917c5" "checksum take_mut 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" +"checksum target_build_utils 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "013d134ae4a25ee744ad6129db589018558f620ddfa44043887cdd45fa08e75c" "checksum term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "fa63644f74ce96fbeb9b794f66aff2a52d601cbd5e80f4b97123e3899f4570f1" "checksum term 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5e6b677dd1e8214ea1ef4297f85dbcbed8e8cdddb561040cc998ca2551c37561" "checksum termcolor 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "adc4587ead41bf016f11af03e55a624c06568b5a19db4e90fde573d805074f83" diff --git a/chain/Cargo.toml b/chain/Cargo.toml index 10f252c4c..7e054b30f 100644 --- a/chain/Cargo.toml +++ b/chain/Cargo.toml @@ -8,6 +8,7 @@ publish = false [dependencies] bitflags = "1" byteorder = "1" +croaring = "0.3" slog = { version = "~2.2", features = ["max_level_trace", "release_max_level_trace"] } serde = "1" serde_derive = "1" diff --git a/chain/src/chain.rs b/chain/src/chain.rs index b44783fef..b5001d62e 100644 --- a/chain/src/chain.rs +++ b/chain/src/chain.rs @@ -168,10 +168,13 @@ impl Chain { let head = store.head(); // open the txhashset, creating a new one if necessary - let mut txhashset = txhashset::TxHashSet::open(db_root.clone(), store.clone())?; + let mut txhashset = txhashset::TxHashSet::open(db_root.clone(), store.clone(), None)?; match head { Ok(head) => { + // TODO - consolidate head vs head_header here. + let head_header = store.head_header()?; + let mut head = head; loop { // Use current chain tip if we have one. @@ -187,7 +190,7 @@ impl Chain { header.height, ); - extension.rewind(&header)?; + extension.rewind(&header, &head_header)?; extension.validate_roots(&header)?; @@ -473,16 +476,18 @@ impl Chain { Ok(bh.height + 1) } - /// Validate a vector of "raw" transactions against the current chain state. + /// Validate a vec of "raw" transactions against the current chain state. + /// Specifying a "pre_tx" if we need to adjust the state, for example when + /// validating the txs in the stempool we adjust the state based on the + /// txpool. pub fn validate_raw_txs( &self, txs: Vec<Transaction>, pre_tx: Option<Transaction>, ) -> Result<Vec<Transaction>, Error> { - let height = self.next_block_height()?; let mut txhashset = self.txhashset.write().unwrap(); txhashset::extending_readonly(&mut txhashset, |extension| { - let valid_txs = extension.validate_raw_txs(txs, pre_tx, height)?; + let valid_txs = extension.validate_raw_txs(txs, pre_tx)?; Ok(valid_txs) }) } @@ -525,7 +530,8 @@ impl Chain { // Rewind the extension to the specified header to ensure the view is // consistent. txhashset::extending_readonly(&mut txhashset, |extension| { - extension.rewind(&header)?; + // TODO - is this rewind guaranteed to be redundant now? + extension.rewind(&header, &header)?; extension.validate(&header, skip_rproofs)?; Ok(()) }) @@ -589,6 +595,23 @@ impl Chain { txhashset.indexes_at(&h)? }; + // now we want to rewind the txhashset extension and + // sync a "rewound" copy of the leaf_set files to disk + // so we can send these across as part of the zip file. + // The fast sync client does *not* have the necessary data + // to rewind after receiving the txhashset zip. + { + let head_header = self.store.head_header()?; + let header = self.store.get_block_header(&h)?; + + let mut txhashset = self.txhashset.write().unwrap(); + txhashset::extending_readonly(&mut txhashset, |extension| { + extension.rewind(&header, &head_header)?; + extension.snapshot(&header)?; + Ok(()) + })?; + } + // prepares the zip and return the corresponding Read let txhashset_reader = txhashset::zip_read(self.db_root.clone())?; Ok((marker.output_pos, marker.kernel_pos, txhashset_reader)) @@ -628,11 +651,14 @@ impl Chain { "Going to validate new txhashset, might take some time..." ); - let mut txhashset = txhashset::TxHashSet::open(self.db_root.clone(), self.store.clone())?; + let mut txhashset = + txhashset::TxHashSet::open(self.db_root.clone(), self.store.clone(), Some(&header))?; // Note: we are validating against a writeable extension. txhashset::extending(&mut txhashset, |extension| { - extension.rewind(&header)?; + // TODO do we need to rewind here? We have no blocks to rewind + // (and we need them for the pos to unremove) + extension.rewind(&header, &header)?; let (output_sum, kernel_sum) = extension.validate(&header, false)?; extension.save_latest_block_sums(&header, output_sum, kernel_sum)?; extension.rebuild_index()?; @@ -709,9 +735,12 @@ impl Chain { match self.store.get_block(¤t.hash()) { Ok(b) => { count += 1; + + // TODO - consider wrapping these up in a single fn call? self.store.delete_block(&b.hash())?; self.store.delete_block_marker(&b.hash())?; self.store.delete_block_sums(&b.hash())?; + self.store.delete_block_input_bitmap(&b.hash())?; } Err(NotFoundErr) => { break; diff --git a/chain/src/lib.rs b/chain/src/lib.rs index 94d452874..cdd0b44bc 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -23,6 +23,7 @@ #[macro_use] extern crate bitflags; extern crate byteorder; +extern crate croaring; extern crate lru_cache; extern crate serde; #[macro_use] diff --git a/chain/src/pipe.rs b/chain/src/pipe.rs index 13c4cfcac..8eb0d1501 100644 --- a/chain/src/pipe.rs +++ b/chain/src/pipe.rs @@ -377,7 +377,11 @@ fn validate_block_via_txhashset(b: &Block, ext: &mut txhashset::Extension) -> Re fn add_block(b: &Block, ctx: &mut BlockContext) -> Result<(), Error> { ctx.store .save_block(b) - .map_err(|e| Error::StoreErr(e, "pipe save block".to_owned())) + .map_err(|e| Error::StoreErr(e, "pipe save block".to_owned()))?; + ctx.store + .save_block_input_bitmap(&b) + .map_err(|e| Error::StoreErr(e, "pipe save block input bitmap".to_owned()))?; + Ok(()) } /// Officially adds the block header to our header chain. @@ -505,40 +509,41 @@ pub fn rewind_and_apply_fork( // extending a fork, first identify the block where forking occurred // keeping the hashes of blocks along the fork let mut current = b.header.previous; - let mut hashes = vec![]; + let mut fork_hashes = vec![]; loop { let curr_header = store.get_block_header(¤t)?; if let Ok(_) = store.is_on_current_chain(&curr_header) { break; } else { - hashes.insert(0, (curr_header.height, curr_header.hash())); + fork_hashes.insert(0, (curr_header.height, curr_header.hash())); current = curr_header.previous; } } - let forked_block = store.get_block_header(¤t)?; + let head_header = store.head_header()?; + let forked_header = store.get_block_header(¤t)?; trace!( LOGGER, "rewind_and_apply_fork @ {} [{}], was @ {} [{}]", - forked_block.height, - forked_block.hash(), + forked_header.height, + forked_header.hash(), b.header.height, b.header.hash() ); // rewind the sum trees up to the forking block - ext.rewind(&forked_block)?; + ext.rewind(&forked_header, &head_header)?; trace!( LOGGER, "rewind_and_apply_fork: blocks on fork: {:?}", - hashes, + fork_hashes, ); // apply all forked blocks, including this new one - for (_, h) in hashes { + for (_, h) in fork_hashes { let fb = store .get_block(&h) .map_err(|e| Error::StoreErr(e, format!("getting forked blocks")))?; diff --git a/chain/src/store.rs b/chain/src/store.rs index 19943d184..d621fcb22 100644 --- a/chain/src/store.rs +++ b/chain/src/store.rs @@ -16,6 +16,7 @@ use std::sync::{Arc, RwLock}; +use croaring::Bitmap; use lru_cache::LruCache; use util::secp::pedersen::Commitment; @@ -38,12 +39,14 @@ const HEADER_HEIGHT_PREFIX: u8 = '8' as u8; const COMMIT_POS_PREFIX: u8 = 'c' as u8; const BLOCK_MARKER_PREFIX: u8 = 'm' as u8; const BLOCK_SUMS_PREFIX: u8 = 'M' as u8; +const BLOCK_INPUT_BITMAP_PREFIX: u8 = 'B' as u8; /// An implementation of the ChainStore trait backed by a simple key-value /// store. pub struct ChainKVStore { db: grin_store::Store, header_cache: Arc<RwLock<LruCache<Hash, BlockHeader>>>, + block_input_bitmap_cache: Arc<RwLock<LruCache<Hash, Vec<u8>>>>, } impl ChainKVStore { @@ -52,9 +55,45 @@ impl ChainKVStore { let db = grin_store::Store::open(format!("{}/{}", root_path, STORE_SUBPATH).as_str())?; Ok(ChainKVStore { db, - header_cache: Arc::new(RwLock::new(LruCache::new(100))), + header_cache: Arc::new(RwLock::new(LruCache::new(1_000))), + block_input_bitmap_cache: Arc::new(RwLock::new(LruCache::new(1_000))), }) } + + fn get_block_header_db(&self, h: &Hash) -> Result<BlockHeader, Error> { + option_to_not_found( + self.db + .get_ser(&to_key(BLOCK_HEADER_PREFIX, &mut h.to_vec())), + ) + } + + fn build_block_input_bitmap(&self, block: &Block) -> Result<Bitmap, Error> { + let bitmap = block + .inputs + .iter() + .filter_map(|x| self.get_output_pos(&x.commitment()).ok()) + .map(|x| x as u32) + .collect(); + Ok(bitmap) + } + + // Get the block input bitmap from the db or build the bitmap from + // the full block from the db (if the block is found). + fn get_block_input_bitmap_db(&self, bh: &Hash) -> Result<Bitmap, Error> { + if let Ok(Some(bytes)) = self.db + .get(&to_key(BLOCK_INPUT_BITMAP_PREFIX, &mut bh.to_vec())) + { + Ok(Bitmap::deserialize(&bytes)) + } else { + match self.get_block(bh) { + Ok(block) => { + let bitmap = self.save_block_input_bitmap(&block)?; + Ok(bitmap) + } + Err(e) => Err(e), + } + } + } } impl ChainStore for ChainKVStore { @@ -128,21 +167,15 @@ impl ChainStore for ChainKVStore { } } - let header: Result<BlockHeader, Error> = option_to_not_found( - self.db - .get_ser(&to_key(BLOCK_HEADER_PREFIX, &mut h.to_vec())), - ); - - // cache miss - so adding to the cache for next time - if let Ok(header) = header { - { - let mut header_cache = self.header_cache.write().unwrap(); - header_cache.insert(*h, header.clone()); - } - Ok(header) - } else { - header + // cache miss - get it from db and cache it for next time (if we found one in + // db) + let res = self.get_block_header_db(h); + if let Ok(header) = res { + let mut header_cache = self.header_cache.write().unwrap(); + header_cache.insert(*h, header.clone()); + return Ok(header); } + res } /// Save the block and its header @@ -183,10 +216,17 @@ impl ChainStore for ChainKVStore { } fn save_block_header(&self, bh: &BlockHeader) -> Result<(), Error> { - self.db.put_ser( - &to_key(BLOCK_HEADER_PREFIX, &mut bh.hash().to_vec())[..], - bh, - ) + let hash = bh.hash(); + self.db + .put_ser(&to_key(BLOCK_HEADER_PREFIX, &mut hash.to_vec())[..], bh)?; + + // Write the block_header to the cache also. + { + let mut header_cache = self.header_cache.write().unwrap(); + header_cache.insert(hash, bh.clone()); + } + + Ok(()) } fn get_header_by_height(&self, height: u64) -> Result<BlockHeader, Error> { @@ -255,6 +295,46 @@ impl ChainStore for ChainKVStore { self.db.delete(&to_key(BLOCK_SUMS_PREFIX, &mut bh.to_vec())) } + fn get_block_input_bitmap(&self, bh: &Hash) -> Result<Bitmap, Error> { + { + let mut cache = self.block_input_bitmap_cache.write().unwrap(); + + // cache hit - return the value from the cache + if let Some(bytes) = cache.get_mut(bh) { + return Ok(Bitmap::deserialize(&bytes)); + } + } + + // cache miss - get it from db and cache it for next time + // if we found one in db + let res = self.get_block_input_bitmap_db(bh); + if let Ok(bitmap) = res { + let mut cache = self.block_input_bitmap_cache.write().unwrap(); + cache.insert(*bh, bitmap.serialize()); + return Ok(bitmap); + } + res + } + + fn save_block_input_bitmap(&self, block: &Block) -> Result<Bitmap, Error> { + let hash = block.hash(); + let bitmap = self.build_block_input_bitmap(block)?; + self.db.put( + &to_key(BLOCK_INPUT_BITMAP_PREFIX, &mut hash.to_vec())[..], + bitmap.serialize(), + )?; + { + let mut cache = self.block_input_bitmap_cache.write().unwrap(); + cache.insert(hash, bitmap.serialize()); + } + Ok(bitmap) + } + + fn delete_block_input_bitmap(&self, bh: &Hash) -> Result<(), Error> { + self.db + .delete(&to_key(BLOCK_INPUT_BITMAP_PREFIX, &mut bh.to_vec())) + } + /// Maintain consistency of the "header_by_height" index by traversing back /// through the current chain and updating "header_by_height" until we reach /// a block_header that is consistent with its height (everything prior to diff --git a/chain/src/txhashset.rs b/chain/src/txhashset.rs index 7c9b6fe60..336dfd529 100644 --- a/chain/src/txhashset.rs +++ b/chain/src/txhashset.rs @@ -22,6 +22,8 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Instant; +use croaring::Bitmap; + use util::secp::pedersen::{Commitment, RangeProof}; use core::core::committed::Committed; @@ -56,10 +58,14 @@ impl<T> PMMRHandle<T> where T: PMMRable + ::std::fmt::Debug, { - fn new(root_dir: String, file_name: &str) -> Result<PMMRHandle<T>, Error> { + fn new( + root_dir: String, + file_name: &str, + header: Option<&BlockHeader>, + ) -> Result<PMMRHandle<T>, Error> { let path = Path::new(&root_dir).join(TXHASHSET_SUBDIR).join(file_name); fs::create_dir_all(path.clone())?; - let be = PMMRBackend::new(path.to_str().unwrap().to_string())?; + let be = PMMRBackend::new(path.to_str().unwrap().to_string(), header)?; let sz = be.unpruned_size()?; Ok(PMMRHandle { backend: be, @@ -89,7 +95,11 @@ pub struct TxHashSet { impl TxHashSet { /// Open an existing or new set of backends for the TxHashSet - pub fn open(root_dir: String, commit_index: Arc<ChainStore>) -> Result<TxHashSet, Error> { + pub fn open( + root_dir: String, + commit_index: Arc<ChainStore>, + header: Option<&BlockHeader>, + ) -> Result<TxHashSet, Error> { let output_file_path: PathBuf = [&root_dir, TXHASHSET_SUBDIR, OUTPUT_SUBDIR] .iter() .collect(); @@ -106,9 +116,9 @@ impl TxHashSet { fs::create_dir_all(kernel_file_path.clone())?; Ok(TxHashSet { - output_pmmr_h: PMMRHandle::new(root_dir.clone(), OUTPUT_SUBDIR)?, - rproof_pmmr_h: PMMRHandle::new(root_dir.clone(), RANGE_PROOF_SUBDIR)?, - kernel_pmmr_h: PMMRHandle::new(root_dir.clone(), KERNEL_SUBDIR)?, + output_pmmr_h: PMMRHandle::new(root_dir.clone(), OUTPUT_SUBDIR, header)?, + rproof_pmmr_h: PMMRHandle::new(root_dir.clone(), RANGE_PROOF_SUBDIR, header)?, + kernel_pmmr_h: PMMRHandle::new(root_dir.clone(), KERNEL_SUBDIR, None)?, commit_index, }) } @@ -216,26 +226,38 @@ impl TxHashSet { /// Compact the MMR data files and flush the rm logs pub fn compact(&mut self) -> Result<(), Error> { let commit_index = self.commit_index.clone(); - let head = commit_index.head()?; - let current_height = head.height; + let head_header = commit_index.head_header()?; + let current_height = head_header.height; // horizon for compacting is based on current_height - let horizon = (current_height as u32).saturating_sub(global::cut_through_horizon()); + let horizon = current_height.saturating_sub(global::cut_through_horizon().into()); + let horizon_header = self.commit_index.get_header_by_height(horizon)?; + let horizon_marker = self.commit_index.get_block_marker(&horizon_header.hash())?; + + let rewind_add_pos = + output_pos_to_rewind(self.commit_index.clone(), &horizon_header, &head_header)?; + let rewind_rm_pos = + input_pos_to_rewind(self.commit_index.clone(), &horizon_header, &head_header)?; let clean_output_index = |commit: &[u8]| { // do we care if this fails? let _ = commit_index.delete_output_pos(commit); }; - let min_rm = (horizon / 10) as usize; + self.output_pmmr_h.backend.check_compact( + horizon_marker.output_pos, + &rewind_add_pos, + &rewind_rm_pos, + clean_output_index, + )?; - self.output_pmmr_h - .backend - .check_compact(min_rm, horizon, clean_output_index)?; + self.rproof_pmmr_h.backend.check_compact( + horizon_marker.output_pos, + &rewind_add_pos, + &rewind_rm_pos, + &prune_noop, + )?; - self.rproof_pmmr_h - .backend - .check_compact(min_rm, horizon, &prune_noop)?; Ok(()) } } @@ -404,6 +426,22 @@ impl<'a> Extension<'a> { } } + // Rewind the MMR backend to undo applying a raw tx to the txhashset extension. + // This is used during txpool validation to undo an invalid tx. + fn rewind_raw_tx( + &mut self, + output_pos: u64, + kernel_pos: u64, + rewind_rm_pos: &Bitmap, + ) -> Result<(), Error> { + let latest_output_pos = self.output_pmmr.unpruned_size(); + let rewind_add_pos: Bitmap = ((output_pos + 1)..(latest_output_pos + 1)) + .map(|x| x as u32) + .collect(); + self.rewind_to_pos(output_pos, kernel_pos, &rewind_add_pos, rewind_rm_pos)?; + Ok(()) + } + /// Apply a "raw" transaction to the txhashset. /// We will never commit a txhashset extension that includes raw txs. /// But we can use this when validating txs in the tx pool. @@ -411,7 +449,7 @@ impl<'a> Extension<'a> { /// aggregated tx from the tx pool to the current chain state (via a /// txhashset extension) then we know the tx pool is valid (including the /// new tx). - pub fn apply_raw_tx(&mut self, tx: &Transaction, height: u64) -> Result<(), Error> { + pub fn apply_raw_tx(&mut self, tx: &Transaction) -> Result<(), Error> { // This should *never* be called on a writeable extension... if !self.rollback { panic!("attempted to apply a raw tx to a writeable txhashset extension"); @@ -422,27 +460,30 @@ impl<'a> Extension<'a> { let output_pos = self.output_pmmr.unpruned_size(); let kernel_pos = self.kernel_pmmr.unpruned_size(); - let rewind_to_height = height - 1; + // Build bitmap of output pos spent (as inputs) by this tx for rewind. + let rewind_rm_pos = tx.inputs + .iter() + .filter_map(|x| self.get_output_pos(&x.commitment()).ok()) + .map(|x| x as u32) + .collect(); - // When applying blocks we can apply the coinbase output first - // but we cannot do this here, so we need to apply outputs first. for ref output in &tx.outputs { if let Err(e) = self.apply_output(output) { - self.rewind_to_pos(output_pos, kernel_pos, rewind_to_height)?; + self.rewind_raw_tx(output_pos, kernel_pos, &rewind_rm_pos)?; return Err(e); } } for ref input in &tx.inputs { - if let Err(e) = self.apply_input(input, height) { - self.rewind_to_pos(output_pos, kernel_pos, rewind_to_height)?; + if let Err(e) = self.apply_input(input) { + self.rewind_raw_tx(output_pos, kernel_pos, &rewind_rm_pos)?; return Err(e); } } for ref kernel in &tx.kernels { if let Err(e) = self.apply_kernel(kernel) { - self.rewind_to_pos(output_pos, kernel_pos, rewind_to_height)?; + self.rewind_raw_tx(output_pos, kernel_pos, &rewind_rm_pos)?; return Err(e); } } @@ -471,18 +512,21 @@ impl<'a> Extension<'a> { &mut self, txs: Vec<Transaction>, pre_tx: Option<Transaction>, - height: u64, ) -> Result<Vec<Transaction>, Error> { - let mut height = height; let mut valid_txs = vec![]; + + // First apply the "pre_tx" to account for any state that need adding to + // the chain state before we can validate our vec of txs. + // This is the aggregate tx from the txpool if we are validating the stempool. if let Some(tx) = pre_tx { - self.apply_raw_tx(&tx, height)?; - height += 1; + self.apply_raw_tx(&tx)?; } + + // Now validate each tx, rewinding any tx (and only that tx) + // if it fails to validate successfully. for tx in txs { - if self.apply_raw_tx(&tx, height).is_ok() { + if self.apply_raw_tx(&tx).is_ok() { valid_txs.push(tx); - height += 1; } } Ok(valid_txs) @@ -523,7 +567,7 @@ impl<'a> Extension<'a> { // then doing inputs guarantees an input can't spend an output in the // same block, enforcing block cut-through for input in &b.inputs { - self.apply_input(input, b.header.height)?; + self.apply_input(input)?; } // now all regular, non coinbase outputs @@ -559,7 +603,7 @@ impl<'a> Extension<'a> { Ok(()) } - fn apply_input(&mut self, input: &Input, height: u64) -> Result<(), Error> { + fn apply_input(&mut self, input: &Input) -> Result<(), Error> { let commit = input.commitment(); let pos_res = self.get_output_pos(&commit); if let Ok(pos) = pos_res { @@ -580,10 +624,10 @@ impl<'a> Extension<'a> { // Now prune the output_pmmr, rproof_pmmr and their storage. // Input is not valid if we cannot prune successfully (to spend an unspent // output). - match self.output_pmmr.prune(pos, height as u32) { + match self.output_pmmr.prune(pos) { Ok(true) => { self.rproof_pmmr - .prune(pos, height as u32) + .prune(pos) .map_err(|s| Error::TxHashSetErr(s))?; } Ok(false) => return Err(Error::AlreadySpent(commit)), @@ -659,7 +703,8 @@ impl<'a> Extension<'a> { ); // rewind to the specified block for a consistent view - self.rewind(block_header)?; + let head_header = self.commit_index.head_header()?; + self.rewind(block_header, &head_header)?; // then calculate the Merkle Proof based on the known pos let pos = self.get_output_pos(&output.commit)?; @@ -670,17 +715,57 @@ impl<'a> Extension<'a> { Ok(merkle_proof) } - /// Rewinds the MMRs to the provided block, using the last output and - /// last kernel of the block we want to rewind to. - pub fn rewind(&mut self, block_header: &BlockHeader) -> Result<(), Error> { - let hash = block_header.hash(); - let height = block_header.height; - trace!(LOGGER, "Rewind to header {} @ {}", height, hash); + /// Saves a snapshot of the output and rangeproof MMRs to disk. + /// Specifically - saves a snapshot of the utxo file, tagged with + /// the block hash as filename suffix. + /// Needed for fast-sync (utxo file needs to be rewound before sending + /// across). + pub fn snapshot(&mut self, header: &BlockHeader) -> Result<(), Error> { + self.output_pmmr + .snapshot(header) + .map_err(|e| Error::Other(e))?; + self.rproof_pmmr + .snapshot(header) + .map_err(|e| Error::Other(e))?; + Ok(()) + } - // rewind our MMRs to the appropriate pos - // based on block height and block marker + /// Rewinds the MMRs to the provided block, rewinding to the last output pos + /// and last kernel pos of that block. + pub fn rewind( + &mut self, + block_header: &BlockHeader, + head_header: &BlockHeader, + ) -> Result<(), Error> { + let hash = block_header.hash(); + trace!( + LOGGER, + "Rewind to header {} @ {}", + block_header.height, + hash + ); + + // Rewind our MMRs to the appropriate positions + // based on the block_marker. let marker = self.commit_index.get_block_marker(&hash)?; - self.rewind_to_pos(marker.output_pos, marker.kernel_pos, height)?; + + // We need to build bitmaps of added and removed output positions + // so we can correctly rewind all operations applied to the output MMR + // after the position we are rewinding to (these operations will be + // undone during rewind). + // Rewound output pos will be removed from the MMR. + // Rewound input (spent) pos will be added back to the MMR. + let rewind_add_pos = + output_pos_to_rewind(self.commit_index.clone(), block_header, head_header)?; + let rewind_rm_pos = + input_pos_to_rewind(self.commit_index.clone(), block_header, head_header)?; + + self.rewind_to_pos( + marker.output_pos, + marker.kernel_pos, + &rewind_add_pos, + &rewind_rm_pos, + )?; Ok(()) } @@ -691,24 +776,29 @@ impl<'a> Extension<'a> { &mut self, output_pos: u64, kernel_pos: u64, - height: u64, + rewind_add_pos: &Bitmap, + rewind_rm_pos: &Bitmap, ) -> Result<(), Error> { trace!( LOGGER, - "Rewind txhashset to output {}, kernel {}, height {}", + "Rewind txhashset to output {}, kernel {}", output_pos, kernel_pos, - height ); + // Remember to "rewind" our new_output_commits + // in case we are rewinding state that has not yet + // been sync'd to disk. + self.new_output_commits.retain(|_, &mut v| v <= output_pos); + self.output_pmmr - .rewind(output_pos, height as u32) + .rewind(output_pos, rewind_add_pos, rewind_rm_pos) .map_err(&Error::TxHashSetErr)?; self.rproof_pmmr - .rewind(output_pos, height as u32) + .rewind(output_pos, rewind_add_pos, rewind_rm_pos) .map_err(&Error::TxHashSetErr)?; self.kernel_pmmr - .rewind(kernel_pos, height as u32) + .rewind(kernel_pos, rewind_add_pos, rewind_rm_pos) .map_err(&Error::TxHashSetErr)?; Ok(()) @@ -963,3 +1053,50 @@ pub fn zip_write(root_dir: String, txhashset_data: File) -> Result<(), Error> { fs::create_dir_all(txhashset_path.clone())?; zip::decompress(txhashset_data, &txhashset_path).map_err(|ze| Error::Other(ze.to_string())) } + +/// Given a block header to rewind to and the block header at the +/// head of the current chain state, we need to calculate the positions +/// of all outputs we need to "undo" during a rewind. +/// The MMR is append-only so we can simply look for all positions added after +/// the rewind pos. +fn output_pos_to_rewind( + commit_index: Arc<ChainStore>, + block_header: &BlockHeader, + head_header: &BlockHeader, +) -> Result<Bitmap, Error> { + let marker_to = commit_index.get_block_marker(&head_header.hash())?; + let marker_from = commit_index.get_block_marker(&block_header.hash())?; + Ok(((marker_from.output_pos + 1)..=marker_to.output_pos) + .map(|x| x as u32) + .collect()) +} + +/// Given a block header to rewind to and the block header at the +/// head of the current chain state, we need to calculate the positions +/// of all inputs (spent outputs) we need to "undo" during a rewind. +/// We do this by leveraging the "block_input_bitmap" cache and OR'ing +/// the set of bitmaps together for the set of blocks being rewound. +fn input_pos_to_rewind( + commit_index: Arc<ChainStore>, + block_header: &BlockHeader, + head_header: &BlockHeader, +) -> Result<Bitmap, Error> { + let mut bitmap = Bitmap::create(); + let mut current = head_header.hash(); + loop { + if current == block_header.hash() { + break; + } + + // We cache recent block headers and block_input_bitmaps + // internally in our db layer (commit_index). + // I/O should be minimized or eliminated here for most + // rewind scenarios. + let current_header = commit_index.get_block_header(¤t)?; + let input_bitmap = commit_index.get_block_input_bitmap(¤t)?; + + bitmap.or_inplace(&input_bitmap); + current = current_header.previous; + } + Ok(bitmap) +} diff --git a/chain/src/types.rs b/chain/src/types.rs index 198a94e9b..bb45566c0 100644 --- a/chain/src/types.rs +++ b/chain/src/types.rs @@ -16,6 +16,8 @@ use std::{error, fmt, io}; +use croaring::Bitmap; + use util::secp; use util::secp::pedersen::Commitment; use util::secp_static; @@ -346,6 +348,15 @@ pub trait ChainStore: Send + Sync { /// Delete block sums for the given block hash. fn delete_block_sums(&self, bh: &Hash) -> Result<(), store::Error>; + /// Get the bitmap representing the inputs for the specified block. + fn get_block_input_bitmap(&self, bh: &Hash) -> Result<Bitmap, store::Error>; + + /// Save the bitmap representing the inputs for the specified block. + fn save_block_input_bitmap(&self, b: &Block) -> Result<Bitmap, store::Error>; + + /// Delete the bitmap representing the inputs for the specified block. + fn delete_block_input_bitmap(&self, bh: &Hash) -> Result<(), store::Error>; + /// Saves the provided block header at the corresponding height. Also check /// the consistency of the height chain in store by assuring previous /// headers are also at their respective heights. diff --git a/chain/tests/test_txhashset_raw_txs.rs b/chain/tests/test_txhashset_raw_txs.rs index 73a7b210d..c7fb913b4 100644 --- a/chain/tests/test_txhashset_raw_txs.rs +++ b/chain/tests/test_txhashset_raw_txs.rs @@ -40,7 +40,7 @@ fn test_some_raw_txs() { clean_output_dir(&db_root); let store = Arc::new(ChainKVStore::new(db_root.clone()).unwrap()); - let mut txhashset = TxHashSet::open(db_root.clone(), store.clone()).unwrap(); + let mut txhashset = TxHashSet::open(db_root.clone(), store.clone(), None).unwrap(); let keychain = ExtKeychain::from_random_seed().unwrap(); let key_id1 = keychain.derive_key_id(1).unwrap(); @@ -124,10 +124,10 @@ fn test_some_raw_txs() { // Note: we pass in an increasing "height" here so we can rollback // each tx individually as necessary, while maintaining a long lived // txhashset extension. - assert!(extension.apply_raw_tx(&tx1, 3).is_ok()); - assert!(extension.apply_raw_tx(&tx2, 4).is_err()); - assert!(extension.apply_raw_tx(&tx3, 5).is_ok()); - assert!(extension.apply_raw_tx(&tx4, 6).is_ok()); + assert!(extension.apply_raw_tx(&tx1).is_ok()); + assert!(extension.apply_raw_tx(&tx2).is_err()); + assert!(extension.apply_raw_tx(&tx3).is_ok()); + assert!(extension.apply_raw_tx(&tx4).is_ok()); Ok(()) }); } diff --git a/core/Cargo.toml b/core/Cargo.toml index df087fb79..8ac907954 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -9,6 +9,7 @@ publish = false bitflags = "1" blake2-rfc = "0.2" byteorder = "1" +croaring = "0.3" lazy_static = "1" num-bigint = "0.2" rand = "0.3" diff --git a/core/src/core/mod.rs b/core/src/core/mod.rs index eb42534cb..f3047493a 100644 --- a/core/src/core/mod.rs +++ b/core/src/core/mod.rs @@ -19,8 +19,10 @@ pub mod committed; pub mod hash; pub mod id; pub mod pmmr; +pub mod prune_list; pub mod target; pub mod transaction; + use consensus::GRIN_BASE; #[allow(dead_code)] use rand::{thread_rng, Rng}; diff --git a/core/src/core/pmmr.rs b/core/src/core/pmmr.rs index 72ea526af..e41ea470c 100644 --- a/core/src/core/pmmr.rs +++ b/core/src/core/pmmr.rs @@ -35,7 +35,10 @@ //! The underlying Hashes are stored in a Backend implementation that can //! either be a simple Vec or a database. +use croaring::Bitmap; + use core::hash::Hash; +use core::BlockHeader; use ser::{self, PMMRIndexHashable, PMMRable, Readable, Reader, Writeable, Writer}; use std::clone::Clone; @@ -58,9 +61,15 @@ where /// Rewind the backend state to a previous position, as if all append /// operations after that had been canceled. Expects a position in the PMMR - /// to rewind to as well as the consumer-provided index of when the change - /// occurred (see remove). - fn rewind(&mut self, position: u64, index: u32) -> Result<(), String>; + /// to rewind to as well as bitmaps representing the positions added and + /// removed since the rewind position. These are what we will "undo" + /// during the rewind. + fn rewind( + &mut self, + position: u64, + rewind_add_pos: &Bitmap, + rewind_rm_pos: &Bitmap, + ) -> Result<(), String>; /// Get a Hash by insertion position. fn get_hash(&self, position: u64) -> Option<Hash>; @@ -76,17 +85,23 @@ where /// (ignoring the remove log). fn get_data_from_file(&self, position: u64) -> Option<T>; - /// Remove HashSums by insertion position. An index is also provided so the + /// Remove Hash by insertion position. An index is also provided so the /// underlying backend can implement some rollback of positions up to a /// given index (practically the index is the height of a block that /// triggered removal). - fn remove(&mut self, positions: Vec<u64>, index: u32) -> Result<(), String>; + fn remove(&mut self, position: u64) -> Result<(), String>; /// Returns the data file path.. this is a bit of a hack now that doesn't /// sit well with the design, but TxKernels have to be summed and the /// fastest way to to be able to allow direct access to the file fn get_data_file_path(&self) -> String; + /// Also a bit of a hack... + /// Saves a snapshot of the rewound utxo file with the block hash as + /// filename suffix. We need this when sending a txhashset zip file to a + /// node for fast sync. + fn snapshot(&self, header: &BlockHeader) -> Result<(), String>; + /// For debugging purposes so we can see how compaction is doing. fn dump_stats(&self); } @@ -388,7 +403,6 @@ where pos += 1; current_hash = (left_hash, current_hash).hash_with_index(pos - 1); - to_append.push((current_hash, None)); } @@ -398,61 +412,51 @@ where Ok(elmt_pos) } + /// Saves a snaphost of the MMR tagged with the block hash. + /// Specifically - snapshots the utxo file as we need this rewound before + /// sending the txhashset zip file to another node for fast-sync. + pub fn snapshot(&mut self, header: &BlockHeader) -> Result<(), String> { + self.backend.snapshot(header)?; + Ok(()) + } + /// Rewind the PMMR to a previous position, as if all push operations after - /// that had been canceled. Expects a position in the PMMR to rewind to as - /// well as the consumer-provided index of when the change occurred. - pub fn rewind(&mut self, position: u64, index: u32) -> Result<(), String> { - // identify which actual position we should rewind to as the provided - // position is a leaf, which may had some parent that needs to exist - // afterward for the MMR to be valid + /// that had been canceled. Expects a position in the PMMR to rewind and + /// bitmaps representing the positions added and removed that we want to + /// "undo". + pub fn rewind( + &mut self, + position: u64, + rewind_add_pos: &Bitmap, + rewind_rm_pos: &Bitmap, + ) -> Result<(), String> { + // Identify which actual position we should rewind to as the provided + // position is a leaf. We traverse the MMR to inclue any parent(s) that + // need to be included for the MMR to be valid. let mut pos = position; while bintree_postorder_height(pos + 1) > 0 { pos += 1; } - self.backend.rewind(pos, index)?; + self.backend.rewind(pos, rewind_add_pos, rewind_rm_pos)?; self.last_pos = pos; Ok(()) } - /// Prune an element from the tree given its position. Note that to be able - /// to provide that position and prune, consumers of this API are expected - /// to keep an index of elements to positions in the tree. Prunes parent - /// nodes as well when they become childless. - pub fn prune(&mut self, position: u64, index: u32) -> Result<bool, String> { - if self.backend.get_hash(position).is_none() { - return Ok(false); - } - let prunable_height = bintree_postorder_height(position); - if prunable_height > 0 { - // only leaves can be pruned + /// Prunes (removes) the leaf from the MMR at the specified position. + /// Returns an error if prune is called on a non-leaf position. + /// Returns false if the leaf node has already been pruned. + /// Returns true if pruning is successful. + pub fn prune(&mut self, position: u64) -> Result<bool, String> { + if !is_leaf(position) { return Err(format!("Node at {} is not a leaf, can't prune.", position)); } - // loop going up the tree, from node to parent, as long as we stay inside - // the tree. - let mut to_prune = vec![]; - - let mut current = position; - while current + 1 <= self.last_pos { - let (parent, sibling) = family(current); - - to_prune.push(current); - - if parent > self.last_pos { - // can't prune when our parent isn't here yet - break; - } - - // if we have a pruned sibling, we can continue up the tree - // otherwise we're done - if self.backend.get_hash(sibling).is_none() { - current = parent; - } else { - break; - } + if self.backend.get_hash(position).is_none() { + return Ok(false); } - self.backend.remove(to_prune, index)?; + + self.backend.remove(position)?; Ok(true) } @@ -460,17 +464,26 @@ where pub fn get_hash(&self, pos: u64) -> Option<Hash> { if pos > self.last_pos { None - } else { + } else if is_leaf(pos) { + // If we are a leaf then get hash from the backend. self.backend.get_hash(pos) + } else { + // If we are not a leaf get hash ignoring the remove log. + self.backend.get_from_file(pos) } } /// Get the data element at provided position in the MMR. pub fn get_data(&self, pos: u64) -> Option<T> { if pos > self.last_pos { + // If we are beyond the rhs of the MMR return None. None - } else { + } else if is_leaf(pos) { + // If we are a leaf then get data from the backend. self.backend.get_data(pos) + } else { + // If we are not a leaf then return None as only leaves have data. + None } } @@ -648,146 +661,6 @@ where } } -/// Maintains a list of previously pruned nodes in PMMR, compacting the list as -/// parents get pruned and allowing checking whether a leaf is pruned. Given -/// a node's position, computes how much it should get shifted given the -/// subtrees that have been pruned before. -/// -/// The PruneList is useful when implementing compact backends for a PMMR (for -/// example a single large byte array or a file). As nodes get pruned and -/// removed from the backend to free space, the backend will get more compact -/// but positions of a node within the PMMR will not match positions in the -/// backend storage anymore. The PruneList accounts for that mismatch and does -/// the position translation. -#[derive(Default)] -pub struct PruneList { - /// Vector of pruned nodes positions - pub pruned_nodes: Vec<u64>, -} - -impl PruneList { - /// Instantiate a new empty prune list - pub fn new() -> PruneList { - PruneList { - pruned_nodes: vec![], - } - } - - /// Computes by how many positions a node at pos should be shifted given the - /// number of nodes that have already been pruned before it. Returns None if - /// the position has already been pruned. - pub fn get_shift(&self, pos: u64) -> Option<u64> { - // get the position where the node at pos would fit in the pruned list, if - // it's already pruned, nothing to skip - - let pruned_idx = self.next_pruned_idx(pos); - let next_idx = self.pruned_nodes.binary_search(&pos).map(|x| x + 1).ok(); - match pruned_idx.or(next_idx) { - None => None, - Some(idx) => { - // skip by the number of elements pruned in the preceding subtrees, - // which is the sum of the size of each subtree - Some( - self.pruned_nodes[0..(idx as usize)] - .iter() - .map(|n| { - let height = bintree_postorder_height(*n); - // height 0, 1 node, offset 0 = 0 + 0 - // height 1, 3 nodes, offset 2 = 1 + 1 - // height 2, 7 nodes, offset 6 = 3 + 3 - // height 3, 15 nodes, offset 14 = 7 + 7 - 2 * ((1 << height) - 1) - }) - .sum(), - ) - } - } - } - - /// As above, but only returning the number of leaf nodes to skip for a - /// given leaf. Helpful if, for instance, data for each leaf is being stored - /// separately in a continuous flat-file. Returns None if the position has - /// already been pruned. - pub fn get_leaf_shift(&self, pos: u64) -> Option<u64> { - // get the position where the node at pos would fit in the pruned list, if - // it's already pruned, nothing to skip - - let pruned_idx = self.next_pruned_idx(pos); - let next_idx = self.pruned_nodes.binary_search(&pos).map(|x| x + 1).ok(); - - let idx = pruned_idx.or(next_idx)?; - Some( - // skip by the number of leaf nodes pruned in the preceding subtrees - // which just 2^height - // except in the case of height==0 - // (where we want to treat the pruned tree as 0 leaves) - self.pruned_nodes[0..(idx as usize)] - .iter() - .map(|n| { - let height = bintree_postorder_height(*n); - if height == 0 { - 0 - } else { - (1 << height) - } - }) - .sum(), - ) - } - - /// Push the node at the provided position in the prune list. Compacts the - /// list if pruning the additional node means a parent can get pruned as - /// well. - pub fn add(&mut self, pos: u64) { - let mut current = pos; - loop { - let (parent, sibling) = family(current); - - match self.pruned_nodes.binary_search(&sibling) { - Ok(idx) => { - self.pruned_nodes.remove(idx); - current = parent; - } - Err(_) => { - if let Some(idx) = self.next_pruned_idx(current) { - self.pruned_nodes.insert(idx, current); - } - break; - } - } - } - } - - /// Gets the index a new pruned node should take in the prune list. - /// If the node has already been pruned, either directly or through one of - /// its parents contained in the prune list, returns None. - pub fn next_pruned_idx(&self, pos: u64) -> Option<usize> { - match self.pruned_nodes.binary_search(&pos) { - Ok(_) => None, - Err(idx) => { - if self.pruned_nodes.len() > idx { - // the node at pos can't be a child of lower position nodes by MMR - // construction but can be a child of the next node, going up parents - // from pos to make sure it's not the case - let next_peak_pos = self.pruned_nodes[idx]; - let mut cursor = pos; - loop { - let (parent, _) = family(cursor); - if next_peak_pos == parent { - return None; - } - if next_peak_pos < parent { - break; - } - cursor = parent; - } - } - Some(idx) - } - } - } -} - /// Gets the postorder traversal index of all peaks in a MMR given the last /// node's position. Starts with the top peak, which is always on the left /// side of the range, and navigates toward lower siblings toward the right @@ -960,6 +833,24 @@ pub fn is_left_sibling(pos: u64) -> bool { sibling_pos > pos } +/// Returns the path from the specified position up to its +/// corresponding peak in the MMR. +/// The size (and therefore the set of peaks) of the MMR +/// is defined by last_pos. +pub fn path(pos: u64, last_pos: u64) -> Vec<u64> { + let mut path = vec![pos]; + let mut current = pos; + while current + 1 <= last_pos { + let (parent, _) = family(current); + if parent > last_pos { + break; + } + path.push(parent); + current = parent; + } + path +} + /// For a given starting position calculate the parent and sibling positions /// for the branch/path from that position to the peak of the tree. /// We will use the sibling positions to generate the "path" of a Merkle proof. diff --git a/core/src/core/prune_list.rs b/core/src/core/prune_list.rs new file mode 100644 index 000000000..16d773696 --- /dev/null +++ b/core/src/core/prune_list.rs @@ -0,0 +1,173 @@ +// Copyright 2018 The Grin Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! The Grin "Prune List" implementation. +//! Currently implemented as a vec of u64 positions. +//! *Soon* to be implemented as a compact bitmap. +//! +//! Maintains a set of pruned root node positions that define the pruned +//! and compacted "gaps" in the MMR data and hash files. +//! The root itself is maintained in the hash file, but all positions beneath +//! the root are compacted away. All positions to the right of a pruned node +//! must be shifted the appropriate amount when reading from the hash and data +//! files. + +use core::pmmr::{bintree_postorder_height, family}; + +/// Maintains a list of previously pruned nodes in PMMR, compacting the list as +/// parents get pruned and allowing checking whether a leaf is pruned. Given +/// a node's position, computes how much it should get shifted given the +/// subtrees that have been pruned before. +/// +/// The PruneList is useful when implementing compact backends for a PMMR (for +/// example a single large byte array or a file). As nodes get pruned and +/// removed from the backend to free space, the backend will get more compact +/// but positions of a node within the PMMR will not match positions in the +/// backend storage anymore. The PruneList accounts for that mismatch and does +/// the position translation. +#[derive(Default)] +pub struct PruneList { + /// Vector of pruned nodes positions + pub pruned_nodes: Vec<u64>, +} + +impl PruneList { + /// Instantiate a new empty prune list + pub fn new() -> PruneList { + PruneList { + pruned_nodes: vec![], + } + } + + /// Computes by how many positions a node at pos should be shifted given the + /// number of nodes that have already been pruned before it. Returns None if + /// the position has already been pruned. + pub fn get_shift(&self, pos: u64) -> Option<u64> { + // get the position where the node at pos would fit in the pruned list, if + // it's already pruned, nothing to skip + + let pruned_idx = self.next_pruned_idx(pos); + let next_idx = self.pruned_nodes.binary_search(&pos).map(|x| x + 1).ok(); + match pruned_idx.or(next_idx) { + None => None, + Some(idx) => { + // skip by the number of elements pruned in the preceding subtrees, + // which is the sum of the size of each subtree + Some( + self.pruned_nodes[0..(idx as usize)] + .iter() + .map(|n| { + let height = bintree_postorder_height(*n); + // height 0, 1 node, offset 0 = 0 + 0 + // height 1, 3 nodes, offset 2 = 1 + 1 + // height 2, 7 nodes, offset 6 = 3 + 3 + // height 3, 15 nodes, offset 14 = 7 + 7 + 2 * ((1 << height) - 1) + }) + .sum(), + ) + } + } + } + + /// As above, but only returning the number of leaf nodes to skip for a + /// given leaf. Helpful if, for instance, data for each leaf is being stored + /// separately in a continuous flat-file. Returns None if the position has + /// already been pruned. + pub fn get_leaf_shift(&self, pos: u64) -> Option<u64> { + // get the position where the node at pos would fit in the pruned list, if + // it's already pruned, nothing to skip + + let pruned_idx = self.next_pruned_idx(pos); + let next_idx = self.pruned_nodes.binary_search(&pos).map(|x| x + 1).ok(); + + let idx = pruned_idx.or(next_idx)?; + Some( + // skip by the number of leaf nodes pruned in the preceeding subtrees + // which just 2^height + // except in the case of height==0 + // (where we want to treat the pruned tree as 0 leaves) + self.pruned_nodes[0..(idx as usize)] + .iter() + .map(|n| { + let height = bintree_postorder_height(*n); + if height == 0 { + 0 + } else { + (1 << height) + } + }) + .sum(), + ) + } + + /// Push the node at the provided position in the prune list. Compacts the + /// list if pruning the additional node means a parent can get pruned as + /// well. + pub fn add(&mut self, pos: u64) { + let mut current = pos; + loop { + let (parent, sibling) = family(current); + + match self.pruned_nodes.binary_search(&sibling) { + Ok(idx) => { + self.pruned_nodes.remove(idx); + current = parent; + } + Err(_) => { + if let Some(idx) = self.next_pruned_idx(current) { + self.pruned_nodes.insert(idx, current); + } + break; + } + } + } + } + + /// Checks if the specified position has been pruned, + /// either directly (pos contained in the prune list itself) + /// or indirectly (pos is beneath a pruned root). + pub fn is_pruned(&self, pos: u64) -> bool { + self.next_pruned_idx(pos).is_none() + } + + /// Gets the index a new pruned node should take in the prune list. + /// If the node has already been pruned, either directly or through one of + /// its parents contained in the prune list, returns None. + pub fn next_pruned_idx(&self, pos: u64) -> Option<usize> { + match self.pruned_nodes.binary_search(&pos) { + Ok(_) => None, + Err(idx) => { + if self.pruned_nodes.len() > idx { + // the node at pos can't be a child of lower position nodes by MMR + // construction but can be a child of the next node, going up parents + // from pos to make sure it's not the case + let next_peak_pos = self.pruned_nodes[idx]; + let mut cursor = pos; + loop { + let (parent, _) = family(cursor); + if next_peak_pos == parent { + return None; + } + if next_peak_pos < parent { + break; + } + cursor = parent; + } + } + Some(idx) + } + } + } +} diff --git a/core/src/lib.rs b/core/src/lib.rs index e06a47bf8..cec947ba3 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -25,6 +25,7 @@ extern crate bitflags; extern crate blake2_rfc as blake2; extern crate byteorder; +extern crate croaring; extern crate grin_keychain as keychain; extern crate grin_util as util; #[macro_use] @@ -45,6 +46,6 @@ pub mod macros; pub mod consensus; pub mod core; pub mod genesis; -pub mod ser; pub mod global; pub mod pow; +pub mod ser; diff --git a/core/tests/pmmr.rs b/core/tests/pmmr.rs index f13b8fa45..0af563462 100644 --- a/core/tests/pmmr.rs +++ b/core/tests/pmmr.rs @@ -15,9 +15,14 @@ //! PMMR tests #[macro_use] extern crate grin_core as core; +extern crate croaring; + +use croaring::Bitmap; use core::core::hash::Hash; -use core::core::pmmr::{self, Backend, MerkleProof, PruneList, PMMR}; +use core::core::pmmr::{self, Backend, MerkleProof, PMMR}; +use core::core::prune_list::PruneList; +use core::core::BlockHeader; use core::ser::{self, Error, PMMRIndexHashable, PMMRable, Readable, Reader, Writeable, Writer}; /// Simple MMR backend implementation based on a Vector. Pruning does not @@ -82,22 +87,28 @@ where } } - fn remove(&mut self, positions: Vec<u64>, _index: u32) -> Result<(), String> { - for n in positions { - self.remove_list.push(n) - } + fn remove(&mut self, position: u64) -> Result<(), String> { + self.remove_list.push(position); Ok(()) } - fn rewind(&mut self, position: u64, _index: u32) -> Result<(), String> { - self.elems = self.elems[0..(position as usize) + 1].to_vec(); - Ok(()) + fn rewind( + &mut self, + position: u64, + rewind_add_pos: &Bitmap, + rewind_rm_pos: &Bitmap, + ) -> Result<(), String> { + panic!("not yet implemented for vec backend..."); } fn get_data_file_path(&self) -> String { "".to_string() } + fn snapshot(&self, header: &BlockHeader) -> Result<(), String> { + Ok(()) + } + fn dump_stats(&self) {} } @@ -395,7 +406,7 @@ fn pmmr_merkle_proof_prune_and_rewind() { // now prune an element and check we can still generate // the correct Merkle proof for the other element (after sibling pruned) - pmmr.prune(1, 1).unwrap(); + pmmr.prune(1).unwrap(); let proof_2 = pmmr.merkle_proof(2).unwrap(); assert_eq!(proof, proof_2); } @@ -591,62 +602,74 @@ fn pmmr_prune() { sz = pmmr.unpruned_size(); } + // First check the initial numbers of elements. + assert_eq!(ba.elems.len(), 16); + assert_eq!(ba.remove_list.len(), 0); + // pruning a leaf with no parent should do nothing { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut ba, sz); - pmmr.prune(16, 0).unwrap(); + pmmr.prune(16).unwrap(); assert_eq!(orig_root, pmmr.root()); } - assert_eq!(ba.used_size(), 16); + assert_eq!(ba.elems.len(), 16); + assert_eq!(ba.remove_list.len(), 1); // pruning leaves with no shared parent just removes 1 element { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut ba, sz); - pmmr.prune(2, 0).unwrap(); + pmmr.prune(2).unwrap(); assert_eq!(orig_root, pmmr.root()); } - assert_eq!(ba.used_size(), 15); + assert_eq!(ba.elems.len(), 16); + assert_eq!(ba.remove_list.len(), 2); { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut ba, sz); - pmmr.prune(4, 0).unwrap(); + pmmr.prune(4).unwrap(); assert_eq!(orig_root, pmmr.root()); } - assert_eq!(ba.used_size(), 14); + assert_eq!(ba.elems.len(), 16); + assert_eq!(ba.remove_list.len(), 3); // pruning a non-leaf node has no effect { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut ba, sz); - pmmr.prune(3, 0).unwrap_err(); + pmmr.prune(3).unwrap_err(); assert_eq!(orig_root, pmmr.root()); } - assert_eq!(ba.used_size(), 14); + assert_eq!(ba.elems.len(), 16); + assert_eq!(ba.remove_list.len(), 3); - // pruning sibling removes subtree + // TODO - no longer true (leaves only now) - pruning sibling removes subtree { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut ba, sz); - pmmr.prune(5, 0).unwrap(); + pmmr.prune(5).unwrap(); assert_eq!(orig_root, pmmr.root()); } - assert_eq!(ba.used_size(), 12); + assert_eq!(ba.elems.len(), 16); + assert_eq!(ba.remove_list.len(), 4); - // pruning all leaves under level >1 removes all subtree + // TODO - no longeer true (leaves only now) - pruning all leaves under level >1 + // removes all subtree { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut ba, sz); - pmmr.prune(1, 0).unwrap(); + pmmr.prune(1).unwrap(); assert_eq!(orig_root, pmmr.root()); } - assert_eq!(ba.used_size(), 9); + assert_eq!(ba.elems.len(), 16); + assert_eq!(ba.remove_list.len(), 5); // pruning everything should only leave us with a single peak { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut ba, sz); for n in 1..16 { - let _ = pmmr.prune(n, 0); + let _ = pmmr.prune(n); } assert_eq!(orig_root, pmmr.root()); } - assert_eq!(ba.used_size(), 1); + assert_eq!(ba.elems.len(), 16); + assert_eq!(ba.remove_list.len(), 9); } #[test] @@ -990,11 +1013,11 @@ fn check_elements_from_insertion_index() { assert_eq!(res.1[349].0[3], 999); // pruning a few nodes should get consistent results - pmmr.prune(pmmr::insertion_to_pmmr_index(650), 0).unwrap(); - pmmr.prune(pmmr::insertion_to_pmmr_index(651), 0).unwrap(); - pmmr.prune(pmmr::insertion_to_pmmr_index(800), 0).unwrap(); - pmmr.prune(pmmr::insertion_to_pmmr_index(900), 0).unwrap(); - pmmr.prune(pmmr::insertion_to_pmmr_index(998), 0).unwrap(); + pmmr.prune(pmmr::insertion_to_pmmr_index(650)).unwrap(); + pmmr.prune(pmmr::insertion_to_pmmr_index(651)).unwrap(); + pmmr.prune(pmmr::insertion_to_pmmr_index(800)).unwrap(); + pmmr.prune(pmmr::insertion_to_pmmr_index(900)).unwrap(); + pmmr.prune(pmmr::insertion_to_pmmr_index(998)).unwrap(); let res = pmmr.elements_from_insertion_index(650, 1000); assert_eq!(res.0, 999); assert_eq!(res.1.len(), 345); diff --git a/pool/tests/block_reconciliation.rs b/pool/tests/block_reconciliation.rs index 425d61ae1..ec3c0785b 100644 --- a/pool/tests/block_reconciliation.rs +++ b/pool/tests/block_reconciliation.rs @@ -188,6 +188,12 @@ fn test_transaction_pool_block_reconciliation() { block }; + // Check the pool still contains everything we expect at this point. + { + let write_pool = pool.write().unwrap(); + assert_eq!(write_pool.total_size(), txs_to_add.len()); + } + // And reconcile the pool with this latest block. { let mut write_pool = pool.write().unwrap(); @@ -205,12 +211,11 @@ fn test_transaction_pool_block_reconciliation() { // txhashset.apply_output() TODO - and we no longer incorrectly allow // duplicate outputs in the MMR TODO - then this test will fail // - // TODO - wtf is with these name permutations... - // - assert_eq!(write_pool.total_size(), 4); + assert_eq!(write_pool.total_size(), 5); assert_eq!(write_pool.txpool.entries[0].tx, valid_transaction); - assert_eq!(write_pool.txpool.entries[1].tx, conflict_valid_child); - assert_eq!(write_pool.txpool.entries[2].tx, valid_child_conflict); - assert_eq!(write_pool.txpool.entries[3].tx, valid_child_valid); + assert_eq!(write_pool.txpool.entries[1].tx, pool_child); + assert_eq!(write_pool.txpool.entries[2].tx, conflict_valid_child); + assert_eq!(write_pool.txpool.entries[3].tx, valid_child_conflict); + assert_eq!(write_pool.txpool.entries[4].tx, valid_child_valid); } } diff --git a/pool/tests/common/mod.rs b/pool/tests/common/mod.rs index a48f2b704..5f3e121a5 100644 --- a/pool/tests/common/mod.rs +++ b/pool/tests/common/mod.rs @@ -30,7 +30,6 @@ use std::sync::{Arc, RwLock}; use core::core::{BlockHeader, Transaction}; -use chain::ChainStore; use chain::store::ChainKVStore; use chain::txhashset; use chain::txhashset::TxHashSet; @@ -41,8 +40,8 @@ use pool::*; use keychain::Keychain; use wallet::libtx; -use pool::TransactionPool; use pool::types::*; +use pool::TransactionPool; #[derive(Clone)] pub struct ChainAdapter { @@ -56,7 +55,7 @@ impl ChainAdapter { let chain_store = ChainKVStore::new(target_dir.clone()) .map_err(|e| format!("failed to init chain_store, {}", e))?; let store = Arc::new(chain_store); - let txhashset = TxHashSet::open(target_dir.clone(), store.clone()) + let txhashset = TxHashSet::open(target_dir.clone(), store.clone(), None) .map_err(|e| format!("failed to init txhashset, {}", e))?; Ok(ChainAdapter { @@ -72,13 +71,11 @@ impl BlockChain for ChainAdapter { txs: Vec<Transaction>, pre_tx: Option<Transaction>, ) -> Result<Vec<Transaction>, PoolError> { - let header = self.store.head_header().unwrap(); let mut txhashset = self.txhashset.write().unwrap(); let res = txhashset::extending_readonly(&mut txhashset, |extension| { - let valid_txs = extension.validate_raw_txs(txs, pre_tx, header.height)?; + let valid_txs = extension.validate_raw_txs(txs, pre_tx)?; Ok(valid_txs) }).map_err(|e| PoolError::Other(format!("Error: test chain adapter: {:?}", e)))?; - Ok(res) } diff --git a/store/Cargo.toml b/store/Cargo.toml index 7c63e40cb..e27f008e9 100644 --- a/store/Cargo.toml +++ b/store/Cargo.toml @@ -7,9 +7,11 @@ publish = false [dependencies] byteorder = "1" +croaring = "0.3" env_logger = "0.5" libc = "0.2" memmap = { git = "https://github.com/danburkert/memmap-rs", tag = "0.6.2" } +rand = "0.3" rocksdb = "0.10" serde = "1" serde_derive = "1" diff --git a/store/src/leaf_set.rs b/store/src/leaf_set.rs new file mode 100644 index 000000000..926f55952 --- /dev/null +++ b/store/src/leaf_set.rs @@ -0,0 +1,195 @@ +// Copyright 2018 The Grin Developers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! The Grin leaf_set implementation. +//! Compact (roaring) bitmap representing the set of leaf positions +//! that exist and are not currently pruned in the MMR. + +use std::fs::File; +use std::io::{self, BufWriter, Read, Write}; +use std::path::Path; + +use croaring::Bitmap; + +use core::core::hash::Hashed; +use core::core::pmmr; +use core::core::prune_list::PruneList; +use core::core::BlockHeader; + +use util::LOGGER; + +/// Compact (roaring) bitmap representing the set of positions of +/// leaves that are currently unpruned in the MMR. +pub struct LeafSet { + path: String, + bitmap: Bitmap, + bitmap_bak: Bitmap, +} + +impl LeafSet { + /// Open the remove log file. + /// The content of the file will be read in memory for fast checking. + pub fn open(path: String) -> io::Result<LeafSet> { + let file_path = Path::new(&path); + let bitmap = if file_path.exists() { + let mut bitmap_file = File::open(path.clone())?; + let mut buffer = vec![]; + bitmap_file.read_to_end(&mut buffer)?; + Bitmap::deserialize(&buffer) + } else { + Bitmap::create() + }; + + Ok(LeafSet { + path: path.clone(), + bitmap: bitmap.clone(), + bitmap_bak: bitmap.clone(), + }) + } + + /// Copies a snapshot of the utxo file into the primary utxo file. + pub fn copy_snapshot(path: String, cp_path: String) -> io::Result<()> { + let cp_file_path = Path::new(&cp_path); + + if !cp_file_path.exists() { + debug!(LOGGER, "leaf_set: rewound leaf file not found: {}", cp_path); + return Ok(()); + } + + let mut bitmap_file = File::open(cp_path.clone())?; + let mut buffer = vec![]; + bitmap_file.read_to_end(&mut buffer)?; + let bitmap = Bitmap::deserialize(&buffer); + + debug!( + LOGGER, + "leaf_set: copying rewound file {} to {}", cp_path, path + ); + + let mut leaf_set = LeafSet { + path: path.clone(), + bitmap: bitmap.clone(), + bitmap_bak: bitmap.clone(), + }; + + leaf_set.flush()?; + Ok(()) + } + + /// Calculate the set of unpruned leaves + /// up to and including the cutoff_pos. + /// Only applicable for the output MMR. + fn unpruned_pre_cutoff(&self, cutoff_pos: u64, prune_list: &PruneList) -> Bitmap { + (1..=cutoff_pos) + .filter(|&x| pmmr::is_leaf(x)) + .filter(|&x| !prune_list.is_pruned(x)) + .map(|x| x as u32) + .collect() + } + + /// Calculate the set of pruned positions + /// up to and including the cutoff_pos. + /// Uses both the leaf_set and the prune_list to determine prunedness. + pub fn removed_pre_cutoff( + &self, + cutoff_pos: u64, + rewind_add_pos: &Bitmap, + rewind_rm_pos: &Bitmap, + prune_list: &PruneList, + ) -> Bitmap { + let mut bitmap = self.bitmap.clone(); + + // Now "rewind" using the rewind_add_pos and rewind_rm_pos bitmaps passed in. + bitmap.andnot_inplace(&rewind_add_pos); + bitmap.or_inplace(&rewind_rm_pos); + + // Invert bitmap for the leaf pos and return the resulting bitmap. + bitmap + .flip(1..(cutoff_pos + 1)) + .and(&self.unpruned_pre_cutoff(cutoff_pos, prune_list)) + } + + /// Rewinds the leaf_set back to a previous state. + pub fn rewind(&mut self, rewind_add_pos: &Bitmap, rewind_rm_pos: &Bitmap) { + // First remove pos from leaf_set that were + // added after the point we are rewinding to. + self.bitmap.andnot_inplace(&rewind_add_pos); + // Then add back output pos to the leaf_set + // that were removed. + self.bitmap.or_inplace(&rewind_rm_pos); + } + + /// Append a new position to the leaf_set. + pub fn add(&mut self, pos: u64) { + self.bitmap.add(pos as u32); + } + + /// Remove the provided position from the leaf_set. + pub fn remove(&mut self, pos: u64) { + self.bitmap.remove(pos as u32); + } + + /// Saves the utxo file tagged with block hash as filename suffix. + /// Needed during fast-sync as the receiving node cannot rewind + /// after receiving the txhashset zip file. + pub fn snapshot(&self, header: &BlockHeader) -> io::Result<()> { + let mut cp_bitmap = self.bitmap.clone(); + cp_bitmap.run_optimize(); + + let cp_path = format!("{}.{}", self.path, header.hash()); + let mut file = BufWriter::new(File::create(cp_path)?); + file.write_all(&cp_bitmap.serialize())?; + file.flush()?; + Ok(()) + } + + /// Flush the leaf_set to file. + pub fn flush(&mut self) -> io::Result<()> { + // First run the optimization step on the bitmap. + self.bitmap.run_optimize(); + + // TODO - consider writing this to disk in a tmp file and then renaming? + + // Write the updated bitmap file to disk. + { + let mut file = BufWriter::new(File::create(self.path.clone())?); + file.write_all(&self.bitmap.serialize())?; + file.flush()?; + } + + // Make sure our backup in memory is up to date. + self.bitmap_bak = self.bitmap.clone(); + + Ok(()) + } + + /// Discard any pending changes. + pub fn discard(&mut self) { + self.bitmap = self.bitmap_bak.clone(); + } + + /// Whether the leaf_set includes the provided position. + pub fn includes(&self, pos: u64) -> bool { + self.bitmap.contains(pos as u32) + } + + /// Number of positions stored in the leaf_set. + pub fn len(&self) -> usize { + self.bitmap.cardinality() as usize + } + + // Is the leaf_set empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} diff --git a/store/src/lib.rs b/store/src/lib.rs index 1a6109205..783bc6bbf 100644 --- a/store/src/lib.rs +++ b/store/src/lib.rs @@ -21,6 +21,7 @@ #![warn(missing_docs)] extern crate byteorder; +extern crate croaring; extern crate env_logger; #[macro_use] extern crate grin_core as core; @@ -32,7 +33,9 @@ extern crate serde; #[macro_use] extern crate slog; +pub mod leaf_set; pub mod pmmr; +pub mod rm_log; pub mod types; const SEP: u8 = ':' as u8; diff --git a/store/src/pmmr.rs b/store/src/pmmr.rs index 4b5ed83bc..cdeed6881 100644 --- a/store/src/pmmr.rs +++ b/store/src/pmmr.rs @@ -16,31 +16,37 @@ use std::fs; use std::io; use std::marker; +use std::path::Path; -use core::core::hash::Hash; +use croaring::Bitmap; + +use core::core::hash::{Hash, Hashed}; use core::core::pmmr::{self, family, Backend}; +use core::core::prune_list::PruneList; +use core::core::BlockHeader; use core::ser::{self, PMMRable}; -use types::{prune_noop, read_ordered_vec, write_vec, AppendOnlyFile, RemoveLog}; +use leaf_set::LeafSet; +use rm_log::RemoveLog; +use types::{prune_noop, read_ordered_vec, write_vec, AppendOnlyFile}; use util::LOGGER; const PMMR_HASH_FILE: &'static str = "pmmr_hash.bin"; const PMMR_DATA_FILE: &'static str = "pmmr_data.bin"; +const PMMR_LEAF_FILE: &'static str = "pmmr_leaf.bin"; const PMMR_RM_LOG_FILE: &'static str = "pmmr_rm_log.bin"; const PMMR_PRUNED_FILE: &'static str = "pmmr_pruned.bin"; -/// Maximum number of nodes in the remove log before it gets flushed -pub const RM_LOG_MAX_NODES: usize = 10_000; - /// PMMR persistent backend implementation. Relies on multiple facilities to /// handle writing, reading and pruning. /// -/// * A main storage file appends Hash instances as they come. This -/// AppendOnlyFile is also backed by a mmap for reads. +/// * A main storage file appends Hash instances as they come. +/// This AppendOnlyFile is also backed by a mmap for reads. /// * An in-memory backend buffers the latest batch of writes to ensure the /// PMMR can always read recent values even if they haven't been flushed to /// disk yet. -/// * A remove log tracks the positions that need to be pruned from the -/// main storage file. +/// * A leaf_set tracks unpruned (unremoved) leaf positions in the MMR.. +/// * A prune_list tracks the positions of pruned (and compacted) roots in the +/// MMR. pub struct PMMRBackend<T> where T: PMMRable, @@ -48,8 +54,8 @@ where data_dir: String, hash_file: AppendOnlyFile, data_file: AppendOnlyFile, - rm_log: RemoveLog, - pruned_nodes: pmmr::PruneList, + leaf_set: LeafSet, + pruned_nodes: PruneList, _marker: marker::PhantomData<T>, } @@ -64,6 +70,9 @@ where self.hash_file.append(&mut ser::ser_vec(&d.0).unwrap()); if let Some(elem) = d.1 { self.data_file.append(&mut ser::ser_vec(&elem).unwrap()); + + // Add the new position to our leaf_set. + self.leaf_set.add(position); } } Ok(()) @@ -120,35 +129,36 @@ where } /// Get the hash at pos. - /// Return None if it has been removed. + /// Return None if pos is a leaf and it has been removed (or pruned or + /// compacted). fn get_hash(&self, pos: u64) -> Option<(Hash)> { - // Check if this position has been pruned in the remove log... - if self.rm_log.includes(pos) { - None - } else { - self.get_from_file(pos) + if pmmr::is_leaf(pos) && !self.leaf_set.includes(pos) { + return None; } + self.get_from_file(pos) } /// Get the data at pos. /// Return None if it has been removed or if pos is not a leaf node. fn get_data(&self, pos: u64) -> Option<(T)> { - if self.rm_log.includes(pos) { - None - } else if !pmmr::is_leaf(pos) { - None - } else { - self.get_data_from_file(pos) + if !pmmr::is_leaf(pos) { + return None; } + if !self.leaf_set.includes(pos) { + return None; + } + self.get_data_from_file(pos) } /// Rewind the PMMR backend to the given position. - /// Use the index to rewind the rm_log correctly (based on block height). - fn rewind(&mut self, position: u64, index: u32) -> Result<(), String> { - // Rewind the rm_log based on index (block height) - self.rm_log - .rewind(index) - .map_err(|e| format!("Could not truncate remove log: {}", e))?; + fn rewind( + &mut self, + position: u64, + rewind_add_pos: &Bitmap, + rewind_rm_pos: &Bitmap, + ) -> Result<(), String> { + // First rewind the leaf_set with the necessary added and removed positions. + self.leaf_set.rewind(rewind_add_pos, rewind_rm_pos); // Rewind the hash file accounting for pruned/compacted pos let shift = self.pruned_nodes.get_shift(position).unwrap_or(0); @@ -166,11 +176,10 @@ where Ok(()) } - /// Remove Hashes by insertion position - fn remove(&mut self, positions: Vec<u64>, index: u32) -> Result<(), String> { - self.rm_log - .append(positions, index) - .map_err(|e| format!("Could not write to log storage, disk full? {:?}", e)) + /// Remove by insertion position. + fn remove(&mut self, pos: u64) -> Result<(), String> { + self.leaf_set.remove(pos); + Ok(()) } /// Return data file path @@ -178,14 +187,21 @@ where self.data_file.path() } + fn snapshot(&self, header: &BlockHeader) -> Result<(), String> { + self.leaf_set + .snapshot(header) + .map_err(|_| format!("Failed to save copy of leaf_set for {}", header.hash()))?; + Ok(()) + } + fn dump_stats(&self) { debug!( LOGGER, - "pmmr backend: unpruned: {}, hashes: {}, data: {}, rm_log: {}, prune_list: {}", + "pmmr backend: unpruned: {}, hashes: {}, data: {}, leaf_set: {}, prune_list: {}", self.unpruned_size().unwrap_or(0), self.hash_size().unwrap_or(0), self.data_size().unwrap_or(0), - self.rm_log.removed.len(), + self.leaf_set.len(), self.pruned_nodes.pruned_nodes.len(), ); } @@ -197,25 +213,74 @@ where { /// Instantiates a new PMMR backend. /// Use the provided dir to store its files. - pub fn new(data_dir: String) -> io::Result<PMMRBackend<T>> { + pub fn new(data_dir: String, header: Option<&BlockHeader>) -> io::Result<PMMRBackend<T>> { let prune_list = read_ordered_vec(format!("{}/{}", data_dir, PMMR_PRUNED_FILE), 8)?; - let pruned_nodes = pmmr::PruneList { + let pruned_nodes = PruneList { pruned_nodes: prune_list, }; - let rm_log = RemoveLog::open(format!("{}/{}", data_dir, PMMR_RM_LOG_FILE))?; let hash_file = AppendOnlyFile::open(format!("{}/{}", data_dir, PMMR_HASH_FILE))?; let data_file = AppendOnlyFile::open(format!("{}/{}", data_dir, PMMR_DATA_FILE))?; + let leaf_set_path = format!("{}/{}", data_dir, PMMR_LEAF_FILE); + let rm_log_path = format!("{}/{}", data_dir, PMMR_RM_LOG_FILE); + + if let Some(header) = header { + let leaf_snapshot_path = format!("{}/{}.{}", data_dir, PMMR_LEAF_FILE, header.hash()); + LeafSet::copy_snapshot(leaf_set_path.clone(), leaf_snapshot_path.clone())?; + } + + // If we need to migrate an old rm_log to a new leaf_set do it here before we + // start. Do *not* migrate if we already have a leaf_set. + let mut leaf_set = LeafSet::open(leaf_set_path.clone())?; + if leaf_set.is_empty() && Path::new(&rm_log_path).exists() { + let mut rm_log = RemoveLog::open(rm_log_path)?; + debug!( + LOGGER, + "pmmr: leaf_set: {}, rm_log: {}", + leaf_set.len(), + rm_log.len() + ); + debug!(LOGGER, "pmmr: migrating rm_log -> leaf_set"); + + if let Some(header) = header { + // Rewind the rm_log back to the height of the header we care about. + debug!( + LOGGER, + "pmmr: first rewinding rm_log to height {}", header.height + ); + rm_log.rewind(header.height as u32)?; + } + + // do not like this here but we have no pmmr to call + // unpruned_size() on yet... + let last_pos = { + let total_shift = pruned_nodes.get_shift(::std::u64::MAX).unwrap(); + let record_len = 32; + let sz = hash_file.size()?; + sz / record_len + total_shift + }; + + migrate_rm_log(&mut leaf_set, &rm_log, &pruned_nodes, last_pos)?; + } + + let leaf_set = LeafSet::open(leaf_set_path)?; + Ok(PMMRBackend { data_dir, hash_file, data_file, - rm_log, + leaf_set, pruned_nodes, _marker: marker::PhantomData, }) } + fn is_pruned(&self, pos: u64) -> bool { + let path = pmmr::path(pos, self.unpruned_size().unwrap_or(0)); + path.iter() + .any(|x| self.pruned_nodes.pruned_nodes.contains(x)) + } + /// Number of elements in the PMMR stored by this backend. Only produces the /// fully sync'd size. pub fn unpruned_size(&self) -> io::Result<u64> { @@ -254,7 +319,7 @@ where format!("Could not write to log data storage, disk full? {:?}", e), )); } - self.rm_log.flush()?; + self.leaf_set.flush()?; Ok(()) } @@ -262,7 +327,7 @@ where /// Discard the current, non synced state of the backend. pub fn discard(&mut self) { self.hash_file.discard(); - self.rm_log.discard(); + self.leaf_set.discard(); self.data_file.discard(); } @@ -271,54 +336,43 @@ where self.get_data_file_path() } - /// Checks the length of the remove log to see if it should get compacted. - /// If so, the remove log is flushed into the pruned list, which itself gets - /// saved, and the hash and data files are rewritten, cutting the removed - /// data. + /// Takes the leaf_set at a given cutoff_pos and generates an updated + /// prune_list. Saves the updated prune_list to disk + /// Compacts the hash and data files based on the prune_list and saves both + /// to disk. /// - /// If a max_len strictly greater than 0 is provided, the value will be used - /// to decide whether the remove log has reached its maximum length, - /// otherwise the RM_LOG_MAX_NODES default value is used. - /// - /// A cutoff limits compaction on recent data. Provided as an indexed value - /// on pruned data (practically a block height), it forces compaction to - /// ignore any prunable data beyond the cutoff. This is used to enforce - /// a horizon after which the local node should have all the data to allow - /// rewinding. + /// A cutoff position limits compaction on recent data. + /// This will be the last position of a particular block + /// to keep things aligned. + /// The block_marker in the db/index for the particular block + /// will have a suitable output_pos. + /// This is used to enforce a horizon after which the local node + /// should have all the data to allow rewinding. pub fn check_compact<P>( &mut self, - max_len: usize, - cutoff_index: u32, + cutoff_pos: u64, + rewind_add_pos: &Bitmap, + rewind_rm_pos: &Bitmap, prune_cb: P, ) -> io::Result<bool> where P: Fn(&[u8]), { - if !(max_len > 0 && self.rm_log.len() >= max_len - || max_len == 0 && self.rm_log.len() > RM_LOG_MAX_NODES) - { - return Ok(false); - } - // Paths for tmp hash and data files. let tmp_prune_file_hash = format!("{}/{}.hashprune", self.data_dir, PMMR_HASH_FILE); let tmp_prune_file_data = format!("{}/{}.dataprune", self.data_dir, PMMR_DATA_FILE); - // Pos we want to get rid of. - // Filtered by cutoff index. - let rm_pre_cutoff = self.rm_log.removed_pre_cutoff(cutoff_index); - // Filtered to exclude the subtree "roots". - let pos_to_rm = removed_excl_roots(rm_pre_cutoff.clone()); - // Filtered for leaves only. - let leaf_pos_to_rm = removed_leaves(pos_to_rm.clone()); + // Calculate the sets of leaf positions and node positions to remove based + // on the cutoff_pos provided. + let (leaves_removed, pos_to_rm) = self.pos_to_rm(cutoff_pos, rewind_add_pos, rewind_rm_pos); // 1. Save compact copy of the hash file, skipping removed data. { let record_len = 32; - let off_to_rm = map_vec!(pos_to_rm, |&pos| { - let shift = self.pruned_nodes.get_shift(pos).unwrap(); - (pos - 1 - shift) * record_len + let off_to_rm = map_vec!(pos_to_rm, |pos| { + let shift = self.pruned_nodes.get_shift(pos.into()).unwrap(); + ((pos as u64) - 1 - shift) * record_len }); self.hash_file.save_prune( @@ -333,9 +387,15 @@ where { let record_len = T::len() as u64; - let off_to_rm = map_vec!(leaf_pos_to_rm, |pos| { - let flat_pos = pmmr::n_leaves(*pos); - let shift = self.pruned_nodes.get_leaf_shift(*pos).unwrap(); + let leaf_pos_to_rm = pos_to_rm + .iter() + .filter(|&x| pmmr::is_leaf(x.into())) + .map(|x| x as u64) + .collect::<Vec<_>>(); + + let off_to_rm = map_vec!(leaf_pos_to_rm, |&pos| { + let flat_pos = pmmr::n_leaves(pos); + let shift = self.pruned_nodes.get_leaf_shift(pos).unwrap(); (flat_pos - 1 - shift) * record_len }); @@ -349,12 +409,18 @@ where // 3. Update the prune list and save it in place. { - for &pos in &rm_pre_cutoff { - self.pruned_nodes.add(pos); + for pos in leaves_removed.iter() { + self.pruned_nodes.add(pos.into()); } // TODO - we can get rid of leaves in the prunelist here (and things still work) // self.pruned_nodes.pruned_nodes.retain(|&x| !pmmr::is_leaf(x)); + // Prunelist contains *only* non-leaf roots. + // Contrast this with the leaf_set that contains *only* leaves. + self.pruned_nodes + .pruned_nodes + .retain(|&x| !pmmr::is_leaf(x)); + write_vec( format!("{}/{}", self.data_dir, PMMR_PRUNED_FILE), &self.pruned_nodes.pruned_nodes, @@ -375,35 +441,85 @@ where )?; self.data_file = AppendOnlyFile::open(format!("{}/{}", self.data_dir, PMMR_DATA_FILE))?; - // 6. Truncate the rm log based on pos removed. - // Excluding roots which remain in rm log. - self.rm_log - .removed - .retain(|&(pos, _)| !pos_to_rm.binary_search(&&pos).is_ok()); - self.rm_log.flush()?; + // 6. Write the leaf_set to disk. + // Optimize the bitmap storage in the process. + self.leaf_set.flush()?; Ok(true) } + + fn pos_to_rm( + &self, + cutoff_pos: u64, + rewind_add_pos: &Bitmap, + rewind_rm_pos: &Bitmap, + ) -> (Bitmap, Bitmap) { + let mut expanded = Bitmap::create(); + + let leaf_pos_to_rm = self.leaf_set.removed_pre_cutoff( + cutoff_pos, + rewind_add_pos, + rewind_rm_pos, + &self.pruned_nodes, + ); + + for x in leaf_pos_to_rm.iter() { + expanded.add(x); + let mut current = x as u64; + loop { + let (parent, sibling) = family(current); + let sibling_pruned = self.is_pruned(sibling); + + // if sibling previously pruned + // push it back onto list of pos to remove + // so we can remove it and traverse up to parent + if sibling_pruned { + expanded.add(sibling as u32); + } + + if sibling_pruned || expanded.contains(sibling as u32) { + expanded.add(parent as u32); + current = parent; + } else { + break; + } + } + } + (leaf_pos_to_rm, removed_excl_roots(expanded)) + } } /// Filter remove list to exclude roots. /// We want to keep roots around so we have hashes for Merkle proofs. -fn removed_excl_roots(removed: Vec<u64>) -> Vec<u64> { +fn removed_excl_roots(removed: Bitmap) -> Bitmap { removed .iter() - .filter(|&pos| { - let (parent_pos, _) = family(*pos); - removed.binary_search(&parent_pos).is_ok() + .filter(|pos| { + let (parent_pos, _) = family(*pos as u64); + removed.contains(parent_pos as u32) }) - .cloned() .collect() } -/// Filter remove list to only include leaf positions. -fn removed_leaves(removed: Vec<u64>) -> Vec<u64> { - removed - .iter() - .filter(|&pos| pmmr::is_leaf(*pos)) - .cloned() - .collect() +fn migrate_rm_log( + leaf_set: &mut LeafSet, + rm_log: &RemoveLog, + prune_list: &PruneList, + last_pos: u64, +) -> io::Result<()> { + info!( + LOGGER, + "Migrating rm_log -> leaf_set. Might take a little while... {} pos", last_pos + ); + + // check every leaf + // if not pruned and not removed, add it to the leaf_set + for x in 1..=last_pos { + if pmmr::is_leaf(x) && !prune_list.is_pruned(x) && !rm_log.includes(x) { + leaf_set.add(x); + } + } + + leaf_set.flush()?; + Ok(()) } diff --git a/store/src/rm_log.rs b/store/src/rm_log.rs new file mode 100644 index 000000000..0f0fc1709 --- /dev/null +++ b/store/src/rm_log.rs @@ -0,0 +1,149 @@ +// Copyright 2018 The Grin Developers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! The deprecated rm_log impl. Still used for migration +//! from rm_log -> leaf_set on startup and fast sync. + +use std::fs::File; +use std::io::{self, BufWriter, Write}; + +use core::ser; +use types::read_ordered_vec; + +/// Log file fully cached in memory containing all positions that should be +/// eventually removed from the MMR append-only data file. Allows quick +/// checking of whether a piece of data has been marked for deletion. When the +/// log becomes too long, the MMR backend will actually remove chunks from the +/// MMR data file and truncate the remove log. +pub struct RemoveLog { + path: String, + /// Ordered vector of MMR positions that should get eventually removed. + pub removed: Vec<(u64, u32)>, + // Holds positions temporarily until flush is called. + removed_tmp: Vec<(u64, u32)>, + // Holds truncated removed temporarily until discarded or committed + removed_bak: Vec<(u64, u32)>, +} + +impl RemoveLog { + /// Open the remove log file. + /// The content of the file will be read in memory for fast checking. + pub fn open(path: String) -> io::Result<RemoveLog> { + let removed = read_ordered_vec(path.clone(), 12)?; + Ok(RemoveLog { + path: path, + removed: removed, + removed_tmp: vec![], + removed_bak: vec![], + }) + } + + /// Rewinds the remove log back to the provided index. + /// We keep everything in the rm_log from that index and earlier. + /// In practice the index is a block height, so we rewind back to that block + /// keeping everything in the rm_log up to and including that block. + pub fn rewind(&mut self, idx: u32) -> io::Result<()> { + // backing it up before truncating (unless we already have a backup) + if self.removed_bak.is_empty() { + self.removed_bak = self.removed.clone(); + } + + if idx == 0 { + self.removed = vec![]; + self.removed_tmp = vec![]; + } else { + // retain rm_log entries up to and including those at the provided index + self.removed.retain(|&(_, x)| x <= idx); + self.removed_tmp.retain(|&(_, x)| x <= idx); + } + Ok(()) + } + + /// Append a set of new positions to the remove log. Both adds those + /// positions the ordered in-memory set and to the file. + pub fn append(&mut self, elmts: Vec<u64>, index: u32) -> io::Result<()> { + for elmt in elmts { + match self.removed_tmp.binary_search(&(elmt, index)) { + Ok(_) => continue, + Err(idx) => { + self.removed_tmp.insert(idx, (elmt, index)); + } + } + } + Ok(()) + } + + /// Flush the positions to remove to file. + pub fn flush(&mut self) -> io::Result<()> { + for elmt in &self.removed_tmp { + match self.removed.binary_search(&elmt) { + Ok(_) => continue, + Err(idx) => { + self.removed.insert(idx, *elmt); + } + } + } + let mut file = BufWriter::new(File::create(self.path.clone())?); + for elmt in &self.removed { + file.write_all(&ser::ser_vec(&elmt).unwrap()[..])?; + } + self.removed_tmp = vec![]; + self.removed_bak = vec![]; + file.flush() + } + + /// Discard pending changes + pub fn discard(&mut self) { + if self.removed_bak.len() > 0 { + self.removed = self.removed_bak.clone(); + self.removed_bak = vec![]; + } + self.removed_tmp = vec![]; + } + + /// Whether the remove log currently includes the provided position. + pub fn includes(&self, elmt: u64) -> bool { + include_tuple(&self.removed, elmt) || include_tuple(&self.removed_tmp, elmt) + } + + /// Number of positions stored in the remove log. + pub fn len(&self) -> usize { + self.removed.len() + } + + /// Return vec of pos for removed elements before the provided cutoff index. + /// Useful for when we prune and compact an MMR. + pub fn removed_pre_cutoff(&self, cutoff_idx: u32) -> Vec<u64> { + self.removed + .iter() + .filter_map( + |&(pos, idx)| { + if idx < cutoff_idx { + Some(pos) + } else { + None + } + }, + ) + .collect() + } +} + +fn include_tuple(v: &Vec<(u64, u32)>, e: u64) -> bool { + if let Err(pos) = v.binary_search(&(e, 0)) { + if pos < v.len() && v[pos].0 == e { + return true; + } + } + false +} diff --git a/store/src/types.rs b/store/src/types.rs index 39ccdf905..cc98fbde2 100644 --- a/store/src/types.rs +++ b/store/src/types.rs @@ -250,134 +250,6 @@ impl AppendOnlyFile { } } -/// Log file fully cached in memory containing all positions that should be -/// eventually removed from the MMR append-only data file. Allows quick -/// checking of whether a piece of data has been marked for deletion. When the -/// log becomes too long, the MMR backend will actually remove chunks from the -/// MMR data file and truncate the remove log. -pub struct RemoveLog { - path: String, - /// Ordered vector of MMR positions that should get eventually removed. - pub removed: Vec<(u64, u32)>, - // Holds positions temporarily until flush is called. - removed_tmp: Vec<(u64, u32)>, - // Holds truncated removed temporarily until discarded or committed - removed_bak: Vec<(u64, u32)>, -} - -impl RemoveLog { - /// Open the remove log file. - /// The content of the file will be read in memory for fast checking. - pub fn open(path: String) -> io::Result<RemoveLog> { - let removed = read_ordered_vec(path.clone(), 12)?; - Ok(RemoveLog { - path: path, - removed: removed, - removed_tmp: vec![], - removed_bak: vec![], - }) - } - - /// Rewinds the remove log back to the provided index. - /// We keep everything in the rm_log from that index and earlier. - /// In practice the index is a block height, so we rewind back to that block - /// keeping everything in the rm_log up to and including that block. - pub fn rewind(&mut self, idx: u32) -> io::Result<()> { - // backing it up before truncating (unless we already have a backup) - if self.removed_bak.is_empty() { - self.removed_bak = self.removed.clone(); - } - - if idx == 0 { - self.removed = vec![]; - self.removed_tmp = vec![]; - } else { - // retain rm_log entries up to and including those at the provided index - self.removed.retain(|&(_, x)| x <= idx); - self.removed_tmp.retain(|&(_, x)| x <= idx); - } - Ok(()) - } - - /// Append a set of new positions to the remove log. Both adds those - /// positions the ordered in-memory set and to the file. - pub fn append(&mut self, elmts: Vec<u64>, index: u32) -> io::Result<()> { - for elmt in elmts { - match self.removed_tmp.binary_search(&(elmt, index)) { - Ok(_) => continue, - Err(idx) => { - self.removed_tmp.insert(idx, (elmt, index)); - } - } - } - Ok(()) - } - - /// Flush the positions to remove to file. - pub fn flush(&mut self) -> io::Result<()> { - for elmt in &self.removed_tmp { - match self.removed.binary_search(&elmt) { - Ok(_) => continue, - Err(idx) => { - self.removed.insert(idx, *elmt); - } - } - } - let mut file = BufWriter::new(File::create(self.path.clone())?); - for elmt in &self.removed { - file.write_all(&ser::ser_vec(&elmt).unwrap()[..])?; - } - self.removed_tmp = vec![]; - self.removed_bak = vec![]; - file.flush() - } - - /// Discard pending changes - pub fn discard(&mut self) { - if self.removed_bak.len() > 0 { - self.removed = self.removed_bak.clone(); - self.removed_bak = vec![]; - } - self.removed_tmp = vec![]; - } - - /// Whether the remove log currently includes the provided position. - pub fn includes(&self, elmt: u64) -> bool { - include_tuple(&self.removed, elmt) || include_tuple(&self.removed_tmp, elmt) - } - - /// Number of positions stored in the remove log. - pub fn len(&self) -> usize { - self.removed.len() - } - - /// Return vec of pos for removed elements before the provided cutoff index. - /// Useful for when we prune and compact an MMR. - pub fn removed_pre_cutoff(&self, cutoff_idx: u32) -> Vec<u64> { - self.removed - .iter() - .filter_map( - |&(pos, idx)| { - if idx < cutoff_idx { - Some(pos) - } else { - None - } - }, - ) - .collect() - } -} - -fn include_tuple(v: &Vec<(u64, u32)>, e: u64) -> bool { - if let Err(pos) = v.binary_search(&(e, 0)) { - if pos < v.len() && v[pos].0 == e { - return true; - } - } - false -} - /// Read an ordered vector of scalars from a file. pub fn read_ordered_vec<T>(path: String, elmt_len: usize) -> io::Result<Vec<T>> where diff --git a/store/tests/pmmr.rs b/store/tests/pmmr.rs index c6d7ea32a..2bf18db6b 100644 --- a/store/tests/pmmr.rs +++ b/store/tests/pmmr.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +extern crate croaring; extern crate env_logger; extern crate grin_core as core; extern crate grin_store as store; @@ -19,6 +20,8 @@ extern crate time; use std::fs; +use croaring::Bitmap; + use core::core::pmmr::{Backend, PMMR}; use core::ser::{Error, PMMRIndexHashable, PMMRable, Readable, Reader, Writeable, Writer}; use store::types::prune_noop; @@ -26,7 +29,7 @@ use store::types::prune_noop; #[test] fn pmmr_append() { let (data_dir, elems) = setup("append"); - let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string(), None).unwrap(); // adding first set of 4 elements and sync let mut mmr_size = load(0, &elems[0..4], &mut backend); @@ -76,7 +79,7 @@ fn pmmr_compact_leaf_sibling() { let (data_dir, elems) = setup("compact_leaf_sibling"); // setup the mmr store with all elements - let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string(), None).unwrap(); let mmr_size = load(0, &elems[..], &mut backend); backend.sync().unwrap(); @@ -96,10 +99,10 @@ fn pmmr_compact_leaf_sibling() { // prune pos 1 { let mut pmmr = PMMR::at(&mut backend, mmr_size); - pmmr.prune(1, 1).unwrap(); + pmmr.prune(1).unwrap(); // prune pos 8 as well to push the remove list past the cutoff - pmmr.prune(8, 1).unwrap(); + pmmr.prune(8).unwrap(); } backend.sync().unwrap(); @@ -120,7 +123,9 @@ fn pmmr_compact_leaf_sibling() { assert_eq!(backend.get_from_file(1).unwrap(), pos_1_hash); // aggressively compact the PMMR files - backend.check_compact(1, 2, &prune_noop).unwrap(); + backend + .check_compact(1, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); // check pos 1, 2, 3 are in the state we expect after compacting { @@ -146,7 +151,7 @@ fn pmmr_prune_compact() { let (data_dir, elems) = setup("prune_compact"); // setup the mmr store with all elements - let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string(), None).unwrap(); let mmr_size = load(0, &elems[..], &mut backend); backend.sync().unwrap(); @@ -159,9 +164,9 @@ fn pmmr_prune_compact() { // pruning some choice nodes { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(1, 1).unwrap(); - pmmr.prune(4, 1).unwrap(); - pmmr.prune(5, 1).unwrap(); + pmmr.prune(1).unwrap(); + pmmr.prune(4).unwrap(); + pmmr.prune(5).unwrap(); } backend.sync().unwrap(); @@ -176,7 +181,9 @@ fn pmmr_prune_compact() { } // compact - backend.check_compact(2, 2, &prune_noop).unwrap(); + backend + .check_compact(2, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); // recheck the root and stored data { @@ -194,7 +201,7 @@ fn pmmr_reload() { let (data_dir, elems) = setup("reload"); // set everything up with an initial backend - let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string(), None).unwrap(); let mmr_size = load(0, &elems[..], &mut backend); @@ -215,23 +222,27 @@ fn pmmr_reload() { // prune a node so we have prune data { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(1, 1).unwrap(); + pmmr.prune(1).unwrap(); } backend.sync().unwrap(); // now check and compact the backend - backend.check_compact(1, 2, &prune_noop).unwrap(); + backend + .check_compact(1, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); backend.sync().unwrap(); // prune another node to force compact to actually do something { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(4, 1).unwrap(); - pmmr.prune(2, 1).unwrap(); + pmmr.prune(4).unwrap(); + pmmr.prune(2).unwrap(); } backend.sync().unwrap(); - backend.check_compact(1, 2, &prune_noop).unwrap(); + backend + .check_compact(4, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); backend.sync().unwrap(); assert_eq!(backend.unpruned_size().unwrap(), mmr_size); @@ -239,7 +250,7 @@ fn pmmr_reload() { // prune some more to get rm log data { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(5, 1).unwrap(); + pmmr.prune(5).unwrap(); } backend.sync().unwrap(); assert_eq!(backend.unpruned_size().unwrap(), mmr_size); @@ -248,7 +259,7 @@ fn pmmr_reload() { // create a new backend referencing the data files // and check everything still works as expected { - let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string(), None).unwrap(); assert_eq!(backend.unpruned_size().unwrap(), mmr_size); { let pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); @@ -259,8 +270,8 @@ fn pmmr_reload() { assert_eq!(backend.get_hash(1), None); assert_eq!(backend.get_hash(2), None); - // pos 3 is removed (via prune list) - assert_eq!(backend.get_hash(3), None); + // pos 3 is "removed" but we keep the hash around for non-leaf pos. + assert_eq!(backend.get_hash(3), Some(pos_3_hash)); // pos 4 is removed (via prune list) assert_eq!(backend.get_hash(4), None); @@ -286,7 +297,7 @@ fn pmmr_reload() { #[test] fn pmmr_rewind() { let (data_dir, elems) = setup("rewind"); - let mut backend = store::pmmr::PMMRBackend::new(data_dir.clone()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.clone(), None).unwrap(); // adding elements and keeping the corresponding root let mut mmr_size = load(0, &elems[0..4], &mut backend); @@ -309,43 +320,63 @@ fn pmmr_rewind() { // prune the first 4 elements (leaves at pos 1, 2, 4, 5) { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(1, 1).unwrap(); - pmmr.prune(2, 1).unwrap(); - pmmr.prune(4, 1).unwrap(); - pmmr.prune(5, 1).unwrap(); + pmmr.prune(1).unwrap(); + pmmr.prune(2).unwrap(); + pmmr.prune(4).unwrap(); + pmmr.prune(5).unwrap(); } backend.sync().unwrap(); - // and compact the MMR to remove the 2 pruned elements - backend.check_compact(2, 2, &prune_noop).unwrap(); + // and compact the MMR to remove the pruned elements + backend + .check_compact(6, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); backend.sync().unwrap(); // rewind and check the roots still match { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.rewind(9, 3).unwrap(); + pmmr.rewind(9, &Bitmap::of(&vec![11, 12, 16]), &Bitmap::create()) + .unwrap(); assert_eq!(pmmr.root(), root2); } + + println!("doing a sync after rewinding"); backend.sync().unwrap(); + { let pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, 10); assert_eq!(pmmr.root(), root2); } - // also check the data file looks correct - // everything up to and including pos 7 should be pruned from the data file - // except the data at pos 8 and 9 (corresponding to elements 5 and 6) - for pos in 1..8 { + // Also check the data file looks correct. + // pos 1, 2, 4, 5 are all leaves but these have been pruned. + for pos in vec![1, 2, 4, 5] { assert_eq!(backend.get_data(pos), None); } + // pos 3, 6, 7 are non-leaves so we have no data for these + for pos in vec![3, 6, 7] { + assert_eq!(backend.get_data(pos), None); + } + + // pos 8 and 9 are both leaves and should be unaffected by prior pruning + + for x in 1..16 { + println!("data at {}, {:?}", x, backend.get_data(x)); + } + assert_eq!(backend.get_data(8), Some(elems[4])); + assert_eq!(backend.get_hash(8), Some(elems[4].hash_with_index(7))); + assert_eq!(backend.get_data(9), Some(elems[5])); + assert_eq!(backend.get_hash(9), Some(elems[5].hash_with_index(8))); assert_eq!(backend.data_size().unwrap(), 2); { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, 10); - pmmr.rewind(5, 3).unwrap(); + pmmr.rewind(5, &Bitmap::create(), &Bitmap::create()) + .unwrap(); assert_eq!(pmmr.root(), root1); } backend.sync().unwrap(); @@ -371,31 +402,35 @@ fn pmmr_rewind() { #[test] fn pmmr_compact_single_leaves() { let (data_dir, elems) = setup("compact_single_leaves"); - let mut backend = store::pmmr::PMMRBackend::new(data_dir.clone()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.clone(), None).unwrap(); let mmr_size = load(0, &elems[0..5], &mut backend); backend.sync().unwrap(); { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(1, 1).unwrap(); - pmmr.prune(4, 1).unwrap(); + pmmr.prune(1).unwrap(); + pmmr.prune(4).unwrap(); } backend.sync().unwrap(); // compact - backend.check_compact(2, 2, &prune_noop).unwrap(); + backend + .check_compact(2, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(2, 2).unwrap(); - pmmr.prune(5, 2).unwrap(); + pmmr.prune(2).unwrap(); + pmmr.prune(5).unwrap(); } backend.sync().unwrap(); // compact - backend.check_compact(2, 3, &prune_noop).unwrap(); + backend + .check_compact(2, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); teardown(data_dir); } @@ -403,7 +438,7 @@ fn pmmr_compact_single_leaves() { #[test] fn pmmr_compact_entire_peak() { let (data_dir, elems) = setup("compact_entire_peak"); - let mut backend = store::pmmr::PMMRBackend::new(data_dir.clone()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.clone(), None).unwrap(); let mmr_size = load(0, &elems[0..5], &mut backend); backend.sync().unwrap(); @@ -415,25 +450,27 @@ fn pmmr_compact_entire_peak() { // prune all leaves under the peak at pos 7 { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(1, 1).unwrap(); - pmmr.prune(2, 1).unwrap(); - pmmr.prune(4, 1).unwrap(); - pmmr.prune(5, 1).unwrap(); + pmmr.prune(1).unwrap(); + pmmr.prune(2).unwrap(); + pmmr.prune(4).unwrap(); + pmmr.prune(5).unwrap(); } backend.sync().unwrap(); // compact - backend.check_compact(2, 2, &prune_noop).unwrap(); + backend + .check_compact(2, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); // now check we have pruned up to and including the peak at pos 7 // hash still available in underlying hash file - assert_eq!(backend.get_hash(7), None); + assert_eq!(backend.get_hash(7), Some(pos_7_hash)); assert_eq!(backend.get_from_file(7), Some(pos_7_hash)); // now check we still have subsequent hash and data where we expect - assert_eq!(backend.get_hash(8), Some(pos_8_hash)); assert_eq!(backend.get_data(8), Some(pos_8)); + assert_eq!(backend.get_hash(8), Some(pos_8_hash)); assert_eq!(backend.get_from_file(8), Some(pos_8_hash)); teardown(data_dir); @@ -442,7 +479,7 @@ fn pmmr_compact_entire_peak() { #[test] fn pmmr_compact_horizon() { let (data_dir, elems) = setup("compact_horizon"); - let mut backend = store::pmmr::PMMRBackend::new(data_dir.clone()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.clone(), None).unwrap(); let mmr_size = load(0, &elems[..], &mut backend); backend.sync().unwrap(); @@ -451,10 +488,10 @@ fn pmmr_compact_horizon() { assert_eq!(backend.data_size().unwrap(), 19); assert_eq!(backend.hash_size().unwrap(), 35); + let pos_1_hash = backend.get_hash(1).unwrap(); + let pos_2_hash = backend.get_hash(2).unwrap(); let pos_3_hash = backend.get_hash(3).unwrap(); - let pos_6_hash = backend.get_hash(6).unwrap(); - let pos_7_hash = backend.get_hash(7).unwrap(); let pos_8 = backend.get_data(8).unwrap(); @@ -464,25 +501,25 @@ fn pmmr_compact_horizon() { let pos_11_hash = backend.get_hash(11).unwrap(); { - // pruning some choice nodes with an increasing block height + // pruning some choice nodes { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(4, 1).unwrap(); - pmmr.prune(5, 2).unwrap(); - pmmr.prune(1, 3).unwrap(); - pmmr.prune(2, 4).unwrap(); + pmmr.prune(4).unwrap(); + pmmr.prune(5).unwrap(); + pmmr.prune(1).unwrap(); + pmmr.prune(2).unwrap(); } backend.sync().unwrap(); // check we can read hashes and data correctly after pruning { - assert_eq!(backend.get_hash(3), None); + // assert_eq!(backend.get_hash(3), None); assert_eq!(backend.get_from_file(3), Some(pos_3_hash)); - assert_eq!(backend.get_hash(6), None); + // assert_eq!(backend.get_hash(6), None); assert_eq!(backend.get_from_file(6), Some(pos_6_hash)); - assert_eq!(backend.get_hash(7), None); + // assert_eq!(backend.get_hash(7), None); assert_eq!(backend.get_from_file(7), Some(pos_7_hash)); assert_eq!(backend.get_hash(8), Some(pos_8_hash)); @@ -495,18 +532,25 @@ fn pmmr_compact_horizon() { } // compact - backend.check_compact(2, 3, &prune_noop).unwrap(); + backend + .check_compact(4, &Bitmap::create(), &Bitmap::of(&vec![1, 2]), &prune_noop) + .unwrap(); backend.sync().unwrap(); // check we can read a hash by pos correctly after compaction { - assert_eq!(backend.get_hash(3), None); - assert_eq!(backend.get_from_file(3), Some(pos_3_hash)); + assert_eq!(backend.get_hash(1), None); + assert_eq!(backend.get_from_file(1), Some(pos_1_hash)); - assert_eq!(backend.get_hash(6), None); - assert_eq!(backend.get_from_file(6), Some(pos_6_hash)); + assert_eq!(backend.get_hash(2), None); + assert_eq!(backend.get_from_file(2), Some(pos_2_hash)); + + assert_eq!(backend.get_hash(3), Some(pos_3_hash)); + + assert_eq!(backend.get_hash(4), None); + assert_eq!(backend.get_hash(5), None); + assert_eq!(backend.get_hash(6), Some(pos_6_hash)); - assert_eq!(backend.get_hash(7), None); assert_eq!(backend.get_from_file(7), Some(pos_7_hash)); assert_eq!(backend.get_hash(8), Some(pos_8_hash)); @@ -517,13 +561,14 @@ fn pmmr_compact_horizon() { // recheck stored data { // recreate backend - let backend = store::pmmr::PMMRBackend::<TestElem>::new(data_dir.to_string()).unwrap(); + let backend = + store::pmmr::PMMRBackend::<TestElem>::new(data_dir.to_string(), None).unwrap(); - assert_eq!(backend.data_size().unwrap(), 17); - assert_eq!(backend.hash_size().unwrap(), 33); + assert_eq!(backend.data_size().unwrap(), 19); + assert_eq!(backend.hash_size().unwrap(), 35); // check we can read a hash by pos correctly from recreated backend - assert_eq!(backend.get_hash(7), None); + assert_eq!(backend.get_hash(7), Some(pos_7_hash)); assert_eq!(backend.get_from_file(7), Some(pos_7_hash)); assert_eq!(backend.get_hash(8), Some(pos_8_hash)); @@ -531,31 +576,37 @@ fn pmmr_compact_horizon() { } { - let mut backend = store::pmmr::PMMRBackend::<TestElem>::new(data_dir.to_string()).unwrap(); + let mut backend = + store::pmmr::PMMRBackend::<TestElem>::new(data_dir.to_string(), None).unwrap(); { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(8, 5).unwrap(); - pmmr.prune(9, 5).unwrap(); + pmmr.prune(8).unwrap(); + pmmr.prune(9).unwrap(); } // compact some more - backend.check_compact(1, 6, &prune_noop).unwrap(); + backend + .check_compact(9, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); } // recheck stored data { // recreate backend - let backend = store::pmmr::PMMRBackend::<TestElem>::new(data_dir.to_string()).unwrap(); + let backend = + store::pmmr::PMMRBackend::<TestElem>::new(data_dir.to_string(), None).unwrap(); // 0010012001001230 - assert_eq!(backend.data_size().unwrap(), 15); - assert_eq!(backend.hash_size().unwrap(), 29); + assert_eq!(backend.data_size().unwrap(), 13); + assert_eq!(backend.hash_size().unwrap(), 27); // check we can read a hash by pos correctly from recreated backend - assert_eq!(backend.get_hash(7), None); + // get_hash() and get_from_file() should return the same value + // and we only store leaves in the rm_log so pos 7 still has a hash in there + assert_eq!(backend.get_hash(7), Some(pos_7_hash)); assert_eq!(backend.get_from_file(7), Some(pos_7_hash)); assert_eq!(backend.get_hash(11), Some(pos_11_hash)); @@ -571,7 +622,7 @@ fn compact_twice() { let (data_dir, elems) = setup("compact_twice"); // setup the mmr store with all elements - let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string()).unwrap(); + let mut backend = store::pmmr::PMMRBackend::new(data_dir.to_string(), None).unwrap(); let mmr_size = load(0, &elems[..], &mut backend); backend.sync().unwrap(); @@ -584,9 +635,9 @@ fn compact_twice() { // pruning some choice nodes { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(1, 1).unwrap(); - pmmr.prune(2, 1).unwrap(); - pmmr.prune(4, 1).unwrap(); + pmmr.prune(1).unwrap(); + pmmr.prune(2).unwrap(); + pmmr.prune(4).unwrap(); } backend.sync().unwrap(); @@ -599,7 +650,9 @@ fn compact_twice() { } // compact - backend.check_compact(2, 2, &prune_noop).unwrap(); + backend + .check_compact(2, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); // recheck the root and stored data { @@ -612,9 +665,9 @@ fn compact_twice() { // now prune some more nodes { let mut pmmr: PMMR<TestElem, _> = PMMR::at(&mut backend, mmr_size); - pmmr.prune(5, 2).unwrap(); - pmmr.prune(8, 2).unwrap(); - pmmr.prune(9, 2).unwrap(); + pmmr.prune(5).unwrap(); + pmmr.prune(8).unwrap(); + pmmr.prune(9).unwrap(); } backend.sync().unwrap(); @@ -626,7 +679,9 @@ fn compact_twice() { } // compact - backend.check_compact(2, 3, &prune_noop).unwrap(); + backend + .check_compact(2, &Bitmap::create(), &Bitmap::create(), &prune_noop) + .unwrap(); // recheck the root and stored data { diff --git a/store/tests/rm_log_perf.rs b/store/tests/rm_log_perf.rs new file mode 100644 index 000000000..639ac8a54 --- /dev/null +++ b/store/tests/rm_log_perf.rs @@ -0,0 +1,109 @@ +// Copyright 2018 The Grin Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extern crate croaring; +extern crate env_logger; +extern crate grin_core as core; +extern crate grin_store as store; +extern crate time; + +use std::fs; +use std::time::{Duration, Instant}; + +use store::rm_log::RemoveLog; + +pub fn as_millis(d: Duration) -> u128 { + d.as_secs() as u128 * 1_000 as u128 + (d.subsec_nanos() / (1_000 * 1_000)) as u128 +} + +#[test] +fn test_rm_log_performance() { + let (mut rm_log, data_dir) = setup("rm_log_perf"); + + println!("Timing some common operations:"); + + // Add a 1000 pos to the rm_log and sync to disk. + let now = Instant::now(); + for x in 0..100 { + for y in 0..1000 { + let idx = x + 1; + let pos = (x * 1000) + y + 1; + rm_log.append(vec![pos], idx as u32).unwrap(); + } + rm_log.flush().unwrap(); + } + assert_eq!(rm_log.len(), 100_000); + println!( + "Adding 100 chunks of 1,000 pos to rm_log (syncing to disk) took {}ms", + as_millis(now.elapsed()) + ); + + // Add another 900,000 pos to the UTXO set, (do not sync each block, too + // expensive)... Simulates 1,000 blocks with 1,000 outputs each. + let now = Instant::now(); + for x in 100..1_000 { + for y in 0..1_000 { + let pos = (x * 1_000) + y + 1; + rm_log.append(vec![pos], (x + 1) as u32).unwrap(); + } + // Do not flush to disk each time (this gets very expensive). + // rm_log.flush().unwrap(); + } + // assert_eq!(rm_log.len(), 1_000_000); + println!( + "Adding 990 chunks of 1,000 pos to rm_log (without syncing) took {}ms", + as_millis(now.elapsed()) + ); + + // Simulate looking up existence of a large number of pos in the UTXO set. + let now = Instant::now(); + for x in 0..1_000_000 { + assert!(rm_log.includes(x + 1)); + } + println!( + "Checking 1,000,000 inclusions in rm_log took {}ms", + as_millis(now.elapsed()) + ); + + // Rewind pos in chunks of 1,000 to simulate rewinding over the same blocks. + let now = Instant::now(); + let mut x = 1_000; + while x > 0 { + rm_log.rewind(x - 1).unwrap(); + x = x - 1; + } + rm_log.flush().unwrap(); + assert_eq!(rm_log.len(), 0); + println!( + "Rewinding 1,000 chunks of 1,000 pos from rm_log took {}ms", + as_millis(now.elapsed()) + ); + + // panic!("stop here to display results"); + + teardown(data_dir); +} + +fn setup(test_name: &str) -> (RemoveLog, String) { + let _ = env_logger::init(); + let t = time::get_time(); + let data_dir = format!("./target/{}-{}", test_name, t.sec); + fs::create_dir_all(data_dir.clone()).unwrap(); + let rm_log = RemoveLog::open(format!("{}/{}", data_dir, "rm_log.bin")).unwrap(); + (rm_log, data_dir) +} + +fn teardown(data_dir: String) { + fs::remove_dir_all(data_dir).unwrap(); +} diff --git a/store/tests/test_bitmap.rs b/store/tests/test_bitmap.rs new file mode 100644 index 000000000..9ebe8cdcc --- /dev/null +++ b/store/tests/test_bitmap.rs @@ -0,0 +1,99 @@ +// Copyright 2018 The Grin Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extern crate croaring; +extern crate rand; + +use croaring::Bitmap; +use rand::Rng; + +// We can use "andnot" to rewind the rm_log easily by passing in a "bitmask" of +// all the subsequent pos we want to rewind. +#[test] +fn test_andnot_bitmap() { + // bitmap: 10010011 + // bitmask: ....1111 (i.e. rewind to leave first 4 pos in place) + // result: 1001.... + let bitmap: Bitmap = vec![1, 4, 7, 8].into_iter().collect(); + let bitmask: Bitmap = vec![5, 6, 7, 8].into_iter().collect(); + let res = bitmap.andnot(&bitmask); + assert_eq!(res.to_vec(), vec![1, 4]); +} + +// Alternatively we can use "and" to rewind the rm_log easily by passing in a +// "bitmask" of all the pos we want to keep. +#[test] +fn test_and_bitmap() { + // bitmap: 10010011 + // bitmask: 1111.... (i.e. rewind to leave first 4 pos in place) + // result: 1001.... + let bitmap: Bitmap = vec![1, 4, 7, 8].into_iter().collect(); + let bitmask: Bitmap = vec![1, 2, 3, 4].into_iter().collect(); + let res = bitmap.and(&bitmask); + assert_eq!(res.to_vec(), vec![1, 4]); +} + +#[test] +fn test_flip_bitmap() { + let bitmap: Bitmap = vec![1, 2, 4].into_iter().collect(); + let res = bitmap.flip(2..4); + assert_eq!(res.to_vec(), vec![1, 3, 4]); +} + +#[test] +fn test_a_small_bitmap() { + let bitmap: Bitmap = vec![1, 99, 1_000].into_iter().collect(); + let serialized_buffer = bitmap.serialize(); + + // we can store 3 pos in a roaring bitmap in 22 bytes + // this is compared to storing them as a vec of u64 values which would be 8 * 3 + // = 32 bytes + assert_eq!(serialized_buffer.len(), 22); +} + +#[test] +fn test_1000_inputs() { + let mut rng = rand::thread_rng(); + let mut bitmap = Bitmap::create(); + for _ in 1..1_000 { + let n = rng.gen_range(0, 1_000_000); + bitmap.add(n); + } + let serialized_buffer = bitmap.serialize(); + println!( + "bitmap with 1,000 (out of 1,000,000) values in it: {}", + serialized_buffer.len() + ); + bitmap.run_optimize(); + let serialized_buffer = bitmap.serialize(); + println!( + "bitmap with 1,000 (out of 1,000,000) values in it (optimized): {}", + serialized_buffer.len() + ); +} + +#[test] +fn test_a_big_bitmap() { + let mut bitmap: Bitmap = (1..1_000_000).collect(); + let serialized_buffer = bitmap.serialize(); + + // we can also store 1,000 pos in 2,014 bytes + // a vec of u64s here would be 8,000 bytes + assert_eq!(serialized_buffer.len(), 131_208); + + // but note we can optimize this heavily to get down to 230 bytes... + assert!(bitmap.run_optimize()); + let serialized_buffer = bitmap.serialize(); + assert_eq!(serialized_buffer.len(), 230); +} diff --git a/store/tests/utxo_set_perf.rs b/store/tests/utxo_set_perf.rs new file mode 100644 index 000000000..f36a9bbb0 --- /dev/null +++ b/store/tests/utxo_set_perf.rs @@ -0,0 +1,110 @@ +// Copyright 2018 The Grin Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extern crate croaring; +extern crate env_logger; +extern crate grin_core as core; +extern crate grin_store as store; +extern crate time; + +use std::fs; +use std::time::{Duration, Instant}; + +use croaring::Bitmap; + +use store::leaf_set::LeafSet; + +pub fn as_millis(d: Duration) -> u128 { + d.as_secs() as u128 * 1_000 as u128 + (d.subsec_nanos() / (1_000 * 1_000)) as u128 +} + +#[test] +fn test_leaf_set_performance() { + let (mut leaf_set, data_dir) = setup("leaf_set_perf"); + + println!("Timing some common operations:"); + + // Add a million pos to the set, syncing data to disk in 1,000 pos chunks + // Simulating 1,000 blocks with 1,000 outputs each. + let now = Instant::now(); + for x in 0..1_000 { + for y in 0..1_000 { + let pos = (x * 1_000) + y + 1; + leaf_set.add(pos); + } + leaf_set.flush().unwrap(); + } + assert_eq!(leaf_set.len(), 1_000_000); + println!( + "Adding 1,000 chunks of 1,000 pos to leaf_set took {}ms", + as_millis(now.elapsed()) + ); + + // Simulate looking up existence of a large number of pos in the leaf_set. + let now = Instant::now(); + for x in 0..1_000_000 { + assert!(leaf_set.includes(x + 1)); + } + println!( + "Checking 1,000,000 inclusions in leaf_set took {}ms", + as_millis(now.elapsed()) + ); + + // Remove a large number of pos in chunks to simulate blocks containing tx + // spending outputs. Simulate 1,000 blocks each spending 1,000 outputs. + let now = Instant::now(); + for x in 0..1_000 { + for y in 0..1_000 { + let pos = (x * 1_000) + y + 1; + leaf_set.remove(pos); + } + leaf_set.flush().unwrap(); + } + assert_eq!(leaf_set.len(), 0); + println!( + "Removing 1,000 chunks of 1,000 pos from leaf_set took {}ms", + as_millis(now.elapsed()) + ); + + // Rewind pos in chunks of 1,000 to simulate rewinding over the same blocks. + let now = Instant::now(); + for x in 0..1_000 { + let from_pos = x * 1_000 + 1; + let to_pos = from_pos + 1_000; + let bitmap: Bitmap = (from_pos..to_pos).collect(); + leaf_set.rewind(&Bitmap::create(), &bitmap); + } + assert_eq!(leaf_set.len(), 1_000_000); + println!( + "Rewinding 1,000 chunks of 1,000 pos from leaf_set took {}ms", + as_millis(now.elapsed()) + ); + + // panic!("stop here to display results"); + + teardown(data_dir); +} + +fn setup(test_name: &str) -> (LeafSet, String) { + let _ = env_logger::init(); + let t = time::get_time(); + let data_dir = format!("./target/{}-{}", test_name, t.sec); + fs::create_dir_all(data_dir.clone()).unwrap(); + let leaf_set = LeafSet::open(format!("{}/{}", data_dir, "utxo.bin")).unwrap(); + (leaf_set, data_dir) +} + +fn teardown(data_dir: String) { + fs::remove_dir_all(data_dir).unwrap(); +} diff --git a/util/src/secp_static.rs b/util/src/secp_static.rs index 4b21d83fd..283356939 100644 --- a/util/src/secp_static.rs +++ b/util/src/secp_static.rs @@ -33,8 +33,7 @@ pub fn static_secp_instance() -> Arc<Mutex<secp::Secp256k1>> { SECP256K1.clone() } -// TODO - Can we generate this once and memoize it for subsequent use? -// Even if we clone it each time it will likely be faster than this. +/// Convenient way to generate a commitment to zero. pub fn commit_to_zero_value() -> secp::pedersen::Commitment { let secp = static_secp_instance(); let secp = secp.lock().unwrap();