From 5ccaf8cc6706c5a1563f4a33c7c72060ec47af2d Mon Sep 17 00:00:00 2001 From: Mark Renten <42224876+rentenmark@users.noreply.github.com> Date: Mon, 10 Dec 2018 04:35:25 -0500 Subject: [PATCH] functional version - cleanup pmmr_leaf snapshot files (from fast-sync) (#2109) * Issue #1348 - cleanup pmmr_leaf snapshot files (from fast-sync) * create a new function in store::pmmr to delete files * call the new function during compaction * create test coverage for the new function * formatting * Fix rustdoc issue I had to add a text annotation to the preformatted block to prevent cargo from creating a doctest. * don't delete files that were recently accessed * parameterize the age in the cleanup function * add a new constant defining how long the rewind files should be saved for * enhance the unit test to include files that were too new to be deleted and files that were old enough to be deleted * formatting * formatting * remove errant println statement * functional style change --- Cargo.lock | 1 + store/Cargo.toml | 1 + store/src/pmmr.rs | 103 ++++++++++++++++++++++++++++++++++++++- store/tests/pmmr.rs | 114 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 218 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index c85e4cffc..175b20563 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -914,6 +914,7 @@ dependencies = [ "env_logger 0.5.13 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "filetime 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", "grin_core 0.4.2", "grin_util 0.4.2", "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/store/Cargo.toml b/store/Cargo.toml index f291216cc..ebd665f9c 100644 --- a/store/Cargo.toml +++ b/store/Cargo.toml @@ -28,3 +28,4 @@ grin_util = { path = "../util", version = "0.4.2" } [dev-dependencies] chrono = "0.4.4" rand = "0.5" +filetime = "0.2" diff --git a/store/src/pmmr.rs b/store/src/pmmr.rs index 604ac0399..d2b7aeb99 100644 --- a/store/src/pmmr.rs +++ b/store/src/pmmr.rs @@ -13,7 +13,7 @@ //! Implementation of the persistent Backend for the prunable MMR tree. -use std::{fs, io}; +use std::{fs, io, marker, time}; use crate::core::core::hash::{Hash, Hashed}; use crate::core::core::pmmr::{self, family, Backend}; @@ -28,6 +28,7 @@ const PMMR_HASH_FILE: &str = "pmmr_hash.bin"; const PMMR_DATA_FILE: &str = "pmmr_data.bin"; const PMMR_LEAF_FILE: &str = "pmmr_leaf.bin"; const PMMR_PRUN_FILE: &str = "pmmr_prun.bin"; +const REWIND_FILE_CLEANUP_DURATION_SECONDS: u64 = 60 * 60 * 24; // 24 hours as seconds /// The list of PMMR_Files for internal purposes pub const PMMR_FILES: [&str; 4] = [ @@ -339,9 +340,18 @@ impl PMMRBackend { // Optimize the bitmap storage in the process. self.leaf_set.flush()?; + // 7. cleanup rewind files + self.clean_rewind_files()?; + Ok(true) } + fn clean_rewind_files(&self) -> io::Result { + let data_dir = self.data_dir.clone(); + let pattern = format!("{}.", PMMR_LEAF_FILE); + clean_files_by_prefix(data_dir, &pattern, REWIND_FILE_CLEANUP_DURATION_SECONDS) + } + fn pos_to_rm(&self, cutoff_pos: u64, rewind_rm_pos: &Bitmap) -> (Bitmap, Bitmap) { let mut expanded = Bitmap::create(); @@ -386,3 +396,94 @@ fn removed_excl_roots(removed: &Bitmap) -> Bitmap { }) .collect() } + +/// Quietly clean a directory up based on a given prefix. +/// If the file was accessed within cleanup_duration_seconds from the beginning of +/// the function call, it will not be deleted. To delete all files, set cleanup_duration_seconds +/// to zero. +/// +/// Precondition is that path points to a directory. +/// +/// If you have files such as +/// ```text +/// foo +/// foo.1 +/// foo.2 +/// . +/// . +/// . +/// . +/// . +/// ``` +/// +/// call this function and you will get +/// +/// ```text +/// foo +/// ``` +/// +/// in the directory +/// +/// The return value will be the number of files that were deleted. +/// +/// This function will return an error whenever the call to `std;:fs::read_dir` +/// fails on the given path for any reason. +/// + +pub fn clean_files_by_prefix>( + path: P, + prefix_to_delete: &str, + cleanup_duration_seconds: u64, +) -> io::Result { + let now = time::SystemTime::now(); + let cleanup_duration = time::Duration::from_secs(cleanup_duration_seconds); + + let number_of_files_deleted: u32 = fs::read_dir(&path)? + .flat_map( + |possible_dir_entry| -> Result> { + // result implements iterator and so if we were to use map here + // we would have a list of Result> + // but because we use flat_map, the errors get "discarded" and we are + // left with a clean iterator over u32s + + // the error cases that come out of this code are numerous and + // we don't really mind throwing them away because the main point + // here is to clean up some files, if it doesn't work out it's not + // the end of the world + + let dir_entry: std::fs::DirEntry = possible_dir_entry?; + let metadata = dir_entry.metadata()?; + if metadata.is_dir() { + return Ok(0); // skip directories unconditionally + } + let accessed = metadata.accessed()?; + let duration_since_accessed = now.duration_since(accessed)?; + if duration_since_accessed <= cleanup_duration { + return Ok(0); // these files are still too new + } + let file_name = dir_entry + .file_name() + .into_string() + .ok() + .ok_or("could not convert filename into utf-8")?; + + // check to see if we want to delete this file? + if file_name.starts_with(prefix_to_delete) + && file_name.len() > prefix_to_delete.len() + { + // we want to delete it, try to do so + if fs::remove_file(dir_entry.path()).is_ok() { + // we successfully deleted a file + return Ok(1); + } + } + + // we either did not want to delete this file or could + // not for whatever reason. 0 files deleted. + Ok(0) + }, + ) + .sum(); + + Ok(number_of_files_deleted) +} diff --git a/store/tests/pmmr.rs b/store/tests/pmmr.rs index 9ccf0a3fd..258a46826 100644 --- a/store/tests/pmmr.rs +++ b/store/tests/pmmr.rs @@ -17,6 +17,7 @@ use grin_core as core; use grin_store as store; use std::fs; +use std::io::prelude::*; use chrono::prelude::Utc; use croaring::Bitmap; @@ -716,6 +717,117 @@ fn compact_twice() { teardown(data_dir); } +#[test] +fn cleanup_rewind_files_test() { + let expected = 10; + let prefix_to_delete = "foo"; + let prefix_to_save = "bar"; + let seconds_to_delete_after = 100; + + // create the scenario + let (data_dir, _) = setup("cleanup_rewind_files_test"); + // create some files with the delete prefix that aren't yet old enough to delete + create_numbered_files(&data_dir, expected, prefix_to_delete, 0, 0); + // create some files with the delete prefix that are old enough to delete + create_numbered_files( + &data_dir, + expected, + prefix_to_delete, + seconds_to_delete_after, + expected, + ); + // create some files with the save prefix that are old enough to delete, but will be saved because they don't start + // with the right prefix + create_numbered_files( + &data_dir, + expected, + prefix_to_save, + seconds_to_delete_after, + 0, + ); + + // run the cleaner + let actual = + store::pmmr::clean_files_by_prefix(&data_dir, prefix_to_delete, seconds_to_delete_after) + .unwrap(); + assert_eq!( + actual, expected, + "the clean files by prefix function did not report the correct number of files deleted" + ); + + // check that the reported number is actually correct, the block is to borrow data_dir for the closure + { + // this function simply counts the number of files in the directory based on the prefix + let count_fn = |prefix| { + let mut remaining_count = 0; + for entry in fs::read_dir(&data_dir).unwrap() { + if entry + .unwrap() + .file_name() + .into_string() + .unwrap() + .starts_with(prefix) + { + remaining_count += 1; + } + } + remaining_count + }; + + assert_eq!( + count_fn(prefix_to_delete), + expected, // we expect this many to be left because they weren't old enough to be deleted yet + "it should delete all of the files it is supposed to delete" + ); + assert_eq!( + count_fn(prefix_to_save), + expected, + "it should delete none of the files it is not supposed to" + ); + } + + teardown(data_dir); +} + +/// Create some files for testing with, for example +/// +/// ```text +/// create_numbered_files(".", 3, "hello.txt.", 100, 2) +/// ``` +/// +/// will create files +/// +/// ```text +/// hello.txt.2 +/// hello.txt.3 +/// hello.txt.4 +/// ``` +/// +/// in the current working directory that are all 100 seconds old (modified and accessed time) +/// +fn create_numbered_files( + data_dir: &str, + num_files: u32, + prefix: &str, + last_accessed_delay_seconds: u64, + start_index: u32, +) { + let now = std::time::SystemTime::now(); + let time_to_set = now - std::time::Duration::from_secs(last_accessed_delay_seconds); + let time_to_set_ft = filetime::FileTime::from_system_time(time_to_set); + + for rewind_file_num in 0..num_files { + let path = std::path::Path::new(&data_dir).join(format!( + "{}.{}", + prefix, + start_index + rewind_file_num + )); + let mut file = fs::File::create(path.clone()).unwrap(); + let metadata = file.metadata().unwrap(); + filetime::set_file_times(path, time_to_set_ft, time_to_set_ft); + } +} + fn setup(tag: &str) -> (String, Vec) { match env_logger::try_init() { Ok(_) => println!("Initializing env logger"), @@ -737,6 +849,8 @@ fn setup(tag: &str) -> (String, Vec) { (data_dir, elems) } +/// note that taking ownership of the data_dir is a feature +/// because it will not be able to be used after teardown as intended fn teardown(data_dir: String) { fs::remove_dir_all(data_dir).unwrap(); }