functional version - cleanup pmmr_leaf snapshot files (from fast-sync) (#2109)

* Issue #1348 - cleanup pmmr_leaf snapshot files (from fast-sync)

* create a new function in store::pmmr to delete files
* call the new function during compaction
* create test coverage for the new function

* formatting

* Fix rustdoc issue

I had to add a text annotation to the preformatted block to prevent cargo from creating a doctest.

* don't delete files that were recently accessed

* parameterize the age in the cleanup function
* add a new constant defining how long the rewind files should be saved for
* enhance the unit test to include files that were too new to be deleted and files that were old enough to be deleted

* formatting

* formatting

* remove errant println statement

* functional style change
This commit is contained in:
Mark Renten 2018-12-10 04:35:25 -05:00 committed by Antioch Peverell
parent 5f0f0f6ff9
commit 5ccaf8cc67
4 changed files with 218 additions and 1 deletions

1
Cargo.lock generated
View file

@ -914,6 +914,7 @@ dependencies = [
"env_logger 0.5.13 (registry+https://github.com/rust-lang/crates.io-index)",
"failure 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"failure_derive 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"filetime 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
"grin_core 0.4.2",
"grin_util 0.4.2",
"libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)",

View file

@ -28,3 +28,4 @@ grin_util = { path = "../util", version = "0.4.2" }
[dev-dependencies]
chrono = "0.4.4"
rand = "0.5"
filetime = "0.2"

View file

@ -13,7 +13,7 @@
//! Implementation of the persistent Backend for the prunable MMR tree.
use std::{fs, io};
use std::{fs, io, marker, time};
use crate::core::core::hash::{Hash, Hashed};
use crate::core::core::pmmr::{self, family, Backend};
@ -28,6 +28,7 @@ const PMMR_HASH_FILE: &str = "pmmr_hash.bin";
const PMMR_DATA_FILE: &str = "pmmr_data.bin";
const PMMR_LEAF_FILE: &str = "pmmr_leaf.bin";
const PMMR_PRUN_FILE: &str = "pmmr_prun.bin";
const REWIND_FILE_CLEANUP_DURATION_SECONDS: u64 = 60 * 60 * 24; // 24 hours as seconds
/// The list of PMMR_Files for internal purposes
pub const PMMR_FILES: [&str; 4] = [
@ -339,9 +340,18 @@ impl<T: PMMRable> PMMRBackend<T> {
// Optimize the bitmap storage in the process.
self.leaf_set.flush()?;
// 7. cleanup rewind files
self.clean_rewind_files()?;
Ok(true)
}
fn clean_rewind_files(&self) -> io::Result<u32> {
let data_dir = self.data_dir.clone();
let pattern = format!("{}.", PMMR_LEAF_FILE);
clean_files_by_prefix(data_dir, &pattern, REWIND_FILE_CLEANUP_DURATION_SECONDS)
}
fn pos_to_rm(&self, cutoff_pos: u64, rewind_rm_pos: &Bitmap) -> (Bitmap, Bitmap) {
let mut expanded = Bitmap::create();
@ -386,3 +396,94 @@ fn removed_excl_roots(removed: &Bitmap) -> Bitmap {
})
.collect()
}
/// Quietly clean a directory up based on a given prefix.
/// If the file was accessed within cleanup_duration_seconds from the beginning of
/// the function call, it will not be deleted. To delete all files, set cleanup_duration_seconds
/// to zero.
///
/// Precondition is that path points to a directory.
///
/// If you have files such as
/// ```text
/// foo
/// foo.1
/// foo.2
/// .
/// .
/// .
/// .
/// .
/// ```
///
/// call this function and you will get
///
/// ```text
/// foo
/// ```
///
/// in the directory
///
/// The return value will be the number of files that were deleted.
///
/// This function will return an error whenever the call to `std;:fs::read_dir`
/// fails on the given path for any reason.
///
pub fn clean_files_by_prefix<P: AsRef<std::path::Path>>(
path: P,
prefix_to_delete: &str,
cleanup_duration_seconds: u64,
) -> io::Result<u32> {
let now = time::SystemTime::now();
let cleanup_duration = time::Duration::from_secs(cleanup_duration_seconds);
let number_of_files_deleted: u32 = fs::read_dir(&path)?
.flat_map(
|possible_dir_entry| -> Result<u32, Box<std::error::Error>> {
// result implements iterator and so if we were to use map here
// we would have a list of Result<u32, Box<std::error::Error>>
// but because we use flat_map, the errors get "discarded" and we are
// left with a clean iterator over u32s
// the error cases that come out of this code are numerous and
// we don't really mind throwing them away because the main point
// here is to clean up some files, if it doesn't work out it's not
// the end of the world
let dir_entry: std::fs::DirEntry = possible_dir_entry?;
let metadata = dir_entry.metadata()?;
if metadata.is_dir() {
return Ok(0); // skip directories unconditionally
}
let accessed = metadata.accessed()?;
let duration_since_accessed = now.duration_since(accessed)?;
if duration_since_accessed <= cleanup_duration {
return Ok(0); // these files are still too new
}
let file_name = dir_entry
.file_name()
.into_string()
.ok()
.ok_or("could not convert filename into utf-8")?;
// check to see if we want to delete this file?
if file_name.starts_with(prefix_to_delete)
&& file_name.len() > prefix_to_delete.len()
{
// we want to delete it, try to do so
if fs::remove_file(dir_entry.path()).is_ok() {
// we successfully deleted a file
return Ok(1);
}
}
// we either did not want to delete this file or could
// not for whatever reason. 0 files deleted.
Ok(0)
},
)
.sum();
Ok(number_of_files_deleted)
}

View file

@ -17,6 +17,7 @@ use grin_core as core;
use grin_store as store;
use std::fs;
use std::io::prelude::*;
use chrono::prelude::Utc;
use croaring::Bitmap;
@ -716,6 +717,117 @@ fn compact_twice() {
teardown(data_dir);
}
#[test]
fn cleanup_rewind_files_test() {
let expected = 10;
let prefix_to_delete = "foo";
let prefix_to_save = "bar";
let seconds_to_delete_after = 100;
// create the scenario
let (data_dir, _) = setup("cleanup_rewind_files_test");
// create some files with the delete prefix that aren't yet old enough to delete
create_numbered_files(&data_dir, expected, prefix_to_delete, 0, 0);
// create some files with the delete prefix that are old enough to delete
create_numbered_files(
&data_dir,
expected,
prefix_to_delete,
seconds_to_delete_after,
expected,
);
// create some files with the save prefix that are old enough to delete, but will be saved because they don't start
// with the right prefix
create_numbered_files(
&data_dir,
expected,
prefix_to_save,
seconds_to_delete_after,
0,
);
// run the cleaner
let actual =
store::pmmr::clean_files_by_prefix(&data_dir, prefix_to_delete, seconds_to_delete_after)
.unwrap();
assert_eq!(
actual, expected,
"the clean files by prefix function did not report the correct number of files deleted"
);
// check that the reported number is actually correct, the block is to borrow data_dir for the closure
{
// this function simply counts the number of files in the directory based on the prefix
let count_fn = |prefix| {
let mut remaining_count = 0;
for entry in fs::read_dir(&data_dir).unwrap() {
if entry
.unwrap()
.file_name()
.into_string()
.unwrap()
.starts_with(prefix)
{
remaining_count += 1;
}
}
remaining_count
};
assert_eq!(
count_fn(prefix_to_delete),
expected, // we expect this many to be left because they weren't old enough to be deleted yet
"it should delete all of the files it is supposed to delete"
);
assert_eq!(
count_fn(prefix_to_save),
expected,
"it should delete none of the files it is not supposed to"
);
}
teardown(data_dir);
}
/// Create some files for testing with, for example
///
/// ```text
/// create_numbered_files(".", 3, "hello.txt.", 100, 2)
/// ```
///
/// will create files
///
/// ```text
/// hello.txt.2
/// hello.txt.3
/// hello.txt.4
/// ```
///
/// in the current working directory that are all 100 seconds old (modified and accessed time)
///
fn create_numbered_files(
data_dir: &str,
num_files: u32,
prefix: &str,
last_accessed_delay_seconds: u64,
start_index: u32,
) {
let now = std::time::SystemTime::now();
let time_to_set = now - std::time::Duration::from_secs(last_accessed_delay_seconds);
let time_to_set_ft = filetime::FileTime::from_system_time(time_to_set);
for rewind_file_num in 0..num_files {
let path = std::path::Path::new(&data_dir).join(format!(
"{}.{}",
prefix,
start_index + rewind_file_num
));
let mut file = fs::File::create(path.clone()).unwrap();
let metadata = file.metadata().unwrap();
filetime::set_file_times(path, time_to_set_ft, time_to_set_ft);
}
}
fn setup(tag: &str) -> (String, Vec<TestElem>) {
match env_logger::try_init() {
Ok(_) => println!("Initializing env logger"),
@ -737,6 +849,8 @@ fn setup(tag: &str) -> (String, Vec<TestElem>) {
(data_dir, elems)
}
/// note that taking ownership of the data_dir is a feature
/// because it will not be able to be used after teardown as intended
fn teardown(data_dir: String) {
fs::remove_dir_all(data_dir).unwrap();
}