From 4e6b475470d00c49feff6623e745a8bab5718394 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Tue, 26 May 2020 00:09:16 -0700 Subject: [PATCH] Benchmark and improve hashing performance - Add a `bench` feature that exposes a hasher benchmark. - Make the hasher read up to the next piece end and hash everything it reads, instead of hashing one byte at a time. This was a 4x improvement on the benchmark. (Terrible code == easy wins!) type: performance --- Cargo.lock | 340 ++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 15 +- benches/hasher.rs | 13 ++ bin/gen/src/kind.rs | 9 +- src/bench.rs | 62 ++++++++ src/hasher.rs | 21 ++- src/lib.rs | 3 + 7 files changed, 447 insertions(+), 16 deletions(-) create mode 100644 benches/hasher.rs create mode 100644 src/bench.rs diff --git a/Cargo.lock b/Cargo.lock index ecd1027..6bf50ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,9 +141,24 @@ version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41" dependencies = [ + "lazy_static", "memchr", + "regex-automata", + "serde", ] +[[package]] +name = "bumpalo" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5356f1d23ee24a1f785a56d1d1a5f0fd5b0f6a0c0fb2412ce11da71649ab78f6" + +[[package]] +name = "byteorder" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" + [[package]] name = "cargo_toml" version = "0.8.0" @@ -155,6 +170,15 @@ dependencies = [ "toml", ] +[[package]] +name = "cast" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" +dependencies = [ + "rustc_version", +] + [[package]] name = "cc" version = "1.0.52" @@ -225,6 +249,41 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "criterion" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63f696897c88b57f4ffe3c69d8e1a0613c7d0e6c4833363c8560fbde9c47b966" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddeaf7989f00f2e1d871a26a110f3ed713632feac17f65f03ca938c542618b60" +dependencies = [ + "cast", + "itertools", +] + [[package]] name = "crossbeam-channel" version = "0.4.2" @@ -235,6 +294,42 @@ dependencies = [ "maybe-uninit", ] +[[package]] +name = "crossbeam-deque" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "maybe-uninit", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "lazy_static", + "maybe-uninit", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-queue" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.7.2" @@ -246,6 +341,28 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "csv" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + [[package]] name = "ctor" version = "0.1.14" @@ -278,6 +395,12 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4358a9e11b9a09cf52383b451b49a169e8d797b68aa02301ff586d70d9661ea3" +[[package]] +name = "either" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" + [[package]] name = "encode_unicode" version = "0.3.6" @@ -488,6 +611,7 @@ dependencies = [ "bendy", "chrono", "console", + "criterion", "globset", "ignore", "imdl-indicatif", @@ -499,6 +623,7 @@ dependencies = [ "open", "pretty_assertions", "pretty_env_logger", + "rand", "regex", "serde", "serde-hex", @@ -528,6 +653,21 @@ dependencies = [ "regex", ] +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" + [[package]] name = "jobserver" version = "0.1.21" @@ -537,6 +677,15 @@ dependencies = [ "libc", ] +[[package]] +name = "js-sys" +version = "0.3.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa5a448de267e7358beaf4a5d849518fe9a0c13fce7afd44b06e68550e5562a7" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "kernel32-sys" version = "0.2.2" @@ -644,6 +793,15 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" +[[package]] +name = "memoffset" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8" +dependencies = [ + "autocfg", +] + [[package]] name = "nodrop" version = "0.1.14" @@ -679,12 +837,28 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "number_prefix" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" +[[package]] +name = "oorandom" +version = "11.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94af325bc33c7f60191be4e2c984d48aaa21e2854f473b85398344b60c9b6358" + [[package]] name = "open" version = "1.4.0" @@ -734,6 +908,18 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677" +[[package]] +name = "plotters" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9b1d9ca091d370ea3a78d5619145d1b59426ab0c9eedbad2514a4cee08bf389" +dependencies = [ + "js-sys", + "num-traits", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "ppv-lite86" version = "0.2.6" @@ -853,6 +1039,30 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rayon" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" +dependencies = [ + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" +dependencies = [ + "crossbeam-deque", + "crossbeam-queue", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.1.56" @@ -871,6 +1081,15 @@ dependencies = [ "thread_local", ] +[[package]] +name = "regex-automata" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" +dependencies = [ + "byteorder", +] + [[package]] name = "regex-syntax" version = "0.6.17" @@ -892,6 +1111,21 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1" + [[package]] name = "same-file" version = "1.0.6" @@ -901,6 +1135,27 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + [[package]] name = "serde" version = "1.0.106" @@ -941,6 +1196,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "serde_with" version = "1.4.0" @@ -1185,6 +1451,16 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "tinytemplate" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e4bc5ac99433e0dcb8b9f309dd271a165ae37dde129b9e0ce1bfdd8bfe4891" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "toml" version = "0.5.6" @@ -1277,6 +1553,70 @@ version = "0.9.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +[[package]] +name = "wasm-bindgen" +version = "0.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c7d40d09cdbf0f4895ae58cf57d92e1e57a9dd8ed2e8390514b54a47cc5551" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3972e137ebf830900db522d6c8fd74d1900dcfc733462e9a12e942b00b4ac94" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cd85aa2c579e8892442954685f0d801f9129de24fa2136b2c6a539c76b65776" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eb197bd3a47553334907ffd2f16507b4f4f01bbec3ac921a7719e0decdfe72a" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a91c2916119c17a8e316507afaaa2dd94b47646048014bbdf6bef098c1bb58ad" + +[[package]] +name = "web-sys" +version = "0.3.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bc359e5dd3b46cb9687a051d50a2fdd228e4ba7cf6fcf861a5365c3d671a642" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.2.8" diff --git a/Cargo.toml b/Cargo.toml index c7e5349..a0d05d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,10 @@ repository = "https://github.com/casey/intermodal" edition = "2018" default-run = "imdl" +[features] +default = [] +bench = ["rand"] + [dependencies] ansi_term = "0.12.0" atty = "0.2.0" @@ -59,8 +63,13 @@ features = ["default", "wrap_help"] version = "2.1.1" features = ["serde"] +[dependencies.rand] +version = "0.7.3" +optional = true + [dev-dependencies] -temptree = "0.0.0" +criterion = "0.3.0" +temptree = "0.0.0" [workspace] members = [ @@ -70,3 +79,7 @@ members = [ # run commands for demo animation "bin/demo", ] + +[[bench]] +name = "hasher" +harness = false diff --git a/benches/hasher.rs b/benches/hasher.rs new file mode 100644 index 0000000..859ba87 --- /dev/null +++ b/benches/hasher.rs @@ -0,0 +1,13 @@ +use criterion::{criterion_group, criterion_main, Criterion}; + +use imdl::bench::{Bench, HasherBench}; + +fn bench(c: &mut Criterion) { + let bench = HasherBench::init(); + + c.bench_function(&bench.name(), |b| b.iter(|| bench.iter())); +} + +criterion_group!(benches, bench); + +criterion_main!(benches); diff --git a/bin/gen/src/kind.rs b/bin/gen/src/kind.rs index 81e4551..e922251 100644 --- a/bin/gen/src/kind.rs +++ b/bin/gen/src/kind.rs @@ -24,10 +24,11 @@ pub(crate) enum Kind { Distribution, Documentation, Fixed, + Performance, Reform, Release, - Testing, Removed, + Testing, } impl Kind { @@ -41,10 +42,11 @@ impl Kind { Self::Distribution => "📦", Self::Documentation => "📚", Self::Fixed => "🐛", + Self::Performance => "🐎", Self::Reform => "🎨", Self::Release => "🔖", - Self::Testing => "✅", Self::Removed => "➖", + Self::Testing => "✅", } } @@ -58,10 +60,11 @@ impl Kind { Self::Distribution => ":package:", Self::Documentation => ":books:", Self::Fixed => ":bug:", + Self::Performance => ":racehorse:", Self::Reform => ":art:", Self::Release => ":bookmark:", - Self::Testing => ":white_check_mark:", Self::Removed => ":heavy_minus_sign:", + Self::Testing => ":white_check_mark:", } } } diff --git a/src/bench.rs b/src/bench.rs new file mode 100644 index 0000000..722de62 --- /dev/null +++ b/src/bench.rs @@ -0,0 +1,62 @@ +use crate::common::*; + +use std::io::BufWriter; + +use rand::RngCore; +use tempfile::{Builder, NamedTempFile}; + +pub const TEMPFILE_BYTES: u64 = 256 << 10; + +pub trait Bench { + fn init() -> Self; + + fn name(&self) -> String; + + fn iter(&self); +} + +pub struct HasherBench { + tempfile: NamedTempFile, +} + +impl Bench for HasherBench { + fn name(&self) -> String { + format!("bench::HasherBench: {} tempfile", Bytes(TEMPFILE_BYTES)) + } + + fn init() -> Self { + let mut tempfile = Builder::new() + .prefix("imdl-bench-hasher") + .tempfile() + .unwrap(); + + { + let mut bytes = vec![0; 1024]; + + let mut written = 0; + + let mut writer = BufWriter::new(&mut tempfile); + + while written < TEMPFILE_BYTES { + rand::thread_rng().fill_bytes(&mut bytes); + writer.write(&bytes).unwrap(); + written += bytes.len().into_u64(); + } + + writer.flush().unwrap(); + } + + Self { tempfile } + } + + fn iter(&self) { + let files = Files::file( + self.tempfile.as_ref().to_owned(), + Bytes::from(TEMPFILE_BYTES), + ); + + let hasher = Hasher::new(false, 16 << 10, None); + + let _result = hasher.hash_files(&files).unwrap(); + } +} diff --git a/src/hasher.rs b/src/hasher.rs index 3a733f6..ee7b0c9 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -90,7 +90,6 @@ impl Hasher { } fn hash_read_io(&mut self, file: &mut dyn BufRead) -> io::Result<(Option, Bytes)> { - let buffer_len = self.buffer.len(); let mut bytes_hashed = 0; let mut md5 = if self.md5sum { @@ -100,9 +99,9 @@ impl Hasher { }; loop { - let buffer = &mut self.buffer[..buffer_len]; + let remaining = &mut self.buffer[..self.piece_length - self.piece_bytes_hashed]; - let bytes_read = file.read(buffer)?; + let bytes_read = file.read(remaining)?; if bytes_read == 0 { break; @@ -110,18 +109,16 @@ impl Hasher { bytes_hashed += bytes_read; - let read = &buffer[0..bytes_read]; + let read = &remaining[..bytes_read]; - for byte in read.iter().cloned() { - self.sha1.update(&[byte]); + self.sha1.update(read); - self.piece_bytes_hashed += 1; + self.piece_bytes_hashed += bytes_read; - if self.piece_bytes_hashed == self.piece_length { - self.pieces.push(self.sha1.digest().into()); - self.sha1.reset(); - self.piece_bytes_hashed = 0; - } + if self.piece_bytes_hashed == self.piece_length { + self.pieces.push(self.sha1.digest().into()); + self.sha1.reset(); + self.piece_bytes_hashed = 0; } if let Some(md5) = md5.as_mut() { diff --git a/src/lib.rs b/src/lib.rs index 6a63d11..26d96a4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -103,4 +103,7 @@ mod verifier; mod walker; mod xor_args; +#[cfg(feature = "bench")] +pub mod bench; + pub use run::run;