Benchmark and improve hashing performance

- Add a `bench` feature that exposes a hasher benchmark.

- Make the hasher read up to the end of the current piece and hash
  everything it read in one call, instead of hashing one byte at a time.
  This was a 4x improvement on the benchmark. (Terrible code == easy wins!)

type: performance
This commit is contained in:
Casey Rodarmor 2020-05-26 00:09:16 -07:00
parent 1b2d79b4a5
commit 4e6b475470
No known key found for this signature in database
GPG Key ID: 556186B153EC6FE0
7 changed files with 447 additions and 16 deletions

340
Cargo.lock generated
View File

@ -141,9 +141,24 @@ version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41" checksum = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41"
dependencies = [ dependencies = [
"lazy_static",
"memchr", "memchr",
"regex-automata",
"serde",
] ]
[[package]]
name = "bumpalo"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5356f1d23ee24a1f785a56d1d1a5f0fd5b0f6a0c0fb2412ce11da71649ab78f6"
[[package]]
name = "byteorder"
version = "1.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
[[package]] [[package]]
name = "cargo_toml" name = "cargo_toml"
version = "0.8.0" version = "0.8.0"
@ -155,6 +170,15 @@ dependencies = [
"toml", "toml",
] ]
[[package]]
name = "cast"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0"
dependencies = [
"rustc_version",
]
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.52" version = "1.0.52"
@ -225,6 +249,41 @@ dependencies = [
"winapi 0.3.8", "winapi 0.3.8",
] ]
[[package]]
name = "criterion"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63f696897c88b57f4ffe3c69d8e1a0613c7d0e6c4833363c8560fbde9c47b966"
dependencies = [
"atty",
"cast",
"clap",
"criterion-plot",
"csv",
"itertools",
"lazy_static",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddeaf7989f00f2e1d871a26a110f3ed713632feac17f65f03ca938c542618b60"
dependencies = [
"cast",
"itertools",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.4.2" version = "0.4.2"
@ -235,6 +294,42 @@ dependencies = [
"maybe-uninit", "maybe-uninit",
] ]
[[package]]
name = "crossbeam-deque"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
"maybe-uninit",
]
[[package]]
name = "crossbeam-epoch"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"lazy_static",
"maybe-uninit",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-queue"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]] [[package]]
name = "crossbeam-utils" name = "crossbeam-utils"
version = "0.7.2" version = "0.7.2"
@ -246,6 +341,28 @@ dependencies = [
"lazy_static", "lazy_static",
] ]
[[package]]
name = "csv"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279"
dependencies = [
"bstr",
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "ctor" name = "ctor"
version = "0.1.14" version = "0.1.14"
@ -278,6 +395,12 @@ version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4358a9e11b9a09cf52383b451b49a169e8d797b68aa02301ff586d70d9661ea3" checksum = "4358a9e11b9a09cf52383b451b49a169e8d797b68aa02301ff586d70d9661ea3"
[[package]]
name = "either"
version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
[[package]] [[package]]
name = "encode_unicode" name = "encode_unicode"
version = "0.3.6" version = "0.3.6"
@ -488,6 +611,7 @@ dependencies = [
"bendy", "bendy",
"chrono", "chrono",
"console", "console",
"criterion",
"globset", "globset",
"ignore", "ignore",
"imdl-indicatif", "imdl-indicatif",
@ -499,6 +623,7 @@ dependencies = [
"open", "open",
"pretty_assertions", "pretty_assertions",
"pretty_env_logger", "pretty_env_logger",
"rand",
"regex", "regex",
"serde", "serde",
"serde-hex", "serde-hex",
@ -528,6 +653,21 @@ dependencies = [
"regex", "regex",
] ]
[[package]]
name = "itertools"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
[[package]] [[package]]
name = "jobserver" name = "jobserver"
version = "0.1.21" version = "0.1.21"
@ -537,6 +677,15 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "js-sys"
version = "0.3.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa5a448de267e7358beaf4a5d849518fe9a0c13fce7afd44b06e68550e5562a7"
dependencies = [
"wasm-bindgen",
]
[[package]] [[package]]
name = "kernel32-sys" name = "kernel32-sys"
version = "0.2.2" version = "0.2.2"
@ -644,6 +793,15 @@ version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
[[package]]
name = "memoffset"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8"
dependencies = [
"autocfg",
]
[[package]] [[package]]
name = "nodrop" name = "nodrop"
version = "0.1.14" version = "0.1.14"
@ -679,12 +837,28 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "num_cpus"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
dependencies = [
"hermit-abi",
"libc",
]
[[package]] [[package]]
name = "number_prefix" name = "number_prefix"
version = "0.3.0" version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"
[[package]]
name = "oorandom"
version = "11.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af325bc33c7f60191be4e2c984d48aaa21e2854f473b85398344b60c9b6358"
[[package]] [[package]]
name = "open" name = "open"
version = "1.4.0" version = "1.4.0"
@ -734,6 +908,18 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677" checksum = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677"
[[package]]
name = "plotters"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9b1d9ca091d370ea3a78d5619145d1b59426ab0c9eedbad2514a4cee08bf389"
dependencies = [
"js-sys",
"num-traits",
"wasm-bindgen",
"web-sys",
]
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.6" version = "0.2.6"
@ -853,6 +1039,30 @@ dependencies = [
"rand_core", "rand_core",
] ]
[[package]]
name = "rayon"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
dependencies = [
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
dependencies = [
"crossbeam-deque",
"crossbeam-queue",
"crossbeam-utils",
"lazy_static",
"num_cpus",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.1.56" version = "0.1.56"
@ -871,6 +1081,15 @@ dependencies = [
"thread_local", "thread_local",
] ]
[[package]]
name = "regex-automata"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
dependencies = [
"byteorder",
]
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.6.17" version = "0.6.17"
@ -892,6 +1111,21 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783"
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
dependencies = [
"semver",
]
[[package]]
name = "ryu"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1"
[[package]] [[package]]
name = "same-file" name = "same-file"
version = "1.0.6" version = "1.0.6"
@ -901,6 +1135,27 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.106" version = "1.0.106"
@ -941,6 +1196,17 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "serde_json"
version = "1.0.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]] [[package]]
name = "serde_with" name = "serde_with"
version = "1.4.0" version = "1.4.0"
@ -1185,6 +1451,16 @@ dependencies = [
"winapi 0.3.8", "winapi 0.3.8",
] ]
[[package]]
name = "tinytemplate"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45e4bc5ac99433e0dcb8b9f309dd271a165ae37dde129b9e0ce1bfdd8bfe4891"
dependencies = [
"serde",
"serde_json",
]
[[package]] [[package]]
name = "toml" name = "toml"
version = "0.5.6" version = "0.5.6"
@ -1277,6 +1553,70 @@ version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "wasm-bindgen"
version = "0.2.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c7d40d09cdbf0f4895ae58cf57d92e1e57a9dd8ed2e8390514b54a47cc5551"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3972e137ebf830900db522d6c8fd74d1900dcfc733462e9a12e942b00b4ac94"
dependencies = [
"bumpalo",
"lazy_static",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cd85aa2c579e8892442954685f0d801f9129de24fa2136b2c6a539c76b65776"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eb197bd3a47553334907ffd2f16507b4f4f01bbec3ac921a7719e0decdfe72a"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a91c2916119c17a8e316507afaaa2dd94b47646048014bbdf6bef098c1bb58ad"
[[package]]
name = "web-sys"
version = "0.3.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bc359e5dd3b46cb9687a051d50a2fdd228e4ba7cf6fcf861a5365c3d671a642"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.2.8" version = "0.2.8"

View File

@ -12,6 +12,10 @@ repository = "https://github.com/casey/intermodal"
edition = "2018" edition = "2018"
default-run = "imdl" default-run = "imdl"
[features]
default = []
bench = ["rand"]
[dependencies] [dependencies]
ansi_term = "0.12.0" ansi_term = "0.12.0"
atty = "0.2.0" atty = "0.2.0"
@ -59,7 +63,12 @@ features = ["default", "wrap_help"]
version = "2.1.1" version = "2.1.1"
features = ["serde"] features = ["serde"]
[dependencies.rand]
version = "0.7.3"
optional = true
[dev-dependencies] [dev-dependencies]
criterion = "0.3.0"
temptree = "0.0.0" temptree = "0.0.0"
[workspace] [workspace]
@ -70,3 +79,7 @@ members = [
# run commands for demo animation # run commands for demo animation
"bin/demo", "bin/demo",
] ]
[[bench]]
name = "hasher"
harness = false

13
benches/hasher.rs Normal file
View File

@ -0,0 +1,13 @@
use criterion::{criterion_group, criterion_main, Criterion};
use imdl::bench::{Bench, HasherBench};
/// Registers the hasher benchmark with Criterion.
///
/// The `HasherBench` is built once up front; Criterion then repeatedly times
/// its `iter` method under the label returned by `name`.
fn bench(c: &mut Criterion) {
  let hasher_bench = HasherBench::init();
  let label = hasher_bench.name();

  c.bench_function(&label, |bencher| {
    bencher.iter(|| hasher_bench.iter());
  });
}
criterion_group!(benches, bench);
criterion_main!(benches);

View File

@ -24,10 +24,11 @@ pub(crate) enum Kind {
Distribution, Distribution,
Documentation, Documentation,
Fixed, Fixed,
Performance,
Reform, Reform,
Release, Release,
Testing,
Removed, Removed,
Testing,
} }
impl Kind { impl Kind {
@ -41,10 +42,11 @@ impl Kind {
Self::Distribution => "📦", Self::Distribution => "📦",
Self::Documentation => "📚", Self::Documentation => "📚",
Self::Fixed => "🐛", Self::Fixed => "🐛",
Self::Performance => "🐎",
Self::Reform => "🎨", Self::Reform => "🎨",
Self::Release => "🔖", Self::Release => "🔖",
Self::Testing => "",
Self::Removed => "", Self::Removed => "",
Self::Testing => "",
} }
} }
@ -58,10 +60,11 @@ impl Kind {
Self::Distribution => ":package:", Self::Distribution => ":package:",
Self::Documentation => ":books:", Self::Documentation => ":books:",
Self::Fixed => ":bug:", Self::Fixed => ":bug:",
Self::Performance => ":racehorse:",
Self::Reform => ":art:", Self::Reform => ":art:",
Self::Release => ":bookmark:", Self::Release => ":bookmark:",
Self::Testing => ":white_check_mark:",
Self::Removed => ":heavy_minus_sign:", Self::Removed => ":heavy_minus_sign:",
Self::Testing => ":white_check_mark:",
} }
} }
} }

62
src/bench.rs Normal file
View File

@ -0,0 +1,62 @@
use crate::common::*;
use std::io::BufWriter;
use rand::RngCore;
use tempfile::{Builder, NamedTempFile};
pub const TEMPFILE_BYTES: u64 = 256 << 10;
/// Interface implemented by benchmarks in this module.
pub trait Bench {
/// Builds the benchmark value, including any setup it needs.
fn init() -> Self;
/// Returns the label used to identify this benchmark.
fn name(&self) -> String;
/// Performs one run of the work being measured.
fn iter(&self);
}
/// Benchmark that hashes a single temporary file with `Hasher::hash_files`.
pub struct HasherBench {
/// Temporary file that `init` fills with random bytes and `iter` hashes.
tempfile: NamedTempFile,
}
impl Bench for HasherBench {
  /// Returns the benchmark label, which includes the tempfile size rendered
  /// through `Bytes`.
  fn name(&self) -> String {
    format!("bench::HasherBench: {} tempfile", Bytes(TEMPFILE_BYTES))
  }

  /// Creates a temporary file prefixed `imdl-bench-hasher` and fills it with
  /// `TEMPFILE_BYTES` of random data.
  ///
  /// # Panics
  ///
  /// Panics if the temporary file cannot be created, written, or flushed.
  fn init() -> Self {
    let mut tempfile = Builder::new()
      .prefix("imdl-bench-hasher")
      .tempfile()
      .unwrap();

    {
      let mut rng = rand::thread_rng();
      let mut bytes = vec![0; 1024];
      let mut written = 0;
      let mut writer = BufWriter::new(&mut tempfile);

      // Data is written in whole 1 KiB chunks, so the final size equals
      // `TEMPFILE_BYTES` only because that constant is a multiple of 1024.
      while written < TEMPFILE_BYTES {
        rng.fill_bytes(&mut bytes);
        // `write_all` rather than `write`: `write` may accept only part of the
        // buffer, which would leave the file shorter than the length `iter`
        // declares while `written` still advanced by the full chunk.
        writer.write_all(&bytes).unwrap();
        written += bytes.len().into_u64();
      }

      // Flush explicitly so that a failure surfaces here instead of being
      // swallowed when the `BufWriter` is dropped.
      writer.flush().unwrap();
    }

    Self { tempfile }
  }

  /// Hashes the temporary file once with a freshly constructed `Hasher`.
  ///
  /// # Panics
  ///
  /// Panics if `hash_files` returns an error.
  fn iter(&self) {
    let files = Files::file(
      self.tempfile.as_ref().to_owned(),
      Bytes::from(TEMPFILE_BYTES),
    );

    // NOTE(review): arguments are presumably (md5sum disabled, 16 KiB piece
    // length, no progress bar) — confirm against `Hasher::new`.
    let hasher = Hasher::new(false, 16 << 10, None);

    let _result = hasher.hash_files(&files).unwrap();
  }
}

View File

@ -90,7 +90,6 @@ impl Hasher {
} }
fn hash_read_io(&mut self, file: &mut dyn BufRead) -> io::Result<(Option<Md5Digest>, Bytes)> { fn hash_read_io(&mut self, file: &mut dyn BufRead) -> io::Result<(Option<Md5Digest>, Bytes)> {
let buffer_len = self.buffer.len();
let mut bytes_hashed = 0; let mut bytes_hashed = 0;
let mut md5 = if self.md5sum { let mut md5 = if self.md5sum {
@ -100,9 +99,9 @@ impl Hasher {
}; };
loop { loop {
let buffer = &mut self.buffer[..buffer_len]; let remaining = &mut self.buffer[..self.piece_length - self.piece_bytes_hashed];
let bytes_read = file.read(buffer)?; let bytes_read = file.read(remaining)?;
if bytes_read == 0 { if bytes_read == 0 {
break; break;
@ -110,19 +109,17 @@ impl Hasher {
bytes_hashed += bytes_read; bytes_hashed += bytes_read;
let read = &buffer[0..bytes_read]; let read = &remaining[..bytes_read];
for byte in read.iter().cloned() { self.sha1.update(read);
self.sha1.update(&[byte]);
self.piece_bytes_hashed += 1; self.piece_bytes_hashed += bytes_read;
if self.piece_bytes_hashed == self.piece_length { if self.piece_bytes_hashed == self.piece_length {
self.pieces.push(self.sha1.digest().into()); self.pieces.push(self.sha1.digest().into());
self.sha1.reset(); self.sha1.reset();
self.piece_bytes_hashed = 0; self.piece_bytes_hashed = 0;
} }
}
if let Some(md5) = md5.as_mut() { if let Some(md5) = md5.as_mut() {
md5.consume(read); md5.consume(read);

View File

@ -103,4 +103,7 @@ mod verifier;
mod walker; mod walker;
mod xor_args; mod xor_args;
#[cfg(feature = "bench")]
pub mod bench;
pub use run::run; pub use run::run;