From 5a1de1acd219bc5e83c678fccaf862aee40713be Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Tue, 4 Feb 2020 19:59:06 -0800 Subject: [PATCH] Select piece length when none is provided When no piece length is provided to imdl torrent create, a piece length is selected based on the size of the input. The heuristic is lifted directly from libtorrent. Also adds an imdl torrent piece-length command, which prints a table of the piece lengths chosen at different content sizes, which is useful for understanding and debugging the piece length selection algorithm. type: added --- Cargo.toml | 2 +- src/bytes.rs | 60 +++++++++++++++++++++++--- src/common.rs | 11 ++--- src/files.rs | 29 +++++++++++++ src/linter.rs | 25 +++++++++++ src/main.rs | 9 +++- src/opt.rs | 15 +++++++ src/{ => opt}/torrent.rs | 12 ++++-- src/{ => opt}/torrent/create.rs | 69 ++++++++++++------------------ src/opt/torrent/piece_length.rs | 67 +++++++++++++++++++++++++++++ src/{ => opt}/torrent/show.rs | 0 src/{ => opt}/torrent/stats.rs | 0 src/outln.rs | 11 +++++ src/piece_length_picker.rs | 75 +++++++++++++++++++++++++++++++++ src/subcommand.rs | 14 ------ 15 files changed, 326 insertions(+), 73 deletions(-) create mode 100644 src/files.rs create mode 100644 src/linter.rs rename src/{ => opt}/torrent.rs (69%) rename src/{ => opt}/torrent/create.rs (96%) create mode 100644 src/opt/torrent/piece_length.rs rename src/{ => opt}/torrent/show.rs (100%) rename src/{ => opt}/torrent/stats.rs (100%) create mode 100644 src/outln.rs create mode 100644 src/piece_length_picker.rs delete mode 100644 src/subcommand.rs diff --git a/Cargo.toml b/Cargo.toml index 364d86d..7998e28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,5 +42,5 @@ features = ["default", "wrap_help"] [workspace] members = [ # generate table of contents and table of supported BEPs in README.md - "bin/update-readme" + "bin/update-readme", ] diff --git a/src/bytes.rs b/src/bytes.rs index 66dd557..24164c9 100644 --- a/src/bytes.rs +++ b/src/bytes.rs 
@@ -9,17 +9,25 @@ const EI: u128 = PI << 10; const ZI: u128 = EI << 10; const YI: u128 = ZI << 10; -#[derive(Debug, PartialEq, Copy, Clone)] +#[derive(Debug, PartialEq, Copy, Clone, PartialOrd, Ord, Eq)] pub(crate) struct Bytes(pub(crate) u128); impl Bytes { - pub(crate) fn from(bytes: impl Into) -> Bytes { - Bytes(bytes.into()) - } - pub(crate) fn is_power_of_two(self) -> bool { self.0 == 0 || self.0 & (self.0 - 1) == 0 } + + pub(crate) fn kib() -> Self { + Bytes::from(KI) + } + + pub(crate) fn mib() -> Self { + Bytes::from(MI) + } + + pub(crate) fn count(self) -> u128 { + self.0 + } } fn float_to_int(x: f64) -> u128 { @@ -36,6 +44,48 @@ fn int_to_float(x: u128) -> f64 { x as f64 } +impl> From for Bytes { + fn from(n: I) -> Bytes { + Bytes(n.into()) + } +} + +impl Div for Bytes { + type Output = u128; + + fn div(self, rhs: Bytes) -> u128 { + self.0 / rhs.0 + } +} + +impl Div for Bytes { + type Output = Bytes; + + fn div(self, rhs: u128) -> Bytes { + Bytes::from(self.0 / rhs) + } +} + +impl DivAssign for Bytes { + fn div_assign(&mut self, rhs: u128) { + self.0 /= rhs; + } +} + +impl Mul for Bytes { + type Output = Bytes; + + fn mul(self, rhs: u128) -> Self { + Bytes::from(self.0 * rhs) + } +} + +impl MulAssign for Bytes { + fn mul_assign(&mut self, rhs: u128) { + self.0 *= rhs; + } +} + impl FromStr for Bytes { type Err = Error; diff --git a/src/common.rs b/src/common.rs index a547e17..db2a76d 100644 --- a/src/common.rs +++ b/src/common.rs @@ -11,6 +11,7 @@ pub(crate) use std::{ hash::Hash, io::{self, Read, Write}, num::ParseFloatError, + ops::{Div, DivAssign, Mul, MulAssign}, path::{Path, PathBuf}, process::{self, Command, ExitStatus}, str::{self, FromStr}, @@ -35,7 +36,7 @@ pub(crate) use url::Url; pub(crate) use walkdir::WalkDir; // modules -pub(crate) use crate::{bencode, consts, error, torrent, use_color}; +pub(crate) use crate::{bencode, consts, error, use_color}; // traits pub(crate) use crate::{ @@ -45,10 +46,10 @@ pub(crate) use crate::{ // structs and 
enums pub(crate) use crate::{ - bytes::Bytes, env::Env, error::Error, file_info::FileInfo, hasher::Hasher, info::Info, - lint::Lint, metainfo::Metainfo, mode::Mode, opt::Opt, platform::Platform, style::Style, - subcommand::Subcommand, table::Table, torrent::Torrent, torrent_summary::TorrentSummary, - use_color::UseColor, + bytes::Bytes, env::Env, error::Error, file_info::FileInfo, files::Files, hasher::Hasher, + info::Info, lint::Lint, linter::Linter, metainfo::Metainfo, mode::Mode, opt::Opt, + piece_length_picker::PieceLengthPicker, platform::Platform, style::Style, table::Table, + torrent_summary::TorrentSummary, use_color::UseColor, }; // test stdlib types diff --git a/src/files.rs b/src/files.rs new file mode 100644 index 0000000..67a5e88 --- /dev/null +++ b/src/files.rs @@ -0,0 +1,29 @@ +use crate::common::*; + +pub(crate) struct Files { + total_size: Bytes, +} + +impl Files { + pub(crate) fn from_root(root: &Path) -> Result { + let mut total_size = 0; + + for result in WalkDir::new(root).sort_by(|a, b| a.file_name().cmp(b.file_name())) { + let entry = result?; + + let metadata = entry.metadata()?; + + if metadata.is_file() { + total_size += metadata.len(); + } + } + + Ok(Files { + total_size: Bytes::from(total_size), + }) + } + + pub(crate) fn total_size(&self) -> Bytes { + self.total_size + } +} diff --git a/src/linter.rs b/src/linter.rs new file mode 100644 index 0000000..d55df28 --- /dev/null +++ b/src/linter.rs @@ -0,0 +1,25 @@ +use crate::common::*; + +pub(crate) struct Linter { + allowed: BTreeSet, +} + +impl Linter { + pub(crate) fn new() -> Linter { + Linter { + allowed: BTreeSet::new(), + } + } + + pub(crate) fn allow(&mut self, allowed: impl IntoIterator) { + self.allowed.extend(allowed) + } + + pub(crate) fn is_allowed(&self, lint: Lint) -> bool { + self.allowed.contains(&lint) + } + + pub(crate) fn is_denied(&self, lint: Lint) -> bool { + !self.is_allowed(lint) + } +} diff --git a/src/main.rs b/src/main.rs index 326e440..57d522a 100644 --- 
a/src/main.rs +++ b/src/main.rs @@ -17,6 +17,7 @@ clippy::result_unwrap_used, clippy::shadow_reuse, clippy::unreachable, + clippy::unseparated_literal_suffix, clippy::wildcard_enum_match_arm )] @@ -36,6 +37,9 @@ mod errln; #[macro_use] mod err; +#[macro_use] +mod outln; + #[cfg(test)] mod testing; @@ -55,22 +59,23 @@ mod consts; mod env; mod error; mod file_info; +mod files; mod hasher; mod info; mod into_u64; mod into_usize; mod lint; +mod linter; mod metainfo; mod mode; mod opt; mod path_ext; +mod piece_length_picker; mod platform; mod platform_interface; mod reckoner; mod style; -mod subcommand; mod table; -mod torrent; mod torrent_summary; mod use_color; diff --git a/src/opt.rs b/src/opt.rs index 73c4fad..1e6aa3a 100644 --- a/src/opt.rs +++ b/src/opt.rs @@ -1,5 +1,20 @@ use crate::common::*; +mod torrent; + +#[derive(StructOpt)] +pub(crate) enum Subcommand { + Torrent(torrent::Torrent), +} + +impl Subcommand { + pub(crate) fn run(self, env: &mut Env, unstable: bool) -> Result<(), Error> { + match self { + Self::Torrent(torrent) => torrent.run(env, unstable), + } + } +} + #[derive(StructOpt)] #[structopt( about(consts::ABOUT), diff --git a/src/torrent.rs b/src/opt/torrent.rs similarity index 69% rename from src/torrent.rs rename to src/opt/torrent.rs index 80fef72..6a36b26 100644 --- a/src/torrent.rs +++ b/src/opt/torrent.rs @@ -1,6 +1,7 @@ use crate::common::*; mod create; +mod piece_length; mod show; mod stats; @@ -11,17 +12,20 @@ mod stats; about("Subcommands related to the BitTorrent protocol.") )] pub(crate) enum Torrent { - Create(torrent::create::Create), - Stats(torrent::stats::Stats), - Show(torrent::show::Show), + Create(create::Create), + #[structopt(alias = "piece-size")] + PieceLength(piece_length::PieceLength), + Show(show::Show), + Stats(stats::Stats), } impl Torrent { pub(crate) fn run(self, env: &mut Env, unstable: bool) -> Result<(), Error> { match self { Self::Create(create) => create.run(env), - Self::Stats(stats) => stats.run(env, unstable), 
+ Self::PieceLength(piece_length) => piece_length.run(env), Self::Show(show) => show.run(env), + Self::Stats(stats) => stats.run(env, unstable), } } } diff --git a/src/torrent/create.rs b/src/opt/torrent/create.rs similarity index 96% rename from src/torrent/create.rs rename to src/opt/torrent/create.rs index 9c3c0ff..2ca4eef 100644 --- a/src/torrent/create.rs +++ b/src/opt/torrent/create.rs @@ -91,11 +91,10 @@ Note: Many BitTorrent clients do not implement the behavior described in BEP 12. #[structopt( name = "PIECE-LENGTH", long = "piece-length", - default_value = "512KiB", help = "Set piece length to `PIECE-LENGTH` bytes.", long_help = "Set piece length to `PIECE-LENGTH` bytes. Accepts SI units, e.g. kib, mib, and gib." )] - piece_length: Bytes, + piece_length: Option, #[structopt( name = "PRIVATE", long = "private", @@ -112,49 +111,31 @@ Note: Many BitTorrent clients do not implement the behavior described in BEP 12. source: Option, } -struct Linter { - allowed: BTreeSet, -} - -impl Linter { - fn new() -> Linter { - Linter { - allowed: BTreeSet::new(), - } - } - - fn allow(&mut self, allowed: impl IntoIterator) { - self.allowed.extend(allowed) - } - - fn is_allowed(&self, lint: Lint) -> bool { - self.allowed.contains(&lint) - } - - fn is_denied(&self, lint: Lint) -> bool { - !self.is_allowed(lint) - } -} - impl Create { pub(crate) fn run(self, env: &mut Env) -> Result<(), Error> { + let input = env.resolve(&self.input); + + let files = Files::from_root(&input)?; + + let piece_length = self + .piece_length + .unwrap_or_else(|| PieceLengthPicker::from_content_size(files.total_size())); + let mut linter = Linter::new(); linter.allow(self.allowed_lints.iter().cloned()); - if linter.is_denied(Lint::UnevenPieceLength) && !self.piece_length.is_power_of_two() { + if linter.is_denied(Lint::UnevenPieceLength) && !piece_length.is_power_of_two() { return Err(Error::PieceLengthUneven { - bytes: self.piece_length, + bytes: piece_length, }); } - let piece_length: u32 = - self 
- .piece_length - .0 - .try_into() - .map_err(|_| Error::PieceLengthTooLarge { - bytes: self.piece_length, - })?; + let piece_length: u32 = piece_length + .0 + .try_into() + .map_err(|_| Error::PieceLengthTooLarge { + bytes: piece_length, + })?; if piece_length == 0 { return Err(Error::PieceLengthZero); @@ -164,8 +145,6 @@ impl Create { return Err(Error::PieceLengthSmall); } - let input = env.resolve(&self.input); - let mut announce_list = Vec::new(); for tier in &self.announce_tiers { let tier = tier.split(',').map(str::to_string).collect::>(); @@ -453,7 +432,7 @@ mod tests { let torrent = env.resolve("foo.torrent"); let bytes = fs::read(torrent).unwrap(); let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); - assert_eq!(metainfo.info.piece_length, 512 * 2u32.pow(10)); + assert_eq!(metainfo.info.piece_length, 16 * 2u32.pow(10)); } #[test] @@ -804,6 +783,8 @@ mod tests { "--piece-length", "17KiB", ]); + let dir = env.resolve("foo"); + fs::create_dir(&dir).unwrap(); assert_matches!( env.run(), Err(Error::PieceLengthUneven { bytes }) if bytes.0 == 17 * 1024 @@ -837,6 +818,8 @@ mod tests { "--piece-length", "0", ]); + let dir = env.resolve("foo"); + fs::create_dir(&dir).unwrap(); assert_matches!(env.run(), Err(Error::PieceLengthZero)); } @@ -850,6 +833,8 @@ mod tests { "--piece-length", "8KiB", ]); + let dir = env.resolve("foo"); + fs::create_dir(&dir).unwrap(); assert_matches!(env.run(), Err(Error::PieceLengthSmall)); } @@ -894,12 +879,12 @@ mod tests { env.run().unwrap(); let have = env.out(); let want = " Name foo - Info Hash 8197efe97f10f50f249e8d5c63eb5c0d4e1d9b49 -Torrent Size 166 bytes + Info Hash 2637812436658f855e99f07c40fe7da5832a7b6d +Torrent Size 165 bytes Content Size 0 bytes Private no Tracker http://bar/ - Piece Size 512 KiB + Piece Size 16 KiB Piece Count 1 File Count 0 "; diff --git a/src/opt/torrent/piece_length.rs b/src/opt/torrent/piece_length.rs new file mode 100644 index 0000000..aee8f28 --- /dev/null +++ 
b/src/opt/torrent/piece_length.rs @@ -0,0 +1,67 @@ +use crate::common::*; + +#[derive(StructOpt)] +#[structopt( + help_message(consts::HELP_MESSAGE), + version_message(consts::VERSION_MESSAGE), + about("Display information about automatic piece length selection.") +)] +pub(crate) struct PieceLength {} + +#[allow(clippy::unused_self)] +impl PieceLength { + pub(crate) fn run(self, env: &mut Env) -> Result<(), Error> { + let mut rows: Vec<(String, String, String, String)> = Vec::new(); + + rows.push(( + "Content".into(), + "Piece Length".into(), + "Count".into(), + "Metainfo Size".into(), + )); + + for i in 14..51 { + let content_size = Bytes::from(1u128 << i); + + let piece_length = PieceLengthPicker::from_content_size(content_size); + + let metainfo_size = PieceLengthPicker::metainfo_size(content_size, piece_length); + + let piece_count = PieceLengthPicker::piece_count(content_size, piece_length); + + rows.push(( + content_size.to_string(), + piece_length.to_string(), + piece_count.to_string(), + metainfo_size.to_string(), + )); + } + + let mut w = (0, 0, 0, 0); + for (c0, c1, c2, c3) in &rows { + w = ( + w.0.max(c0.len()), + w.1.max(c1.len()), + w.2.max(c2.len()), + w.3.max(c3.len()), + ); + } + + for (content_size, piece_length, metainfo_size, piece_count) in rows { + outln!( + env, + "{:w0$} -> {:w1$} x {:w2$} = {:w3$}", + content_size, + piece_length, + metainfo_size, + piece_count, + w0 = w.0, + w1 = w.1, + w2 = w.2, + w3 = w.3, + ); + } + + Ok(()) + } +} diff --git a/src/torrent/show.rs b/src/opt/torrent/show.rs similarity index 100% rename from src/torrent/show.rs rename to src/opt/torrent/show.rs diff --git a/src/torrent/stats.rs b/src/opt/torrent/stats.rs similarity index 100% rename from src/torrent/stats.rs rename to src/opt/torrent/stats.rs diff --git a/src/outln.rs b/src/outln.rs new file mode 100644 index 0000000..3be286b --- /dev/null +++ b/src/outln.rs @@ -0,0 +1,11 @@ +macro_rules! 
outln { + ($env:expr) => { + writeln!($env.out, "").context(crate::error::Stderr)?; + }; + ($env:expr, $fmt:expr) => { + writeln!($env.out, $fmt).context(crate::error::Stderr)?; + }; + ($env:expr, $fmt:expr, $($arg:tt)*) => { + writeln!($env.out, $fmt, $($arg)*).context(crate::error::Stderr)?; + }; +} diff --git a/src/piece_length_picker.rs b/src/piece_length_picker.rs new file mode 100644 index 0000000..22e8134 --- /dev/null +++ b/src/piece_length_picker.rs @@ -0,0 +1,75 @@ +// The piece length picker attempts to pick a reasonable piece length +// for a torrent given the size of the torrent's contents. +// +// Constraints: +// - Decreasing piece length increases protocol overhead. +// - Decreasing piece length increases torrent metainfo size. +// - Increasing piece length increases the amount of data that must be +// thrown away in case of corruption. +// - Increasing piece length increases the amount of data that must be +// downloaded before it can be verified and uploaded to other peers. +// - Decreasing piece length increases the proportion of disk seeks to +// disk reads. This can be an issue for spinning disks. +// - The BitTorrent v2 specification requires that piece sizes be +// at least 16 KiB. +// +// These constraints could probably be exactly defined and optimized +// using an integer programming solver, but instead we just copy what +// libtorrent does. 
+ +use crate::common::*; + +pub(crate) struct PieceLengthPicker; + +impl PieceLengthPicker { + pub(crate) fn from_content_size(content_size: Bytes) -> Bytes { + #![allow( + clippy::as_conversions, + clippy::cast_sign_loss, + clippy::cast_precision_loss, + clippy::cast_possible_truncation + )] + let exponent = (content_size.count() as f64).log2().ceil() as u128; + Bytes::from(1u128 << (exponent / 2 + 4)) + .max(Bytes::kib() * 16) + .min(Bytes::mib() * 16) + } + + pub(crate) fn piece_count(content_size: Bytes, piece_length: Bytes) -> u128 { + if content_size == Bytes::from(0u128) { + 0 + } else { + (content_size / piece_length).max(1) + } + } + + pub(crate) fn metainfo_size(content_size: Bytes, piece_length: Bytes) -> Bytes { + let digest_length: u128 = sha1::DIGEST_LENGTH.into_u64().into(); + Bytes::from(Self::piece_count(content_size, piece_length) * digest_length) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn limits() { + assert_eq!( + PieceLengthPicker::from_content_size(Bytes::mib() * 2), + Bytes::kib() * 16 + ); + assert_eq!( + PieceLengthPicker::from_content_size(Bytes::mib() * 4), + Bytes::kib() * 32 + ); + assert_eq!( + PieceLengthPicker::from_content_size(Bytes::mib() * 8), + Bytes::kib() * 32 + ); + assert_eq!( + PieceLengthPicker::from_content_size(Bytes::mib() * 16), + Bytes::kib() * 64 + ); + } +} diff --git a/src/subcommand.rs b/src/subcommand.rs deleted file mode 100644 index 3b52b8d..0000000 --- a/src/subcommand.rs +++ /dev/null @@ -1,14 +0,0 @@ -use crate::common::*; - -#[derive(StructOpt)] -pub(crate) enum Subcommand { - Torrent(Torrent), -} - -impl Subcommand { - pub(crate) fn run(self, env: &mut Env, unstable: bool) -> Result<(), Error> { - match self { - Self::Torrent(torrent) => torrent.run(env, unstable), - } - } -}