Select piece length when none is provided

When no piece length is provided to imdl torrent create, a piece
length is selected based on the size of the input. The heuristic is
lifted directly from libtorrent.

Also adds an imdl torrent piece-length command, which prints a table
of the piece lengths chosen at different content sizes, which is useful
for understanding and debugging the piece length selection algorithm.

type: added
This commit is contained in:
Casey Rodarmor 2020-02-04 19:59:06 -08:00
parent 35a0e8f9b7
commit 5a1de1acd2
No known key found for this signature in database
GPG Key ID: 556186B153EC6FE0
15 changed files with 326 additions and 73 deletions

View File

@ -42,5 +42,5 @@ features = ["default", "wrap_help"]
[workspace]
members = [
# generate table of contents and table of supported BEPs in README.md
"bin/update-readme"
"bin/update-readme",
]

View File

@ -9,17 +9,25 @@ const EI: u128 = PI << 10;
const ZI: u128 = EI << 10;
const YI: u128 = ZI << 10;
#[derive(Debug, PartialEq, Copy, Clone)]
#[derive(Debug, PartialEq, Copy, Clone, PartialOrd, Ord, Eq)]
pub(crate) struct Bytes(pub(crate) u128);
impl Bytes {
pub(crate) fn from(bytes: impl Into<u128>) -> Bytes {
Bytes(bytes.into())
}
pub(crate) fn is_power_of_two(self) -> bool {
self.0 == 0 || self.0 & (self.0 - 1) == 0
}
pub(crate) fn kib() -> Self {
Bytes::from(KI)
}
pub(crate) fn mib() -> Self {
Bytes::from(MI)
}
pub(crate) fn count(self) -> u128 {
self.0
}
}
fn float_to_int(x: f64) -> u128 {
@ -36,6 +44,48 @@ fn int_to_float(x: u128) -> f64 {
x as f64
}
/// Builds a byte count from any integer type that converts losslessly
/// into `u128`.
impl<I> From<I> for Bytes
where
    I: Into<u128>,
{
    fn from(n: I) -> Bytes {
        let count: u128 = n.into();
        Bytes(count)
    }
}
impl Div<Bytes> for Bytes {
type Output = u128;
fn div(self, rhs: Bytes) -> u128 {
self.0 / rhs.0
}
}
impl Div<u128> for Bytes {
type Output = Bytes;
fn div(self, rhs: u128) -> Bytes {
Bytes::from(self.0 / rhs)
}
}
impl DivAssign<u128> for Bytes {
fn div_assign(&mut self, rhs: u128) {
self.0 /= rhs;
}
}
impl Mul<u128> for Bytes {
type Output = Bytes;
fn mul(self, rhs: u128) -> Self {
Bytes::from(self.0 * rhs)
}
}
impl MulAssign<u128> for Bytes {
fn mul_assign(&mut self, rhs: u128) {
self.0 *= rhs;
}
}
impl FromStr for Bytes {
type Err = Error;

View File

@ -11,6 +11,7 @@ pub(crate) use std::{
hash::Hash,
io::{self, Read, Write},
num::ParseFloatError,
ops::{Div, DivAssign, Mul, MulAssign},
path::{Path, PathBuf},
process::{self, Command, ExitStatus},
str::{self, FromStr},
@ -35,7 +36,7 @@ pub(crate) use url::Url;
pub(crate) use walkdir::WalkDir;
// modules
pub(crate) use crate::{bencode, consts, error, torrent, use_color};
pub(crate) use crate::{bencode, consts, error, use_color};
// traits
pub(crate) use crate::{
@ -45,10 +46,10 @@ pub(crate) use crate::{
// structs and enums
pub(crate) use crate::{
bytes::Bytes, env::Env, error::Error, file_info::FileInfo, hasher::Hasher, info::Info,
lint::Lint, metainfo::Metainfo, mode::Mode, opt::Opt, platform::Platform, style::Style,
subcommand::Subcommand, table::Table, torrent::Torrent, torrent_summary::TorrentSummary,
use_color::UseColor,
bytes::Bytes, env::Env, error::Error, file_info::FileInfo, files::Files, hasher::Hasher,
info::Info, lint::Lint, linter::Linter, metainfo::Metainfo, mode::Mode, opt::Opt,
piece_length_picker::PieceLengthPicker, platform::Platform, style::Style, table::Table,
torrent_summary::TorrentSummary, use_color::UseColor,
};
// test stdlib types

29
src/files.rs Normal file
View File

@ -0,0 +1,29 @@
use crate::common::*;
/// Summary of the files found beneath a root path.
pub(crate) struct Files {
    // Combined length of every entry whose metadata reported a regular file.
    total_size: Bytes,
}

impl Files {
    /// Walks `root`, visiting entries ordered by file name, and adds up the
    /// lengths of all entries whose metadata reports a regular file.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by the directory walk or by reading
    /// an entry's metadata.
    pub(crate) fn from_root(root: &Path) -> Result<Files, Error> {
        let walker = WalkDir::new(root).sort_by(|a, b| a.file_name().cmp(b.file_name()));

        let mut byte_total: u64 = 0;
        for item in walker {
            let meta = item?.metadata()?;
            if meta.is_file() {
                byte_total += meta.len();
            }
        }

        Ok(Files {
            total_size: Bytes::from(byte_total),
        })
    }

    /// Returns the combined size computed by `from_root`.
    pub(crate) fn total_size(&self) -> Bytes {
        self.total_size
    }
}

25
src/linter.rs Normal file
View File

@ -0,0 +1,25 @@
use crate::common::*;
/// Tracks which lints have been explicitly allowed; every other lint is
/// treated as denied.
pub(crate) struct Linter {
    allowed: BTreeSet<Lint>,
}

impl Linter {
    /// Creates a linter with no lints allowed.
    pub(crate) fn new() -> Linter {
        let allowed = BTreeSet::new();
        Linter { allowed }
    }

    /// Adds every lint yielded by `allowed` to the allowed set.
    pub(crate) fn allow(&mut self, allowed: impl IntoIterator<Item = Lint>) {
        for lint in allowed {
            self.allowed.insert(lint);
        }
    }

    /// Returns `true` if `lint` has been allowed.
    pub(crate) fn is_allowed(&self, lint: Lint) -> bool {
        self.allowed.contains(&lint)
    }

    /// Returns `true` if `lint` has not been allowed.
    pub(crate) fn is_denied(&self, lint: Lint) -> bool {
        !self.allowed.contains(&lint)
    }
}

View File

@ -17,6 +17,7 @@
clippy::result_unwrap_used,
clippy::shadow_reuse,
clippy::unreachable,
clippy::unseparated_literal_suffix,
clippy::wildcard_enum_match_arm
)]
@ -36,6 +37,9 @@ mod errln;
#[macro_use]
mod err;
#[macro_use]
mod outln;
#[cfg(test)]
mod testing;
@ -55,22 +59,23 @@ mod consts;
mod env;
mod error;
mod file_info;
mod files;
mod hasher;
mod info;
mod into_u64;
mod into_usize;
mod lint;
mod linter;
mod metainfo;
mod mode;
mod opt;
mod path_ext;
mod piece_length_picker;
mod platform;
mod platform_interface;
mod reckoner;
mod style;
mod subcommand;
mod table;
mod torrent;
mod torrent_summary;
mod use_color;

View File

@ -1,5 +1,20 @@
use crate::common::*;
mod torrent;
// Top-level subcommand selector. Plain `//` comments are used on this item
// so that the derive sees exactly the same input as before.
#[derive(StructOpt)]
pub(crate) enum Subcommand {
    Torrent(torrent::Torrent),
}

impl Subcommand {
    /// Runs the selected subcommand, forwarding `env` and the `unstable`
    /// flag to it and returning its result.
    pub(crate) fn run(self, env: &mut Env, unstable: bool) -> Result<(), Error> {
        // `Torrent` is the only variant, so this pattern is irrefutable.
        let Self::Torrent(torrent) = self;
        torrent.run(env, unstable)
    }
}
#[derive(StructOpt)]
#[structopt(
about(consts::ABOUT),

View File

@ -1,6 +1,7 @@
use crate::common::*;
mod create;
mod piece_length;
mod show;
mod stats;
@ -11,17 +12,20 @@ mod stats;
about("Subcommands related to the BitTorrent protocol.")
)]
pub(crate) enum Torrent {
Create(torrent::create::Create),
Stats(torrent::stats::Stats),
Show(torrent::show::Show),
Create(create::Create),
#[structopt(alias = "piece-size")]
PieceLength(piece_length::PieceLength),
Show(show::Show),
Stats(stats::Stats),
}
impl Torrent {
pub(crate) fn run(self, env: &mut Env, unstable: bool) -> Result<(), Error> {
match self {
Self::Create(create) => create.run(env),
Self::Stats(stats) => stats.run(env, unstable),
Self::PieceLength(piece_length) => piece_length.run(env),
Self::Show(show) => show.run(env),
Self::Stats(stats) => stats.run(env, unstable),
}
}
}

View File

@ -91,11 +91,10 @@ Note: Many BitTorrent clients do not implement the behavior described in BEP 12.
#[structopt(
name = "PIECE-LENGTH",
long = "piece-length",
default_value = "512KiB",
help = "Set piece length to `PIECE-LENGTH` bytes.",
long_help = "Set piece length to `PIECE-LENGTH` bytes. Accepts SI units, e.g. kib, mib, and gib."
)]
piece_length: Bytes,
piece_length: Option<Bytes>,
#[structopt(
name = "PRIVATE",
long = "private",
@ -112,49 +111,31 @@ Note: Many BitTorrent clients do not implement the behavior described in BEP 12.
source: Option<String>,
}
/// Tracks which lints have been explicitly allowed; every other lint is
/// treated as denied.
struct Linter {
    allowed: BTreeSet<Lint>,
}

impl Linter {
    /// Creates a linter with no lints allowed.
    fn new() -> Linter {
        let allowed = BTreeSet::new();
        Linter { allowed }
    }

    /// Adds every lint yielded by `allowed` to the allowed set.
    fn allow(&mut self, allowed: impl IntoIterator<Item = Lint>) {
        for lint in allowed {
            self.allowed.insert(lint);
        }
    }

    /// Returns `true` if `lint` has been allowed.
    fn is_allowed(&self, lint: Lint) -> bool {
        self.allowed.contains(&lint)
    }

    /// Returns `true` if `lint` has not been allowed.
    fn is_denied(&self, lint: Lint) -> bool {
        !self.allowed.contains(&lint)
    }
}
impl Create {
pub(crate) fn run(self, env: &mut Env) -> Result<(), Error> {
let input = env.resolve(&self.input);
let files = Files::from_root(&input)?;
let piece_length = self
.piece_length
.unwrap_or_else(|| PieceLengthPicker::from_content_size(files.total_size()));
let mut linter = Linter::new();
linter.allow(self.allowed_lints.iter().cloned());
if linter.is_denied(Lint::UnevenPieceLength) && !self.piece_length.is_power_of_two() {
if linter.is_denied(Lint::UnevenPieceLength) && !piece_length.is_power_of_two() {
return Err(Error::PieceLengthUneven {
bytes: self.piece_length,
bytes: piece_length,
});
}
let piece_length: u32 =
self
.piece_length
.0
.try_into()
.map_err(|_| Error::PieceLengthTooLarge {
bytes: self.piece_length,
})?;
let piece_length: u32 = piece_length
.0
.try_into()
.map_err(|_| Error::PieceLengthTooLarge {
bytes: piece_length,
})?;
if piece_length == 0 {
return Err(Error::PieceLengthZero);
@ -164,8 +145,6 @@ impl Create {
return Err(Error::PieceLengthSmall);
}
let input = env.resolve(&self.input);
let mut announce_list = Vec::new();
for tier in &self.announce_tiers {
let tier = tier.split(',').map(str::to_string).collect::<Vec<String>>();
@ -453,7 +432,7 @@ mod tests {
let torrent = env.resolve("foo.torrent");
let bytes = fs::read(torrent).unwrap();
let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&bytes).unwrap();
assert_eq!(metainfo.info.piece_length, 512 * 2u32.pow(10));
assert_eq!(metainfo.info.piece_length, 16 * 2u32.pow(10));
}
#[test]
@ -804,6 +783,8 @@ mod tests {
"--piece-length",
"17KiB",
]);
let dir = env.resolve("foo");
fs::create_dir(&dir).unwrap();
assert_matches!(
env.run(),
Err(Error::PieceLengthUneven { bytes }) if bytes.0 == 17 * 1024
@ -837,6 +818,8 @@ mod tests {
"--piece-length",
"0",
]);
let dir = env.resolve("foo");
fs::create_dir(&dir).unwrap();
assert_matches!(env.run(), Err(Error::PieceLengthZero));
}
@ -850,6 +833,8 @@ mod tests {
"--piece-length",
"8KiB",
]);
let dir = env.resolve("foo");
fs::create_dir(&dir).unwrap();
assert_matches!(env.run(), Err(Error::PieceLengthSmall));
}
@ -894,12 +879,12 @@ mod tests {
env.run().unwrap();
let have = env.out();
let want = " Name foo
Info Hash 8197efe97f10f50f249e8d5c63eb5c0d4e1d9b49
Torrent Size 166 bytes
Info Hash 2637812436658f855e99f07c40fe7da5832a7b6d
Torrent Size 165 bytes
Content Size 0 bytes
Private no
Tracker http://bar/
Piece Size 512 KiB
Piece Size 16 KiB
Piece Count 1
File Count 0
";

View File

@ -0,0 +1,67 @@
use crate::common::*;
// Argument-less subcommand that prints the automatic piece length table.
// Plain `//` comments are used on this item so that the derive sees exactly
// the same input as before.
#[derive(StructOpt)]
#[structopt(
    help_message(consts::HELP_MESSAGE),
    version_message(consts::VERSION_MESSAGE),
    about("Display information about automatic piece length selection.")
)]
pub(crate) struct PieceLength {}

#[allow(clippy::unused_self)]
impl PieceLength {
    /// Prints one line per power-of-two content size from 2^14 through 2^50
    /// bytes, showing the piece length the picker selects, the resulting
    /// piece count, and the size reported by `PieceLengthPicker::metainfo_size`.
    ///
    /// A header row is printed first, and every column is padded to the width
    /// of its longest cell.
    pub(crate) fn run(self, env: &mut Env) -> Result<(), Error> {
        // Row layout: (content size, piece length, piece count, metainfo size).
        let mut table: Vec<(String, String, String, String)> = vec![(
            String::from("Content"),
            String::from("Piece Length"),
            String::from("Count"),
            String::from("Metainfo Size"),
        )];

        for exponent in 14u32..51 {
            let content_size = Bytes::from(1u128 << exponent);
            let piece_length = PieceLengthPicker::from_content_size(content_size);
            let count = PieceLengthPicker::piece_count(content_size, piece_length);
            let metainfo = PieceLengthPicker::metainfo_size(content_size, piece_length);

            table.push((
                content_size.to_string(),
                piece_length.to_string(),
                count.to_string(),
                metainfo.to_string(),
            ));
        }

        // Widest cell in each column, header included.
        let widths = table.iter().fold([0usize; 4], |acc, (a, b, c, d)| {
            [
                acc[0].max(a.len()),
                acc[1].max(b.len()),
                acc[2].max(c.len()),
                acc[3].max(d.len()),
            ]
        });

        for (content, length, count, metainfo) in table {
            outln!(
                env,
                "{:w0$} -> {:w1$} x {:w2$} = {:w3$}",
                content,
                length,
                count,
                metainfo,
                w0 = widths[0],
                w1 = widths[1],
                w2 = widths[2],
                w3 = widths[3],
            );
        }

        Ok(())
    }
}

11
src/outln.rs Normal file
View File

@ -0,0 +1,11 @@
// Writes one line to `$env.out`, accepting the same three call shapes as
// `writeln!`: no message, a bare format string, or a format string followed
// by arguments.
//
// Every arm expands to a statement ending in `?`, so the macro can only be
// used inside a function whose return type can absorb the resulting error.
//
// NOTE(review): the error context is `crate::error::Stderr` even though the
// write targets `$env.out` — confirm whether this is intended.
macro_rules! outln {
// No message: emit an empty line.
($env:expr) => {
writeln!($env.out, "").context(crate::error::Stderr)?;
};
// Format string with no arguments.
($env:expr, $fmt:expr) => {
writeln!($env.out, $fmt).context(crate::error::Stderr)?;
};
// Format string followed by arguments, passed through to `writeln!` untouched.
($env:expr, $fmt:expr, $($arg:tt)*) => {
writeln!($env.out, $fmt, $($arg)*).context(crate::error::Stderr)?;
};
}

View File

@ -0,0 +1,75 @@
// The piece length picker attempts to pick a reasonable piece length
// for a torrent given the size of the torrent's contents.
//
// Constraints:
// - Decreasing piece length increases protocol overhead.
// - Decreasing piece length increases torrent metainfo size.
// - Increasing piece length increases the amount of data that must be
// thrown away in case of corruption.
// - Increasing piece length increases the amount of data that must be
// downloaded before it can be verified and uploaded to other peers.
// - Decreasing piece length increases the proportion of disk seeks to
// disk reads. This can be an issue for spinning disks.
// - The BitTorrent v2 specification requires that piece sizes be
// at least 16 KiB.
//
// These constraints could probably be exactly defined and optimized
// using an integer programming solver, but instead we just copy what
// libtorrent does.
use crate::common::*;
/// Namespace for the automatic piece length heuristic and the size
/// estimates derived from it.
pub(crate) struct PieceLengthPicker;

impl PieceLengthPicker {
    /// Picks a piece length for content of the given total size.
    ///
    /// The result is `2^(ceil(log2(content_size)) / 2 + 4)` bytes, clamped to
    /// the inclusive range 16 KiB to 16 MiB. Empty content receives the
    /// 16 KiB minimum.
    pub(crate) fn from_content_size(content_size: Bytes) -> Bytes {
        let count = content_size.count();

        // Exact integer ceil(log2(count)): the number of bits needed to
        // represent `count - 1`. Sizes of zero and one both map to an
        // exponent of zero. Doing this in integers avoids the precision loss
        // and out-of-range float-to-int cast of an `f64` round trip.
        let exponent = if count <= 1 {
            0
        } else {
            128 - (count - 1).leading_zeros()
        };

        // `exponent` is at most 128, so the shift is at most 68 bits and
        // cannot overflow a `u128`.
        Bytes::from(1u128 << (exponent / 2 + 4))
            .max(Bytes::kib() * 16)
            .min(Bytes::mib() * 16)
    }

    /// Returns the number of pieces needed to cover `content_size` bytes
    /// using pieces of `piece_length` bytes.
    ///
    /// A trailing partial piece counts as a whole piece. Empty content has
    /// zero pieces.
    ///
    /// # Panics
    ///
    /// Panics if `piece_length` is zero while `content_size` is non-zero.
    pub(crate) fn piece_count(content_size: Bytes, piece_length: Bytes) -> u128 {
        if content_size == Bytes::from(0u128) {
            return 0;
        }

        let full_pieces = content_size / piece_length;
        let has_partial_piece = content_size.count() % piece_length.count() != 0;

        // Cannot overflow: a non-zero remainder implies `piece_length >= 2`,
        // so `full_pieces` is at most half of `u128::MAX`.
        full_pieces + u128::from(has_partial_piece)
    }

    /// Returns the number of bytes taken by the piece digests alone: one
    /// SHA-1 digest of `sha1::DIGEST_LENGTH` bytes per piece.
    pub(crate) fn metainfo_size(content_size: Bytes, piece_length: Bytes) -> Bytes {
        let digest_length: u128 = sha1::DIGEST_LENGTH.into_u64().into();
        Bytes::from(Self::piece_count(content_size, piece_length) * digest_length)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the selected piece length at a few content sizes near the
    /// lower clamp.
    #[test]
    fn limits() {
        // (content size in MiB, expected piece length in KiB)
        let cases: [(u128, u128); 4] = [(2, 16), (4, 32), (8, 32), (16, 64)];

        for &(content_mib, want_kib) in cases.iter() {
            assert_eq!(
                PieceLengthPicker::from_content_size(Bytes::mib() * content_mib),
                Bytes::kib() * want_kib
            );
        }
    }
}

View File

@ -1,14 +0,0 @@
use crate::common::*;
// Top-level subcommand selector. Plain `//` comments are used on this item
// so that the derive sees exactly the same input as before.
#[derive(StructOpt)]
pub(crate) enum Subcommand {
    Torrent(Torrent),
}

impl Subcommand {
    /// Runs the selected subcommand, forwarding `env` and the `unstable`
    /// flag to it and returning its result.
    pub(crate) fn run(self, env: &mut Env, unstable: bool) -> Result<(), Error> {
        // `Torrent` is the only variant, so this pattern is irrefutable.
        let Self::Torrent(torrent) = self;
        torrent.run(env, unstable)
    }
}