Allow including and excluding files from torrent with globs

To include only files that match a glob, pass `--glob GLOB`. To exclude
files that match a glob, pass `--glob !GLOB`. Multiple globs may be
passed, with later globs taking precedence over earlier ones.

type: added
This commit is contained in:
Casey Rodarmor 2020-02-05 21:47:12 -08:00
parent 9158c230df
commit a574368ffc
No known key found for this signature in database
GPG Key ID: 556186B153EC6FE0
8 changed files with 241 additions and 13 deletions

29
Cargo.lock generated
View File

@ -50,6 +50,15 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "bstr"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "502ae1441a0a5adb8fbd38a5955a6416b9493e92b465de5e4a9bde6a539c2c48"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "c2-chacha" name = "c2-chacha"
version = "0.2.3" version = "0.2.3"
@ -127,6 +136,12 @@ dependencies = [
"termcolor", "termcolor",
] ]
[[package]]
name = "fnv"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.1.14" version = "0.1.14"
@ -144,6 +159,19 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "globset"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925aa2cac82d8834e2b2a4415b6f6879757fb5c0928fc445ae76461a12eed8f2"
dependencies = [
"aho-corasick",
"bstr",
"fnv",
"log",
"regex",
]
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.3.1" version = "0.3.1"
@ -190,6 +218,7 @@ dependencies = [
"atty", "atty",
"chrono", "chrono",
"env_logger", "env_logger",
"globset",
"libc", "libc",
"md5", "md5",
"pretty_assertions", "pretty_assertions",

View File

@ -17,6 +17,7 @@ ansi_term = "0.12"
atty = "0.2" atty = "0.2"
chrono = "0.4.1" chrono = "0.4.1"
env_logger = "0.7" env_logger = "0.7"
globset = "0.4"
libc = "0.2" libc = "0.2"
md5 = "0.7" md5 = "0.7"
pretty_assertions = "0.6" pretty_assertions = "0.6"

View File

@ -21,6 +21,7 @@ pub(crate) use std::{
// dependencies // dependencies
pub(crate) use chrono::{TimeZone, Utc}; pub(crate) use chrono::{TimeZone, Utc};
pub(crate) use globset::{Glob, GlobMatcher};
pub(crate) use libc::EXIT_FAILURE; pub(crate) use libc::EXIT_FAILURE;
pub(crate) use regex::{Regex, RegexSet}; pub(crate) use regex::{Regex, RegexSet};
pub(crate) use serde::{Deserialize, Serialize}; pub(crate) use serde::{Deserialize, Serialize};

View File

@ -35,8 +35,15 @@ pub(crate) enum Error {
FilenameExtract { path: PathBuf }, FilenameExtract { path: PathBuf },
#[snafu(display("I/O error at `{}`: {}", path.display(), source))] #[snafu(display("I/O error at `{}`: {}", path.display(), source))]
Filesystem { source: io::Error, path: PathBuf }, Filesystem { source: io::Error, path: PathBuf },
/// A glob pattern was rejected by `globset` while being parsed; `source`
/// carries the underlying `globset` error that is shown in the message.
#[snafu(display("Invalid glob: {}", source))]
GlobParse { source: globset::Error },
#[snafu(display("Failed to find opener utility, please install one of {}", tried.join(",")))] #[snafu(display("Failed to find opener utility, please install one of {}", tried.join(",")))]
OpenerMissing { tried: &'static [&'static str] }, OpenerMissing { tried: &'static [&'static str] },
/// An unexpected internal condition, reported with a free-form `message`
/// and a pointer to the issue tracker so that users can report it.
#[snafu(display(
  "Internal error, this may indicate a bug in intermodal: {}\nConsider filing an issue: https://github.com/casey/imdl/issues/new",
  message,
))]
Internal { message: String },
#[snafu(display( #[snafu(display(
"Path `{}` contains non-normal component: {}", "Path `{}` contains non-normal component: {}",
path.display(), path.display(),
@ -106,6 +113,12 @@ impl Error {
_ => None, _ => None,
} }
} }
pub(crate) fn internal(message: impl Into<String>) -> Error {
Error::Internal {
message: message.into(),
}
}
} }
impl From<clap::Error> for Error { impl From<clap::Error> for Error {
@ -114,6 +127,12 @@ impl From<clap::Error> for Error {
} }
} }
impl From<globset::Error> for Error {
fn from(source: globset::Error) -> Self {
Self::GlobParse { source }
}
}
impl From<SystemTimeError> for Error { impl From<SystemTimeError> for Error {
fn from(source: SystemTimeError) -> Self { fn from(source: SystemTimeError) -> Self {
Self::SystemTime { source } Self::SystemTime { source }

View File

@ -7,28 +7,24 @@ pub(crate) struct FilePath {
} }
impl FilePath { impl FilePath {
pub(crate) fn from_prefix_and_path(prefix: &Path, path: &Path) -> Result<FilePath, Error> { pub(crate) fn from_relative_path(path: &Path) -> Result<FilePath, Error> {
let relative = path
.strip_prefix(prefix)
.context(error::PathStripPrefix { prefix, path })?;
let mut components = Vec::new(); let mut components = Vec::new();
for component in relative.components() { for component in path.components() {
match component { match component {
path::Component::Normal(os) => { path::Component::Normal(os) => {
if let Some(unicode) = os.to_str() { if let Some(unicode) = os.to_str() {
components.push(unicode.to_owned()); components.push(unicode.to_owned());
} else { } else {
return Err(Error::PathDecode { return Err(Error::PathDecode {
path: relative.to_owned(), path: path.to_owned(),
component: PathBuf::from(component.as_os_str()), component: PathBuf::from(component.as_os_str()),
}); });
} }
} }
_ => { _ => {
return Err(Error::PathComponent { return Err(Error::PathComponent {
path: relative.to_owned(), path: path.to_owned(),
component: PathBuf::from(component.as_os_str()), component: PathBuf::from(component.as_os_str()),
}) })
} }
@ -36,10 +32,7 @@ impl FilePath {
} }
if components.is_empty() { if components.is_empty() {
return Err(Error::PathStripEmpty { return Err(Error::internal("FilePath::from_relative_path: empty path"));
prefix: prefix.to_owned(),
path: path.to_owned(),
});
} }
Ok(FilePath { components }) Ok(FilePath { components })

View File

@ -1,5 +1,6 @@
use crate::common::*; use crate::common::*;
#[derive(Debug)]
pub(crate) struct Files { pub(crate) struct Files {
root: PathBuf, root: PathBuf,
total_size: Bytes, total_size: Bytes,

View File

@ -55,6 +55,12 @@ Note: Many BitTorrent clients do not implement the behavior described in BEP 12.
help = "Overwrite the destination `.torrent` file, if it exists." help = "Overwrite the destination `.torrent` file, if it exists."
)] )]
force: bool, force: bool,
// Raw `--glob` arguments in the order given on the command line. The flag
// may be repeated; a leading `!` marks a pattern as an exclusion.
#[structopt(
  name = "GLOB",
  long = "glob",
  help = "Include or exclude files that match `GLOB`. Multiple globs may be provided, with the last one taking precedence. Precede a glob with a ! to exclude it."
)]
globs: Vec<String>,
#[structopt( #[structopt(
name = "INCLUDE-HIDDEN", name = "INCLUDE-HIDDEN",
long = "include-hidden", long = "include-hidden",
@ -145,6 +151,7 @@ impl Create {
.include_junk(self.include_junk) .include_junk(self.include_junk)
.include_hidden(self.include_hidden) .include_hidden(self.include_hidden)
.follow_symlinks(self.follow_symlinks) .follow_symlinks(self.follow_symlinks)
.globs(&self.globs)?
.files()?; .files()?;
let piece_length = self let piece_length = self
@ -1294,4 +1301,112 @@ Content Size 9 bytes
); );
assert_eq!(metainfo.info.pieces, &[]); assert_eq!(metainfo.info.pieces, &[]);
} }
#[test]
fn glob_exclude() {
  // A single exclusion glob: `a` is dropped, `b` and `c` remain.
  let args = ["--input", "foo", "--announce", "http://bar", "--glob", "!a"];
  let mut env = environment(&args);

  env.create_dir("foo");
  env.create_file("foo/a", "a");
  env.create_file("foo/b", "b");
  env.create_file("foo/c", "c");
  env.run().unwrap();

  let contents = fs::read(env.resolve("foo.torrent")).unwrap();
  let metainfo: Metainfo = serde_bencode::de::from_bytes(&contents).unwrap();

  assert_matches!(
    metainfo.info.mode,
    Mode::Multiple { files } if files.len() == 2
  );
  assert_eq!(metainfo.info.pieces, Sha1::from("bc").digest().bytes());
}
#[test]
fn glob_exclude_nomatch() {
  // An exclusion glob that matches nothing leaves all three files in place.
  let args = ["--input", "foo", "--announce", "http://bar", "--glob", "!x"];
  let mut env = environment(&args);

  env.create_dir("foo");
  env.create_file("foo/a", "a");
  env.create_file("foo/b", "b");
  env.create_file("foo/c", "c");
  env.run().unwrap();

  let contents = fs::read(env.resolve("foo.torrent")).unwrap();
  let metainfo: Metainfo = serde_bencode::de::from_bytes(&contents).unwrap();

  assert_matches!(
    metainfo.info.mode,
    Mode::Multiple { files } if files.len() == 3
  );
  assert_eq!(metainfo.info.pieces, Sha1::from("abc").digest().bytes());
}
#[test]
fn glob_include() {
  // A single inclusion glob: only `b` and `c` match `[bc]`.
  let args = ["--input", "foo", "--announce", "http://bar", "--glob", "[bc]"];
  let mut env = environment(&args);

  env.create_dir("foo");
  env.create_file("foo/a", "a");
  env.create_file("foo/b", "b");
  env.create_file("foo/c", "c");
  env.run().unwrap();

  let contents = fs::read(env.resolve("foo.torrent")).unwrap();
  let metainfo: Metainfo = serde_bencode::de::from_bytes(&contents).unwrap();

  assert_matches!(
    metainfo.info.mode,
    Mode::Multiple { files } if files.len() == 2
  );
  assert_eq!(metainfo.info.pieces, Sha1::from("bc").digest().bytes());
}
#[test]
fn glob_include_nomatch() {
  // An inclusion glob that matches nothing yields a torrent with no files.
  let args = ["--input", "foo", "--announce", "http://bar", "--glob", "x"];
  let mut env = environment(&args);

  env.create_dir("foo");
  env.create_file("foo/a", "a");
  env.create_file("foo/b", "b");
  env.create_file("foo/c", "c");
  env.run().unwrap();

  let contents = fs::read(env.resolve("foo.torrent")).unwrap();
  let metainfo: Metainfo = serde_bencode::de::from_bytes(&contents).unwrap();

  assert_matches!(
    metainfo.info.mode,
    Mode::Multiple { files } if files.is_empty()
  );
  assert_eq!(metainfo.info.pieces, &[]);
}
#[test]
fn glob_precedence() {
  // Later globs override earlier ones: `!*` excludes everything, `[ab]`
  // re-includes `a` and `b`, and the final `!b` excludes `b` again.
  let args = [
    "--input",
    "foo",
    "--announce",
    "http://bar",
    "--glob",
    "!*",
    "--glob",
    "[ab]",
    "--glob",
    "!b",
  ];
  let mut env = environment(&args);

  env.create_dir("foo");
  env.create_file("foo/a", "a");
  env.create_file("foo/b", "b");
  env.create_file("foo/c", "c");
  env.run().unwrap();

  let contents = fs::read(env.resolve("foo.torrent")).unwrap();
  let metainfo: Metainfo = serde_bencode::de::from_bytes(&contents).unwrap();

  assert_matches!(
    metainfo.info.mode,
    Mode::Multiple { files } if files.len() == 1
  );
  assert_eq!(metainfo.info.pieces, Sha1::from("a").digest().bytes());
}
} }

View File

@ -2,10 +2,17 @@ use crate::common::*;
const JUNK: &[&str] = &["Thumbs.db", "Desktop.ini"]; const JUNK: &[&str] = &["Thumbs.db", "Desktop.ini"];
/// A compiled glob together with the decision it makes for paths it matches.
#[derive(Debug)]
struct Pattern {
/// Matcher compiled from the glob text, without any leading `!`.
glob: GlobMatcher,
/// `true` if a matching path is included, `false` if it is excluded.
include: bool,
}
pub(crate) struct Walker { pub(crate) struct Walker {
follow_symlinks: bool, follow_symlinks: bool,
include_hidden: bool, include_hidden: bool,
include_junk: bool, include_junk: bool,
patterns: Vec<Pattern>,
root: PathBuf, root: PathBuf,
} }
@ -15,6 +22,7 @@ impl Walker {
follow_symlinks: false, follow_symlinks: false,
include_hidden: false, include_hidden: false,
include_junk: false, include_junk: false,
patterns: Vec::new(),
root: root.to_owned(), root: root.to_owned(),
} }
} }
@ -33,6 +41,19 @@ impl Walker {
} }
} }
/// Compile `globs` and append them, in order, to this walker's patterns.
///
/// A glob that starts with `!` is recorded as an exclusion and has the `!`
/// removed before it is compiled; any other glob is recorded as an
/// inclusion. Returns an error as soon as one glob fails to parse.
pub(crate) fn globs(mut self, globs: &[String]) -> Result<Self, Error> {
  for text in globs {
    // `!` is a single byte, so slicing from index 1 drops exactly the marker.
    let (include, source) = if text.starts_with('!') {
      (false, &text[1..])
    } else {
      (true, text.as_str())
    };

    let glob = Glob::new(source)?.compile_matcher();
    self.patterns.push(Pattern { glob, include });
  }

  Ok(self)
}
pub(crate) fn follow_symlinks(self, follow_symlinks: bool) -> Self { pub(crate) fn follow_symlinks(self, follow_symlinks: bool) -> Self {
Walker { Walker {
follow_symlinks, follow_symlinks,
@ -97,7 +118,25 @@ impl Walker {
continue; continue;
} }
let file_path = FilePath::from_prefix_and_path(&self.root, &path)?; let relative = path
.strip_prefix(&self.root)
.context(error::PathStripPrefix {
path,
prefix: &self.root,
})?;
if relative.components().count() == 0 {
return Err(Error::PathStripEmpty {
prefix: self.root.clone(),
path: path.to_owned(),
});
}
if !self.pattern_filter(&relative) {
continue;
}
let file_path = FilePath::from_relative_path(relative)?;
if !self.include_junk && JUNK.contains(&file_path.name()) { if !self.include_junk && JUNK.contains(&file_path.name()) {
continue; continue;
@ -110,4 +149,34 @@ impl Walker {
Ok(Files::dir(self.root, Bytes::from(total_size), paths)) Ok(Files::dir(self.root, Bytes::from(total_size), paths))
} }
fn pattern_filter(&self, relative: &Path) -> bool {
for Pattern { glob, include } in self.patterns.iter().rev() {
if glob.is_match(relative) {
return *include;
}
}
if let Some(Pattern { include, .. }) = self.patterns.first() {
return !include;
}
true
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn glob() {
  // With the single inclusion glob `[bc]`, only `b` and `c` pass the filter.
  let patterns = ["[bc]".to_owned()];
  let walker = Walker::new(Path::new("foo")).globs(&patterns).unwrap();

  for (name, expected) in &[("a", false), ("b", true), ("c", true)] {
    assert_eq!(walker.pattern_filter(Path::new(name)), *expected);
  }
}
} }