From a574368ffca6738393950bfe2412a26d66a41d17 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Wed, 5 Feb 2020 21:47:12 -0800 Subject: [PATCH] Allow including and excluding files from torrent with globs To include only files that match a glob, pass `--glob GLOB`. To exclude files that match a glob, pass `--glob !GLOB`. Multiple globs may be passed, with later globs taking precedence over earlier ones. type: added --- Cargo.lock | 29 ++++++++++ Cargo.toml | 1 + src/common.rs | 1 + src/error.rs | 19 +++++++ src/file_path.rs | 17 ++---- src/files.rs | 1 + src/opt/torrent/create.rs | 115 ++++++++++++++++++++++++++++++++++++++ src/walker.rs | 71 ++++++++++++++++++++++- 8 files changed, 241 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cfa7273..6a22305 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,6 +50,15 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "bstr" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "502ae1441a0a5adb8fbd38a5955a6416b9493e92b465de5e4a9bde6a539c2c48" +dependencies = [ + "memchr", +] + [[package]] name = "c2-chacha" version = "0.2.3" @@ -127,6 +136,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "fnv" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" + [[package]] name = "getrandom" version = "0.1.14" @@ -144,6 +159,19 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +[[package]] +name = "globset" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925aa2cac82d8834e2b2a4415b6f6879757fb5c0928fc445ae76461a12eed8f2" +dependencies = [ + "aho-corasick", 
"bstr", + "fnv", + "log", + "regex", +] + [[package]] name = "heck" version = "0.3.1" @@ -190,6 +218,7 @@ dependencies = [ "atty", "chrono", "env_logger", + "globset", "libc", "md5", "pretty_assertions", diff --git a/Cargo.toml b/Cargo.toml index 7998e28..40dae1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ ansi_term = "0.12" atty = "0.2" chrono = "0.4.1" env_logger = "0.7" +globset = "0.4" libc = "0.2" md5 = "0.7" pretty_assertions = "0.6" diff --git a/src/common.rs b/src/common.rs index e69076b..49f1892 100644 --- a/src/common.rs +++ b/src/common.rs @@ -21,6 +21,7 @@ pub(crate) use std::{ // dependencies pub(crate) use chrono::{TimeZone, Utc}; +pub(crate) use globset::{Glob, GlobMatcher}; pub(crate) use libc::EXIT_FAILURE; pub(crate) use regex::{Regex, RegexSet}; pub(crate) use serde::{Deserialize, Serialize}; diff --git a/src/error.rs b/src/error.rs index 1bd6fef..29af2fc 100644 --- a/src/error.rs +++ b/src/error.rs @@ -35,8 +35,15 @@ pub(crate) enum Error { FilenameExtract { path: PathBuf }, #[snafu(display("I/O error at `{}`: {}", path.display(), source))] Filesystem { source: io::Error, path: PathBuf }, + #[snafu(display("Invalid glob: {}", source))] + GlobParse { source: globset::Error }, #[snafu(display("Failed to find opener utility, please install one of {}", tried.join(",")))] OpenerMissing { tried: &'static [&'static str] }, + #[snafu(display( + "Interal error, this may indicate a bug in intermodal: {}\nConsider filing an issue: https://github.com/casey/imdl/issues/new", + message, + ))] + Internal { message: String }, #[snafu(display( "Path `{}` contains non-normal component: {}", path.display(), @@ -106,6 +113,12 @@ impl Error { _ => None, } } + + pub(crate) fn internal(message: impl Into) -> Error { + Error::Internal { + message: message.into(), + } + } } impl From for Error { @@ -114,6 +127,12 @@ impl From for Error { } } +impl From for Error { + fn from(source: globset::Error) -> Self { + Self::GlobParse { source } + } +} + impl From 
for Error { fn from(source: SystemTimeError) -> Self { Self::SystemTime { source } diff --git a/src/file_path.rs b/src/file_path.rs index dde6dbf..25d50c6 100644 --- a/src/file_path.rs +++ b/src/file_path.rs @@ -7,28 +7,24 @@ pub(crate) struct FilePath { } impl FilePath { - pub(crate) fn from_prefix_and_path(prefix: &Path, path: &Path) -> Result { - let relative = path - .strip_prefix(prefix) - .context(error::PathStripPrefix { prefix, path })?; - + pub(crate) fn from_relative_path(path: &Path) -> Result { let mut components = Vec::new(); - for component in relative.components() { + for component in path.components() { match component { path::Component::Normal(os) => { if let Some(unicode) = os.to_str() { components.push(unicode.to_owned()); } else { return Err(Error::PathDecode { - path: relative.to_owned(), + path: path.to_owned(), component: PathBuf::from(component.as_os_str()), }); } } _ => { return Err(Error::PathComponent { - path: relative.to_owned(), + path: path.to_owned(), component: PathBuf::from(component.as_os_str()), }) } @@ -36,10 +32,7 @@ impl FilePath { } if components.is_empty() { - return Err(Error::PathStripEmpty { - prefix: prefix.to_owned(), - path: path.to_owned(), - }); + return Err(Error::internal("FilePath::from_relative_path: empty path")); } Ok(FilePath { components }) diff --git a/src/files.rs b/src/files.rs index 05502c8..78fe3c6 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1,5 +1,6 @@ use crate::common::*; +#[derive(Debug)] pub(crate) struct Files { root: PathBuf, total_size: Bytes, diff --git a/src/opt/torrent/create.rs b/src/opt/torrent/create.rs index dee6447..c84349f 100644 --- a/src/opt/torrent/create.rs +++ b/src/opt/torrent/create.rs @@ -55,6 +55,12 @@ Note: Many BitTorrent clients do not implement the behavior described in BEP 12. help = "Overwrite the destination `.torrent` file, if it exists." )] force: bool, + #[structopt( + name = "GLOB", + long = "glob", + help = "Include or exclude files that match `GLOB`. 
Multiple glob may be provided, with the last one taking precedence. Precede a glob with a ! to exclude it." + )] + globs: Vec, #[structopt( name = "INCLUDE-HIDDEN", long = "include-hidden", @@ -145,6 +151,7 @@ impl Create { .include_junk(self.include_junk) .include_hidden(self.include_hidden) .follow_symlinks(self.follow_symlinks) + .globs(&self.globs)? .files()?; let piece_length = self @@ -1294,4 +1301,112 @@ Content Size 9 bytes ); assert_eq!(metainfo.info.pieces, &[]); } + + #[test] + fn glob_exclude() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--glob", "!a"]); + env.create_dir("foo"); + env.create_file("foo/a", "a"); + env.create_file("foo/b", "b"); + env.create_file("foo/c", "c"); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.len() == 2 + ); + assert_eq!(metainfo.info.pieces, Sha1::from("bc").digest().bytes()); + } + + #[test] + fn glob_exclude_nomatch() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--glob", "!x"]); + env.create_dir("foo"); + env.create_file("foo/a", "a"); + env.create_file("foo/b", "b"); + env.create_file("foo/c", "c"); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.len() == 3 + ); + assert_eq!(metainfo.info.pieces, Sha1::from("abc").digest().bytes()); + } + + #[test] + fn glob_include() { + let mut env = environment(&[ + "--input", + "foo", + "--announce", + "http://bar", + "--glob", + "[bc]", + ]); + env.create_dir("foo"); + env.create_file("foo/a", "a"); + env.create_file("foo/b", "b"); + env.create_file("foo/c", "c"); + env.run().unwrap(); + let torrent = 
env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.len() == 2 + ); + assert_eq!(metainfo.info.pieces, Sha1::from("bc").digest().bytes()); + } + + #[test] + fn glob_include_nomatch() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--glob", "x"]); + env.create_dir("foo"); + env.create_file("foo/a", "a"); + env.create_file("foo/b", "b"); + env.create_file("foo/c", "c"); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.is_empty() + ); + assert_eq!(metainfo.info.pieces, &[]); + } + + #[test] + fn glob_precedence() { + let mut env = environment(&[ + "--input", + "foo", + "--announce", + "http://bar", + "--glob", + "!*", + "--glob", + "[ab]", + "--glob", + "!b", + ]); + env.create_dir("foo"); + env.create_file("foo/a", "a"); + env.create_file("foo/b", "b"); + env.create_file("foo/c", "c"); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.len() == 1 + ); + assert_eq!(metainfo.info.pieces, Sha1::from("a").digest().bytes()); + } } diff --git a/src/walker.rs b/src/walker.rs index 2f6eb06..ff192bb 100644 --- a/src/walker.rs +++ b/src/walker.rs @@ -2,10 +2,17 @@ use crate::common::*; const JUNK: &[&str] = &["Thumbs.db", "Desktop.ini"]; +#[derive(Debug)] +struct Pattern { + glob: GlobMatcher, + include: bool, +} + pub(crate) struct Walker { follow_symlinks: bool, include_hidden: bool, include_junk: bool, + patterns: Vec, root: PathBuf, } @@ -15,6 +22,7 @@ impl Walker { follow_symlinks: 
false, include_hidden: false, include_junk: false, + patterns: Vec::new(), root: root.to_owned(), } } @@ -33,6 +41,19 @@ impl Walker { } } + pub(crate) fn globs(mut self, globs: &[String]) -> Result { + for glob in globs { + let exclude = glob.starts_with('!'); + let glob = Glob::new(if exclude { &glob[1..] } else { glob })?.compile_matcher(); + self.patterns.push(Pattern { + glob, + include: !exclude, + }); + } + + Ok(self) + } + pub(crate) fn follow_symlinks(self, follow_symlinks: bool) -> Self { Walker { follow_symlinks, @@ -97,7 +118,25 @@ impl Walker { continue; } - let file_path = FilePath::from_prefix_and_path(&self.root, &path)?; + let relative = path + .strip_prefix(&self.root) + .context(error::PathStripPrefix { + path, + prefix: &self.root, + })?; + + if relative.components().count() == 0 { + return Err(Error::PathStripEmpty { + prefix: self.root.clone(), + path: path.to_owned(), + }); + } + + if !self.pattern_filter(&relative) { + continue; + } + + let file_path = FilePath::from_relative_path(relative)?; if !self.include_junk && JUNK.contains(&file_path.name()) { continue; @@ -110,4 +149,34 @@ impl Walker { Ok(Files::dir(self.root, Bytes::from(total_size), paths)) } + + fn pattern_filter(&self, relative: &Path) -> bool { + for Pattern { glob, include } in self.patterns.iter().rev() { + if glob.is_match(relative) { + return *include; + } + } + + if let Some(Pattern { include, .. }) = self.patterns.first() { + return !include; + } + + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn glob() { + let walker = Walker::new(Path::new("foo")) + .globs(&["[bc]".into()]) + .unwrap(); + + assert!(!walker.pattern_filter(Path::new("a"))); + assert!(walker.pattern_filter(Path::new("b"))); + assert!(walker.pattern_filter(Path::new("c"))); + } }