From 9158c230dfd0a7ec90847b05399137012501a613 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Wed, 5 Feb 2020 18:32:09 -0800 Subject: [PATCH] Skip hidden files, symlinks, and junk in created torrents By default, skip the following when creating a torrent: - Junk files, like `Thumbs.db` - Files and directories that begin with a `.` - Files and directories that have the macOS or Windows hidden attribute set - Symlinks These can be overridden with, respectively: - `--include-junk` - `--include-hidden` - `--include-hidden` - `--follow-symlinks` type: changed --- src/error.rs | 5 + src/file_path.rs | 14 +- src/files.rs | 21 ++- src/hasher.rs | 62 ++++----- src/opt/torrent/create.rs | 278 +++++++++++++++++++++++++++++++++++++- src/test_env.rs | 8 ++ src/walker.rs | 86 +++++++++--- 7 files changed, 412 insertions(+), 62 deletions(-) diff --git a/src/error.rs b/src/error.rs index b2963cf..1bd6fef 100644 --- a/src/error.rs +++ b/src/error.rs @@ -82,6 +82,11 @@ pub(crate) enum Error { Stderr { source: io::Error }, #[snafu(display("Failed to write to standard output: {}", source))] Stdout { source: io::Error }, + #[snafu(display( + "Attempted to create torrent from symlink `{}`. 
To override, pass the `--follow-symlinks` flag.", + root.display() + ))] + SymlinkRoot { root: PathBuf }, #[snafu(display("Failed to retrieve system time: {}", source))] SystemTime { source: SystemTimeError }, #[snafu(display( diff --git a/src/file_path.rs b/src/file_path.rs index e87c1e4..dde6dbf 100644 --- a/src/file_path.rs +++ b/src/file_path.rs @@ -1,7 +1,7 @@ use crate::common::*; #[serde(transparent)] -#[derive(Deserialize, Serialize, Debug, PartialEq)] +#[derive(Deserialize, Serialize, Debug, PartialEq, Clone)] pub(crate) struct FilePath { components: Vec, } @@ -45,6 +45,18 @@ impl FilePath { Ok(FilePath { components }) } + pub(crate) fn name(&self) -> &str { + &self.components[0] + } + + pub(crate) fn absolute(&self, root: &Path) -> PathBuf { + let mut absolute = root.to_owned(); + for component in &self.components { + absolute.push(component); + } + absolute + } + #[cfg(test)] pub(crate) fn from_components(components: &[&str]) -> FilePath { let components: Vec = components diff --git a/src/files.rs b/src/files.rs index e6704d3..05502c8 100644 --- a/src/files.rs +++ b/src/files.rs @@ -3,17 +3,34 @@ use crate::common::*; pub(crate) struct Files { root: PathBuf, total_size: Bytes, + contents: Option>, } impl Files { - pub(crate) fn new(root: PathBuf, total_size: Bytes) -> Files { - Files { root, total_size } + pub(crate) fn file(root: PathBuf, total_size: Bytes) -> Files { + Files { + contents: None, + root, + total_size, + } + } + + pub(crate) fn dir(root: PathBuf, total_size: Bytes, contents: Vec) -> Files { + Files { + contents: Some(contents), + root, + total_size, + } } pub(crate) fn root(&self) -> &Path { &self.root } + pub(crate) fn contents(&self) -> Option<&[FilePath]> { + self.contents.as_deref() + } + pub(crate) fn total_size(&self) -> Bytes { self.total_size } diff --git a/src/hasher.rs b/src/hasher.rs index 14708ff..cf4246d 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -16,7 +16,7 @@ impl Hasher { md5sum: bool, piece_length: u32, ) -> 
Result<(Mode, Vec), Error> { - Self::new(md5sum, piece_length).hash_root(files.root()) + Self::new(md5sum, piece_length).hash_files(files) } fn new(md5sum: bool, piece_length: u32) -> Self { @@ -31,56 +31,44 @@ impl Hasher { } } - fn hash_root(mut self, root: &Path) -> Result<(Mode, Vec), Error> { - let metadata = root.metadata().context(error::Filesystem { path: root })?; + fn hash_files(mut self, files: &Files) -> Result<(Mode, Vec), Error> { + let mode = if let Some(contents) = files.contents() { + let files = self.hash_contents(&files.root(), contents)?; - if metadata.is_file() { - let (md5sum, length) = self.hash_file(&root)?; - - if self.piece_bytes_hashed > 0 { - self.pieces.extend(&self.sha1.digest().bytes()); - self.sha1.reset(); - self.piece_bytes_hashed = 0; - } - - Ok(( - Mode::Single { - md5sum: md5sum.map(|md5sum| format!("{:x}", md5sum)), - length, - }, - self.pieces, - )) + Mode::Multiple { files } } else { - let files = self.hash_dir(root)?; + let (md5sum, length) = self.hash_file(files.root())?; - if self.piece_bytes_hashed > 0 { - self.pieces.extend(&self.sha1.digest().bytes()); - self.sha1.reset(); - self.piece_bytes_hashed = 0; + Mode::Single { + md5sum: md5sum.map(|md5sum| format!("{:x}", md5sum)), + length, } + }; - Ok((Mode::Multiple { files }, self.pieces)) + if self.piece_bytes_hashed > 0 { + self.pieces.extend(&self.sha1.digest().bytes()); + self.sha1.reset(); + self.piece_bytes_hashed = 0; } + + Ok((mode, self.pieces)) } - fn hash_dir(&mut self, dir: &Path) -> Result, Error> { + fn hash_contents( + &mut self, + root: &Path, + file_paths: &[FilePath], + ) -> Result, Error> { let mut files = Vec::new(); - for result in WalkDir::new(dir).sort_by(|a, b| a.file_name().cmp(b.file_name())) { - let entry = result?; - let path = entry.path(); + for file_path in file_paths { + let path = file_path.absolute(root); - if !entry.metadata()?.is_file() { - continue; - } - - let (md5sum, length) = self.hash_file(path)?; - - let file_path = 
FilePath::from_prefix_and_path(dir, path)?; + let (md5sum, length) = self.hash_file(&path)?; files.push(FileInfo { md5sum: md5sum.map(|md5sum| format!("{:x}", md5sum)), - path: file_path, + path: file_path.clone(), length, }); } diff --git a/src/opt/torrent/create.rs b/src/opt/torrent/create.rs index c6eb40e..dee6447 100644 --- a/src/opt/torrent/create.rs +++ b/src/opt/torrent/create.rs @@ -43,12 +43,30 @@ Note: Many BitTorrent clients do not implement the behavior described in BEP 12. long_help = "Include `COMMENT` in generated `.torrent` file. Stored under `comment` key of top-level metainfo dictionary." )] comment: Option, + #[structopt( + name = "FOLLOW-SYMLINKS", + long = "follow-symlinks", + help = "Follow symlinks in torrent input. By default, symlinks to files and directories are not included in torrent contents." + )] + follow_symlinks: bool, #[structopt( name = "FORCE", long = "force", help = "Overwrite the destination `.torrent` file, if it exists." )] force: bool, + #[structopt( + name = "INCLUDE-HIDDEN", + long = "include-hidden", + help = "Include hidden files that would otherwise be skipped, such as files that start with a `.`, and files hidden by file attributes on macOS and Windows." + )] + include_hidden: bool, + #[structopt( + name = "INCLUDE-JUNK", + long = "include-junk", + help = "Include junk files that would otherwise be skipped." 
+ )] + include_junk: bool, #[structopt( name = "INPUT", long = "input", @@ -123,7 +141,11 @@ impl Create { pub(crate) fn run(self, env: &mut Env) -> Result<(), Error> { let input = env.resolve(&self.input); - let files = Walker::new(&input).files()?; + let files = Walker::new(&input) + .include_junk(self.include_junk) + .include_hidden(self.include_hidden) + .follow_symlinks(self.follow_symlinks) + .files()?; let piece_length = self .piece_length @@ -1018,4 +1040,258 @@ Content Size 9 bytes let value = bencode::Value::decode(&bytes).unwrap(); assert!(matches!(value, bencode::Value::Dict(_))); } + + #[test] + fn exclude_junk() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar"]); + let dir = env.resolve("foo"); + fs::create_dir(&dir).unwrap(); + fs::write(dir.join("Thumbs.db"), "abc").unwrap(); + fs::write(dir.join("Desktop.ini"), "abc").unwrap(); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.is_empty() + ); + assert_eq!(metainfo.info.pieces, &[]); + } + + #[test] + fn include_junk() { + let mut env = environment(&[ + "--input", + "foo", + "--announce", + "http://bar", + "--include-junk", + ]); + let dir = env.resolve("foo"); + fs::create_dir(&dir).unwrap(); + fs::write(dir.join("Thumbs.db"), "abc").unwrap(); + fs::write(dir.join("Desktop.ini"), "abc").unwrap(); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.len() == 2 + ); + assert_eq!(metainfo.info.pieces, Sha1::from("abcabc").digest().bytes()); + } + + #[test] + fn skip_hidden() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar"]); + let dir = 
env.resolve("foo"); + fs::create_dir(&dir).unwrap(); + fs::write(dir.join(".hidden"), "abc").unwrap(); + #[cfg(target_os = "windows")] + { + let path = dir.join("hidden"); + fs::write(&path, "abc").unwrap(); + Command::new("attrib") + .arg("+h") + .arg(&path) + .status() + .unwrap(); + } + #[cfg(target_os = "macos")] + { + let path = dir.join("hidden"); + fs::write(&path, "abc").unwrap(); + Command::new("chflags") + .arg("hidden") + .arg(&path) + .status() + .unwrap(); + } + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.len() == 0 + ); + assert_eq!(metainfo.info.pieces, &[]); + } + + #[test] + fn include_hidden() { + let mut env = environment(&[ + "--input", + "foo", + "--announce", + "http://bar", + "--include-hidden", + ]); + let dir = env.resolve("foo"); + fs::create_dir(&dir).unwrap(); + fs::write(dir.join(".hidden"), "abc").unwrap(); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.len() == 1 + ); + assert_eq!(metainfo.info.pieces, Sha1::from("abc").digest().bytes()); + } + + fn populate_symlinks(env: &Env) { + let dir = env.resolve("foo"); + let file_src = env.resolve("bar"); + let file_link = env.resolve("foo/bar"); + let dir_src = env.resolve("dir-src"); + let dir_contents = dir_src.join("baz"); + let dir_link = env.resolve("foo/dir"); + fs::create_dir(&dir_src).unwrap(); + fs::write(dir_contents, "baz").unwrap(); + + fs::create_dir(&dir).unwrap(); + fs::write(file_src, "bar").unwrap(); + #[cfg(unix)] + { + Command::new("ln") + .arg("-s") + .arg("../bar") + .arg(file_link) + .status() + .unwrap(); + + Command::new("ln") + .arg("-s") + .arg("../dir-src") + 
.arg(dir_link) + .status() + .unwrap(); + } + } + + #[test] + fn skip_symlinks() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--md5sum"]); + populate_symlinks(&env); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.is_empty() + ); + assert_eq!(metainfo.info.pieces, &[]); + } + + #[test] + #[cfg(unix)] + fn follow_symlinks() { + let mut env = environment(&[ + "--input", + "foo", + "--announce", + "http://bar", + "--follow-symlinks", + "--md5sum", + ]); + populate_symlinks(&env); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_eq!(metainfo.info.pieces, Sha1::from("barbaz").digest().bytes()); + match metainfo.info.mode { + Mode::Multiple { files } => { + assert_eq!( + files, + &[ + FileInfo { + length: 3, + md5sum: Some("37b51d194a7513e45b56f6524f2d51f2".to_owned()), + path: FilePath::from_components(&["bar"]), + }, + FileInfo { + length: 3, + md5sum: Some("73feffa4b7f6bb68e44cf984c85f6e88".to_owned()), + path: FilePath::from_components(&["dir", "baz"]), + }, + ] + ); + } + _ => panic!("Expected multi-file torrent"), + } + } + + #[test] + #[cfg(unix)] + fn symlink_root() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--md5sum"]); + let file_src = env.resolve("bar"); + let file_link = env.resolve("foo"); + + Command::new("ln") + .arg("-s") + .arg(&file_src) + .arg(&file_link) + .status() + .unwrap(); + + assert_matches!(env.run().unwrap_err(), Error::SymlinkRoot { root } if root == file_link); + } + + #[test] + fn skip_dot_dir_contents() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--md5sum"]); + env.create_dir("foo/.bar"); + 
env.create_file("foo/.bar/baz", "baz"); + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.is_empty() + ); + assert_eq!(metainfo.info.pieces, &[]); + } + + #[test] + fn skip_hidden_attribute_dir_contents() { + let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--md5sum"]); + env.create_dir("foo/bar"); + #[cfg(target_os = "windows")] + { + env.create_file("foo/bar/baz", "baz"); + let path = env.resolve("foo/bar"); + Command::new("attrib") + .arg("+h") + .arg(&path) + .status() + .unwrap(); + } + #[cfg(target_os = "macos")] + { + env.create_file("foo/bar/baz", "baz"); + let path = env.resolve("foo/bar"); + Command::new("chflags") + .arg("hidden") + .arg(&path) + .status() + .unwrap(); + } + env.run().unwrap(); + let torrent = env.resolve("foo.torrent"); + let bytes = fs::read(torrent).unwrap(); + let metainfo = serde_bencode::de::from_bytes::(&bytes).unwrap(); + assert_matches!( + metainfo.info.mode, + Mode::Multiple { files } if files.is_empty() + ); + assert_eq!(metainfo.info.pieces, &[]); + } } diff --git a/src/test_env.rs b/src/test_env.rs index 172345b..5ca6fad 100644 --- a/src/test_env.rs +++ b/src/test_env.rs @@ -22,6 +22,14 @@ impl TestEnv { pub(crate) fn out_bytes(&self) -> Vec { self.out.bytes() } + + pub(crate) fn create_dir(&self, path: impl AsRef) { + fs::create_dir_all(self.env.resolve(path.as_ref())).unwrap(); + } + + pub(crate) fn create_file(&self, path: impl AsRef, bytes: impl AsRef<[u8]>) { + fs::write(self.env.resolve(path), bytes.as_ref()).unwrap(); + } } impl Deref for TestEnv { diff --git a/src/walker.rs b/src/walker.rs index 08a77ad..2f6eb06 100644 --- a/src/walker.rs +++ b/src/walker.rs @@ -1,69 +1,113 @@ use crate::common::*; +const JUNK: &[&str] = &["Thumbs.db", "Desktop.ini"]; + pub(crate) struct Walker { - 
include_junk: bool, + follow_symlinks: bool, include_hidden: bool, + include_junk: bool, root: PathBuf, } impl Walker { pub(crate) fn new(root: &Path) -> Walker { Walker { - include_junk: false, + follow_symlinks: false, include_hidden: false, + include_junk: false, root: root.to_owned(), } } - pub(crate) fn _include_junk(self) -> Self { + pub(crate) fn include_junk(self, include_junk: bool) -> Self { Walker { - include_junk: true, + include_junk, ..self } } - pub(crate) fn _include_hidden(self) -> Self { + pub(crate) fn include_hidden(self, include_hidden: bool) -> Self { Walker { - include_hidden: true, + include_hidden, + ..self + } + } + + pub(crate) fn follow_symlinks(self, follow_symlinks: bool) -> Self { + Walker { + follow_symlinks, ..self } } pub(crate) fn files(self) -> Result { - let mut paths = Vec::new(); - let mut total_size = 0; + if !self.follow_symlinks + && self + .root + .symlink_metadata() + .context(error::Filesystem { path: &self.root })? + .file_type() + .is_symlink() + { + return Err(Error::SymlinkRoot { root: self.root }); + } - let junk: &[&OsStr] = &[OsStr::new("Thumbs.db"), OsStr::new("Desktop.ini")]; + let root_metadata = self + .root + .metadata() + .context(error::Filesystem { path: &self.root })?; - for result in WalkDir::new(&self.root).sort_by(|a, b| a.file_name().cmp(b.file_name())) { - let entry = result?; + if root_metadata.is_file() { + return Ok(Files::file(self.root, Bytes::from(root_metadata.len()))); + } + let filter = |entry: &walkdir::DirEntry| { let path = entry.path(); let file_name = entry.file_name(); + if !self.include_hidden && file_name.to_string_lossy().starts_with('.') { + return false; + } + + let hidden = Platform::hidden(path).unwrap_or(true); + + if !self.include_hidden && hidden { + return false; + } + + true + }; + + let mut paths = Vec::new(); + let mut total_size = 0; + for result in WalkDir::new(&self.root) + .follow_links(self.follow_symlinks) + .sort_by(|a, b| a.file_name().cmp(b.file_name())) + 
.into_iter() + .filter_entry(filter) + { + let entry = result?; + + let path = entry.path(); + let metadata = entry.metadata()?; if !metadata.is_file() { continue; } - if !self.include_hidden && file_name.to_string_lossy().starts_with('.') { - continue; - } + let file_path = FilePath::from_prefix_and_path(&self.root, &path)?; - if !self.include_hidden && Platform::hidden(path)? { - continue; - } - - if !self.include_junk && junk.contains(&file_name) { + if !self.include_junk && JUNK.contains(&file_path.name()) { continue; } total_size += metadata.len(); - paths.push(entry.path().to_owned()); + + paths.push(file_path); } - Ok(Files::new(self.root, Bytes::from(total_size))) + Ok(Files::dir(self.root, Bytes::from(total_size), paths)) } }