Skip hidden files, symlinks, and junk in created torrents

By default, skip the following when creating a torrent:

- Junk files, like `Thumbs.db`
- Files and directories that begin with a `.`
- Files and directories that have the macOS or Windows hidden attribute set
- Symlinks

These can be overridden with, respectively:
- `--include-junk`
- `--include-hidden`
- `--include-hidden`
- `--follow-symlinks`

type: changed
This commit is contained in:
Casey Rodarmor 2020-02-05 18:32:09 -08:00
parent 3739a92857
commit 9158c230df
No known key found for this signature in database
GPG Key ID: 556186B153EC6FE0
7 changed files with 412 additions and 62 deletions

View File

@ -82,6 +82,11 @@ pub(crate) enum Error {
Stderr { source: io::Error },
#[snafu(display("Failed to write to standard output: {}", source))]
Stdout { source: io::Error },
#[snafu(display(
"Attempted to create torrent from symlink `{}`. To override, pass the `--follow-symlinks` flag.",
root.display()
))]
SymlinkRoot { root: PathBuf },
#[snafu(display("Failed to retrieve system time: {}", source))]
SystemTime { source: SystemTimeError },
#[snafu(display(

View File

@ -1,7 +1,7 @@
use crate::common::*;
#[serde(transparent)]
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[derive(Deserialize, Serialize, Debug, PartialEq, Clone)]
pub(crate) struct FilePath {
components: Vec<String>,
}
@ -45,6 +45,18 @@ impl FilePath {
Ok(FilePath { components })
}
/// Returns the name of the file this path refers to, i.e. its last component.
///
/// Components are stored from the torrent root downwards, so for a nested
/// file the first component is the top-level directory. Callers that match on
/// the file's own name (such as the junk-file filter) need the last component.
///
/// # Panics
///
/// Panics if the path has no components.
pub(crate) fn name(&self) -> &str {
    self.components
        .last()
        .map(String::as_str)
        .expect("FilePath must contain at least one component")
}
/// Joins every component of this path, in order, onto `root` and returns the
/// resulting owned path.
pub(crate) fn absolute(&self, root: &Path) -> PathBuf {
    let mut joined = root.to_owned();
    // `PathBuf::extend` pushes each item in turn, exactly like a manual loop.
    joined.extend(self.components.iter());
    joined
}
#[cfg(test)]
pub(crate) fn from_components(components: &[&str]) -> FilePath {
let components: Vec<String> = components

View File

@ -3,17 +3,34 @@ use crate::common::*;
/// Describes the input a torrent is created from: a root path, a total size,
/// and optionally the list of files found beneath the root.
pub(crate) struct Files {
/// Root path, exposed through `root()`.
root: PathBuf,
/// Total size supplied by whoever constructed this value, exposed through
/// `total_size()`.
total_size: Bytes,
/// `None` when constructed with `Files::file`, `Some(paths)` when
/// constructed with `Files::dir`.
contents: Option<Vec<FilePath>>,
}
impl Files {
pub(crate) fn new(root: PathBuf, total_size: Bytes) -> Files {
Files { root, total_size }
/// Builds a `Files` for a root that is a single file: no contents list is
/// recorded.
pub(crate) fn file(root: PathBuf, total_size: Bytes) -> Files {
    let contents = None;
    Files {
        root,
        total_size,
        contents,
    }
}
/// Builds a `Files` for a directory root, recording `contents` as the list of
/// file paths it contains.
pub(crate) fn dir(root: PathBuf, total_size: Bytes, contents: Vec<FilePath>) -> Files {
    let contents = Some(contents);
    Files {
        root,
        total_size,
        contents,
    }
}
/// Borrows the root path.
pub(crate) fn root(&self) -> &Path {
    self.root.as_path()
}
/// Borrows the recorded file paths as a slice, or `None` if no contents list
/// was recorded.
pub(crate) fn contents(&self) -> Option<&[FilePath]> {
    self.contents.as_ref().map(Vec::as_slice)
}
/// Returns the total size stored at construction time.
pub(crate) fn total_size(&self) -> Bytes {
self.total_size
}

View File

@ -16,7 +16,7 @@ impl Hasher {
md5sum: bool,
piece_length: u32,
) -> Result<(Mode, Vec<u8>), Error> {
Self::new(md5sum, piece_length).hash_root(files.root())
Self::new(md5sum, piece_length).hash_files(files)
}
fn new(md5sum: bool, piece_length: u32) -> Self {
@ -31,27 +31,19 @@ impl Hasher {
}
}
fn hash_root(mut self, root: &Path) -> Result<(Mode, Vec<u8>), Error> {
let metadata = root.metadata().context(error::Filesystem { path: root })?;
fn hash_files(mut self, files: &Files) -> Result<(Mode, Vec<u8>), Error> {
let mode = if let Some(contents) = files.contents() {
let files = self.hash_contents(&files.root(), contents)?;
if metadata.is_file() {
let (md5sum, length) = self.hash_file(&root)?;
Mode::Multiple { files }
} else {
let (md5sum, length) = self.hash_file(files.root())?;
if self.piece_bytes_hashed > 0 {
self.pieces.extend(&self.sha1.digest().bytes());
self.sha1.reset();
self.piece_bytes_hashed = 0;
}
Ok((
Mode::Single {
md5sum: md5sum.map(|md5sum| format!("{:x}", md5sum)),
length,
},
self.pieces,
))
} else {
let files = self.hash_dir(root)?;
}
};
if self.piece_bytes_hashed > 0 {
self.pieces.extend(&self.sha1.digest().bytes());
@ -59,28 +51,24 @@ impl Hasher {
self.piece_bytes_hashed = 0;
}
Ok((Mode::Multiple { files }, self.pieces))
}
Ok((mode, self.pieces))
}
fn hash_dir(&mut self, dir: &Path) -> Result<Vec<FileInfo>, Error> {
fn hash_contents(
&mut self,
root: &Path,
file_paths: &[FilePath],
) -> Result<Vec<FileInfo>, Error> {
let mut files = Vec::new();
for result in WalkDir::new(dir).sort_by(|a, b| a.file_name().cmp(b.file_name())) {
let entry = result?;
let path = entry.path();
for file_path in file_paths {
let path = file_path.absolute(root);
if !entry.metadata()?.is_file() {
continue;
}
let (md5sum, length) = self.hash_file(path)?;
let file_path = FilePath::from_prefix_and_path(dir, path)?;
let (md5sum, length) = self.hash_file(&path)?;
files.push(FileInfo {
md5sum: md5sum.map(|md5sum| format!("{:x}", md5sum)),
path: file_path,
path: file_path.clone(),
length,
});
}

View File

@ -43,12 +43,30 @@ Note: Many BitTorrent clients do not implement the behavior described in BEP 12.
long_help = "Include `COMMENT` in generated `.torrent` file. Stored under `comment` key of top-level metainfo dictionary."
)]
comment: Option<String>,
#[structopt(
name = "FOLLOW-SYMLINKS",
long = "follow-symlinks",
help = "Follow symlinks in torrent input. By default, symlinks to files and directories are not included in torrent contents."
)]
follow_symlinks: bool,
#[structopt(
name = "FORCE",
long = "force",
help = "Overwrite the destination `.torrent` file, if it exists."
)]
force: bool,
#[structopt(
name = "INCLUDE-HIDDEN",
long = "include-hidden",
help = "Include hidden files that would otherwise be skipped, such as files that start with a `.`, and files hidden by file attributes on macOS and Windows."
)]
include_hidden: bool,
#[structopt(
name = "INCLUDE-JUNK",
long = "include-junk",
help = "Include junk files that would otherwise be skipped."
)]
include_junk: bool,
#[structopt(
name = "INPUT",
long = "input",
@ -123,7 +141,11 @@ impl Create {
pub(crate) fn run(self, env: &mut Env) -> Result<(), Error> {
let input = env.resolve(&self.input);
let files = Walker::new(&input).files()?;
let files = Walker::new(&input)
.include_junk(self.include_junk)
.include_hidden(self.include_hidden)
.follow_symlinks(self.follow_symlinks)
.files()?;
let piece_length = self
.piece_length
@ -1018,4 +1040,258 @@ Content Size 9 bytes
let value = bencode::Value::decode(&bytes).unwrap();
assert!(matches!(value, bencode::Value::Dict(_)));
}
/// Without `--include-junk`, a directory holding only junk files produces an
/// empty multi-file torrent.
#[test]
fn exclude_junk() {
    let mut env = environment(&["--input", "foo", "--announce", "http://bar"]);

    // Populate the input directory with nothing but junk files.
    let input = env.resolve("foo");
    fs::create_dir(&input).unwrap();
    for junk in &["Thumbs.db", "Desktop.ini"] {
        fs::write(input.join(junk), "abc").unwrap();
    }

    env.run().unwrap();

    let encoded = fs::read(env.resolve("foo.torrent")).unwrap();
    let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&encoded).unwrap();
    assert_matches!(
        metainfo.info.mode,
        Mode::Multiple { files } if files.is_empty()
    );
    assert_eq!(metainfo.info.pieces, &[]);
}
/// With `--include-junk`, both junk files end up in the torrent and are
/// hashed.
#[test]
fn include_junk() {
    let args = [
        "--input",
        "foo",
        "--announce",
        "http://bar",
        "--include-junk",
    ];
    let mut env = environment(&args);

    let input = env.resolve("foo");
    fs::create_dir(&input).unwrap();
    for junk in &["Thumbs.db", "Desktop.ini"] {
        fs::write(input.join(junk), "abc").unwrap();
    }

    env.run().unwrap();

    let encoded = fs::read(env.resolve("foo.torrent")).unwrap();
    let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&encoded).unwrap();
    assert_matches!(
        metainfo.info.mode,
        Mode::Multiple { files } if files.len() == 2
    );
    assert_eq!(metainfo.info.pieces, Sha1::from("abcabc").digest().bytes());
}
/// Hidden files are skipped by default: dot-files everywhere, plus files
/// carrying the hidden attribute on Windows and macOS.
#[test]
fn skip_hidden() {
    let mut env = environment(&["--input", "foo", "--announce", "http://bar"]);
    let dir = env.resolve("foo");
    fs::create_dir(&dir).unwrap();

    // Hidden by name.
    fs::write(dir.join(".hidden"), "abc").unwrap();

    // Hidden by file attribute, on the platforms that have one. The exit
    // status is checked so a failure to set the attribute is reported as such
    // rather than as a confusing mismatch in the assertions below.
    #[cfg(target_os = "windows")]
    {
        let path = dir.join("hidden");
        fs::write(&path, "abc").unwrap();
        let status = Command::new("attrib")
            .arg("+h")
            .arg(&path)
            .status()
            .unwrap();
        assert!(status.success(), "`attrib +h` failed: {}", status);
    }
    #[cfg(target_os = "macos")]
    {
        let path = dir.join("hidden");
        fs::write(&path, "abc").unwrap();
        let status = Command::new("chflags")
            .arg("hidden")
            .arg(&path)
            .status()
            .unwrap();
        assert!(status.success(), "`chflags hidden` failed: {}", status);
    }

    env.run().unwrap();

    let torrent = env.resolve("foo.torrent");
    let bytes = fs::read(torrent).unwrap();
    let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&bytes).unwrap();
    assert_matches!(
        metainfo.info.mode,
        Mode::Multiple { files } if files.is_empty()
    );
    assert_eq!(metainfo.info.pieces, &[]);
}
/// With `--include-hidden`, a dot-file is included in the torrent and hashed.
#[test]
fn include_hidden() {
    let args = [
        "--input",
        "foo",
        "--announce",
        "http://bar",
        "--include-hidden",
    ];
    let mut env = environment(&args);

    let input = env.resolve("foo");
    fs::create_dir(&input).unwrap();
    fs::write(input.join(".hidden"), "abc").unwrap();

    env.run().unwrap();

    let encoded = fs::read(env.resolve("foo.torrent")).unwrap();
    let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&encoded).unwrap();
    assert_matches!(
        metainfo.info.mode,
        Mode::Multiple { files } if files.len() == 1
    );
    assert_eq!(metainfo.info.pieces, Sha1::from("abc").digest().bytes());
}
fn populate_symlinks(env: &Env) {
let dir = env.resolve("foo");
let file_src = env.resolve("bar");
let file_link = env.resolve("foo/bar");
let dir_src = env.resolve("dir-src");
let dir_contents = dir_src.join("baz");
let dir_link = env.resolve("foo/dir");
fs::create_dir(&dir_src).unwrap();
fs::write(dir_contents, "baz").unwrap();
fs::create_dir(&dir).unwrap();
fs::write(file_src, "bar").unwrap();
#[cfg(unix)]
{
Command::new("ln")
.arg("-s")
.arg("../bar")
.arg(file_link)
.status()
.unwrap();
Command::new("ln")
.arg("-s")
.arg("../dir-src")
.arg(dir_link)
.status()
.unwrap();
}
}
/// By default nothing reachable only through a symlink is included, so the
/// symlink fixture yields an empty multi-file torrent.
#[test]
fn skip_symlinks() {
    let args = ["--input", "foo", "--announce", "http://bar", "--md5sum"];
    let mut env = environment(&args);
    populate_symlinks(&env);

    env.run().unwrap();

    let encoded = fs::read(env.resolve("foo.torrent")).unwrap();
    let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&encoded).unwrap();
    assert_matches!(
        metainfo.info.mode,
        Mode::Multiple { files } if files.is_empty()
    );
    assert_eq!(metainfo.info.pieces, &[]);
}
/// With `--follow-symlinks`, both the linked file and the contents of the
/// linked directory are hashed and listed under their link paths.
#[test]
#[cfg(unix)]
fn follow_symlinks() {
    let args = [
        "--input",
        "foo",
        "--announce",
        "http://bar",
        "--follow-symlinks",
        "--md5sum",
    ];
    let mut env = environment(&args);
    populate_symlinks(&env);

    env.run().unwrap();

    let encoded = fs::read(env.resolve("foo.torrent")).unwrap();
    let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&encoded).unwrap();
    assert_eq!(metainfo.info.pieces, Sha1::from("barbaz").digest().bytes());

    let expected = [
        FileInfo {
            length: 3,
            md5sum: Some("37b51d194a7513e45b56f6524f2d51f2".to_owned()),
            path: FilePath::from_components(&["bar"]),
        },
        FileInfo {
            length: 3,
            md5sum: Some("73feffa4b7f6bb68e44cf984c85f6e88".to_owned()),
            path: FilePath::from_components(&["dir", "baz"]),
        },
    ];
    if let Mode::Multiple { files } = metainfo.info.mode {
        assert_eq!(files, &expected);
    } else {
        panic!("Expected multi-file torrent");
    }
}
/// Creating a torrent whose input path is itself a symlink fails with
/// `Error::SymlinkRoot` unless `--follow-symlinks` is passed.
#[test]
#[cfg(unix)]
fn symlink_root() {
    let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--md5sum"]);
    let file_src = env.resolve("bar");
    let file_link = env.resolve("foo");
    // `bar` is never created, so the link is dangling. It is created
    // in-process so that a failure to create it panics here instead of
    // surfacing later as an unrelated error from `env.run()`.
    std::os::unix::fs::symlink(&file_src, &file_link).unwrap();
    assert_matches!(env.run().unwrap_err(), Error::SymlinkRoot { root } if root == file_link);
}
/// A file inside a dot-directory is skipped even though its own name does
/// not start with a dot.
#[test]
fn skip_dot_dir_contents() {
    let args = ["--input", "foo", "--announce", "http://bar", "--md5sum"];
    let mut env = environment(&args);
    env.create_dir("foo/.bar");
    env.create_file("foo/.bar/baz", "baz");

    env.run().unwrap();

    let encoded = fs::read(env.resolve("foo.torrent")).unwrap();
    let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&encoded).unwrap();
    assert_matches!(
        metainfo.info.mode,
        Mode::Multiple { files } if files.is_empty()
    );
    assert_eq!(metainfo.info.pieces, &[]);
}
/// A file inside a directory that carries the hidden attribute is skipped.
///
/// On platforms other than Windows and macOS no file is created, so `foo/bar`
/// stays empty and the assertions hold trivially.
#[test]
fn skip_hidden_attribute_dir_contents() {
    let mut env = environment(&["--input", "foo", "--announce", "http://bar", "--md5sum"]);
    env.create_dir("foo/bar");
    // The exit status of the attribute-setting command is checked so that a
    // failure to hide the directory is reported directly.
    #[cfg(target_os = "windows")]
    {
        env.create_file("foo/bar/baz", "baz");
        let path = env.resolve("foo/bar");
        let status = Command::new("attrib")
            .arg("+h")
            .arg(&path)
            .status()
            .unwrap();
        assert!(status.success(), "`attrib +h` failed: {}", status);
    }
    #[cfg(target_os = "macos")]
    {
        env.create_file("foo/bar/baz", "baz");
        let path = env.resolve("foo/bar");
        let status = Command::new("chflags")
            .arg("hidden")
            .arg(&path)
            .status()
            .unwrap();
        assert!(status.success(), "`chflags hidden` failed: {}", status);
    }
    env.run().unwrap();
    let torrent = env.resolve("foo.torrent");
    let bytes = fs::read(torrent).unwrap();
    let metainfo = serde_bencode::de::from_bytes::<Metainfo>(&bytes).unwrap();
    assert_matches!(
        metainfo.info.mode,
        Mode::Multiple { files } if files.is_empty()
    );
    assert_eq!(metainfo.info.pieces, &[]);
}
}

View File

@ -22,6 +22,14 @@ impl TestEnv {
pub(crate) fn out_bytes(&self) -> Vec<u8> {
self.out.bytes()
}
/// Resolves `path` through the environment and creates that directory along
/// with any missing parents, panicking on failure.
pub(crate) fn create_dir(&self, path: impl AsRef<Path>) {
    let target = self.env.resolve(path.as_ref());
    fs::create_dir_all(target).unwrap();
}
/// Resolves `path` through the environment and writes `bytes` to that file,
/// panicking on failure.
pub(crate) fn create_file(&self, path: impl AsRef<Path>, bytes: impl AsRef<[u8]>) {
    let target = self.env.resolve(path);
    let data = bytes.as_ref();
    fs::write(target, data).unwrap();
}
}
impl Deref for TestEnv {

View File

@ -1,69 +1,113 @@
use crate::common::*;
/// File names that `Walker::files` treats as junk and skips unless
/// `include_junk` is set.
const JUNK: &[&str] = &["Thumbs.db", "Desktop.ini"];
pub(crate) struct Walker {
include_junk: bool,
follow_symlinks: bool,
include_hidden: bool,
include_junk: bool,
root: PathBuf,
}
impl Walker {
pub(crate) fn new(root: &Path) -> Walker {
Walker {
include_junk: false,
follow_symlinks: false,
include_hidden: false,
include_junk: false,
root: root.to_owned(),
}
}
pub(crate) fn _include_junk(self) -> Self {
pub(crate) fn include_junk(self, include_junk: bool) -> Self {
Walker {
include_junk: true,
include_junk,
..self
}
}
pub(crate) fn _include_hidden(self) -> Self {
pub(crate) fn include_hidden(self, include_hidden: bool) -> Self {
Walker {
include_hidden: true,
include_hidden,
..self
}
}
pub(crate) fn follow_symlinks(self, follow_symlinks: bool) -> Self {
Walker {
follow_symlinks,
..self
}
}
pub(crate) fn files(self) -> Result<Files, Error> {
let mut paths = Vec::new();
let mut total_size = 0;
if !self.follow_symlinks
&& self
.root
.symlink_metadata()
.context(error::Filesystem { path: &self.root })?
.file_type()
.is_symlink()
{
return Err(Error::SymlinkRoot { root: self.root });
}
let junk: &[&OsStr] = &[OsStr::new("Thumbs.db"), OsStr::new("Desktop.ini")];
let root_metadata = self
.root
.metadata()
.context(error::Filesystem { path: &self.root })?;
for result in WalkDir::new(&self.root).sort_by(|a, b| a.file_name().cmp(b.file_name())) {
let entry = result?;
if root_metadata.is_file() {
return Ok(Files::file(self.root, Bytes::from(root_metadata.len())));
}
let filter = |entry: &walkdir::DirEntry| {
let path = entry.path();
let file_name = entry.file_name();
if !self.include_hidden && file_name.to_string_lossy().starts_with('.') {
return false;
}
let hidden = Platform::hidden(path).unwrap_or(true);
if !self.include_hidden && hidden {
return false;
}
true
};
let mut paths = Vec::new();
let mut total_size = 0;
for result in WalkDir::new(&self.root)
.follow_links(self.follow_symlinks)
.sort_by(|a, b| a.file_name().cmp(b.file_name()))
.into_iter()
.filter_entry(filter)
{
let entry = result?;
let path = entry.path();
let metadata = entry.metadata()?;
if !metadata.is_file() {
continue;
}
if !self.include_hidden && file_name.to_string_lossy().starts_with('.') {
continue;
}
let file_path = FilePath::from_prefix_and_path(&self.root, &path)?;
if !self.include_hidden && Platform::hidden(path)? {
continue;
}
if !self.include_junk && junk.contains(&file_name) {
if !self.include_junk && JUNK.contains(&file_path.name()) {
continue;
}
total_size += metadata.len();
paths.push(entry.path().to_owned());
paths.push(file_path);
}
Ok(Files::new(self.root, Bytes::from(total_size)))
Ok(Files::dir(self.root, Bytes::from(total_size), paths))
}
}