Casey Rodarmor 57a358e458
Allow creating magnet links with imdl torrent link
Magnet links can now be created from a metainfo file with:

    imdl torrent link --input METAINFO

type: added
2020-04-07 19:01:27 -07:00

264 lines
7.1 KiB
Rust

use crate::common::*;
// Command-line arguments for the `torrent stats` subcommand.
//
// NOTE: plain `//` comments are used on purpose in this block. StructOpt turns `///` doc comments
// into help text, and the help text here is set explicitly through the attributes.
#[derive(StructOpt)]
#[structopt(
  help_message(consts::HELP_MESSAGE),
  version_message(consts::VERSION_MESSAGE),
  about("Show statistics about a collection of `.torrent` files.")
)]
pub(crate) struct Stats {
  // Maximum number of torrents to process; `None` means no limit.
  #[structopt(
    long = "limit",
    short = "l",
    value_name = "N",
    help = "Stop after processing `N` torrents. Useful when processing large collections of \
            `.torrent` files."
  )]
  limit: Option<u64>,
  // Patterns matched against bencode key paths; values under matching paths are reported.
  #[structopt(
    long = "extract-pattern",
    short = "e",
    value_name = "REGEX",
    help = "Extract and display values under key paths that match `REGEX`. Subkeys of a \
            bencoded dictionary are delimited by `/`, and values of a bencoded list are \
            delimited by `*`. For example, given the following bencoded dictionary `{\"foo\": \
            [{\"bar\": {\"baz\": 2}}]}`, the value `2`'s key path will be `foo*bar/baz`. The \
            value `2` would be displayed if any of `bar`, `foo[*]bar/baz`, or `foo.*baz` were \
            passed to `--extract-pattern`."
  )]
  extract_patterns: Vec<Regex>,
  // File or directory that is searched for `.torrent` files.
  #[structopt(
    long = "input",
    short = "i",
    value_name = "PATH",
    help = "Search `PATH` for torrents. May be a directory or a single torrent file.",
    parse(from_os_str)
  )]
  input: PathBuf,
  // When set, every successfully decoded torrent is pretty printed as it is processed.
  #[structopt(
    long = "print",
    short = "p",
    help = "Pretty print the contents of each torrent as it is processed."
  )]
  print: bool,
}
impl Stats {
  /// Walk the input path, feed every entry to an `Extractor`, and write a report to the
  /// environment's error stream.
  ///
  /// The report contains the processed/failed counters, every dictionary key path with the number
  /// of times it was seen (most frequent first, with `info/files` paths listed after all others),
  /// and the values extracted for each matching `--extract-pattern`.
  ///
  /// The counters come out of `HashMap`s, whose iteration order is arbitrary, so ties between
  /// equally frequent keys are broken by key and extracted values are ordered by pattern. This
  /// keeps the report identical from run to run.
  ///
  /// # Errors
  ///
  /// Returns an error if `options.require_unstable` rejects the subcommand, if walking the
  /// directory tree fails before the limit is reached, or if writing the report fails.
  pub(crate) fn run(self, env: &mut Env, options: &Options) -> Result<(), Error> {
    options.require_unstable("torrent stats subcommand")?;

    let path = env.resolve(self.input);
    let limit = self.limit.unwrap_or(u64::max_value());

    let mut extractor = Extractor::new(self.print, &self.extract_patterns);

    // Entries are visited in file name order so the walk itself is reproducible.
    for result in WalkDir::new(path).sort_by(|a, b| a.file_name().cmp(b.file_name())) {
      // Checked before unwrapping `result` so that a walk error encountered after the limit has
      // been reached is not reported.
      if extractor.torrents >= limit {
        break;
      }

      let entry = result?;

      extractor.process(entry.path());
    }

    errln!(env, "Torrents processed: {}", extractor.torrents)?;
    errln!(env, "Read failed:        {}", extractor.io_errors)?;
    errln!(
      env,
      "Decode failed:      {}",
      extractor.bencode_decode_errors
    )?;

    let mut paths = extractor.paths.into_iter().collect::<Vec<(String, u64)>>();

    // Most frequent first; equal counts are ordered by key for deterministic output.
    paths.sort_by(|(a_key, a_count), (b_key, b_count)| {
      Reverse(a_count)
        .cmp(&Reverse(b_count))
        .then_with(|| a_key.cmp(b_key))
    });

    // The count column is padded to the width of the largest count.
    let max = paths.iter().map(|(_, count)| *count).max().unwrap_or(0);
    let width = max.to_string().len();

    if !paths.is_empty() {
      errln!(env, "Keys:")?;

      // First pass: everything outside of `info/files`.
      for (key, count) in &paths {
        if key.starts_with("info/files") {
          continue;
        }
        errln!(env, "{:<width$} - {}", count, key, width = width)?;
      }

      // Second pass: the `info/files` key paths, grouped at the end.
      for (key, count) in paths {
        if key.starts_with("info/files") {
          errln!(env, "{:<width$} - {}", count, key, width = width)?;
        }
      }
    }

    if !extractor.values.is_empty() {
      let mut values = extractor
        .values
        .into_iter()
        .collect::<Vec<(String, Vec<String>)>>();

      // Patterns are unique map keys, so ordering by pattern alone is a total order.
      values.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));

      errln!(env, "Values:")?;
      for (pattern, values) in values {
        err!(env, "{}: ", pattern)?;
        for (i, value) in values.iter().enumerate() {
          if i > 0 {
            err!(env, ", ")?;
          }
          err!(env, "{}", value)?;
        }
        errln!(env)?;
      }
    }

    Ok(())
  }
}
/// Accumulates statistics while torrent files are processed one at a time.
struct Extractor {
/// Number of files whose contents could not be decoded as a bencode `Value`.
bencode_decode_errors: u64,
/// Key path of the value currently being visited by `extract`; grown while descending into a
/// value and truncated again on the way back up.
current_path: String,
/// Number of files that could not be read.
io_errors: u64,
/// Per-key-path counter, updated through `increment_ref` for every dictionary key visited.
paths: HashMap<String, u64>,
/// Whether `process` pretty prints each successfully decoded torrent to standard error.
print: bool,
/// Set built from the user-supplied patterns, matched against `current_path`.
regex_set: RegexSet,
/// Number of `.torrent` files seen so far, including those that failed to read or decode.
torrents: u64,
/// Pretty-printed values found under matching key paths, keyed by the pattern that matched.
values: HashMap<String, Vec<String>>,
}
impl Extractor {
/// Build an extractor with zeroed counters, empty tables, and an empty current key path.
///
/// `print` is stored as-is; `regexes` are recompiled from their source strings into a single
/// `RegexSet`.
///
/// # Panics
///
/// Panics if the `RegexSet` cannot be built from the patterns' source strings.
fn new(print: bool, regexes: &[Regex]) -> Self {
  let sources = regexes.iter().map(Regex::as_str);

  let regex_set =
    RegexSet::new(sources).expect("Validated regex pattern failed to recompile in regex set");

  Self {
    print,
    regex_set,
    current_path: String::new(),
    paths: HashMap::new(),
    values: HashMap::new(),
    torrents: 0,
    io_errors: 0,
    bencode_decode_errors: 0,
  }
}
/// Process a single directory entry.
///
/// Anything that is not a regular file with the extension `torrent` is ignored without being
/// counted. Otherwise the torrent counter is bumped, the file is read and decoded, and the
/// decoded value is handed to `extract`. Read and decode failures are only counted, never
/// reported individually.
fn process(&mut self, path: &Path) {
  let is_torrent = path.is_file() && path.extension() == Some(OsStr::new("torrent"));

  if !is_torrent {
    return;
  }

  // Progress indicator, emitted before the counter is incremented (so the first message says 0).
  if self.torrents % 10000 == 0 {
    eprintln!("Processing torrent {}...", self.torrents);
  }

  self.torrents += 1;

  let contents = match fs::read(path) {
    Ok(bytes) => bytes,
    Err(_) => {
      self.io_errors += 1;
      return;
    }
  };

  match bendy::serde::de::from_bytes::<Value>(&contents) {
    Ok(value) => {
      self.extract(&value);

      if self.print {
        eprintln!("{}:\n{}", path.display(), Self::pretty_print(&value));
      }
    }
    Err(_) => self.bencode_decode_errors += 1,
  }
}
/// Recursively visit `value`, whose key path is the current content of `self.current_path`.
///
/// For every pattern in the regex set that matches the current path, the pretty-printed value is
/// appended to that pattern's entry in `self.values`. Dictionaries and lists are then descended
/// into, extending the path with `key/` for dictionary entries and replacing the trailing
/// character with `*` for list elements. Every dictionary key path is recorded in `self.paths`
/// via `increment_ref`.
fn extract(&mut self, value: &Value) {
  let matched = self.regex_set.matches(&self.current_path);

  for index in matched.iter() {
    let pattern = &self.regex_set.patterns()[index];
    let rendered = Self::pretty_print(value);

    match self.values.get_mut(pattern) {
      Some(seen) => seen.push(rendered),
      None => {
        self.values.insert(pattern.clone(), vec![rendered]);
      }
    }
  }

  // Length of the path on entry; everything appended below is cut off again at this length.
  let starting_length = self.current_path.len();

  match value {
    Value::Integer(_) | Value::Bytes(_) => {}
    Value::List(items) => {
      // Replace the last character of the path (the `/` pushed by an enclosing dictionary, or the
      // `*` of an enclosing list) with `*`. An empty path, i.e. a top-level list, is left empty.
      if self.current_path.pop().is_some() {
        self.current_path.push('*');
      }

      for item in items {
        self.extract(item);
      }

      self.current_path.truncate(starting_length);
    }
    Value::Dict(entries) => {
      for (key, child) in entries {
        // Keys are raw bytes; invalid UTF-8 is replaced lossily.
        let key: Cow<str> = String::from_utf8_lossy(key);
        self.current_path.push_str(&key);

        // The key path is counted without the trailing `/`.
        self.paths.increment_ref(&self.current_path);

        self.current_path.push('/');
        self.extract(child);

        self.current_path.truncate(starting_length);
      }
    }
  }
}
/// Render `value` into a freshly allocated string using `pretty_print_inner`.
fn pretty_print(value: &Value) -> String {
  let mut rendered = String::new();

  Self::pretty_print_inner(value, &mut rendered);

  rendered
}
/// Append a rendering of `value` to `buffer`.
///
/// Integers are written in decimal, byte strings via `pretty_print_string`, lists as
/// `[a, b, ...]`, and dictionaries as `{key: value, ...}` with keys rendered like byte strings.
fn pretty_print_inner(value: &Value, buffer: &mut String) {
  match value {
    Value::Integer(integer) => buffer.push_str(&integer.to_string()),
    Value::Bytes(bytes) => Self::pretty_print_string(bytes, buffer),
    Value::List(items) => {
      buffer.push('[');

      // Empty before the first element, `", "` before every later one.
      let mut separator = "";
      for item in items {
        buffer.push_str(separator);
        separator = ", ";
        Self::pretty_print_inner(item, buffer);
      }

      buffer.push(']');
    }
    Value::Dict(entries) => {
      buffer.push('{');

      let mut separator = "";
      for (key, item) in entries {
        buffer.push_str(separator);
        separator = ", ";
        Self::pretty_print_string(key, buffer);
        buffer.push_str(": ");
        Self::pretty_print_inner(item, buffer);
      }

      buffer.push('}');
    }
  }
}
/// Append a human-readable rendering of the byte string `string` to `buffer`.
///
/// Valid UTF-8 is written verbatim between double quotes; no escaping is performed. Anything else
/// is written as uppercase hexadecimal, two digits per byte, between `<` and `>`.
fn pretty_print_string(string: &[u8], buffer: &mut String) {
  const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

  if let Ok(text) = str::from_utf8(string) {
    buffer.reserve(text.len() + 2);
    buffer.push('"');
    buffer.push_str(text);
    buffer.push('"');
  } else {
    // Non-UTF-8 byte strings can be large, so the digits are pushed directly instead of
    // allocating a temporary `String` per byte.
    buffer.reserve(string.len().saturating_mul(2).saturating_add(2));
    buffer.push('<');
    for &byte in string {
      buffer.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
      buffer.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
    }
    buffer.push('>');
  }
}
}