diff --git a/src/feed_store.rs b/src/feed_store.rs
index bcf0f34..f63cc1d 100644
--- a/src/feed_store.rs
+++ b/src/feed_store.rs
@@ -1,4 +1,4 @@
-use anyhow::Result;
+use anyhow::{Result, bail};
 use camino::{Utf8Path, Utf8PathBuf};
 use chrono::{DateTime, Duration, Utc};
 use feed_rs::model::Entry;
@@ -99,33 +99,33 @@ impl FeedStoreFeed {
         self.info.fetch_data.as_ref()
     }
 
-    pub fn load_feed(&self, sanitize: bool) -> Option<Feed> {
+    pub fn load_feed(&self, sanitize: bool) -> Result<Feed> {
         if let Some(raw_feed) = &self.raw_feed {
             let parser = feed_rs::parser::Builder::new()
                 .sanitize_content(sanitize)
                 .build();
-            Some(parser.parse(raw_feed.as_bytes()).unwrap())
+            Ok(parser.parse(raw_feed.as_bytes())?)
         } else {
-            None
+            bail!("Feed not loaded yet: {}", self.url);
         }
     }
 
-    pub fn has_changed(&self, new_feed: &Feed) -> bool {
-        let Some(old_feed) = self.load_feed(false) else {
-            return true;
+    pub fn has_changed(&self, new_feed: &Feed) -> Result<bool> {
+        let Ok(old_feed) = self.load_feed(false) else {
+            return Ok(true);
         };
 
         let mut old_iter = old_feed.entries.iter();
         for new in &new_feed.entries {
             let Some(old) = old_iter.next() else {
-                return true;
+                return Ok(true);
             };
             if old != new {
-                return true;
+                return Ok(true);
             }
         }
         // ignoring any entries left in old_iter
-        false
+        Ok(false)
     }
 
     pub fn store(&mut self, mut response: Response) -> Result<bool> {
@@ -149,7 +149,7 @@ impl FeedStoreFeed {
         self.info.fetch_data = Some(fetchdata);
         Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
 
-        if !self.has_changed(&feed) {
+        if !self.has_changed(&feed)? {
             return Ok(false);
         }
         debug!("Storing feed for {}.", self.url);
@@ -235,7 +235,7 @@ pub struct FeedStore {
 }
 
 impl FeedStore {
-    pub fn new(dir: &str, feedlist: &Vec<FeedConfig>) -> Self {
+    pub fn new(dir: &Utf8Path, feedlist: &Vec<FeedConfig>) -> Self {
         let dir = super::to_checked_pathbuf(dir);
 
         let mut feeds: BTreeMap<String, FeedStoreFeed> = BTreeMap::new();
@@ -248,20 +248,25 @@ impl FeedStore {
     }
 
     pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<Entry>) {
+        debug!("Collecting feeds");
         let mut feeds = HashMap::new();
         let mut entries = Vec::new();
 
         for (feed_url, feed_store_feed) in self.feeds.iter_mut() {
-            let Some(mut feed) = feed_store_feed.load_feed(true) else {
-                warn!("Problem parsing feed file for feed {}", feed_url);
-                continue;
+            debug!("Collecting {feed_url}");
+            let mut feed = match feed_store_feed.load_feed(true) {
+                Ok(feed) => feed,
+                Err(e) => {
+                    warn!("Problem parsing feed file for feed {}: {}", feed_url, e);
+                    continue;
+                },
             };
             for entry in &mut feed.entries {
                 entry.source = Some(feed_url.to_string());
             }
 
-            entries.append(&mut std::mem::take(&mut feed.entries));
+            entries.extend(feed.entries.clone());
             if entries.len() > 4 * max_entries {
                 entries = trim_entries(entries, max_entries);
             }
diff --git a/src/main.rs b/src/main.rs
index b071044..ec8058f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -26,7 +26,7 @@ extern crate log;
 use crate::feed_store::FeedStore;
 use crate::fetcher::Fetcher;
 use anyhow::Result;
-use camino::Utf8PathBuf;
+use camino::{Utf8Path, Utf8PathBuf};
 use clap::Parser;
 use serde::Deserialize;
 use std::fs;
@@ -48,6 +48,8 @@ struct Args {
     config: String,
     #[arg(long, default_value_t = false)]
     no_fetch: bool,
+    #[arg(long, default_value_t = false)]
+    force: bool,
 }
 
 /// Config to be parsed from toml file given as cmdline option
@@ -56,15 +58,15 @@ struct Config {
     /// to be used as part of the fetchers username header
    bot_name: String,
     /// where to store downloaded feeds and their metadata
-    feed_dir: String,
+    feed_dir: Utf8PathBuf,
     /// feeds to be agregated
     feeds: Vec<FeedConfig>,
     /// Email adress to use for the from header when fetching feeds
     from: String,
     /// where to build the output files
-    out_dir: String,
+    out_dir: Utf8PathBuf,
     /// templates folder
-    templates_dir: String,
+    templates_dir: Utf8PathBuf,
     /// How many feed entries should be included in the planet
     max_entries: usize,
     /// How soon to refresh, in hours
@@ -74,11 +76,10 @@ struct Config {
 
-pub fn to_checked_pathbuf(dir: &str) -> Utf8PathBuf {
-    let dir = Utf8PathBuf::from(dir);
+pub fn to_checked_pathbuf(dir: &Utf8Path) -> Utf8PathBuf {
     let m = dir
         .metadata()
         .unwrap_or_else(|_| panic!("Could not get metadata of dir: {dir}"));
     assert!(m.is_dir(), "Not a dir: {dir}");
-    dir
+    dir.to_path_buf()
 }
 
 /// Config for one individual input feed
@@ -91,10 +92,10 @@ struct FeedConfig {
     url: String,
 }
 
-fn fetch(config: &Config, feed_store: &mut FeedStore) -> Result<bool> {
+fn fetch(config: &Config, feed_store: &mut FeedStore, force_rebuild: bool) -> Result<bool> {
     let fetcher = Fetcher::new(&config.bot_name, &config.from);
     let rebuild = feed_store.fetch(&fetcher, config.refresh)?;
-    Ok(rebuild)
+    Ok(rebuild || force_rebuild)
 }
 
 fn main() -> Result<()> {
@@ -116,7 +117,7 @@ fn main() -> Result<()> {
     let should_build = if args.no_fetch {
         true
     } else {
-        fetch(&config, &mut feed_store)?
+        fetch(&config, &mut feed_store, args.force)?
     };
 
     if should_build {
diff --git a/src/template_engine.rs b/src/template_engine.rs
index 50ff93c..6da36c8 100644
--- a/src/template_engine.rs
+++ b/src/template_engine.rs
@@ -29,7 +29,7 @@ pub fn build(config: &Config, feed_store: &mut FeedStore) -> Result<()> {
     Ok(())
 }
 
-fn create_tera(templates_dir: &str) -> Result<tera::Tera> {
+fn create_tera(templates_dir: &Utf8Path) -> Result<tera::Tera> {
     let dir = to_checked_pathbuf(templates_dir);
     let mut tera = tera::Tera::new(&format!("{dir}/*"))?;
     // disable autoescape as this would corrupt urls or the entriy contents. todo check this!