feat: add --force flag to force rebuild

This commit is contained in:
selfhoster selfhoster 2025-04-15 13:27:32 +02:00
parent 5271c4c9aa
commit 001ca4879a
3 changed files with 33 additions and 25 deletions

View File

@ -1,4 +1,4 @@
use anyhow::Result;
use anyhow::{Result, bail};
use camino::{Utf8Path, Utf8PathBuf};
use chrono::{DateTime, Duration, Utc};
use feed_rs::model::Entry;
@ -99,33 +99,33 @@ impl FeedStoreFeed {
self.info.fetch_data.as_ref()
}
pub fn load_feed(&self, sanitize: bool) -> Option<Feed> {
pub fn load_feed(&self, sanitize: bool) -> Result<Feed> {
if let Some(raw_feed) = &self.raw_feed {
let parser = feed_rs::parser::Builder::new()
.sanitize_content(sanitize)
.build();
Some(parser.parse(raw_feed.as_bytes()).unwrap())
Ok(parser.parse(raw_feed.as_bytes())?)
} else {
None
bail!("Feed not loaded yet: {}", self.url);
}
}
pub fn has_changed(&self, new_feed: &Feed) -> bool {
let Some(old_feed) = self.load_feed(false) else {
return true;
pub fn has_changed(&self, new_feed: &Feed) -> Result<bool> {
let Ok(old_feed) = self.load_feed(false) else {
return Ok(true);
};
let mut old_iter = old_feed.entries.iter();
for new in &new_feed.entries {
let Some(old) = old_iter.next() else {
return true;
return Ok(true);
};
if old != new {
return true;
return Ok(true);
}
}
// ignoring any entries left in old_iter
false
Ok(false)
}
pub fn store(&mut self, mut response: Response<Body>) -> Result<bool> {
@ -149,7 +149,7 @@ impl FeedStoreFeed {
self.info.fetch_data = Some(fetchdata);
Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
if !self.has_changed(&feed) {
if !self.has_changed(&feed)? {
return Ok(false);
}
debug!("Storing feed for {}.", self.url);
@ -235,7 +235,7 @@ pub struct FeedStore {
}
impl FeedStore {
pub fn new(dir: &str, feedlist: &Vec<super::FeedConfig>) -> Self {
pub fn new(dir: &Utf8Path, feedlist: &Vec<super::FeedConfig>) -> Self {
let dir = super::to_checked_pathbuf(dir);
let mut feeds: BTreeMap<Url, FeedStoreFeed> = BTreeMap::new();
@ -248,20 +248,25 @@ impl FeedStore {
}
pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<Entry>) {
debug!("Collecting feeds");
let mut feeds = HashMap::new();
let mut entries = Vec::new();
for (feed_url, feed_store_feed) in self.feeds.iter_mut() {
let Some(mut feed) = feed_store_feed.load_feed(true) else {
warn!("Problem parsing feed file for feed {}", feed_url);
continue;
debug!("Collecting {feed_url}");
let mut feed = match feed_store_feed.load_feed(true) {
Ok(feed) => feed,
Err(e) => {
warn!("Problem parsing feed file for feed {}: {}", feed_url, e);
continue;
},
};
for entry in &mut feed.entries {
entry.source = Some(feed_url.to_string());
}
entries.append(&mut std::mem::take(&mut feed.entries));
entries.extend(feed.entries.clone());
if entries.len() > 4 * max_entries {
entries = trim_entries(entries, max_entries);
}

View File

@ -26,7 +26,7 @@ extern crate log;
use crate::feed_store::FeedStore;
use crate::fetcher::Fetcher;
use anyhow::Result;
use camino::Utf8PathBuf;
use camino::{Utf8Path, Utf8PathBuf};
use clap::Parser;
use serde::Deserialize;
use std::fs;
@ -48,6 +48,8 @@ struct Args {
config: String,
#[arg(long, default_value_t = false)]
no_fetch: bool,
#[arg(long, default_value_t = false)]
force: bool,
}
/// Config to be parsed from toml file given as cmdline option
@ -56,15 +58,15 @@ struct Config {
/// to be used as part of the fetchers username header
bot_name: String,
/// where to store downloaded feeds and their metadata
feed_dir: String,
feed_dir: Utf8PathBuf,
/// feeds to be aggregated
feeds: Vec<FeedConfig>,
/// Email address to use for the from header when fetching feeds
from: String,
/// where to build the output files
out_dir: String,
out_dir: Utf8PathBuf,
/// templates folder
templates_dir: String,
templates_dir: Utf8PathBuf,
/// How many feed entries should be included in the planet
max_entries: usize,
/// How soon to refresh, in hours
@ -74,11 +76,12 @@ struct Config {
pub fn to_checked_pathbuf(dir: &str) -> Utf8PathBuf {
let dir = Utf8PathBuf::from(dir);
pub fn to_checked_pathbuf(dir: &Utf8Path) -> Utf8PathBuf {
let m = dir
.metadata()
.unwrap_or_else(|_| panic!("Could not get metadata of dir: {dir}"));
assert!(m.is_dir(), "Not a dir: {dir}");
dir
dir.to_path_buf()
}
/// Config for one individual input feed
@ -91,10 +94,10 @@ struct FeedConfig {
url: String,
}
fn fetch(config: &Config, feed_store: &mut FeedStore) -> Result<bool> {
fn fetch(config: &Config, feed_store: &mut FeedStore, force_rebuild: bool) -> Result<bool> {
let fetcher = Fetcher::new(&config.bot_name, &config.from);
let rebuild = feed_store.fetch(&fetcher, config.refresh)?;
Ok(rebuild)
Ok(rebuild || force_rebuild)
}
fn main() -> Result<()> {
@ -116,7 +119,7 @@ fn main() -> Result<()> {
let should_build = if args.no_fetch {
true
} else {
fetch(&config, &mut feed_store)?
fetch(&config, &mut feed_store, args.force)?
};
if should_build {

View File

@ -29,7 +29,7 @@ pub fn build(config: &Config, feed_store: &mut FeedStore) -> Result<()> {
Ok(())
}
fn create_tera(templates_dir: &str) -> Result<Tera> {
fn create_tera(templates_dir: &Utf8Path) -> Result<Tera> {
let dir = to_checked_pathbuf(templates_dir);
let mut tera = tera::Tera::new(&format!("{dir}/*"))?;
// disable autoescape as this would corrupt urls or the entry contents. TODO: check this!