feat: Save previous entries in feed settings
This commit is contained in:
parent
90f29bd2a4
commit
6bb67a8129
@ -31,6 +31,12 @@ pub struct FeedStoreFeedInfo {
|
||||
///
|
||||
/// Used to let the server know whether we need a new entry or not.
|
||||
pub fetch_data: Option<FetchData>,
|
||||
/// A cache of already downloaded articles.
|
||||
///
|
||||
/// Kept for historical purposes, but also to inject retrieval date
|
||||
/// when published date was not exposed in an article.
|
||||
#[serde(default)]
|
||||
pub entries: BTreeMap<Url, FeedStoreEntry>,
|
||||
}
|
||||
|
||||
impl FeedStoreFeedInfo {
|
||||
@ -38,6 +44,7 @@ impl FeedStoreFeedInfo {
|
||||
Self {
|
||||
added: Utc::now(),
|
||||
fetch_data: None,
|
||||
entries: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -147,7 +154,7 @@ impl FeedStoreFeed {
|
||||
|
||||
debug!("Storing fetchdata for {}", self.url);
|
||||
self.info.fetch_data = Some(fetchdata);
|
||||
Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
|
||||
self.save_info()?;
|
||||
|
||||
if !self.has_changed(&feed)? {
|
||||
return Ok(false);
|
||||
@ -158,10 +165,16 @@ impl FeedStoreFeed {
|
||||
&self.path_feed_ron,
|
||||
to_string_pretty(&feed, PrettyConfig::default())?,
|
||||
)?;
|
||||
Self::write(&self.path_feed, body)?;
|
||||
Self::write(&self.path_feed, &body)?;
|
||||
self.raw_feed = Some(String::from_utf8_lossy(&body).to_string());
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
pub fn save_info(&self) -> Result<()> {
|
||||
Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// refresh in hours
|
||||
pub fn fetch(&mut self, fetcher: &super::Fetcher, refresh: usize) -> Result<bool> {
|
||||
let mut builder = fetcher
|
||||
@ -247,14 +260,14 @@ impl FeedStore {
|
||||
Self { _dir: dir, feeds }
|
||||
}
|
||||
|
||||
pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<Entry>) {
|
||||
pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<FeedStoreEntry>) {
|
||||
debug!("Collecting feeds");
|
||||
let mut feeds = HashMap::new();
|
||||
let mut entries = Vec::new();
|
||||
|
||||
for (feed_url, feed_store_feed) in self.feeds.iter_mut() {
|
||||
debug!("Collecting {feed_url}");
|
||||
let mut feed = match feed_store_feed.load_feed(true) {
|
||||
let feed = match feed_store_feed.load_feed(true) {
|
||||
Ok(feed) => feed,
|
||||
Err(e) => {
|
||||
warn!("Problem parsing feed file for feed {}: {}", feed_url, e);
|
||||
@ -262,17 +275,48 @@ impl FeedStore {
|
||||
}
|
||||
};
|
||||
|
||||
for entry in &mut feed.entries {
|
||||
entry.source = Some(feed_url.to_string());
|
||||
let mut changed_info = false;
|
||||
|
||||
for entry in &feed.entries {
|
||||
// If we already have the entry in store, don't parse it again.
|
||||
// Simply change the raw entry attribute.
|
||||
let entry_link = Url::parse(&entry.links.first().cloned().unwrap().href).unwrap();
|
||||
if let Some(archived_entry) = feed_store_feed.info.entries.get_mut(&entry_link) {
|
||||
if &archived_entry.entry != entry {
|
||||
changed_info = true;
|
||||
archived_entry.entry = entry.clone();
|
||||
}
|
||||
|
||||
entries.push(archived_entry.clone());
|
||||
} else {
|
||||
// TODO: should this be done here? or earlier in the fetching process?
|
||||
let enhanced_entry = FeedStoreEntry::from_entry(
|
||||
entry.clone(),
|
||||
entry_link.clone(),
|
||||
feed_url.clone(),
|
||||
);
|
||||
feed_store_feed
|
||||
.info
|
||||
.entries
|
||||
.insert(entry_link.clone(), enhanced_entry.clone());
|
||||
entries.push(enhanced_entry);
|
||||
}
|
||||
}
|
||||
|
||||
entries.extend(feed.entries.clone());
|
||||
if entries.len() > 4 * max_entries {
|
||||
entries = trim_entries(entries, max_entries);
|
||||
}
|
||||
|
||||
feeds.insert(feed_url.to_string(), feed.clone());
|
||||
|
||||
// If some info from an entry was changed, save feed info
|
||||
if changed_info {
|
||||
feed_store_feed
|
||||
.save_info()
|
||||
.expect("Failed to save feed info");
|
||||
}
|
||||
}
|
||||
|
||||
(feeds, trim_entries(entries, max_entries))
|
||||
}
|
||||
|
||||
@ -286,8 +330,9 @@ impl FeedStore {
|
||||
}
|
||||
}
|
||||
|
||||
fn trim_entries(mut entries: Vec<Entry>, max_entries: usize) -> Vec<Entry> {
|
||||
entries.sort_by_key(|e| std::cmp::Reverse(e.updated.or(e.published).unwrap_or_default()));
|
||||
fn trim_entries(mut entries: Vec<FeedStoreEntry>, max_entries: usize) -> Vec<FeedStoreEntry> {
|
||||
entries.sort_by(|a, b| a.mars_date.cmp(&b.mars_date));
|
||||
entries.reverse();
|
||||
entries.truncate(max_entries);
|
||||
entries
|
||||
}
|
||||
@ -298,3 +343,34 @@ fn hv(headers: &HeaderMap, key: &str) -> String {
|
||||
_ => "".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// A single article in the [FeedStore].
|
||||
///
|
||||
/// This transformation allows to inject retrieval date when published date
|
||||
/// is not available in the feed,
|
||||
///
|
||||
/// Used for ordering by date and accessing source information.
|
||||
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
|
||||
pub struct FeedStoreEntry {
|
||||
/// Usually extracted from the feed, but when not present,
|
||||
/// the first retrieval time is used.
|
||||
pub mars_date: DateTime<Utc>,
|
||||
/// [FeedStoreFeed::url]
|
||||
pub mars_source: Url,
|
||||
/// Canonical URL
|
||||
pub mars_url: Url,
|
||||
/// Usual RSS feed entry
|
||||
#[serde(flatten)]
|
||||
pub entry: Entry,
|
||||
}
|
||||
|
||||
impl FeedStoreEntry {
|
||||
pub fn from_entry(entry: Entry, url: Url, source: Url) -> Self {
|
||||
Self {
|
||||
mars_source: source,
|
||||
mars_url: url,
|
||||
mars_date: entry.published.unwrap_or(Utc::now()),
|
||||
entry,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ struct Args {
|
||||
}
|
||||
|
||||
/// Config to be parsed from toml file given as cmdline option
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Deserialize, Serialize)]
|
||||
struct Config {
|
||||
/// to be used as part of the fetchers username header
|
||||
bot_name: String,
|
||||
@ -101,8 +101,12 @@ pub fn to_checked_pathbuf(dir: &Utf8Path) -> Utf8PathBuf {
|
||||
///
|
||||
/// This is a separate struct in case one wants to configure additional
|
||||
/// information in the future.
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
struct FeedConfig {
|
||||
/// short name for the feed
|
||||
name: String,
|
||||
/// homepage URL for the website
|
||||
homepage: Url,
|
||||
/// url of an ATOM, RSS or Json feed
|
||||
url: String,
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ pub fn build(config: &Config, feed_store: &mut FeedStore) -> Result<()> {
|
||||
|
||||
let mut context = tera::Context::new();
|
||||
let (feeds, entries): (HashMap<String, Feed>, _) = feed_store.collect(config.max_entries);
|
||||
context.insert("config", config);
|
||||
context.insert("feeds", &feeds);
|
||||
context.insert("entries", &entries);
|
||||
context.insert("lang", &config.lang);
|
||||
|
Loading…
x
Reference in New Issue
Block a user