feat: Save previous entries in feed settings
This commit is contained in:
		
							parent
							
								
									90f29bd2a4
								
							
						
					
					
						commit
						6bb67a8129
					
				@ -31,6 +31,12 @@ pub struct FeedStoreFeedInfo {
 | 
				
			|||||||
    ///
 | 
					    ///
 | 
				
			||||||
    /// Used to let the server know whether we need a new entry or not.
 | 
					    /// Used to let the server know whether we need a new entry or not.
 | 
				
			||||||
    pub fetch_data: Option<FetchData>,
 | 
					    pub fetch_data: Option<FetchData>,
 | 
				
			||||||
 | 
					    /// A cache of already downloaded articles.
 | 
				
			||||||
 | 
					    ///
 | 
				
			||||||
 | 
					    /// Kept for historical purposes, but also to inject retrieval date
 | 
				
			||||||
 | 
					    /// when published date was not exposed in an article.
 | 
				
			||||||
 | 
					    #[serde(default)]
 | 
				
			||||||
 | 
					    pub entries: BTreeMap<Url, FeedStoreEntry>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
impl FeedStoreFeedInfo {
 | 
					impl FeedStoreFeedInfo {
 | 
				
			||||||
@ -38,6 +44,7 @@ impl FeedStoreFeedInfo {
 | 
				
			|||||||
        Self {
 | 
					        Self {
 | 
				
			||||||
            added: Utc::now(),
 | 
					            added: Utc::now(),
 | 
				
			||||||
            fetch_data: None,
 | 
					            fetch_data: None,
 | 
				
			||||||
 | 
					            entries: BTreeMap::new(),
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -147,7 +154,7 @@ impl FeedStoreFeed {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        debug!("Storing fetchdata for {}", self.url);
 | 
					        debug!("Storing fetchdata for {}", self.url);
 | 
				
			||||||
        self.info.fetch_data = Some(fetchdata);
 | 
					        self.info.fetch_data = Some(fetchdata);
 | 
				
			||||||
        Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
 | 
					        self.save_info()?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if !self.has_changed(&feed)? {
 | 
					        if !self.has_changed(&feed)? {
 | 
				
			||||||
            return Ok(false);
 | 
					            return Ok(false);
 | 
				
			||||||
@ -158,10 +165,16 @@ impl FeedStoreFeed {
 | 
				
			|||||||
            &self.path_feed_ron,
 | 
					            &self.path_feed_ron,
 | 
				
			||||||
            to_string_pretty(&feed, PrettyConfig::default())?,
 | 
					            to_string_pretty(&feed, PrettyConfig::default())?,
 | 
				
			||||||
        )?;
 | 
					        )?;
 | 
				
			||||||
        Self::write(&self.path_feed, body)?;
 | 
					        Self::write(&self.path_feed, &body)?;
 | 
				
			||||||
 | 
					        self.raw_feed = Some(String::from_utf8_lossy(&body).to_string());
 | 
				
			||||||
        Ok(true)
 | 
					        Ok(true)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Persists the current feed info.
					    ///
					    /// Serializes `self.info` to a TOML string and hands it, together with
					    /// `self.path_settings`, to `Self::write`. Any serialization or write error is
					    /// propagated to the caller.
					    pub fn save_info(&self) -> Result<()> {
					        let serialized_info = toml::to_string(&self.info)?;
					        Self::write(&self.path_settings, serialized_info)?;
					        Ok(())
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// refresh in hours
 | 
					    /// refresh in hours
 | 
				
			||||||
    pub fn fetch(&mut self, fetcher: &super::Fetcher, refresh: usize) -> Result<bool> {
 | 
					    pub fn fetch(&mut self, fetcher: &super::Fetcher, refresh: usize) -> Result<bool> {
 | 
				
			||||||
        let mut builder = fetcher
 | 
					        let mut builder = fetcher
 | 
				
			||||||
@ -247,14 +260,14 @@ impl FeedStore {
 | 
				
			|||||||
        Self { _dir: dir, feeds }
 | 
					        Self { _dir: dir, feeds }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<Entry>) {
 | 
					    pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<FeedStoreEntry>) {
 | 
				
			||||||
        debug!("Collecting feeds");
 | 
					        debug!("Collecting feeds");
 | 
				
			||||||
        let mut feeds = HashMap::new();
 | 
					        let mut feeds = HashMap::new();
 | 
				
			||||||
        let mut entries = Vec::new();
 | 
					        let mut entries = Vec::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for (feed_url, feed_store_feed) in self.feeds.iter_mut() {
 | 
					        for (feed_url, feed_store_feed) in self.feeds.iter_mut() {
 | 
				
			||||||
            debug!("Collecting {feed_url}");
 | 
					            debug!("Collecting {feed_url}");
 | 
				
			||||||
            let mut feed = match feed_store_feed.load_feed(true) {
 | 
					            let feed = match feed_store_feed.load_feed(true) {
 | 
				
			||||||
                Ok(feed) => feed,
 | 
					                Ok(feed) => feed,
 | 
				
			||||||
                Err(e) => {
 | 
					                Err(e) => {
 | 
				
			||||||
                    warn!("Problem parsing feed file for feed {}: {}", feed_url, e);
 | 
					                    warn!("Problem parsing feed file for feed {}: {}", feed_url, e);
 | 
				
			||||||
@ -262,17 +275,48 @@ impl FeedStore {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
            };
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for entry in &mut feed.entries {
 | 
					            let mut changed_info = false;
 | 
				
			||||||
                entry.source = Some(feed_url.to_string());
 | 
					
 | 
				
			||||||
 | 
					            for entry in &feed.entries {
 | 
				
			||||||
 | 
					                // If we already have the entry in store, don't parse it again.
 | 
				
			||||||
 | 
					                // Simply change the raw entry attribute.
 | 
				
			||||||
 | 
					                let entry_link = Url::parse(&entry.links.first().cloned().unwrap().href).unwrap();
 | 
				
			||||||
 | 
					                if let Some(archived_entry) = feed_store_feed.info.entries.get_mut(&entry_link) {
 | 
				
			||||||
 | 
					                    if &archived_entry.entry != entry {
 | 
				
			||||||
 | 
					                        changed_info = true;
 | 
				
			||||||
 | 
					                        archived_entry.entry = entry.clone();
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    entries.push(archived_entry.clone());
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    // TODO: should this be done here? or earlier in the fetching process?
 | 
				
			||||||
 | 
					                    let enhanced_entry = FeedStoreEntry::from_entry(
 | 
				
			||||||
 | 
					                        entry.clone(),
 | 
				
			||||||
 | 
					                        entry_link.clone(),
 | 
				
			||||||
 | 
					                        feed_url.clone(),
 | 
				
			||||||
 | 
					                    );
 | 
				
			||||||
 | 
					                    feed_store_feed
 | 
				
			||||||
 | 
					                        .info
 | 
				
			||||||
 | 
					                        .entries
 | 
				
			||||||
 | 
					                        .insert(entry_link.clone(), enhanced_entry.clone());
 | 
				
			||||||
 | 
					                    entries.push(enhanced_entry);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            entries.extend(feed.entries.clone());
 | 
					 | 
				
			||||||
            if entries.len() > 4 * max_entries {
 | 
					            if entries.len() > 4 * max_entries {
 | 
				
			||||||
                entries = trim_entries(entries, max_entries);
 | 
					                entries = trim_entries(entries, max_entries);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            feeds.insert(feed_url.to_string(), feed.clone());
 | 
					            feeds.insert(feed_url.to_string(), feed.clone());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // If some info from an entry was changed, save feed info
 | 
				
			||||||
 | 
					            if changed_info {
 | 
				
			||||||
 | 
					                feed_store_feed
 | 
				
			||||||
 | 
					                    .save_info()
 | 
				
			||||||
 | 
					                    .expect("Failed to save feed info");
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        (feeds, trim_entries(entries, max_entries))
 | 
					        (feeds, trim_entries(entries, max_entries))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -286,8 +330,9 @@ impl FeedStore {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn trim_entries(mut entries: Vec<Entry>, max_entries: usize) -> Vec<Entry> {
 | 
					fn trim_entries(mut entries: Vec<FeedStoreEntry>, max_entries: usize) -> Vec<FeedStoreEntry> {
 | 
				
			||||||
    entries.sort_by_key(|e| std::cmp::Reverse(e.updated.or(e.published).unwrap_or_default()));
 | 
					    entries.sort_by(|a, b| a.mars_date.cmp(&b.mars_date));
 | 
				
			||||||
 | 
					    entries.reverse();
 | 
				
			||||||
    entries.truncate(max_entries);
 | 
					    entries.truncate(max_entries);
 | 
				
			||||||
    entries
 | 
					    entries
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -298,3 +343,34 @@ fn hv(headers: &HeaderMap, key: &str) -> String {
 | 
				
			|||||||
        _ => "".to_string(),
 | 
					        _ => "".to_string(),
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// A single article in the [FeedStore].
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// This wrapper makes it possible to inject the retrieval date when the published date
 | 
				
			||||||
 | 
					/// is not available in the feed.
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// Used for ordering by date and accessing source information.
 | 
				
			||||||
 | 
					#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
 | 
				
			||||||
 | 
					pub struct FeedStoreEntry {
 | 
				
			||||||
 | 
					    /// Usually extracted from the feed, but when not present,
 | 
				
			||||||
 | 
					    /// the first retrieval time is used.
 | 
				
			||||||
 | 
					    pub mars_date: DateTime<Utc>,
 | 
				
			||||||
 | 
					    /// [FeedStoreFeed::url]
 | 
				
			||||||
 | 
					    pub mars_source: Url,
 | 
				
			||||||
 | 
					    /// Canonical URL
 | 
				
			||||||
 | 
					    pub mars_url: Url,
 | 
				
			||||||
 | 
					    /// Usual RSS feed entry
 | 
				
			||||||
 | 
					    #[serde(flatten)]
 | 
				
			||||||
 | 
					    pub entry: Entry,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl FeedStoreEntry {
 | 
				
			||||||
 | 
					    pub fn from_entry(entry: Entry, url: Url, source: Url) -> Self {
 | 
				
			||||||
 | 
					        Self {
 | 
				
			||||||
 | 
					            mars_source: source,
 | 
				
			||||||
 | 
					            mars_url: url,
 | 
				
			||||||
 | 
					            mars_date: entry.published.unwrap_or(Utc::now()),
 | 
				
			||||||
 | 
					            entry,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -54,7 +54,7 @@ struct Args {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Config to be parsed from toml file given as cmdline option
 | 
					/// Config to be parsed from toml file given as cmdline option
 | 
				
			||||||
#[derive(Deserialize)]
 | 
					#[derive(Deserialize, Serialize)]
 | 
				
			||||||
struct Config {
 | 
					struct Config {
 | 
				
			||||||
    /// to be used as part of the fetchers username header
 | 
					    /// to be used as part of the fetchers username header
 | 
				
			||||||
    bot_name: String,
 | 
					    bot_name: String,
 | 
				
			||||||
@ -101,8 +101,12 @@ pub fn to_checked_pathbuf(dir: &Utf8Path) -> Utf8PathBuf {
 | 
				
			|||||||
///
 | 
					///
 | 
				
			||||||
/// This is a separate struct in case one wants to configure additional
 | 
					/// This is a separate struct in case one wants to configure additional
 | 
				
			||||||
/// information in the future.
 | 
					/// information in the future.
 | 
				
			||||||
#[derive(Deserialize)]
 | 
					#[derive(Debug, Deserialize, Serialize)]
 | 
				
			||||||
struct FeedConfig {
 | 
					struct FeedConfig {
 | 
				
			||||||
 | 
					    /// short name for the feed
 | 
				
			||||||
 | 
					    name: String,
 | 
				
			||||||
 | 
					    /// homepage URL for the website
 | 
				
			||||||
 | 
					    homepage: Url,
 | 
				
			||||||
    /// url of an ATOM, RSS or Json feed
 | 
					    /// url of an ATOM, RSS or Json feed
 | 
				
			||||||
    url: String,
 | 
					    url: String,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -18,6 +18,7 @@ pub fn build(config: &Config, feed_store: &mut FeedStore) -> Result<()> {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    let mut context = tera::Context::new();
 | 
					    let mut context = tera::Context::new();
 | 
				
			||||||
    let (feeds, entries): (HashMap<String, Feed>, _) = feed_store.collect(config.max_entries);
 | 
					    let (feeds, entries): (HashMap<String, Feed>, _) = feed_store.collect(config.max_entries);
 | 
				
			||||||
 | 
					    context.insert("config", config);
 | 
				
			||||||
    context.insert("feeds", &feeds);
 | 
					    context.insert("feeds", &feeds);
 | 
				
			||||||
    context.insert("entries", &entries);
 | 
					    context.insert("entries", &entries);
 | 
				
			||||||
    context.insert("lang", &config.lang);
 | 
					    context.insert("lang", &config.lang);
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user