feat: Save previous entries in feed settings
commit 6bb67a8129
parent 90f29bd2a4
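Feed settings now remember every article that was ever downloaded: `FeedStoreFeedInfo` gains an `entries: BTreeMap<Url, FeedStoreEntry>` cache, `FeedStore::collect()` reuses cached entries instead of re-parsing them, and the new `FeedStoreEntry` wrapper adds a `mars_date` that falls back to the retrieval time when a feed omits its published date.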
@@ -31,6 +31,12 @@ pub struct FeedStoreFeedInfo {
     ///
     /// Used to let the server know whether we need a new entry or not.
     pub fetch_data: Option<FetchData>,
+    /// A cache of already downloaded articles.
+    ///
+    /// Kept for historical purposes, but also to inject the retrieval date
+    /// when the published date was not exposed in an article.
+    #[serde(default)]
+    pub entries: BTreeMap<Url, FeedStoreEntry>,
 }
 
 impl FeedStoreFeedInfo {
@@ -38,6 +44,7 @@ impl FeedStoreFeedInfo {
         Self {
             added: Utc::now(),
             fetch_data: None,
+            entries: BTreeMap::new(),
         }
     }
 }
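A note on the `#[serde(default)]` above: it keeps settings files written before this change loadable. A minimal sketch of that behaviour, assuming `FeedStoreFeedInfo` also derives `Deserialize` and that `added` is serialized as an RFC 3339 string (both assumptions, not shown in this diff):

```rust
// Hypothetical test, not part of the commit: a pre-change settings file has
// no `entries` table, so deserialization must fall back to an empty map
// instead of failing on a missing field.
#[test]
fn old_settings_still_parse() {
    let old_toml = r#"added = "2024-01-01T00:00:00Z""#; // hypothetical on-disk form
    let info: FeedStoreFeedInfo = toml::from_str(old_toml).unwrap();
    assert!(info.entries.is_empty()); // #[serde(default)] kicks in
    assert!(info.fetch_data.is_none()); // Option fields already default to None
}
```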
@@ -147,7 +154,7 @@ impl FeedStoreFeed {
 
         debug!("Storing fetchdata for {}", self.url);
         self.info.fetch_data = Some(fetchdata);
-        Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
+        self.save_info()?;
 
         if !self.has_changed(&feed)? {
             return Ok(false);
@@ -158,10 +165,16 @@ impl FeedStoreFeed {
             &self.path_feed_ron,
             to_string_pretty(&feed, PrettyConfig::default())?,
         )?;
-        Self::write(&self.path_feed, body)?;
+        Self::write(&self.path_feed, &body)?;
+        self.raw_feed = Some(String::from_utf8_lossy(&body).to_string());
         Ok(true)
     }
 
+    pub fn save_info(&self) -> Result<()> {
+        Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
+        Ok(())
+    }
+
     /// refresh in hours
     pub fn fetch(&mut self, fetcher: &super::Fetcher, refresh: usize) -> Result<bool> {
         let mut builder = fetcher
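Two details worth noting in the hunk above: passing `&body` instead of moving `body` into `Self::write` is what keeps `body` alive for the new `raw_feed` cache on the following line, and the inline settings write is factored into `save_info()` so the entry cache introduced in this commit can be persisted from `collect()` below as well.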
@@ -247,14 +260,14 @@ impl FeedStore {
         Self { _dir: dir, feeds }
     }
 
-    pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<Entry>) {
+    pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<FeedStoreEntry>) {
         debug!("Collecting feeds");
         let mut feeds = HashMap::new();
         let mut entries = Vec::new();
 
         for (feed_url, feed_store_feed) in self.feeds.iter_mut() {
             debug!("Collecting {feed_url}");
-            let mut feed = match feed_store_feed.load_feed(true) {
+            let feed = match feed_store_feed.load_feed(true) {
                 Ok(feed) => feed,
                 Err(e) => {
                     warn!("Problem parsing feed file for feed {}: {}", feed_url, e);
@@ -262,17 +275,48 @@ impl FeedStore {
                 }
             };
 
-            for entry in &mut feed.entries {
-                entry.source = Some(feed_url.to_string());
+            let mut changed_info = false;
+
+            for entry in &feed.entries {
+                // If we already have the entry in store, don't parse it again.
+                // Simply update the cached entry attribute.
+                let entry_link = Url::parse(&entry.links.first().cloned().unwrap().href).unwrap();
+                if let Some(archived_entry) = feed_store_feed.info.entries.get_mut(&entry_link) {
+                    if &archived_entry.entry != entry {
+                        changed_info = true;
+                        archived_entry.entry = entry.clone();
+                    }
+
+                    entries.push(archived_entry.clone());
+                } else {
+                    // TODO: should this be done here? or earlier in the fetching process?
+                    let enhanced_entry = FeedStoreEntry::from_entry(
+                        entry.clone(),
+                        entry_link.clone(),
+                        feed_url.clone(),
+                    );
+                    feed_store_feed
+                        .info
+                        .entries
+                        .insert(entry_link.clone(), enhanced_entry.clone());
+                    entries.push(enhanced_entry);
+                }
             }
 
-            entries.extend(feed.entries.clone());
             if entries.len() > 4 * max_entries {
                 entries = trim_entries(entries, max_entries);
             }
 
             feeds.insert(feed_url.to_string(), feed.clone());
 
+            // If any entry info changed, save the feed info.
+            if changed_info {
+                feed_store_feed
+                    .save_info()
+                    .expect("Failed to save feed info");
+            }
         }
 
         (feeds, trim_entries(entries, max_entries))
     }
 
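One sharp edge in the new loop: `entry.links.first().cloned().unwrap()` and the `Url::parse(...).unwrap()` after it panic on any entry that has no link or a malformed `href`. A hedged alternative, as a sketch only (not part of this commit; it assumes the surrounding loop shown above):

```rust
// Skip entries we cannot key by URL instead of letting one malformed
// entry panic the whole collect() pass.
let Some(link) = entry.links.first() else {
    warn!("Entry without a link in {feed_url}, skipping");
    continue;
};
let entry_link = match Url::parse(&link.href) {
    Ok(url) => url,
    Err(e) => {
        warn!("Unparseable entry link {}: {e}", link.href);
        continue;
    }
};
```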
@@ -286,8 +330,9 @@ impl FeedStore {
     }
 }
 
-fn trim_entries(mut entries: Vec<Entry>, max_entries: usize) -> Vec<Entry> {
-    entries.sort_by_key(|e| std::cmp::Reverse(e.updated.or(e.published).unwrap_or_default()));
+fn trim_entries(mut entries: Vec<FeedStoreEntry>, max_entries: usize) -> Vec<FeedStoreEntry> {
+    entries.sort_by(|a, b| a.mars_date.cmp(&b.mars_date));
+    entries.reverse();
     entries.truncate(max_entries);
     entries
 }
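The ascending sort followed by `reverse()` mirrors the pre-change `Reverse(...)` key. The one-liner shape still works on the new type, identical up to the relative order of entries with equal dates (a suggestion, assuming `mars_date` stays `DateTime<Utc>`, which is `Ord` and `Copy`):

```rust
// Newest first in one pass, same result as sort_by + reverse
// except for ties on mars_date.
entries.sort_by_key(|e| std::cmp::Reverse(e.mars_date));
```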
@@ -298,3 +343,34 @@ fn hv(headers: &HeaderMap, key: &str) -> String {
         _ => "".to_string(),
     }
 }
+
+/// A single article in the [FeedStore].
+///
+/// This transformation allows injecting the retrieval date when the published
+/// date is not available in the feed.
+///
+/// Used for ordering by date and accessing source information.
+#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
+pub struct FeedStoreEntry {
+    /// Usually extracted from the feed, but when not present,
+    /// the first retrieval time is used.
+    pub mars_date: DateTime<Utc>,
+    /// [FeedStoreFeed::url]
+    pub mars_source: Url,
+    /// Canonical URL
+    pub mars_url: Url,
+    /// Usual RSS feed entry
+    #[serde(flatten)]
+    pub entry: Entry,
+}
+
+impl FeedStoreEntry {
+    pub fn from_entry(entry: Entry, url: Url, source: Url) -> Self {
+        Self {
+            mars_source: source,
+            mars_url: url,
+            mars_date: entry.published.unwrap_or(Utc::now()),
+            entry,
+        }
+    }
+}
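How the new wrapper is meant to be used, as a sketch (assuming `Entry` is `feed_rs::model::Entry` with its serde support enabled; the URLs are hypothetical):

```rust
use url::Url;

// Wrap a freshly fetched entry for the store. from_entry falls back to the
// retrieval time ("now") when the feed exposes no published date, so
// mars_date is always set and trim_entries can sort on it unconditionally.
fn cache_entry(entry: Entry) -> FeedStoreEntry {
    let link = Url::parse("https://example.org/post/1").unwrap(); // hypothetical
    let source = Url::parse("https://example.org/feed.xml").unwrap(); // hypothetical
    FeedStoreEntry::from_entry(entry, link, source)
}
```

With `#[serde(flatten)]`, the wrapped entry's fields are stored alongside the `mars_*` keys in the same settings table rather than nested under a separate key.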
@@ -54,7 +54,7 @@ struct Args {
 }
 
 /// Config to be parsed from toml file given as cmdline option
-#[derive(Deserialize)]
+#[derive(Deserialize, Serialize)]
 struct Config {
     /// to be used as part of the fetchers username header
     bot_name: String,
@@ -101,8 +101,12 @@ pub fn to_checked_pathbuf(dir: &Utf8Path) -> Utf8PathBuf {
 ///
 /// This is a separate struct in case one wants to configure additional
 /// information in the future.
-#[derive(Deserialize)]
+#[derive(Debug, Deserialize, Serialize)]
 struct FeedConfig {
+    /// short name for the feed
+    name: String,
+    /// homepage URL for the website
+    homepage: Url,
     /// url of an ATOM, RSS or Json feed
     url: String,
 }
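The two new `FeedConfig` fields are required (neither carries `#[serde(default)]`), so existing config files need `name` and `homepage` added to every feed section. A minimal sketch of a feed section that now parses (values hypothetical):

```rust
// Hypothetical: deserialize one feed section as the updated struct expects.
let feed: FeedConfig = toml::from_str(
    r#"
        name = "Example Blog"
        homepage = "https://example.org/"
        url = "https://example.org/feed.xml"
    "#,
)
.expect("feed config with all three required keys");
```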
@@ -18,6 +18,7 @@ pub fn build(config: &Config, feed_store: &mut FeedStore) -> Result<()> {
 
     let mut context = tera::Context::new();
     let (feeds, entries): (HashMap<String, Feed>, _) = feed_store.collect(config.max_entries);
+    context.insert("config", config);
     context.insert("feeds", &feeds);
     context.insert("entries", &entries);
     context.insert("lang", &config.lang);