meta: start refactoring

commit a15d380bc9 (parent eb09729f38)

Cargo.lock (generated, 23 lines changed)
@@ -182,6 +182,15 @@ version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b"
 
+[[package]]
+name = "camino"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "cc"
 version = "1.2.9"
@@ -199,9 +208,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
 [[package]]
 name = "chrono"
-version = "0.4.39"
+version = "0.4.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
+checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
 dependencies = [
  "android-tzdata",
  "iana-time-zone",
@@ -209,7 +218,7 @@ dependencies = [
  "num-traits",
  "serde",
  "wasm-bindgen",
- "windows-targets",
+ "windows-link",
 ]
 
 [[package]]
@@ -1109,6 +1118,8 @@ name = "planet-mars"
 version = "0.1.1"
 dependencies = [
  "anyhow",
+ "camino",
+ "chrono",
  "clap",
  "env_logger",
  "feed-rs",
@@ -1887,6 +1898,12 @@ dependencies = [
  "windows-targets",
 ]
 
+[[package]]
+name = "windows-link"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38"
+
 [[package]]
 name = "windows-sys"
 version = "0.52.0"
Cargo.toml

@@ -11,6 +11,8 @@ categories = ["web-programming"]
 
 [dependencies]
 anyhow = "1"
+camino = { version = "1.1.9", features = ["serde", "serde1"] }
+chrono = { version = "0.4.40", features = ["now", "serde"] }
 clap = { version = "4", features = ["derive"] }
 env_logger = "0"
 feed-rs = "2"
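Note: the two new dependencies carry the refactoring below: camino provides UTF-8-only paths that implement Display, and chrono provides serde-serializable timestamps. A minimal sketch of what they are used for here (illustrative only, not part of the commit):

    use camino::Utf8PathBuf;
    use chrono::{DateTime, Utc};

    fn sketch() {
        // Utf8PathBuf joins like PathBuf but formats without .display():
        let dir = Utf8PathBuf::from("feeds").join("example-org");
        // DateTime<Utc> serializes via serde (the "serde" feature above):
        let added: DateTime<Utc> = Utc::now();
        println!("feed dir {dir}, first added {added}");
    }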
src/feed_store.rs

@@ -1,85 +1,205 @@
-use anyhow::bail;
 use anyhow::Result;
+use camino::{Utf8Path, Utf8PathBuf};
+use chrono::{DateTime, Utc};
 use feed_rs::model::Entry;
 use feed_rs::model::Feed;
 use ron::ser::{to_string_pretty, PrettyConfig};
 use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::convert::AsRef;
 use std::fs;
-use std::io::BufReader;
-use std::path::PathBuf;
+use std::time::Instant;
 use ureq::http::HeaderMap;
 use ureq::http::Response;
 use ureq::Body;
 use url::Url;
 
-#[derive(Deserialize, Serialize, Default)]
-pub struct FetchData {
-    pub etag: String,
-    pub last_modified: String,
-}
-
-pub struct FeedStore {
-    pub dir: PathBuf,
-}
-
-impl FeedStore {
-    pub fn new(dir: &str) -> Self {
-        Self {
-            dir: super::to_checked_pathbuf(dir),
-        }
-    }
-
-    fn slugify_url(url: &Url) -> Result<String> {
-        let Some(domain) = url.domain() else {
-            bail!("Url has no domain: '{url}'.")
-        };
+pub fn slugify_url(url: &Url) -> String {
+    let domain = url.domain().unwrap();
     let query = url.query().unwrap_or("");
-        Ok(slug::slugify(format!("{domain}{}{query}", url.path())))
+    slug::slugify(format!("{domain}{}{query}", url.path()))
 }
 
-    fn generic_path(&self, url: &Url, ext: &str) -> Result<String> {
-        Ok(format!(
-            "{}/{}{ext}",
-            self.dir.display(),
-            Self::slugify_url(url)?
-        ))
+/// Stored settings/info about a feed.
+#[derive(Debug, Deserialize, Serialize)]
+pub struct FeedStoreFeedInfo {
+    /// First time we added this feed.
+    ///
+    /// Used for historical purposes only.
+    pub added: DateTime<Utc>,
+    /// Last known cached entry, if any.
+    ///
+    /// Used to let the server know whether we need a new entry or not.
+    pub fetch_data: Option<FetchData>,
 }
 
-    fn feed_path(&self, url: &Url) -> Result<String> {
-        self.generic_path(url, "")
+impl FeedStoreFeedInfo {
+    pub fn new() -> Self {
+        Self {
+            added: Utc::now(),
+            fetch_data: None,
+        }
+    }
 }
 
-    fn fetchdata_path(&self, url: &Url) -> Result<String> {
-        self.generic_path(url, ".toml")
+/// Storage for a single feed.
+///
+/// Contains one [FeedStoreVersion] for every time the feed has been successfully fetched,
+/// and one [FeedStoreEntry] for each article referenced throughout the entries.
+#[derive(Debug, Deserialize, Serialize)]
+pub struct FeedStoreFeed {
+    /// The feed URL
+    pub url: Url,
+    /// Where it's stored, should be inside the [FeedStore::dir].
+    pub dir: Utf8PathBuf,
+    /// Raw feed path
+    pub path_feed: Utf8PathBuf,
+    /// Raw feed RON path
+    pub path_feed_ron: Utf8PathBuf,
+    /// Settings path
+    pub path_settings: Utf8PathBuf,
+    /// Detailed settings/info about a feed.
+    pub info: FeedStoreFeedInfo,
+    /// Stored copy of the raw XML feed (if any)
+    pub raw_feed: Option<String>,
 }
 
-    pub fn load_fetchdata(&self, url: &Url) -> Result<FetchData> {
-        let path = self.fetchdata_path(url)?;
-        if !fs::exists(path.clone())? {
-            return Ok(FetchData::default());
-        }
-        Ok(toml::from_str(&fs::read_to_string(path)?)?)
+impl FeedStoreFeed {
+    pub fn new(basedir: &Utf8Path, url: &Url) -> Self {
+        let dir = basedir.join(slugify_url(url));
+        if !dir.is_dir() {
+            std::fs::create_dir_all(&dir).unwrap();
         }
 
-    fn has_changed(&self, url: &Url, new_feed: &Feed) -> Result<bool> {
-        let Some(old_feed) = self.load_feed(url, false)? else {
-            return Ok(true);
+        let path_settings = dir.join("settings.toml");
+        let info: FeedStoreFeedInfo = match std::fs::read_to_string(&path_settings) {
+            Ok(s) => toml::from_str(&s).unwrap(),
+            Err(_e) => {
+                // Assume file has not been created yet. Initialize
+                let info = FeedStoreFeedInfo::new();
+                std::fs::write(&path_settings, toml::to_string(&info).unwrap()).unwrap();
+                info
+            }
+        };
+
+        let raw_feed: Option<String> = std::fs::read_to_string(dir.join("feed.xml")).ok();
+
+        Self {
+            dir: dir.clone(),
+            path_feed: dir.join("feed.xml"),
+            path_feed_ron: dir.join("feed.ron"),
+            path_settings: dir.join("settings.toml"),
+            url: url.clone(),
+            info,
+            raw_feed,
+        }
+    }
+
+    pub fn load_fetchdata(&self) -> Option<&FetchData> {
+        self.info.fetch_data.as_ref()
+    }
+
+    pub fn load_feed(&self, sanitize: bool) -> Option<Feed> {
+        if let Some(raw_feed) = &self.raw_feed {
+            let parser = feed_rs::parser::Builder::new()
+                .sanitize_content(sanitize)
+                .build();
+            Some(parser.parse(raw_feed.as_bytes()).unwrap())
+        } else {
+            None
+        }
+    }
+
+    pub fn has_changed(&self, new_feed: &Feed) -> bool {
+        let Some(old_feed) = self.load_feed(false) else {
+            return true;
         };
 
         let mut old_iter = old_feed.entries.iter();
         for new in &new_feed.entries {
             let Some(old) = old_iter.next() else {
-                return Ok(true);
+                return true;
             };
             if old != new {
-                return Ok(true);
+                return true;
             }
         }
         // ignoring any entries left in old_iter
+        false
+    }
+
+    pub fn store(&mut self, mut response: Response<Body>) -> Result<bool> {
+        let headers = response.headers();
+        let fetchdata = FetchData {
+            etag: hv(headers, "etag"),
+            last_modified: hv(headers, "last_modified"),
+            when: Utc::now(),
+        };
+
+        let body = response.body_mut().with_config().read_to_vec()?;
+        let feed = match feed_rs::parser::parse(body.as_slice()) {
+            Ok(f) => f,
+            Err(e) => {
+                warn!("Error when parsing feed for {}: {e:?}", self.url);
+                return Ok(false);
+            }
+        };
+        if !self.has_changed(&feed) {
+            return Ok(false);
+        }
+        debug!("Storing feed for {}.", self.url);
+        // todo don't serialize to string but to writer
+        Self::write(
+            &self.path_feed_ron,
+            to_string_pretty(&feed, PrettyConfig::default())?,
+        )?;
+        Self::write(&self.path_feed, body)?;
+        // Save info
+        self.info.fetch_data = Some(fetchdata);
+        Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
+        Ok(true)
+    }
+
+    pub fn fetch(&mut self, fetcher: &super::Fetcher) -> Result<bool> {
+        let mut builder = fetcher
+            .agent
+            .get(self.url.to_string())
+            .header("FROM", fetcher.from.clone());
+
+        if let Some(fetchdata) = self.load_fetchdata() {
+            if !fetchdata.etag.is_empty() {
+                builder = builder.header("If-None-Match", fetchdata.etag.clone());
+            }
+            if !fetchdata.last_modified.is_empty() {
+                builder = builder.header("If-Modified-Since", fetchdata.last_modified.clone());
+            }
+        }
+
+        let start_instant = Instant::now();
+        let result = builder.call();
+        let duration = start_instant.elapsed();
+
+        let response = result?;
+        debug!(
+            "fetched with status {} in {} ms: {}",
+            response.status(),
+            duration.as_millis(),
+            self.url,
+        );
+        let status = response.status();
+        match status.as_u16() {
+            304 => Ok(false), // Not Modified -> nothing to do
+            200 => self.store(response),
+            _ => {
+                warn!(
+                    "HTTP Status {} not implemented for {}",
+                    response.status(),
+                    self.url,
+                );
                 Ok(false)
             }
+        }
+    }
 
     fn write<P: AsRef<std::path::Path> + std::fmt::Display, C: AsRef<[u8]>>(
         path: P,
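Note: FeedStoreFeed::new above persists FeedStoreFeedInfo as settings.toml and reads it back on the next run. A standalone sketch of that round-trip, assuming the types from this file are in scope (the path is illustrative):

    fn settings_roundtrip() -> anyhow::Result<()> {
        let info = FeedStoreFeedInfo::new(); // added = Utc::now(), fetch_data = None
        let serialized = toml::to_string(&info)?;
        std::fs::write("feeds/example-org/settings.toml", &serialized)?;
        // Reading it back restores the same state:
        let reloaded: FeedStoreFeedInfo = toml::from_str(&serialized)?;
        assert!(reloaded.fetch_data.is_none());
        Ok(())
    }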
@@ -90,85 +210,66 @@ impl FeedStore
         }
         fs::write(path, contents)
     }
 
-    pub fn store(&self, url: &Url, mut response: Response<Body>) -> Result<bool> {
-        let headers = response.headers();
-        let fetchdata = FetchData {
-            etag: hv(headers, "etag"),
-            last_modified: hv(headers, "last_modified"),
-        };
-
-        let body = response.body_mut().with_config().read_to_vec()?;
-        let feed = match feed_rs::parser::parse(body.as_slice()) {
-            Ok(f) => f,
-            Err(e) => {
-                warn!("Error when parsing feed for {url}: {e:?}");
-                return Ok(false);
-            }
-        };
-        if !self.has_changed(url, &feed)? {
-            return Ok(false);
-        }
-        debug!("Storing feed for {url}.");
-        // todo don't serialize to string but to writer
-        Self::write(
-            self.generic_path(url, ".ron")?,
-            to_string_pretty(&feed, PrettyConfig::default())?,
-        )?;
-        Self::write(self.feed_path(url)?, body)?;
-        Self::write(self.fetchdata_path(url)?, toml::to_string(&fetchdata)?)?;
-        Ok(true)
-    }
+}
 
-    fn load_feed(&self, url: &Url, sanitize: bool) -> Result<Option<Feed>> {
-        let parser = feed_rs::parser::Builder::new()
-            .sanitize_content(sanitize)
-            .build();
-
-        let path = self.feed_path(url)?;
-        if !fs::exists(path.clone())? {
-            return Ok(None);
-        }
-        let file = fs::File::open(path)?;
-        Ok(Some(parser.parse(BufReader::new(file))?))
+#[derive(Clone, Debug, Deserialize, Serialize, Default)]
+pub struct FetchData {
+    pub when: DateTime<Utc>,
+    pub etag: String,
+    pub last_modified: String,
 }
 
-    pub fn collect(
-        &self,
-        feed_configs: &Vec<super::FeedConfig>,
-        max_entries: usize,
-    ) -> (HashMap<String, Feed>, Vec<Entry>) {
+#[derive(Debug)]
+pub struct FeedStore {
+    pub _dir: Utf8PathBuf,
+    pub feeds: BTreeMap<Url, FeedStoreFeed>,
+}
+
+impl FeedStore {
+    pub fn new(dir: &str, feedlist: &Vec<super::FeedConfig>) -> Self {
+        let dir = super::to_checked_pathbuf(dir);
+        let mut feeds: BTreeMap<Url, FeedStoreFeed> = BTreeMap::new();
+
+        for feed_config in feedlist {
+            let feed_url = Url::parse(&feed_config.url).unwrap();
+            feeds.insert(feed_url.clone(), FeedStoreFeed::new(&dir, &feed_url));
+        }
+
+        Self { _dir: dir, feeds }
+    }
+
+    pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<Entry>) {
         let mut feeds = HashMap::new();
         let mut entries = Vec::new();
 
-        for feed_config in feed_configs {
-            let mut feed = match (|| {
-                let url = Url::parse(&feed_config.url)?;
-                self.load_feed(&url, true)
-            })() {
-                Err(e) => {
-                    warn!(
-                        "Problem parsing feed file for feed {}: {e:?}",
-                        feed_config.url
-                    );
+        for (feed_url, feed_store_feed) in self.feeds.iter_mut() {
+            let Some(mut feed) = feed_store_feed.load_feed(true) else {
+                warn!("Problem parsing feed file for feed {}", feed_url);
                 continue;
-                }
-                Ok(None) => continue,
-                Ok(Some(f)) => f,
             };
 
             for entry in &mut feed.entries {
-                entry.source = Some(feed_config.url.clone());
+                entry.source = Some(feed_url.to_string());
             }
 
             entries.append(&mut std::mem::take(&mut feed.entries));
-            feeds.insert(feed_config.url.clone(), feed);
-            // optimization to reduce memory usage
             if entries.len() > 4 * max_entries {
                 entries = trim_entries(entries, max_entries);
             }
+
+            feeds.insert(feed_url.to_string(), feed.clone());
         }
         (feeds, trim_entries(entries, max_entries))
     }
+
+    pub fn fetch(&mut self, fetcher: &super::Fetcher) -> Result<bool> {
+        let mut rebuild = false;
+        for (_url, feed) in self.feeds.iter_mut() {
+            rebuild |= feed.fetch(fetcher)?;
+        }
+
+        Ok(rebuild)
+    }
 }
 
 fn trim_entries(mut entries: Vec<Entry>, max_entries: usize) -> Vec<Entry> {
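Note: with FeedStore now owning one FeedStoreFeed per configured feed, the calling code shrinks to roughly the following. This is a sketch mirroring the src/main.rs and src/template_engine.rs changes below, assuming a parsed Config as defined in src/main.rs:

    fn run(config: &Config) -> anyhow::Result<bool> {
        let mut feed_store = FeedStore::new(&config.feed_dir, &config.feeds);
        let fetcher = Fetcher::new(&config.bot_name, &config.from);
        // FeedStore::fetch loops over every feed and ORs the per-feed results:
        let rebuild = feed_store.fetch(&fetcher)?;
        if rebuild {
            // feeds: HashMap<String, Feed>; entries: Vec<Entry>, trimmed to max_entries
            let (_feeds, _entries) = feed_store.collect(config.max_entries);
        }
        Ok(rebuild)
    }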
src/fetcher.rs

@@ -1,15 +1,10 @@
-use anyhow::Result;
-use std::time::Instant;
 use ureq::tls::{TlsConfig, TlsProvider};
 use ureq::Agent;
-use url::Url;
-
-use crate::FeedStore;
 
 pub struct Fetcher {
-    agent: Agent,
+    pub agent: Agent,
     /// FROM header for requests
-    from: String,
+    pub from: String,
 }
 
 impl Fetcher {
@@ -36,41 +31,4 @@ impl Fetcher
             from: from.to_string(),
         }
     }
-
-    pub fn fetch(&self, url: Url, feed_store: &FeedStore) -> Result<bool> {
-        let fetchdata = feed_store.load_fetchdata(&url)?;
-        let mut builder = self
-            .agent
-            .get(url.to_string())
-            .header("FROM", self.from.clone());
-        if !fetchdata.etag.is_empty() {
-            builder = builder.header("If-None-Match", fetchdata.etag);
-        }
-        if !fetchdata.last_modified.is_empty() {
-            builder = builder.header("If-Modified-Since", fetchdata.last_modified);
-        }
-
-        let start_instant = Instant::now();
-        let result = builder.call();
-        let duration = start_instant.elapsed();
-
-        let response = result?;
-        debug!(
-            "fetched with status {} in {} ms: {url}",
-            response.status(),
-            duration.as_millis()
-        );
-        let status = response.status();
-        match status.as_u16() {
-            304 => Ok(false), // Not Modified -> nothing to do
-            200 => feed_store.store(&url, response),
-            _ => {
-                warn!(
-                    "HTTP Status {} not implemented for {url}",
-                    response.status()
-                );
-                Ok(false)
-            }
-        }
-    }
 }
src/main.rs (32 lines changed)

@@ -26,11 +26,10 @@ extern crate log;
 use crate::feed_store::FeedStore;
 use crate::fetcher::Fetcher;
 use anyhow::Result;
+use camino::Utf8PathBuf;
 use clap::Parser;
 use serde::Deserialize;
 use std::fs;
-use std::path::PathBuf;
-use url::Url;
 
 //mod atom_serializer;
 mod feed_store;
@@ -70,13 +69,13 @@ struct Config
     max_entries: usize,
 }
 
-pub fn to_checked_pathbuf(dir: &str) -> PathBuf {
-    let dir: PathBuf = PathBuf::from(dir);
+pub fn to_checked_pathbuf(dir: &str) -> Utf8PathBuf {
+    let dir = Utf8PathBuf::from(dir);
 
     let m = dir
         .metadata()
-        .unwrap_or_else(|_| panic!("Could not get metadata of dir: {}", dir.display()));
-    assert!(m.is_dir(), "Not a dir: {}", dir.display());
+        .unwrap_or_else(|_| panic!("Could not get metadata of dir: {dir}"));
+    assert!(m.is_dir(), "Not a dir: {dir}");
     dir
 }
 
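Note: the return type change works because camino's Utf8PathBuf implements Display, so it can be interpolated into format strings directly; std::path::PathBuf needs the .display() adapter since its contents may not be valid UTF-8. A minimal illustration (not part of the commit):

    use camino::Utf8PathBuf;

    fn sketch() {
        let dir = Utf8PathBuf::from("templates");
        // With PathBuf this would be format!("{}/*", dir.display()):
        let pattern = format!("{dir}/*");
        assert_eq!(pattern, "templates/*");
    }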
@@ -90,20 +89,9 @@ struct FeedConfig
     url: String,
 }
 
-fn fetch(config: &Config, feed_store: &FeedStore) -> Result<bool> {
+fn fetch(config: &Config, feed_store: &mut FeedStore) -> Result<bool> {
     let fetcher = Fetcher::new(&config.bot_name, &config.from);
-    let mut rebuild = false;
-    for feed in &config.feeds {
-        let url = match Url::parse(&feed.url) {
-            Ok(x) => x,
-            Err(e) => {
-                error!("Error parsing url '{}': {e:?}", feed.url);
-                continue;
-            }
-        };
-        rebuild |= fetcher.fetch(url, feed_store)?;
-    }
-    info!("Done fetching. Rebuild needed: {rebuild}");
+    let rebuild = feed_store.fetch(&fetcher)?;
     Ok(rebuild)
 }
 
@@ -122,15 +110,15 @@ fn main() -> Result<()>
     let _ = to_checked_pathbuf(&config.templates_dir);
     let _ = to_checked_pathbuf(&config.out_dir);
 
-    let feed_store = FeedStore::new(&config.feed_dir);
+    let mut feed_store = FeedStore::new(&config.feed_dir, &config.feeds);
     let should_build = if args.no_fetch {
         true
     } else {
-        fetch(&config, &feed_store)?
+        fetch(&config, &mut feed_store)?
     };
 
     if should_build {
-        template_engine::build(&config, &feed_store)?;
+        template_engine::build(&config, &mut feed_store)?;
     }
     Ok(())
 }
src/template_engine.rs

@@ -7,13 +7,12 @@ use std::collections::HashMap;
 use std::fs::File;
 use tera::{from_value, Tera};
 
-pub fn build(config: &Config, feed_store: &FeedStore) -> Result<()> {
+pub fn build(config: &Config, feed_store: &mut FeedStore) -> Result<()> {
     let mut tera = create_tera(&config.templates_dir)?;
     let out_dir = to_checked_pathbuf(&config.out_dir);
 
     let mut context = tera::Context::new();
-    let (feeds, entries): (HashMap<String, Feed>, _) =
-        feed_store.collect(&config.feeds, config.max_entries);
+    let (feeds, entries): (HashMap<String, Feed>, _) = feed_store.collect(config.max_entries);
     context.insert("feeds", &feeds);
     context.insert("entries", &entries);
     context.insert("PKG_AUTHORS", env!("CARGO_PKG_AUTHORS"));
@@ -24,7 +23,7 @@ pub fn build(config: &Config, feed_store: &FeedStore) -> Result<()>
 
     for name in tera.get_template_names() {
         debug!("Processing template {name}");
-        let file = File::create(format!("{}/{name}", out_dir.display()))?;
+        let file = File::create(format!("{out_dir}/{name}"))?;
         tera.render_to(name, &context, file)?;
     }
     Ok(())
@@ -32,7 +31,7 @@ pub fn build(config: &Config, feed_store: &FeedStore) -> Result<()>
 
 fn create_tera(templates_dir: &str) -> Result<Tera> {
     let dir = to_checked_pathbuf(templates_dir);
-    let mut tera = tera::Tera::new(&format!("{}/*", &dir.display()))?;
+    let mut tera = tera::Tera::new(&format!("{dir}/*"))?;
     // disable autoescape as this would corrupt urls or the entriy contents. todo check this!
     tera.autoescape_on(vec![]);
     Ok(tera)
|
Loading…
x
Reference in New Issue
Block a user