commit 0f134ad61b
.gitignore (vendored, new file)
@@ -0,0 +1,2 @@
+/target
+/mars.toml
Cargo.lock (generated, new file, 2056 lines)
(file diff suppressed because it is too large)
Cargo.toml (new file)
@@ -0,0 +1,18 @@
+[package]
+name = "planet-mars"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+# ammonia = "*" (not needed: HTML sanitizing is done by feed-rs)
+clap = { version = "*", features = ["derive"] }
+env_logger = "*"
+feed-rs = "*"
+log = "*"
+serde = { version = "*", features = ["derive"] }
+slug = "*"
+tera = "*"
+toml = "*"
+ureq = { version = "3.0.0-rc5", features = ["brotli", "charset", "gzip", "native-tls"] }
+url = "*"
+
README.org (new file)
@@ -0,0 +1,18 @@
+A simple planet (feed aggregator) like Planet Venus, but written in Rust and maintained.
+
+** todo
+Also see the todos in the source files.
+
+*** run clippy and rustfmt
+*** TODO error handling everywhere
+*** write templates for HTML and Atom/RSS feeds
+*** use a nice lib to process the config file
+- should check whether dirs exist and are writable
+- should check whether feed URLs can be parsed
+** Credits
+
+While writing this, I read and also copied code from:
+
+- [[https://docs.rs/crate/agro/0.1.1][agro]]
+- [[https://github.com/kitallis/hades][hades]] by Akshay Gupta
+- [[https://github.com/djc/planetrs][planetrs]] by Vagdish/Adau, Dirkjan Ochtman, Josh Matthews
mars.toml.example (new file)
@@ -0,0 +1,8 @@
+bot_name = "planet-mars"
+feed_dir = "/var/lib/planet-mars/feeds"
+from = "thomas@koch.ro"
+out_dir = "/var/lib/planet-mars/out"
+templates_dir = "/var/lib/planet-mars/templates"
+
+[[feeds]]
+url = "https://blog.fefe.de/rss.xml"
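
Since the config's feeds field deserializes into a Vec (see src/main.rs below), more feeds are added by appending further [[feeds]] tables. A minimal sketch; the second URL is a made-up placeholder:

    [[feeds]]
    url = "https://blog.fefe.de/rss.xml"

    # hypothetical second feed
    [[feeds]]
    url = "https://example.org/feed.atom"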
src/feed_store.rs (new file)
@@ -0,0 +1,141 @@
+use feed_rs::model::Entry;
+use feed_rs::model::Feed;
+use serde::{Deserialize, Serialize};
+use std::fs;
+use std::io::BufReader;
+use std::path::PathBuf;
+use ureq::http::HeaderMap;
+use ureq::http::Response;
+use ureq::Body;
+use url::Url;
+
+#[derive(Deserialize, Serialize, Default)]
+pub struct FetchData {
+    pub etag: String,
+    pub date: String,
+}
+
+pub struct FeedStore {
+    pub dir: PathBuf,
+}
+
+impl FeedStore {
+    pub fn new(dir: String) -> Self {
+        Self {
+            dir: super::to_checked_pathbuf(dir),
+        }
+    }
+
+    fn slugify_url(url: &Url) -> String {
+        let domain = url.domain().unwrap();
+        let query = url.query().unwrap_or("");
+        slug::slugify(format!("{domain}{}{query}", url.path()))
+    }
+
+    fn feed_path(&self, url: &Url) -> String {
+        format!("{}/{}", self.dir.display(), Self::slugify_url(url))
+    }
+
+    fn fetchdata_path(&self, url: &Url) -> String {
+        format!("{}.toml", self.feed_path(url))
+    }
+
+    pub fn get_fetchdata(&self, url: &Url) -> FetchData {
+        let path = self.fetchdata_path(url);
+        if !fs::exists(path.clone()).unwrap() {
+            return FetchData::default();
+        }
+        toml::from_str(&fs::read_to_string(path).unwrap()).unwrap()
+    }
+
+    fn has_changed(&self, url: &Url, new_feed: &Feed) -> bool {
+        let Some(old_feed) = self.load_feed(url, false) else {
+            return true;
+        };
+
+        let mut old_iter = old_feed.entries.iter();
+        for new in &new_feed.entries {
+            let Some(old) = old_iter.next() else {
+                return true;
+            };
+            if old != new {
+                return true;
+            }
+        }
+        // ignoring any entries left in old_iter
+        false
+    }
+
+    pub fn store(&self, url: &Url, mut response: Response<Body>) -> bool {
+        let headers = response.headers();
+        let fetchdata = FetchData {
+            etag: hv(headers, "etag"),
+            date: hv(headers, "date"),
+        };
+
+        let body = response
+            .body_mut()
+            .with_config()
+            // .limit(MAX_BODY_SIZE)
+            .read_to_vec()
+            .unwrap();
+        let feed = match feed_rs::parser::parse(body.as_slice()) {
+            Ok(f) => f,
+            Err(e) => {
+                warn!("Error when parsing feed for {url}: {e:?}");
+                return false;
+            }
+        };
+        if !self.has_changed(url, &feed) {
+            return false;
+        }
+        let _ = fs::write(self.feed_path(url), body);
+        let _ = fs::write(
+            self.fetchdata_path(url),
+            toml::to_string(&fetchdata).unwrap(),
+        );
+        true
+    }
+
+    fn load_feed(&self, url: &Url, sanitize: bool) -> Option<Feed> {
+        let parser = feed_rs::parser::Builder::new()
+            .sanitize_content(sanitize)
+            .build();
+
+        let path = self.feed_path(url);
+        if !fs::exists(path.clone()).unwrap() {
+            return None;
+        }
+        let file = fs::File::open(path).unwrap();
+        Some(parser.parse(BufReader::new(file)).unwrap())
+    }
+
+    pub fn collect(&self, feed_configs: &Vec<super::FeedConfig>) -> Vec<Entry> {
+        let mut entries = vec![];
+
+        for feed_config in feed_configs {
+            let url = Url::parse(&feed_config.url).unwrap();
+            let Some(mut feed) = self.load_feed(&url, true) else {
+                // todo error handling!
+                warn!("Problem parsing feed file for feed {}", feed_config.url);
+                continue;
+            };
+            entries.append(&mut feed.entries);
+            // todo also trim mid-way when length > something, trading cpu for memory
+        }
+        trim_entries(entries)
+    }
+}
+
+fn trim_entries(mut entries: Vec<Entry>) -> Vec<Entry> {
+    entries.sort_by_key(|e| std::cmp::Reverse(e.updated.or(e.published).unwrap_or_default()));
+    entries.truncate(10);
+    entries
+}
+
+fn hv(headers: &HeaderMap, key: &str) -> String {
+    match headers.get(key) {
+        Some(hv) => hv.to_str().unwrap_or_default().to_string(),
+        _ => "".to_string(),
+    }
+}
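
For orientation: slugify_url flattens a feed URL's domain, path, and query into one slug, which becomes the file name of the stored feed body, with a .toml sidecar holding its FetchData. A minimal sketch of the expected mapping, assuming slug::slugify's usual behavior of lowercasing and replacing non-alphanumeric runs with hyphens:

    // Sketch only: mirrors slugify_url() for the feed in mars.toml.example.
    use url::Url;

    fn main() {
        let url = Url::parse("https://blog.fefe.de/rss.xml").unwrap();
        let domain = url.domain().unwrap(); // "blog.fefe.de"
        let query = url.query().unwrap_or(""); // ""
        let slug = slug::slugify(format!("{domain}{}{query}", url.path()));
        assert_eq!(slug, "blog-fefe-de-rss-xml");
        // feed body would live at   <feed_dir>/blog-fefe-de-rss-xml
        // fetch metadata sidecar at <feed_dir>/blog-fefe-de-rss-xml.toml
    }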
src/fetcher.rs (new file)
@@ -0,0 +1,70 @@
+use std::time::Instant;
+use ureq::tls::{TlsConfig, TlsProvider};
+use ureq::Agent;
+use url::Url;
+
+use crate::FeedStore;
+
+pub struct Fetcher {
+    agent: Agent,
+    /// FROM header for requests
+    from: String,
+}
+
+impl Fetcher {
+    pub fn new(bot_name: &str, from: &str) -> Fetcher {
+        // TODO Get URL from a better place, e.g. Cargo.toml?
+        let ua_name = format!("{bot_name}/{} https://TODO", env!("CARGO_PKG_VERSION"));
+        let agent = Agent::config_builder()
+            .http_status_as_error(false)
+            .user_agent(ua_name)
+            .tls_config(
+                TlsConfig::builder()
+                    .provider(TlsProvider::NativeTls)
+                    .build(),
+            )
+            .build()
+            .into();
+        Fetcher {
+            agent,
+            from: from.to_string(),
+        }
+    }
+
+    pub fn fetch(&self, url: Url, feed_store: &FeedStore) -> bool {
+        let fetchdata = feed_store.get_fetchdata(&url);
+        let mut builder = self
+            .agent
+            .get(url.to_string())
+            .header("FROM", self.from.clone());
+        if fetchdata.etag != "" {
+            builder = builder.header("If-None-Match", fetchdata.etag);
+        }
+        if fetchdata.date != "" {
+            builder = builder.header("If-Modified-Since", fetchdata.date);
+        }
+
+        let start_instant = Instant::now();
+        let result = builder.call();
+        let duration = start_instant.elapsed();
+
+        let response = result.unwrap(); // todo log and return false
+        debug!(
+            "fetched with status {} in {} ms: {url}",
+            response.status(),
+            duration.as_millis()
+        );
+        let status = response.status();
+        match status.as_u16() {
+            304 => false, // Not Modified -> nothing to do
+            200 => feed_store.store(&url, response),
+            _ => {
+                warn!(
+                    "HTTP Status {} not implemented for {url}",
+                    response.status()
+                );
+                false
+            }
+        }
+    }
+}
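
Taken together, fetch() implements HTTP conditional requests: the stored etag and date are sent back as If-None-Match and If-Modified-Since, so an unchanged feed answers with a cheap 304 and fetch() returns false (no rebuild). A minimal sketch of driving it directly, mirroring what main() does; the paths and address are placeholders:

    // Sketch only: assumes the feed dir exists, as to_checked_pathbuf asserts.
    let feed_store = FeedStore::new("/var/lib/planet-mars/feeds".to_string());
    let fetcher = Fetcher::new("planet-mars", "you@example.org");
    let url = Url::parse("https://blog.fefe.de/rss.xml").unwrap();
    let rebuild_needed = fetcher.fetch(url, &feed_store);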
src/main.rs (new file)
@@ -0,0 +1,98 @@
+#[macro_use]
+extern crate log;
+
+use crate::feed_store::FeedStore;
+use crate::fetcher::Fetcher;
+use clap::Parser;
+use serde::Deserialize;
+use std::fs;
+use std::path::PathBuf;
+use url::Url;
+
+mod feed_store;
+mod fetcher;
+
+#[derive(Parser)]
+#[command(author, version, about, long_about = None)]
+struct Args {
+    #[arg(
+        short,
+        long,
+        default_value_t = String::from("mars.toml")
+    )]
+    config: String,
+}
+
+#[derive(Deserialize)]
+struct Config {
+    /// used as part of the fetcher's user-agent header
+    bot_name: String,
+    /// where to store downloaded feeds and their metadata
+    feed_dir: String,
+    /// feeds to be aggregated
+    feeds: Vec<FeedConfig>,
+    /// email address to use for the From header when fetching feeds
+    from: String,
+    /// where to build the output files
+    out_dir: String,
+    /// templates folder
+    templates_dir: String,
+}
+
+pub fn to_checked_pathbuf(dir: String) -> PathBuf {
+    let dir: PathBuf = PathBuf::from(dir);
+
+    let m = dir
+        .metadata()
+        .unwrap_or_else(|_| panic!("Could not get metadata of dir: {}", dir.display()));
+    assert!(m.is_dir(), "Not a dir: {}", dir.display());
+    dir
+}
+
+#[derive(Deserialize)]
+struct FeedConfig {
+    url: String,
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    env_logger::init();
+    info!("starting up");
+
+    let args = Args::parse();
+    let config_path = &args.config;
+    if !fs::exists(config_path)? {
+        panic!("Configuration file {config_path} does not exist!");
+    }
+    let config: Config = toml::from_str(&fs::read_to_string(config_path)?)?;
+    let templates_dir = to_checked_pathbuf(config.templates_dir);
+    let out_dir = to_checked_pathbuf(config.out_dir);
+
+    let feed_store = FeedStore::new(config.feed_dir);
+    let fetcher = Fetcher::new(&config.bot_name, &config.from);
+
+    let mut rebuild = false;
+    for feed in &config.feeds {
+        let url = Url::parse(&feed.url)?;
+        rebuild |= fetcher.fetch(url, &feed_store);
+    }
+    info!("Done fetching. Rebuild needed: {rebuild}");
+    if rebuild {
+        let entries = feed_store.collect(&config.feeds);
+        let mut tera = match tera::Tera::new(&format!("{}/*", &templates_dir.display())) {
+            Ok(t) => t,
+            Err(e) => {
+                println!("Parsing error(s): {}", e);
+                ::std::process::exit(1);
+            }
+        };
+        tera.autoescape_on(vec![]);
+        let mut context = tera::Context::new();
+        context.insert("entries", &entries);
+        for name in tera.get_template_names() {
+            debug!("Processing template {name}");
+            let file = fs::File::create(&format!("{}/{name}", out_dir.display()))?;
+            let _ = tera.render_to(name, &context, file)?;
+        }
+    }
+    Ok(())
+}
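
For completeness: clap supplies -c/--config (defaulting to mars.toml) and env_logger reads the RUST_LOG environment variable, so a typical invocation might look like the following (binary name taken from Cargo.toml, config path a placeholder):

    RUST_LOG=debug ./planet-mars --config /etc/planet-mars/mars.toml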
templates/index.html (new file)
@@ -0,0 +1,8 @@
+hello world
+
+{% for entry in entries %}
+{% for link in entry.links %}
+{{link.href}}
+{% endfor %}
+{% endfor %}
+
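
Since main() inserts the collected entries into the Tera context, a template can use any field that feed_rs serializes. A slightly fuller sketch, assuming an Entry's title serializes as an optional object with a content field (an assumption about feed-rs's serde output, not verified here):

    {% for entry in entries %}
      {% if entry.title %}<h2>{{ entry.title.content }}</h2>{% endif %}
      {% for link in entry.links %}<a href="{{ link.href }}">{{ link.href }}</a>{% endfor %}
    {% endfor %}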