Also implemented the Atom feed via a template

This commit is contained in:
Thomas Koch 2025-01-11 21:17:55 +02:00
parent 6e325179a2
commit 66ddfaf94f
10 changed files with 259 additions and 114 deletions

28
Cargo.lock generated
View File

@ -108,6 +108,12 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "base64"
version = "0.21.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
[[package]]
name = "base64"
version = "0.22.1"
@ -125,6 +131,9 @@ name = "bitflags"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
dependencies = [
"serde",
]
[[package]]
name = "block-buffer"
@ -1104,6 +1113,8 @@ dependencies = [
"env_logger",
"feed-rs",
"log",
"quick-xml",
"ron",
"serde",
"slug",
"tera",
@ -1144,6 +1155,7 @@ checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003"
dependencies = [
"encoding_rs",
"memchr",
"serde",
]
[[package]]
@ -1229,6 +1241,18 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "ron"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94"
dependencies = [
"base64 0.21.7",
"bitflags",
"serde",
"serde_derive",
]
[[package]]
name = "rustix"
version = "0.38.43"
@ -1667,7 +1691,7 @@ version = "3.0.0-rc5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77f9bd1d03fe3bf87c90b115a0e09e22535cbcff3f8d0a6487524f006325f173"
dependencies = [
"base64",
"base64 0.22.1",
"brotli-decompressor",
"cc",
"cookie_store",
@ -1695,7 +1719,7 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a27729fd2c15426f48992a911ce31bd1f29c1cd0898c2c86b410d24f51c99eb3"
dependencies = [
"base64",
"base64 0.22.1",
"http",
"httparse",
"log",

View File

@ -9,10 +9,11 @@ clap = { version = "*", features = ["derive"] }
env_logger = "*"
feed-rs = "*"
log = "*"
ron = "*" # todo for development, to check feed-rs internal representation of feeds
serde = { version = "*", features = ["derive"] }
slug = "*"
tera = "*"
toml = "*"
ureq = { version = "3.0.0-rc5", features = ["brotli", "charset", "gzip", "native-tls"]}
url = "*"
quick-xml = { version = "*", features = ["serialize"] }

59
planet.css Normal file
View File

@ -0,0 +1,59 @@
/* Stylesheet for the generated planet page. */

/* Cap the width of running text and headings. */
p, h1, h2, h3, h4, h5, h6, small {
  max-width: 48em;
}

/* Links inside headings take the heading's colour and are not underlined. */
h1 a, h2 a, h3 a, h4 a, h5 a, h6 a {
  color: inherit !important;
  text-decoration: none;
}

ul, ol {
  /* account for the 1em -webkit-margin-start for the list icon */
  max-width: 45em;
}

ul, ol, dl, p {
  line-height: 1.4;
}

body {
  margin-top: 1em;
  margin-bottom: 1em;
}

/* Flex container holding <main> and <aside> side by side. */
#maincontainer {
  display: flex;
  max-width: 80em;
}

#maincontainer main {
  max-width: 50em;
}

/* Applies the same cap to every descendant of <main>. */
#maincontainer main * {
  max-width: 50em;
}

#maincontainer aside {
  margin-left: 5em;
  max-width: 25em;
}

article > h2.entry_header {
  margin-bottom: 3px;
}

.entry_meta {
  /* Was `1px thin`: two widths and no style make the declaration invalid,
     so browsers dropped it and no border was drawn. */
  border: 1px solid;
  padding: 3px 0;
  background-color: LightBlue;
}

/* The separator between entries is drawn as centred asterisks, not a rule. */
hr.entry_sep {
  border: none;
}

hr.entry_sep::before {
  content: '* * *';
  display: block;
  text-align: center;
}

View File

@ -1,5 +1,7 @@
use anyhow::Result;
use feed_rs::model::Entry;
use feed_rs::model::Feed;
use ron::ser::{to_string_pretty, PrettyConfig};
use serde::{Deserialize, Serialize};
use std::convert::AsRef;
use std::fs;
@ -28,43 +30,47 @@ impl FeedStore {
}
fn slugify_url(url: &Url) -> String {
let domain = url.domain().unwrap();
let domain = url.domain().unwrap(); // todo don't hide error
let query = url.query().unwrap_or("");
slug::slugify(format!("{domain}{}{query}", url.path()))
}
/// Builds the on-disk path for `url` inside the store directory:
/// `<dir>/<slugified url><ext>`. `ext` is appended verbatim, so pass it
/// with its leading dot (or an empty string for no extension).
fn generic_path(&self, url: &Url, ext: &str) -> String {
    let dir = self.dir.display();
    let slug = Self::slugify_url(url);
    format!("{dir}/{slug}{ext}")
}
fn feed_path(&self, url: &Url) -> String {
format!("{}/{}", self.dir.display(), Self::slugify_url(url))
self.generic_path(url, "")
}
fn fetchdata_path(&self, url: &Url) -> String {
format!("{}.toml", self.feed_path(url))
self.generic_path(url, ".toml")
}
pub fn load_fetchdata(&self, url: &Url) -> FetchData {
pub fn load_fetchdata(&self, url: &Url) -> Result<FetchData> {
let path = self.fetchdata_path(url);
if !fs::exists(path.clone()).unwrap() {
return FetchData::default();
if !fs::exists(path.clone())? {
return Ok(FetchData::default());
}
toml::from_str(&fs::read_to_string(path).unwrap()).unwrap()
Ok(toml::from_str(&fs::read_to_string(path)?)?)
}
fn has_changed(&self, url: &Url, new_feed: &Feed) -> bool {
fn has_changed(&self, url: &Url, new_feed: &Feed) -> Result<bool> {
let Some(old_feed) = self.load_feed(url, false) else {
return true;
return Ok(true);
};
let mut old_iter = old_feed.entries.iter();
for new in &new_feed.entries {
let Some(old) = old_iter.next() else {
return true;
return Ok(true);
};
if old != new {
return true;
return Ok(true);
}
}
// ignoring any entries left in old_iter
false
Ok(false)
}
fn write<P: AsRef<std::path::Path> + std::fmt::Display, C: AsRef<[u8]>>(
@ -77,7 +83,7 @@ impl FeedStore {
fs::write(path, contents)
}
pub fn store(&self, url: &Url, mut response: Response<Body>) -> bool {
pub fn store(&self, url: &Url, mut response: Response<Body>) -> Result<bool> {
let headers = response.headers();
let fetchdata = FetchData {
etag: hv(headers, "etag"),
@ -94,19 +100,24 @@ impl FeedStore {
Ok(f) => f,
Err(e) => {
warn!("Error when parsing feed for {url}: {e:?}");
return false;
return Ok(false);
}
};
if !self.has_changed(url, &feed) {
return false;
if !self.has_changed(url, &feed)? {
return Ok(false);
}
debug!("Storing feed for {url}.");
let _ = Self::write(self.feed_path(url), body);
let _ = Self::write(
// todo don't serialize to string but to writer
Self::write(
self.generic_path(url, ".ron"),
to_string_pretty(&feed, PrettyConfig::default())?,
)?;
Self::write(self.feed_path(url), body)?;
Self::write(
self.fetchdata_path(url),
toml::to_string(&fetchdata).unwrap(),
);
true
)?;
Ok(true)
}
fn load_feed(&self, url: &Url, sanitize: bool) -> Option<Feed> {
@ -132,6 +143,7 @@ impl FeedStore {
warn!("Problem parsing feed file for feed {}", feed_config.url);
continue;
};
entries.append(&mut feed.entries);
// todo also trim mid-way when length > something, trading cpu for memory
}

View File

@ -1,3 +1,4 @@
use anyhow::Result;
use std::time::Instant;
use ureq::tls::{TlsConfig, TlsProvider};
use ureq::Agent;
@ -31,8 +32,8 @@ impl Fetcher {
}
}
pub fn fetch(&self, url: Url, feed_store: &FeedStore) -> bool {
let fetchdata = feed_store.load_fetchdata(&url);
pub fn fetch(&self, url: Url, feed_store: &FeedStore) -> Result<bool> {
let fetchdata = feed_store.load_fetchdata(&url)?;
let mut builder = self
.agent
.get(url.to_string())
@ -48,7 +49,7 @@ impl Fetcher {
let result = builder.call();
let duration = start_instant.elapsed();
let response = result.unwrap(); // todo log and return false
let response = result?; // todo log and return false
debug!(
"fetched with status {} in {} ms: {url}",
response.status(),
@ -56,14 +57,14 @@ impl Fetcher {
);
let status = response.status();
match status.as_u16() {
304 => false, // Not Modified -> nothing to do
304 => Ok(false), // Not Modified -> nothing to do
200 => feed_store.store(&url, response),
_ => {
warn!(
"HTTP Status {} not implemented for {url}",
response.status()
);
false
Ok(false)
}
}
}

View File

@ -6,14 +6,14 @@ use crate::fetcher::Fetcher;
use anyhow::Result;
use clap::Parser;
use serde::Deserialize;
use simple_entry::SimpleEntry;
use std::fs;
use std::path::PathBuf;
use url::Url;
//mod atom_serializer;
mod feed_store;
mod fetcher;
mod simple_entry;
mod template_engine;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
@ -70,42 +70,12 @@ fn fetch(config: &Config, feed_store: &FeedStore) -> Result<bool> {
continue;
}
};
rebuild |= fetcher.fetch(url, feed_store);
rebuild |= fetcher.fetch(url, feed_store)?;
}
info!("Done fetching. Rebuild needed: {rebuild}");
Ok(rebuild)
}
/// Renders every template found in the configured templates directory into
/// the configured output directory.
///
/// The only value exposed to the templates is `entries`: the collected feed
/// entries, each converted with `SimpleEntry::from_feed_entry`.
///
/// If the templates cannot be parsed, the error is printed to stdout and the
/// process exits with status 1. Failures to create an output file or to render
/// a template are returned as errors.
fn build(config: &Config, feed_store: &FeedStore) -> Result<()> {
    let templates_dir = to_checked_pathbuf(&config.templates_dir);
    let out_dir = to_checked_pathbuf(&config.out_dir);

    let glob = format!("{}/*", &templates_dir.display());
    let mut tera = tera::Tera::new(&glob).unwrap_or_else(|e| {
        println!("Parsing error(s): {}", e);
        ::std::process::exit(1)
    });
    // Autoescaping is turned off because it would corrupt URLs and the entry
    // contents. todo: verify that this is really needed.
    tera.autoescape_on(vec![]);

    let entries: Vec<SimpleEntry> = feed_store
        .collect(&config.feeds)
        .into_iter()
        .map(SimpleEntry::from_feed_entry)
        .collect();
    let mut context = tera::Context::new();
    context.insert("entries", &entries);

    for name in tera.get_template_names() {
        debug!("Processing template {name}");
        let target = format!("{}/{name}", out_dir.display());
        let file = fs::File::create(target)?;
        tera.render_to(name, &context, file)?;
    }
    Ok(())
}
fn main() -> Result<()> {
env_logger::init();
info!("starting up");
@ -129,7 +99,7 @@ fn main() -> Result<()> {
};
if should_build {
build(&config, &feed_store)?;
template_engine::build(&config, &feed_store)?;
}
Ok(())
}

View File

@ -1,43 +0,0 @@
use feed_rs::model::Entry;
/// Simplified feed entry for easier value access in templates.
///
/// All fields are plain strings so a template can print them directly.
/// Values are filled in by `SimpleEntry::from_feed_entry`; a value missing
/// from the source entry becomes an empty string (for `date`, the default
/// timestamp is formatted instead).
#[derive(serde::Serialize)]
pub struct SimpleEntry {
/// The entry's `updated` timestamp, or `published` when `updated` is
/// absent, formatted with `FMT`.
pub date: String,
/// Body of the entry's content.
pub content: String,
/// Name of the entry's first author.
pub author: String,
/// `href` of the entry's first link.
pub link: String,
/// Text of the entry's title.
pub title: String,
}
/// Format string used for an entry's timestamp in
/// `SimpleEntry::from_feed_entry`.
///
/// Syntax reference: <https://docs.rs/chrono/latest/chrono/format/strftime>
///
/// NOTE(review): `%c` is presumably chrono's combined date-and-time
/// representation; confirm against the linked documentation.
const FMT: &str = "%c";
impl SimpleEntry {
    /// Flattens a parsed feed entry into the string-only shape used by the
    /// templates.
    ///
    /// - `date`: `updated`, else `published`, else the default timestamp,
    ///   formatted with `FMT`.
    /// - `content`: the content body, or an empty string.
    /// - `author` / `link`: name of the first author / `href` of the first
    ///   link, or an empty string when the list is empty.
    /// - `title`: the title text, or an empty string.
    pub fn from_feed_entry(entry: Entry) -> Self {
        let timestamp = entry.updated.or(entry.published).unwrap_or_default();
        let date = timestamp.format(FMT).to_string();

        let content = entry
            .content
            .and_then(|content| content.body)
            .unwrap_or_default();

        let author = entry
            .authors
            .first()
            .map(|person| person.name.clone())
            .unwrap_or_default();

        let link = entry
            .links
            .first()
            .map(|first| first.href.clone())
            .unwrap_or_default();

        let title = entry.title.map(|text| text.content).unwrap_or_default();

        Self {
            date,
            content,
            author,
            link,
            title,
        }
    }
}

35
src/template_engine.rs Normal file
View File

@ -0,0 +1,35 @@
use crate::feed_store::FeedStore;
use crate::to_checked_pathbuf;
use crate::Config;
use anyhow::Result;
use feed_rs::model::Entry;
use std::fs::File;
use tera::Tera;
pub fn build(config: &Config, feed_store: &FeedStore) -> Result<()> {
let tera = create_tera(&config.templates_dir)?;
let out_dir = to_checked_pathbuf(&config.out_dir);
let mut context = tera::Context::new();
let feed_entries: Vec<Entry> = feed_store.collect(&config.feeds);
context.insert("entries", &feed_entries);
context.insert("PKG_AUTHORS", env!("CARGO_PKG_AUTHORS"));
context.insert("PKG_HOMEPAGE", env!("CARGO_PKG_HOMEPAGE"));
context.insert("PKG_NAME", env!("CARGO_PKG_NAME"));
context.insert("PKG_VERSION", env!("CARGO_PKG_VERSION"));
for name in tera.get_template_names() {
debug!("Processing template {name}");
let file = File::create(format!("{}/{name}", out_dir.display()))?;
tera.render_to(name, &context, file)?;
}
Ok(())
}
fn create_tera(templates_dir: &str) -> Result<Tera> {
let dir = to_checked_pathbuf(templates_dir);
let mut tera = tera::Tera::new(&format!("{}/*", &dir.display()))?;
// disable autoescape as this would corrupt urls or the entriy contents. todo check this!
tera.autoescape_on(vec![]);
Ok(tera)
}

52
templates/atom.xml Normal file
View File

@ -0,0 +1,52 @@
<?xml version="1.0" encoding="utf-8"?>
{# Atom feed of all collected entries.
   Context used here: `entries` plus PKG_NAME, PKG_AUTHORS, PKG_HOMEPAGE and
   PKG_VERSION. Autoescaping is disabled for all templates, so every value
   placed into XML text or attributes is escaped explicitly with `escape`. #}
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Planet TVL</title>
<link href="https://planet.tvl.fyi"/>
{# The format string ends in a literal "Z", so the timestamp must be UTC. #}
<updated>{{now(utc=true)|date(format="%Y-%m-%dT%H:%M:%SZ")}}</updated>
<id>https://planet.tvl.fyi</id>
<generator uri="{{ PKG_HOMEPAGE }}" version="{{ PKG_VERSION }}">
{{ PKG_NAME }} by {{ PKG_AUTHORS }}
</generator>
<icon>https://planet.tvl.fyi/logo.svg</icon>
{% for entry in entries %}
<entry>
<id>{{ entry.id|escape }}/planet.tvl.fyi</id>
{% if entry.title -%}
<title>{{ entry.title.content|escape }}</title>
{% endif -%}
{% for link in entry.links %}
<link href="{{ link.href|escape }}" {% if link.rel %}rel="{{ link.rel|escape }}"{% endif %}/>
{% endfor %}
{% if entry.updated %}
<updated>{{ entry.updated }}</updated>
{% endif %}
{% if entry.published %}
<published>{{ entry.published }}</published>
{% endif %}
{% if entry.summary -%}
<summary>
{{ entry.summary.content|escape }}
</summary>
{% endif -%}
{% for author in entry.authors %}
<author>
{% if author.name -%}
<name>{{ author.name|escape }}</name>
{% endif -%}
{% if author.email -%}
<email>{{ author.email|escape }}</email>
{% endif -%}
{# <uri> belongs inside <author> and is guarded by its own value. #}
{% if author.uri -%}
<uri>{{ author.uri|escape }}</uri>
{% endif -%}
</author>
{% endfor %}
{% if entry.content -%}
{# NOTE(review): confirm that `type` and `src` match the field names and shapes of the serialized content object. #}
<content {% if entry.content.type %}type="{{ entry.content.type }}"{% endif %} {% if entry.content.src %}src="{{ entry.content.src }}"{% endif %}>
{{ entry.content.body|escape }}
</content>
{% endif -%}
</entry>
{% endfor %}
</feed>

View File

@ -1,3 +1,5 @@
{% set dateformat = "%d.%m.%Y %H:%M" -%}
<html>
<head>
<title>Planet TVL</title>
@ -5,7 +7,6 @@
<meta name="generator" content="planet-mars">
<link rel="shortcut icon" href="/favicon.ico">
<link rel="stylesheet" href="planet.css" type="text/css">
{# todo <link rel="alternate" type="application/rss+xml" title="Planet Haskell RSS Feed" href="rss20.xml"> #}
<link rel="alternate" type="application/atom+xml" title="Planet TVL Atom Feed" href="atom.xml">
</head>
<body>
@ -14,17 +15,50 @@
</header>
<div id="maincontainer">
<main>
{% for entry in entries %}
{% for entry in entries -%}
{% if loop.index > 1 -%}
<hr class="entry_sep">
{% endif -%}
{% if entry.links.0 -%}
{% set link = entry.links.0.href -%}
{% else -%}
{% set link = "" -%}
{% endif -%}
<article>
<h2><a href="{{entry.link}}">{{ entry.title|striptags }}<a></h2>
<date>{% if entry.published %}{{ entry.published | date(format="%Y-%m-%d %H:%M", timezone="Europe/Moscow") }}{% endif %}</date>{# todo: maybe group posts by day? #}
<p class="entry_author">{{ entry.author|striptags }}</p>
<div class="entry_content">
{{ entry.content }}
<h2 class="entry_header">
<a {% if link -%}href="{{link}}"{% endif -%}>
{% if entry.title -%}
{{ entry.title.content|striptags }}
{% else -%}
NO TITLE
{% endif -%}
</a>
</h2>
<div class="entry_meta">
<date>
{% if entry.updated -%}
<span>{{ entry.updated | date(format=dateformat) }}</span>
{% else -%}
<span>{{ entry.published | date(format=dateformat) }}</span>
{% endif -%}
</date>
{% if entry.authors -%}
&mdash; <span class="entry_author">{{ entry.authors.0.name | striptags }}</span>
{% endif -%}
</div>
{% if link %}<p><a href="{{ link }}">full post</a></p>{% endif %}
{% if entry.summary -%}
<div class="entry_summary">
{{ entry.summary.content }}
</div>
{% endif -%}
{% if entry.content -%}
<div class="entry_content">
{{ entry.content.body }}
</div>
{% endif -%}
</article>
{% endfor %}
{% endfor -%}
</main>
<aside>