Compare commits

10 Commits

5c629bce7a ... 5271c4c9aa
| Author | SHA1 | Date |
|---|---|---|
| | 5271c4c9aa | |
| | a15d380bc9 | |
| | eb09729f38 | |
| | 3d3bbc3709 | |
| | d41e2bdd1c | |
| | 50f87e0ea0 | |
| | 32d314a357 | |
| | f333fe2b22 | |
| | 917aed0247 | |
| | f8c5668506 | |

.gitignore (vendored, 4 changes)
@@ -1,2 +1,4 @@
 /target
 /mars.toml
+/out
+/feeds

Cargo.lock (generated, 69 changes)
@@ -128,9 +128,9 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
 
 [[package]]
 name = "bitflags"
-version = "2.6.0"
+version = "2.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+checksum = "1be3f42a67d6d345ecd59f675f3f012d6974981560836e938c22b424b85ce1be"
 dependencies = [
  "serde",
 ]
@@ -183,10 +183,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b"
 
 [[package]]
-name = "cc"
-version = "1.2.7"
+name = "camino"
+version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7"
+checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "cc"
+version = "1.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8293772165d9345bdaaa39b45b2109591e63fe5e6fbc23c6ff930a048aa310b"
 dependencies = [
  "shlex",
 ]
@@ -199,9 +208,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
 [[package]]
 name = "chrono"
-version = "0.4.39"
+version = "0.4.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
+checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
 dependencies = [
  "android-tzdata",
  "iana-time-zone",
@@ -209,7 +218,7 @@ dependencies = [
  "num-traits",
  "serde",
  "wasm-bindgen",
- "windows-targets",
+ "windows-link",
 ]
 
 [[package]]
@@ -236,9 +245,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.25"
+version = "4.5.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b95dca1b68188a08ca6af9d96a6576150f598824bdb528c1190460c2940a0b48"
+checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -246,9 +255,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.25"
+version = "4.5.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ab52925392148efd3f7562f2136a81ffb778076bcc85727c6e020d6dd57cf15"
+checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121"
 dependencies = [
  "anstream",
  "anstyle",
@@ -1106,14 +1115,15 @@ checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
 
 [[package]]
 name = "planet-mars"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
+ "camino",
+ "chrono",
  "clap",
  "env_logger",
  "feed-rs",
  "log",
- "quick-xml",
  "ron",
  "serde",
  "slug",
@@ -1140,9 +1150,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.92"
+version = "1.0.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
+checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
 dependencies = [
  "unicode-ident",
 ]
@@ -1155,7 +1165,6 @@ checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003"
 dependencies = [
  "encoding_rs",
  "memchr",
- "serde",
 ]
 
 [[package]]
@@ -1268,9 +1277,9 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.23.20"
+version = "0.23.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b"
+checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8"
 dependencies = [
  "log",
  "once_cell",
@@ -1460,9 +1469,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
 
 [[package]]
 name = "syn"
-version = "2.0.95"
+version = "2.0.96"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a"
+checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1518,18 +1527,18 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "2.0.10"
+version = "2.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3ac7f54ca534db81081ef1c1e7f6ea8a3ef428d2fc069097c079443d24124d3"
+checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "2.0.10"
+version = "2.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e9465d30713b56a37ede7185763c3492a91be2f5fa68d958c44e41ab9248beb"
+checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1889,6 +1898,12 @@ dependencies = [
  "windows-targets",
 ]
 
+[[package]]
+name = "windows-link"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38"
+
 [[package]]
 name = "windows-sys"
 version = "0.52.0"
@@ -1973,9 +1988,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
 [[package]]
 name = "winnow"
-version = "0.6.22"
+version = "0.6.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39281189af81c07ec09db316b302a3e67bf9bd7cbf6c820b50e35fee9c2fa980"
+checksum = "c8d71a593cc5c42ad7876e2c1fda56f314f3754c084128833e64f1345ff8a03a"
 dependencies = [
  "memchr",
 ]

Cargo.toml (28 changes)
@@ -1,6 +1,6 @@
 [package]
 name = "planet-mars"
-version = "0.1.0"
+version = "0.1.1"
 edition = "2021"
 authors = ["Thomas Koch <thomas@koch.ro>"]
 description = "Feed aggregation planet like Planet Venus, produces static HTML and ATOM feed from fetched feeds."
@@ -10,16 +10,18 @@ keywords = ["atom", "rss", "planet", "feed", "blogging"]
 categories = ["web-programming"]
 
 [dependencies]
-anyhow = "*"
-clap = { version = "*", features = ["derive"] }
-env_logger = "*"
-feed-rs = "*"
-log = "*"
-ron = "*" # todo for development, to check atom-rs internal representation of feeds
-serde = { version = "*", features = ["derive"] }
-slug = "*"
-tera = "*"
-toml = "*"
+anyhow = "1"
+camino = { version = "1.1.9", features = ["serde", "serde1"] }
+chrono = { version = "0.4.40", features = ["now", "serde"] }
+clap = { version = "4", features = ["derive"] }
+env_logger = "0"
+feed-rs = "2"
+log = "0"
+ron = "0"
+serde = { version = "1", features = ["derive"] }
+slug = "0"
+tera = "1"
+toml = "0"
 ureq = { version = "3.0.0-rc5", features = ["brotli", "charset", "gzip", "native-tls"]}
-url = "*"
-quick-xml = { version = "*", features = ["serialize"] }
+url = "2"
+

README.md (17 changes)
@@ -1,13 +1,24 @@
-Simple planet like planet venus but in rust and maintained.
+Simple successor to Planet Venus but in Rust and maintained.
 
 Please see the rustdoc of main.rs for further information.
 
-## todo
+## Todo
 
-* use a nice lib to process the config file
+* find and use a nice lib to process the config file
   * should check whether dirs exists and are writeable
   * should check whether feed urls can be parsed
 
+## Planet Venus
+
+Planet Venus is used by many planets on the internet. However its code has not
+been maintained since ~2011 and it uses Python 2.
+
+Planet Mars should be a lightweight successor to Planet Venus.
+
+Still the Planet Venus documentation contains some useful information on
+[Etiquette](https://intertwingly.net/code/venus/docs/etiquette.html) for
+Planet hosters.
+
 ## Credits
 
 While writing this, I read and also copied code from:

default.nix (new file, 17 lines)
@@ -0,0 +1,17 @@
+{ depot, pkgs, ... }:
+
+pkgs.rustPlatform.buildRustPackage {
+  name = "planet-mars";
+  src = depot.third_party.gitignoreSource ./.;
+  cargoLock.lockFile = ./Cargo.lock;
+
+  nativeBuildInputs = [ pkgs.pkg-config ];
+  buildInputs = [ pkgs.openssl ];
+
+  # planet-mars is mirrored to Github.
+  passthru.meta.ci.extraSteps.github = depot.tools.releases.filteredGitPush {
+    filter = ":/web/planet-mars";
+    remote = "git@github.com:thkoch2001/planet-mars.git";
+    ref = "refs/heads/master";
+  };
+}
@@ -3,6 +3,7 @@ feed_dir = "/var/lib/planet-mars/feeds"
 from = "thomas@koch.ro"
 out_dir = "/var/lib/planet-mars/out"
 templates_dir = "/var/lib/planet-mars/templates"
+max_entries = 50
 
 [[feeds]]
 url = "https://blog.fefe.de/rss.xml"
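For orientation, a complete configuration after this change would look roughly like the sketch below. The keys and values visible in the hunk above are taken verbatim; `bot_name` and the new `refresh` setting come from the `Config` struct in src/main.rs further down, and the concrete values given for them here are assumptions, not part of the patch.

```toml
# Hypothetical assembled example config (the file name is not shown in this compare view).
bot_name = "planet-mars"                          # assumed value
feed_dir = "/var/lib/planet-mars/feeds"
from = "thomas@koch.ro"
out_dir = "/var/lib/planet-mars/out"
templates_dir = "/var/lib/planet-mars/templates"
max_entries = 50
refresh = 2                                       # hours between fetches; assumed value

[[feeds]]
url = "https://blog.fefe.de/rss.xml"
```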
@@ -1,84 +1,213 @@
-use anyhow::bail;
 use anyhow::Result;
+use camino::{Utf8Path, Utf8PathBuf};
+use chrono::{DateTime, Duration, Utc};
 use feed_rs::model::Entry;
 use feed_rs::model::Feed;
 use ron::ser::{to_string_pretty, PrettyConfig};
 use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::convert::AsRef;
 use std::fs;
-use std::io::BufReader;
-use std::path::PathBuf;
+use std::time::Instant;
 use ureq::http::HeaderMap;
 use ureq::http::Response;
 use ureq::Body;
 use url::Url;
 
-#[derive(Deserialize, Serialize, Default)]
-pub struct FetchData {
-    pub etag: String,
-    pub last_modified: String,
+pub fn slugify_url(url: &Url) -> String {
+    let domain = url.domain().unwrap();
+    let query = url.query().unwrap_or("");
+    slug::slugify(format!("{domain}{}{query}", url.path()))
 }
 
-pub struct FeedStore {
-    pub dir: PathBuf,
+/// Stored settings/info about a feed.
+#[derive(Debug, Deserialize, Serialize)]
+pub struct FeedStoreFeedInfo {
+    /// First time we added this feed.
+    ///
+    /// Used for historical purposes only.
+    pub added: DateTime<Utc>,
+    /// Last known cached entry, if any.
+    ///
+    /// Used to let the server know whether we need a new entry or not.
+    pub fetch_data: Option<FetchData>,
 }
 
-impl FeedStore {
-    pub fn new(dir: &str) -> Self {
+impl FeedStoreFeedInfo {
+    pub fn new() -> Self {
         Self {
-            dir: super::to_checked_pathbuf(dir),
+            added: Utc::now(),
+            fetch_data: None,
         }
     }
+}
 
-    fn slugify_url(url: &Url) -> Result<String> {
-        let Some(domain) = url.domain() else {
-            bail!("Url has no domain: '{url}'.")
-        };
-        let query = url.query().unwrap_or("");
-        Ok(slug::slugify(format!("{domain}{}{query}", url.path())))
-    }
-
-    fn generic_path(&self, url: &Url, ext: &str) -> Result<String> {
-        Ok(format!(
-            "{}/{}{ext}",
-            self.dir.display(),
-            Self::slugify_url(url)?
-        ))
-    }
-
-    fn feed_path(&self, url: &Url) -> Result<String> {
-        self.generic_path(url, "")
-    }
-
-    fn fetchdata_path(&self, url: &Url) -> Result<String> {
-        self.generic_path(url, ".toml")
-    }
-
-    pub fn load_fetchdata(&self, url: &Url) -> Result<FetchData> {
-        let path = self.fetchdata_path(url)?;
-        if !fs::exists(path.clone())? {
-            return Ok(FetchData::default());
-        }
-        Ok(toml::from_str(&fs::read_to_string(path)?)?)
-    }
-
-    fn has_changed(&self, url: &Url, new_feed: &Feed) -> Result<bool> {
-        let Some(old_feed) = self.load_feed(url, false)? else {
-            return Ok(true);
+/// Storage for a single feed.
+///
+/// Contains one [FeedStoreVersion] for every time the feed has been successfully fetched,
+/// and one [FeedStoreEntry] for each article referenced throughout the entries.
+#[derive(Debug, Deserialize, Serialize)]
+pub struct FeedStoreFeed {
+    /// The feed URL
+    pub url: Url,
+    /// Where it's stored, should be inside the [FeedStore::dir].
+    pub dir: Utf8PathBuf,
+    /// Raw feed path
+    pub path_feed: Utf8PathBuf,
+    /// Raw feed RON path
+    pub path_feed_ron: Utf8PathBuf,
+    /// Settings path
+    pub path_settings: Utf8PathBuf,
+    /// Detailed settings/info about a feed.
+    pub info: FeedStoreFeedInfo,
+    /// Stored copy of the raw XML feed (if any)
+    pub raw_feed: Option<String>,
+}
+
+impl FeedStoreFeed {
+    pub fn new(basedir: &Utf8Path, url: &Url) -> Self {
+        let dir = basedir.join(slugify_url(url));
+        if !dir.is_dir() {
+            std::fs::create_dir_all(&dir).unwrap();
+        }
+
+        let path_settings = dir.join("settings.toml");
+        let info: FeedStoreFeedInfo = match std::fs::read_to_string(&path_settings) {
+            Ok(s) => toml::from_str(&s).unwrap(),
+            Err(_e) => {
+                // Assume file has not been created yet. Initialize
+                let info = FeedStoreFeedInfo::new();
+                std::fs::write(&path_settings, toml::to_string(&info).unwrap()).unwrap();
+                info
+            }
         };
+
+        let raw_feed: Option<String> = std::fs::read_to_string(dir.join("feed.xml")).ok();
+
+        Self {
+            dir: dir.clone(),
+            path_feed: dir.join("feed.xml"),
+            path_feed_ron: dir.join("feed.ron"),
+            path_settings: dir.join("settings.toml"),
+            url: url.clone(),
+            info,
+            raw_feed,
+        }
+    }
+
+    pub fn load_fetchdata(&self) -> Option<&FetchData> {
+        self.info.fetch_data.as_ref()
+    }
+
+    pub fn load_feed(&self, sanitize: bool) -> Option<Feed> {
+        if let Some(raw_feed) = &self.raw_feed {
+            let parser = feed_rs::parser::Builder::new()
+                .sanitize_content(sanitize)
+                .build();
+            Some(parser.parse(raw_feed.as_bytes()).unwrap())
+        } else {
+            None
+        }
+    }
+
+    pub fn has_changed(&self, new_feed: &Feed) -> bool {
+        let Some(old_feed) = self.load_feed(false) else {
+            return true;
+        };
+
         let mut old_iter = old_feed.entries.iter();
         for new in &new_feed.entries {
             let Some(old) = old_iter.next() else {
-                return Ok(true);
+                return true;
             };
             if old != new {
-                return Ok(true);
+                return true;
             }
         }
         // ignoring any entries left in old_iter
-        Ok(false)
+        false
+    }
+
+    pub fn store(&mut self, mut response: Response<Body>) -> Result<bool> {
+        let headers = response.headers();
+        let fetchdata = FetchData {
+            etag: hv(headers, "etag"),
+            last_modified: hv(headers, "last_modified"),
+            when: Utc::now(),
+        };
+
+        let body = response.body_mut().with_config().read_to_vec()?;
+        let feed = match feed_rs::parser::parse(body.as_slice()) {
+            Ok(f) => f,
+            Err(e) => {
+                warn!("Error when parsing feed for {}: {e:?}", self.url);
+                return Ok(false);
+            }
+        };
+
+        debug!("Storing fetchdata for {}", self.url);
+        self.info.fetch_data = Some(fetchdata);
+        Self::write(&self.path_settings, toml::to_string(&self.info)?)?;
+
+        if !self.has_changed(&feed) {
+            return Ok(false);
+        }
+        debug!("Storing feed for {}.", self.url);
+        // todo don't serialize to string but to writer
+        Self::write(
+            &self.path_feed_ron,
+            to_string_pretty(&feed, PrettyConfig::default())?,
+        )?;
+        Self::write(&self.path_feed, body)?;
+        Ok(true)
+    }
+
+    /// refresh in hours
+    pub fn fetch(&mut self, fetcher: &super::Fetcher, refresh: usize) -> Result<bool> {
+        let mut builder = fetcher
+            .agent
+            .get(self.url.to_string())
+            .header("FROM", fetcher.from.clone());
+
+        if let Some(fetchdata) = self.load_fetchdata() {
+            if !fetchdata.etag.is_empty() {
+                builder = builder.header("If-None-Match", fetchdata.etag.clone());
+            }
+            if !fetchdata.last_modified.is_empty() {
+                builder = builder.header("If-Modified-Since", fetchdata.last_modified.clone());
+            }
+
+            // Check if we have hit time for refresh
+            if fetchdata.when + Duration::try_hours(refresh as i64).unwrap() >= Utc::now() {
+                // No need to rebuild, check again later
+                return Ok(false);
+            }
+        }
+
+        let start_instant = Instant::now();
+        let result = builder.call();
+        let duration = start_instant.elapsed();
+
+        let response = result?;
+        debug!(
+            "fetched with status {} in {} ms: {}",
+            response.status(),
+            duration.as_millis(),
+            self.url,
+        );
+        let status = response.status();
+        match status.as_u16() {
+            304 => Ok(false), // Not Modified -> nothing to do
+            200 => self.store(response),
+            _ => {
+                warn!(
+                    "HTTP Status {} not implemented for {}",
+                    response.status(),
+                    self.url,
+                );
+                Ok(false)
+            }
+        }
     }
 
     fn write<P: AsRef<std::path::Path> + std::fmt::Display, C: AsRef<[u8]>>(
@@ -90,85 +219,66 @@ impl FeedStore {
         }
         fs::write(path, contents)
     }
+}
 
-    pub fn store(&self, url: &Url, mut response: Response<Body>) -> Result<bool> {
-        let headers = response.headers();
-        let fetchdata = FetchData {
-            etag: hv(headers, "etag"),
-            last_modified: hv(headers, "last_modified"),
-        };
-        let body = response.body_mut().with_config().read_to_vec()?;
-        let feed = match feed_rs::parser::parse(body.as_slice()) {
-            Ok(f) => f,
-            Err(e) => {
-                warn!("Error when parsing feed for {url}: {e:?}");
-                return Ok(false);
-            }
-        };
-        if !self.has_changed(url, &feed)? {
-            return Ok(false);
-        }
-        debug!("Storing feed for {url}.");
-        // todo don't serialize to string but to writer
-        Self::write(
-            self.generic_path(url, ".ron")?,
-            to_string_pretty(&feed, PrettyConfig::default())?,
-        )?;
-        Self::write(self.feed_path(url)?, body)?;
-        Self::write(self.fetchdata_path(url)?, toml::to_string(&fetchdata)?)?;
-        Ok(true)
-    }
+#[derive(Clone, Debug, Deserialize, Serialize, Default)]
+pub struct FetchData {
+    pub when: DateTime<Utc>,
+    pub etag: String,
+    pub last_modified: String,
+}
 
-    fn load_feed(&self, url: &Url, sanitize: bool) -> Result<Option<Feed>> {
-        let parser = feed_rs::parser::Builder::new()
-            .sanitize_content(sanitize)
-            .build();
-
-        let path = self.feed_path(url)?;
-        if !fs::exists(path.clone())? {
-            return Ok(None);
-        }
-        let file = fs::File::open(path)?;
-        Ok(Some(parser.parse(BufReader::new(file))?))
-    }
+#[derive(Debug)]
+pub struct FeedStore {
+    pub _dir: Utf8PathBuf,
+    pub feeds: BTreeMap<Url, FeedStoreFeed>,
+}
 
-    pub fn collect(
-        &self,
-        feed_configs: &Vec<super::FeedConfig>,
-        max_entries: usize,
-    ) -> (HashMap<String, Feed>, Vec<Entry>) {
+impl FeedStore {
+    pub fn new(dir: &str, feedlist: &Vec<super::FeedConfig>) -> Self {
+        let dir = super::to_checked_pathbuf(dir);
+        let mut feeds: BTreeMap<Url, FeedStoreFeed> = BTreeMap::new();
+
+        for feed_config in feedlist {
+            let feed_url = Url::parse(&feed_config.url).unwrap();
+            feeds.insert(feed_url.clone(), FeedStoreFeed::new(&dir, &feed_url));
+        }
+
+        Self { _dir: dir, feeds }
+    }
+
+    pub fn collect(&mut self, max_entries: usize) -> (HashMap<String, Feed>, Vec<Entry>) {
         let mut feeds = HashMap::new();
         let mut entries = Vec::new();
 
-        for feed_config in feed_configs {
-            let mut feed = match (|| {
-                let url = Url::parse(&feed_config.url)?;
-                self.load_feed(&url, true)
-            })() {
-                Err(e) => {
-                    warn!(
-                        "Problem parsing feed file for feed {}: {e:?}",
-                        feed_config.url
-                    );
-                    continue;
-                }
-                Ok(None) => continue,
-                Ok(Some(f)) => f,
+        for (feed_url, feed_store_feed) in self.feeds.iter_mut() {
+            let Some(mut feed) = feed_store_feed.load_feed(true) else {
+                warn!("Problem parsing feed file for feed {}", feed_url);
+                continue;
             };
+
             for entry in &mut feed.entries {
-                entry.source = Some(feed_config.url.clone());
+                entry.source = Some(feed_url.to_string());
             }
+
             entries.append(&mut std::mem::take(&mut feed.entries));
-            feeds.insert(feed_config.url.clone(), feed);
-            // optimization to reduce memory usage
             if entries.len() > 4 * max_entries {
                 entries = trim_entries(entries, max_entries);
             }
+
+            feeds.insert(feed_url.to_string(), feed.clone());
         }
         (feeds, trim_entries(entries, max_entries))
     }
+
+    pub fn fetch(&mut self, fetcher: &super::Fetcher, refresh: usize) -> Result<bool> {
+        let mut rebuild = false;
+        for (_url, feed) in self.feeds.iter_mut() {
+            rebuild |= feed.fetch(fetcher, refresh)?;
+        }
+
+        Ok(rebuild)
+    }
 }
 
 fn trim_entries(mut entries: Vec<Entry>, max_entries: usize) -> Vec<Entry> {
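The new `FeedStoreFeed::fetch` above combines conditional HTTP headers (`If-None-Match`, `If-Modified-Since`) with a refresh window computed from the stored `when` timestamp. Below is a minimal, self-contained sketch of just that refresh-window decision, using the same chrono calls as the patch; the function name and the example values are illustrative and not part of the patch.

```rust
use chrono::{DateTime, Duration, Utc};

/// Returns true when the feed should be fetched again: either it was never
/// fetched, or the last successful fetch is older than `refresh_hours`.
fn needs_refetch(last_fetch: Option<DateTime<Utc>>, refresh_hours: usize) -> bool {
    match last_fetch {
        // Never fetched before: always go to the network.
        None => true,
        // Otherwise only refetch once the refresh window has elapsed.
        Some(when) => when + Duration::try_hours(refresh_hours as i64).unwrap() < Utc::now(),
    }
}

fn main() {
    // Fetched one hour ago with a two-hour window: still fresh, skip the request.
    let one_hour_ago = Utc::now() - Duration::try_hours(1).unwrap();
    assert!(!needs_refetch(Some(one_hour_ago), 2));
    // Never fetched: must be requested.
    assert!(needs_refetch(None, 2));
}
```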
@@ -1,15 +1,10 @@
-use anyhow::Result;
-use std::time::Instant;
 use ureq::tls::{TlsConfig, TlsProvider};
 use ureq::Agent;
-use url::Url;
-
-use crate::FeedStore;
 
 pub struct Fetcher {
-    agent: Agent,
+    pub agent: Agent,
     /// FROM header for requests
-    from: String,
+    pub from: String,
 }
 
 impl Fetcher {
@@ -36,41 +31,4 @@ impl Fetcher {
             from: from.to_string(),
         }
     }
-
-    pub fn fetch(&self, url: Url, feed_store: &FeedStore) -> Result<bool> {
-        let fetchdata = feed_store.load_fetchdata(&url)?;
-        let mut builder = self
-            .agent
-            .get(url.to_string())
-            .header("FROM", self.from.clone());
-        if !fetchdata.etag.is_empty() {
-            builder = builder.header("If-None-Match", fetchdata.etag);
-        }
-        if !fetchdata.last_modified.is_empty() {
-            builder = builder.header("If-Modified-Since", fetchdata.last_modified);
-        }
-
-        let start_instant = Instant::now();
-        let result = builder.call();
-        let duration = start_instant.elapsed();
-
-        let response = result?;
-        debug!(
-            "fetched with status {} in {} ms: {url}",
-            response.status(),
-            duration.as_millis()
-        );
-        let status = response.status();
-        match status.as_u16() {
-            304 => Ok(false), // Not Modified -> nothing to do
-            200 => feed_store.store(&url, response),
-            _ => {
-                warn!(
-                    "HTTP Status {} not implemented for {url}",
-                    response.status()
-                );
-                Ok(false)
-            }
-        }
-    }
 }

src/main.rs (34 changes)
@@ -26,11 +26,10 @@ extern crate log;
 use crate::feed_store::FeedStore;
 use crate::fetcher::Fetcher;
 use anyhow::Result;
+use camino::Utf8PathBuf;
 use clap::Parser;
 use serde::Deserialize;
 use std::fs;
-use std::path::PathBuf;
-use url::Url;
 
 //mod atom_serializer;
 mod feed_store;
@@ -68,15 +67,17 @@ struct Config {
     templates_dir: String,
     /// How many feed entries should be included in the planet
     max_entries: usize,
+    /// How soon to refresh, in hours
+    refresh: usize,
 }
 
-pub fn to_checked_pathbuf(dir: &str) -> PathBuf {
-    let dir: PathBuf = PathBuf::from(dir);
+pub fn to_checked_pathbuf(dir: &str) -> Utf8PathBuf {
+    let dir = Utf8PathBuf::from(dir);
 
     let m = dir
         .metadata()
-        .unwrap_or_else(|_| panic!("Could not get metadata of dir: {}", dir.display()));
-    assert!(m.is_dir(), "Not a dir: {}", dir.display());
+        .unwrap_or_else(|_| panic!("Could not get metadata of dir: {dir}"));
+    assert!(m.is_dir(), "Not a dir: {dir}");
     dir
 }
 
@@ -90,20 +91,9 @@ struct FeedConfig {
     url: String,
 }
 
-fn fetch(config: &Config, feed_store: &FeedStore) -> Result<bool> {
+fn fetch(config: &Config, feed_store: &mut FeedStore) -> Result<bool> {
     let fetcher = Fetcher::new(&config.bot_name, &config.from);
-    let mut rebuild = false;
-    for feed in &config.feeds {
-        let url = match Url::parse(&feed.url) {
-            Ok(x) => x,
-            Err(e) => {
-                error!("Error parsing url '{}': {e:?}", feed.url);
-                continue;
-            }
-        };
-        rebuild |= fetcher.fetch(url, feed_store)?;
-    }
-    info!("Done fetching. Rebuild needed: {rebuild}");
+    let rebuild = feed_store.fetch(&fetcher, config.refresh)?;
     Ok(rebuild)
 }
 
@@ -122,15 +112,15 @@ fn main() -> Result<()> {
     let _ = to_checked_pathbuf(&config.templates_dir);
     let _ = to_checked_pathbuf(&config.out_dir);
 
-    let feed_store = FeedStore::new(&config.feed_dir);
+    let mut feed_store = FeedStore::new(&config.feed_dir, &config.feeds);
     let should_build = if args.no_fetch {
         true
     } else {
-        fetch(&config, &feed_store)?
+        fetch(&config, &mut feed_store)?
     };
 
     if should_build {
-        template_engine::build(&config, &feed_store)?;
+        template_engine::build(&config, &mut feed_store)?;
     }
     Ok(())
 }
@@ -7,13 +7,12 @@ use std::collections::HashMap;
 use std::fs::File;
 use tera::{from_value, Tera};
 
-pub fn build(config: &Config, feed_store: &FeedStore) -> Result<()> {
+pub fn build(config: &Config, feed_store: &mut FeedStore) -> Result<()> {
     let mut tera = create_tera(&config.templates_dir)?;
     let out_dir = to_checked_pathbuf(&config.out_dir);
 
     let mut context = tera::Context::new();
-    let (feeds, entries): (HashMap<String, Feed>, _) =
-        feed_store.collect(&config.feeds, config.max_entries);
+    let (feeds, entries): (HashMap<String, Feed>, _) = feed_store.collect(config.max_entries);
     context.insert("feeds", &feeds);
     context.insert("entries", &entries);
     context.insert("PKG_AUTHORS", env!("CARGO_PKG_AUTHORS"));
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    for name in tera.get_template_names() {
 | 
					    for name in tera.get_template_names() {
 | 
				
			||||||
        debug!("Processing template {name}");
 | 
					        debug!("Processing template {name}");
 | 
				
			||||||
        let file = File::create(format!("{}/{name}", out_dir.display()))?;
 | 
					        let file = File::create(format!("{out_dir}/{name}"))?;
 | 
				
			||||||
        tera.render_to(name, &context, file)?;
 | 
					        tera.render_to(name, &context, file)?;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    Ok(())
 | 
					    Ok(())
 | 
				
			||||||
@@ -32,7 +31,7 @@ pub fn build(config: &Config, feed_store: &FeedStore) -> Result<()> {
 
 fn create_tera(templates_dir: &str) -> Result<Tera> {
     let dir = to_checked_pathbuf(templates_dir);
-    let mut tera = tera::Tera::new(&format!("{}/*", &dir.display()))?;
+    let mut tera = tera::Tera::new(&format!("{dir}/*"))?;
     // disable autoescape as this would corrupt urls or the entriy contents. todo check this!
     tera.autoescape_on(vec![]);
     Ok(tera)