diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..142ea76 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,20 @@ +cargo-features = ["edition2024"] + +[package] +name = "rss2json" +version = "0.1.0" +edition = "2024" +resolver = "2" +authors = ["François Girault "] + +[dependencies] +rss = "2.0" +url = "2.5.2" +json = "0.12.4" +clap = { version = "4.5.9", features = ["derive"] } +chrono = "0.4.38" +env_logger = "0.11.3" +log = "0.4.22" +reqwest = { version = "0.12.5", features = ["blocking"] } +regex = "1.10.5" + diff --git a/README.md b/README.md index 4a841a4..4cd98d7 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,72 @@ # rss2json -My Rust tutorial \ No newline at end of file +rss2json converts RSS XML feeds to a JSON format. + +## About + +This is my first Rust tutorial project, so it surely does not follow all the best practices. + +The output format is entirely a personal choice, inspired by formats I have seen when using other people's APIs. + +>It handles a *very* minimal subset and does not pretend to fulfill industrial needs :) + +## Installation + +Set up Rust using rustup, then run in a terminal: + +```bash +cargo install --git https://git.tetalab.org/Mutah/rss2json.git +``` + +## Usage + +```bash +Usage: rss2json.exe [OPTIONS] --input <INPUT> + +Options: + -i, --input <INPUT> + -o, --output <OUTPUT> [default: ] + -p, --pretty + -h, --help Print help + -V, --version Print version +``` + +### Local file + +```bash +rss2json -i some_downloaded_rss_file.xml +``` + +### Remote + +```bash +rss2json -i https://git.tetalab.org/Mutah/rss2json.rss +``` + +## Output format + +Only a minimal subset of attributes is translated (when available). 
+ +```javascript +{ + "channel": { + "title": "", + "link": "", + "description": "", + "last_build_date": "", + "language": "", + "copyright": "", + "generator": "" + }, + "items": [ + { + "title": "", + "link": "", + "pub_date": "Sun, 14 Jul 2024 19:53:25 +0200", + "pub_ts": 1720979605, + "description": "", + "categories": ["", ""] + } + ] +} +``` \ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..95af1c6
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,148 @@
+//! rss2json: converts an RSS feed (local file or HTTP(S) URL) into JSON.
+//!
+//! A simple personal tutorial for my first baby steps with Rust.
+
+// NOTE(review): `Borrow` is not referenced in this file; the import is kept
+// only to leave the existing import set untouched.
+use std::borrow::Borrow;
+use std::fs;
+use std::fs::File;
+use std::io::{BufReader, Cursor};
+
+use chrono::DateTime;
+use clap::Parser;
+use json::object;
+use rss::Channel;
+
+// use log::debug;
+use log::error;
+use log::info;
+// use log::warn;
+
+// Command-line arguments.
+//
+// Plain `//` comments are used on purpose: clap turns `///` doc comments into
+// help text, which would change the `--help` output.
+#[derive(Parser, Debug)]
+#[clap(author, version, about, long_about = None)]
+struct Args {
+    // Path of a local RSS file, or an `http://` / `https://` URL to download.
+    #[clap(short, long, required = true)]
+    input: String,
+
+    // Destination file; when empty (the default) the JSON is printed to stdout.
+    #[clap(short, long, default_value_t = String::new())]
+    output: String,
+
+    // Pretty-print the JSON with a 4-space indent instead of the compact form.
+    #[clap(short, long, default_value_t = false)]
+    pretty: bool,
+}
+
+/// Loads and parses the feed designated by `input`.
+///
+/// `input` is downloaded when it starts with `http://` or `https://`, and
+/// opened as a local file otherwise.
+///
+/// # Errors
+///
+/// Returns an error when the download fails, the server answers with an error
+/// status, the local file cannot be opened, or the content cannot be parsed.
+fn load_channel(input: &str) -> std::io::Result<Channel> {
+    let parsed = if input.starts_with("http://") || input.starts_with("https://") {
+        let body = reqwest::blocking::get(input)
+            .and_then(|response| response.error_for_status())
+            .and_then(|response| response.bytes())
+            .map_err(|e| std::io::Error::other(e.to_string()))?;
+        info!("Extracted {} bytes", body.len());
+        Channel::read_from(Cursor::new(body))
+    } else {
+        Channel::read_from(BufReader::new(File::open(input)?))
+    };
+
+    parsed.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))
+}
+
+/// Builds the `channel` JSON object from the feed-level metadata.
+fn channel_to_json(channel: &Channel) -> json::JsonValue {
+    object! {
+        title: channel.title(),
+        link: channel.link(),
+        description: channel.description(),
+        last_build_date: channel.last_build_date(),
+        language: channel.language(),
+        copyright: channel.copyright(),
+        generator: channel.generator()
+    }
+}
+
+/// Builds the JSON object describing one feed item.
+///
+/// `pub_ts` is the Unix timestamp of `pub_date`. It stays `None` when the item
+/// has no publication date or the date is not valid RFC 2822, so one odd item
+/// does not abort the whole conversion. `content` is added only when present.
+fn item_to_json(item: &rss::Item) -> json::JsonValue {
+    let pub_ts = item
+        .pub_date()
+        .and_then(|date| DateTime::parse_from_rfc2822(date).ok())
+        .map(|date| date.timestamp());
+
+    let mut item_data = object! {
+        title: item.title(),
+        link: item.link(),
+        pub_date: item.pub_date(),
+        pub_ts: pub_ts,
+        description: item.description()
+    };
+
+    item_data["categories"] = json::JsonValue::Array(
+        item.categories()
+            .iter()
+            .map(|category| json::JsonValue::from(category.name()))
+            .collect(),
+    );
+
+    if let Some(content) = item.content() {
+        item_data["content"] = json::JsonValue::String(content.to_string());
+    }
+
+    item_data
+}
+
+/// Converts the feed given on the command line and writes the JSON result.
+///
+/// # Errors
+///
+/// Returns an error when the feed cannot be loaded or parsed, or when the
+/// output file cannot be written, so the process exits with a failure status.
+fn main() -> std::io::Result<()> {
+    env_logger::init();
+
+    let args = Args::parse();
+
+    // TODO handle remote http feed discovery by parsing index and search for the feed link
+    let channel = load_channel(&args.input).inspect_err(|e| error!("{}", e))?;
+
+    // Root object: channel information first, then the converted items.
+    let mut data = object! {
+        channel: channel_to_json(&channel)
+    };
+    data["items"] = json::JsonValue::Array(channel.items().iter().map(item_to_json).collect());
+
+    let output_string = if args.pretty {
+        data.pretty(4)
+    } else {
+        data.dump()
+    };
+
+    if args.output.is_empty() {
+        // No file name specified: dump the JSON to stdout.
+        println!("{}", output_string);
+    } else {
+        // A failed write is propagated instead of being logged and reported as success.
+        fs::write(&args.output, &output_string).inspect_err(|e| error!("{:?}", e))?;
+        info!("saved {} bytes to {}", output_string.len(), args.output);
+    }
+
+    Ok(())
+}