Compare commits

...

2 Commits

Author SHA1 Message Date
fgirault
92cc323f93 chore: 🙈 gitignore update 2024-07-14 20:18:04 +02:00
fgirault
eab724adf3 feat: Initial commit 2024-07-14 20:14:17 +02:00
4 changed files with 236 additions and 1 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

20
Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
cargo-features = ["edition2024"]
[package]
name = "rss2json"
version = "0.1.0"
edition = "2024"
resolver = "2"
authors = ["François Girault <fgirault@gmail.com>"]
[dependencies]
rss = "2.0"
url = "2.5.2"
json = "0.12.4"
clap = { version = "4.5.9", features = ["derive"] }
chrono = "0.4.38"
env_logger = "0.11.3"
log = "0.4.22"
reqwest = { version = "0.12.5", features = ["blocking"] }
regex = "1.10.5"

View File

@ -1,3 +1,72 @@
# rss2json
rss2json, my Rust tutorial project, converts RSS XML feeds to a JSON format.
## About
This is my first Rust tutorial project, so it is surely not written with all the best practices.
The output format is a purely personal choice, inspired by formats I have seen while using other people's APIs.
> It handles a *very* minimal subset and does not pretend to fulfill industrial needs :)
## Installation
Set up Rust using rustup, then run in a terminal:
```bash
cargo install --git https://git.tetalab.org/Mutah/rss2json.git
```
## Usage
```bash
Usage: rss2json.exe [OPTIONS] --input <INPUT>
Options:
-i, --input <INPUT>
-o, --output <OUTPUT> [default: ]
-p, --pretty
-h, --help Print help
-V, --version Print version
```
### Local file
```bash
rss2json -i some_downloaded_rss_file.xml
```
### Remote
```bash
rss2json -i https://git.tetalab.org/Mutah/rss2json.rss
```
## Output format
Only a minimal subset of attributes is translated (when available in the feed).
```javascript
{
"channel": {
"title": "",
"link": "",
"description": "",
"last_build_date": "",
"language": "",
"copyright": "",
"generator": ""
},
"items": [
{
"title": "",
"link": "",
"pub_date": "Sun, 14 Jul 2024 19:53:25 +0200",
"pub_ts": 1720979605,
"description": "",
"categories": ["", ""]
}
]
}
```

145
src/main.rs Normal file
View File

@ -0,0 +1,145 @@
/**
* A simple personal tutorial project: my first baby steps with Rust.
*/
use std::fs;
use std::fs::File;
use std::io::{BufReader, Cursor};
use chrono::DateTime;
use clap::Parser;
use json::object;
use rss::Channel;
// use log::debug;
use log::error;
use log::info;
// use log::warn;
// Command-line arguments of rss2json.
//
// NOTE: clap turns the `///` comments in this block into the `--help` output,
// so they are written for end users rather than for maintainers.
/// Convert an RSS feed (local file or HTTP(S) URL) into a compact JSON document.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
    /// Path of a local RSS file, or an http:// or https:// URL to download
    // A non-`Option` field is already mandatory for clap, so no explicit
    // `required = true` is needed.
    #[clap(short, long)]
    input: String,

    /// File to write the JSON to; when omitted the JSON is printed to stdout
    // The empty string is the "no output file" marker; hide it so the help
    // does not show a meaningless `[default: ]`.
    #[clap(short, long, default_value_t = String::new(), hide_default_value = true)]
    output: String,

    /// Pretty-print the JSON with a 4-space indent instead of a single line
    // A `bool` field is a flag that defaults to false when absent.
    #[clap(short, long)]
    pretty: bool,
}
fn main() -> std::io::Result<()> {
env_logger::init();
let args = Args::parse();
// TODO handle remote http feed discovery by parsing index and search for the feed link
let channel: Channel;
if args.input.starts_with("http://") || args.input.starts_with("https://") {
// process download
match reqwest::blocking::get(args.input).unwrap().bytes() {
Ok(content) => {
info!("Extracted {} bytes", content.len());
let cursor = Cursor::new(content);
channel = Channel::read_from(cursor).unwrap();
}
Err(e) => {
error!("{}", e);
std::process::exit(1);
}
}
} else {
// read locale file
let file = File::open(args.input).unwrap();
channel = Channel::read_from(BufReader::new(file)).unwrap();
}
// Initialize root object with channel informations, using object macro from json crate
let mut data = object! {
channel: object!{
title: channel.title(),
link: channel.link(),
description: channel.description(),
last_build_date: channel.last_build_date(),
language: channel.language(),
copyright: channel.copyright(),
generator: channel.generator()
}
};
// declare an mutable array to populate it with channel item data
let mut items_data = json::JsonValue::new_array();
// populate the items array
for item in channel.items() {
let pub_datetime = DateTime::parse_from_rfc2822(item.pub_date().unwrap()).unwrap();
// create object to hold item data
let mut item_data = object! {
title: item.title(),
link: item.link(),
pub_date: item.pub_date(),
pub_ts: pub_datetime.timestamp(),
description: item.description(),
// author: item.author()
};
// populate categories
let mut categories = json::JsonValue::new_array();
for category in item.categories() {
match categories.push(category.name()) {
Ok(_) => {}
Err(e) => {
// memory overflow ? as a beginner, style puzzled by rust
error!("Error pushing to items_data {}", e);
}
}
}
item_data["categories"] = categories;
if item.content().is_some() {
item_data["content"] = json::JsonValue::String(item.content().unwrap().to_string());
}
match items_data.push(item_data) {
Ok(_) => {}
Err(e) => {
// memory overflow ? as a beginner, style puzzled by rust
error!("Error pushing to items_data {}", e);
}
}
}
// attach the items to the json data root
data["items"] = items_data;
// output result
let output_string: String = if args.pretty {
data.pretty(4)
} else {
data.dump()
};
if args.output.len() > 0 {
let output_length = output_string.len();
let filename = args.output.to_string();
match fs::write(filename, output_string) {
Ok(_) => {
info!("saving {} characters to {}", output_length, args.output);
}
Err(e) => {
error!("{:?}", e);
}
}
Ok(())
} else {
// no file name specified, dump json to stdout
println!("{}", output_string);
Ok(())
}
}