2021-02-11 21:58:10 +01:00
|
|
|
use std::{
|
|
|
|
collections::HashMap,
|
|
|
|
fs::{self, File},
|
|
|
|
io::Read,
|
|
|
|
};
|
|
|
|
|
|
|
|
use quick_xml::{events::Event, Reader};
|
|
|
|
use zip::ZipArchive;
|
|
|
|
|
|
|
|
/// An author credited in the package metadata of an EPUB file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Author {
    /// Name as written in the text of the `creator` element.
    pub name: String,
    /// Sort ("file-as") form of the name; empty when the package supplies none.
    pub firstauthor: String,
}
|
|
|
|
|
2021-02-15 13:05:32 +01:00
|
|
|
/// The series (collection) a book belongs to.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Series {
    /// Title of the series; empty when the book is not part of one.
    pub name: String,
    /// Position of the book within the series; `0` until a position is set.
    pub index: i32,
}

impl Series {
    /// Creates a blank series: empty name, index `0`.
    ///
    /// Equivalent to [`Series::default`]; kept so existing callers of `new` keep working.
    fn new() -> Self {
        Self::default()
    }
}
|
|
|
|
|
2021-02-11 21:58:10 +01:00
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct EpubMetadata {
|
|
|
|
pub authors: Vec<Author>,
|
|
|
|
pub genre: String,
|
2021-02-15 13:05:32 +01:00
|
|
|
pub series: Series,
|
2021-02-11 21:58:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
impl EpubMetadata {
|
|
|
|
fn new() -> Self {
|
|
|
|
EpubMetadata {
|
|
|
|
authors: Vec::new(),
|
|
|
|
genre: String::new(),
|
2021-02-15 13:05:32 +01:00
|
|
|
series: Series::new(),
|
2021-02-11 21:58:10 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn get_rootfile(archive: &mut ZipArchive<File>) -> String {
|
|
|
|
let mut container = archive.by_name("META-INF/container.xml").unwrap();
|
|
|
|
let mut xml_str_buffer = String::new();
|
|
|
|
|
|
|
|
container.read_to_string(&mut xml_str_buffer).unwrap();
|
|
|
|
|
|
|
|
let mut reader = Reader::from_str(&xml_str_buffer);
|
2024-11-05 14:01:48 +01:00
|
|
|
reader.config_mut().trim_text(true);
|
2021-02-11 21:58:10 +01:00
|
|
|
|
|
|
|
let mut buf = Vec::new();
|
|
|
|
let mut opf_filename = String::new();
|
|
|
|
|
|
|
|
loop {
|
2024-11-05 14:01:48 +01:00
|
|
|
match reader.read_event_into(&mut buf) {
|
|
|
|
Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e))
|
2024-11-12 10:37:01 +01:00
|
|
|
if e.name().as_ref() == b"rootfile" =>
|
2024-11-05 14:01:48 +01:00
|
|
|
{
|
2024-11-12 10:37:01 +01:00
|
|
|
opf_filename = e
|
|
|
|
.attributes()
|
|
|
|
.filter(|attr| attr.as_ref().unwrap().key.as_ref() == b"full-path")
|
|
|
|
.next()
|
|
|
|
.unwrap()
|
|
|
|
.unwrap()
|
|
|
|
.unescape_value()
|
|
|
|
.unwrap()
|
|
|
|
.to_string();
|
2021-02-11 21:58:10 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
Ok(Event::Eof) => break,
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
opf_filename
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
|
|
|
|
let mut epub_meta = EpubMetadata::new();
|
|
|
|
let file = fs::File::open(&filename);
|
|
|
|
|
|
|
|
let file = match file {
|
|
|
|
Err(_) => return None,
|
|
|
|
Ok(file) => file,
|
|
|
|
};
|
|
|
|
|
|
|
|
let mut archive = ZipArchive::new(file).unwrap();
|
|
|
|
|
|
|
|
let opf_filename = get_rootfile(&mut archive);
|
|
|
|
|
|
|
|
let mut xml_str_buffer = String::new();
|
|
|
|
let mut opf = archive.by_name(&opf_filename).unwrap();
|
|
|
|
opf.read_to_string(&mut xml_str_buffer).unwrap();
|
|
|
|
|
|
|
|
let mut reader = Reader::from_str(&xml_str_buffer);
|
|
|
|
let mut buf = Vec::new();
|
|
|
|
|
|
|
|
let mut curr_id = String::new();
|
|
|
|
let mut creator_found = false;
|
|
|
|
let mut file_as_found = false;
|
|
|
|
let mut role_found = false;
|
|
|
|
let mut genre_found = false;
|
2021-02-15 13:05:32 +01:00
|
|
|
let mut series_found = false;
|
|
|
|
let mut series_index_found = false;
|
2021-02-11 21:58:10 +01:00
|
|
|
let mut is_epub3 = false;
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
struct XmlAut {
|
|
|
|
name: String,
|
|
|
|
sort: String,
|
|
|
|
role: String,
|
|
|
|
}
|
|
|
|
|
2021-02-12 07:50:32 +01:00
|
|
|
impl XmlAut {
|
|
|
|
fn new() -> Self {
|
|
|
|
XmlAut {
|
|
|
|
name: String::new(),
|
|
|
|
sort: String::new(),
|
|
|
|
role: String::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-11 21:58:10 +01:00
|
|
|
let mut xml_authors = HashMap::new();
|
|
|
|
|
|
|
|
loop {
|
2024-11-05 14:01:48 +01:00
|
|
|
match reader.read_event_into(&mut buf) {
|
2021-02-11 21:58:10 +01:00
|
|
|
// See if we have EPUB3 or EPUB2
|
2024-11-12 10:57:03 +01:00
|
|
|
Ok(Event::Start(ref e)) if e.name().as_ref() == b"package" => {
|
2021-02-11 21:58:10 +01:00
|
|
|
if e.attributes().any(|attr| {
|
2024-11-12 10:57:03 +01:00
|
|
|
attr.as_ref().unwrap().key.as_ref() == b"version"
|
2021-02-11 21:58:10 +01:00
|
|
|
&& attr.as_ref().unwrap().value.starts_with(b"3")
|
|
|
|
}) {
|
|
|
|
is_epub3 = true;
|
|
|
|
}
|
|
|
|
}
|
2024-11-12 10:57:03 +01:00
|
|
|
Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"creator" => {
|
2021-02-11 21:58:10 +01:00
|
|
|
creator_found = true;
|
|
|
|
if is_epub3 {
|
|
|
|
if let Some(idval) = e
|
|
|
|
.attributes()
|
2024-11-12 10:57:03 +01:00
|
|
|
.filter(|attr| attr.as_ref().unwrap().key.as_ref() == b"id")
|
2021-02-11 21:58:10 +01:00
|
|
|
.next()
|
|
|
|
{
|
|
|
|
curr_id = "#".to_string()
|
|
|
|
+ String::from_utf8(idval.unwrap().value.to_vec())
|
|
|
|
.unwrap()
|
|
|
|
.as_str();
|
2021-02-12 07:50:32 +01:00
|
|
|
xml_authors.insert(curr_id.clone(), XmlAut::new());
|
2021-02-26 13:28:29 +01:00
|
|
|
} else {
|
|
|
|
curr_id = "none".to_string() + xml_authors.len().to_string().as_str();
|
|
|
|
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
|
|
|
|
entry.role = "aut".to_string();
|
2021-02-11 21:58:10 +01:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if let Some(file_as_val) = e
|
|
|
|
.attributes()
|
2024-11-05 14:01:48 +01:00
|
|
|
.filter(|attr| {
|
|
|
|
attr.as_ref()
|
|
|
|
.unwrap()
|
|
|
|
.key
|
2024-11-12 10:57:03 +01:00
|
|
|
.as_ref()
|
2024-11-05 14:01:48 +01:00
|
|
|
.ends_with(b"file-as")
|
|
|
|
})
|
2021-02-11 21:58:10 +01:00
|
|
|
.next()
|
|
|
|
{
|
2021-02-12 09:13:31 +01:00
|
|
|
curr_id = "none".to_string() + xml_authors.len().to_string().as_str();
|
2021-02-12 07:50:32 +01:00
|
|
|
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
|
2021-02-11 21:58:10 +01:00
|
|
|
entry.sort = file_as_val
|
|
|
|
.unwrap()
|
2024-11-12 10:57:03 +01:00
|
|
|
.unescape_value()
|
2024-11-05 14:01:48 +01:00
|
|
|
.unwrap_or_default()
|
|
|
|
.to_string();
|
2021-02-11 21:58:10 +01:00
|
|
|
entry.role = "aut".to_string();
|
2021-02-12 09:13:31 +01:00
|
|
|
} else if let Some(_role_val) = e
|
2021-02-11 21:58:10 +01:00
|
|
|
.attributes()
|
2024-11-12 10:57:03 +01:00
|
|
|
.filter(|attr| attr.as_ref().unwrap().key.as_ref().ends_with(b"role"))
|
2021-02-11 21:58:10 +01:00
|
|
|
.next()
|
|
|
|
{
|
2021-02-12 09:13:31 +01:00
|
|
|
curr_id = "none".to_string() + xml_authors.len().to_string().as_str();
|
2021-02-11 21:58:10 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(Event::Text(ref e)) if creator_found => {
|
|
|
|
if is_epub3 {
|
2021-02-12 07:50:32 +01:00
|
|
|
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
|
2021-02-11 21:58:10 +01:00
|
|
|
entry.name = String::from_utf8(e.to_vec()).unwrap();
|
|
|
|
} else {
|
2021-02-12 07:50:32 +01:00
|
|
|
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
|
2021-02-11 21:58:10 +01:00
|
|
|
entry.name = String::from_utf8(e.to_vec()).unwrap();
|
|
|
|
entry.role = "aut".to_string();
|
|
|
|
}
|
|
|
|
|
|
|
|
creator_found = false;
|
|
|
|
}
|
2024-11-12 10:57:03 +01:00
|
|
|
Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"meta" && is_epub3 => {
|
2021-02-11 21:58:10 +01:00
|
|
|
if let Some(refines) = e
|
|
|
|
.attributes()
|
2024-11-12 10:57:03 +01:00
|
|
|
.filter(|attr| attr.as_ref().unwrap().key.as_ref() == b"refines")
|
2021-02-11 21:58:10 +01:00
|
|
|
.next()
|
|
|
|
{
|
|
|
|
if e.attributes().any(|attr| {
|
2024-11-12 10:57:03 +01:00
|
|
|
attr.as_ref().unwrap().key.as_ref() == b"property"
|
2021-02-11 21:58:10 +01:00
|
|
|
&& attr.as_ref().unwrap().value.ends_with(b"file-as")
|
|
|
|
}) {
|
|
|
|
curr_id = String::from_utf8(refines.unwrap().value.to_vec()).unwrap();
|
|
|
|
file_as_found = true;
|
|
|
|
} else if e.attributes().any(|attr| {
|
2024-11-12 10:57:03 +01:00
|
|
|
attr.as_ref().unwrap().key.as_ref() == b"property"
|
2021-02-11 21:58:10 +01:00
|
|
|
&& attr.as_ref().unwrap().value.ends_with(b"role")
|
|
|
|
}) {
|
|
|
|
curr_id = String::from_utf8(refines.unwrap().value.to_vec()).unwrap();
|
|
|
|
role_found = true;
|
2021-02-15 13:05:32 +01:00
|
|
|
} else if e.attributes().any(|attr| {
|
2024-11-12 10:57:03 +01:00
|
|
|
attr.as_ref().unwrap().key.as_ref() == b"property"
|
2021-02-15 13:05:32 +01:00
|
|
|
&& attr.as_ref().unwrap().value.ends_with(b"group-position")
|
|
|
|
}) {
|
|
|
|
series_index_found = true;
|
2021-02-11 21:58:10 +01:00
|
|
|
}
|
|
|
|
}
|
2021-02-15 13:05:32 +01:00
|
|
|
if e.attributes().any(|attr| {
|
2024-11-12 10:57:03 +01:00
|
|
|
attr.as_ref().unwrap().key.as_ref() == b"property"
|
2021-02-15 13:05:32 +01:00
|
|
|
&& attr
|
|
|
|
.as_ref()
|
|
|
|
.unwrap()
|
|
|
|
.value
|
|
|
|
.ends_with(b"belongs-to-collection")
|
|
|
|
}) {
|
|
|
|
series_found = true;
|
|
|
|
}
|
|
|
|
}
|
2024-11-12 10:57:03 +01:00
|
|
|
Ok(Event::Empty(ref e)) if e.local_name().as_ref() == b"meta" && !is_epub3 => {
|
2021-02-15 13:05:32 +01:00
|
|
|
if e.attributes().any(|attr| {
|
2024-11-12 10:57:03 +01:00
|
|
|
attr.as_ref().unwrap().key.as_ref() == b"name"
|
2021-02-15 13:05:32 +01:00
|
|
|
&& attr
|
|
|
|
.as_ref()
|
|
|
|
.unwrap()
|
2024-11-05 14:01:48 +01:00
|
|
|
.unescape_value()
|
2021-02-15 13:05:32 +01:00
|
|
|
.unwrap()
|
2024-11-05 14:01:48 +01:00
|
|
|
.ends_with("series")
|
2021-02-15 13:05:32 +01:00
|
|
|
}) {
|
|
|
|
epub_meta.series.name = e
|
|
|
|
.attributes()
|
2024-11-12 10:57:03 +01:00
|
|
|
.filter(|attr| attr.as_ref().unwrap().key.as_ref() == b"content")
|
2021-02-15 13:05:32 +01:00
|
|
|
.next()
|
|
|
|
.unwrap()
|
|
|
|
.unwrap()
|
2024-11-12 10:57:03 +01:00
|
|
|
.unescape_value()
|
2024-11-05 14:01:48 +01:00
|
|
|
.unwrap_or_default()
|
|
|
|
.to_string();
|
2021-02-15 13:05:32 +01:00
|
|
|
} else if e.attributes().any(|attr| {
|
2024-11-12 10:57:03 +01:00
|
|
|
attr.as_ref().unwrap().key.as_ref() == b"name"
|
2021-02-15 13:05:32 +01:00
|
|
|
&& attr
|
|
|
|
.as_ref()
|
|
|
|
.unwrap()
|
2024-11-05 14:01:48 +01:00
|
|
|
.unescape_value()
|
2021-02-15 13:05:32 +01:00
|
|
|
.unwrap()
|
2024-11-05 14:01:48 +01:00
|
|
|
.ends_with("series_index")
|
2021-02-15 13:05:32 +01:00
|
|
|
}) {
|
|
|
|
let index_float = e
|
|
|
|
.attributes()
|
2024-11-12 10:57:03 +01:00
|
|
|
.filter(|attr| attr.as_ref().unwrap().key.as_ref() == b"content")
|
2021-02-15 13:05:32 +01:00
|
|
|
.next()
|
|
|
|
.unwrap()
|
|
|
|
.unwrap()
|
2024-11-12 10:57:03 +01:00
|
|
|
.unescape_value()
|
2021-02-15 13:05:32 +01:00
|
|
|
.unwrap_or_default()
|
|
|
|
.parse::<f32>()
|
|
|
|
.unwrap_or_default();
|
|
|
|
epub_meta.series.index = index_float as i32;
|
|
|
|
}
|
2021-02-11 21:58:10 +01:00
|
|
|
}
|
|
|
|
Ok(Event::Text(ref e)) if file_as_found && is_epub3 => {
|
2021-02-12 07:50:32 +01:00
|
|
|
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
|
2021-02-11 21:58:10 +01:00
|
|
|
entry.sort = String::from_utf8(e.to_vec()).unwrap();
|
|
|
|
|
|
|
|
file_as_found = false;
|
|
|
|
}
|
|
|
|
Ok(Event::Text(ref e)) if role_found && is_epub3 => {
|
2021-02-12 07:50:32 +01:00
|
|
|
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
|
2021-02-11 21:58:10 +01:00
|
|
|
entry.role = String::from_utf8(e.to_vec()).unwrap();
|
|
|
|
|
|
|
|
role_found = false;
|
|
|
|
}
|
2021-02-15 13:05:32 +01:00
|
|
|
Ok(Event::Text(ref e)) if series_found && is_epub3 => {
|
|
|
|
epub_meta.series.name = String::from_utf8(e.to_vec()).unwrap();
|
|
|
|
|
|
|
|
series_found = false;
|
|
|
|
}
|
|
|
|
Ok(Event::Text(ref e)) if series_index_found && is_epub3 => {
|
|
|
|
epub_meta.series.index = String::from_utf8(e.to_vec())
|
|
|
|
.unwrap()
|
|
|
|
.parse()
|
|
|
|
.unwrap_or_default();
|
|
|
|
|
|
|
|
series_index_found = false;
|
|
|
|
}
|
2024-11-12 10:57:03 +01:00
|
|
|
Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"subject" => {
|
2021-02-11 21:58:10 +01:00
|
|
|
genre_found = true;
|
|
|
|
}
|
|
|
|
Ok(Event::Text(ref e)) if genre_found => {
|
2024-11-05 14:01:48 +01:00
|
|
|
epub_meta.genre = e.unescape().unwrap().to_string();
|
2021-02-11 21:58:10 +01:00
|
|
|
genre_found = false;
|
|
|
|
}
|
|
|
|
Ok(Event::Eof) => break,
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
epub_meta.authors = xml_authors
|
|
|
|
.into_iter()
|
|
|
|
.filter(|&(_, ref xml_author)| &xml_author.role == "aut" && &xml_author.name.len() > &0)
|
|
|
|
.map(|(_key, value)| Author {
|
|
|
|
name: value.name,
|
|
|
|
firstauthor: value.sort,
|
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
Some(epub_meta)
|
|
|
|
}
|