xml parsing completely rewritten
This commit is contained in:
parent
1cf8b008d0
commit
90c2d34c03
4 changed files with 326 additions and 400 deletions
19
Cargo.lock
generated
19
Cargo.lock
generated
|
@ -148,10 +148,10 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "pbdbfixer"
|
||||
version = "0.4.0"
|
||||
version = "0.5.0"
|
||||
dependencies = [
|
||||
"quick-xml",
|
||||
"rusqlite",
|
||||
"xml-rs",
|
||||
"zip",
|
||||
]
|
||||
|
||||
|
@ -170,6 +170,15 @@ dependencies = [
|
|||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0452695941410a58c8ce4391707ba9bad26a247173bd9886a05a5e8a8babec75"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.8"
|
||||
|
@ -282,12 +291,6 @@ version = "0.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "xml-rs"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
|
||||
|
||||
[[package]]
|
||||
name = "zip"
|
||||
version = "0.5.9"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "pbdbfixer"
|
||||
version = "0.4.0"
|
||||
version = "0.5.0"
|
||||
authors = ["Martin Brodbeck <martin@brodbeck-online.de>"]
|
||||
edition = "2018"
|
||||
|
||||
|
@ -8,7 +8,7 @@ edition = "2018"
|
|||
|
||||
[dependencies]
|
||||
zip = "0.5"
|
||||
xml-rs = "0.8"
|
||||
quick-xml = "0.21"
|
||||
|
||||
[dependencies.rusqlite]
|
||||
version = "0.24"
|
||||
|
|
250
src/epub.rs
Normal file
250
src/epub.rs
Normal file
|
@ -0,0 +1,250 @@
|
|||
use std::{
|
||||
collections::HashMap,
|
||||
fs::{self, File},
|
||||
io::Read,
|
||||
};
|
||||
|
||||
use quick_xml::{events::Event, Reader};
|
||||
use zip::ZipArchive;
|
||||
|
||||
/// One author of an EPUB, as read from a `creator` element of the OPF file.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Author {
    /// Author name as written in the text of the `creator` element.
    pub name: String,
    /// Sort form of the name (the `file-as` value); empty when none was found.
    pub firstauthor: String,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct EpubMetadata {
|
||||
pub authors: Vec<Author>,
|
||||
pub genre: String,
|
||||
}
|
||||
|
||||
impl EpubMetadata {
|
||||
fn new() -> Self {
|
||||
EpubMetadata {
|
||||
authors: Vec::new(),
|
||||
genre: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_rootfile(archive: &mut ZipArchive<File>) -> String {
|
||||
let mut container = archive.by_name("META-INF/container.xml").unwrap();
|
||||
let mut xml_str_buffer = String::new();
|
||||
|
||||
container.read_to_string(&mut xml_str_buffer).unwrap();
|
||||
|
||||
let mut reader = Reader::from_str(&xml_str_buffer);
|
||||
reader.trim_text(true);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let mut opf_filename = String::new();
|
||||
|
||||
loop {
|
||||
match reader.read_event(&mut buf) {
|
||||
Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) if e.local_name() == b"rootfile" => {
|
||||
opf_filename = String::from_utf8(
|
||||
e.attributes()
|
||||
.filter(|attr| attr.as_ref().unwrap().key == b"full-path")
|
||||
.next()
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.value
|
||||
.to_vec(),
|
||||
)
|
||||
.unwrap();
|
||||
break;
|
||||
}
|
||||
Ok(Event::Eof) => break,
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
opf_filename
|
||||
}
|
||||
|
||||
pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
|
||||
let mut epub_meta = EpubMetadata::new();
|
||||
let file = fs::File::open(&filename);
|
||||
|
||||
let file = match file {
|
||||
Err(_) => return None,
|
||||
Ok(file) => file,
|
||||
};
|
||||
|
||||
let mut archive = ZipArchive::new(file).unwrap();
|
||||
|
||||
let opf_filename = get_rootfile(&mut archive);
|
||||
|
||||
let mut xml_str_buffer = String::new();
|
||||
let mut opf = archive.by_name(&opf_filename).unwrap();
|
||||
opf.read_to_string(&mut xml_str_buffer).unwrap();
|
||||
|
||||
let mut reader = Reader::from_str(&xml_str_buffer);
|
||||
let mut buf = Vec::new();
|
||||
|
||||
let mut curr_id = String::new();
|
||||
let mut creator_found = false;
|
||||
let mut file_as_found = false;
|
||||
let mut role_found = false;
|
||||
let mut genre_found = false;
|
||||
let mut is_epub3 = false;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct XmlAut {
|
||||
name: String,
|
||||
sort: String,
|
||||
role: String,
|
||||
}
|
||||
|
||||
let mut xml_authors = HashMap::new();
|
||||
|
||||
loop {
|
||||
match reader.read_event(&mut buf) {
|
||||
// See if we have EPUB3 or EPUB2
|
||||
Ok(Event::Start(ref e)) if e.local_name() == b"package" => {
|
||||
if e.attributes().any(|attr| {
|
||||
attr.as_ref().unwrap().key == b"version"
|
||||
&& attr.as_ref().unwrap().value.starts_with(b"3")
|
||||
}) {
|
||||
is_epub3 = true;
|
||||
}
|
||||
}
|
||||
Ok(Event::Start(ref e)) if e.local_name() == b"creator" => {
|
||||
creator_found = true;
|
||||
if is_epub3 {
|
||||
if let Some(idval) = e
|
||||
.attributes()
|
||||
.filter(|attr| attr.as_ref().unwrap().key == b"id")
|
||||
.next()
|
||||
{
|
||||
curr_id = "#".to_string()
|
||||
+ String::from_utf8(idval.unwrap().value.to_vec())
|
||||
.unwrap()
|
||||
.as_str();
|
||||
xml_authors.insert(
|
||||
curr_id.clone(),
|
||||
XmlAut {
|
||||
name: "".to_string(),
|
||||
sort: "".to_string(),
|
||||
role: "".to_string(),
|
||||
},
|
||||
);
|
||||
}
|
||||
} else {
|
||||
if let Some(file_as_val) = e
|
||||
.attributes()
|
||||
.filter(|attr| attr.as_ref().unwrap().key.ends_with(b"file-as"))
|
||||
.next()
|
||||
{
|
||||
let ns =
|
||||
String::from_utf8(file_as_val.as_ref().unwrap().key.to_vec()).unwrap();
|
||||
curr_id = "none".to_string() + ns.split(':').collect::<Vec<&str>>()[0];
|
||||
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut {
|
||||
name: "".to_string(),
|
||||
sort: "".to_string(),
|
||||
role: "".to_string(),
|
||||
});
|
||||
entry.sort = file_as_val
|
||||
.unwrap()
|
||||
.unescape_and_decode_value(&reader)
|
||||
.unwrap_or_default();
|
||||
entry.role = "aut".to_string();
|
||||
}
|
||||
if let Some(role_val) = e
|
||||
.attributes()
|
||||
.filter(|attr| attr.as_ref().unwrap().key.ends_with(b"role"))
|
||||
.next()
|
||||
{
|
||||
let ns =
|
||||
String::from_utf8(role_val.as_ref().unwrap().key.to_vec()).unwrap();
|
||||
curr_id = "none".to_string() + ns.split(':').collect::<Vec<&str>>()[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Event::Text(ref e)) if creator_found => {
|
||||
if is_epub3 {
|
||||
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut {
|
||||
name: "".to_string(),
|
||||
sort: "".to_string(),
|
||||
role: "".to_string(),
|
||||
});
|
||||
entry.name = String::from_utf8(e.to_vec()).unwrap();
|
||||
} else {
|
||||
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut {
|
||||
name: "".to_string(),
|
||||
sort: "".to_string(),
|
||||
role: "".to_string(),
|
||||
});
|
||||
entry.name = String::from_utf8(e.to_vec()).unwrap();
|
||||
entry.role = "aut".to_string();
|
||||
}
|
||||
|
||||
creator_found = false;
|
||||
}
|
||||
Ok(Event::Start(ref e)) if e.local_name() == b"meta" && is_epub3 => {
|
||||
if let Some(refines) = e
|
||||
.attributes()
|
||||
.filter(|attr| attr.as_ref().unwrap().key == b"refines")
|
||||
.next()
|
||||
{
|
||||
if e.attributes().any(|attr| {
|
||||
attr.as_ref().unwrap().key == b"property"
|
||||
&& attr.as_ref().unwrap().value.ends_with(b"file-as")
|
||||
}) {
|
||||
curr_id = String::from_utf8(refines.unwrap().value.to_vec()).unwrap();
|
||||
file_as_found = true;
|
||||
} else if e.attributes().any(|attr| {
|
||||
attr.as_ref().unwrap().key == b"property"
|
||||
&& attr.as_ref().unwrap().value.ends_with(b"role")
|
||||
}) {
|
||||
curr_id = String::from_utf8(refines.unwrap().value.to_vec()).unwrap();
|
||||
role_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Event::Text(ref e)) if file_as_found && is_epub3 => {
|
||||
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut {
|
||||
name: "".to_string(),
|
||||
sort: "".to_string(),
|
||||
role: "".to_string(),
|
||||
});
|
||||
entry.sort = String::from_utf8(e.to_vec()).unwrap();
|
||||
|
||||
file_as_found = false;
|
||||
}
|
||||
Ok(Event::Text(ref e)) if role_found && is_epub3 => {
|
||||
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut {
|
||||
name: "".to_string(),
|
||||
sort: "".to_string(),
|
||||
role: "".to_string(),
|
||||
});
|
||||
entry.role = String::from_utf8(e.to_vec()).unwrap();
|
||||
|
||||
role_found = false;
|
||||
}
|
||||
Ok(Event::Start(ref e)) if e.local_name() == b"subject" => {
|
||||
genre_found = true;
|
||||
}
|
||||
Ok(Event::Text(ref e)) if genre_found => {
|
||||
//epub_meta.genre = String::from_utf8(e.to_vec()).unwrap();
|
||||
epub_meta.genre = e.unescape_and_decode(&reader).unwrap();
|
||||
genre_found = false;
|
||||
}
|
||||
Ok(Event::Eof) => break,
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
//println!("{:?}", &xml_authors);
|
||||
|
||||
epub_meta.authors = xml_authors
|
||||
.into_iter()
|
||||
.filter(|&(_, ref xml_author)| &xml_author.role == "aut" && &xml_author.name.len() > &0)
|
||||
.map(|(_key, value)| Author {
|
||||
name: value.name,
|
||||
firstauthor: value.sort,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Some(epub_meta)
|
||||
}
|
425
src/main.rs
425
src/main.rs
|
@ -1,338 +1,8 @@
|
|||
mod epub;
|
||||
mod pocketbook;
|
||||
|
||||
use rusqlite::{named_params, Connection, Result, Transaction, NO_PARAMS};
|
||||
use std::{collections::HashMap, fs::File};
|
||||
use std::{error::Error, io::Read};
|
||||
use std::{io::BufReader, usize};
|
||||
use xml::reader::{EventReader, ParserConfig, XmlEvent};
|
||||
use zip::{read::ZipFile, ZipArchive};
|
||||
|
||||
fn get_root_file(mut container: ZipFile) -> Result<Option<String>, Box<dyn Error>> {
|
||||
let mut buf = String::new();
|
||||
container.read_to_string(&mut buf).unwrap();
|
||||
|
||||
// Get rid of the BOM mark, if any
|
||||
if buf.starts_with("\u{feff}") {
|
||||
buf = buf.strip_prefix("\u{feff}").unwrap().to_owned();
|
||||
}
|
||||
|
||||
let parser = EventReader::new(BufReader::new(buf.as_bytes()));
|
||||
|
||||
for e in parser {
|
||||
match e {
|
||||
Ok(XmlEvent::StartElement {
|
||||
name, attributes, ..
|
||||
}) if name.local_name == "rootfile" => {
|
||||
for attr in attributes {
|
||||
if attr.name.local_name == "full-path" {
|
||||
return Ok(Some(attr.value));
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(Box::new(e));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Values collected from EPUB3 `meta` elements that refine one creator.
#[derive(Debug, Default)]
struct Refine {
    /// Text of the `role` refinement; only ever stored when it is `aut`.
    role: String,
    /// Text of the `file-as` refinement (the sort form of the name).
    file_as: String,
}
|
||||
|
||||
fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
|
||||
let parser = ParserConfig::new()
|
||||
.trim_whitespace(true)
|
||||
.ignore_comments(true)
|
||||
.coalesce_characters(true)
|
||||
.create_reader(opf);
|
||||
|
||||
let mut is_epub3 = false;
|
||||
let mut creator_ids = Vec::new();
|
||||
let mut refines_found = false;
|
||||
let mut role_found = false;
|
||||
let mut refine_entries = HashMap::new();
|
||||
let mut curr_id = String::new();
|
||||
|
||||
for e in parser {
|
||||
match e {
|
||||
Ok(XmlEvent::StartElement {
|
||||
name, attributes, ..
|
||||
}) if name.local_name == "package" => {
|
||||
for attr in attributes {
|
||||
if attr.name.local_name == "version" {
|
||||
if attr.value.starts_with("3") == true {
|
||||
is_epub3 = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::StartElement {
|
||||
name, attributes, ..
|
||||
}) if name.local_name == "creator" => {
|
||||
for attr in attributes {
|
||||
if attr.name.local_name == "file-as" {
|
||||
return Some(attr.value);
|
||||
}
|
||||
if is_epub3 && attr.name.local_name == "id" {
|
||||
creator_ids.push("#".to_owned() + attr.value.as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::StartElement {
|
||||
name, attributes, ..
|
||||
}) if name.local_name == "meta" => {
|
||||
if attributes.iter().any(|attr| {
|
||||
attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
|
||||
}) && attributes
|
||||
.iter()
|
||||
.any(|attr| attr.name.local_name == "property" && attr.value == "file-as")
|
||||
{
|
||||
refines_found = true;
|
||||
curr_id = attributes
|
||||
.iter()
|
||||
.find(|a| a.name.local_name == "refines")
|
||||
.unwrap()
|
||||
.value
|
||||
.clone();
|
||||
} else if attributes.iter().any(|attr| {
|
||||
attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
|
||||
}) && attributes
|
||||
.iter()
|
||||
.any(|attr| attr.name.local_name == "property" && attr.value == "role")
|
||||
{
|
||||
role_found = true;
|
||||
curr_id = attributes
|
||||
.iter()
|
||||
.find(|a| a.name.local_name == "refines")
|
||||
.unwrap()
|
||||
.value
|
||||
.clone();
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::Characters(value)) => {
|
||||
if role_found == true {
|
||||
if value == "aut" {
|
||||
let entry = refine_entries.entry(curr_id.clone()).or_insert(Refine {
|
||||
role: "".to_string(),
|
||||
file_as: "".to_string(),
|
||||
});
|
||||
entry.role = value;
|
||||
}
|
||||
role_found = false;
|
||||
} else if refines_found == true {
|
||||
let entry = refine_entries.entry(curr_id.clone()).or_insert(Refine {
|
||||
role: "".to_string(),
|
||||
file_as: "".to_string(),
|
||||
});
|
||||
entry.file_as = value;
|
||||
refines_found = false;
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::StartElement { .. }) => {
|
||||
if refines_found == true {
|
||||
refines_found = false;
|
||||
}
|
||||
}
|
||||
Err(_e) => {
|
||||
break;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if refine_entries.len() == 1 {
|
||||
return Some(refine_entries.values().next().unwrap().file_as.clone());
|
||||
} else if refine_entries.len() >= 2 {
|
||||
return Some(
|
||||
refine_entries
|
||||
.values()
|
||||
.into_iter()
|
||||
.filter(|v| v.role == "aut")
|
||||
.map(|v| v.file_as.clone())
|
||||
.collect::<Vec<String>>()
|
||||
.join(" & "),
|
||||
);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// A `creator` entry of an OPF file together with its role.
#[derive(Debug, Default)]
struct Creator {
    /// Role code of the creator; `aut` marks an author.
    role: String,
    /// Creator name as written in the element text.
    name: String,
}
|
||||
|
||||
fn get_attribute_creator(opf: ZipFile) -> Option<String> {
|
||||
let parser = ParserConfig::new()
|
||||
.trim_whitespace(true)
|
||||
.ignore_comments(true)
|
||||
.coalesce_characters(true)
|
||||
.create_reader(opf);
|
||||
|
||||
let mut is_epub3 = false;
|
||||
let mut creator_found = true;
|
||||
let mut creator_ids = Vec::new();
|
||||
let mut role_found = false;
|
||||
let mut creator_entries = HashMap::new();
|
||||
let mut epub2_creator_entries = Vec::new();
|
||||
let mut curr_id = String::new();
|
||||
|
||||
for e in parser {
|
||||
match e {
|
||||
Ok(XmlEvent::StartElement {
|
||||
name, attributes, ..
|
||||
}) if name.local_name == "package" => {
|
||||
for attr in attributes {
|
||||
if attr.name.local_name == "version" {
|
||||
if attr.value.starts_with("3") == true {
|
||||
is_epub3 = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::StartElement {
|
||||
name, attributes, ..
|
||||
}) if name.local_name == "creator" => {
|
||||
creator_found = true;
|
||||
if !is_epub3 {
|
||||
match attributes
|
||||
.iter()
|
||||
.find(|attr| attr.name.local_name == "role")
|
||||
{
|
||||
Some(attr) => {
|
||||
epub2_creator_entries.push(Creator {
|
||||
role: attr.value.clone(),
|
||||
name: "".to_string(),
|
||||
});
|
||||
}
|
||||
None => {
|
||||
epub2_creator_entries.push(Creator {
|
||||
role: "aut".to_string(),
|
||||
name: "".to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
for attr in attributes {
|
||||
if is_epub3 && attr.name.local_name == "id" {
|
||||
creator_ids.push("#".to_owned() + attr.value.as_str());
|
||||
//creator_entries.insert(attr.value.clone(), Creator{role: "".to_string(), name: "".to_string()});
|
||||
curr_id = "#".to_owned() + attr.value.as_str();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::StartElement {
|
||||
name, attributes, ..
|
||||
}) if name.local_name == "meta" => {
|
||||
if attributes.iter().any(|attr| {
|
||||
attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
|
||||
}) && attributes
|
||||
.iter()
|
||||
.any(|attr| attr.name.local_name == "property" && attr.value == "role")
|
||||
{
|
||||
role_found = true;
|
||||
curr_id = attributes
|
||||
.iter()
|
||||
.find(|a| a.name.local_name == "refines")
|
||||
.unwrap()
|
||||
.value
|
||||
.clone();
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::Characters(value)) => {
|
||||
if creator_found && is_epub3 == false {
|
||||
epub2_creator_entries.last_mut().unwrap().name = value.clone();
|
||||
} else if creator_found && is_epub3 == true {
|
||||
let entry = creator_entries.entry(curr_id.clone()).or_insert(Creator {
|
||||
role: "".to_string(),
|
||||
name: "".to_string(),
|
||||
});
|
||||
entry.name = value;
|
||||
creator_found = false;
|
||||
} else if role_found == true {
|
||||
let entry = creator_entries.entry(curr_id.clone()).or_insert(Creator {
|
||||
role: "".to_string(),
|
||||
name: "".to_string(),
|
||||
});
|
||||
entry.role = value;
|
||||
role_found = false;
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::StartElement { .. }) => {
|
||||
if creator_found == true {
|
||||
creator_found = false;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("{}", e);
|
||||
break;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if !is_epub3 && epub2_creator_entries.len() >= 1 {
|
||||
return Some(
|
||||
epub2_creator_entries
|
||||
.into_iter()
|
||||
.filter(|v| v.role == "aut")
|
||||
.map(|v| v.name.clone())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", "),
|
||||
);
|
||||
} else if creator_entries.len() >= 1 {
|
||||
return Some(
|
||||
creator_entries
|
||||
.values()
|
||||
.into_iter()
|
||||
.filter(|v| v.role == "aut")
|
||||
.map(|v| v.name.clone())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", "),
|
||||
);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn get_attribute_genre(opf: ZipFile) -> Option<String> {
|
||||
let parser = ParserConfig::new()
|
||||
.trim_whitespace(true)
|
||||
.ignore_comments(true)
|
||||
.coalesce_characters(true)
|
||||
.create_reader(opf);
|
||||
|
||||
let mut genre_found = false;
|
||||
|
||||
for e in parser {
|
||||
match e {
|
||||
Ok(XmlEvent::StartElement { name, .. }) if name.local_name == "subject" => {
|
||||
genre_found = true;
|
||||
}
|
||||
Ok(XmlEvent::Characters(value)) => {
|
||||
if genre_found {
|
||||
return Some(value);
|
||||
}
|
||||
}
|
||||
Ok(XmlEvent::StartElement { .. }) => {
|
||||
if genre_found == true {
|
||||
genre_found = false;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("{}", e);
|
||||
break;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
use rusqlite::{named_params, Connection, Transaction, NO_PARAMS};
|
||||
use std::usize;
|
||||
|
||||
struct BookEntry {
|
||||
id: i32,
|
||||
|
@ -460,53 +130,55 @@ fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics
|
|||
continue;
|
||||
}
|
||||
|
||||
let file = File::open(entry.filepath.as_str());
|
||||
let file = match file {
|
||||
Err(_) => continue,
|
||||
Ok(file) => file,
|
||||
};
|
||||
if let Some(epub_metadata) = epub::get_epub_metadata(&entry.filepath) {
|
||||
let authors = epub_metadata
|
||||
.authors
|
||||
.iter()
|
||||
.filter(|aut| aut.firstauthor.len() > 0)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut archive = ZipArchive::new(BufReader::new(file)).unwrap();
|
||||
|
||||
let container = archive.by_name("META-INF/container.xml").unwrap();
|
||||
|
||||
if let Some(opf_file) = get_root_file(container).unwrap() {
|
||||
let opf = archive.by_name(opf_file.as_str()).unwrap();
|
||||
// firstauthor…
|
||||
if let Some(file_as) = get_attribute_file_as(opf) {
|
||||
if !file_as.split(" & ").all(|s| entry.firstauthor.contains(s)) {
|
||||
// Fix firstauthor…
|
||||
let firstauthors = authors
|
||||
.iter()
|
||||
.map(|aut| aut.firstauthor.clone())
|
||||
.collect::<Vec<_>>();
|
||||
if !firstauthors.iter().all(|s| entry.firstauthor.contains(s)) {
|
||||
let mut stmt = tx
|
||||
.prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id")
|
||||
.unwrap();
|
||||
stmt.execute_named(named_params![":file_as": file_as, ":book_id": entry.id])
|
||||
.unwrap();
|
||||
stat.authors_fixed = stat.authors_fixed + 1;
|
||||
}
|
||||
}
|
||||
let opf = archive.by_name(opf_file.as_str()).unwrap();
|
||||
// author…
|
||||
if let Some(creator) = get_attribute_creator(opf) {
|
||||
if !creator.split(", ").all(|s| entry.author.contains(s))
|
||||
|| creator.len() < entry.author.len()
|
||||
{
|
||||
let mut stmt = tx
|
||||
.prepare("UPDATE books_impl SET author = :creator WHERE id = :book_id")
|
||||
.unwrap();
|
||||
stmt.execute_named(named_params![":creator": creator, ":book_id": entry.id])
|
||||
.unwrap();
|
||||
stat.authors_fixed = stat.authors_fixed + 1;
|
||||
}
|
||||
}
|
||||
// genre…
|
||||
if entry.genre.is_empty() {
|
||||
let opf = archive.by_name(opf_file.as_str()).unwrap();
|
||||
if let Some(genre) = get_attribute_genre(opf) {
|
||||
let mut stmt = tx
|
||||
.prepare(
|
||||
r#"INSERT INTO genres (name) SELECT :genre ON CONFLICT DO NOTHING"#,
|
||||
stmt.execute_named(
|
||||
named_params![":file_as": firstauthors.join(" & "), ":book_id": entry.id],
|
||||
)
|
||||
.unwrap();
|
||||
stmt.execute_named(named_params![":genre": &genre]).unwrap();
|
||||
stat.authors_fixed = stat.authors_fixed + 1;
|
||||
|
||||
println!("{}", firstauthors.join(" & "));
|
||||
}
|
||||
|
||||
// Fix author names…
|
||||
let authornames = authors
|
||||
.iter()
|
||||
.map(|aut| aut.name.clone())
|
||||
.collect::<Vec<_>>();
|
||||
if !authornames.iter().all(|s| entry.author.contains(s)) {
|
||||
let mut stmt = tx
|
||||
.prepare("UPDATE books_impl SET author = :authors WHERE id = :book_id")
|
||||
.unwrap();
|
||||
stmt.execute_named(
|
||||
named_params![":authors": authornames.join(", "), ":book_id": entry.id],
|
||||
)
|
||||
.unwrap();
|
||||
stat.authors_fixed = stat.authors_fixed + 1;
|
||||
|
||||
println!("{}", authornames.join(" & "));
|
||||
}
|
||||
|
||||
if entry.genre.is_empty() && epub_metadata.genre.len() > 0 {
|
||||
let mut stmt = tx
|
||||
.prepare(r#"INSERT INTO genres (name) SELECT :genre ON CONFLICT DO NOTHING"#)
|
||||
.unwrap();
|
||||
stmt.execute_named(named_params![":genre": &epub_metadata.genre])
|
||||
.unwrap();
|
||||
let mut stmt = tx
|
||||
.prepare(
|
||||
r#"
|
||||
|
@ -517,13 +189,14 @@ fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics
|
|||
ON CONFLICT DO NOTHING"#,
|
||||
)
|
||||
.unwrap();
|
||||
stmt.execute_named(named_params![":bookid": &entry.id, ":genre": &genre])
|
||||
stmt.execute_named(
|
||||
named_params![":bookid": &entry.id, ":genre": &epub_metadata.genre],
|
||||
)
|
||||
.unwrap();
|
||||
stat.genres_fixed = stat.genres_fixed + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ghost books
|
||||
let num = remove_ghost_books_from_db(tx);
|
||||
|
|
Loading…
Reference in a new issue