diff --git a/Cargo.lock b/Cargo.lock index fd7ac1f..269c889 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,7 +148,7 @@ dependencies = [ [[package]] name = "pbdbfixer" -version = "0.2.0" +version = "0.3.0" dependencies = [ "rusqlite", "xml-rs", diff --git a/Cargo.toml b/Cargo.toml index 7fef784..cfad558 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pbdbfixer" -version = "0.2.0" +version = "0.3.0" authors = ["Martin Brodbeck "] edition = "2018" diff --git a/src/main.rs b/src/main.rs index 8273e8d..e206a05 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,22 @@ mod pocketbook; use rusqlite::{named_params, Connection, Result, Transaction, NO_PARAMS}; -use std::error::Error; -use std::fs::File; use std::io::BufReader; +use std::{collections::HashMap, fs::File}; +use std::{error::Error, io::Read}; use xml::reader::{EventReader, ParserConfig, XmlEvent}; use zip::{read::ZipFile, ZipArchive}; -fn get_root_file(container: ZipFile) -> Result, Box> { - let parser = EventReader::new(container); +fn get_root_file(mut container: ZipFile) -> Result, Box> { + let mut buf = String::new(); + container.read_to_string(&mut buf).unwrap(); + + // Get rid of the BOM mark, if any + if buf.starts_with("\u{feff}") { + buf = buf.strip_prefix("\u{feff}").unwrap().to_owned(); + } + + let parser = EventReader::new(BufReader::new(buf.as_bytes())); for e in parser { match e { @@ -30,6 +38,11 @@ fn get_root_file(container: ZipFile) -> Result, Box> { Ok(None) } +struct Refine { + role: String, + file_as: String, +} + fn get_attribute_file_as(opf: ZipFile) -> Option { let parser = ParserConfig::new() .trim_whitespace(true) @@ -37,10 +50,12 @@ fn get_attribute_file_as(opf: ZipFile) -> Option { .coalesce_characters(true) .create_reader(opf); - let mut refines_found = false; - let mut refines_entries = Vec::new(); let mut is_epub3 = false; let mut creator_ids = Vec::new(); + let mut refines_found = false; + let mut role_found = false; + let mut refine_entries = HashMap::new(); + let mut curr_id = String::new(); for e in parser { match e { @@ -77,11 +92,43 @@ fn get_attribute_file_as(opf: ZipFile) -> Option { .any(|attr| attr.name.local_name == "property" && attr.value == "file-as") { refines_found = true; + curr_id = attributes + .iter() + .find(|a| a.name.local_name == "refines") + .unwrap() + .value + .clone(); + } else if attributes.iter().any(|attr| { + attr.name.local_name == "refines" && creator_ids.contains(&attr.value) + }) && attributes + .iter() + .any(|attr| attr.name.local_name == "property" && attr.value == "role") + { + role_found = true; + curr_id = attributes + .iter() + .find(|a| a.name.local_name == "refines") + .unwrap() + .value + .clone(); } } Ok(XmlEvent::Characters(value)) => { - if refines_found == true { - refines_entries.push(value); + if role_found == true { + if value == "aut" { + let entry = refine_entries.entry(curr_id.clone()).or_insert(Refine { + role: "".to_string(), + file_as: "".to_string(), + }); + entry.role = value; + } + role_found = false; + } else if refines_found == true { + let entry = refine_entries.entry(curr_id.clone()).or_insert(Refine { + role: "".to_string(), + file_as: "".to_string(), + }); + entry.file_as = value; refines_found = false; } } @@ -97,10 +144,18 @@ fn get_attribute_file_as(opf: ZipFile) -> Option { } } - if refines_entries.len() == 1 { - return Some(refines_entries.remove(0)); - } else if refines_entries.len() >= 2 { - return Some(refines_entries.join(" & ")); + if refine_entries.len() == 1 { + return Some(refine_entries.values().next().unwrap().file_as.clone()); + } else if refine_entries.len() >= 2 { + return Some( + refine_entries + .values() + .into_iter() + .filter(|v|v.role == "aut") + .map(|v| v.file_as.clone()) + .collect::>() + .join(" & "), + ); } None @@ -109,43 +164,54 @@ fn get_attribute_file_as(opf: ZipFile) -> Option { struct BookEntry { id: i32, filepath: String, + author_sort: String, } -fn fix_firstauthor(tx: &Transaction) -> i32 { - let mut authors_fixed = 0; +fn get_epubs_from_database(tx: &Transaction) -> Vec { + let mut book_entries = Vec::new(); - // Get book ids from entries where we have something like "firstname lastname" in author - // but no "lastname, firstname" in fistauthor - // Get also book ids from the special case where we have multiple authors (separated by ", " in authors) - // but no ampersand ("&") in firstauthor - let mut stmt = tx.prepare(r" - SELECT files.book_id, folders.name, files.filename - FROM files INNER JOIN folders - ON files.folder_id = folders.id - WHERE files.book_id IN - ( - SELECT DISTINCT id FROM books_impl - WHERE (ext LIKE 'epub' AND author LIKE '% %' AND (firstauthor NOT LIKE '%\,%' ESCAPE '\' OR firstauthor LIKE '%&%')) - OR (ext LIKE 'epub' AND author LIKE '%\, %' ESCAPE '\' AND firstauthor NOT LIKE '%&%') - ) - AND files.storageid = 1 - ;").unwrap(); + let mut stmt = tx + .prepare( + r" + SELECT books.id, folders.name, files.filename, books.firstauthor + FROM books_impl books JOIN files + ON books.id = files.book_id + JOIN folders + ON folders.id = files.folder_id + WHERE files.storageid = 1 AND books.ext = 'epub' + ORDER BY books.id", + ) + .unwrap(); let mut rows = stmt.query(NO_PARAMS).unwrap(); - let mut bookentries = Vec::new(); while let Some(row) = rows.next().unwrap() { let book_id: i32 = row.get(0).unwrap(); let prefix: String = row.get(1).unwrap(); let filename: String = row.get(2).unwrap(); let filepath = format!("{}/{}", prefix, filename); - bookentries.push(BookEntry { + let author_sort: String = row.get(3).unwrap(); + + let entry = BookEntry { id: book_id, filepath, - }); + author_sort, + }; + + book_entries.push(entry); } - for entry in bookentries { + book_entries +} + +struct Statistics { + authors_fixed: i32, +} + +fn fix_db_entries(tx: &Transaction, book_entries: &Vec) -> Statistics { + let mut stat = Statistics { authors_fixed: 0 }; + + for entry in book_entries { let file = File::open(entry.filepath.as_str()); let file = match file { Err(_) => continue, @@ -158,29 +224,34 @@ fn fix_firstauthor(tx: &Transaction) -> i32 { if let Some(opf_file) = get_root_file(container).unwrap() { let opf = archive.by_name(opf_file.as_str()).unwrap(); + // firstauthor… if let Some(file_as) = get_attribute_file_as(opf) { - let mut stmt = tx - .prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id") - .unwrap(); - stmt.execute_named(named_params![":file_as": file_as, ":book_id": entry.id]) - .unwrap(); - authors_fixed = authors_fixed + 1; + if file_as != entry.author_sort { + println!("::: '{}' vs. '{}'", entry.author_sort, file_as); + let mut stmt = tx + .prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id") + .unwrap(); + //stmt.execute_named(named_params![":file_as": file_as, ":book_id": entry.id]) + // .unwrap(); + stat.authors_fixed = stat.authors_fixed + 1; + } } } } - authors_fixed + stat } fn main() { let mut conn = Connection::open("/mnt/ext1/system/explorer-3/explorer-3.db").unwrap(); let tx = conn.transaction().unwrap(); - let authors_fixed = fix_firstauthor(&tx); + let book_entries = get_epubs_from_database(&tx); + let stat = fix_db_entries(&tx, &book_entries); tx.commit().unwrap(); if cfg!(target_arch = "arm") { - if authors_fixed == 0 { + if stat.authors_fixed == 0 { pocketbook::dialog( pocketbook::Icon::Info, "The database seems to be ok.\nNothing had to be fixed.", @@ -188,8 +259,10 @@ fn main() { } else { pocketbook::dialog( pocketbook::Icon::Info, - &format!("Authors fixed: {}", &authors_fixed), + &format!("Authors fixed: {}", &stat.authors_fixed), ); } + } else { + println!("Authors fixed: {}", &stat.authors_fixed); } }