From fca2e0a993ec7d978f5d7ad0b6035110ba08475f Mon Sep 17 00:00:00 2001
From: Martin Brodbeck
Date: Mon, 1 Feb 2021 19:39:36 +0100
Subject: [PATCH] check authors and remove ghost books

---
Review notes (free text in this position is not part of the commit message):
* Line breaks, indentation and generic parameters were reconstructed from a
  whitespace-collapsed copy of this patch. Whitespace on context lines
  (notably inside the SQL literals) is a best guess; use
  `git apply --ignore-whitespace` if a hunk is rejected.
* get_attribute_creator: `creator_found` now starts as `false`, EPUB 3 names
  are emitted in document order instead of HashMap order, and an empty
  result is reported as `None` so that fix_db_entries does not overwrite a
  stored author with an empty string.

 src/main.rs | 255 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 243 insertions(+), 12 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index e206a05..12d58b7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -151,7 +151,7 @@ fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
             refine_entries
                 .values()
                 .into_iter()
-                .filter(|v|v.role == "aut")
+                .filter(|v| v.role == "aut")
                 .map(|v| v.file_as.clone())
                 .collect::<Vec<String>>()
                 .join(" & "),
@@ -161,10 +161,160 @@ fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
     None
 }
 
+/// One `creator` element read from an OPF package document.
+struct Creator {
+    /// Role code of the creator; only entries with `"aut"` are reported as authors.
+    role: String,
+    /// Text content of the `creator` element.
+    name: String,
+}
+
+/// Collects the names of all creators with role `aut` from an OPF package document.
+///
+/// Packages whose `version` does not start with `3` carry the role as a `role` attribute
+/// on `creator` (a missing attribute counts as `aut`); version 3 packages provide it via
+/// `meta` elements that refine a creator id. Returns the names joined with `", "`, or
+/// `None` when no such creator was found.
+fn get_attribute_creator(opf: ZipFile) -> Option<String> {
+    let parser = ParserConfig::new()
+        .trim_whitespace(true)
+        .ignore_comments(true)
+        .coalesce_characters(true)
+        .create_reader(opf);
+
+    let mut is_epub3 = false;
+    let mut creator_found = false;
+    let mut creator_ids = Vec::new();
+    let mut role_found = false;
+    let mut creator_entries = HashMap::new();
+    let mut epub2_creator_entries = Vec::new();
+    let mut curr_id = String::new();
+
+    for e in parser {
+        match e {
+            Ok(XmlEvent::StartElement {
+                name, attributes, ..
+            }) if name.local_name == "package" => {
+                for attr in attributes {
+                    if attr.name.local_name == "version" {
+                        if attr.value.starts_with('3') {
+                            is_epub3 = true;
+                        }
+                    }
+                }
+            }
+            Ok(XmlEvent::StartElement {
+                name, attributes, ..
+            }) if name.local_name == "creator" => {
+                creator_found = true;
+                if !is_epub3 {
+                    match attributes
+                        .iter()
+                        .find(|attr| attr.name.local_name == "role")
+                    {
+                        Some(attr) => {
+                            epub2_creator_entries.push(Creator {
+                                role: attr.value.clone(),
+                                name: "".to_string(),
+                            });
+                        }
+                        None => {
+                            epub2_creator_entries.push(Creator {
+                                role: "aut".to_string(),
+                                name: "".to_string(),
+                            });
+                        }
+                    }
+                }
+                for attr in attributes {
+                    if is_epub3 && attr.name.local_name == "id" {
+                        creator_ids.push("#".to_owned() + attr.value.as_str());
+                        //creator_entries.insert(attr.value.clone(), Creator{role: "".to_string(), name: "".to_string()});
+                        curr_id = "#".to_owned() + attr.value.as_str();
+                    }
+                }
+            }
+            Ok(XmlEvent::StartElement {
+                name, attributes, ..
+            }) if name.local_name == "meta" => {
+                if attributes.iter().any(|attr| {
+                    attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
+                }) && attributes
+                    .iter()
+                    .any(|attr| attr.name.local_name == "property" && attr.value == "role")
+                {
+                    role_found = true;
+                    curr_id = attributes
+                        .iter()
+                        .find(|a| a.name.local_name == "refines")
+                        .unwrap()
+                        .value
+                        .clone();
+                }
+            }
+            Ok(XmlEvent::Characters(value)) => {
+                if creator_found && !is_epub3 {
+                    epub2_creator_entries.last_mut().unwrap().name = value;
+                } else if creator_found && is_epub3 {
+                    let entry = creator_entries.entry(curr_id.clone()).or_insert(Creator {
+                        role: "".to_string(),
+                        name: "".to_string(),
+                    });
+                    entry.name = value;
+                    creator_found = false;
+                } else if role_found {
+                    let entry = creator_entries.entry(curr_id.clone()).or_insert(Creator {
+                        role: "".to_string(),
+                        name: "".to_string(),
+                    });
+                    entry.role = value;
+                    role_found = false;
+                }
+            }
+            Ok(XmlEvent::StartElement { .. }) => {
+                if creator_found {
+                    creator_found = false;
+                }
+            }
+            Err(e) => {
+                println!("{}", e);
+                break;
+            }
+            _ => {}
+        }
+    }
+
+    if !is_epub3 && !epub2_creator_entries.is_empty() {
+        return Some(
+            epub2_creator_entries
+                .into_iter()
+                .filter(|v| v.role == "aut")
+                .map(|v| v.name)
+                .collect::<Vec<String>>()
+                .join(", "),
+        )
+        .filter(|names| !names.is_empty());
+    } else if !creator_entries.is_empty() {
+        return Some(
+            creator_ids
+                .iter()
+                .filter_map(|id| creator_entries.get(id))
+                .filter(|v| v.role == "aut")
+                .map(|v| v.name.clone())
+                .collect::<Vec<String>>()
+                .join(", "),
+        )
+        .filter(|names| !names.is_empty());
+    }
+
+    None
+}
+
 struct BookEntry {
     id: i32,
     filepath: String,
-    author_sort: String,
+    author: String,
+    firstauthor: String,
 }
 
 fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
@@ -173,7 +323,7 @@ fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
     let mut stmt = tx
         .prepare(
             r"
-                SELECT books.id, folders.name, files.filename, books.firstauthor
+                SELECT books.id, folders.name, files.filename, books.firstauthor, books.author
                 FROM books_impl books JOIN files
                     ON books.id = files.book_id
                     JOIN folders
@@ -190,12 +340,14 @@ fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
         let prefix: String = row.get(1).unwrap();
         let filename: String = row.get(2).unwrap();
         let filepath = format!("{}/{}", prefix, filename);
-        let author_sort: String = row.get(3).unwrap();
+        let firstauthor: String = row.get(3).unwrap();
+        let author: String = row.get(4).unwrap();
 
         let entry = BookEntry {
             id: book_id,
             filepath,
-            author_sort,
+            firstauthor,
+            author,
         };
 
         book_entries.push(entry);
@@ -204,12 +356,66 @@ fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
     book_entries
 }
 
+/// Deletes every row of `books_impl` that has no matching row in `files`, then removes
+/// rows of the dependent tables that reference a book id which no longer exists.
+///
+/// Returns the number of rows deleted from `books_impl`.
+fn remove_ghost_books_from_db(tx: &Transaction) -> usize {
+    let mut stmt = tx
+        .prepare(
+            r"
+                DELETE FROM books_impl
+                WHERE id IN (
+                    SELECT books.id
+                    FROM books_impl books
+                        LEFT OUTER JOIN files
+                        ON books.id = files.book_id
+                    WHERE files.filename is NULL
+                )",
+        )
+        .unwrap();
+
+    let num = stmt.execute(NO_PARAMS).unwrap();
+
+    tx.execute(
+        r"DELETE FROM books_settings WHERE bookid NOT IN ( SELECT id FROM books_impl )",
+        NO_PARAMS,
+    )
+    .unwrap();
+    tx.execute(
+        r"DELETE FROM books_uids WHERE book_id NOT IN ( SELECT id FROM books_impl )",
+        NO_PARAMS,
+    )
+    .unwrap();
+    tx.execute(
+        r"DELETE FROM bookshelfs_books WHERE bookid NOT IN ( SELECT id FROM books_impl )",
+        NO_PARAMS,
+    )
+    .unwrap();
+    tx.execute(
+        r"DELETE FROM booktogenre WHERE bookid NOT IN ( SELECT id FROM books_impl )",
+        NO_PARAMS,
+    )
+    .unwrap();
+    tx.execute(
+        r"DELETE FROM social WHERE bookid NOT IN ( SELECT id FROM books_impl )",
+        NO_PARAMS,
+    )
+    .unwrap();
+
+    num
+}
+
 struct Statistics {
     authors_fixed: i32,
+    ghost_books_cleaned: usize,
 }
 
 fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics {
-    let mut stat = Statistics { authors_fixed: 0 };
+    let mut stat = Statistics {
+        authors_fixed: 0,
+        ghost_books_cleaned: 0,
+    };
 
     for entry in book_entries {
         let file = File::open(entry.filepath.as_str());
@@ -226,25 +432,44 @@ fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics
             let opf = archive.by_name(opf_file.as_str()).unwrap();
             // firstauthor…
             if let Some(file_as) = get_attribute_file_as(opf) {
-                if file_as != entry.author_sort {
-                    println!("::: '{}' vs. '{}'", entry.author_sort, file_as);
+                if !file_as.split(" & ").all(|s| entry.firstauthor.contains(s)) {
                     let mut stmt = tx
                         .prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id")
                         .unwrap();
-                    //stmt.execute_named(named_params![":file_as": file_as, ":book_id": entry.id])
-                    //    .unwrap();
+                    stmt.execute_named(named_params![":file_as": file_as, ":book_id": entry.id])
+                        .unwrap();
+                    stat.authors_fixed = stat.authors_fixed + 1;
+                }
+            }
+            let opf = archive.by_name(opf_file.as_str()).unwrap();
+            // author…
+            if let Some(creator) = get_attribute_creator(opf) {
+                if !creator.split(", ").all(|s| entry.author.contains(s))
+                    || creator.len() < entry.author.len()
+                {
+                    let mut stmt = tx
+                        .prepare("UPDATE books_impl SET author = :creator WHERE id = :book_id")
+                        .unwrap();
+                    stmt.execute_named(named_params![":creator": creator, ":book_id": entry.id])
+                        .unwrap();
                     stat.authors_fixed = stat.authors_fixed + 1;
                 }
             }
         }
     }
 
+    // ghost books
+    let num = remove_ghost_books_from_db(tx);
+    stat.ghost_books_cleaned = num;
+
     stat
 }
 
 fn main() {
     let mut conn = Connection::open("/mnt/ext1/system/explorer-3/explorer-3.db").unwrap();
 
+    conn.execute("PRAGMA foreign_keys = 0", NO_PARAMS).unwrap();
+
     let tx = conn.transaction().unwrap();
     let book_entries = get_epubs_from_database(&tx);
     let stat = fix_db_entries(&tx, &book_entries);
@@ -259,10 +484,16 @@ fn main() {
         } else {
             pocketbook::dialog(
                 pocketbook::Icon::Info,
-                &format!("Authors fixed: {}", &stat.authors_fixed),
+                &format!(
+                    "Authors fixed: {}\nBooks cleaned from DB: {}",
+                    &stat.authors_fixed, &stat.ghost_books_cleaned
+                ),
             );
         }
     } else {
-        println!("Authors fixed: {}", &stat.authors_fixed);
+        println!(
+            "Authors fixed: {}\nBooks cleaned from DB: {}",
+            &stat.authors_fixed, &stat.ghost_books_cleaned
+        );
     }
 }