Compare commits

...

2 commits

Author SHA1 Message Date
a9620b3e34 Fixes and improvements
- Fix reading file-as from XML
- Now fixing first_author_letter
2021-02-12 09:13:31 +01:00
c6252464aa simplify creating XmlAut 2021-02-12 07:50:32 +01:00
4 changed files with 74 additions and 61 deletions

2
Cargo.lock generated
View file

@@ -148,7 +148,7 @@ dependencies = [
[[package]] [[package]]
name = "pbdbfixer" name = "pbdbfixer"
version = "0.5.0" version = "0.6.0"
dependencies = [ dependencies = [
"quick-xml", "quick-xml",
"rusqlite", "rusqlite",

View file

@@ -1,6 +1,6 @@
[package] [package]
name = "pbdbfixer" name = "pbdbfixer"
version = "0.5.0" version = "0.6.0"
authors = ["Martin Brodbeck <martin@brodbeck-online.de>"] authors = ["Martin Brodbeck <martin@brodbeck-online.de>"]
edition = "2018" edition = "2018"

View file

@@ -96,6 +96,16 @@ pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
role: String, role: String,
} }
impl XmlAut {
fn new() -> Self {
XmlAut {
name: String::new(),
sort: String::new(),
role: String::new(),
}
}
}
let mut xml_authors = HashMap::new(); let mut xml_authors = HashMap::new();
loop { loop {
@@ -121,14 +131,7 @@ pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
+ String::from_utf8(idval.unwrap().value.to_vec()) + String::from_utf8(idval.unwrap().value.to_vec())
.unwrap() .unwrap()
.as_str(); .as_str();
xml_authors.insert( xml_authors.insert(curr_id.clone(), XmlAut::new());
curr_id.clone(),
XmlAut {
name: "".to_string(),
sort: "".to_string(),
role: "".to_string(),
},
);
} }
} else { } else {
if let Some(file_as_val) = e if let Some(file_as_val) = e
@@ -136,45 +139,28 @@ pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
.filter(|attr| attr.as_ref().unwrap().key.ends_with(b"file-as")) .filter(|attr| attr.as_ref().unwrap().key.ends_with(b"file-as"))
.next() .next()
{ {
let ns = curr_id = "none".to_string() + xml_authors.len().to_string().as_str();
String::from_utf8(file_as_val.as_ref().unwrap().key.to_vec()).unwrap(); let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
curr_id = "none".to_string() + ns.split(':').collect::<Vec<&str>>()[0];
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut {
name: "".to_string(),
sort: "".to_string(),
role: "".to_string(),
});
entry.sort = file_as_val entry.sort = file_as_val
.unwrap() .unwrap()
.unescape_and_decode_value(&reader) .unescape_and_decode_value(&reader)
.unwrap_or_default(); .unwrap_or_default();
entry.role = "aut".to_string(); entry.role = "aut".to_string();
} } else if let Some(_role_val) = e
if let Some(role_val) = e
.attributes() .attributes()
.filter(|attr| attr.as_ref().unwrap().key.ends_with(b"role")) .filter(|attr| attr.as_ref().unwrap().key.ends_with(b"role"))
.next() .next()
{ {
let ns = curr_id = "none".to_string() + xml_authors.len().to_string().as_str();
String::from_utf8(role_val.as_ref().unwrap().key.to_vec()).unwrap();
curr_id = "none".to_string() + ns.split(':').collect::<Vec<&str>>()[0];
} }
} }
} }
Ok(Event::Text(ref e)) if creator_found => { Ok(Event::Text(ref e)) if creator_found => {
if is_epub3 { if is_epub3 {
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut { let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
name: "".to_string(),
sort: "".to_string(),
role: "".to_string(),
});
entry.name = String::from_utf8(e.to_vec()).unwrap(); entry.name = String::from_utf8(e.to_vec()).unwrap();
} else { } else {
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut { let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
name: "".to_string(),
sort: "".to_string(),
role: "".to_string(),
});
entry.name = String::from_utf8(e.to_vec()).unwrap(); entry.name = String::from_utf8(e.to_vec()).unwrap();
entry.role = "aut".to_string(); entry.role = "aut".to_string();
} }
@@ -203,21 +189,13 @@ pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
} }
} }
Ok(Event::Text(ref e)) if file_as_found && is_epub3 => { Ok(Event::Text(ref e)) if file_as_found && is_epub3 => {
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut { let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
name: "".to_string(),
sort: "".to_string(),
role: "".to_string(),
});
entry.sort = String::from_utf8(e.to_vec()).unwrap(); entry.sort = String::from_utf8(e.to_vec()).unwrap();
file_as_found = false; file_as_found = false;
} }
Ok(Event::Text(ref e)) if role_found && is_epub3 => { Ok(Event::Text(ref e)) if role_found && is_epub3 => {
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut { let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
name: "".to_string(),
sort: "".to_string(),
role: "".to_string(),
});
entry.role = String::from_utf8(e.to_vec()).unwrap(); entry.role = String::from_utf8(e.to_vec()).unwrap();
role_found = false; role_found = false;
@@ -234,6 +212,8 @@ pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
} }
} }
//println!("Meta: {:?}", &xml_authors);
epub_meta.authors = xml_authors epub_meta.authors = xml_authors
.into_iter() .into_iter()
.filter(|&(_, ref xml_author)| &xml_author.role == "aut" && &xml_author.name.len() > &0) .filter(|&(_, ref xml_author)| &xml_author.role == "aut" && &xml_author.name.len() > &0)
@@ -243,5 +223,7 @@ pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
}) })
.collect(); .collect();
//println!("Meta: {:?}", &epub_meta);
Some(epub_meta) Some(epub_meta)
} }

View file

@@ -11,6 +11,7 @@ struct BookEntry {
firstauthor: String, firstauthor: String,
has_drm: bool, has_drm: bool,
genre: String, genre: String,
first_author_letter: String,
} }
fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> { fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
@@ -18,8 +19,9 @@ fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
let mut stmt = tx let mut stmt = tx
.prepare( .prepare(
r" r#"
SELECT books.id, folders.name, files.filename, books.firstauthor, books.author, genres.name SELECT books.id, folders.name, files.filename, books.firstauthor,
books.author, genres.name, first_author_letter
FROM books_impl books JOIN files FROM books_impl books JOIN files
ON books.id = files.book_id ON books.id = files.book_id
JOIN folders JOIN folders
@@ -29,7 +31,7 @@ fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
LEFT OUTER JOIN genres LEFT OUTER JOIN genres
ON genres.id = btg.genreid ON genres.id = btg.genreid
WHERE files.storageid = 1 AND books.ext = 'epub' WHERE files.storageid = 1 AND books.ext = 'epub'
ORDER BY books.id", ORDER BY books.id"#,
) )
.unwrap(); .unwrap();
@@ -47,6 +49,7 @@ fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
_ => false, _ => false,
}; };
let genre: String = row.get(5).unwrap_or_default(); let genre: String = row.get(5).unwrap_or_default();
let first_author_letter = row.get(6).unwrap_or_default();
let entry = BookEntry { let entry = BookEntry {
id: book_id, id: book_id,
@@ -55,6 +58,7 @@ fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
author, author,
has_drm, has_drm,
genre, genre,
first_author_letter,
}; };
book_entries.push(entry); book_entries.push(entry);
@@ -66,7 +70,7 @@ fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
fn remove_ghost_books_from_db(tx: &Transaction) -> usize { fn remove_ghost_books_from_db(tx: &Transaction) -> usize {
let mut stmt = tx let mut stmt = tx
.prepare( .prepare(
r" r#"
DELETE FROM books_impl DELETE FROM books_impl
WHERE id IN ( WHERE id IN (
SELECT books.id SELECT books.id
@@ -74,34 +78,34 @@ fn remove_ghost_books_from_db(tx: &Transaction) -> usize {
LEFT OUTER JOIN files LEFT OUTER JOIN files
ON books.id = files.book_id ON books.id = files.book_id
WHERE files.filename is NULL WHERE files.filename is NULL
)", )"#,
) )
.unwrap(); .unwrap();
let num = stmt.execute(NO_PARAMS).unwrap(); let num = stmt.execute(NO_PARAMS).unwrap();
tx.execute( tx.execute(
r"DELETE FROM books_settings WHERE bookid NOT IN ( SELECT id FROM books_impl )", r#"DELETE FROM books_settings WHERE bookid NOT IN ( SELECT id FROM books_impl )"#,
NO_PARAMS, NO_PARAMS,
) )
.unwrap(); .unwrap();
tx.execute( tx.execute(
r"DELETE FROM books_uids WHERE book_id NOT IN ( SELECT id FROM books_impl )", r#"DELETE FROM books_uids WHERE book_id NOT IN ( SELECT id FROM books_impl )"#,
NO_PARAMS, NO_PARAMS,
) )
.unwrap(); .unwrap();
tx.execute( tx.execute(
r"DELETE FROM bookshelfs_books WHERE bookid NOT IN ( SELECT id FROM books_impl )", r#"DELETE FROM bookshelfs_books WHERE bookid NOT IN ( SELECT id FROM books_impl )"#,
NO_PARAMS, NO_PARAMS,
) )
.unwrap(); .unwrap();
tx.execute( tx.execute(
r"DELETE FROM booktogenre WHERE bookid NOT IN ( SELECT id FROM books_impl )", r#"DELETE FROM booktogenre WHERE bookid NOT IN ( SELECT id FROM books_impl )"#,
NO_PARAMS, NO_PARAMS,
) )
.unwrap(); .unwrap();
tx.execute( tx.execute(
r"DELETE FROM social WHERE bookid NOT IN ( SELECT id FROM books_impl )", r#"DELETE FROM social WHERE bookid NOT IN ( SELECT id FROM books_impl )"#,
NO_PARAMS, NO_PARAMS,
) )
.unwrap(); .unwrap();
@@ -114,6 +118,7 @@ struct Statistics {
ghost_books_cleaned: usize, ghost_books_cleaned: usize,
drm_skipped: usize, drm_skipped: usize,
genres_fixed: usize, genres_fixed: usize,
sorting_fixed: usize,
} }
fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics { fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics {
@@ -122,6 +127,7 @@ fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics
ghost_books_cleaned: 0, ghost_books_cleaned: 0,
drm_skipped: 0, drm_skipped: 0,
genres_fixed: 0, genres_fixed: 0,
sorting_fixed: 0,
}; };
for entry in book_entries { for entry in book_entries {
@@ -131,17 +137,14 @@ fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics
} }
if let Some(epub_metadata) = epub::get_epub_metadata(&entry.filepath) { if let Some(epub_metadata) = epub::get_epub_metadata(&entry.filepath) {
let authors = epub_metadata // Fix firstauthor…
let mut firstauthors = epub_metadata
.authors .authors
.iter() .iter()
.filter(|aut| aut.firstauthor.len() > 0) .filter(|aut| aut.firstauthor.len() > 0)
.collect::<Vec<_>>();
// Fix firstauthor…
let firstauthors = authors
.iter()
.map(|aut| aut.firstauthor.clone()) .map(|aut| aut.firstauthor.clone())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
firstauthors.sort();
if !firstauthors.iter().all(|s| entry.firstauthor.contains(s)) { if !firstauthors.iter().all(|s| entry.firstauthor.contains(s)) {
let mut stmt = tx let mut stmt = tx
.prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id") .prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id")
@@ -153,8 +156,28 @@ fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics
stat.authors_fixed = stat.authors_fixed + 1; stat.authors_fixed = stat.authors_fixed + 1;
} }
// Fix first_author_letter
let first_author_letter = firstauthors
.join(" & ")
.chars()
.next()
.unwrap_or_default()
.to_string()
.to_uppercase();
if entry.first_author_letter != first_author_letter {
let mut stmt = tx
.prepare("UPDATE books_impl SET first_author_letter = :first_letter WHERE id = :book_id")
.unwrap();
stmt.execute_named(
named_params![":first_letter": first_author_letter,":book_id": entry.id],
)
.unwrap();
stat.sorting_fixed = stat.sorting_fixed + 1;
}
// Fix author names… // Fix author names…
let authornames = authors let authornames = epub_metadata
.authors
.iter() .iter()
.map(|aut| aut.name.clone()) .map(|aut| aut.name.clone())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@@ -169,6 +192,7 @@ fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics
stat.authors_fixed = stat.authors_fixed + 1; stat.authors_fixed = stat.authors_fixed + 1;
} }
// Fix genre…
if entry.genre.is_empty() && epub_metadata.genre.len() > 0 { if entry.genre.is_empty() && epub_metadata.genre.len() > 0 {
let mut stmt = tx let mut stmt = tx
.prepare(r#"INSERT INTO genres (name) SELECT :genre ON CONFLICT DO NOTHING"#) .prepare(r#"INSERT INTO genres (name) SELECT :genre ON CONFLICT DO NOTHING"#)
@@ -255,10 +279,12 @@ fn main() {
pocketbook::Icon::Info, pocketbook::Icon::Info,
&format!( &format!(
"Authors fixed: {}\n\ "Authors fixed: {}\n\
Sorting fixed: {}\n\
Genres fixed: {}\n\ Genres fixed: {}\n\
Books skipped (DRM): {}\n\ Books skipped (DRM): {}\n\
Books cleaned from DB: {}", Books cleaned from DB: {}",
&stat.authors_fixed, &stat.authors_fixed,
&stat.sorting_fixed,
&stat.genres_fixed, &stat.genres_fixed,
&stat.drm_skipped, &stat.drm_skipped,
&stat.ghost_books_cleaned &stat.ghost_books_cleaned
@@ -269,10 +295,15 @@ fn main() {
} else { } else {
println!( println!(
"Authors fixed: {}\n\ "Authors fixed: {}\n\
Sorting fixed: {}\n\
Genres fixed: {}\n\ Genres fixed: {}\n\
Books skipped (DRM): {}\n\ Books skipped (DRM): {}\n\
Books cleaned from DB: {}", Books cleaned from DB: {}",
&stat.authors_fixed, &stat.genres_fixed, &stat.drm_skipped, &stat.ghost_books_cleaned &stat.authors_fixed,
&stat.sorting_fixed,
&stat.genres_fixed,
&stat.drm_skipped,
&stat.ghost_books_cleaned
); );
} }
} }