Compare commits

...

3 commits

4 changed files with 336 additions and 46 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
/target /target
.vscode

2
Cargo.lock generated
View file

@ -148,7 +148,7 @@ dependencies = [
[[package]] [[package]]
name = "pbdbfixer" name = "pbdbfixer"
version = "0.2.0" version = "0.3.0"
dependencies = [ dependencies = [
"rusqlite", "rusqlite",
"xml-rs", "xml-rs",

View file

@ -1,6 +1,6 @@
[package] [package]
name = "pbdbfixer" name = "pbdbfixer"
version = "0.2.0" version = "0.3.0"
authors = ["Martin Brodbeck <martin@brodbeck-online.de>"] authors = ["Martin Brodbeck <martin@brodbeck-online.de>"]
edition = "2018" edition = "2018"

View file

@ -1,14 +1,22 @@
mod pocketbook; mod pocketbook;
use rusqlite::{named_params, Connection, Result, Transaction, NO_PARAMS}; use rusqlite::{named_params, Connection, Result, Transaction, NO_PARAMS};
use std::error::Error;
use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
use std::{collections::HashMap, fs::File};
use std::{error::Error, io::Read};
use xml::reader::{EventReader, ParserConfig, XmlEvent}; use xml::reader::{EventReader, ParserConfig, XmlEvent};
use zip::{read::ZipFile, ZipArchive}; use zip::{read::ZipFile, ZipArchive};
fn get_root_file(container: ZipFile) -> Result<Option<String>, Box<dyn Error>> { fn get_root_file(mut container: ZipFile) -> Result<Option<String>, Box<dyn Error>> {
let parser = EventReader::new(container); let mut buf = String::new();
container.read_to_string(&mut buf).unwrap();
// Get rid of the BOM mark, if any
if buf.starts_with("\u{feff}") {
buf = buf.strip_prefix("\u{feff}").unwrap().to_owned();
}
let parser = EventReader::new(BufReader::new(buf.as_bytes()));
for e in parser { for e in parser {
match e { match e {
@ -30,6 +38,11 @@ fn get_root_file(container: ZipFile) -> Result<Option<String>, Box<dyn Error>> {
Ok(None) Ok(None)
} }
/// Values collected from `<meta refines="…">` elements that refine one
/// creator of an OPF package document.
///
/// Instances are stored in a map keyed by the value of the `refines`
/// attribute and are filled in piecewise as the matching `meta` elements
/// are encountered; a field that has not been seen yet holds an empty string.
struct Refine {
    /// Text of the `meta` element whose `property` is `role`
    /// (only the value `aut` is ever stored by the parser).
    role: String,
    /// Text of the `meta` element whose `property` is `file-as`.
    file_as: String,
}
fn get_attribute_file_as(opf: ZipFile) -> Option<String> { fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
let parser = ParserConfig::new() let parser = ParserConfig::new()
.trim_whitespace(true) .trim_whitespace(true)
@ -37,10 +50,12 @@ fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
.coalesce_characters(true) .coalesce_characters(true)
.create_reader(opf); .create_reader(opf);
let mut refines_found = false;
let mut refines_entries = Vec::new();
let mut is_epub3 = false; let mut is_epub3 = false;
let mut creator_ids = Vec::new(); let mut creator_ids = Vec::new();
let mut refines_found = false;
let mut role_found = false;
let mut refine_entries = HashMap::new();
let mut curr_id = String::new();
for e in parser { for e in parser {
match e { match e {
@ -77,11 +92,43 @@ fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
.any(|attr| attr.name.local_name == "property" && attr.value == "file-as") .any(|attr| attr.name.local_name == "property" && attr.value == "file-as")
{ {
refines_found = true; refines_found = true;
curr_id = attributes
.iter()
.find(|a| a.name.local_name == "refines")
.unwrap()
.value
.clone();
} else if attributes.iter().any(|attr| {
attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
}) && attributes
.iter()
.any(|attr| attr.name.local_name == "property" && attr.value == "role")
{
role_found = true;
curr_id = attributes
.iter()
.find(|a| a.name.local_name == "refines")
.unwrap()
.value
.clone();
} }
} }
Ok(XmlEvent::Characters(value)) => { Ok(XmlEvent::Characters(value)) => {
if refines_found == true { if role_found == true {
refines_entries.push(value); if value == "aut" {
let entry = refine_entries.entry(curr_id.clone()).or_insert(Refine {
role: "".to_string(),
file_as: "".to_string(),
});
entry.role = value;
}
role_found = false;
} else if refines_found == true {
let entry = refine_entries.entry(curr_id.clone()).or_insert(Refine {
role: "".to_string(),
file_as: "".to_string(),
});
entry.file_as = value;
refines_found = false; refines_found = false;
} }
} }
@ -97,10 +144,156 @@ fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
} }
} }
if refines_entries.len() == 1 { if refine_entries.len() == 1 {
return Some(refines_entries.remove(0)); return Some(refine_entries.values().next().unwrap().file_as.clone());
} else if refines_entries.len() >= 2 { } else if refine_entries.len() >= 2 {
return Some(refines_entries.join(" & ")); return Some(
refine_entries
.values()
.into_iter()
.filter(|v| v.role == "aut")
.map(|v| v.file_as.clone())
.collect::<Vec<String>>()
.join(" & "),
);
}
None
}
/// One creator entry read from an OPF package document.
///
/// A field that has not been filled in yet holds an empty string.
struct Creator {
    /// Role of the creator. When the package is not version 3 this is the
    /// `role` attribute of the creator element, or `aut` when the attribute
    /// is absent; otherwise it is the text of a refining `meta` element
    /// whose `property` is `role`.
    role: String,
    /// Character data of the creator element.
    name: String,
}
fn get_attribute_creator(opf: ZipFile) -> Option<String> {
let parser = ParserConfig::new()
.trim_whitespace(true)
.ignore_comments(true)
.coalesce_characters(true)
.create_reader(opf);
let mut is_epub3 = false;
let mut creator_found = true;
let mut creator_ids = Vec::new();
let mut role_found = false;
let mut creator_entries = HashMap::new();
let mut epub2_creator_entries = Vec::new();
let mut curr_id = String::new();
for e in parser {
match e {
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "package" => {
for attr in attributes {
if attr.name.local_name == "version" {
if attr.value.starts_with("3") == true {
is_epub3 = true;
}
}
}
}
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "creator" => {
creator_found = true;
if !is_epub3 {
match attributes
.iter()
.find(|attr| attr.name.local_name == "role")
{
Some(attr) => {
epub2_creator_entries.push(Creator {
role: attr.value.clone(),
name: "".to_string(),
});
}
None => {
epub2_creator_entries.push(Creator {
role: "aut".to_string(),
name: "".to_string(),
});
}
}
}
for attr in attributes {
if is_epub3 && attr.name.local_name == "id" {
creator_ids.push("#".to_owned() + attr.value.as_str());
//creator_entries.insert(attr.value.clone(), Creator{role: "".to_string(), name: "".to_string()});
curr_id = "#".to_owned() + attr.value.as_str();
}
}
}
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "meta" => {
if attributes.iter().any(|attr| {
attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
}) && attributes
.iter()
.any(|attr| attr.name.local_name == "property" && attr.value == "role")
{
role_found = true;
curr_id = attributes
.iter()
.find(|a| a.name.local_name == "refines")
.unwrap()
.value
.clone();
}
}
Ok(XmlEvent::Characters(value)) => {
if creator_found && is_epub3 == false {
epub2_creator_entries.last_mut().unwrap().name = value.clone();
} else if creator_found && is_epub3 == true {
let entry = creator_entries.entry(curr_id.clone()).or_insert(Creator {
role: "".to_string(),
name: "".to_string(),
});
entry.name = value;
creator_found = false;
} else if role_found == true {
let entry = creator_entries.entry(curr_id.clone()).or_insert(Creator {
role: "".to_string(),
name: "".to_string(),
});
entry.role = value;
role_found = false;
}
}
Ok(XmlEvent::StartElement { .. }) => {
if creator_found == true {
creator_found = false;
}
}
Err(e) => {
println!("{}", e);
break;
}
_ => {}
}
}
if !is_epub3 && epub2_creator_entries.len() >= 1 {
return Some(
epub2_creator_entries
.into_iter()
.filter(|v| v.role == "aut")
.map(|v| v.name.clone())
.collect::<Vec<String>>()
.join(", "),
);
} else if creator_entries.len() >= 1 {
return Some(
creator_entries
.values()
.into_iter()
.filter(|v| v.role == "aut")
.map(|v| v.name.clone())
.collect::<Vec<String>>()
.join(", "),
);
} }
None None
@ -109,43 +302,107 @@ fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
struct BookEntry { struct BookEntry {
id: i32, id: i32,
filepath: String, filepath: String,
author: String,
firstauthor: String,
} }
fn fix_firstauthor(tx: &Transaction) -> i32 { fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
let mut authors_fixed = 0; let mut book_entries = Vec::new();
// Get book ids from entries where we have something like "firstname lastname" in author let mut stmt = tx
// but no "lastname, firstname" in fistauthor .prepare(
// Get also book ids from the special case where we have multiple authors (separated by ", " in authors) r"
// but no ampersand ("&") in firstauthor SELECT books.id, folders.name, files.filename, books.firstauthor, books.author
let mut stmt = tx.prepare(r" FROM books_impl books JOIN files
SELECT files.book_id, folders.name, files.filename ON books.id = files.book_id
FROM files INNER JOIN folders JOIN folders
ON files.folder_id = folders.id ON folders.id = files.folder_id
WHERE files.book_id IN WHERE files.storageid = 1 AND books.ext = 'epub'
( ORDER BY books.id",
SELECT DISTINCT id FROM books_impl
WHERE (ext LIKE 'epub' AND author LIKE '% %' AND (firstauthor NOT LIKE '%\,%' ESCAPE '\' OR firstauthor LIKE '%&amp;%'))
OR (ext LIKE 'epub' AND author LIKE '%\, %' ESCAPE '\' AND firstauthor NOT LIKE '%&%')
) )
AND files.storageid = 1 .unwrap();
;").unwrap();
let mut rows = stmt.query(NO_PARAMS).unwrap(); let mut rows = stmt.query(NO_PARAMS).unwrap();
let mut bookentries = Vec::new();
while let Some(row) = rows.next().unwrap() { while let Some(row) = rows.next().unwrap() {
let book_id: i32 = row.get(0).unwrap(); let book_id: i32 = row.get(0).unwrap();
let prefix: String = row.get(1).unwrap(); let prefix: String = row.get(1).unwrap();
let filename: String = row.get(2).unwrap(); let filename: String = row.get(2).unwrap();
let filepath = format!("{}/{}", prefix, filename); let filepath = format!("{}/{}", prefix, filename);
bookentries.push(BookEntry { let firstauthor: String = row.get(3).unwrap();
let author: String = row.get(4).unwrap();
let entry = BookEntry {
id: book_id, id: book_id,
filepath, filepath,
}); firstauthor,
author,
};
book_entries.push(entry);
} }
for entry in bookentries { book_entries
}
/// Removes "ghost" books: rows of `books_impl` that have no matching row in
/// `files`, followed by the rows in dependent tables that point at book ids
/// no longer present in `books_impl`.
///
/// All statements run on the supplied transaction `tx`; nothing is committed
/// here.
///
/// Returns the number of rows deleted from `books_impl` only; rows removed
/// from the dependent tables are not counted.
///
/// # Panics
///
/// Panics if any of the statements cannot be prepared or executed.
fn remove_ghost_books_from_db(tx: &Transaction) -> usize {
    // Clean-up statements for the tables that reference a book id. They are
    // executed in this order, after the ghost rows themselves are gone.
    const ORPHAN_CLEANUP: [&str; 5] = [
        r"DELETE FROM books_settings WHERE bookid NOT IN ( SELECT id FROM books_impl )",
        r"DELETE FROM books_uids WHERE book_id NOT IN ( SELECT id FROM books_impl )",
        r"DELETE FROM bookshelfs_books WHERE bookid NOT IN ( SELECT id FROM books_impl )",
        r"DELETE FROM booktogenre WHERE bookid NOT IN ( SELECT id FROM books_impl )",
        r"DELETE FROM social WHERE bookid NOT IN ( SELECT id FROM books_impl )",
    ];

    // A book is a ghost when the outer join finds no `files` row for it.
    // The SQL text is kept verbatim.
    let mut delete_ghosts = tx
        .prepare(
            r"
DELETE FROM books_impl
WHERE id IN (
SELECT books.id
FROM books_impl books
LEFT OUTER JOIN files
ON books.id = files.book_id
WHERE files.filename is NULL
)",
        )
        .unwrap();
    let ghosts_removed = delete_ghosts.execute(NO_PARAMS).unwrap();

    for sql in ORPHAN_CLEANUP.iter() {
        tx.execute(*sql, NO_PARAMS).unwrap();
    }

    ghosts_removed
}
/// Counters describing what a database fix-up run changed.
struct Statistics {
    /// Number of `UPDATE` statements issued against `books_impl` to rewrite
    /// a book's `firstauthor` or `author` column (each update counts once).
    authors_fixed: i32,
    /// Number of ghost rows deleted from `books_impl`.
    ghost_books_cleaned: usize,
}
fn fix_db_entries(tx: &Transaction, book_entries: &Vec<BookEntry>) -> Statistics {
let mut stat = Statistics {
authors_fixed: 0,
ghost_books_cleaned: 0,
};
for entry in book_entries {
let file = File::open(entry.filepath.as_str()); let file = File::open(entry.filepath.as_str());
let file = match file { let file = match file {
Err(_) => continue, Err(_) => continue,
@ -158,29 +415,53 @@ fn fix_firstauthor(tx: &Transaction) -> i32 {
if let Some(opf_file) = get_root_file(container).unwrap() { if let Some(opf_file) = get_root_file(container).unwrap() {
let opf = archive.by_name(opf_file.as_str()).unwrap(); let opf = archive.by_name(opf_file.as_str()).unwrap();
// firstauthor…
if let Some(file_as) = get_attribute_file_as(opf) { if let Some(file_as) = get_attribute_file_as(opf) {
if !file_as.split(" & ").all(|s| entry.firstauthor.contains(s)) {
let mut stmt = tx let mut stmt = tx
.prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id") .prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id")
.unwrap(); .unwrap();
stmt.execute_named(named_params![":file_as": file_as, ":book_id": entry.id]) stmt.execute_named(named_params![":file_as": file_as, ":book_id": entry.id])
.unwrap(); .unwrap();
authors_fixed = authors_fixed + 1; stat.authors_fixed = stat.authors_fixed + 1;
}
}
let opf = archive.by_name(opf_file.as_str()).unwrap();
// author…
if let Some(creator) = get_attribute_creator(opf) {
if !creator.split(", ").all(|s| entry.author.contains(s))
|| creator.len() < entry.author.len()
{
let mut stmt = tx
.prepare("UPDATE books_impl SET author = :creator WHERE id = :book_id")
.unwrap();
stmt.execute_named(named_params![":creator": creator, ":book_id": entry.id])
.unwrap();
stat.authors_fixed = stat.authors_fixed + 1;
}
} }
} }
} }
authors_fixed // ghost books
let num = remove_ghost_books_from_db(tx);
stat.ghost_books_cleaned = num;
stat
} }
fn main() { fn main() {
let mut conn = Connection::open("/mnt/ext1/system/explorer-3/explorer-3.db").unwrap(); let mut conn = Connection::open("/mnt/ext1/system/explorer-3/explorer-3.db").unwrap();
conn.execute("PRAGMA foreign_keys = 0", NO_PARAMS).unwrap();
let tx = conn.transaction().unwrap(); let tx = conn.transaction().unwrap();
let authors_fixed = fix_firstauthor(&tx); let book_entries = get_epubs_from_database(&tx);
let stat = fix_db_entries(&tx, &book_entries);
tx.commit().unwrap(); tx.commit().unwrap();
if cfg!(target_arch = "arm") { if cfg!(target_arch = "arm") {
if authors_fixed == 0 { if stat.authors_fixed == 0 {
pocketbook::dialog( pocketbook::dialog(
pocketbook::Icon::Info, pocketbook::Icon::Info,
"The database seems to be ok.\nNothing had to be fixed.", "The database seems to be ok.\nNothing had to be fixed.",
@ -188,8 +469,16 @@ fn main() {
} else { } else {
pocketbook::dialog( pocketbook::dialog(
pocketbook::Icon::Info, pocketbook::Icon::Info,
&format!("Authors fixed: {}", &authors_fixed), &format!(
"Authors fixed: {}\nBooks cleaned from DB: {}",
&stat.authors_fixed, &stat.ghost_books_cleaned
),
); );
} }
} else {
println!(
"Authors fixed: {}\nBooks cleaned from DB: {}",
&stat.authors_fixed, &stat.ghost_books_cleaned
);
} }
} }