Compare commits

...

33 Commits

Author SHA1 Message Date
Martin Brodbeck 5af608f45b Rust edition 2018 → 2021 3 months ago
Martin Brodbeck 6db590f333 Push to new version. 6 months ago
Martin Brodbeck 56624427d5 Adapted to the new database schema. 6 months ago
Martin Brodbeck 8ef3238ac3 Push to new version. 6 months ago
Martin Brodbeck 49853f9d2f Do not rely on database column ext. 6 months ago
Martin Brodbeck e1beced72c Merge branch 'master' of ssh://git.rustysoft.de:60022/martin/pbdbfixer 8 months ago
Martin Brodbeck 995bc5e3ed updated to rusqlite 0.25 8 months ago
Martin Brodbeck 3a794874dc software versions updated 9 months ago
Martin Brodbeck ef7710cca6 Calibre emphasized 10 months ago
Martin Brodbeck f50e5e09e6 firmware versions updated 10 months ago
Martin Brodbeck 2fcb2ac3e7 Considering the altered database structure 10 months ago
Martin Brodbeck ad179ac9e0 push to new dependency versions 11 months ago
Martin Brodbeck d683804623 feedback part added 11 months ago
Martin Brodbeck ec11c16477 build instructions improved 11 months ago
Martin Brodbeck 65007fdf50 push to new version 11 months ago
Martin Brodbeck a6594d4abc Also fetch metadata from Adobe DRM encoded books 11 months ago
Martin Brodbeck 9baff090a8 push to new version 11 months ago
Martin Brodbeck 94e1dedb1e set foreign_key pragma correctly 11 months ago
Martin Brodbeck da405064d4 update dependencies 11 months ago
Martin Brodbeck e2e2468543 more on usage and compatibility 11 months ago
Martin Brodbeck 735335941e db stuff moved to new database module 11 months ago
Martin Brodbeck 624987326d Also remove "false positive" authors 11 months ago
Martin Brodbeck a52b8a8288 fix missing series information 11 months ago
Martin Brodbeck 7d8ab91d85 readme updated 11 months ago
Martin Brodbeck 577ae2b889 Fix showing results screen 11 months ago
Martin Brodbeck a9620b3e34 Fixes and improvements 11 months ago
Martin Brodbeck c6252464aa simplify creating XmlAut 11 months ago
Martin Brodbeck 247591707d code cleanup 11 months ago
Martin Brodbeck 90c2d34c03 xml parsing completely rewritten 11 months ago
Martin Brodbeck 1cf8b008d0 give more information (features, warning, …) 11 months ago
Martin Brodbeck e4f6efe12a fix genres 11 months ago
Martin Brodbeck 77d87c751d typo 12 months ago
Martin Brodbeck 0f642a61a4 TL4 added 12 months ago
  1. 97
      Cargo.lock
  2. 8
      Cargo.toml
  3. 43
      README.md
  4. 272
      src/database.rs
  5. 316
      src/epub.rs
  6. 519
      src/main.rs

97
Cargo.lock

@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler32"
version = "1.2.0"
@ -8,9 +10,14 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
[[package]]
name = "ahash"
version = "0.4.7"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e"
checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "bitflags"
@ -36,9 +43,9 @@ dependencies = [
[[package]]
name = "bzip2-sys"
version = "0.1.9+1.0.8"
version = "0.1.10+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad3b39a260062fca31f7b0b12f207e8f2590a67d32ec7d59c20484b07ea7285e"
checksum = "17fa3d1ac1ca21c5c4e36a97f3c3eb25084576f6fc47bf0139c1123434216c6c"
dependencies = [
"cc",
"libc",
@ -47,9 +54,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.0.66"
version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
[[package]]
name = "cfg-if"
@ -96,35 +103,46 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "getrandom"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [
"cfg-if 1.0.0",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.9.1"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
dependencies = [
"ahash",
]
[[package]]
name = "hashlink"
version = "0.6.0"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d99cf782f0dc4372d26846bec3de7804ceb5df083c2d4462c0b8d2330e894fa8"
checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf"
dependencies = [
"hashbrown",
]
[[package]]
name = "libc"
version = "0.2.83"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eb0c4e9c72ee9d69b767adebc5f4788462a3b45624acd919475c92597bcaf4f"
checksum = "265d751d31d6780a3f956bb5b8022feba2d94eeee5a84ba64f4212eedca42213"
[[package]]
name = "libsqlite3-sys"
version = "0.20.1"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64d31059f22935e6c31830db5249ba2b7ecd54fd73a9909286f0a67aa55c2fbd"
checksum = "290b64917f8b0cb885d9de0f9959fe1f775d7fa12f1da2db9001c1c8ab60f89d"
dependencies = [
"cc",
"pkg-config",
@ -146,12 +164,18 @@ dependencies = [
"adler32",
]
[[package]]
name = "once_cell"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
[[package]]
name = "pbdbfixer"
version = "0.3.1"
version = "0.8.3"
dependencies = [
"quick-xml",
"rusqlite",
"xml-rs",
"zip",
]
@ -170,20 +194,29 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "quick-xml"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b"
dependencies = [
"memchr",
]
[[package]]
name = "quote"
version = "1.0.8"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rusqlite"
version = "0.24.2"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5f38ee71cbab2c827ec0ac24e76f82eca723cee92c509a65f67dee393c25112"
checksum = "57adcf67c8faaf96f3248c2a7b419a0dbc52ebe36ba83dd57fe83827c1ea4eb3"
dependencies = [
"bitflags",
"fallible-iterator",
@ -213,18 +246,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.23"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146"
checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.23"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1"
checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
dependencies = [
"proc-macro2",
"quote",
@ -254,6 +287,12 @@ version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b00bca6106a5e23f3eee943593759b7fcddb00554332e856d990c893966879fb"
[[package]]
name = "version_check"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
@ -282,17 +321,11 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "xml-rs"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
[[package]]
name = "zip"
version = "0.5.9"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc2896475a242c41366941faa27264df2cb935185a92e059a004d0048feb2ac5"
checksum = "8264fcea9b7a036a4a5103d7153e988dbc2ebbafb34f68a3c2d404b6b82d74b6"
dependencies = [
"byteorder",
"bzip2",

8
Cargo.toml

@ -1,15 +1,15 @@
[package]
name = "pbdbfixer"
version = "0.3.1"
version = "0.8.3"
authors = ["Martin Brodbeck <martin@brodbeck-online.de>"]
edition = "2018"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
zip = "0.5"
xml-rs = "0.8"
quick-xml = "0.22"
[dependencies.rusqlite]
version = "0.24"
version = "0.25"
features = ["bundled"]

43
README.md

@ -6,22 +6,43 @@ EPUB files, this program tries to fix these issues. It tries to identify
wrong database entries and fix them by reading the corresponding epub
metadata.
## Features
The app tries to fix the following issues in the database:
- Correction of wrong firstauthor entries (books_impl table)
- Correction of wrong first_author_letter entries (books_impl table)
- Correction of wrong author entries (books_impl table)
- Removing deleted e-books from the database (various tables)
- Add missing genre if present in epub (genre and booktogenre tables)
- Add missing series information (books_impl table)
The best results are achieved when metadata has been carefully maintained with **Calibre**.
## Compatibility
This program is tested only on a PocketBook Touch HD 3 device (software
version 6.1.900). It might work with other PocketBook devices/software
versions. Please tell me, if it works for you (and do make a backup of the
explorer-3.db file before trying!).
This program is tested on a PocketBook
- *Touch HD 3* (software version 6.4)
- *Inkpad 3 Pro* (software version 6.4)
- *Touch Lux 4* (software version 6.3)
It might work with other PocketBook devices/software versions. Please tell me if it works for you (and do make a backup of the explorer-3.db file before trying!).
## Installation and Usage
Just copy the executable file into the PocketBook's application directory.
If you encounter duplicate authors in the PocketBook's library, open the
applications screen and tap on the PbDbFixer icon.
---
**WARNING**:
Use at your own risk. In case of doubt it is not a mistake to make a backup of the file `/system/explorer-3/explorer-3.db` beforehand.
---
Just copy the executable file into the PocketBook's application directory. If you encounter duplicate authors or other issues (see "Features" above) in the PocketBook's library, open the applications screen and tap on the PbDbFixer icon.
If you don't see any changes:
There might be an explorer (which shows your library) process already running. Then you should just stop/kill it with the task manager. Putting the device to sleep and then wake it up might also work. Afterwards, the changes should be visible to the explorer.
## Feedback
Feedback is highly appreciated. You can reach me via Matrix [@beedaddy:matrix.rustysoft.de](https://matrix.to/#/@beedaddy:matrix.rustysoft.de) or ask questions in the [PbDbFixer-Thread](https://www.e-reader-forum.de/t/pbdbfixer-noch-ein-tool-zum-korrigieren-von-metadaten.156702/) of the German *E-Reader Forum*.
## Build
To be able to build PbDbFixer, you have to have the cross compiler for
ARM CPUs installed. On Arch Linux, the AUR package `arm-linux-gnueabi-gcc75-linaro-bin`
does the job. Don't forget to tell `cargo` which compiler/linker it has to
invoke. In my case, I had to edit `~/.cargo/config`:
If you want to build PbDbFixer yourself, make sure that you have Rust's toolchain target `arm-unknown-linux-gnueabi` as well as the GCC cross compiler for ARM CPUs installed. On Arch Linux, the AUR package `arm-linux-gnueabi-gcc75-linaro-bin` does the job. Don't forget to tell `cargo` which compiler/linker it has to invoke. In my case, I had to edit `~/.cargo/config`:
```
[target.arm-unknown-linux-gnueabi]
linker = "arm-linux-gnueabi-gcc"

272
src/database.rs

@ -0,0 +1,272 @@
use rusqlite::{named_params, Connection, Transaction};
use crate::epub;
const DATABASE_FILE: &str = "/mnt/ext1/system/explorer-3/explorer-3.db";
pub struct BookEntry {
id: i32,
filepath: String,
author: String,
firstauthor: String,
genre: String,
first_author_letter: String,
series: String,
}
fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
let mut book_entries = Vec::new();
let version: i32 = tx
.query_row(r#"SELECT id FROM version"#, [], |r| r.get(0))
.unwrap();
let books_or_files = match version {
x if x >= 38 => "files",
_ => "books",
};
let stmt_str = format!(
r#"
SELECT books.id, folders.name, files.filename, books.firstauthor,
books.author, genres.name, first_author_letter, series
FROM books_impl books JOIN files
ON books.id = files.book_id
JOIN folders
ON folders.id = files.folder_id
LEFT OUTER JOIN booktogenre btg
ON books.id = btg.bookid
LEFT OUTER JOIN genres
ON genres.id = btg.genreid
WHERE files.storageid = 1 AND {}.ext = 'epub'
ORDER BY books.id"#,
&books_or_files
);
let mut stmt = tx.prepare(&stmt_str).unwrap();
let mut rows = stmt.query([]).unwrap();
while let Some(row) = rows.next().unwrap() {
let book_id: i32 = row.get(0).unwrap();
let prefix: String = row.get(1).unwrap();
let filename: String = row.get(2).unwrap();
let filepath = format!("{}/{}", prefix, filename);
let firstauthor: String = row.get(3).unwrap_or_default();
let author: String = row.get(4).unwrap_or_default();
let genre: String = row.get(5).unwrap_or_default();
let first_author_letter = row.get(6).unwrap_or_default();
let series: String = row.get(7).unwrap_or_default();
let entry = BookEntry {
id: book_id,
filepath,
firstauthor,
author,
genre,
first_author_letter,
series,
};
book_entries.push(entry);
}
book_entries
}
fn remove_ghost_books_from_db(tx: &Transaction) -> usize {
let mut stmt = tx
.prepare(
r#"
DELETE FROM books_impl
WHERE id IN (
SELECT books.id
FROM books_impl books
LEFT OUTER JOIN files
ON books.id = files.book_id
WHERE files.filename is NULL
)"#,
)
.unwrap();
let num = stmt.execute([]).unwrap();
tx.execute(
r#"DELETE FROM books_settings WHERE bookid NOT IN ( SELECT id FROM books_impl )"#,
[],
)
.unwrap();
let version: i32 = tx
.query_row(r#"SELECT id FROM version"#, [], |r| r.get(0))
.unwrap();
if version >= 37 {
tx.execute(
r#"DELETE FROM books_fast_hashes WHERE book_id NOT IN ( SELECT id FROM books_impl )"#,
[],
)
.unwrap();
} else {
tx.execute(
r#"DELETE FROM books_uids WHERE book_id NOT IN ( SELECT id FROM books_impl )"#,
[],
)
.unwrap();
}
tx.execute(
r#"DELETE FROM bookshelfs_books WHERE bookid NOT IN ( SELECT id FROM books_impl )"#,
[],
)
.unwrap();
tx.execute(
r#"DELETE FROM booktogenre WHERE bookid NOT IN ( SELECT id FROM books_impl )"#,
[],
)
.unwrap();
tx.execute(
r#"DELETE FROM social WHERE bookid NOT IN ( SELECT id FROM books_impl )"#,
[],
)
.unwrap();
num
}
pub struct Statistics {
pub authors_fixed: i32,
pub ghost_books_cleaned: usize,
pub genres_fixed: usize,
pub sorting_fixed: usize,
pub series_fixed: usize,
}
impl Statistics {
pub fn anything_fixed(&self) -> bool {
&self.authors_fixed > &0
|| &self.genres_fixed > &0
|| &self.ghost_books_cleaned > &0
|| &self.sorting_fixed > &0
|| &self.series_fixed > &0
}
}
pub fn fix_db_entries() -> Statistics {
let mut stat = Statistics {
authors_fixed: 0,
ghost_books_cleaned: 0,
genres_fixed: 0,
sorting_fixed: 0,
series_fixed: 0,
};
let mut conn = Connection::open(DATABASE_FILE).unwrap();
conn.pragma_update(None, "foreign_keys", &0).unwrap();
let tx = conn.transaction().unwrap();
let book_entries = get_epubs_from_database(&tx);
for entry in book_entries {
if let Some(epub_metadata) = epub::get_epub_metadata(&entry.filepath) {
// Fix firstauthor…
let mut firstauthors = epub_metadata
.authors
.iter()
.filter(|aut| aut.firstauthor.len() > 0)
.map(|aut| aut.firstauthor.clone())
.collect::<Vec<_>>();
firstauthors.sort();
if !firstauthors.iter().all(|s| entry.firstauthor.contains(s)) {
let mut stmt = tx
.prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id")
.unwrap();
stmt.execute(
named_params![":file_as": firstauthors.join(" & "), ":book_id": entry.id],
)
.unwrap();
stat.authors_fixed = stat.authors_fixed + 1;
}
// Fix first_author_letter
let first_author_letter = firstauthors
.join(" & ")
.chars()
.next()
.unwrap_or_default()
.to_string()
.to_uppercase();
if first_author_letter != "\0" && (entry.first_author_letter != first_author_letter) {
let mut stmt = tx
.prepare("UPDATE books_impl SET first_author_letter = :first_letter WHERE id = :book_id")
.unwrap();
stmt.execute(
named_params![":first_letter": first_author_letter,":book_id": entry.id],
)
.unwrap();
stat.sorting_fixed = stat.sorting_fixed + 1;
}
// Fix author names…
let authornames = epub_metadata
.authors
.iter()
.map(|aut| aut.name.clone())
.collect::<Vec<_>>();
if !authornames.iter().all(|s| entry.author.contains(s))
|| authornames.join(", ").len() != entry.author.len()
{
let mut stmt = tx
.prepare("UPDATE books_impl SET author = :authors WHERE id = :book_id")
.unwrap();
stmt.execute(
named_params![":authors": authornames.join(", "), ":book_id": entry.id],
)
.unwrap();
stat.authors_fixed = stat.authors_fixed + 1;
}
// Fix genre…
if entry.genre.is_empty() && epub_metadata.genre.len() > 0 {
let mut stmt = tx
.prepare(r#"INSERT INTO genres (name) SELECT :genre ON CONFLICT DO NOTHING"#)
.unwrap();
stmt.execute(named_params![":genre": &epub_metadata.genre])
.unwrap();
let mut stmt = tx
.prepare(
r#"
INSERT INTO booktogenre (bookid, genreid)
VALUES (:bookid,
(SELECT id FROM genres WHERE name = :genre)
)
ON CONFLICT DO NOTHING"#,
)
.unwrap();
stmt.execute(named_params![":bookid": &entry.id, ":genre": &epub_metadata.genre])
.unwrap();
stat.genres_fixed = stat.genres_fixed + 1;
}
// Fix series…
if !epub_metadata.series.name.is_empty() && entry.series.is_empty() {
let mut stmt = tx
.prepare("UPDATE books_impl SET series = :series, numinseries = :series_index WHERE id = :book_id")
.unwrap();
stmt.execute(
named_params![":series": &epub_metadata.series.name, ":series_index": &epub_metadata.series.index, ":book_id": entry.id],
)
.unwrap();
stat.series_fixed = stat.series_fixed + 1;
}
}
}
// ghost books
let num = remove_ghost_books_from_db(&tx);
stat.ghost_books_cleaned = num;
tx.commit().unwrap();
stat
}

316
src/epub.rs

@ -0,0 +1,316 @@
use std::{
collections::HashMap,
fs::{self, File},
io::Read,
};
use quick_xml::{events::Event, Reader};
use zip::ZipArchive;
#[derive(Debug)]
pub struct Author {
pub name: String,
pub firstauthor: String,
}
#[derive(Debug)]
pub struct Series {
pub name: String,
pub index: i32,
}
impl Series {
fn new() -> Self {
Series {
name: String::new(),
index: 0,
}
}
}
#[derive(Debug)]
pub struct EpubMetadata {
pub authors: Vec<Author>,
pub genre: String,
pub series: Series,
}
impl EpubMetadata {
fn new() -> Self {
EpubMetadata {
authors: Vec::new(),
genre: String::new(),
series: Series::new(),
}
}
}
fn get_rootfile(archive: &mut ZipArchive<File>) -> String {
let mut container = archive.by_name("META-INF/container.xml").unwrap();
let mut xml_str_buffer = String::new();
container.read_to_string(&mut xml_str_buffer).unwrap();
let mut reader = Reader::from_str(&xml_str_buffer);
reader.trim_text(true);
let mut buf = Vec::new();
let mut opf_filename = String::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) if e.local_name() == b"rootfile" => {
opf_filename = String::from_utf8(
e.attributes()
.filter(|attr| attr.as_ref().unwrap().key == b"full-path")
.next()
.unwrap()
.unwrap()
.value
.to_vec(),
)
.unwrap();
break;
}
Ok(Event::Eof) => break,
_ => (),
}
}
opf_filename
}
pub fn get_epub_metadata(filename: &str) -> Option<EpubMetadata> {
let mut epub_meta = EpubMetadata::new();
let file = fs::File::open(&filename);
let file = match file {
Err(_) => return None,
Ok(file) => file,
};
let mut archive = ZipArchive::new(file).unwrap();
let opf_filename = get_rootfile(&mut archive);
let mut xml_str_buffer = String::new();
let mut opf = archive.by_name(&opf_filename).unwrap();
opf.read_to_string(&mut xml_str_buffer).unwrap();
let mut reader = Reader::from_str(&xml_str_buffer);
let mut buf = Vec::new();
let mut curr_id = String::new();
let mut creator_found = false;
let mut file_as_found = false;
let mut role_found = false;
let mut genre_found = false;
let mut series_found = false;
let mut series_index_found = false;
let mut is_epub3 = false;
#[derive(Debug)]
struct XmlAut {
name: String,
sort: String,
role: String,
}
impl XmlAut {
fn new() -> Self {
XmlAut {
name: String::new(),
sort: String::new(),
role: String::new(),
}
}
}
let mut xml_authors = HashMap::new();
loop {
match reader.read_event(&mut buf) {
// See if we have EPUB3 or EPUB2
Ok(Event::Start(ref e)) if e.local_name() == b"package" => {
if e.attributes().any(|attr| {
attr.as_ref().unwrap().key == b"version"
&& attr.as_ref().unwrap().value.starts_with(b"3")
}) {
is_epub3 = true;
}
}
Ok(Event::Start(ref e)) if e.local_name() == b"creator" => {
creator_found = true;
if is_epub3 {
if let Some(idval) = e
.attributes()
.filter(|attr| attr.as_ref().unwrap().key == b"id")
.next()
{
curr_id = "#".to_string()
+ String::from_utf8(idval.unwrap().value.to_vec())
.unwrap()
.as_str();
xml_authors.insert(curr_id.clone(), XmlAut::new());
} else {
curr_id = "none".to_string() + xml_authors.len().to_string().as_str();
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
entry.role = "aut".to_string();
}
} else {
if let Some(file_as_val) = e
.attributes()
.filter(|attr| attr.as_ref().unwrap().key.ends_with(b"file-as"))
.next()
{
curr_id = "none".to_string() + xml_authors.len().to_string().as_str();
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
entry.sort = file_as_val
.unwrap()
.unescape_and_decode_value(&reader)
.unwrap_or_default();
entry.role = "aut".to_string();
} else if let Some(_role_val) = e
.attributes()
.filter(|attr| attr.as_ref().unwrap().key.ends_with(b"role"))
.next()
{
curr_id = "none".to_string() + xml_authors.len().to_string().as_str();
}
}
}
Ok(Event::Text(ref e)) if creator_found => {
if is_epub3 {
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
entry.name = String::from_utf8(e.to_vec()).unwrap();
} else {
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
entry.name = String::from_utf8(e.to_vec()).unwrap();
entry.role = "aut".to_string();
}
creator_found = false;
}
Ok(Event::Start(ref e)) if e.local_name() == b"meta" && is_epub3 => {
if let Some(refines) = e
.attributes()
.filter(|attr| attr.as_ref().unwrap().key == b"refines")
.next()
{
if e.attributes().any(|attr| {
attr.as_ref().unwrap().key == b"property"
&& attr.as_ref().unwrap().value.ends_with(b"file-as")
}) {
curr_id = String::from_utf8(refines.unwrap().value.to_vec()).unwrap();
file_as_found = true;
} else if e.attributes().any(|attr| {
attr.as_ref().unwrap().key == b"property"
&& attr.as_ref().unwrap().value.ends_with(b"role")
}) {
curr_id = String::from_utf8(refines.unwrap().value.to_vec()).unwrap();
role_found = true;
} else if e.attributes().any(|attr| {
attr.as_ref().unwrap().key == b"property"
&& attr.as_ref().unwrap().value.ends_with(b"group-position")
}) {
series_index_found = true;
}
}
if e.attributes().any(|attr| {
attr.as_ref().unwrap().key == b"property"
&& attr
.as_ref()
.unwrap()
.value
.ends_with(b"belongs-to-collection")
}) {
series_found = true;
}
}
Ok(Event::Empty(ref e)) if e.local_name() == b"meta" && !is_epub3 => {
if e.attributes().any(|attr| {
attr.as_ref().unwrap().key == b"name"
&& attr
.as_ref()
.unwrap()
.unescaped_value()
.unwrap()
.ends_with(b"series")
}) {
epub_meta.series.name = e
.attributes()
.filter(|attr| attr.as_ref().unwrap().key == b"content")
.next()
.unwrap()
.unwrap()
.unescape_and_decode_value(&reader)
.unwrap_or_default();
} else if e.attributes().any(|attr| {
attr.as_ref().unwrap().key == b"name"
&& attr
.as_ref()
.unwrap()
.unescaped_value()
.unwrap()
.ends_with(b"series_index")
}) {
let index_float = e
.attributes()
.filter(|attr| attr.as_ref().unwrap().key == b"content")
.next()
.unwrap()
.unwrap()
.unescape_and_decode_value(&reader)
.unwrap_or_default()
.parse::<f32>()
.unwrap_or_default();
epub_meta.series.index = index_float as i32;
}
}
Ok(Event::Text(ref e)) if file_as_found && is_epub3 => {
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
entry.sort = String::from_utf8(e.to_vec()).unwrap();
file_as_found = false;
}
Ok(Event::Text(ref e)) if role_found && is_epub3 => {
let entry = xml_authors.entry(curr_id.clone()).or_insert(XmlAut::new());
entry.role = String::from_utf8(e.to_vec()).unwrap();
role_found = false;
}
Ok(Event::Text(ref e)) if series_found && is_epub3 => {
epub_meta.series.name = String::from_utf8(e.to_vec()).unwrap();
series_found = false;
}
Ok(Event::Text(ref e)) if series_index_found && is_epub3 => {
epub_meta.series.index = String::from_utf8(e.to_vec())
.unwrap()
.parse()
.unwrap_or_default();
series_index_found = false;
}
Ok(Event::Start(ref e)) if e.local_name() == b"subject" => {
genre_found = true;
}
Ok(Event::Text(ref e)) if genre_found => {
epub_meta.genre = e.unescape_and_decode(&reader).unwrap();
genre_found = false;
}
Ok(Event::Eof) => break,
_ => (),
}
}
epub_meta.authors = xml_authors
.into_iter()
.filter(|&(_, ref xml_author)| &xml_author.role == "aut" && &xml_author.name.len() > &0)
.map(|(_key, value)| Author {
name: value.name,
firstauthor: value.sort,
})
.collect();
Some(epub_meta)
}

519
src/main.rs

@ -1,468 +1,7 @@
mod database;
mod epub;
mod pocketbook;
use rusqlite::{named_params, Connection, Result, Transaction, NO_PARAMS};
use std::io::BufReader;
use std::{collections::HashMap, fs::File};
use std::{error::Error, io::Read};
use xml::reader::{EventReader, ParserConfig, XmlEvent};
use zip::{read::ZipFile, ZipArchive};
fn get_root_file(mut container: ZipFile) -> Result<Option<String>, Box<dyn Error>> {
let mut buf = String::new();
container.read_to_string(&mut buf).unwrap();
// Get rid of the BOM mark, if any
if buf.starts_with("\u{feff}") {
buf = buf.strip_prefix("\u{feff}").unwrap().to_owned();
}
let parser = EventReader::new(BufReader::new(buf.as_bytes()));
for e in parser {
match e {
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "rootfile" => {
for attr in attributes {
if attr.name.local_name == "full-path" {
return Ok(Some(attr.value));
}
}
}
Err(e) => {
return Err(Box::new(e));
}
_ => {}
}
}
Ok(None)
}
struct Refine {
role: String,
file_as: String,
}
fn get_attribute_file_as(opf: ZipFile) -> Option<String> {
let parser = ParserConfig::new()
.trim_whitespace(true)
.ignore_comments(true)
.coalesce_characters(true)
.create_reader(opf);
let mut is_epub3 = false;
let mut creator_ids = Vec::new();
let mut refines_found = false;
let mut role_found = false;
let mut refine_entries = HashMap::new();
let mut curr_id = String::new();
for e in parser {
match e {
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "package" => {
for attr in attributes {
if attr.name.local_name == "version" {
if attr.value.starts_with("3") == true {
is_epub3 = true;
}
}
}
}
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "creator" => {
for attr in attributes {
if attr.name.local_name == "file-as" {
return Some(attr.value);
}
if is_epub3 && attr.name.local_name == "id" {
creator_ids.push("#".to_owned() + attr.value.as_str());
}
}
}
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "meta" => {
if attributes.iter().any(|attr| {
attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
}) && attributes
.iter()
.any(|attr| attr.name.local_name == "property" && attr.value == "file-as")
{
refines_found = true;
curr_id = attributes
.iter()
.find(|a| a.name.local_name == "refines")
.unwrap()
.value
.clone();
} else if attributes.iter().any(|attr| {
attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
}) && attributes
.iter()
.any(|attr| attr.name.local_name == "property" && attr.value == "role")
{
role_found = true;
curr_id = attributes
.iter()
.find(|a| a.name.local_name == "refines")
.unwrap()
.value
.clone();
}
}
Ok(XmlEvent::Characters(value)) => {
if role_found == true {
if value == "aut" {
let entry = refine_entries.entry(curr_id.clone()).or_insert(Refine {
role: "".to_string(),
file_as: "".to_string(),
});
entry.role = value;
}
role_found = false;
} else if refines_found == true {
let entry = refine_entries.entry(curr_id.clone()).or_insert(Refine {
role: "".to_string(),
file_as: "".to_string(),
});
entry.file_as = value;
refines_found = false;
}
}
Ok(XmlEvent::StartElement { .. }) => {
if refines_found == true {
refines_found = false;
}
}
Err(_e) => {
break;
}
_ => {}
}
}
if refine_entries.len() == 1 {
return Some(refine_entries.values().next().unwrap().file_as.clone());
} else if refine_entries.len() >= 2 {
return Some(
refine_entries
.values()
.into_iter()
.filter(|v| v.role == "aut")
.map(|v| v.file_as.clone())
.collect::<Vec<String>>()
.join(" & "),
);
}
None
}
struct Creator {
role: String,
name: String,
}
fn get_attribute_creator(opf: ZipFile) -> Option<String> {
let parser = ParserConfig::new()
.trim_whitespace(true)
.ignore_comments(true)
.coalesce_characters(true)
.create_reader(opf);
let mut is_epub3 = false;
let mut creator_found = true;
let mut creator_ids = Vec::new();
let mut role_found = false;
let mut creator_entries = HashMap::new();
let mut epub2_creator_entries = Vec::new();
let mut curr_id = String::new();
for e in parser {
match e {
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "package" => {
for attr in attributes {
if attr.name.local_name == "version" {
if attr.value.starts_with("3") == true {
is_epub3 = true;
}
}
}
}
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "creator" => {
creator_found = true;
if !is_epub3 {
match attributes
.iter()
.find(|attr| attr.name.local_name == "role")
{
Some(attr) => {
epub2_creator_entries.push(Creator {
role: attr.value.clone(),
name: "".to_string(),
});
}
None => {
epub2_creator_entries.push(Creator {
role: "aut".to_string(),
name: "".to_string(),
});
}
}
}
for attr in attributes {
if is_epub3 && attr.name.local_name == "id" {
creator_ids.push("#".to_owned() + attr.value.as_str());
//creator_entries.insert(attr.value.clone(), Creator{role: "".to_string(), name: "".to_string()});
curr_id = "#".to_owned() + attr.value.as_str();
}
}
}
Ok(XmlEvent::StartElement {
name, attributes, ..
}) if name.local_name == "meta" => {
if attributes.iter().any(|attr| {
attr.name.local_name == "refines" && creator_ids.contains(&attr.value)
}) && attributes
.iter()
.any(|attr| attr.name.local_name == "property" && attr.value == "role")
{
role_found = true;
curr_id = attributes
.iter()
.find(|a| a.name.local_name == "refines")
.unwrap()
.value
.clone();
}
}
Ok(XmlEvent::Characters(value)) => {
if creator_found && is_epub3 == false {
epub2_creator_entries.last_mut().unwrap().name = value.clone();
} else if creator_found && is_epub3 == true {
let entry = creator_entries.entry(curr_id.clone()).or_insert(Creator {
role: "".to_string(),
name: "".to_string(),
});
entry.name = value;
creator_found = false;
} else if role_found == true {
let entry = creator_entries.entry(curr_id.clone()).or_insert(Creator {
role: "".to_string(),
name: "".to_string(),
});
entry.role = value;
role_found = false;
}
}
Ok(XmlEvent::StartElement { .. }) => {
if creator_found == true {
creator_found = false;
}
}
Err(e) => {
println!("{}", e);
break;
}
_ => {}
}
}
if !is_epub3 && epub2_creator_entries.len() >= 1 {
return Some(
epub2_creator_entries
.into_iter()
.filter(|v| v.role == "aut")
.map(|v| v.name.clone())
.collect::<Vec<String>>()
.join(", "),
);
} else if creator_entries.len() >= 1 {
return Some(
creator_entries
.values()
.into_iter()
.filter(|v| v.role == "aut")
.map(|v| v.name.clone())
.collect::<Vec<String>>()
.join(", "),
);
}
None
}
/// One epub row as read from the explorer-3 database
/// (see `get_epubs_from_database`).
struct BookEntry {
/// Primary key of the row in `books_impl`.
id: i32,
/// Folder name and file name joined with a `/`.
filepath: String,
/// Current value of the `author` column.
author: String,
/// Current value of the `firstauthor` column.
firstauthor: String,
/// True when the file lives under "/mnt/ext1/Digital Editions",
/// i.e. was synced via Adobe Digital Editions and carries DRM.
has_drm: bool,
}
/// Fetch every epub on internal storage (storageid = 1) from the
/// explorer database, together with its on-disk location and the
/// author columns that may later need fixing.
fn get_epubs_from_database(tx: &Transaction) -> Vec<BookEntry> {
let mut statement = tx
.prepare(
r"
SELECT books.id, folders.name, files.filename, books.firstauthor, books.author
FROM books_impl books JOIN files
ON books.id = files.book_id
JOIN folders
ON folders.id = files.folder_id
WHERE files.storageid = 1 AND books.ext = 'epub'
ORDER BY books.id",
)
.unwrap();
let mut result_rows = statement.query(NO_PARAMS).unwrap();
let mut books = Vec::new();
while let Some(row) = result_rows.next().unwrap() {
let folder: String = row.get(1).unwrap();
let file_name: String = row.get(2).unwrap();
// Books synced through Adobe Digital Editions are DRM-protected;
// they cannot be opened as plain zip archives later on.
let drm = folder.as_str() == "/mnt/ext1/Digital Editions";
books.push(BookEntry {
id: row.get(0).unwrap(),
filepath: format!("{}/{}", folder, file_name),
firstauthor: row.get(3).unwrap(),
author: row.get(4).unwrap(),
has_drm: drm,
});
}
books
}
/// Delete "ghost" books – rows in `books_impl` that no file on disk
/// backs any more – and then purge every table that still references
/// the removed ids. Returns the number of book rows deleted.
fn remove_ghost_books_from_db(tx: &Transaction) -> usize {
let removed = tx
.execute(
r"
DELETE FROM books_impl
WHERE id IN (
SELECT books.id
FROM books_impl books
LEFT OUTER JOIN files
ON books.id = files.book_id
WHERE files.filename is NULL
)",
NO_PARAMS,
)
.unwrap();
// Referential cleanup: each of these tables keys on a books_impl id.
let cleanups = [
r"DELETE FROM books_settings WHERE bookid NOT IN ( SELECT id FROM books_impl )",
r"DELETE FROM books_uids WHERE book_id NOT IN ( SELECT id FROM books_impl )",
r"DELETE FROM bookshelfs_books WHERE bookid NOT IN ( SELECT id FROM books_impl )",
r"DELETE FROM booktogenre WHERE bookid NOT IN ( SELECT id FROM books_impl )",
r"DELETE FROM social WHERE bookid NOT IN ( SELECT id FROM books_impl )",
];
for sql in cleanups.iter() {
tx.execute(sql, NO_PARAMS).unwrap();
}
removed
}
/// Counters collected during a fix run, reported to the user afterwards.
struct Statistics {
/// Number of `author` / `firstauthor` column updates written.
authors_fixed: i32,
/// Number of book rows removed because no file backs them.
ghost_books_cleaned: usize,
/// Number of books skipped because they are DRM-protected.
drm_skipped: usize,
}
/// Compare the author metadata stored inside each epub with the
/// database columns and rewrite `firstauthor` / `author` where the
/// epub metadata is not already contained in them.
///
/// DRM-protected books are counted and skipped. A missing, corrupt or
/// otherwise unreadable epub is skipped silently — consistent with the
/// existing handling of a failed `File::open` — instead of panicking
/// and aborting the whole run. Finally, ghost book records are purged.
fn fix_db_entries(tx: &Transaction, book_entries: &[BookEntry]) -> Statistics {
let mut stat = Statistics {
authors_fixed: 0,
ghost_books_cleaned: 0,
drm_skipped: 0,
};
for entry in book_entries {
if entry.has_drm {
stat.drm_skipped += 1;
continue;
}
// The file may have vanished since the database was written.
let file = match File::open(entry.filepath.as_str()) {
Ok(file) => file,
Err(_) => continue,
};
// A corrupt zip must not abort the whole run; skip the book.
let mut archive = match ZipArchive::new(BufReader::new(file)) {
Ok(archive) => archive,
Err(_) => continue,
};
let container = match archive.by_name("META-INF/container.xml") {
Ok(container) => container,
Err(_) => continue,
};
// No (readable) OPF root file: nothing we can fix for this book.
let opf_file = match get_root_file(container) {
Ok(Some(path)) => path,
_ => continue,
};
// firstauthor…
if let Ok(opf) = archive.by_name(opf_file.as_str()) {
if let Some(file_as) = get_attribute_file_as(opf) {
// Only write when some part of the epub value is missing
// from the database column.
if !file_as.split(" & ").all(|s| entry.firstauthor.contains(s)) {
let mut stmt = tx
.prepare("UPDATE books_impl SET firstauthor = :file_as WHERE id = :book_id")
.unwrap();
stmt.execute_named(named_params![":file_as": file_as, ":book_id": entry.id])
.unwrap();
stat.authors_fixed += 1;
}
}
}
// author… (the OPF stream was consumed above, so reopen it)
if let Ok(opf) = archive.by_name(opf_file.as_str()) {
if let Some(creator) = get_attribute_creator(opf) {
if !creator.split(", ").all(|s| entry.author.contains(s))
|| creator.len() < entry.author.len()
{
let mut stmt = tx
.prepare("UPDATE books_impl SET author = :creator WHERE id = :book_id")
.unwrap();
stmt.execute_named(named_params![":creator": creator, ":book_id": entry.id])
.unwrap();
stat.authors_fixed += 1;
}
}
}
}
// ghost books
stat.ghost_books_cleaned = remove_ghost_books_from_db(tx);
stat
}
fn main() {
if cfg!(target_arch = "arm") {
let res = pocketbook::dialog(
@ -482,44 +21,30 @@ fn main() {
}
}
let mut conn = Connection::open("/mnt/ext1/system/explorer-3/explorer-3.db").unwrap();
conn.execute("PRAGMA foreign_keys = 0", NO_PARAMS).unwrap();
let tx = conn.transaction().unwrap();
let book_entries = get_epubs_from_database(&tx);
let stat = fix_db_entries(&tx, &book_entries);
tx.commit().unwrap();
let stat = database::fix_db_entries();
if cfg!(target_arch = "arm") {
if stat.authors_fixed == 0 {
if stat.drm_skipped == 0 {
pocketbook::dialog(
pocketbook::Icon::Info,
"The database seems to be ok.\n\
if stat.anything_fixed() == false {
pocketbook::dialog(
pocketbook::Icon::Info,
"The database seems to be ok.\n\
Nothing had to be fixed.",
&["OK"],
);
} else {
pocketbook::dialog(
pocketbook::Icon::Info,
&format!(
"The database seems to be ok.\n\
Nothing had to be fixed.\n\
(Books skipped (DRM): {})",
&stat.drm_skipped
),
&["OK"],
);
}
&["OK"],
);
} else {
pocketbook::dialog(
pocketbook::Icon::Info,
&format!(
"Authors fixed: {}\n\
Books skipped (DRM): {}\n\
Sorting fixed: {}\n\
Genres fixed: {}\n\
Series fixed: {}\n\
Books cleaned from DB: {}",
&stat.authors_fixed, &stat.drm_skipped, &stat.ghost_books_cleaned
&stat.authors_fixed,
&stat.sorting_fixed,