From 4e6afe7bf0a212cc6c139b3ab8d3b12a41e5b881 Mon Sep 17 00:00:00 2001 From: Kohei Watanabe Date: Sat, 2 Dec 2023 23:22:33 +0900 Subject: [PATCH] google-play-books --- browser.ts | 56 +++++++++- library.ts | 9 +- main.ts | 42 +++++--- platform.ts | 51 ++++++++- platforms/dmm-books.ts | 37 ++----- platforms/google-play-books.ts | 184 +++++++++++++++++++++++++++++++++ 6 files changed, 325 insertions(+), 54 deletions(-) create mode 100644 platforms/google-play-books.ts diff --git a/browser.ts b/browser.ts index d442f72..2500317 100644 --- a/browser.ts +++ b/browser.ts @@ -1,5 +1,55 @@ -import type { Browser, BrowserContext } from "playwright"; +import * as Playwright from "playwright"; import { chromium, devices } from "playwright"; +import type { Database } from "./database"; +import type { TPlatform } from "./platform"; -export { Browser, BrowserContext, chromium }; -export const { userAgent } = devices["Desktop Chrome"]; +export type Browser = { + loadBrowserContext(platform: TPlatform): Promise; + saveBrowserContext(platform: TPlatform, ctx: BrowserContext): Promise; + newContext: () => Promise; + close: () => Promise; +}; + +export type BrowserContext = Playwright.BrowserContext; + +export async function createBrowser({ + db, + headless = true, +}: { + db: Database; + headless?: boolean; +}): Promise { + const { userAgent } = devices["Desktop Chrome"]; + const browser = await chromium.launch({ + headless, + args: ["--disable-blink-features=AutomationControlled"], + }); + + return { + async loadBrowserContext( + platform: TPlatform, + ): Promise { + const { secrets } = await db.get( + `select secrets from platforms where name = ?`, + platform, + ); + + const storageState = JSON.parse(secrets) ?? undefined; + const ctx = await browser.newContext({ storageState, userAgent }); + return ctx; + }, + async saveBrowserContext( + platform: TPlatform, + ctx: BrowserContext, + ): Promise { + const secrets = await ctx.storageState(); + await db.run( + `update platforms set secrets = ? where name = ?`, + JSON.stringify(secrets), + platform, + ); + }, + newContext: () => browser.newContext(), + close: () => browser.close(), + }; +} diff --git a/library.ts b/library.ts index e4818fb..d56acda 100644 --- a/library.ts +++ b/library.ts @@ -3,10 +3,11 @@ import { createWriteStream } from "node:fs"; import stream from "node:stream/promises"; import { Zip, ZipPassThrough } from "fflate"; import { Database } from "./database"; +import { type TPlatform, site } from "./platform"; export type Book = { id: number; - platform: "dmm-books" | "google-play-books"; + platform: TPlatform; readerUrl: string; title: string; authors: Array; @@ -15,9 +16,9 @@ export type Book = { export function createLibrary(db: Database) { return { async add(readerUrlOrBook: string | Book) { - const platform = "dmm-books"; - if (typeof readerUrlOrBook === "string") { + const platform = site(readerUrlOrBook); + await db.run( `insert into books(platform_id, reader_url) values((select id from platforms where name = ?), ?)`, platform, @@ -38,7 +39,7 @@ values((select id from platforms where name = ?), ?, ?, ?) on conflict(reader_url) do update set title = excluded.title, authors = excluded.authors `, - platform, + readerUrlOrBook.platform, readerUrlOrBook.readerUrl, readerUrlOrBook.title, JSON.stringify(readerUrlOrBook.authors), diff --git a/main.ts b/main.ts index a737723..76238a8 100644 --- a/main.ts +++ b/main.ts @@ -1,8 +1,8 @@ import util from "node:util"; -import { chromium } from "./browser"; +import { createBrowser } from "./browser"; import { createDatabase } from "./database"; import { type Book, createLibrary } from "./library"; -import { createPlatform } from "./platform"; +import { type TPlatform, createPlatform } from "./platform"; const options = { db: { @@ -18,21 +18,29 @@ const options = { default: "3", }, login: { - type: "boolean", + type: "string", async run() { const db = await createDatabase(args.values.db!); - const browser = await chromium.launch({ headless: false }); - const platform = createPlatform({ db, browser }); + const browser = await createBrowser({ db, headless: false }); + const platform = createPlatform({ + platform: args.values.login as TPlatform, + db, + browser, + }); await platform.login(); await browser.close(); }, }, logout: { - type: "boolean", + type: "string", async run() { const db = await createDatabase(args.values.db!); - const browser = await chromium.launch(); - const platform = createPlatform({ db, browser }); + const browser = await createBrowser({ db }); + const platform = createPlatform({ + platform: args.values.logout as TPlatform, + db, + browser, + }); await platform.logout(); await browser.close(); }, @@ -87,12 +95,16 @@ const options = { }, }, pull: { - type: "boolean", + type: "string", async run() { const db = await createDatabase(args.values.db!); const library = createLibrary(db); - const browser = await chromium.launch(); - const platform = createPlatform({ db, browser }); + const browser = await createBrowser({ db }); + const platform = createPlatform({ + platform: args.values.pull as TPlatform, + db, + browser, + }); for await (const book of platform.pull()) { await library.add(book); @@ -125,8 +137,12 @@ const options = { } for (const book of books) { - const browser = await chromium.launch(); - const platform = createPlatform({ db, browser }); + const browser = await createBrowser({ db }); + const platform = createPlatform({ + platform: book.platform, + db, + browser, + }); const dir = `${args.values["out-dir"]!}/${book.id}`; await platform.download(dir, book); await library.archive(dir, book, { diff --git a/platform.ts b/platform.ts index b5501a4..200aa95 100644 --- a/platform.ts +++ b/platform.ts @@ -1,8 +1,51 @@ -import type { Database } from "./database"; +import fs from "node:fs/promises"; +import path from "node:path"; +import type { Book } from "./library"; import type { Browser } from "./browser"; +import type { Database } from "./database"; import { DmmBooks } from "./platforms/dmm-books"; +import { GooglePlayBooks } from "./platforms/google-play-books"; -export function createPlatform(opt: { db: Database; browser: Browser }) { - const platform = DmmBooks(opt); - return platform; +const platforms = { + "dmm-books": DmmBooks, + "google-play-books": GooglePlayBooks, +}; + +export type TPlatform = keyof typeof platforms; + +export function site(url: string): TPlatform { + const { origin } = new URL(url); + + for (const [platform, { site }] of Object.entries(platforms)) { + if (site.includes(origin)) return platform as TPlatform; + } + + throw new Error(`Unsupported URL: ${url}`); +} + +export function createPlatform(opts: { + platform: TPlatform; + db: Database; + browser: Browser; +}) { + if (!(opts.platform in platforms)) { + throw new Error(`Available platform: ${Object.keys(platforms).join(", ")}`); + } + + const platform = platforms[opts.platform](opts.browser); + + return { + ...platform, + async download(dir: string, book: Book): Promise { + await fs.mkdir(path.dirname(dir), { recursive: true }); + await fs.mkdir(dir); + await platform.download(dir, book); + }, + async logout() { + await opts.db.run( + `update platforms set secrets = 'null' where name = ?`, + opts.platform, + ); + }, + }; } diff --git a/platforms/dmm-books.ts b/platforms/dmm-books.ts index bb4d1c2..edfbccc 100644 --- a/platforms/dmm-books.ts +++ b/platforms/dmm-books.ts @@ -1,8 +1,6 @@ import fs from "node:fs/promises"; -import path from "node:path"; import type { Book } from "../library"; -import { userAgent, type Browser, type BrowserContext } from "../browser"; -import type { Database } from "../database"; +import type { Browser, BrowserContext } from "../browser"; var NFBR: any; @@ -121,17 +119,7 @@ async function drawImage(file: { return dataUrl; } -export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) { - async function loadBrowserContext(): Promise { - const { secrets } = await db.get( - `select secrets from platforms where name = 'dmm-books'`, - ); - - const storageState = JSON.parse(secrets) ?? undefined; - const ctx = await browser.newContext({ storageState, userAgent }); - return ctx; - } - +export function DmmBooks(browser: Browser) { async function* getSeriesBooks( ctx: BrowserContext, series: { @@ -242,21 +230,11 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) { page.waitForURL("https://www.dmm.com/", { timeout: 0 }), page.waitForURL("https://www.dmm.co.jp/top/", { timeout: 0 }), ]); - const secrets = await ctx.storageState(); - await db.run( - `update platforms set secrets = ? where name = 'dmm-books'`, - JSON.stringify(secrets), - ); - }, - - async logout() { - await db.run( - `update platforms set secrets = 'null' where name = 'dmm-books'`, - ); + await browser.saveBrowserContext("dmm-books", ctx); }, async *pull(): AsyncGenerator { - const ctx = await loadBrowserContext(); + const ctx = await browser.loadBrowserContext("dmm-books"); yield* getAllBooks(ctx); @@ -264,12 +242,9 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) { }, async download(dir: string, book: Book) { - const ctx = await loadBrowserContext(); + const ctx = await browser.loadBrowserContext("dmm-books"); const page = await ctx.newPage(); - // TODO: --all - await fs.mkdir(path.dirname(dir), { recursive: true }); - await fs.mkdir(dir); await page.goto(book.readerUrl); const files = await page.evaluate(getFiles); @@ -301,3 +276,5 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) { }, }; } + +DmmBooks.site = ["https://book.dmm.com", "https://book.dmm.co.jp"]; diff --git a/platforms/google-play-books.ts b/platforms/google-play-books.ts new file mode 100644 index 0000000..647b2e1 --- /dev/null +++ b/platforms/google-play-books.ts @@ -0,0 +1,184 @@ +import fs from "node:fs/promises"; +import type { Book } from "../library"; +import type { Browser } from "../browser"; + +async function getFiles(): Promise> { + const pages: NodeListOf = await new Promise(async function ( + resolve, + reject, + ) { + const timeout = setTimeout(() => { + reject(new Error("Page loading timeout.")); + }, 60_000); + + let pages: NodeListOf; + + while (true) { + pages = document.querySelectorAll("reader-page"); + + const loaded = + pages.length > 0 && + [...pages].every((page) => page.classList.contains("-gb-loaded")); + + if (loaded) { + break; + } else { + await new Promise((resolve) => setTimeout(resolve, 100)); + } + } + + resolve(pages); + clearTimeout(timeout); + }); + + const images: Array = [...pages].map( + (el) => el.querySelector("svg image")!, + ); + + const files = [...images].map((image) => ({ url: image.href.baseVal })); + return files; +} + +async function drawImage(file: { url: string }): Promise { + const res = await fetch(file.url); + const blob = await res.blob(); + const dataUrl: string = await new Promise((resolve, reject) => { + const fileReader = Object.assign(new FileReader(), { + onload(): void { + resolve(this.result); + }, + onerror(e: ErrorEvent): void { + const error = new Error(`${e.type}: ${e.message}`); + reject(error); + }, + }); + + fileReader.readAsDataURL(blob); + }); + + return dataUrl; +} + +export function GooglePlayBooks(browser: Browser) { + return { + async login() { + const ctx = await browser.newContext(); + const page = await ctx.newPage(); + await page.goto("https://accounts.google.com"); + await page.waitForURL( + (url) => url.origin === "https://myaccount.google.com", + { timeout: 0 }, + ); + await browser.saveBrowserContext("google-play-books", ctx); + }, + + async *pull(): AsyncGenerator { + const ctx = await browser.loadBrowserContext("google-play-books"); + const page = await ctx.newPage(); + await page.goto( + "https://play.google.com/books?type=comics&source=purchases", + ); + await page.waitForSelector("gpb-library-card"); + + for (const metadata of await page.$$("gpb-library-card .metadata")) { + const readerUrl = await metadata.$eval("a", (a) => a.href); + const [title, author] = (await metadata.innerText()).split("\n"); + + yield { + id: NaN, + platform: "google-play-books", + readerUrl, + title, + authors: [author], + }; + + process.stderr.write("."); + } + + process.stderr.write(`\n`); + }, + + async download(dir: string, book: Book) { + const ctx = await browser.loadBrowserContext("google-play-books"); + const page = await ctx.newPage(); + + await page.goto(book.readerUrl); + await page.waitForSelector(".display"); + + const frame = page.frames().at(-1); + + if (!frame) { + throw new Error("Frame not found."); + } + + await frame.evaluate(function scrollToTop() { + const viewport = document.querySelector("cdk-virtual-scroll-viewport"); + viewport?.scroll({ top: 0 }); + }); + + async function next(): Promise { + return await frame!.evaluate(function scroll() { + const viewport = document.querySelector( + "cdk-virtual-scroll-viewport", + ); + + if (!viewport) throw new Error("Viewport not found."); + + const hasNext = + 1 <= + Math.abs( + viewport.scrollHeight - + viewport.clientHeight - + viewport.scrollTop, + ); + + if (hasNext) { + viewport.scrollBy({ top: viewport.clientHeight }); + } + + return hasNext; + }); + } + + const fileMap: Map = new Map(); + + while (await next()) { + const files = await frame.evaluate(getFiles); + + for (const file of files) { + if (fileMap.has(file.url)) continue; + + const dataUrl = await frame.evaluate(drawImage, file); + fileMap.set(file.url, { ...file, dataUrl }); + process.stderr.write("."); + } + } + + const files = [...fileMap.values()]; + const digits = String(files.length).length; + + function pad(n: string) { + return n.padStart(digits, "0"); + } + + for (const [n, file] of Object.entries(files)) { + const [prefix, base64] = file.dataUrl.split(",", 2); + + if (!prefix.startsWith("data:image/jpeg;")) { + throw new Error("Only image/jpeg is supported."); + } + + if (!prefix.endsWith(";base64")) { + throw new Error("Only base64 is supported."); + } + + const buffer = Buffer.from(base64, "base64"); + await fs.writeFile(`${dir}/${pad(n)}.jpeg`, buffer); + } + + process.stderr.write(`\n`); + }, + }; +} + +GooglePlayBooks.site = ["https://play.google.com"];