google-play-books

This commit is contained in:
Nebel 2023-12-02 23:22:33 +09:00
parent 22f8e0e767
commit 4e6afe7bf0
Signed by: nebel
GPG key ID: 79807D08C6EF6460
6 changed files with 325 additions and 54 deletions

View file

@ -1,5 +1,55 @@
import type { Browser, BrowserContext } from "playwright";
import * as Playwright from "playwright";
import { chromium, devices } from "playwright";
import type { Database } from "./database";
import type { TPlatform } from "./platform";
export { Browser, BrowserContext, chromium };
export const { userAgent } = devices["Desktop Chrome"];
/**
 * Project-level browser handle returned by `createBrowser`.
 *
 * It exposes only the operations the platform modules need, plus helpers
 * that persist a platform's login state in the `platforms` table.
 */
export type Browser = {
/** Opens a context seeded with the storage state saved for `platform`. */
loadBrowserContext(platform: TPlatform): Promise<Playwright.BrowserContext>;
/** Stores the storage state of `ctx` as the secrets of `platform`. */
saveBrowserContext(platform: TPlatform, ctx: BrowserContext): Promise<void>;
/** Opens a fresh context without any stored state. */
newContext: () => Promise<Playwright.BrowserContext>;
/** Closes the underlying browser. */
close: () => Promise<void>;
};
/** Alias of Playwright's `BrowserContext`, re-exported for platform modules. */
export type BrowserContext = Playwright.BrowserContext;
/**
 * Launches Chromium and wraps it in the project's `Browser` handle.
 *
 * @param db - Database holding the `platforms` table whose `secrets` column
 *   stores each platform's serialized Playwright storage state.
 * @param headless - Whether to launch without a visible window (default `true`).
 * @returns A handle that can open fresh contexts, or contexts restored from
 *   (and saved back to) the database.
 */
export async function createBrowser({
  db,
  headless = true,
}: {
  db: Database;
  headless?: boolean;
}): Promise<Browser> {
  const { userAgent } = devices["Desktop Chrome"];
  const browser = await chromium.launch({
    headless,
    // Turns off the Blink "AutomationControlled" feature for this launch.
    args: ["--disable-blink-features=AutomationControlled"],
  });

  return {
    /**
     * Opens a context seeded with the storage state saved for `platform`.
     *
     * @throws Error when the `platforms` table has no row for `platform`.
     */
    async loadBrowserContext(
      platform: TPlatform,
    ): Promise<Playwright.BrowserContext> {
      const row = await db.get(
        `select secrets from platforms where name = ?`,
        platform,
      );
      // Presumably `db.get` yields undefined when no row matches; fail with a
      // clear message instead of a destructuring TypeError.
      if (row == null) {
        throw new Error(`Platform not found in database: ${platform}`);
      }
      // Logged-out platforms store the JSON literal `null`; Playwright expects
      // `undefined` for "no stored state".
      const storageState = JSON.parse(row.secrets) ?? undefined;
      return browser.newContext({ storageState, userAgent });
    },

    /** Serializes the storage state of `ctx` into the row of `platform`. */
    async saveBrowserContext(
      platform: TPlatform,
      ctx: BrowserContext,
    ): Promise<void> {
      const secrets = await ctx.storageState();
      await db.run(
        `update platforms set secrets = ? where name = ?`,
        JSON.stringify(secrets),
        platform,
      );
    },

    /** Opens a fresh context with Playwright's defaults (no stored state). */
    newContext: () => browser.newContext(),

    /** Closes the underlying Chromium instance. */
    close: () => browser.close(),
  };
}

View file

@ -3,10 +3,11 @@ import { createWriteStream } from "node:fs";
import stream from "node:stream/promises";
import { Zip, ZipPassThrough } from "fflate";
import { Database } from "./database";
import { type TPlatform, site } from "./platform";
export type Book = {
id: number;
platform: "dmm-books" | "google-play-books";
platform: TPlatform;
readerUrl: string;
title: string;
authors: Array<string>;
@ -15,9 +16,9 @@ export type Book = {
export function createLibrary(db: Database) {
return {
async add(readerUrlOrBook: string | Book) {
const platform = "dmm-books";
if (typeof readerUrlOrBook === "string") {
const platform = site(readerUrlOrBook);
await db.run(
`insert into books(platform_id, reader_url) values((select id from platforms where name = ?), ?)`,
platform,
@ -38,7 +39,7 @@ values((select id from platforms where name = ?), ?, ?, ?)
on conflict(reader_url)
do update set title = excluded.title, authors = excluded.authors
`,
platform,
readerUrlOrBook.platform,
readerUrlOrBook.readerUrl,
readerUrlOrBook.title,
JSON.stringify(readerUrlOrBook.authors),

42
main.ts
View file

@ -1,8 +1,8 @@
import util from "node:util";
import { chromium } from "./browser";
import { createBrowser } from "./browser";
import { createDatabase } from "./database";
import { type Book, createLibrary } from "./library";
import { createPlatform } from "./platform";
import { type TPlatform, createPlatform } from "./platform";
const options = {
db: {
@ -18,21 +18,29 @@ const options = {
default: "3",
},
login: {
type: "boolean",
type: "string",
async run() {
const db = await createDatabase(args.values.db!);
const browser = await chromium.launch({ headless: false });
const platform = createPlatform({ db, browser });
const browser = await createBrowser({ db, headless: false });
const platform = createPlatform({
platform: args.values.login as TPlatform,
db,
browser,
});
await platform.login();
await browser.close();
},
},
logout: {
type: "boolean",
type: "string",
async run() {
const db = await createDatabase(args.values.db!);
const browser = await chromium.launch();
const platform = createPlatform({ db, browser });
const browser = await createBrowser({ db });
const platform = createPlatform({
platform: args.values.logout as TPlatform,
db,
browser,
});
await platform.logout();
await browser.close();
},
@ -87,12 +95,16 @@ const options = {
},
},
pull: {
type: "boolean",
type: "string",
async run() {
const db = await createDatabase(args.values.db!);
const library = createLibrary(db);
const browser = await chromium.launch();
const platform = createPlatform({ db, browser });
const browser = await createBrowser({ db });
const platform = createPlatform({
platform: args.values.pull as TPlatform,
db,
browser,
});
for await (const book of platform.pull()) {
await library.add(book);
@ -125,8 +137,12 @@ const options = {
}
for (const book of books) {
const browser = await chromium.launch();
const platform = createPlatform({ db, browser });
const browser = await createBrowser({ db });
const platform = createPlatform({
platform: book.platform,
db,
browser,
});
const dir = `${args.values["out-dir"]!}/${book.id}`;
await platform.download(dir, book);
await library.archive(dir, book, {

View file

@ -1,8 +1,51 @@
import type { Database } from "./database";
import fs from "node:fs/promises";
import path from "node:path";
import type { Book } from "./library";
import type { Browser } from "./browser";
import type { Database } from "./database";
import { DmmBooks } from "./platforms/dmm-books";
import { GooglePlayBooks } from "./platforms/google-play-books";
export function createPlatform(opt: { db: Database; browser: Browser }) {
const platform = DmmBooks(opt);
return platform;
// Registry of supported platforms, keyed by the platform name.
// Each entry is a factory that is called with the browser handle and also
// carries a static `site` list of origins used to recognise reader URLs.
const platforms = {
"dmm-books": DmmBooks,
"google-play-books": GooglePlayBooks,
};
/** Name of a supported platform, derived from the keys of the registry. */
export type TPlatform = keyof typeof platforms;
/**
 * Resolves which platform a reader URL belongs to.
 *
 * @param url - Absolute URL; only its origin is compared against each
 *   platform's `site` list.
 * @returns The name of the first registered platform that lists the origin.
 * @throws Error when no registered platform lists the origin.
 */
export function site(url: string): TPlatform {
  const target = new URL(url).origin;
  const names = Object.keys(platforms) as Array<TPlatform>;
  const match = names.find((name) => platforms[name].site.includes(target));
  if (match === undefined) {
    throw new Error(`Unsupported URL: ${url}`);
  }
  return match;
}
/**
 * Builds the driver for one platform and layers shared behaviour on top.
 *
 * @param opts.platform - Name of the platform; validated at runtime because
 *   callers pass unchecked command-line input.
 * @param opts.db - Database used to clear the stored login on `logout`.
 * @param opts.browser - Browser handle forwarded to the platform factory.
 * @returns The platform's own methods, with `download` wrapped to prepare the
 *   output directory and `logout` implemented against the database.
 * @throws Error when `opts.platform` is not a registered platform.
 */
export function createPlatform(opts: {
  platform: TPlatform;
  db: Database;
  browser: Browser;
}) {
  // Own-property check: `in` would also accept inherited names such as
  // "constructor" or "toString" and then call them as if they were factories.
  if (!Object.prototype.hasOwnProperty.call(platforms, opts.platform)) {
    throw new Error(`Available platform: ${Object.keys(platforms).join(", ")}`);
  }
  const platform = platforms[opts.platform](opts.browser);
  return {
    ...platform,
    /** Creates the output directory, then delegates to the platform. */
    async download(dir: string, book: Book): Promise<void> {
      await fs.mkdir(path.dirname(dir), { recursive: true });
      // Non-recursive on purpose: this rejects if `dir` already exists.
      await fs.mkdir(dir);
      await platform.download(dir, book);
    },
    /** Forgets the stored login by resetting the secrets to JSON `null`. */
    async logout() {
      await opts.db.run(
        `update platforms set secrets = 'null' where name = ?`,
        opts.platform,
      );
    },
  };
}

View file

@ -1,8 +1,6 @@
import fs from "node:fs/promises";
import path from "node:path";
import type { Book } from "../library";
import { userAgent, type Browser, type BrowserContext } from "../browser";
import type { Database } from "../database";
import type { Browser, BrowserContext } from "../browser";
var NFBR: any;
@ -121,17 +119,7 @@ async function drawImage(file: {
return dataUrl;
}
export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
async function loadBrowserContext(): Promise<BrowserContext> {
const { secrets } = await db.get(
`select secrets from platforms where name = 'dmm-books'`,
);
const storageState = JSON.parse(secrets) ?? undefined;
const ctx = await browser.newContext({ storageState, userAgent });
return ctx;
}
export function DmmBooks(browser: Browser) {
async function* getSeriesBooks(
ctx: BrowserContext,
series: {
@ -242,21 +230,11 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
page.waitForURL("https://www.dmm.com/", { timeout: 0 }),
page.waitForURL("https://www.dmm.co.jp/top/", { timeout: 0 }),
]);
const secrets = await ctx.storageState();
await db.run(
`update platforms set secrets = ? where name = 'dmm-books'`,
JSON.stringify(secrets),
);
},
async logout() {
await db.run(
`update platforms set secrets = 'null' where name = 'dmm-books'`,
);
await browser.saveBrowserContext("dmm-books", ctx);
},
async *pull(): AsyncGenerator<Book> {
const ctx = await loadBrowserContext();
const ctx = await browser.loadBrowserContext("dmm-books");
yield* getAllBooks(ctx);
@ -264,12 +242,9 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
},
async download(dir: string, book: Book) {
const ctx = await loadBrowserContext();
const ctx = await browser.loadBrowserContext("dmm-books");
const page = await ctx.newPage();
// TODO: --all
await fs.mkdir(path.dirname(dir), { recursive: true });
await fs.mkdir(dir);
await page.goto(book.readerUrl);
const files = await page.evaluate(getFiles);
@ -301,3 +276,5 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
},
};
}
DmmBooks.site = ["https://book.dmm.com", "https://book.dmm.co.jp"];

View file

@ -0,0 +1,184 @@
import fs from "node:fs/promises";
import type { Book } from "../library";
import type { Browser } from "../browser";
/**
 * Collects the image URLs of the reader pages currently rendered.
 *
 * This function is handed to `frame.evaluate`, so it must stay
 * self-contained: it may only use globals of the page, not names from this
 * module.
 *
 * It polls every 100 ms until at least one `reader-page` element exists and
 * all of them carry the `-gb-loaded` class.
 *
 * @returns One `{ url }` per rendered page, in document order.
 * @throws Error "Page loading timeout." if the pages are not all loaded
 *   within about 60 seconds.
 * @throws Error if a loaded page contains no `svg image` element.
 */
async function getFiles(): Promise<Array<{ url: string }>> {
  const timeoutMs = 60_000;
  const pollMs = 100;
  const deadline = Date.now() + timeoutMs;

  let pages: Array<HTMLElement>;
  while (true) {
    pages = [...document.querySelectorAll<HTMLElement>("reader-page")];
    const loaded =
      pages.length > 0 &&
      pages.every((page) => page.classList.contains("-gb-loaded"));
    if (loaded) break;
    // Checking the deadline inside the loop guarantees polling stops once the
    // timeout is reported, instead of spinning in the page forever.
    if (Date.now() >= deadline) {
      throw new Error("Page loading timeout.");
    }
    await new Promise((resolve) => setTimeout(resolve, pollMs));
  }

  return pages.map((page) => {
    const image = page.querySelector<SVGImageElement>("svg image");
    if (!image) {
      throw new Error("Page image not found.");
    }
    return { url: image.href.baseVal };
  });
}
/**
 * Downloads one page image and returns it encoded as a data URL.
 *
 * This function is handed to `frame.evaluate`, so it must stay
 * self-contained: it may only use globals of the page, not names from this
 * module.
 *
 * @param file - The image to fetch, identified by its URL.
 * @returns The image content as a `data:` URL.
 * @throws Error if the HTTP response is not successful, or if the content
 *   cannot be read into a data URL.
 */
async function drawImage(file: { url: string }): Promise<string> {
  const res = await fetch(file.url);
  // Without this check an error page would be encoded and only rejected later
  // with a misleading content-type complaint.
  if (!res.ok) {
    throw new Error(
      `Failed to fetch ${file.url}: ${res.status} ${res.statusText}`,
    );
  }
  const blob = await res.blob();

  return await new Promise<string>((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onload = () => {
      const { result } = fileReader;
      if (typeof result === "string") {
        resolve(result);
      } else {
        reject(new Error("FileReader did not produce a data URL."));
      }
    };
    fileReader.onerror = () => {
      // The event passed to onerror carries no message; the failure reason is
      // exposed on the reader itself.
      reject(fileReader.error ?? new Error(`Failed to read ${file.url}`));
    };
    fileReader.readAsDataURL(blob);
  });
}
/**
 * Google Play Books driver.
 *
 * @param browser - Browser handle used to open fresh or restored contexts.
 * @returns `login`, `pull` and `download` operations for this platform.
 */
export function GooglePlayBooks(browser: Browser) {
  const platform = "google-play-books";

  return {
    /**
     * Opens the Google sign-in page and waits, without a time limit, until
     * the browser lands on myaccount.google.com; then stores the session.
     */
    async login() {
      const ctx = await browser.newContext();
      try {
        const page = await ctx.newPage();
        await page.goto("https://accounts.google.com");
        await page.waitForURL(
          (url) => url.origin === "https://myaccount.google.com",
          { timeout: 0 },
        );
        await browser.saveBrowserContext(platform, ctx);
      } finally {
        await ctx.close();
      }
    },

    /**
     * Yields one book per library card on the purchased-comics page.
     * `id` is `NaN` because the book has not been stored yet.
     */
    async *pull(): AsyncGenerator<Book> {
      const ctx = await browser.loadBrowserContext(platform);
      try {
        const page = await ctx.newPage();
        await page.goto(
          "https://play.google.com/books?type=comics&source=purchases",
        );
        await page.waitForSelector("gpb-library-card");
        for (const metadata of await page.$$("gpb-library-card .metadata")) {
          const readerUrl = await metadata.$eval("a", (a) => a.href);
          // First line is the title, second (when present) the author.
          const [title = "", author] = (await metadata.innerText()).split("\n");
          yield {
            id: NaN,
            platform,
            readerUrl,
            title,
            // A card without an author line must not produce [undefined].
            authors: author ? [author] : [],
          };
          process.stderr.write(".");
        }
        process.stderr.write(`\n`);
      } finally {
        // Runs on completion and when the consumer stops iterating early.
        await ctx.close();
      }
    },

    /**
     * Saves every page image of `book` into `dir` as zero-padded,
     * zero-based `<n>.jpeg` files, in the order the pages were first seen.
     *
     * @throws Error if the reader frame or scroll viewport is missing, or if
     *   an image is not a base64-encoded JPEG data URL.
     */
    async download(dir: string, book: Book) {
      const ctx = await browser.loadBrowserContext(platform);
      try {
        const page = await ctx.newPage();
        await page.goto(book.readerUrl);
        await page.waitForSelector(".display");

        // The reader content is taken from the last frame of the page.
        const frame = page.frames().at(-1);
        if (!frame) {
          throw new Error("Frame not found.");
        }

        await frame.evaluate(function scrollToTop() {
          const viewport = document.querySelector("cdk-virtual-scroll-viewport");
          viewport?.scroll({ top: 0 });
        });

        // Scrolls down by one viewport height; resolves to false once the
        // viewport is already at the bottom (nothing was scrolled).
        const scrollDown = (): Promise<boolean> =>
          frame.evaluate(function scroll() {
            const viewport = document.querySelector(
              "cdk-virtual-scroll-viewport",
            );
            if (!viewport) throw new Error("Viewport not found.");
            const hasNext =
              1 <=
              Math.abs(
                viewport.scrollHeight -
                  viewport.clientHeight -
                  viewport.scrollTop,
              );
            if (hasNext) {
              viewport.scrollBy({ top: viewport.clientHeight });
            }
            return hasNext;
          });

        const fileMap = new Map<string, { url: string; dataUrl: string }>();
        // Collect first, then scroll: the pages rendered before the first
        // scroll are captured too, and a book that fits in one viewport
        // still yields its pages.
        do {
          for (const file of await frame.evaluate(getFiles)) {
            if (fileMap.has(file.url)) continue;
            const dataUrl = await frame.evaluate(drawImage, file);
            fileMap.set(file.url, { ...file, dataUrl });
            process.stderr.write(".");
          }
        } while (await scrollDown());

        const files = [...fileMap.values()];
        const digits = String(files.length).length;
        for (const [index, file] of files.entries()) {
          const [prefix, base64] = file.dataUrl.split(",", 2);
          if (!prefix.startsWith("data:image/jpeg;")) {
            throw new Error("Only image/jpeg is supported.");
          }
          if (!prefix.endsWith(";base64")) {
            throw new Error("Only base64 is supported.");
          }
          const name = String(index).padStart(digits, "0");
          await fs.writeFile(`${dir}/${name}.jpeg`, Buffer.from(base64, "base64"));
        }
        process.stderr.write(`\n`);
      } finally {
        await ctx.close();
      }
    },
  };
}

/** Origins whose URLs are recognised as Google Play Books reader URLs. */
GooglePlayBooks.site = ["https://play.google.com"];