google-play-books
This commit is contained in:
parent
22f8e0e767
commit
4e6afe7bf0
6 changed files with 325 additions and 54 deletions
56
browser.ts
56
browser.ts
|
@ -1,5 +1,55 @@
|
||||||
import type { Browser, BrowserContext } from "playwright";
|
import * as Playwright from "playwright";
|
||||||
import { chromium, devices } from "playwright";
|
import { chromium, devices } from "playwright";
|
||||||
|
import type { Database } from "./database";
|
||||||
|
import type { TPlatform } from "./platform";
|
||||||
|
|
||||||
export { Browser, BrowserContext, chromium };
|
export type Browser = {
|
||||||
export const { userAgent } = devices["Desktop Chrome"];
|
loadBrowserContext(platform: TPlatform): Promise<Playwright.BrowserContext>;
|
||||||
|
saveBrowserContext(platform: TPlatform, ctx: BrowserContext): Promise<void>;
|
||||||
|
newContext: () => Promise<Playwright.BrowserContext>;
|
||||||
|
close: () => Promise<void>;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Alias of Playwright's `BrowserContext`, re-exported so that other modules
 * can take the type from this module rather than from `playwright`.
 */
export type BrowserContext = Playwright.BrowserContext;
|
||||||
|
|
||||||
|
/**
 * Launches Chromium and wraps it in the project's `Browser` facade, which
 * knows how to persist and restore per-platform login state in the database.
 *
 * @param db - Database holding the `platforms` table (`name`, `secrets`).
 * @param headless - Whether Chromium runs without a window. Defaults to `true`.
 * @returns The facade: `loadBrowserContext`, `saveBrowserContext`,
 *   `newContext` and `close`.
 */
export async function createBrowser({
  db,
  headless = true,
}: {
  db: Database;
  headless?: boolean;
}): Promise<Browser> {
  const { userAgent } = devices["Desktop Chrome"];
  const browser = await chromium.launch({
    headless,
    args: ["--disable-blink-features=AutomationControlled"],
  });

  return {
    /**
     * Opens a context restored from the storage state saved for `platform`.
     * A stored JSON `null` (what logout writes) yields a fresh context.
     *
     * @throws Error when the `platforms` table has no row for `platform`.
     */
    async loadBrowserContext(
      platform: TPlatform,
    ): Promise<Playwright.BrowserContext> {
      const row = await db.get(
        `select secrets from platforms where name = ?`,
        platform,
      );

      // Without this check a missing row surfaces as an opaque
      // "Cannot destructure property 'secrets'" TypeError.
      if (!row) {
        throw new Error(`Platform not found in database: ${platform}`);
      }

      // JSON `null` means "no saved session"; Playwright expects `undefined`.
      const storageState = JSON.parse(row.secrets) ?? undefined;
      return await browser.newContext({ storageState, userAgent });
    },

    /** Serializes the storage state of `ctx` into the row for `platform`. */
    async saveBrowserContext(
      platform: TPlatform,
      ctx: BrowserContext,
    ): Promise<void> {
      const secrets = await ctx.storageState();
      await db.run(
        `update platforms set secrets = ? where name = ?`,
        JSON.stringify(secrets),
        platform,
      );
    },

    /** Opens a blank context with no stored state. */
    newContext: () => browser.newContext(),

    /** Closes the underlying Chromium instance. */
    close: () => browser.close(),
  };
}
|
||||||
|
|
|
@ -3,10 +3,11 @@ import { createWriteStream } from "node:fs";
|
||||||
import stream from "node:stream/promises";
|
import stream from "node:stream/promises";
|
||||||
import { Zip, ZipPassThrough } from "fflate";
|
import { Zip, ZipPassThrough } from "fflate";
|
||||||
import { Database } from "./database";
|
import { Database } from "./database";
|
||||||
|
import { type TPlatform, site } from "./platform";
|
||||||
|
|
||||||
export type Book = {
|
export type Book = {
|
||||||
id: number;
|
id: number;
|
||||||
platform: "dmm-books" | "google-play-books";
|
platform: TPlatform;
|
||||||
readerUrl: string;
|
readerUrl: string;
|
||||||
title: string;
|
title: string;
|
||||||
authors: Array<string>;
|
authors: Array<string>;
|
||||||
|
@ -15,9 +16,9 @@ export type Book = {
|
||||||
export function createLibrary(db: Database) {
|
export function createLibrary(db: Database) {
|
||||||
return {
|
return {
|
||||||
async add(readerUrlOrBook: string | Book) {
|
async add(readerUrlOrBook: string | Book) {
|
||||||
const platform = "dmm-books";
|
|
||||||
|
|
||||||
if (typeof readerUrlOrBook === "string") {
|
if (typeof readerUrlOrBook === "string") {
|
||||||
|
const platform = site(readerUrlOrBook);
|
||||||
|
|
||||||
await db.run(
|
await db.run(
|
||||||
`insert into books(platform_id, reader_url) values((select id from platforms where name = ?), ?)`,
|
`insert into books(platform_id, reader_url) values((select id from platforms where name = ?), ?)`,
|
||||||
platform,
|
platform,
|
||||||
|
@ -38,7 +39,7 @@ values((select id from platforms where name = ?), ?, ?, ?)
|
||||||
on conflict(reader_url)
|
on conflict(reader_url)
|
||||||
do update set title = excluded.title, authors = excluded.authors
|
do update set title = excluded.title, authors = excluded.authors
|
||||||
`,
|
`,
|
||||||
platform,
|
readerUrlOrBook.platform,
|
||||||
readerUrlOrBook.readerUrl,
|
readerUrlOrBook.readerUrl,
|
||||||
readerUrlOrBook.title,
|
readerUrlOrBook.title,
|
||||||
JSON.stringify(readerUrlOrBook.authors),
|
JSON.stringify(readerUrlOrBook.authors),
|
||||||
|
|
42
main.ts
42
main.ts
|
@ -1,8 +1,8 @@
|
||||||
import util from "node:util";
|
import util from "node:util";
|
||||||
import { chromium } from "./browser";
|
import { createBrowser } from "./browser";
|
||||||
import { createDatabase } from "./database";
|
import { createDatabase } from "./database";
|
||||||
import { type Book, createLibrary } from "./library";
|
import { type Book, createLibrary } from "./library";
|
||||||
import { createPlatform } from "./platform";
|
import { type TPlatform, createPlatform } from "./platform";
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
db: {
|
db: {
|
||||||
|
@ -18,21 +18,29 @@ const options = {
|
||||||
default: "3",
|
default: "3",
|
||||||
},
|
},
|
||||||
login: {
|
login: {
|
||||||
type: "boolean",
|
type: "string",
|
||||||
async run() {
|
async run() {
|
||||||
const db = await createDatabase(args.values.db!);
|
const db = await createDatabase(args.values.db!);
|
||||||
const browser = await chromium.launch({ headless: false });
|
const browser = await createBrowser({ db, headless: false });
|
||||||
const platform = createPlatform({ db, browser });
|
const platform = createPlatform({
|
||||||
|
platform: args.values.login as TPlatform,
|
||||||
|
db,
|
||||||
|
browser,
|
||||||
|
});
|
||||||
await platform.login();
|
await platform.login();
|
||||||
await browser.close();
|
await browser.close();
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
logout: {
|
logout: {
|
||||||
type: "boolean",
|
type: "string",
|
||||||
async run() {
|
async run() {
|
||||||
const db = await createDatabase(args.values.db!);
|
const db = await createDatabase(args.values.db!);
|
||||||
const browser = await chromium.launch();
|
const browser = await createBrowser({ db });
|
||||||
const platform = createPlatform({ db, browser });
|
const platform = createPlatform({
|
||||||
|
platform: args.values.logout as TPlatform,
|
||||||
|
db,
|
||||||
|
browser,
|
||||||
|
});
|
||||||
await platform.logout();
|
await platform.logout();
|
||||||
await browser.close();
|
await browser.close();
|
||||||
},
|
},
|
||||||
|
@ -87,12 +95,16 @@ const options = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pull: {
|
pull: {
|
||||||
type: "boolean",
|
type: "string",
|
||||||
async run() {
|
async run() {
|
||||||
const db = await createDatabase(args.values.db!);
|
const db = await createDatabase(args.values.db!);
|
||||||
const library = createLibrary(db);
|
const library = createLibrary(db);
|
||||||
const browser = await chromium.launch();
|
const browser = await createBrowser({ db });
|
||||||
const platform = createPlatform({ db, browser });
|
const platform = createPlatform({
|
||||||
|
platform: args.values.pull as TPlatform,
|
||||||
|
db,
|
||||||
|
browser,
|
||||||
|
});
|
||||||
|
|
||||||
for await (const book of platform.pull()) {
|
for await (const book of platform.pull()) {
|
||||||
await library.add(book);
|
await library.add(book);
|
||||||
|
@ -125,8 +137,12 @@ const options = {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const book of books) {
|
for (const book of books) {
|
||||||
const browser = await chromium.launch();
|
const browser = await createBrowser({ db });
|
||||||
const platform = createPlatform({ db, browser });
|
const platform = createPlatform({
|
||||||
|
platform: book.platform,
|
||||||
|
db,
|
||||||
|
browser,
|
||||||
|
});
|
||||||
const dir = `${args.values["out-dir"]!}/${book.id}`;
|
const dir = `${args.values["out-dir"]!}/${book.id}`;
|
||||||
await platform.download(dir, book);
|
await platform.download(dir, book);
|
||||||
await library.archive(dir, book, {
|
await library.archive(dir, book, {
|
||||||
|
|
51
platform.ts
51
platform.ts
|
@ -1,8 +1,51 @@
|
||||||
import type { Database } from "./database";
|
import fs from "node:fs/promises";
|
||||||
|
import path from "node:path";
|
||||||
|
import type { Book } from "./library";
|
||||||
import type { Browser } from "./browser";
|
import type { Browser } from "./browser";
|
||||||
|
import type { Database } from "./database";
|
||||||
import { DmmBooks } from "./platforms/dmm-books";
|
import { DmmBooks } from "./platforms/dmm-books";
|
||||||
|
import { GooglePlayBooks } from "./platforms/google-play-books";
|
||||||
|
|
||||||
export function createPlatform(opt: { db: Database; browser: Browser }) {
|
const platforms = {
|
||||||
const platform = DmmBooks(opt);
|
"dmm-books": DmmBooks,
|
||||||
return platform;
|
"google-play-books": GooglePlayBooks,
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Name of a supported platform: one of the keys of the `platforms` table. */
export type TPlatform = keyof typeof platforms;
|
||||||
|
|
||||||
|
/**
 * Resolves which supported platform a reader URL belongs to by comparing the
 * URL's origin against each platform's `site` list.
 *
 * @param url - Reader URL.
 * @returns The name of the first platform whose `site` list contains the origin.
 * @throws Error `Unsupported URL: …` when the URL cannot be parsed or no
 *   platform claims its origin.
 */
export function site(url: string): TPlatform {
  let origin: string;

  try {
    ({ origin } = new URL(url));
  } catch {
    // A malformed URL is reported the same way as an unknown origin instead
    // of leaking the generic TypeError thrown by the URL constructor.
    throw new Error(`Unsupported URL: ${url}`);
  }

  // `origins` avoids shadowing this function's own name.
  for (const [platform, { site: origins }] of Object.entries(platforms)) {
    if (origins.includes(origin)) return platform as TPlatform;
  }

  throw new Error(`Unsupported URL: ${url}`);
}
|
||||||
|
|
||||||
|
/**
 * Instantiates the adapter for `opts.platform` and layers the behavior that
 * is common to all platforms on top of it.
 *
 * @param opts.platform - Platform name; must be an own key of `platforms`.
 * @param opts.db - Database used to clear stored secrets on logout.
 * @param opts.browser - Browser facade handed to the platform adapter.
 * @returns The adapter's members, with `download` wrapped to create the
 *   target directory first and a `logout` that clears the stored secrets.
 * @throws Error listing the available platforms when the name is unknown.
 */
export function createPlatform(opts: {
  platform: TPlatform;
  db: Database;
  browser: Browser;
}) {
  // The name can come straight from a CLI argument. `in` would also accept
  // inherited keys such as "toString" or "constructor", so only own keys count.
  if (!Object.prototype.hasOwnProperty.call(platforms, opts.platform)) {
    throw new Error(`Available platform: ${Object.keys(platforms).join(", ")}`);
  }

  const platform = platforms[opts.platform](opts.browser);

  return {
    ...platform,

    /** Creates `dir`, then delegates the download to the platform adapter. */
    async download(dir: string, book: Book): Promise<void> {
      await fs.mkdir(path.dirname(dir), { recursive: true });
      // Non-recursive on purpose: this rejects if `dir` already exists.
      await fs.mkdir(dir);
      await platform.download(dir, book);
    },

    /** Forgets the saved session by storing JSON `null` as the secrets. */
    async logout() {
      await opts.db.run(
        `update platforms set secrets = 'null' where name = ?`,
        opts.platform,
      );
    },
  };
}
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
import fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import path from "node:path";
|
|
||||||
import type { Book } from "../library";
|
import type { Book } from "../library";
|
||||||
import { userAgent, type Browser, type BrowserContext } from "../browser";
|
import type { Browser, BrowserContext } from "../browser";
|
||||||
import type { Database } from "../database";
|
|
||||||
|
|
||||||
var NFBR: any;
|
var NFBR: any;
|
||||||
|
|
||||||
|
@ -121,17 +119,7 @@ async function drawImage(file: {
|
||||||
return dataUrl;
|
return dataUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
export function DmmBooks(browser: Browser) {
|
||||||
async function loadBrowserContext(): Promise<BrowserContext> {
|
|
||||||
const { secrets } = await db.get(
|
|
||||||
`select secrets from platforms where name = 'dmm-books'`,
|
|
||||||
);
|
|
||||||
|
|
||||||
const storageState = JSON.parse(secrets) ?? undefined;
|
|
||||||
const ctx = await browser.newContext({ storageState, userAgent });
|
|
||||||
return ctx;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function* getSeriesBooks(
|
async function* getSeriesBooks(
|
||||||
ctx: BrowserContext,
|
ctx: BrowserContext,
|
||||||
series: {
|
series: {
|
||||||
|
@ -242,21 +230,11 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
||||||
page.waitForURL("https://www.dmm.com/", { timeout: 0 }),
|
page.waitForURL("https://www.dmm.com/", { timeout: 0 }),
|
||||||
page.waitForURL("https://www.dmm.co.jp/top/", { timeout: 0 }),
|
page.waitForURL("https://www.dmm.co.jp/top/", { timeout: 0 }),
|
||||||
]);
|
]);
|
||||||
const secrets = await ctx.storageState();
|
await browser.saveBrowserContext("dmm-books", ctx);
|
||||||
await db.run(
|
|
||||||
`update platforms set secrets = ? where name = 'dmm-books'`,
|
|
||||||
JSON.stringify(secrets),
|
|
||||||
);
|
|
||||||
},
|
|
||||||
|
|
||||||
async logout() {
|
|
||||||
await db.run(
|
|
||||||
`update platforms set secrets = 'null' where name = 'dmm-books'`,
|
|
||||||
);
|
|
||||||
},
|
},
|
||||||
|
|
||||||
async *pull(): AsyncGenerator<Book> {
|
async *pull(): AsyncGenerator<Book> {
|
||||||
const ctx = await loadBrowserContext();
|
const ctx = await browser.loadBrowserContext("dmm-books");
|
||||||
|
|
||||||
yield* getAllBooks(ctx);
|
yield* getAllBooks(ctx);
|
||||||
|
|
||||||
|
@ -264,12 +242,9 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
||||||
},
|
},
|
||||||
|
|
||||||
async download(dir: string, book: Book) {
|
async download(dir: string, book: Book) {
|
||||||
const ctx = await loadBrowserContext();
|
const ctx = await browser.loadBrowserContext("dmm-books");
|
||||||
const page = await ctx.newPage();
|
const page = await ctx.newPage();
|
||||||
|
|
||||||
// TODO: --all
|
|
||||||
await fs.mkdir(path.dirname(dir), { recursive: true });
|
|
||||||
await fs.mkdir(dir);
|
|
||||||
await page.goto(book.readerUrl);
|
await page.goto(book.readerUrl);
|
||||||
|
|
||||||
const files = await page.evaluate(getFiles);
|
const files = await page.evaluate(getFiles);
|
||||||
|
@ -301,3 +276,5 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DmmBooks.site = ["https://book.dmm.com", "https://book.dmm.co.jp"];
|
||||||
|
|
184
platforms/google-play-books.ts
Normal file
184
platforms/google-play-books.ts
Normal file
|
@ -0,0 +1,184 @@
|
||||||
|
import fs from "node:fs/promises";
|
||||||
|
import type { Book } from "../library";
|
||||||
|
import type { Browser } from "../browser";
|
||||||
|
|
||||||
|
/**
 * Waits until every `reader-page` element currently in the document carries
 * the `-gb-loaded` class, then returns the image URL of each page.
 *
 * This function is handed to `frame.evaluate`, so it runs inside the reader
 * frame; it must stay self-contained and use only browser globals.
 *
 * @returns One `{ url }` per `reader-page`, in document order.
 * @throws Error "Page loading timeout." if the pages are not all loaded
 *   within 60 seconds.
 * @throws Error "Page image not found." if a loaded page has no `svg image`.
 */
async function getFiles(): Promise<Array<{ url: string }>> {
  const timeoutMs = 60_000;
  const pollMs = 100;
  const deadline = Date.now() + timeoutMs;

  let pages: Array<HTMLElement>;

  // Poll with a deadline checked inside the loop, so the loop itself stops
  // when time runs out instead of spinning on after a timer has rejected.
  while (true) {
    pages = [...document.querySelectorAll<HTMLElement>("reader-page")];

    const loaded =
      pages.length > 0 &&
      pages.every((page) => page.classList.contains("-gb-loaded"));

    if (loaded) break;

    if (Date.now() >= deadline) {
      throw new Error("Page loading timeout.");
    }

    await new Promise((resolve) => setTimeout(resolve, pollMs));
  }

  return pages.map((page) => {
    const image = page.querySelector<SVGImageElement>("svg image");

    if (!image) {
      throw new Error("Page image not found.");
    }

    return { url: image.href.baseVal };
  });
}
|
||||||
|
|
||||||
|
/**
 * Fetches `file.url` and returns its content encoded as a `data:` URL.
 *
 * This function is handed to `frame.evaluate`, so it runs inside the reader
 * frame; it must stay self-contained and use only browser globals.
 *
 * @param file - Descriptor whose `url` points at the page image.
 * @returns The `data:` URL produced by `FileReader.readAsDataURL`.
 * @throws Error when the HTTP response is not OK or the blob cannot be read.
 */
async function drawImage(file: { url: string }): Promise<string> {
  const res = await fetch(file.url);

  // Fail here rather than encoding an error page into a data URL.
  if (!res.ok) {
    throw new Error(
      `Failed to fetch ${file.url}: ${res.status} ${res.statusText}`,
    );
  }

  const blob = await res.blob();

  return await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      if (typeof reader.result === "string") {
        resolve(reader.result);
      } else {
        reject(new Error(`Unexpected FileReader result for ${file.url}`));
      }
    };

    // The error event carries no message; the cause is on `reader.error`.
    reader.onerror = () => {
      reject(reader.error ?? new Error(`Failed to read ${file.url}`));
    };

    reader.readAsDataURL(blob);
  });
}
|
||||||
|
|
||||||
|
/**
 * Google Play Books platform adapter.
 *
 * @param browser - Browser facade used to open contexts and to save or
 *   restore the "google-play-books" login state.
 * @returns `login`, `pull` and `download` operations for this platform.
 */
export function GooglePlayBooks(browser: Browser) {
  return {
    /**
     * Opens the Google sign-in page, waits for the user to land on
     * `https://myaccount.google.com`, then saves the context's storage state.
     */
    async login() {
      const ctx = await browser.newContext();

      try {
        const page = await ctx.newPage();
        await page.goto("https://accounts.google.com");
        // timeout: 0 — the user may take arbitrarily long to sign in.
        await page.waitForURL(
          (url) => url.origin === "https://myaccount.google.com",
          { timeout: 0 },
        );
        await browser.saveBrowserContext("google-play-books", ctx);
      } finally {
        // Release the context even when navigation or saving fails.
        await ctx.close();
      }
    },

    /**
     * Yields one book per `gpb-library-card` on the purchased-comics page.
     * `id` is `NaN` because the scraped card carries no library id.
     */
    async *pull(): AsyncGenerator<Book> {
      const ctx = await browser.loadBrowserContext("google-play-books");

      try {
        const page = await ctx.newPage();
        await page.goto(
          "https://play.google.com/books?type=comics&source=purchases",
        );
        await page.waitForSelector("gpb-library-card");

        for (const metadata of await page.$$("gpb-library-card .metadata")) {
          const readerUrl = await metadata.$eval("a", (a) => a.href);
          const [title, author] = (await metadata.innerText()).split("\n");

          yield {
            id: NaN,
            platform: "google-play-books",
            readerUrl,
            title,
            // A card with only a title line must not produce `[undefined]`.
            authors: author ? [author] : [],
          };

          process.stderr.write(".");
        }

        process.stderr.write(`\n`);
      } finally {
        // Also runs when the consumer stops iterating early.
        await ctx.close();
      }
    },

    /**
     * Opens the reader for `book`, scrolls through it one viewport at a time,
     * and writes every page image to `dir` as zero-padded `<index>.jpeg`.
     *
     * @param dir - Existing directory that receives the image files.
     * @param book - Book whose `readerUrl` is opened.
     * @throws Error when the reader frame or viewport is missing, or when an
     *   image is not a base64-encoded JPEG data URL.
     */
    async download(dir: string, book: Book) {
      const ctx = await browser.loadBrowserContext("google-play-books");

      try {
        const page = await ctx.newPage();

        await page.goto(book.readerUrl);
        await page.waitForSelector(".display");

        const frame = page.frames().at(-1);

        if (!frame) {
          throw new Error("Frame not found.");
        }

        await frame.evaluate(function scrollToTop() {
          const viewport = document.querySelector("cdk-virtual-scroll-viewport");
          viewport?.scroll({ top: 0 });
        });

        // Scrolls down by one viewport when not yet at the bottom; resolves to
        // whether there was anything left to scroll.
        const next = async (): Promise<boolean> =>
          await frame.evaluate(function scroll() {
            const viewport = document.querySelector(
              "cdk-virtual-scroll-viewport",
            );

            if (!viewport) throw new Error("Viewport not found.");

            const hasNext =
              1 <=
              Math.abs(
                viewport.scrollHeight -
                  viewport.clientHeight -
                  viewport.scrollTop,
              );

            if (hasNext) {
              viewport.scrollBy({ top: viewport.clientHeight });
            }

            return hasNext;
          });

        // url -> data URL. Map insertion order is the page order.
        const dataUrls = new Map<string, string>();
        let hasNext: boolean;

        // Collect BEFORE scrolling: this captures the first viewport and also
        // handles a book that fits into a single viewport.
        do {
          for (const file of await frame.evaluate(getFiles)) {
            if (dataUrls.has(file.url)) continue;

            dataUrls.set(file.url, await frame.evaluate(drawImage, file));
            process.stderr.write(".");
          }

          hasNext = await next();
        } while (hasNext);

        const files = [...dataUrls.values()];
        const digits = String(files.length).length;

        for (const [index, dataUrl] of files.entries()) {
          const [prefix, base64] = dataUrl.split(",", 2);

          if (!prefix.startsWith("data:image/jpeg;")) {
            throw new Error("Only image/jpeg is supported.");
          }

          if (!prefix.endsWith(";base64")) {
            throw new Error("Only base64 is supported.");
          }

          const name = String(index).padStart(digits, "0");
          await fs.writeFile(`${dir}/${name}.jpeg`, Buffer.from(base64, "base64"));
        }

        process.stderr.write(`\n`);
      } finally {
        await ctx.close();
      }
    },
  };
}

/** Origins whose reader URLs belong to this platform. */
GooglePlayBooks.site = ["https://play.google.com"];
|
Loading…
Add table
Reference in a new issue