google-play-books
This commit is contained in:
parent
22f8e0e767
commit
4e6afe7bf0
6 changed files with 325 additions and 54 deletions
56
browser.ts
56
browser.ts
|
@ -1,5 +1,55 @@
|
|||
import type { Browser, BrowserContext } from "playwright";
|
||||
import * as Playwright from "playwright";
|
||||
import { chromium, devices } from "playwright";
|
||||
import type { Database } from "./database";
|
||||
import type { TPlatform } from "./platform";
|
||||
|
||||
export { Browser, BrowserContext, chromium };
|
||||
export const { userAgent } = devices["Desktop Chrome"];
|
||||
/**
 * Handle returned by `createBrowser`: a launched browser plus helpers that
 * persist and restore a platform's storage state through the database.
 */
export type Browser = {
  /** Opens a context restored from the storage state saved for `platform`. */
  loadBrowserContext(platform: TPlatform): Promise<BrowserContext>;

  /** Saves the storage state of `ctx` as the secrets of `platform`. */
  saveBrowserContext(platform: TPlatform, ctx: BrowserContext): Promise<void>;

  /** Opens a fresh context with no stored state. */
  newContext: () => Promise<BrowserContext>;

  /** Closes the underlying browser. */
  close: () => Promise<void>;
};
|
||||
|
||||
export type BrowserContext = Playwright.BrowserContext;
|
||||
|
||||
/**
 * Launches Chromium and wraps it in a `Browser` handle whose contexts can be
 * restored from / saved to the `platforms.secrets` column.
 *
 * @param db - Database holding the `platforms` table.
 * @param headless - Whether to launch without a visible window (default `true`).
 * @returns The browser handle; call `close()` when done.
 */
export async function createBrowser({
  db,
  headless = true,
}: {
  db: Database;
  headless?: boolean;
}): Promise<Browser> {
  const { userAgent } = devices["Desktop Chrome"];
  const browser = await chromium.launch({
    headless,
    // NOTE(review): presumably hides the automation flag from sites — confirm.
    args: ["--disable-blink-features=AutomationControlled"],
  });

  return {
    /**
     * Opens a context using the stored state of `platform`.
     *
     * @throws Error when no `platforms` row exists for `platform`.
     */
    async loadBrowserContext(
      platform: TPlatform,
    ): Promise<Playwright.BrowserContext> {
      const row = await db.get(
        `select secrets from platforms where name = ?`,
        platform,
      );

      // Without this guard a missing row surfaces as an opaque
      // "cannot destructure property of undefined" TypeError.
      if (row == null) {
        throw new Error(`Platform not found in database: ${platform}`);
      }

      // A stored JSON `null` (logged-out state) maps to "no storage state".
      const storageState = JSON.parse(row.secrets) ?? undefined;

      return browser.newContext({ storageState, userAgent });
    },

    /** Serializes the storage state of `ctx` into the row of `platform`. */
    async saveBrowserContext(
      platform: TPlatform,
      ctx: BrowserContext,
    ): Promise<void> {
      const secrets = await ctx.storageState();

      await db.run(
        `update platforms set secrets = ? where name = ?`,
        JSON.stringify(secrets),
        platform,
      );
    },

    newContext: () => browser.newContext(),

    close: () => browser.close(),
  };
}
|
||||
|
|
|
@ -3,10 +3,11 @@ import { createWriteStream } from "node:fs";
|
|||
import stream from "node:stream/promises";
|
||||
import { Zip, ZipPassThrough } from "fflate";
|
||||
import { Database } from "./database";
|
||||
import { type TPlatform, site } from "./platform";
|
||||
|
||||
export type Book = {
|
||||
id: number;
|
||||
platform: "dmm-books" | "google-play-books";
|
||||
platform: TPlatform;
|
||||
readerUrl: string;
|
||||
title: string;
|
||||
authors: Array<string>;
|
||||
|
@ -15,9 +16,9 @@ export type Book = {
|
|||
export function createLibrary(db: Database) {
|
||||
return {
|
||||
async add(readerUrlOrBook: string | Book) {
|
||||
const platform = "dmm-books";
|
||||
|
||||
if (typeof readerUrlOrBook === "string") {
|
||||
const platform = site(readerUrlOrBook);
|
||||
|
||||
await db.run(
|
||||
`insert into books(platform_id, reader_url) values((select id from platforms where name = ?), ?)`,
|
||||
platform,
|
||||
|
@ -38,7 +39,7 @@ values((select id from platforms where name = ?), ?, ?, ?)
|
|||
on conflict(reader_url)
|
||||
do update set title = excluded.title, authors = excluded.authors
|
||||
`,
|
||||
platform,
|
||||
readerUrlOrBook.platform,
|
||||
readerUrlOrBook.readerUrl,
|
||||
readerUrlOrBook.title,
|
||||
JSON.stringify(readerUrlOrBook.authors),
|
||||
|
|
42
main.ts
42
main.ts
|
@ -1,8 +1,8 @@
|
|||
import util from "node:util";
|
||||
import { chromium } from "./browser";
|
||||
import { createBrowser } from "./browser";
|
||||
import { createDatabase } from "./database";
|
||||
import { type Book, createLibrary } from "./library";
|
||||
import { createPlatform } from "./platform";
|
||||
import { type TPlatform, createPlatform } from "./platform";
|
||||
|
||||
const options = {
|
||||
db: {
|
||||
|
@ -18,21 +18,29 @@ const options = {
|
|||
default: "3",
|
||||
},
|
||||
login: {
|
||||
type: "boolean",
|
||||
type: "string",
|
||||
async run() {
|
||||
const db = await createDatabase(args.values.db!);
|
||||
const browser = await chromium.launch({ headless: false });
|
||||
const platform = createPlatform({ db, browser });
|
||||
const browser = await createBrowser({ db, headless: false });
|
||||
const platform = createPlatform({
|
||||
platform: args.values.login as TPlatform,
|
||||
db,
|
||||
browser,
|
||||
});
|
||||
await platform.login();
|
||||
await browser.close();
|
||||
},
|
||||
},
|
||||
logout: {
|
||||
type: "boolean",
|
||||
type: "string",
|
||||
async run() {
|
||||
const db = await createDatabase(args.values.db!);
|
||||
const browser = await chromium.launch();
|
||||
const platform = createPlatform({ db, browser });
|
||||
const browser = await createBrowser({ db });
|
||||
const platform = createPlatform({
|
||||
platform: args.values.logout as TPlatform,
|
||||
db,
|
||||
browser,
|
||||
});
|
||||
await platform.logout();
|
||||
await browser.close();
|
||||
},
|
||||
|
@ -87,12 +95,16 @@ const options = {
|
|||
},
|
||||
},
|
||||
pull: {
|
||||
type: "boolean",
|
||||
type: "string",
|
||||
async run() {
|
||||
const db = await createDatabase(args.values.db!);
|
||||
const library = createLibrary(db);
|
||||
const browser = await chromium.launch();
|
||||
const platform = createPlatform({ db, browser });
|
||||
const browser = await createBrowser({ db });
|
||||
const platform = createPlatform({
|
||||
platform: args.values.pull as TPlatform,
|
||||
db,
|
||||
browser,
|
||||
});
|
||||
|
||||
for await (const book of platform.pull()) {
|
||||
await library.add(book);
|
||||
|
@ -125,8 +137,12 @@ const options = {
|
|||
}
|
||||
|
||||
for (const book of books) {
|
||||
const browser = await chromium.launch();
|
||||
const platform = createPlatform({ db, browser });
|
||||
const browser = await createBrowser({ db });
|
||||
const platform = createPlatform({
|
||||
platform: book.platform,
|
||||
db,
|
||||
browser,
|
||||
});
|
||||
const dir = `${args.values["out-dir"]!}/${book.id}`;
|
||||
await platform.download(dir, book);
|
||||
await library.archive(dir, book, {
|
||||
|
|
51
platform.ts
51
platform.ts
|
@ -1,8 +1,51 @@
|
|||
import type { Database } from "./database";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { Book } from "./library";
|
||||
import type { Browser } from "./browser";
|
||||
import type { Database } from "./database";
|
||||
import { DmmBooks } from "./platforms/dmm-books";
|
||||
import { GooglePlayBooks } from "./platforms/google-play-books";
|
||||
|
||||
export function createPlatform(opt: { db: Database; browser: Browser }) {
|
||||
const platform = DmmBooks(opt);
|
||||
return platform;
|
||||
/**
 * Registry of supported platforms, keyed by the name stored in the database.
 * Each entry is a factory taking a `Browser`; `site()` also reads a static
 * `site` list of origins from each factory.
 */
const platforms = {
  "dmm-books": DmmBooks,
  "google-play-books": GooglePlayBooks,
};

/** Name of a supported platform, i.e. a key of the `platforms` registry. */
export type TPlatform = keyof typeof platforms;
|
||||
|
||||
/**
 * Resolves which platform a reader URL belongs to by matching the URL's
 * origin against each platform's `site` list.
 *
 * @param url - Absolute URL of a book reader page.
 * @returns The name of the first platform whose `site` list contains the origin.
 * @throws Error when no registered platform lists the origin.
 */
export function site(url: string): TPlatform {
  const { origin } = new URL(url);

  const match = Object.entries(platforms).find(([, candidate]) =>
    candidate.site.includes(origin),
  );

  if (match === undefined) {
    throw new Error(`Unsupported URL: ${url}`);
  }

  return match[0] as TPlatform;
}
|
||||
|
||||
/**
 * Instantiates the implementation registered for `opts.platform` and wraps it
 * with behavior shared by all platforms (output directory creation, logout).
 *
 * @param opts.platform - Registry key of the platform to create.
 * @param opts.db - Database used to clear stored secrets on logout.
 * @param opts.browser - Browser handle passed to the platform factory.
 * @returns The platform object with `download` and `logout` overridden.
 * @throws Error when `opts.platform` is not a registered platform.
 */
export function createPlatform(opts: {
  platform: TPlatform;
  db: Database;
  browser: Browser;
}) {
  // Own-property check: `in` also matches inherited keys such as "toString",
  // which would let an unchecked CLI string call Object.prototype methods as
  // if they were platform factories.
  if (!Object.prototype.hasOwnProperty.call(platforms, opts.platform)) {
    throw new Error(`Available platform: ${Object.keys(platforms).join(", ")}`);
  }

  const platform = platforms[opts.platform](opts.browser);

  return {
    ...platform,

    /** Creates `dir`, then delegates the download to the platform. */
    async download(dir: string, book: Book): Promise<void> {
      await fs.mkdir(path.dirname(dir), { recursive: true });
      // Non-recursive on purpose of the original code: this rejects when
      // `dir` already exists. NOTE(review): presumably guards against
      // overwriting an earlier download — confirm.
      await fs.mkdir(dir);
      await platform.download(dir, book);
    },

    /** Replaces the stored secrets of this platform with the JSON text `null`. */
    async logout() {
      await opts.db.run(
        `update platforms set secrets = 'null' where name = ?`,
        opts.platform,
      );
    },
  };
}
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { Book } from "../library";
|
||||
import { userAgent, type Browser, type BrowserContext } from "../browser";
|
||||
import type { Database } from "../database";
|
||||
import type { Browser, BrowserContext } from "../browser";
|
||||
|
||||
var NFBR: any;
|
||||
|
||||
|
@ -121,17 +119,7 @@ async function drawImage(file: {
|
|||
return dataUrl;
|
||||
}
|
||||
|
||||
export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
||||
async function loadBrowserContext(): Promise<BrowserContext> {
|
||||
const { secrets } = await db.get(
|
||||
`select secrets from platforms where name = 'dmm-books'`,
|
||||
);
|
||||
|
||||
const storageState = JSON.parse(secrets) ?? undefined;
|
||||
const ctx = await browser.newContext({ storageState, userAgent });
|
||||
return ctx;
|
||||
}
|
||||
|
||||
export function DmmBooks(browser: Browser) {
|
||||
async function* getSeriesBooks(
|
||||
ctx: BrowserContext,
|
||||
series: {
|
||||
|
@ -242,21 +230,11 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
|||
page.waitForURL("https://www.dmm.com/", { timeout: 0 }),
|
||||
page.waitForURL("https://www.dmm.co.jp/top/", { timeout: 0 }),
|
||||
]);
|
||||
const secrets = await ctx.storageState();
|
||||
await db.run(
|
||||
`update platforms set secrets = ? where name = 'dmm-books'`,
|
||||
JSON.stringify(secrets),
|
||||
);
|
||||
},
|
||||
|
||||
async logout() {
|
||||
await db.run(
|
||||
`update platforms set secrets = 'null' where name = 'dmm-books'`,
|
||||
);
|
||||
await browser.saveBrowserContext("dmm-books", ctx);
|
||||
},
|
||||
|
||||
async *pull(): AsyncGenerator<Book> {
|
||||
const ctx = await loadBrowserContext();
|
||||
const ctx = await browser.loadBrowserContext("dmm-books");
|
||||
|
||||
yield* getAllBooks(ctx);
|
||||
|
||||
|
@ -264,12 +242,9 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
|||
},
|
||||
|
||||
async download(dir: string, book: Book) {
|
||||
const ctx = await loadBrowserContext();
|
||||
const ctx = await browser.loadBrowserContext("dmm-books");
|
||||
const page = await ctx.newPage();
|
||||
|
||||
// TODO: --all
|
||||
await fs.mkdir(path.dirname(dir), { recursive: true });
|
||||
await fs.mkdir(dir);
|
||||
await page.goto(book.readerUrl);
|
||||
|
||||
const files = await page.evaluate(getFiles);
|
||||
|
@ -301,3 +276,5 @@ export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
|||
},
|
||||
};
|
||||
}
|
||||
|
||||
DmmBooks.site = ["https://book.dmm.com", "https://book.dmm.co.jp"];
|
||||
|
|
184
platforms/google-play-books.ts
Normal file
184
platforms/google-play-books.ts
Normal file
|
@ -0,0 +1,184 @@
|
|||
import fs from "node:fs/promises";
|
||||
import type { Book } from "../library";
|
||||
import type { Browser } from "../browser";
|
||||
|
||||
/**
 * Waits until every `reader-page` element in the document carries the
 * `-gb-loaded` class, then returns the image URL of each page in DOM order.
 *
 * This function is handed to `frame.evaluate`, so it must stay self-contained
 * and may only reference names available inside the page.
 *
 * @returns One `{ url }` entry per `reader-page` element.
 * @throws Error after 60 seconds if the pages never finish loading, or when a
 *   loaded page contains no `svg image` element.
 */
async function getFiles(): Promise<Array<{ url: string }>> {
  const deadline = Date.now() + 60_000;
  let pages: Array<HTMLElement>;

  // Poll with an explicit deadline. The previous async Promise executor kept
  // polling forever after its timeout had already rejected.
  while (true) {
    pages = Array.from(document.querySelectorAll<HTMLElement>("reader-page"));

    const loaded =
      pages.length > 0 &&
      pages.every((page) => page.classList.contains("-gb-loaded"));

    if (loaded) break;

    if (Date.now() >= deadline) {
      throw new Error("Page loading timeout.");
    }

    await new Promise((resolve) => setTimeout(resolve, 100));
  }

  return pages.map((page) => {
    const image = page.querySelector<SVGImageElement>("svg image");

    if (!image) {
      throw new Error("Reader page has no image.");
    }

    return { url: image.href.baseVal };
  });
}
|
||||
|
||||
/**
 * Fetches `file.url` and returns its body encoded as a data URL.
 *
 * This function is handed to `frame.evaluate`, so it must stay self-contained
 * and may only reference names available inside the page.
 *
 * @param file - Descriptor carrying the URL to fetch.
 * @returns The response body as a `data:` URL.
 * @throws Error when the response status is not OK or the body cannot be read.
 */
async function drawImage(file: { url: string }): Promise<string> {
  const res = await fetch(file.url);

  // Fail here rather than encoding an error page as if it were the image.
  if (!res.ok) {
    throw new Error(
      `Failed to fetch ${file.url}: ${res.status} ${res.statusText}`,
    );
  }

  const blob = await res.blob();

  return await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      if (typeof reader.result === "string") {
        resolve(reader.result);
      } else {
        reject(new Error(`Unexpected FileReader result for ${file.url}`));
      }
    };

    // FileReader error events carry no `message`; the cause is on
    // `reader.error`.
    reader.onerror = () => {
      reject(reader.error ?? new Error(`Failed to read ${file.url}`));
    };

    reader.readAsDataURL(blob);
  });
}
|
||||
|
||||
/**
 * Google Play Books platform: interactive login, library listing and page
 * image download, all driven through the shared `Browser` handle.
 *
 * @param browser - Browser handle used to open, restore and save contexts.
 * @returns An object exposing `login`, `pull` and `download`.
 */
export function GooglePlayBooks(browser: Browser) {
  return {
    /**
     * Opens the Google sign-in page, waits (without timeout) until the browser
     * reaches the `https://myaccount.google.com` origin, then stores the
     * resulting session for later runs.
     */
    async login() {
      const ctx = await browser.newContext();

      try {
        const page = await ctx.newPage();
        await page.goto("https://accounts.google.com");
        await page.waitForURL(
          (url) => url.origin === "https://myaccount.google.com",
          { timeout: 0 },
        );
        await browser.saveBrowserContext("google-play-books", ctx);
      } finally {
        // Release the context even when navigation or saving fails.
        await ctx.close();
      }
    },

    /**
     * Yields one `Book` per library card on the purchased-comics page.
     * `id` is `NaN` because it is not known at this point.
     */
    async *pull(): AsyncGenerator<Book> {
      const ctx = await browser.loadBrowserContext("google-play-books");

      try {
        const page = await ctx.newPage();
        await page.goto(
          "https://play.google.com/books?type=comics&source=purchases",
        );
        await page.waitForSelector("gpb-library-card");

        for (const metadata of await page.$$("gpb-library-card .metadata")) {
          const readerUrl = await metadata.$eval("a", (a) => a.href);
          // The card text is split on newlines: first line title, second author.
          const [title, author] = (await metadata.innerText()).split("\n");

          yield {
            id: NaN,
            platform: "google-play-books",
            readerUrl,
            title,
            // A card without a second line must not yield `[undefined]`.
            authors: author ? [author] : [],
          };

          process.stderr.write(".");
        }

        process.stderr.write(`\n`);
      } finally {
        // Also runs when the consumer stops iterating early.
        await ctx.close();
      }
    },

    /**
     * Opens the reader for `book`, scrolls through the whole viewport while
     * collecting every page image, and writes the images into `dir` as
     * zero-padded `<index>.jpeg` files.
     *
     * @param dir - Existing directory to write the images into.
     * @param book - Book whose `readerUrl` is opened.
     * @throws Error when no frame is found, or when an image is not a
     *   base64-encoded JPEG data URL.
     */
    async download(dir: string, book: Book) {
      const ctx = await browser.loadBrowserContext("google-play-books");

      try {
        const page = await ctx.newPage();

        await page.goto(book.readerUrl);
        await page.waitForSelector(".display");

        // The last frame of the page is used as the reader frame.
        const frame = page.frames().at(-1);

        if (!frame) {
          throw new Error("Frame not found.");
        }

        await frame.evaluate(function scrollToTop() {
          const viewport = document.querySelector("cdk-virtual-scroll-viewport");
          viewport?.scroll({ top: 0 });
        });

        // Scrolls one viewport height down; resolves to false once the
        // viewport is already at the bottom.
        const scrollToNext = async (): Promise<boolean> =>
          await frame.evaluate(function scroll() {
            const viewport = document.querySelector(
              "cdk-virtual-scroll-viewport",
            );

            if (!viewport) throw new Error("Viewport not found.");

            const hasNext =
              1 <=
              Math.abs(
                viewport.scrollHeight -
                  viewport.clientHeight -
                  viewport.scrollTop,
              );

            if (hasNext) {
              viewport.scrollBy({ top: viewport.clientHeight });
            }

            return hasNext;
          });

        // Keyed by URL so pages seen at several scroll positions are stored
        // once, in first-seen order.
        const fileMap = new Map<string, { url: string; dataUrl: string }>();

        // Harvest before scrolling: otherwise the initial viewport is never
        // collected on its own, and a book that fits in one viewport yields
        // no files at all.
        do {
          const visible = await frame.evaluate(getFiles);

          for (const file of visible) {
            if (fileMap.has(file.url)) continue;

            const dataUrl = await frame.evaluate(drawImage, file);
            fileMap.set(file.url, { ...file, dataUrl });
            process.stderr.write(".");
          }
        } while (await scrollToNext());

        const files = [...fileMap.values()];
        const digits = String(files.length).length;

        for (const [index, file] of files.entries()) {
          const [prefix, base64] = file.dataUrl.split(",", 2);

          if (!prefix.startsWith("data:image/jpeg;")) {
            throw new Error("Only image/jpeg is supported.");
          }

          if (!prefix.endsWith(";base64")) {
            throw new Error("Only base64 is supported.");
          }

          const buffer = Buffer.from(base64, "base64");
          const name = String(index).padStart(digits, "0");
          await fs.writeFile(`${dir}/${name}.jpeg`, buffer);
        }

        process.stderr.write(`\n`);
      } finally {
        await ctx.close();
      }
    },
  };
}
|
||||
|
||||
GooglePlayBooks.site = ["https://play.google.com"];
|
Loading…
Add table
Reference in a new issue