Compare commits
No commits in common. "7f36682b0491457161ee40dbe1bd724cef7be03d" and "22f8e0e767df740cd21c58526b1ee802268ce9f3" have entirely different histories.
7f36682b04
...
22f8e0e767
6 changed files with 97 additions and 366 deletions
56
browser.ts
56
browser.ts
|
@ -1,55 +1,5 @@
|
|||
import * as Playwright from "playwright";
|
||||
import type { Browser, BrowserContext } from "playwright";
|
||||
import { chromium, devices } from "playwright";
|
||||
import type { Database } from "./database";
|
||||
import type { TPlatform } from "./platform";
|
||||
|
||||
export type Browser = {
|
||||
loadBrowserContext(platform: TPlatform): Promise<Playwright.BrowserContext>;
|
||||
saveBrowserContext(platform: TPlatform, ctx: BrowserContext): Promise<void>;
|
||||
newContext: () => Promise<Playwright.BrowserContext>;
|
||||
close: () => Promise<void>;
|
||||
};
|
||||
|
||||
export type BrowserContext = Playwright.BrowserContext;
|
||||
|
||||
export async function createBrowser({
|
||||
db,
|
||||
headless = true,
|
||||
}: {
|
||||
db: Database;
|
||||
headless?: boolean;
|
||||
}): Promise<Browser> {
|
||||
const { userAgent } = devices["Desktop Chrome"];
|
||||
const browser = await chromium.launch({
|
||||
headless,
|
||||
args: ["--disable-blink-features=AutomationControlled"],
|
||||
});
|
||||
|
||||
return {
|
||||
async loadBrowserContext(
|
||||
platform: TPlatform,
|
||||
): Promise<Playwright.BrowserContext> {
|
||||
const { secrets } = await db.get(
|
||||
`select secrets from platforms where name = ?`,
|
||||
platform,
|
||||
);
|
||||
|
||||
const storageState = JSON.parse(secrets) ?? undefined;
|
||||
const ctx = await browser.newContext({ storageState, userAgent });
|
||||
return ctx;
|
||||
},
|
||||
async saveBrowserContext(
|
||||
platform: TPlatform,
|
||||
ctx: BrowserContext,
|
||||
): Promise<void> {
|
||||
const secrets = await ctx.storageState();
|
||||
await db.run(
|
||||
`update platforms set secrets = ? where name = ?`,
|
||||
JSON.stringify(secrets),
|
||||
platform,
|
||||
);
|
||||
},
|
||||
newContext: () => browser.newContext(),
|
||||
close: () => browser.close(),
|
||||
};
|
||||
}
|
||||
export { Browser, BrowserContext, chromium };
|
||||
export const { userAgent } = devices["Desktop Chrome"];
|
||||
|
|
|
@ -3,11 +3,10 @@ import { createWriteStream } from "node:fs";
|
|||
import stream from "node:stream/promises";
|
||||
import { Zip, ZipPassThrough } from "fflate";
|
||||
import { Database } from "./database";
|
||||
import { type TPlatform, site } from "./platform";
|
||||
|
||||
export type Book = {
|
||||
id: number;
|
||||
platform: TPlatform;
|
||||
platform: "dmm-books" | "google-play-books";
|
||||
readerUrl: string;
|
||||
title: string;
|
||||
authors: Array<string>;
|
||||
|
@ -16,9 +15,9 @@ export type Book = {
|
|||
export function createLibrary(db: Database) {
|
||||
return {
|
||||
async add(readerUrlOrBook: string | Book) {
|
||||
if (typeof readerUrlOrBook === "string") {
|
||||
const platform = site(readerUrlOrBook);
|
||||
const platform = "dmm-books";
|
||||
|
||||
if (typeof readerUrlOrBook === "string") {
|
||||
await db.run(
|
||||
`insert into books(platform_id, reader_url) values((select id from platforms where name = ?), ?)`,
|
||||
platform,
|
||||
|
@ -39,7 +38,7 @@ values((select id from platforms where name = ?), ?, ?, ?)
|
|||
on conflict(reader_url)
|
||||
do update set title = excluded.title, authors = excluded.authors
|
||||
`,
|
||||
readerUrlOrBook.platform,
|
||||
platform,
|
||||
readerUrlOrBook.readerUrl,
|
||||
readerUrlOrBook.title,
|
||||
JSON.stringify(readerUrlOrBook.authors),
|
||||
|
|
42
main.ts
42
main.ts
|
@ -1,8 +1,8 @@
|
|||
import util from "node:util";
|
||||
import { createBrowser } from "./browser";
|
||||
import { chromium } from "./browser";
|
||||
import { createDatabase } from "./database";
|
||||
import { type Book, createLibrary } from "./library";
|
||||
import { type TPlatform, createPlatform } from "./platform";
|
||||
import { createPlatform } from "./platform";
|
||||
|
||||
const options = {
|
||||
db: {
|
||||
|
@ -18,29 +18,21 @@ const options = {
|
|||
default: "3",
|
||||
},
|
||||
login: {
|
||||
type: "string",
|
||||
type: "boolean",
|
||||
async run() {
|
||||
const db = await createDatabase(args.values.db!);
|
||||
const browser = await createBrowser({ db, headless: false });
|
||||
const platform = createPlatform({
|
||||
platform: args.values.login as TPlatform,
|
||||
db,
|
||||
browser,
|
||||
});
|
||||
const browser = await chromium.launch({ headless: false });
|
||||
const platform = createPlatform({ db, browser });
|
||||
await platform.login();
|
||||
await browser.close();
|
||||
},
|
||||
},
|
||||
logout: {
|
||||
type: "string",
|
||||
type: "boolean",
|
||||
async run() {
|
||||
const db = await createDatabase(args.values.db!);
|
||||
const browser = await createBrowser({ db });
|
||||
const platform = createPlatform({
|
||||
platform: args.values.logout as TPlatform,
|
||||
db,
|
||||
browser,
|
||||
});
|
||||
const browser = await chromium.launch();
|
||||
const platform = createPlatform({ db, browser });
|
||||
await platform.logout();
|
||||
await browser.close();
|
||||
},
|
||||
|
@ -95,16 +87,12 @@ const options = {
|
|||
},
|
||||
},
|
||||
pull: {
|
||||
type: "string",
|
||||
type: "boolean",
|
||||
async run() {
|
||||
const db = await createDatabase(args.values.db!);
|
||||
const library = createLibrary(db);
|
||||
const browser = await createBrowser({ db });
|
||||
const platform = createPlatform({
|
||||
platform: args.values.pull as TPlatform,
|
||||
db,
|
||||
browser,
|
||||
});
|
||||
const browser = await chromium.launch();
|
||||
const platform = createPlatform({ db, browser });
|
||||
|
||||
for await (const book of platform.pull()) {
|
||||
await library.add(book);
|
||||
|
@ -137,12 +125,8 @@ const options = {
|
|||
}
|
||||
|
||||
for (const book of books) {
|
||||
const browser = await createBrowser({ db });
|
||||
const platform = createPlatform({
|
||||
platform: book.platform,
|
||||
db,
|
||||
browser,
|
||||
});
|
||||
const browser = await chromium.launch();
|
||||
const platform = createPlatform({ db, browser });
|
||||
const dir = `${args.values["out-dir"]!}/${book.id}`;
|
||||
await platform.download(dir, book);
|
||||
await library.archive(dir, book, {
|
||||
|
|
88
platform.ts
88
platform.ts
|
@ -1,88 +1,8 @@
|
|||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { Book } from "./library";
|
||||
import type { Browser } from "./browser";
|
||||
import type { Database } from "./database";
|
||||
import type { Browser } from "./browser";
|
||||
import { DmmBooks } from "./platforms/dmm-books";
|
||||
import { GooglePlayBooks } from "./platforms/google-play-books";
|
||||
|
||||
const platforms = {
|
||||
"dmm-books": DmmBooks,
|
||||
"google-play-books": GooglePlayBooks,
|
||||
};
|
||||
|
||||
export type TPlatform = keyof typeof platforms;
|
||||
|
||||
export function site(url: string): TPlatform {
|
||||
const { origin } = new URL(url);
|
||||
|
||||
for (const [platform, { site }] of Object.entries(platforms)) {
|
||||
if (site.includes(origin)) return platform as TPlatform;
|
||||
}
|
||||
|
||||
throw new Error(`Unsupported URL: ${url}`);
|
||||
}
|
||||
|
||||
export function createPlatform(opts: {
|
||||
platform: TPlatform;
|
||||
db: Database;
|
||||
browser: Browser;
|
||||
}) {
|
||||
if (!(opts.platform in platforms)) {
|
||||
throw new Error(`Available platform: ${Object.keys(platforms).join(", ")}`);
|
||||
}
|
||||
|
||||
const platform = platforms[opts.platform](opts.browser);
|
||||
|
||||
return {
|
||||
...platform,
|
||||
|
||||
async download(dir: string, book: Book): Promise<void> {
|
||||
await fs.mkdir(path.dirname(dir), { recursive: true });
|
||||
await fs.mkdir(dir);
|
||||
|
||||
const files: Array<() => Promise<string>> = await platform.getFiles(book);
|
||||
const digits = String(files.length).length;
|
||||
|
||||
function pad(n: string) {
|
||||
return n.padStart(digits, "0");
|
||||
}
|
||||
|
||||
const supportedTypes = {
|
||||
"image/png": "png",
|
||||
"image/jpeg": "jpg",
|
||||
};
|
||||
|
||||
for (const [n, dataUrl] of Object.entries(files)) {
|
||||
const [prefix, base64] = (await dataUrl()).split(",", 2);
|
||||
const [, type, encoding] =
|
||||
/^data:([^;]*)(;base64)?$/.exec(prefix) ?? [];
|
||||
|
||||
const extension = supportedTypes[type];
|
||||
if (!extension) {
|
||||
throw new Error(
|
||||
`It was ${type}. The image must be a file of type: ${[
|
||||
...Object.keys(supportedTypes),
|
||||
].join(", ")}.`,
|
||||
);
|
||||
}
|
||||
|
||||
if (encoding !== ";base64") {
|
||||
throw new Error("Only base64 is supported.");
|
||||
}
|
||||
|
||||
const buffer = Buffer.from(base64, "base64");
|
||||
await fs.writeFile(`${dir}/${pad(n)}.${extension}`, buffer);
|
||||
}
|
||||
|
||||
process.stderr.write(`\n`);
|
||||
},
|
||||
|
||||
async logout() {
|
||||
await opts.db.run(
|
||||
`update platforms set secrets = 'null' where name = ?`,
|
||||
opts.platform,
|
||||
);
|
||||
},
|
||||
};
|
||||
export function createPlatform(opt: { db: Database; browser: Browser }) {
|
||||
const platform = DmmBooks(opt);
|
||||
return platform;
|
||||
}
|
||||
|
|
|
@ -1,16 +1,12 @@
|
|||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { Book } from "../library";
|
||||
import type { Browser, BrowserContext } from "../browser";
|
||||
|
||||
type ImageFile = {
|
||||
url: string;
|
||||
blocks: Array<Record<string, number>>;
|
||||
width: number;
|
||||
height: number;
|
||||
};
|
||||
import { userAgent, type Browser, type BrowserContext } from "../browser";
|
||||
import type { Database } from "../database";
|
||||
|
||||
var NFBR: any;
|
||||
|
||||
async function getImageFiles(): Promise<Array<ImageFile>> {
|
||||
async function getFiles() {
|
||||
const params = new URLSearchParams(location.search);
|
||||
const model = new NFBR.a6G.Model({
|
||||
settings: new NFBR.Settings("NFBR.SettingData"),
|
||||
|
@ -35,7 +31,12 @@ async function getImageFiles(): Promise<Array<ImageFile>> {
|
|||
const a5n = new NFBR.a5n();
|
||||
await a5n.a5s(content, "configuration", a6l);
|
||||
|
||||
const imageFiles: Array<ImageFile> = [];
|
||||
const files: Array<{
|
||||
url: string;
|
||||
blocks: [];
|
||||
width: number;
|
||||
height: number;
|
||||
}> = [];
|
||||
|
||||
for (const index of Object.keys(content.files)) {
|
||||
const file = content.files[index];
|
||||
|
@ -68,7 +69,7 @@ async function getImageFiles(): Promise<Array<ImageFile>> {
|
|||
|
||||
const url = `${a5w.url}${page.url}`;
|
||||
|
||||
imageFiles.push({
|
||||
files.push({
|
||||
url,
|
||||
blocks,
|
||||
width: Width,
|
||||
|
@ -76,19 +77,24 @@ async function getImageFiles(): Promise<Array<ImageFile>> {
|
|||
});
|
||||
}
|
||||
|
||||
return imageFiles;
|
||||
return files;
|
||||
}
|
||||
|
||||
async function drawImage(imageFile: ImageFile) {
|
||||
async function drawImage(file: {
|
||||
url: string;
|
||||
blocks: Array<Record<string, number>>;
|
||||
width: number;
|
||||
height: number;
|
||||
}) {
|
||||
const canvas = Object.assign(document.createElement("canvas"), {
|
||||
width: imageFile.width,
|
||||
height: imageFile.height,
|
||||
width: file.width,
|
||||
height: file.height,
|
||||
});
|
||||
|
||||
const image = (await new Promise((resolve) => {
|
||||
Object.assign(new Image(), {
|
||||
crossOrigin: "use-credentials",
|
||||
src: imageFile.url,
|
||||
src: file.url,
|
||||
onload() {
|
||||
resolve(this);
|
||||
},
|
||||
|
@ -97,7 +103,7 @@ async function drawImage(imageFile: ImageFile) {
|
|||
|
||||
const ctx = canvas.getContext("2d")!;
|
||||
|
||||
for (const q of imageFile.blocks) {
|
||||
for (const q of file.blocks) {
|
||||
ctx.drawImage(
|
||||
image,
|
||||
q.destX,
|
||||
|
@ -115,7 +121,17 @@ async function drawImage(imageFile: ImageFile) {
|
|||
return dataUrl;
|
||||
}
|
||||
|
||||
export function DmmBooks(browser: Browser) {
|
||||
export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
|
||||
async function loadBrowserContext(): Promise<BrowserContext> {
|
||||
const { secrets } = await db.get(
|
||||
`select secrets from platforms where name = 'dmm-books'`,
|
||||
);
|
||||
|
||||
const storageState = JSON.parse(secrets) ?? undefined;
|
||||
const ctx = await browser.newContext({ storageState, userAgent });
|
||||
return ctx;
|
||||
}
|
||||
|
||||
async function* getSeriesBooks(
|
||||
ctx: BrowserContext,
|
||||
series: {
|
||||
|
@ -226,34 +242,62 @@ export function DmmBooks(browser: Browser) {
|
|||
page.waitForURL("https://www.dmm.com/", { timeout: 0 }),
|
||||
page.waitForURL("https://www.dmm.co.jp/top/", { timeout: 0 }),
|
||||
]);
|
||||
await browser.saveBrowserContext("dmm-books", ctx);
|
||||
const secrets = await ctx.storageState();
|
||||
await db.run(
|
||||
`update platforms set secrets = ? where name = 'dmm-books'`,
|
||||
JSON.stringify(secrets),
|
||||
);
|
||||
},
|
||||
|
||||
async logout() {
|
||||
await db.run(
|
||||
`update platforms set secrets = 'null' where name = 'dmm-books'`,
|
||||
);
|
||||
},
|
||||
|
||||
async *pull(): AsyncGenerator<Book> {
|
||||
const ctx = await browser.loadBrowserContext("dmm-books");
|
||||
const ctx = await loadBrowserContext();
|
||||
|
||||
yield* getAllBooks(ctx);
|
||||
|
||||
process.stderr.write(`\n`);
|
||||
},
|
||||
|
||||
async getFiles(book: Book): Promise<Array<() => Promise<string>>> {
|
||||
const ctx = await browser.loadBrowserContext("dmm-books");
|
||||
async download(dir: string, book: Book) {
|
||||
const ctx = await loadBrowserContext();
|
||||
const page = await ctx.newPage();
|
||||
|
||||
// TODO: --all
|
||||
await fs.mkdir(path.dirname(dir), { recursive: true });
|
||||
await fs.mkdir(dir);
|
||||
await page.goto(book.readerUrl);
|
||||
|
||||
const imageFiles = await page.evaluate(getImageFiles);
|
||||
const files = await page.evaluate(getFiles);
|
||||
const digits = String(files.length).length;
|
||||
|
||||
return imageFiles.map((imageFile) => async () => {
|
||||
const dataUrl = await page.evaluate(drawImage, imageFile);
|
||||
function pad(n: string) {
|
||||
return n.padStart(digits, "0");
|
||||
}
|
||||
|
||||
for (const [n, file] of Object.entries(files)) {
|
||||
const dataUrl = await page.evaluate(drawImage, file);
|
||||
const [prefix, base64] = dataUrl.split(",", 2);
|
||||
|
||||
if (!prefix.startsWith("data:image/png;")) {
|
||||
throw new Error("Only image/png is supported.");
|
||||
}
|
||||
|
||||
if (!prefix.endsWith(";base64")) {
|
||||
throw new Error("Only base64 is supported.");
|
||||
}
|
||||
|
||||
const buffer = Buffer.from(base64, "base64");
|
||||
await fs.writeFile(`${dir}/${pad(n)}.png`, buffer);
|
||||
|
||||
process.stderr.write(".");
|
||||
}
|
||||
|
||||
return dataUrl;
|
||||
});
|
||||
process.stderr.write(`\n`);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
DmmBooks.site = ["https://book.dmm.com", "https://book.dmm.co.jp"];
|
||||
|
|
|
@ -1,166 +0,0 @@
|
|||
import type { Book } from "../library";
|
||||
import type { Browser } from "../browser";
|
||||
|
||||
type ImageFile = {
|
||||
url: string;
|
||||
};
|
||||
|
||||
async function getImageFiles(): Promise<Array<ImageFile>> {
|
||||
const pages: NodeListOf<HTMLElement> = await new Promise(async function (
|
||||
resolve,
|
||||
reject,
|
||||
) {
|
||||
const timeout = setTimeout(() => {
|
||||
reject(new Error("Page loading timeout."));
|
||||
}, 60_000);
|
||||
|
||||
let pages: NodeListOf<HTMLElement>;
|
||||
|
||||
while (true) {
|
||||
pages = document.querySelectorAll("reader-page");
|
||||
|
||||
const loaded =
|
||||
pages.length > 0 &&
|
||||
[...pages].every((page) => page.classList.contains("-gb-loaded"));
|
||||
|
||||
if (loaded) {
|
||||
break;
|
||||
} else {
|
||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||
}
|
||||
}
|
||||
|
||||
resolve(pages);
|
||||
clearTimeout(timeout);
|
||||
});
|
||||
|
||||
const images: Array<SVGImageElement> = [...pages].map(
|
||||
(el) => el.querySelector("svg image")!,
|
||||
);
|
||||
|
||||
return [...images].map((image) => ({ url: image.href.baseVal }));
|
||||
}
|
||||
|
||||
async function fetchImage(imageFile: ImageFile): Promise<string> {
|
||||
const res = await fetch(imageFile.url);
|
||||
const blob = await res.blob();
|
||||
const dataUrl: string = await new Promise((resolve, reject) => {
|
||||
const fileReader = Object.assign(new FileReader(), {
|
||||
onload(): void {
|
||||
resolve(this.result);
|
||||
},
|
||||
onerror(e: ErrorEvent): void {
|
||||
const error = new Error(`${e.type}: ${e.message}`);
|
||||
reject(error);
|
||||
},
|
||||
});
|
||||
|
||||
fileReader.readAsDataURL(blob);
|
||||
});
|
||||
|
||||
return dataUrl;
|
||||
}
|
||||
|
||||
export function GooglePlayBooks(browser: Browser) {
|
||||
return {
|
||||
async login() {
|
||||
const ctx = await browser.newContext();
|
||||
const page = await ctx.newPage();
|
||||
await page.goto("https://accounts.google.com");
|
||||
await page.waitForURL(
|
||||
(url) => url.origin === "https://myaccount.google.com",
|
||||
{ timeout: 0 },
|
||||
);
|
||||
await browser.saveBrowserContext("google-play-books", ctx);
|
||||
},
|
||||
|
||||
async *pull(): AsyncGenerator<Book> {
|
||||
const ctx = await browser.loadBrowserContext("google-play-books");
|
||||
const page = await ctx.newPage();
|
||||
await page.goto(
|
||||
"https://play.google.com/books?type=comics&source=purchases",
|
||||
);
|
||||
await page.waitForSelector("gpb-library-card");
|
||||
|
||||
for (const metadata of await page.$$("gpb-library-card .metadata")) {
|
||||
const readerUrl = await metadata.$eval("a", (a) => a.href);
|
||||
const [title, author] = (await metadata.innerText()).split("\n");
|
||||
|
||||
yield {
|
||||
id: NaN,
|
||||
platform: "google-play-books",
|
||||
readerUrl,
|
||||
title,
|
||||
authors: [author],
|
||||
};
|
||||
|
||||
process.stderr.write(".");
|
||||
}
|
||||
|
||||
process.stderr.write(`\n`);
|
||||
},
|
||||
|
||||
async getFiles(book: Book): Promise<Array<() => Promise<string>>> {
|
||||
const ctx = await browser.loadBrowserContext("google-play-books");
|
||||
const page = await ctx.newPage();
|
||||
|
||||
await page.goto(book.readerUrl);
|
||||
await page.waitForSelector(".display");
|
||||
|
||||
const frame = page.frames().at(-1);
|
||||
|
||||
if (!frame) {
|
||||
throw new Error("Frame not found.");
|
||||
}
|
||||
|
||||
await frame.evaluate(function scrollToTop() {
|
||||
const viewport = document.querySelector("cdk-virtual-scroll-viewport");
|
||||
viewport?.scroll({ top: 0 });
|
||||
});
|
||||
|
||||
async function next(): Promise<boolean> {
|
||||
return await frame!.evaluate(function scroll() {
|
||||
const viewport = document.querySelector(
|
||||
"cdk-virtual-scroll-viewport",
|
||||
);
|
||||
|
||||
if (!viewport) throw new Error("Viewport not found.");
|
||||
|
||||
const hasNext =
|
||||
1 <=
|
||||
Math.abs(
|
||||
viewport.scrollHeight -
|
||||
viewport.clientHeight -
|
||||
viewport.scrollTop,
|
||||
);
|
||||
|
||||
if (hasNext) {
|
||||
viewport.scrollBy({ top: viewport.clientHeight });
|
||||
}
|
||||
|
||||
return hasNext;
|
||||
});
|
||||
}
|
||||
|
||||
const fileMap: Map<string, () => Promise<string>> = new Map();
|
||||
|
||||
while (await next()) {
|
||||
const imageFiles = await frame.evaluate(getImageFiles);
|
||||
|
||||
for (const imageFile of imageFiles) {
|
||||
if (fileMap.has(imageFile.url)) continue;
|
||||
|
||||
const dataUrl = await frame.evaluate(fetchImage, imageFile);
|
||||
|
||||
process.stderr.write(".");
|
||||
|
||||
fileMap.set(imageFile.url, async () => dataUrl);
|
||||
}
|
||||
}
|
||||
|
||||
return [...fileMap.values()];
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
GooglePlayBooks.site = ["https://play.google.com"];
|
Loading…
Add table
Reference in a new issue