Compare commits

..

No commits in common. "7f36682b0491457161ee40dbe1bd724cef7be03d" and "22f8e0e767df740cd21c58526b1ee802268ce9f3" have entirely different histories.

6 changed files with 97 additions and 366 deletions

View file

@ -1,55 +1,5 @@
import * as Playwright from "playwright"; import type { Browser, BrowserContext } from "playwright";
import { chromium, devices } from "playwright"; import { chromium, devices } from "playwright";
import type { Database } from "./database";
import type { TPlatform } from "./platform";
export type Browser = { export { Browser, BrowserContext, chromium };
loadBrowserContext(platform: TPlatform): Promise<Playwright.BrowserContext>; export const { userAgent } = devices["Desktop Chrome"];
saveBrowserContext(platform: TPlatform, ctx: BrowserContext): Promise<void>;
newContext: () => Promise<Playwright.BrowserContext>;
close: () => Promise<void>;
};
/** Alias that exposes Playwright's `BrowserContext` type under this module's own name. */
export type BrowserContext = Playwright.BrowserContext;
/**
 * Launches Chromium and wraps it in this module's `Browser` facade.
 *
 * The facade can restore a context from the storage state saved in the
 * `platforms` table, persist a context's storage state back to that table,
 * open a blank context, and close the underlying browser.
 *
 * @param options.db - Database that holds the `platforms` table (`name`, `secrets`).
 * @param options.headless - Passed through to `chromium.launch`; defaults to `true`.
 * @returns The facade around the launched browser.
 */
export async function createBrowser({
  db,
  headless = true,
}: {
  db: Database;
  headless?: boolean;
}): Promise<Browser> {
  const desktopUserAgent = devices["Desktop Chrome"].userAgent;
  const launched = await chromium.launch({
    headless,
    args: ["--disable-blink-features=AutomationControlled"],
  });

  /** Opens a context pre-loaded with the storage state saved for `platform`. */
  async function loadBrowserContext(
    platform: TPlatform,
  ): Promise<Playwright.BrowserContext> {
    const { secrets: serialized } = await db.get(
      `select secrets from platforms where name = ?`,
      platform,
    );
    // A stored JSON `null` is turned into `undefined` before it reaches Playwright.
    const restored = JSON.parse(serialized);
    return launched.newContext({
      storageState: restored === null ? undefined : restored,
      userAgent: desktopUserAgent,
    });
  }

  /** Serialises the context's storage state into the row for `platform`. */
  async function saveBrowserContext(
    platform: TPlatform,
    ctx: BrowserContext,
  ): Promise<void> {
    const state = await ctx.storageState();
    await db.run(
      `update platforms set secrets = ? where name = ?`,
      JSON.stringify(state),
      platform,
    );
  }

  return {
    loadBrowserContext,
    saveBrowserContext,
    newContext() {
      return launched.newContext();
    },
    close() {
      return launched.close();
    },
  };
}

View file

@ -3,11 +3,10 @@ import { createWriteStream } from "node:fs";
import stream from "node:stream/promises"; import stream from "node:stream/promises";
import { Zip, ZipPassThrough } from "fflate"; import { Zip, ZipPassThrough } from "fflate";
import { Database } from "./database"; import { Database } from "./database";
import { type TPlatform, site } from "./platform";
export type Book = { export type Book = {
id: number; id: number;
platform: TPlatform; platform: "dmm-books" | "google-play-books";
readerUrl: string; readerUrl: string;
title: string; title: string;
authors: Array<string>; authors: Array<string>;
@ -16,9 +15,9 @@ export type Book = {
export function createLibrary(db: Database) { export function createLibrary(db: Database) {
return { return {
async add(readerUrlOrBook: string | Book) { async add(readerUrlOrBook: string | Book) {
if (typeof readerUrlOrBook === "string") { const platform = "dmm-books";
const platform = site(readerUrlOrBook);
if (typeof readerUrlOrBook === "string") {
await db.run( await db.run(
`insert into books(platform_id, reader_url) values((select id from platforms where name = ?), ?)`, `insert into books(platform_id, reader_url) values((select id from platforms where name = ?), ?)`,
platform, platform,
@ -39,7 +38,7 @@ values((select id from platforms where name = ?), ?, ?, ?)
on conflict(reader_url) on conflict(reader_url)
do update set title = excluded.title, authors = excluded.authors do update set title = excluded.title, authors = excluded.authors
`, `,
readerUrlOrBook.platform, platform,
readerUrlOrBook.readerUrl, readerUrlOrBook.readerUrl,
readerUrlOrBook.title, readerUrlOrBook.title,
JSON.stringify(readerUrlOrBook.authors), JSON.stringify(readerUrlOrBook.authors),

42
main.ts
View file

@ -1,8 +1,8 @@
import util from "node:util"; import util from "node:util";
import { createBrowser } from "./browser"; import { chromium } from "./browser";
import { createDatabase } from "./database"; import { createDatabase } from "./database";
import { type Book, createLibrary } from "./library"; import { type Book, createLibrary } from "./library";
import { type TPlatform, createPlatform } from "./platform"; import { createPlatform } from "./platform";
const options = { const options = {
db: { db: {
@ -18,29 +18,21 @@ const options = {
default: "3", default: "3",
}, },
login: { login: {
type: "string", type: "boolean",
async run() { async run() {
const db = await createDatabase(args.values.db!); const db = await createDatabase(args.values.db!);
const browser = await createBrowser({ db, headless: false }); const browser = await chromium.launch({ headless: false });
const platform = createPlatform({ const platform = createPlatform({ db, browser });
platform: args.values.login as TPlatform,
db,
browser,
});
await platform.login(); await platform.login();
await browser.close(); await browser.close();
}, },
}, },
logout: { logout: {
type: "string", type: "boolean",
async run() { async run() {
const db = await createDatabase(args.values.db!); const db = await createDatabase(args.values.db!);
const browser = await createBrowser({ db }); const browser = await chromium.launch();
const platform = createPlatform({ const platform = createPlatform({ db, browser });
platform: args.values.logout as TPlatform,
db,
browser,
});
await platform.logout(); await platform.logout();
await browser.close(); await browser.close();
}, },
@ -95,16 +87,12 @@ const options = {
}, },
}, },
pull: { pull: {
type: "string", type: "boolean",
async run() { async run() {
const db = await createDatabase(args.values.db!); const db = await createDatabase(args.values.db!);
const library = createLibrary(db); const library = createLibrary(db);
const browser = await createBrowser({ db }); const browser = await chromium.launch();
const platform = createPlatform({ const platform = createPlatform({ db, browser });
platform: args.values.pull as TPlatform,
db,
browser,
});
for await (const book of platform.pull()) { for await (const book of platform.pull()) {
await library.add(book); await library.add(book);
@ -137,12 +125,8 @@ const options = {
} }
for (const book of books) { for (const book of books) {
const browser = await createBrowser({ db }); const browser = await chromium.launch();
const platform = createPlatform({ const platform = createPlatform({ db, browser });
platform: book.platform,
db,
browser,
});
const dir = `${args.values["out-dir"]!}/${book.id}`; const dir = `${args.values["out-dir"]!}/${book.id}`;
await platform.download(dir, book); await platform.download(dir, book);
await library.archive(dir, book, { await library.archive(dir, book, {

View file

@ -1,88 +1,8 @@
import fs from "node:fs/promises";
import path from "node:path";
import type { Book } from "./library";
import type { Browser } from "./browser";
import type { Database } from "./database"; import type { Database } from "./database";
import type { Browser } from "./browser";
import { DmmBooks } from "./platforms/dmm-books"; import { DmmBooks } from "./platforms/dmm-books";
import { GooglePlayBooks } from "./platforms/google-play-books";
const platforms = { export function createPlatform(opt: { db: Database; browser: Browser }) {
"dmm-books": DmmBooks, const platform = DmmBooks(opt);
"google-play-books": GooglePlayBooks, return platform;
};
export type TPlatform = keyof typeof platforms;
/**
 * Resolves which registered platform serves the given reader URL.
 *
 * The URL's origin is compared against the `site` origin list attached to
 * each entry of `platforms`; the first entry that lists it wins.
 *
 * @param url - Absolute URL of a book reader page.
 * @returns The key of the matching platform.
 * @throws Error when no platform lists the URL's origin. `new URL` itself
 *   throws when `url` cannot be parsed.
 */
export function site(url: string): TPlatform {
  const origin = new URL(url).origin;
  const match = Object.entries(platforms).find(([, entry]) =>
    entry.site.includes(origin),
  );
  if (match === undefined) {
    throw new Error(`Unsupported URL: ${url}`);
  }
  return match[0] as TPlatform;
}
export function createPlatform(opts: {
platform: TPlatform;
db: Database;
browser: Browser;
}) {
if (!(opts.platform in platforms)) {
throw new Error(`Available platform: ${Object.keys(platforms).join(", ")}`);
}
const platform = platforms[opts.platform](opts.browser);
return {
...platform,
async download(dir: string, book: Book): Promise<void> {
await fs.mkdir(path.dirname(dir), { recursive: true });
await fs.mkdir(dir);
const files: Array<() => Promise<string>> = await platform.getFiles(book);
const digits = String(files.length).length;
function pad(n: string) {
return n.padStart(digits, "0");
}
const supportedTypes = {
"image/png": "png",
"image/jpeg": "jpg",
};
for (const [n, dataUrl] of Object.entries(files)) {
const [prefix, base64] = (await dataUrl()).split(",", 2);
const [, type, encoding] =
/^data:([^;]*)(;base64)?$/.exec(prefix) ?? [];
const extension = supportedTypes[type];
if (!extension) {
throw new Error(
`It was ${type}. The image must be a file of type: ${[
...Object.keys(supportedTypes),
].join(", ")}.`,
);
}
if (encoding !== ";base64") {
throw new Error("Only base64 is supported.");
}
const buffer = Buffer.from(base64, "base64");
await fs.writeFile(`${dir}/${pad(n)}.${extension}`, buffer);
}
process.stderr.write(`\n`);
},
async logout() {
await opts.db.run(
`update platforms set secrets = 'null' where name = ?`,
opts.platform,
);
},
};
} }

View file

@ -1,16 +1,12 @@
import fs from "node:fs/promises";
import path from "node:path";
import type { Book } from "../library"; import type { Book } from "../library";
import type { Browser, BrowserContext } from "../browser"; import { userAgent, type Browser, type BrowserContext } from "../browser";
import type { Database } from "../database";
/**
 * Descriptor of one page image: built by `getImageFiles` and consumed by
 * `drawImage`.
 */
type ImageFile = {
// Source URL; `drawImage` assigns it to the `src` of the image it loads.
url: string;
// Records of numeric fields (e.g. `destX`); `drawImage` issues one
// `ctx.drawImage` call per record.
blocks: Array<Record<string, number>>;
// Dimensions `drawImage` gives to the canvas it draws onto.
width: number;
height: number;
};
var NFBR: any; var NFBR: any;
async function getImageFiles(): Promise<Array<ImageFile>> { async function getFiles() {
const params = new URLSearchParams(location.search); const params = new URLSearchParams(location.search);
const model = new NFBR.a6G.Model({ const model = new NFBR.a6G.Model({
settings: new NFBR.Settings("NFBR.SettingData"), settings: new NFBR.Settings("NFBR.SettingData"),
@ -35,7 +31,12 @@ async function getImageFiles(): Promise<Array<ImageFile>> {
const a5n = new NFBR.a5n(); const a5n = new NFBR.a5n();
await a5n.a5s(content, "configuration", a6l); await a5n.a5s(content, "configuration", a6l);
const imageFiles: Array<ImageFile> = []; const files: Array<{
url: string;
blocks: [];
width: number;
height: number;
}> = [];
for (const index of Object.keys(content.files)) { for (const index of Object.keys(content.files)) {
const file = content.files[index]; const file = content.files[index];
@ -68,7 +69,7 @@ async function getImageFiles(): Promise<Array<ImageFile>> {
const url = `${a5w.url}${page.url}`; const url = `${a5w.url}${page.url}`;
imageFiles.push({ files.push({
url, url,
blocks, blocks,
width: Width, width: Width,
@ -76,19 +77,24 @@ async function getImageFiles(): Promise<Array<ImageFile>> {
}); });
} }
return imageFiles; return files;
} }
async function drawImage(imageFile: ImageFile) { async function drawImage(file: {
url: string;
blocks: Array<Record<string, number>>;
width: number;
height: number;
}) {
const canvas = Object.assign(document.createElement("canvas"), { const canvas = Object.assign(document.createElement("canvas"), {
width: imageFile.width, width: file.width,
height: imageFile.height, height: file.height,
}); });
const image = (await new Promise((resolve) => { const image = (await new Promise((resolve) => {
Object.assign(new Image(), { Object.assign(new Image(), {
crossOrigin: "use-credentials", crossOrigin: "use-credentials",
src: imageFile.url, src: file.url,
onload() { onload() {
resolve(this); resolve(this);
}, },
@ -97,7 +103,7 @@ async function drawImage(imageFile: ImageFile) {
const ctx = canvas.getContext("2d")!; const ctx = canvas.getContext("2d")!;
for (const q of imageFile.blocks) { for (const q of file.blocks) {
ctx.drawImage( ctx.drawImage(
image, image,
q.destX, q.destX,
@ -115,7 +121,17 @@ async function drawImage(imageFile: ImageFile) {
return dataUrl; return dataUrl;
} }
export function DmmBooks(browser: Browser) { export function DmmBooks({ db, browser }: { db: Database; browser: Browser }) {
async function loadBrowserContext(): Promise<BrowserContext> {
const { secrets } = await db.get(
`select secrets from platforms where name = 'dmm-books'`,
);
const storageState = JSON.parse(secrets) ?? undefined;
const ctx = await browser.newContext({ storageState, userAgent });
return ctx;
}
async function* getSeriesBooks( async function* getSeriesBooks(
ctx: BrowserContext, ctx: BrowserContext,
series: { series: {
@ -226,34 +242,62 @@ export function DmmBooks(browser: Browser) {
page.waitForURL("https://www.dmm.com/", { timeout: 0 }), page.waitForURL("https://www.dmm.com/", { timeout: 0 }),
page.waitForURL("https://www.dmm.co.jp/top/", { timeout: 0 }), page.waitForURL("https://www.dmm.co.jp/top/", { timeout: 0 }),
]); ]);
await browser.saveBrowserContext("dmm-books", ctx); const secrets = await ctx.storageState();
await db.run(
`update platforms set secrets = ? where name = 'dmm-books'`,
JSON.stringify(secrets),
);
},
async logout() {
await db.run(
`update platforms set secrets = 'null' where name = 'dmm-books'`,
);
}, },
async *pull(): AsyncGenerator<Book> { async *pull(): AsyncGenerator<Book> {
const ctx = await browser.loadBrowserContext("dmm-books"); const ctx = await loadBrowserContext();
yield* getAllBooks(ctx); yield* getAllBooks(ctx);
process.stderr.write(`\n`); process.stderr.write(`\n`);
}, },
async getFiles(book: Book): Promise<Array<() => Promise<string>>> { async download(dir: string, book: Book) {
const ctx = await browser.loadBrowserContext("dmm-books"); const ctx = await loadBrowserContext();
const page = await ctx.newPage(); const page = await ctx.newPage();
// TODO: --all
await fs.mkdir(path.dirname(dir), { recursive: true });
await fs.mkdir(dir);
await page.goto(book.readerUrl); await page.goto(book.readerUrl);
const imageFiles = await page.evaluate(getImageFiles); const files = await page.evaluate(getFiles);
const digits = String(files.length).length;
return imageFiles.map((imageFile) => async () => { function pad(n: string) {
const dataUrl = await page.evaluate(drawImage, imageFile); return n.padStart(digits, "0");
}
for (const [n, file] of Object.entries(files)) {
const dataUrl = await page.evaluate(drawImage, file);
const [prefix, base64] = dataUrl.split(",", 2);
if (!prefix.startsWith("data:image/png;")) {
throw new Error("Only image/png is supported.");
}
if (!prefix.endsWith(";base64")) {
throw new Error("Only base64 is supported.");
}
const buffer = Buffer.from(base64, "base64");
await fs.writeFile(`${dir}/${pad(n)}.png`, buffer);
process.stderr.write("."); process.stderr.write(".");
}
return dataUrl; process.stderr.write(`\n`);
});
}, },
}; };
} }
DmmBooks.site = ["https://book.dmm.com", "https://book.dmm.co.jp"];

View file

@ -1,166 +0,0 @@
import type { Book } from "../library";
import type { Browser } from "../browser";
/** Reference to one page image, as collected by `getImageFiles`. */
type ImageFile = {
// Value of `href.baseVal` read from the page's `svg image` element.
url: string;
};
/**
 * Waits until every `reader-page` element in the current document carries the
 * `-gb-loaded` class, then returns the image URL of each page in DOM order.
 *
 * This function is passed to `frame.evaluate`, so it must stay self-contained:
 * it may only use globals and must not call helpers defined outside its body.
 *
 * @returns One entry per `reader-page`, holding the `href` of its `svg image`.
 * @throws Error "Page loading timeout." when the pages have not all loaded
 *   within 60 seconds, and an error naming the page index when a loaded page
 *   contains no `svg image` element.
 */
async function getImageFiles(): Promise<Array<ImageFile>> {
  const timeoutMs = 60_000;
  const pollIntervalMs = 100;
  const deadline = Date.now() + timeoutMs;

  let pages: NodeListOf<HTMLElement>;
  while (true) {
    pages = document.querySelectorAll("reader-page");
    const loaded =
      pages.length > 0 &&
      [...pages].every((page) => page.classList.contains("-gb-loaded"));
    if (loaded) {
      break;
    }
    // The deadline is checked inside the loop so that a timeout also ends the
    // polling; a detached timer would reject but leave the loop spinning.
    if (Date.now() >= deadline) {
      throw new Error("Page loading timeout.");
    }
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  return [...pages].map((page, index) => {
    const image = page.querySelector<SVGImageElement>("svg image");
    if (!image) {
      throw new Error(`Image not found in page ${index}.`);
    }
    return { url: image.href.baseVal };
  });
}
/**
 * Downloads an image and returns its content encoded as a `data:` URL.
 *
 * This function is passed to `frame.evaluate`, so it must stay self-contained:
 * it may only use globals and must not call helpers defined outside its body.
 *
 * @param imageFile - Descriptor whose `url` is fetched.
 * @returns The image as a data URL, as produced by `FileReader.readAsDataURL`.
 * @throws Error when the HTTP response is not successful, when the reader
 *   fails (the reader's own error is propagated), or when the reader yields
 *   a non-string result.
 */
async function fetchImage(imageFile: ImageFile): Promise<string> {
  const res = await fetch(imageFile.url);
  // Without this check an HTTP error body would be encoded as if it were the image.
  if (!res.ok) {
    throw new Error(
      `Failed to fetch ${imageFile.url}: ${res.status} ${res.statusText}`,
    );
  }
  const blob = await res.blob();

  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      if (typeof reader.result === "string") {
        resolve(reader.result);
      } else {
        reject(new Error(`Unexpected reader result for ${imageFile.url}.`));
      }
    };
    // FileReader's error event is a ProgressEvent without a `message`;
    // the actual cause is exposed on `reader.error`.
    reader.onerror = () => {
      reject(
        reader.error ??
          new Error(`Failed to read image data from ${imageFile.url}.`),
      );
    };
    reader.readAsDataURL(blob);
  });
}
/**
 * Platform adapter for Google Play Books.
 *
 * @param browser - Browser facade used to open contexts and to load/save the
 *   storage state kept under the `"google-play-books"` platform name.
 * @returns An object with `login`, `pull` and `getFiles`.
 */
export function GooglePlayBooks(browser: Browser) {
  return {
    /**
     * Opens the Google account sign-in page, waits until the page reaches the
     * `https://myaccount.google.com` origin, then saves the context's storage
     * state for later runs.
     */
    async login() {
      const ctx = await browser.newContext();
      try {
        const page = await ctx.newPage();
        await page.goto("https://accounts.google.com");
        // timeout: 0 disables the wait limit; the sign-in is done by hand.
        await page.waitForURL(
          (url) => url.origin === "https://myaccount.google.com",
          { timeout: 0 },
        );
        await browser.saveBrowserContext("google-play-books", ctx);
      } finally {
        // Release the context even when sign-in or saving fails.
        await ctx.close();
      }
    },

    /**
     * Yields one `Book` per library card found on the purchased-comics page.
     *
     * `id` is `NaN` on every yielded book; this adapter does not assign ids.
     * A dot is written to stderr per book and a newline at the end.
     */
    async *pull(): AsyncGenerator<Book> {
      const ctx = await browser.loadBrowserContext("google-play-books");
      try {
        const page = await ctx.newPage();
        await page.goto(
          "https://play.google.com/books?type=comics&source=purchases",
        );
        await page.waitForSelector("gpb-library-card");

        for (const metadata of await page.$$("gpb-library-card .metadata")) {
          const readerUrl = await metadata.$eval("a", (a) => a.href);
          // The card text is split on newlines: first line title, second author.
          const [title, author] = (await metadata.innerText()).split("\n");
          yield {
            id: NaN,
            platform: "google-play-books",
            readerUrl,
            title,
            authors: [author],
          };
          process.stderr.write(".");
        }
        process.stderr.write(`\n`);
      } finally {
        // Also runs when the consumer stops iterating early.
        await ctx.close();
      }
    },

    /**
     * Opens the reader for `book`, scrolls through it one viewport at a time
     * and collects every distinct page image as a data URL.
     *
     * @param book - Book whose `readerUrl` is opened.
     * @returns One thunk per distinct image URL, in first-seen order; each
     *   resolves to the data URL that was already fetched.
     * @throws Error "Frame not found." when the page exposes no frame.
     */
    async getFiles(book: Book): Promise<Array<() => Promise<string>>> {
      const ctx = await browser.loadBrowserContext("google-play-books");
      try {
        const page = await ctx.newPage();
        await page.goto(book.readerUrl);
        await page.waitForSelector(".display");

        // The last frame of the page is the one that gets scrolled and read.
        const frame = page.frames().at(-1);
        if (!frame) {
          throw new Error("Frame not found.");
        }

        await frame.evaluate(function scrollToTop() {
          const viewport = document.querySelector(
            "cdk-virtual-scroll-viewport",
          );
          viewport?.scroll({ top: 0 });
        });

        // Scrolls down by one viewport height; resolves to false once the
        // viewport is already at the bottom (nothing was scrolled).
        const next = async (): Promise<boolean> =>
          frame.evaluate(function scroll() {
            const viewport = document.querySelector(
              "cdk-virtual-scroll-viewport",
            );
            if (!viewport) throw new Error("Viewport not found.");
            const hasNext =
              1 <=
              Math.abs(
                viewport.scrollHeight -
                  viewport.clientHeight -
                  viewport.scrollTop,
              );
            if (hasNext) {
              viewport.scrollBy({ top: viewport.clientHeight });
            }
            return hasNext;
          });

        const fileMap: Map<string, () => Promise<string>> = new Map();
        // Capture before scrolling: otherwise content that fits in a single
        // viewport would produce no files, and the first screen would only be
        // captured if it were still rendered after the first scroll.
        do {
          const imageFiles = await frame.evaluate(getImageFiles);
          for (const imageFile of imageFiles) {
            if (fileMap.has(imageFile.url)) continue;
            const dataUrl = await frame.evaluate(fetchImage, imageFile);
            process.stderr.write(".");
            fileMap.set(imageFile.url, async () => dataUrl);
          }
        } while (await next());

        return [...fileMap.values()];
      } finally {
        // All data URLs are fetched by now, so the context is no longer needed.
        await ctx.close();
      }
    },
  };
}
// Origins whose reader URLs belong to this platform.
GooglePlayBooks.site = ["https://play.google.com"];