gadl/platforms/google-play-books.ts
2023-12-03 00:48:24 +09:00

166 lines
4.4 KiB
TypeScript

import type { Book } from "../library";
import type { Browser } from "../browser";
type ImageFile = {
url: string;
};
async function getImageFiles(): Promise<Array<ImageFile>> {
const pages: NodeListOf<HTMLElement> = await new Promise(async function (
resolve,
reject,
) {
const timeout = setTimeout(() => {
reject(new Error("Page loading timeout."));
}, 60_000);
let pages: NodeListOf<HTMLElement>;
while (true) {
pages = document.querySelectorAll("reader-page");
const loaded =
pages.length > 0 &&
[...pages].every((page) => page.classList.contains("-gb-loaded"));
if (loaded) {
break;
} else {
await new Promise((resolve) => setTimeout(resolve, 100));
}
}
resolve(pages);
clearTimeout(timeout);
});
const images: Array<SVGImageElement> = [...pages].map(
(el) => el.querySelector("svg image")!,
);
return [...images].map((image) => ({ url: image.href.baseVal }));
}
async function fetchImage(imageFile: ImageFile): Promise<string> {
const res = await fetch(imageFile.url);
const blob = await res.blob();
const dataUrl: string = await new Promise((resolve, reject) => {
const fileReader = Object.assign(new FileReader(), {
onload(): void {
resolve(this.result);
},
onerror(e: ErrorEvent): void {
const error = new Error(`${e.type}: ${e.message}`);
reject(error);
},
});
fileReader.readAsDataURL(blob);
});
return dataUrl;
}
export function GooglePlayBooks(browser: Browser) {
return {
async login() {
const ctx = await browser.newContext();
const page = await ctx.newPage();
await page.goto("https://accounts.google.com");
await page.waitForURL(
(url) => url.origin === "https://myaccount.google.com",
{ timeout: 0 },
);
await browser.saveBrowserContext("google-play-books", ctx);
},
async *pull(): AsyncGenerator<Book> {
const ctx = await browser.loadBrowserContext("google-play-books");
const page = await ctx.newPage();
await page.goto(
"https://play.google.com/books?type=comics&source=purchases",
);
await page.waitForSelector("gpb-library-card");
for (const metadata of await page.$$("gpb-library-card .metadata")) {
const readerUrl = await metadata.$eval("a", (a) => a.href);
const [title, author] = (await metadata.innerText()).split("\n");
yield {
id: NaN,
platform: "google-play-books",
readerUrl,
title,
authors: [author],
};
process.stderr.write(".");
}
process.stderr.write(`\n`);
},
async getFiles(book: Book): Promise<Array<() => Promise<string>>> {
const ctx = await browser.loadBrowserContext("google-play-books");
const page = await ctx.newPage();
await page.goto(book.readerUrl);
await page.waitForSelector(".display");
const frame = page.frames().at(-1);
if (!frame) {
throw new Error("Frame not found.");
}
await frame.evaluate(function scrollToTop() {
const viewport = document.querySelector("cdk-virtual-scroll-viewport");
viewport?.scroll({ top: 0 });
});
async function next(): Promise<boolean> {
return await frame!.evaluate(function scroll() {
const viewport = document.querySelector(
"cdk-virtual-scroll-viewport",
);
if (!viewport) throw new Error("Viewport not found.");
const hasNext =
1 <=
Math.abs(
viewport.scrollHeight -
viewport.clientHeight -
viewport.scrollTop,
);
if (hasNext) {
viewport.scrollBy({ top: viewport.clientHeight });
}
return hasNext;
});
}
const fileMap: Map<string, () => Promise<string>> = new Map();
while (await next()) {
const imageFiles = await frame.evaluate(getImageFiles);
for (const imageFile of imageFiles) {
if (fileMap.has(imageFile.url)) continue;
const dataUrl = await frame.evaluate(fetchImage, imageFile);
process.stderr.write(".");
fileMap.set(imageFile.url, async () => dataUrl);
}
}
return [...fileMap.values()];
},
};
}
GooglePlayBooks.site = ["https://play.google.com"];