// gadl/platforms/google-play-books.ts
import type { Book } from "../library";
2023-12-03 05:54:45 +09:00
import type { Browser, ImageFile } from "../browser";
2023-12-03 00:48:24 +09:00
/**
 * Collects the image URL of every `reader-page` element in the current
 * document once all of them carry the `-gb-loaded` class.
 *
 * This function is handed to `frame.evaluate(...)` by `getFiles`.
 * NOTE(review): assuming `evaluate` serializes the function into the page
 * (as Playwright/Puppeteer do), it must stay self-contained and must not
 * reference anything from module scope.
 *
 * @returns One `{ url }` entry per `reader-page`, in document order, where
 *   `url` is the `href.baseVal` of the page's `svg image` element.
 * @throws Error("Page loading timeout.") if the pages are not all loaded
 *   within 60 seconds.
 * @throws Error("Image not found in reader page.") if a loaded page has no
 *   `svg image` descendant.
 */
async function getImageFiles(): Promise<Array<ImageFile>> {
  const timeoutMs = 60_000;
  const pollIntervalMs = 100;
  const deadline = Date.now() + timeoutMs;

  let pages: Array<HTMLElement>;
  while (true) {
    pages = [...document.querySelectorAll<HTMLElement>("reader-page")];
    const loaded =
      pages.length > 0 &&
      pages.every((page) => page.classList.contains("-gb-loaded"));
    if (loaded) break;

    // Checking the deadline inside the loop guarantees that polling stops
    // once the timeout is reported, instead of spinning forever.
    if (Date.now() >= deadline) {
      throw new Error("Page loading timeout.");
    }
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  return pages.map((page) => {
    const image = page.querySelector<SVGImageElement>("svg image");
    if (!image) {
      throw new Error("Image not found in reader page.");
    }
    return { url: image.href.baseVal };
  });
}
/**
 * Builds the Google Play Books platform adapter on top of `browser`.
 *
 * The returned object lists purchased comics (`pull`), captures the page
 * images of one book (`getFiles`), and exposes the login/logout URLs used
 * for this platform.
 *
 * @param browser Project browser wrapper; only `loadBrowserContext` and
 *   `drawImage` are used here.
 */
export function GooglePlayBooks(browser: Browser) {
  return {
    /**
     * Yields one `Book` per `gpb-library-card .metadata` element found on
     * the purchased-comics library page, writing a progress dot to stderr
     * after each one. The page opened for the listing is closed when the
     * generator finishes or is abandoned early.
     */
    async *pull(): AsyncGenerator<Book> {
      const ctx = await browser.loadBrowserContext("google-play-books");
      const page = await ctx.newPage();
      try {
        await page.goto(
          "https://play.google.com/books?type=comics&source=purchases",
        );
        await page.waitForSelector("gpb-library-card");
        for (const metadata of await page.$$("gpb-library-card .metadata")) {
          const readerUrl = await metadata.$eval("a", (a) => a.href);
          // The card text is split on newlines: first line is used as the
          // title, second line (when present) as the author.
          const [title, author] = (await metadata.innerText()).split("\n");
          yield {
            // NOTE(review): NaN looks like a "not yet assigned" placeholder —
            // confirm against the Book consumer.
            id: NaN,
            platform: "google-play-books",
            readerUrl,
            title,
            // Avoid emitting `[undefined]` when the card has no author line.
            authors: author ? [author] : [],
          };
          process.stderr.write(".");
        }
        process.stderr.write(`\n`);
      } finally {
        await page.close();
      }
    },

    /**
     * Opens the reader for `book`, scrolls through it one viewport at a
     * time, and renders every distinct page image via `browser.drawImage`.
     *
     * @param book Book whose `readerUrl` is opened.
     * @returns One thunk per distinct image URL, in first-seen order; each
     *   thunk resolves to the already-rendered blob.
     * @throws Error("Frame not found.") if the page reports no frames.
     */
    async getFiles(book: Book): Promise<Array<() => Promise<Blob>>> {
      const ctx = await browser.loadBrowserContext("google-play-books");
      const page = await ctx.newPage();
      try {
        await page.goto(book.readerUrl);
        await page.waitForSelector(".display");

        // NOTE(review): presumably the reader content lives in the last
        // frame of the page — confirm.
        const frame = page.frames().at(-1);
        if (!frame) {
          throw new Error("Frame not found.");
        }

        await frame.evaluate(function scrollToTop() {
          const viewport = document.querySelector(
            "cdk-virtual-scroll-viewport",
          );
          viewport?.scroll({ top: 0 });
        });

        // Scrolls down by one viewport height; resolves to false once the
        // viewport is already at the bottom. Declared as an arrow function
        // so the `frame` null-check above still applies inside it.
        const next = (): Promise<boolean> =>
          frame.evaluate(function scroll() {
            const viewport = document.querySelector(
              "cdk-virtual-scroll-viewport",
            );
            if (!viewport) throw new Error("Viewport not found.");
            // scrollTop may be fractional while scrollHeight/clientHeight
            // are rounded, so a remainder of up to 1px counts as "at the
            // bottom"; otherwise the loop could never terminate.
            const remaining = Math.abs(
              viewport.scrollHeight -
                viewport.clientHeight -
                viewport.scrollTop,
            );
            const hasNext = remaining > 1;
            if (hasNext) {
              viewport.scrollBy({ top: viewport.clientHeight });
            }
            return hasNext;
          });

        const fileMap = new Map<string, () => Promise<Blob>>();
        // Capture BEFORE the first scroll so a book that fits in a single
        // viewport still yields its pages; duplicates are skipped by URL.
        do {
          const imageFiles = await frame.evaluate(getImageFiles);
          for (const imageFile of imageFiles) {
            if (fileMap.has(imageFile.url)) continue;

            const blob = await browser.drawImage(frame, imageFile);
            process.stderr.write(".");
            fileMap.set(imageFile.url, async () => blob);
          }
        } while (await next());

        return [...fileMap.values()];
      } finally {
        // Blobs are fully rendered above, so the page is no longer needed.
        await page.close();
      }
    },

    loginEndpoints: ["https://accounts.google.com"],
    /** True when `url` is on the `https://myaccount.google.com` origin. */
    loginSuccessUrl: (url: URL) =>
      url.origin === "https://myaccount.google.com",
    logoutEndpoints: ["https://accounts.google.com/Logout"],
  };
}
2023-12-03 05:54:45 +09:00
/** Reports whether `url` is on the `https://play.google.com` origin. */
GooglePlayBooks.siteUrl = (url: URL) => {
  const playOrigin = "https://play.google.com";
  return url.origin === playOrigin;
};