1
0
cvsa-legacy/src/annualArchive.ts
alikia2x b88692e7c1
add: annual archive script, recover the FC proxy
improve the logger, add some error handling in getVideoInfo
2026-01-01 04:49:36 +08:00

96 lines
2.5 KiB
TypeScript

import { bilibiliMetadata, db, eta, type VideoSnapshotType } from "@core/drizzle";
import { SECOND } from "@core/lib";
import logger from "@core/log";
import { NetSchedulerError } from "@core/net/delegate";
import { bulkGetVideoStats } from "@crawler/net/bulkGetVideoStats";
import { desc, eq } from "drizzle-orm";
// File that the collected snapshots are written to (see `serialize`).
const store = Bun.file("temp/annualSnapshots.json");
// In-memory accumulator for every snapshot row gathered during this run.
const snapshots: Array<Omit<VideoSnapshotType, "id">> = [];
const storeAlreadyExists = await store.exists();
if (storeAlreadyExists) {
    // load
    // TODO: not implemented — an existing file is currently ignored and will be
    // overwritten by the next serialize() call.
}
// Select the aid of every row in bilibiliMetadata, left-joined with eta and
// ordered by eta.speed descending.
const aidRows = await db
    .select({ aid: bilibiliMetadata.aid })
    .from(bilibiliMetadata)
    .leftJoin(eta, eq(bilibiliMetadata.aid, eta.aid))
    .orderBy(desc(eta.speed));
// Keep only non-null aids, preserving the query order.
const aids: number[] = [];
for (const { aid } of aidRows) {
    if (aid !== null) {
        aids.push(aid);
    }
}
const totalAids = aids.length;
logger.log(`Total aids: ${totalAids}`);
// Number of aids sent in a single bulkGetVideoStats request.
const bulkSize = 50;
// Split `aids` into consecutive chunks of at most `bulkSize` entries each.
const groupedAids = Array.from({ length: Math.ceil(totalAids / bulkSize) }, (_, groupIndex) =>
    aids.slice(groupIndex * bulkSize, (groupIndex + 1) * bulkSize),
);
/** Returns a promise that resolves (with no value) after roughly `ms` milliseconds. */
const sleep = (ms: number): Promise<void> =>
    new Promise<void>((done) => {
        setTimeout(done, ms);
    });
/**
 * Writes the current contents of `snapshots` to `store` as JSON
 * pretty-printed with 4-space indentation.
 */
const serialize = async () => {
    await store.write(JSON.stringify(snapshots, null, 4));
};
// Running count of snapshot rows appended to `snapshots`.
let aidsProcessed = 0;
/**
 * Fetches stats for a group of aids via `bulkGetVideoStats` and appends one
 * snapshot per returned item to `snapshots`.
 *
 * A numeric result from `bulkGetVideoStats` or a thrown `NetSchedulerError` is
 * treated as retryable: the call waits 2 seconds and tries again with
 * `depth + 1`. Once `depth` exceeds 10 the group is given up on and an error
 * is logged. Any other exception is logged and the group is skipped. The
 * returned promise never settles before all retries for this group are done.
 *
 * @param aids - The aids to request in a single bulk call.
 * @param depth - Current retry depth; callers normally omit it.
 */
const requestForSnapshot = async (aids: number[], depth: number = 0): Promise<void> => {
    const maxDepth = 10;
    const retryDelay = 2 * SECOND;
    if (depth > maxDepth) {
        logger.error(`Cannot fetch metadata for aids: ${aids.join(",")}, depth: ${depth}`);
        return;
    }
    try {
        const rawData = await bulkGetVideoStats(aids, "annualArchive");
        if (typeof rawData === "number") {
            // A number instead of a payload means no data was returned
            // (presumably a status/error code — confirm against bulkGetVideoStats).
            await sleep(retryDelay);
            await requestForSnapshot(aids, depth + 1);
            return;
        }
        logger.log(`Fetched metadata for ${aids.length} aids, depth: ${depth}`);
        // Build the whole batch before touching shared state so that a failure
        // while mapping cannot leave a partially appended group behind.
        const createdAt = new Date(rawData.time).toISOString();
        const batch: typeof snapshots = rawData.data.map((item) => ({
            aid: item.id,
            coins: item.cnt_info.coin,
            createdAt,
            danmakus: item.cnt_info.danmaku,
            favorites: item.cnt_info.collect,
            likes: item.cnt_info.thumb_up,
            replies: item.cnt_info.reply,
            shares: item.cnt_info.share,
            views: item.cnt_info.play,
        }));
        snapshots.push(...batch);
        aidsProcessed += batch.length;
    } catch (e) {
        if (e instanceof NetSchedulerError) {
            // Await the retry and keep counting depth so the retry is bounded
            // and finishes before the caller's batch is considered complete.
            await sleep(retryDelay);
            await requestForSnapshot(aids, depth + 1);
            return;
        }
        // Best effort: skip this group, but do not hide the failure.
        const reason = e instanceof Error ? e.message : String(e);
        logger.error(`Failed to fetch metadata for aids: ${aids.join(",")}, depth: ${depth}: ${reason}`);
    }
};
// One deferred task per aid group; nothing is requested until a factory is called.
const taskFactories = groupedAids.map((group) => () => requestForSnapshot(group));
// Maximum number of groups requested in parallel per wave.
const concurrency = 100;
for (let i = 0; i < taskFactories.length; i += concurrency) {
    const batch = taskFactories.slice(i, i + concurrency);
    await Promise.all(batch.map((factory) => factory()));
    logger.log(`Processed ${aidsProcessed} of ${totalAids}`);
    // Checkpoint after every wave. The write is awaited so that writes to the
    // same file never overlap and a write failure is not an unhandled rejection.
    await serialize();
    await sleep(1.7 * SECOND);
}
// Final write; also covers the case where there were no groups at all.
await serialize();
process.exit(0);