From e0227ce4fa508b9ca8f6a05fc634246b430ac6bc Mon Sep 17 00:00:00 2001
From: alikia2x
Date: Sun, 8 Feb 2026 21:58:38 +0800
Subject: [PATCH] add: getLatestVideoAids script

---
Note: the src/getAllAids.ts hunk was revised by hand after export (retry
handling, end-of-listing handling, comments). The post-image blob id on its
"index" line was not regenerated.

 packages/core/net/delegate.ts              |   2 +-
 packages/crawler/net/getLatestVideoAids.ts |   2 +-
 src/getAllAids.ts                          | 130 ++++++++++++++++++++++++++++++++-
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/packages/core/net/delegate.ts b/packages/core/net/delegate.ts
index 98a9715..7f2cccc 100644
--- a/packages/core/net/delegate.ts
+++ b/packages/core/net/delegate.ts
@@ -173,7 +173,7 @@ const config = createNetworkConfig({
 	proxies: proxies,
 	tasks: {
 		annualArchive: {
-			provider: "bilibili",
+			provider: "test",
 			proxies: [...aliProxies],
 		},
 		bulkSnapshot: {
diff --git a/packages/crawler/net/getLatestVideoAids.ts b/packages/crawler/net/getLatestVideoAids.ts
index 319f7f1..1f26b95 100644
--- a/packages/crawler/net/getLatestVideoAids.ts
+++ b/packages/crawler/net/getLatestVideoAids.ts
@@ -12,7 +12,7 @@ export async function getLatestVideoAids(
 	const errMessage = `Error fetching latest aid for ${range}:`;
 	const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
 	const { data } = await networkDelegate.request(url, "getLatestVideos");
-	if (data.code != 0) {
+	if (data.code !== 0) {
 		logger.error(errMessage + data.message, "net", "getLastestVideos");
 		return [];
 	}
diff --git a/src/getAllAids.ts b/src/getAllAids.ts
index 9b3ea6a..bb9a585 100644
--- a/src/getAllAids.ts
+++ b/src/getAllAids.ts
@@ -1,2 +1,130 @@
-import { getLatestVideoAids } from "@crawler/net/getLatestVideoAids";
+import logger from "@core/log";
+import type { VideoListResponse } from "@core/net/bilibili.d";
+import networkDelegate from "@core/net/delegate";
 
+// Collects the aid of every video in the rid=30 "newlist" listing by walking its pages in
+// concurrent groups, and keeps the result in a JSON cache file.
+
+// Aids written by earlier runs are loaded first, so this run's results are merged with them.
+const cacheFile = Bun.file("temp/aidCache.json");
+const aidSet = new Set<number>();
+try {
+	const cache = (await cacheFile.json()) as number[];
+	for (const aid of cache) {
+		aidSet.add(aid);
+	}
+} catch {
+	// Best effort: a missing or unreadable cache just means starting from an empty set.
+	logger.log("No usable aid cache found, starting from an empty set");
+}
+
+const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));
+
+/** Writes the current aid set to the cache file as a JSON array. */
+const saveCache = () => cacheFile.write(JSON.stringify(Array.from(aidSet)));
+
+/**
+ * Fetches one page of the `newlist` listing.
+ *
+ * @param page Page number, sent as `pn`.
+ * @param pageSize Videos per page, sent as `ps`.
+ * @returns The `data` field of the response body, or `null` when its `code` is non-zero.
+ */
+async function getLatestVideos(page: number = 1, pageSize: number = 50) {
+	const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
+	const { data } = await networkDelegate.request(url, "annualArchive");
+	if (data.code !== 0) {
+		logger.error(data.message, "net", "getLastestVideos");
+		return null;
+	}
+	return data.data;
+}
+
+/**
+ * Fetches one page and adds every aid on it to `aidSet`.
+ *
+ * @returns `0` on success, `-1` when the page has no videos (treated as the end of the
+ * listing), or `page` itself when the fetch failed and should be retried.
+ */
+async function getAidPage(page: number) {
+	try {
+		const data = await getLatestVideos(page);
+		if (!data) {
+			return page;
+		}
+		if (data.archives.length === 0) {
+			return -1;
+		}
+		for (const video of data.archives) {
+			aidSet.add(video.aid);
+		}
+		logger.log(`Fetched page ${page} with ${data.archives.length} videos`);
+		return 0;
+	} catch (e) {
+		logger.error(e as Error, "net", "getAidPage");
+		return page;
+	}
+}
+
+// Pages requested per group, and the minimum wall-clock time (ms) each group takes.
+const concurrency = 35;
+const groupTime = 3000;
+// A page is dropped after this many failed fetches so it cannot stall the crawl forever.
+const maxAttempts = 3;
+
+let nextPage = 1;
+let reachedEnd = false;
+// Pages whose last fetch failed; retried as their own group before new pages are requested.
+let retryPages: number[] = [];
+const failures = new Map<number, number>();
+
+while (true) {
+	const startTime = performance.now();
+	const isRetry = retryPages.length > 0;
+	const pages = isRetry
+		? retryPages
+		: Array.from({ length: concurrency }, (_, i) => nextPage + i);
+	retryPages = [];
+	if (!isRetry) {
+		nextPage += concurrency;
+	}
+
+	try {
+		const results = await Promise.all(pages.map((page) => getAidPage(page)));
+		for (const result of results) {
+			if (result === -1) {
+				reachedEnd = true;
+			} else if (result > 0) {
+				const failed = (failures.get(result) ?? 0) + 1;
+				failures.set(result, failed);
+				if (failed < maxAttempts) {
+					retryPages.push(result);
+				} else {
+					const err = new Error(`Giving up on page ${result} after ${failed} attempts`);
+					logger.error(err, "net", "getAllAids");
+				}
+			}
+		}
+		logger.log(
+			isRetry
+				? `Retried ${pages.length} failed page(s)`
+				: `Processed page ${pages[0]} to ${pages[pages.length - 1]}`,
+		);
+	} catch (e) {
+		logger.error(e as Error, "net", "getAllAids");
+	} finally {
+		// Flush after every group, including the last, so an interrupted run keeps its progress.
+		await saveCache();
+	}
+
+	// Stop only once the end was seen AND no failed page is still waiting for a retry.
+	if (reachedEnd && retryPages.length === 0) {
+		break;
+	}
+	const sleepTime = groupTime - (performance.now() - startTime);
+	if (sleepTime > 0) {
+		await sleep(sleepTime);
+	}
+}
+
+process.exit(0);