add: annual archive script, recover the FC proxy
improve the logger, add some error handling in getVideoInfo
This commit is contained in:
parent
c55cfb36fc
commit
b88692e7c1
@ -9,6 +9,7 @@
|
||||
<file url="file://$PROJECT_DIR$/src/importSnapshots.ts" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef/console.sql" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef/console_1.sql" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef/console_3.sql" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef/console_4.sql" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef/console_5.sql" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
|
||||
</component>
|
||||
|
||||
2
bun.lock
2
bun.lock
@ -45,7 +45,7 @@
|
||||
"name": "cf-worker",
|
||||
"version": "0.0.0",
|
||||
"dependencies": {
|
||||
"@alikia/http-parser": "^1.0.2",
|
||||
"@alikia/http-parser": "1.0.2",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@cloudflare/vitest-pool-workers": "^0.8.19",
|
||||
|
||||
@ -16,6 +16,6 @@
|
||||
"wrangler": "^4.56.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@alikia/http-parser": "^1.0.2"
|
||||
"@alikia/http-parser": "1.0.2"
|
||||
}
|
||||
}
|
||||
|
||||
@ -23,19 +23,11 @@ interface ProxyRequest {
|
||||
headers?: Record<string, string>;
|
||||
}
|
||||
|
||||
interface ParsedHeaders {
|
||||
status: number;
|
||||
statusText: string;
|
||||
headers: Headers;
|
||||
headerEnd: number;
|
||||
}
|
||||
|
||||
const CONFIG: ProxyConfig = {
|
||||
TIMEOUT_MS: 5000,
|
||||
};
|
||||
|
||||
const encoder = new TextEncoder();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
function concatUint8Arrays(...arrays: Uint8Array[]): Uint8Array {
|
||||
const total = arrays.reduce((sum, arr) => sum + arr.length, 0);
|
||||
@ -48,31 +40,6 @@ function concatUint8Arrays(...arrays: Uint8Array[]): Uint8Array {
|
||||
return result;
|
||||
}
|
||||
|
||||
function parseHttpHeaders(buff: Uint8Array): ParsedHeaders | null {
|
||||
const text = decoder.decode(buff);
|
||||
const headerEnd = text.indexOf("\r\n\r\n");
|
||||
if (headerEnd === -1) return null;
|
||||
|
||||
const lines = text.slice(0, headerEnd).split("\r\n");
|
||||
const statusMatch = lines[0].match(/HTTP\/1\.[01] (\d+) (.*)/);
|
||||
if (!statusMatch) throw new Error("Invalid status line");
|
||||
|
||||
const headers = new Headers();
|
||||
for (let i = 1; i < lines.length; i++) {
|
||||
const idx = lines[i].indexOf(": ");
|
||||
if (idx !== -1) {
|
||||
headers.append(lines[i].slice(0, idx), lines[i].slice(idx + 2));
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
headerEnd,
|
||||
headers,
|
||||
status: Number(statusMatch[1]),
|
||||
statusText: statusMatch[2],
|
||||
};
|
||||
}
|
||||
|
||||
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
|
||||
return Promise.race([
|
||||
promise,
|
||||
@ -198,6 +165,16 @@ function createErrorResponse(message: string, status: number, requestId: string)
|
||||
);
|
||||
}
|
||||
|
||||
function isJSON(str: string) {
|
||||
if (typeof str !== "string") return false;
|
||||
try {
|
||||
const result = JSON.parse(str);
|
||||
return typeof result === "object" && result !== null;
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
export default {
|
||||
async fetch(request: Request, _env: Env, _ctx: ExecutionContext): Promise<Response> {
|
||||
const requestId = crypto.randomUUID().slice(0, 8); // Track this specific request
|
||||
@ -223,12 +200,14 @@ export default {
|
||||
}
|
||||
|
||||
try {
|
||||
console.log(`[${requestId}] Attempting handleSocket...`);
|
||||
const data = await withTimeout(
|
||||
handleSocket(targetUrl, customHeaders, requestTime),
|
||||
CONFIG.TIMEOUT_MS
|
||||
);
|
||||
console.log(`[${requestId}] Success via handleSocket (${Date.now() - requestTime}ms)`);
|
||||
const json = isJSON(data.data);
|
||||
console.log(
|
||||
`[${requestId}] Success via handleSocket (${Date.now() - requestTime}ms) ${data.data.length} bytes, isJSON: ${json}, rawData: ${data.data}`
|
||||
);
|
||||
return createJsonResponse(data, requestId);
|
||||
} catch (socketErr: any) {
|
||||
console.warn(
|
||||
|
||||
@ -2,46 +2,86 @@ import chalk from "chalk";
|
||||
import type { TransformableInfo } from "logform";
|
||||
import winston, { format, transports } from "winston";
|
||||
|
||||
const customFormat = format.printf((info: TransformableInfo) => {
|
||||
const { timestamp, level, message, service, codePath, error } = info;
|
||||
/* -------------------------------------------------
|
||||
* Bun-style console formatter
|
||||
* ------------------------------------------------- */
|
||||
function formatLikeConsole(input: unknown, colors: boolean): string {
|
||||
const inspect = (value: unknown) =>
|
||||
Bun.inspect(value, {
|
||||
colors,
|
||||
compact: false,
|
||||
depth: 6,
|
||||
});
|
||||
|
||||
// console.log(...args)
|
||||
if (Array.isArray(input)) {
|
||||
return input.map(inspect).join(" ");
|
||||
}
|
||||
|
||||
if (input instanceof Error) {
|
||||
const stack = input.stack ?? input.message;
|
||||
return colors ? chalk.red(stack) : stack;
|
||||
}
|
||||
|
||||
if (typeof input === "string") {
|
||||
return input;
|
||||
}
|
||||
|
||||
return inspect(input);
|
||||
}
|
||||
|
||||
/* -------------------------------------------------
|
||||
* Console format (colored, human-readable)
|
||||
* ------------------------------------------------- */
|
||||
const customConsoleFormat = format.printf((info: TransformableInfo) => {
|
||||
const { timestamp, level, message, service, codePath } = info;
|
||||
|
||||
const coloredService = service ? chalk.magenta(service) : "";
|
||||
const coloredCodePath = codePath ? chalk.grey(`@${codePath}`) : "";
|
||||
const colon = service || codePath ? ": " : "";
|
||||
const err = error as Error | undefined;
|
||||
if (err) {
|
||||
return `${timestamp} [${level}] ${coloredService}${colon}${message}\n${chalk.red(err.stack) ?? ""}`;
|
||||
}
|
||||
|
||||
return coloredCodePath
|
||||
? `${timestamp} [${level}] ${coloredService}${coloredCodePath}${colon}${message}`
|
||||
: `${timestamp} [${level}] ${coloredService}${colon}${message}`;
|
||||
const renderedMessage = formatLikeConsole(message, true);
|
||||
|
||||
return `${timestamp} [${level}] ${coloredService}${coloredCodePath}${colon}${renderedMessage}`;
|
||||
});
|
||||
|
||||
const timestampFormat = format.timestamp({ format: "YYYY-MM-DD HH:mm:ss.SSSZZ" });
|
||||
/* -------------------------------------------------
|
||||
* Timestamp
|
||||
* ------------------------------------------------- */
|
||||
const timestampFormat = format.timestamp({
|
||||
format: "YYYY-MM-DD HH:mm:ss.SSSZZ",
|
||||
});
|
||||
|
||||
/* -------------------------------------------------
|
||||
* File transport factory (no colors)
|
||||
* ------------------------------------------------- */
|
||||
const createTransport = (level: string, filename: string) => {
|
||||
const MB = 1000000;
|
||||
let maxsize;
|
||||
let maxFiles;
|
||||
let tailable;
|
||||
const MB = 1_000_000;
|
||||
let maxsize: number | undefined;
|
||||
let maxFiles: number | undefined;
|
||||
const tailable = false;
|
||||
|
||||
if (level === "silly") {
|
||||
maxsize = 500 * MB;
|
||||
maxFiles = undefined;
|
||||
tailable = false;
|
||||
} else if (level === "warn") {
|
||||
maxsize = 10 * MB;
|
||||
maxFiles = 5;
|
||||
tailable = false;
|
||||
}
|
||||
function replacer(key: unknown, value: unknown) {
|
||||
|
||||
function replacer(_: unknown, value: unknown) {
|
||||
if (typeof value === "bigint") {
|
||||
return value.toString();
|
||||
}
|
||||
if (key === "error") {
|
||||
return undefined;
|
||||
if (value instanceof Error) {
|
||||
return {
|
||||
message: value.message,
|
||||
name: value.name,
|
||||
stack: value.stack,
|
||||
};
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
return new transports.File({
|
||||
filename,
|
||||
format: format.combine(timestampFormat, format.json({ replacer })),
|
||||
@ -52,20 +92,21 @@ const createTransport = (level: string, filename: string) => {
|
||||
});
|
||||
};
|
||||
|
||||
/* -------------------------------------------------
|
||||
* Paths
|
||||
* ------------------------------------------------- */
|
||||
const sillyLogPath = process.env["LOG_VERBOSE"] ?? "logs/verbose.log";
|
||||
const warnLogPath = process.env["LOG_WARN"] ?? "logs/warn.log";
|
||||
const errorLogPath = process.env["LOG_ERROR"] ?? "logs/error.log";
|
||||
|
||||
/* -------------------------------------------------
|
||||
* Winston logger
|
||||
* ------------------------------------------------- */
|
||||
const winstonLogger = winston.createLogger({
|
||||
levels: winston.config.npm.levels,
|
||||
transports: [
|
||||
new transports.Console({
|
||||
format: format.combine(
|
||||
format.timestamp({ format: "YYYY-MM-DD HH:mm:ss.SSSZZ" }),
|
||||
format.colorize(),
|
||||
format.errors({ stack: true }),
|
||||
customFormat
|
||||
),
|
||||
format: format.combine(timestampFormat, format.colorize(), customConsoleFormat),
|
||||
level: "debug",
|
||||
}),
|
||||
createTransport("silly", sillyLogPath),
|
||||
@ -74,28 +115,36 @@ const winstonLogger = winston.createLogger({
|
||||
],
|
||||
});
|
||||
|
||||
/* -------------------------------------------------
|
||||
* Public logger API
|
||||
* ------------------------------------------------- */
|
||||
const logger = {
|
||||
debug: (message: string, service?: string, codePath?: string) => {
|
||||
winstonLogger.debug(message, { codePath, service });
|
||||
debug: (message: unknown, service?: string, codePath?: string) => {
|
||||
winstonLogger.debug(message as string, { codePath, service });
|
||||
},
|
||||
error: (error: string | Error, service?: string, codePath?: string) => {
|
||||
if (error instanceof Error) {
|
||||
winstonLogger.error(error.message, { codePath, error: error, service });
|
||||
} else {
|
||||
winstonLogger.error(error, { codePath, service });
|
||||
}
|
||||
|
||||
error: (message: unknown, service?: string, codePath?: string) => {
|
||||
winstonLogger.error(message as string, { codePath, service });
|
||||
},
|
||||
log: (message: string, service?: string, codePath?: string) => {
|
||||
winstonLogger.info(message, { codePath, service });
|
||||
|
||||
info: (message: unknown, service?: string, codePath?: string) => {
|
||||
winstonLogger.info(message as string, { codePath, service });
|
||||
},
|
||||
silly: (message: string, service?: string, codePath?: string) => {
|
||||
winstonLogger.silly(message, { codePath, service });
|
||||
|
||||
log: (message: unknown, service?: string, codePath?: string) => {
|
||||
winstonLogger.info(message as string, { codePath, service });
|
||||
},
|
||||
verbose: (message: string, service?: string, codePath?: string) => {
|
||||
winstonLogger.verbose(message, { codePath, service });
|
||||
|
||||
silly: (message: unknown, service?: string, codePath?: string) => {
|
||||
winstonLogger.silly(message as string, { codePath, service });
|
||||
},
|
||||
warn: (message: string, service?: string, codePath?: string) => {
|
||||
winstonLogger.warn(message, { codePath, service });
|
||||
|
||||
verbose: (message: unknown, service?: string, codePath?: string) => {
|
||||
winstonLogger.verbose(message as string, { codePath, service });
|
||||
},
|
||||
|
||||
warn: (message: unknown, service?: string, codePath?: string) => {
|
||||
winstonLogger.warn(message as string, { codePath, service });
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@ -23,7 +23,7 @@ import { ReplyError } from "ioredis";
|
||||
|
||||
type ProxyType = "native" | "alicloud-fc" | "ip-proxy" | "cf-worker";
|
||||
|
||||
const aliRegions = ["hangzhou"] as const;
|
||||
const aliRegions = ["hangzhou", "beijing", "shanghai", "chengdu"] as const;
|
||||
type AliRegion = (typeof aliRegions)[number];
|
||||
|
||||
function createAliProxiesObject<T extends readonly string[]>(regions: T) {
|
||||
@ -42,8 +42,8 @@ function createAliProxiesObject<T extends readonly string[]>(regions: T) {
|
||||
);
|
||||
}
|
||||
|
||||
const _aliProxiesObject = createAliProxiesObject(aliRegions);
|
||||
const _aliProxies = aliRegions.map((region) => `alicloud_${region}` as `alicloud_${AliRegion}`);
|
||||
const aliProxiesObject = createAliProxiesObject(aliRegions);
|
||||
const aliProxies = aliRegions.map((region) => `alicloud_${region}` as `alicloud_${AliRegion}`);
|
||||
|
||||
const proxies = {
|
||||
"cf-worker": {
|
||||
@ -56,6 +56,8 @@ const proxies = {
|
||||
data: {},
|
||||
type: "native" as const,
|
||||
},
|
||||
|
||||
...aliProxiesObject,
|
||||
} satisfies Record<string, ProxyDef>;
|
||||
|
||||
interface FCResponse {
|
||||
@ -152,14 +154,9 @@ bili_normal[0].max = 5;
|
||||
bili_normal[1].max = 40;
|
||||
bili_normal[2].max = 200;
|
||||
|
||||
const bili_strict = structuredClone(biliLimiterConfig);
|
||||
bili_strict[0].max = 1;
|
||||
bili_strict[1].max = 6;
|
||||
bili_strict[2].max = 100;
|
||||
|
||||
type MyProxyKeys = keyof typeof proxies;
|
||||
|
||||
const _fcProxies = aliRegions.map((region) => `alicloud_${region}`) as MyProxyKeys[];
|
||||
const fcProxies = aliRegions.map((region) => `alicloud_${region}`) as MyProxyKeys[];
|
||||
|
||||
function createNetworkConfig<ProviderKeys extends string, TaskKeys extends string>(
|
||||
config: NetworkConfigInternal<ProviderKeys, TaskKeys>
|
||||
@ -169,36 +166,41 @@ function createNetworkConfig<ProviderKeys extends string, TaskKeys extends strin
|
||||
|
||||
const config = createNetworkConfig({
|
||||
providers: {
|
||||
bilibili: { limiters: [] },
|
||||
// test: { limiters: [] },
|
||||
bilibili: { limiters: biliLimiterConfig },
|
||||
test: { limiters: [] },
|
||||
testCF: { limiters: [] },
|
||||
},
|
||||
proxies: proxies,
|
||||
tasks: {
|
||||
bulkSnapshot: {
|
||||
annualArchive: {
|
||||
provider: "bilibili",
|
||||
proxies: ["cf-worker"],
|
||||
proxies: [...aliProxies],
|
||||
},
|
||||
bulkSnapshot: {
|
||||
limiters: bili_normal,
|
||||
provider: "bilibili",
|
||||
proxies: ["alicloud_hangzhou"],
|
||||
},
|
||||
getLatestVideos: {
|
||||
provider: "bilibili",
|
||||
proxies: "all",
|
||||
proxies: ["alicloud_hangzhou", "cf-worker"],
|
||||
},
|
||||
getVideoInfo: {
|
||||
provider: "bilibili",
|
||||
proxies: "all",
|
||||
proxies: ["alicloud_hangzhou", "cf-worker"],
|
||||
},
|
||||
snapshotMilestoneVideo: {
|
||||
provider: "bilibili",
|
||||
proxies: ["cf-worker"],
|
||||
proxies: ["alicloud_hangzhou", "cf-worker", "native"],
|
||||
},
|
||||
snapshotVideo: {
|
||||
provider: "bilibili",
|
||||
proxies: ["cf-worker"],
|
||||
proxies: ["alicloud_hangzhou", "cf-worker"],
|
||||
},
|
||||
test: {
|
||||
provider: "test",
|
||||
proxies: fcProxies,
|
||||
},
|
||||
// test: {
|
||||
// provider: "test",
|
||||
// proxies: fcProxies,
|
||||
// },
|
||||
testCf: {
|
||||
provider: "testCF",
|
||||
proxies: ["cf-worker"],
|
||||
|
||||
@ -2,12 +2,18 @@ import logger from "@core/log";
|
||||
import type { VideoDetailsData, VideoDetailsResponse } from "@core/net/bilibili.d";
|
||||
import networkDelegate from "@core/net/delegate";
|
||||
|
||||
/**
|
||||
* Fetch detailed video metadata from bilibili API
|
||||
* @param aid The aid of the video
|
||||
* @returns The detailed metadata of the video, or null if the video does not exist
|
||||
* @throws {NetSchedulerError} The caller would need to handle this error
|
||||
*/
|
||||
export async function getVideoDetails(aid: number): Promise<VideoDetailsData | null> {
|
||||
const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`;
|
||||
const { data } = await networkDelegate.request<VideoDetailsResponse>(url, "getVideoInfo");
|
||||
const errMessage = `Error fetching metadata for ${aid}:`;
|
||||
if (data.code !== 0) {
|
||||
logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
|
||||
logger.error(`${errMessage + data.code}-${data.message}`, "net", "fn:getVideoInfo");
|
||||
return null;
|
||||
}
|
||||
return data.data;
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
import { bilibiliUser, db, videoSnapshot } from "@core/drizzle";
|
||||
import { SECOND } from "@core/lib";
|
||||
import logger from "@core/log";
|
||||
import { NetSchedulerError } from "@core/net/delegate";
|
||||
import { getVideoDetails } from "@core/net/getVideoDetails";
|
||||
import type { Job } from "bullmq";
|
||||
import {
|
||||
@ -9,7 +11,7 @@ import {
|
||||
} from "db/bilibili_metadata";
|
||||
import { eq } from "drizzle-orm";
|
||||
import { snapshotCounter } from "metrics";
|
||||
import { ClassifyVideoQueue, latestVideosEventsProducer } from "mq/index";
|
||||
import { ClassifyVideoQueue, LatestVideosQueue, latestVideosEventsProducer } from "mq/index";
|
||||
import type { GetVideoInfoJobData } from "mq/schema";
|
||||
import { insertIntoSongs } from "mq/task/collectSongs";
|
||||
|
||||
@ -46,73 +48,91 @@ export const getVideoInfoWorker = async (job: Job<GetVideoInfoJobData>): Promise
|
||||
await publishAddsongEvent(songs[0].id, job.data.uid);
|
||||
return;
|
||||
}
|
||||
const data = await getVideoDetails(aid);
|
||||
if (data === null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
const data = await getVideoDetails(aid);
|
||||
if (data === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const uid = data.View.owner.mid;
|
||||
const uid = data.View.owner.mid;
|
||||
|
||||
await insertIntoMetadata({
|
||||
aid,
|
||||
bvid: data.View.bvid,
|
||||
coverUrl: data.View.pic,
|
||||
description: data.View.desc,
|
||||
duration: data.View.duration,
|
||||
publishedAt: new Date(data.View.pubdate * 1000).toISOString(),
|
||||
tags: data.Tags.filter((tag) => !["old_channel", "topic"].indexOf(tag.tag_type))
|
||||
.map((tag) => tag.tag_name)
|
||||
.join(","),
|
||||
title: data.View.title,
|
||||
uid: uid,
|
||||
});
|
||||
|
||||
const userExists = await userExistsInBiliUsers(aid);
|
||||
if (!userExists) {
|
||||
await db.insert(bilibiliUser).values({
|
||||
avatar: data.View.owner.face,
|
||||
desc: data.Card.card.sign,
|
||||
fans: data.Card.follower,
|
||||
uid,
|
||||
username: data.View.owner.name,
|
||||
await insertIntoMetadata({
|
||||
aid,
|
||||
bvid: data.View.bvid,
|
||||
coverUrl: data.View.pic,
|
||||
description: data.View.desc,
|
||||
duration: data.View.duration,
|
||||
publishedAt: new Date(data.View.pubdate * 1000).toISOString(),
|
||||
tags: data.Tags.filter((tag) => !["old_channel", "topic"].indexOf(tag.tag_type))
|
||||
.map((tag) => tag.tag_name)
|
||||
.join(","),
|
||||
title: data.View.title,
|
||||
uid: uid,
|
||||
});
|
||||
} else {
|
||||
await db
|
||||
.update(bilibiliUser)
|
||||
.set({
|
||||
|
||||
const userExists = await userExistsInBiliUsers(aid);
|
||||
if (!userExists) {
|
||||
await db.insert(bilibiliUser).values({
|
||||
avatar: data.View.owner.face,
|
||||
desc: data.Card.card.sign,
|
||||
fans: data.Card.follower,
|
||||
uid,
|
||||
username: data.View.owner.name,
|
||||
})
|
||||
.where(eq(bilibiliUser.uid, uid));
|
||||
});
|
||||
} else {
|
||||
await db
|
||||
.update(bilibiliUser)
|
||||
.set({
|
||||
avatar: data.View.owner.face,
|
||||
desc: data.Card.card.sign,
|
||||
fans: data.Card.follower,
|
||||
username: data.View.owner.name,
|
||||
})
|
||||
.where(eq(bilibiliUser.uid, uid));
|
||||
}
|
||||
|
||||
const stat = data.View.stat;
|
||||
|
||||
await db.insert(videoSnapshot).values({
|
||||
aid,
|
||||
coins: stat.coin,
|
||||
danmakus: stat.danmaku,
|
||||
favorites: stat.favorite,
|
||||
likes: stat.like,
|
||||
replies: stat.reply,
|
||||
shares: stat.share,
|
||||
views: stat.view,
|
||||
});
|
||||
|
||||
snapshotCounter.add(1);
|
||||
|
||||
logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
|
||||
|
||||
if (!insertSongs) {
|
||||
await ClassifyVideoQueue.add("classifyVideo", { aid });
|
||||
return;
|
||||
}
|
||||
const songs = await insertIntoSongs(aid);
|
||||
if (songs.length === 0) {
|
||||
logger.warn(`Failed to insert song for aid: ${aid}`, "mq", "fn:getVideoInfoWorker");
|
||||
return;
|
||||
}
|
||||
await publishAddsongEvent(songs[0].id, job.data.uid);
|
||||
} catch (e) {
|
||||
if (e instanceof NetSchedulerError) {
|
||||
await LatestVideosQueue.add(
|
||||
"getVideoInfo",
|
||||
{ aid },
|
||||
{
|
||||
attempts: 10,
|
||||
backoff: {
|
||||
delay: 30 * SECOND,
|
||||
jitter: 1,
|
||||
type: "fixed",
|
||||
},
|
||||
}
|
||||
);
|
||||
}
|
||||
logger.error(e, "mq", "fn:getVideoInfoWorker");
|
||||
}
|
||||
|
||||
const stat = data.View.stat;
|
||||
|
||||
await db.insert(videoSnapshot).values({
|
||||
aid,
|
||||
coins: stat.coin,
|
||||
danmakus: stat.danmaku,
|
||||
favorites: stat.favorite,
|
||||
likes: stat.like,
|
||||
replies: stat.reply,
|
||||
shares: stat.share,
|
||||
views: stat.view,
|
||||
});
|
||||
|
||||
snapshotCounter.add(1);
|
||||
|
||||
logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
|
||||
|
||||
if (!insertSongs) {
|
||||
await ClassifyVideoQueue.add("classifyVideo", { aid });
|
||||
return;
|
||||
}
|
||||
const songs = await insertIntoSongs(aid);
|
||||
if (songs.length === 0) {
|
||||
logger.warn(`Failed to insert song for aid: ${aid}`, "mq", "fn:getVideoInfoWorker");
|
||||
return;
|
||||
}
|
||||
await publishAddsongEvent(songs[0].id, job.data.uid);
|
||||
};
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import logger from "@core/log";
|
||||
import type { MediaListInfoData, MediaListInfoResponse } from "@core/net/bilibili.d";
|
||||
import networkDelegate from "@core/net/delegate";
|
||||
import networkDelegate, { type RequestTasks } from "@core/net/delegate";
|
||||
|
||||
/*
|
||||
* Bulk fetch video metadata from bilibili API
|
||||
@ -11,7 +11,10 @@ import networkDelegate from "@core/net/delegate";
|
||||
* - The native `fetch` function threw an error: with error code `FETCH_ERROR`
|
||||
* - The alicloud-fc threw an error: with error code `ALICLOUD_FC_ERROR`
|
||||
*/
|
||||
export async function bulkGetVideoStats(aids: number[]): Promise<
|
||||
export async function bulkGetVideoStats(
|
||||
aids: number[],
|
||||
task?: RequestTasks
|
||||
): Promise<
|
||||
| {
|
||||
data: MediaListInfoData;
|
||||
time: number;
|
||||
@ -25,7 +28,7 @@ export async function bulkGetVideoStats(aids: number[]): Promise<
|
||||
}
|
||||
const { data, time } = await networkDelegate.request<MediaListInfoResponse>(
|
||||
url,
|
||||
"bulkSnapshot"
|
||||
task ?? "bulkSnapshot"
|
||||
);
|
||||
const errMessage = `Error fetching metadata for aid list: ${aids.join(",")}:`;
|
||||
if (data.code !== 0) {
|
||||
|
||||
95
src/annualArchive.ts
Normal file
95
src/annualArchive.ts
Normal file
@ -0,0 +1,95 @@
|
||||
import { bilibiliMetadata, db, eta, type VideoSnapshotType } from "@core/drizzle";
|
||||
import { SECOND } from "@core/lib";
|
||||
import logger from "@core/log";
|
||||
import { NetSchedulerError } from "@core/net/delegate";
|
||||
import { bulkGetVideoStats } from "@crawler/net/bulkGetVideoStats";
|
||||
import { desc, eq } from "drizzle-orm";
|
||||
|
||||
const store = Bun.file(`temp/annualSnapshots.json`);
|
||||
|
||||
const snapshots: Omit<VideoSnapshotType, "id">[] = [];
|
||||
|
||||
if (await store.exists()) {
|
||||
// load
|
||||
}
|
||||
|
||||
const aids = await db
|
||||
.select({
|
||||
aid: bilibiliMetadata.aid,
|
||||
})
|
||||
.from(bilibiliMetadata)
|
||||
.leftJoin(eta, eq(bilibiliMetadata.aid, eta.aid))
|
||||
.orderBy(desc(eta.speed))
|
||||
.then((rows) => {
|
||||
const mapped = rows.map((row) => row.aid);
|
||||
return mapped.filter((item): item is number => item !== null);
|
||||
});
|
||||
|
||||
const totalAids = aids.length;
|
||||
|
||||
logger.log(`Total aids: ${totalAids}`);
|
||||
|
||||
const bulkSize = 50;
|
||||
|
||||
const groupedAids = [];
|
||||
for (let i = 0; i < aids.length; i += bulkSize) {
|
||||
groupedAids.push(aids.slice(i, i + bulkSize));
|
||||
}
|
||||
|
||||
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
|
||||
|
||||
const serialize = async () => {
|
||||
const json = JSON.stringify(snapshots, null, 4);
|
||||
await store.write(json);
|
||||
};
|
||||
|
||||
let aidsProcessed = 0;
|
||||
|
||||
const requestForSnapshot = async (aids: number[], depth: number = 0) => {
|
||||
if (depth > 10) {
|
||||
logger.error(`Cannot fetch metadata for aids: ${aids.join(",")}, depth: ${depth}`);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const rawData = await bulkGetVideoStats(aids, "annualArchive");
|
||||
logger.log(`Fetched metadata for ${aids.length} aids, depth: ${depth}`);
|
||||
if (typeof rawData === "number") {
|
||||
await sleep(2 * SECOND);
|
||||
return requestForSnapshot(aids, depth + 1);
|
||||
}
|
||||
for (const item of rawData.data) {
|
||||
snapshots.push({
|
||||
aid: item.id,
|
||||
coins: item.cnt_info.coin,
|
||||
createdAt: new Date(rawData.time).toISOString(),
|
||||
danmakus: item.cnt_info.danmaku,
|
||||
favorites: item.cnt_info.collect,
|
||||
likes: item.cnt_info.thumb_up,
|
||||
replies: item.cnt_info.reply,
|
||||
shares: item.cnt_info.share,
|
||||
views: item.cnt_info.play,
|
||||
});
|
||||
aidsProcessed += 1;
|
||||
}
|
||||
} catch (e) {
|
||||
if (e instanceof NetSchedulerError) {
|
||||
requestForSnapshot(aids, 1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const taskFactories = groupedAids.map((group) => () => requestForSnapshot(group));
|
||||
|
||||
const concurrency = 100;
|
||||
|
||||
for (let i = 0; i < taskFactories.length; i += concurrency) {
|
||||
const batch = taskFactories.slice(i, i + concurrency);
|
||||
|
||||
await Promise.all(batch.map((factory) => factory()));
|
||||
|
||||
logger.log(`Processed ${aidsProcessed} of ${totalAids}`);
|
||||
await sleep(1.7 * SECOND);
|
||||
serialize();
|
||||
}
|
||||
await serialize();
|
||||
process.exit(0);
|
||||
373
src/extractAidsFromEvocalRank.ts
Normal file
373
src/extractAidsFromEvocalRank.ts
Normal file
@ -0,0 +1,373 @@
|
||||
export interface Root {
|
||||
version: number;
|
||||
ranknum: number;
|
||||
url: string;
|
||||
coverurl: string;
|
||||
pubdate: string;
|
||||
generate_time: string;
|
||||
generate_timestamp: number;
|
||||
collect_start_time: string;
|
||||
collect_end_time: string;
|
||||
collect_start_time_timestamp: number;
|
||||
collect_end_time_timestamp: number;
|
||||
main_rank: MainRank[];
|
||||
second_rank: SecondRank[];
|
||||
super_hit: SuperHit[];
|
||||
pick_up: PickUp[];
|
||||
oth_pickup: any[];
|
||||
Vocaloid_pick_up: VocaloidPickUp[];
|
||||
"history-1-year": History1Year[];
|
||||
"history-10-year": History10Year[];
|
||||
ed: Ed[];
|
||||
op: Op[];
|
||||
statistic: Statistic;
|
||||
thanks_list: any[];
|
||||
}
|
||||
|
||||
export interface MainRank {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: any;
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
referSource: ReferSource;
|
||||
rank: number;
|
||||
ext_rank: ExtRank;
|
||||
}
|
||||
|
||||
export interface ReferSource {
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
}
|
||||
|
||||
export interface ExtRank {
|
||||
vocaloid?: number;
|
||||
}
|
||||
|
||||
export interface SecondRank {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: string;
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
referSource: ReferSource2;
|
||||
rank: number;
|
||||
ext_rank: ExtRank2;
|
||||
}
|
||||
|
||||
export interface ReferSource2 {
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
}
|
||||
|
||||
export interface ExtRank2 {
|
||||
vocaloid?: number;
|
||||
}
|
||||
|
||||
export interface SuperHit {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: string;
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
referSource: ReferSource3;
|
||||
superHit_times: number;
|
||||
rank: string;
|
||||
}
|
||||
|
||||
export interface ReferSource3 {
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
}
|
||||
|
||||
export interface PickUp {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: string;
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
referSource: ReferSource4;
|
||||
rank: number;
|
||||
ext_rank: ExtRank4;
|
||||
}
|
||||
|
||||
export interface ReferSource4 {
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
}
|
||||
|
||||
export interface ExtRank4 {
|
||||
vocaloid?: number;
|
||||
}
|
||||
|
||||
export interface VocaloidPickUp {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: string;
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
referSource: ReferSource5;
|
||||
rank: number;
|
||||
ext_rank: ExtRank5;
|
||||
}
|
||||
|
||||
export interface ReferSource5 {
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
}
|
||||
|
||||
export interface ExtRank5 {
|
||||
vocaloid: number;
|
||||
}
|
||||
|
||||
export interface History1Year {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: string;
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
referSource: ReferSource6;
|
||||
rank: number;
|
||||
ext_rank: ExtRank6;
|
||||
}
|
||||
|
||||
export interface ReferSource6 {
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
}
|
||||
|
||||
export interface ExtRank6 {
|
||||
vocaloid?: number;
|
||||
}
|
||||
|
||||
export interface History10Year {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: string;
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
referSource: ReferSource7;
|
||||
rank: number;
|
||||
}
|
||||
|
||||
export interface ReferSource7 {
|
||||
point: number;
|
||||
play: number;
|
||||
coin: number;
|
||||
comment: number;
|
||||
danmaku: number;
|
||||
favorite: number;
|
||||
like: number;
|
||||
share: number;
|
||||
}
|
||||
|
||||
export interface Ed {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: string;
|
||||
}
|
||||
|
||||
export interface Op {
|
||||
url: string;
|
||||
avid: string;
|
||||
coverurl: string;
|
||||
title: string;
|
||||
pubdate: string;
|
||||
}
|
||||
|
||||
export interface Statistic {
|
||||
diff: Diff;
|
||||
total_collect_count: number;
|
||||
new_video_count: number;
|
||||
new_in_rank_count: number;
|
||||
new_in_mainrank_count: number;
|
||||
pick_up_count: number;
|
||||
oth_pick_up_count: number;
|
||||
new_vc_in_rank_count: number;
|
||||
new_vc_in_mainrank_count: number;
|
||||
vc_in_rank_count: number;
|
||||
vc_in_mainrank_count: number;
|
||||
new_sv_in_rank_count: number;
|
||||
new_sv_in_mainrank_count: number;
|
||||
sv_in_rank_count: number;
|
||||
sv_in_mainrank_count: number;
|
||||
new_ace_in_rank_count: number;
|
||||
new_ace_in_mainrank_count: number;
|
||||
ace_in_rank_count: number;
|
||||
ace_in_mainrank_count: number;
|
||||
}
|
||||
|
||||
export interface Diff {
|
||||
total_play: number;
|
||||
new_video_count: number;
|
||||
new_in_rank_count: number;
|
||||
new_in_mainrank_count: number;
|
||||
new_vc_in_rank_count: number;
|
||||
new_vc_in_mainrank_count: number;
|
||||
vc_in_rank_count: number;
|
||||
vc_in_mainrank_count: number;
|
||||
new_sv_in_rank_count: number;
|
||||
new_sv_in_mainrank_count: number;
|
||||
sv_in_rank_count: number;
|
||||
sv_in_mainrank_count: number;
|
||||
new_ace_in_rank_count: number;
|
||||
new_ace_in_mainrank_count: number;
|
||||
ace_in_rank_count: number;
|
||||
ace_in_mainrank_count: number;
|
||||
}
|
||||
|
||||
const aids = new Set<string>();
|
||||
const f = Bun.file("evocalrank.json");
|
||||
|
||||
for (let i = 699; i >= 520; i--) {
|
||||
const url = `https://www.evocalrank.com/data/rank_data/${i}.json`;
|
||||
const response = await fetch(url);
|
||||
const data = await response.json() as Partial<Root>;
|
||||
if (data.main_rank) {
|
||||
for (const item of data.main_rank) {
|
||||
if (item.avid) {
|
||||
aids.add(item.avid);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (data.second_rank) {
|
||||
for (const item of data.second_rank) {
|
||||
if (item.avid) {
|
||||
aids.add(item.avid);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (data.pick_up) {
|
||||
for (const item of data.pick_up) {
|
||||
if (item.avid) {
|
||||
aids.add(item.avid);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (data.super_hit) {
|
||||
for (const item of data.super_hit) {
|
||||
if (item.avid) {
|
||||
aids.add(item.avid);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (data.Vocaloid_pick_up) {
|
||||
for (const item of data.Vocaloid_pick_up) {
|
||||
if (item.avid) {
|
||||
aids.add(item.avid);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (data.ed) {
|
||||
for (const item of data.ed) {
|
||||
if (item.avid) {
|
||||
aids.add(item.avid);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (data.op) {
|
||||
for (const item of data.op) {
|
||||
if (item.avid) {
|
||||
aids.add(item.avid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const serialized = JSON.stringify([...aids], null, 4);
|
||||
await f.write(serialized);
|
||||
console.log(`${i} ${aids.size}`);
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user