ref: format

This commit is contained in:
alikia2x (寒寒) 2025-03-29 22:23:31 +08:00
parent 291a21d82a
commit e0776a452e
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
15 changed files with 164 additions and 78 deletions

View File

@ -1,8 +1,20 @@
{
"lock": false,
"workspace": ["./packages/crawler", "./packages/frontend", "./packages/backend", "./packages/core"],
"nodeModulesDir": "auto",
"lock": false,
"workspace": [
"./packages/crawler",
"./packages/frontend",
"./packages/backend",
"./packages/core"
],
"nodeModulesDir": "auto",
"tasks": {
"crawler": "deno task --filter 'crawler' all"
},
"fmt": {
"useTabs": true,
"lineWidth": 120,
"indentWidth": 4,
"semiColons": true,
"proseWrap": "always"
}
}

View File

@ -28,7 +28,7 @@ export async function refreshSnapshotWindowCounts(client: Client, redisClient: R
WHERE started_at >= NOW() AND status = 'pending' AND started_at <= NOW() + INTERVAL '10 days'
GROUP BY 1
ORDER BY window_start
`
`;
await redisClient.del(REDIS_KEY);
@ -36,7 +36,7 @@ export async function refreshSnapshotWindowCounts(client: Client, redisClient: R
for (const row of result.rows) {
const targetOffset = Math.floor((row.window_start.getTime() - startTime) / (5 * MINUTE));
const offset = (currentWindow + targetOffset);
const offset = currentWindow + targetOffset;
if (offset >= 0) {
await redisClient.hset(REDIS_KEY, offset.toString(), Number(row.count));
}
@ -186,7 +186,13 @@ export async function getSnapshotScheduleCountWithinRange(client: Client, start:
* @param aid The aid of the video.
* @param targetTime Scheduled time for snapshot. (Timestamp in milliseconds)
*/
export async function scheduleSnapshot(client: Client, aid: number, type: string, targetTime: number, force: boolean = false) {
export async function scheduleSnapshot(
client: Client,
aid: number,
type: string,
targetTime: number,
force: boolean = false,
) {
if (await videoHasActiveSchedule(client, aid) && !force) return;
let adjustedTime = new Date(targetTime);
if (type !== "milestone" && type !== "new") {
@ -199,7 +205,13 @@ export async function scheduleSnapshot(client: Client, aid: number, type: string
);
}
export async function bulkScheduleSnapshot(client: Client, aids: number[], type: string, targetTime: number, force: boolean = false) {
export async function bulkScheduleSnapshot(
client: Client,
aids: number[],
type: string,
targetTime: number,
force: boolean = false,
) {
for (const aid of aids) {
await scheduleSnapshot(client, aid, type, targetTime, force);
}
@ -237,12 +249,12 @@ export async function adjustSnapshotTime(
if (delayedDate.getTime() < now.getTime()) {
const elapsed = performance.now() - t;
timePerIteration = elapsed / (i+1);
timePerIteration = elapsed / (i + 1);
logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime");
return now;
}
const elapsed = performance.now() - t;
timePerIteration = elapsed / (i+1);
timePerIteration = elapsed / (i + 1);
logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime");
return delayedDate;
}
@ -253,7 +265,6 @@ export async function adjustSnapshotTime(
return expectedStartTime;
}
export async function getSnapshotsInNextSecond(client: Client) {
const query = `
SELECT *
@ -272,7 +283,7 @@ export async function getSnapshotsInNextSecond(client: Client) {
}
export async function getBulkSnapshotsInNextSecond(client: Client) {
const query = `
const query = `
SELECT *
FROM snapshot_schedule
WHERE started_at <= NOW() + INTERVAL '15 seconds' AND status = 'pending' AND type = 'normal'

View File

@ -1,5 +1,5 @@
{
"name": "@cvsa/crawler",
"name": "@cvsa/crawler",
"tasks": {
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
@ -26,11 +26,11 @@
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
"bullmq": "npm:bullmq",
"mq/": "./mq/",
"db/": "./db/",
"log/": "./log/",
"net/": "./net/",
"ml/": "./ml/",
"utils/": "./utils/",
"db/": "./db/",
"log/": "./log/",
"net/": "./net/",
"ml/": "./ml/",
"utils/": "./utils/",
"ioredis": "npm:ioredis",
"@bull-board/api": "npm:@bull-board/api",
"@bull-board/express": "npm:@bull-board/express",
@ -39,12 +39,5 @@
"onnxruntime": "npm:onnxruntime-node@1.19.2",
"chalk": "npm:chalk"
},
"fmt": {
"useTabs": true,
"lineWidth": 120,
"indentWidth": 4,
"semiColons": true,
"proseWrap": "always"
},
"exports": "./main.ts"
"exports": "./main.ts"
}

View File

@ -7,6 +7,7 @@ import {
bulkSetSnapshotStatus,
findClosestSnapshot,
findSnapshotBefore,
getBulkSnapshotsInNextSecond,
getLatestSnapshot,
getSnapshotsInNextSecond,
getVideosWithoutActiveSnapshotSchedule,
@ -15,7 +16,6 @@ import {
setSnapshotStatus,
snapshotScheduleExists,
videoHasProcessingSchedule,
getBulkSnapshotsInNextSecond
} from "db/snapshotSchedule.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts";
@ -282,8 +282,7 @@ export const takeBulkSnapshotForVideosWorker = async (job: Job) => {
}
logger.error(e as Error, "mq", "fn:takeBulkSnapshotForVideosWorker");
await bulkSetSnapshotStatus(client, ids, "failed");
}
finally {
} finally {
client.release();
}
};

View File

@ -13,10 +13,9 @@ export type MediaListInfoResponse = BaseResponse<MediaListInfoData>;
export type MediaListInfoData = MediaListInfoItem[];
export interface MediaListInfoItem {
attr: number;
bvid: string;
bvid: string;
id: number;
cnt_info: {
coin: number;
@ -26,7 +25,7 @@ export interface MediaListInfoItem {
reply: number;
share: number;
thumb_up: number;
}
};
}
interface VideoInfoData {

View File

@ -13,7 +13,7 @@ import logger from "log/logger.ts";
*/
export async function bulkGetVideoStats(aids: number[]): Promise<MediaListInfoData | number> {
const baseURL = `https://api.bilibili.com/medialist/gateway/base/resource/infos?resources=`;
let url = baseURL;
let url = baseURL;
for (const aid of aids) {
url += `${aid}:2,`;
}

View File

@ -6,13 +6,13 @@ import { lockManager } from "mq/lockManager.ts";
import { WorkerError } from "mq/schema.ts";
import { getVideoInfoWorker } from "mq/exec/getLatestVideos.ts";
import {
bulkSnapshotTickWorker,
collectMilestoneSnapshotsWorker,
regularSnapshotsWorker,
snapshotTickWorker,
takeSnapshotForVideoWorker,
scheduleCleanupWorker,
snapshotTickWorker,
takeBulkSnapshotForVideosWorker,
bulkSnapshotTickWorker
takeSnapshotForVideoWorker,
} from "mq/exec/snapshotTick.ts";
Deno.addSignalListener("SIGINT", async () => {

View File

@ -28,15 +28,16 @@ Inside of your Astro project, you'll see the following folders and files:
└── package.json
```
To learn more about the folder structure of an Astro project, refer to [our guide on project structure](https://docs.astro.build/en/basics/project-structure/).
To learn more about the folder structure of an Astro project, refer to
[our guide on project structure](https://docs.astro.build/en/basics/project-structure/).
## 🧞 Commands
All commands are run from the root of the project, from a terminal:
| Command | Action |
| :------------------------ | :----------------------------------------------- |
| `deno install` | Installs dependencies |
| Command | Action |
| :--------------------- | :----------------------------------------------- |
| `deno install` | Installs dependencies |
| `deno dev` | Starts local dev server at `localhost:4321` |
| `deno build` | Build your production site to `./dist/` |
| `deno preview` | Preview your build locally, before deploying |
@ -45,4 +46,5 @@ All commands are run from the root of the project, from a terminal:
## 👀 Want to learn more?
Feel free to check [our documentation](https://docs.astro.build) or jump into our [Discord server](https://astro.build/chat).
Feel free to check [our documentation](https://docs.astro.build) or jump into our
[Discord server](https://astro.build/chat).

View File

@ -1,5 +1,13 @@
// @ts-check
import { defineConfig } from 'astro/config';
import { defineConfig } from "astro/config";
// https://astro.build/config
// Default export: the Astro configuration object built with `defineConfig`.
// NOTE(review): two `export default` statements follow — an empty config and one
// carrying Vite options. A module may only have one default export; this looks like
// the before/after pair of a diff view — confirm which one the file actually keeps.
export default defineConfig({});
export default defineConfig({
vite: {
server: {
fs: {
// Entries passed to Vite's `server.fs.allow`: the current directory and the
// directory two levels up. Presumably this lets the dev server read files
// from the workspace root — TODO confirm against the repository layout.
allow: [".", "../../"],
},
},
},
});

View File

@ -1,14 +1,14 @@
{
"name": "frontend",
"type": "module",
"version": "0.0.1",
"scripts": {
"dev": "astro dev",
"build": "astro build",
"preview": "astro preview",
"astro": "astro"
},
"dependencies": {
"astro": "^5.5.5"
}
"name": "frontend",
"type": "module",
"version": "0.0.1",
"scripts": {
"dev": "astro dev",
"build": "astro build",
"preview": "astro preview",
"astro": "astro"
},
"dependencies": {
"astro": "^5.5.5"
}
}

View File

@ -1,5 +1,5 @@
{
"extends": "astro/tsconfigs/strict",
"include": [".astro/types.d.ts", "**/*"],
"exclude": ["dist"]
"extends": "astro/tsconfigs/strict",
"include": [".astro/types.d.ts", "**/*"],
"exclude": ["dist"]
}

View File

@ -14,14 +14,20 @@ const db = new Database(DATABASE_PATH, { int64: true });
// 设置日志
async function setupLogging() {
await ensureDir(LOG_DIR);
const logStream = await Deno.open(LOG_FILE, { write: true, create: true, append: true });
const logStream = await Deno.open(LOG_FILE, {
write: true,
create: true,
append: true,
});
const redirectConsole =
// deno-lint-ignore no-explicit-any
(originalConsole: (...args: any[]) => void) =>
// deno-lint-ignore no-explicit-any
(...args: any[]) => {
const message = args.map((arg) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" ");
const message = args.map((
arg,
) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" ");
originalConsole(message);
logStream.write(new TextEncoder().encode(message + "\n"));
};
@ -38,14 +44,17 @@ interface Metadata {
// Get the time of the last update.
// Queries the `value` column of the `metadata` row whose key is 'fetchAid-lastUpdate'
// and returns it wrapped in a Date. When the lookup result is falsy, returns
// `new Date(0)` instead.
function getLastUpdate(): Date {
// NOTE(review): the lookup below appears twice — once on a single line and once
// wrapped over three lines. The two forms differ only in line wrapping and a
// trailing comma; this looks like the before/after pair of a formatting-only
// diff — confirm which one the file actually keeps.
const result = db.prepare("SELECT value FROM metadata WHERE key = 'fetchAid-lastUpdate'").get() as Metadata;
const result = db.prepare(
"SELECT value FROM metadata WHERE key = 'fetchAid-lastUpdate'",
).get() as Metadata;
// `value` is cast to string before being handed to the Date constructor.
return result ? new Date(result.value as string) : new Date(0);
}
// Update the last-update time.
// Writes the current time, formatted by `Date.prototype.toISOString()`, into the
// `value` column of the `metadata` row whose key is 'fetchAid-lastUpdate'.
function updateLastUpdate() {
const now = new Date().toISOString();
// NOTE(review): the UPDATE below appears twice — once on a single line and once
// with `.run(now)` wrapped onto its own line. The two forms differ only in line
// wrapping; this looks like the before/after pair of a formatting-only diff —
// confirm which one the file actually keeps.
db.prepare("UPDATE metadata SET value = ? WHERE key = 'fetchAid-lastUpdate'").run(now);
db.prepare("UPDATE metadata SET value = ? WHERE key = 'fetchAid-lastUpdate'")
.run(now);
}
// 辅助函数:获取数据
@ -66,7 +75,9 @@ async function fetchData(pn: number, retries = MAX_RETRIES): Promise<any> {
// Insert an aid into the database.
// Adds a row to `bili_info_crawl` with the given `aid` and status 'pending'.
// The statement uses `INSERT OR IGNORE`; presumably this skips aids that are
// already stored, which depends on a uniqueness constraint on `aid` that is not
// visible here — TODO confirm against the table schema.
function insertAid(aid: number) {
// NOTE(review): the insert below appears twice — once on a single line and once
// wrapped over three lines. The two forms differ only in line wrapping and a
// trailing comma; this looks like the before/after pair of a formatting-only
// diff — confirm which one the file actually keeps.
db.prepare("INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')").run(aid);
db.prepare(
"INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')",
).run(aid);
}
// 主函数

View File

@ -5,7 +5,16 @@ import { ensureDir } from "https://deno.land/std@0.113.0/fs/mod.ts";
const aidPath = "./data/2025010104_c30_aids.txt";
const db = new Database("./data/main.db", { int64: true });
const regions = ["shanghai", "hangzhou", "qingdao", "beijing", "zhangjiakou", "chengdu", "shenzhen", "hohhot"];
const regions = [
"shanghai",
"hangzhou",
"qingdao",
"beijing",
"zhangjiakou",
"chengdu",
"shenzhen",
"hohhot",
];
const logDir = "./logs/bili-info-crawl";
const logFile = path.join(logDir, `run-${Date.now() / 1000}.log`);
const shouldReadTextFile = false;
@ -26,14 +35,20 @@ const requestQueue: number[] = [];
async function setupLogging() {
await ensureDir(logDir);
const logStream = await Deno.open(logFile, { write: true, create: true, append: true });
const logStream = await Deno.open(logFile, {
write: true,
create: true,
append: true,
});
const redirectConsole =
// deno-lint-ignore no-explicit-any
(originalConsole: (...args: any[]) => void) =>
// deno-lint-ignore no-explicit-any
(...args: any[]) => {
const message = args.map((arg) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" ");
const message = args.map((
arg,
) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" ");
originalConsole(message);
logStream.write(new TextEncoder().encode(message + "\n"));
};
@ -78,7 +93,9 @@ async function readFromText() {
const newAids = aids.filter((aid) => !existingAidsSet.has(aid));
// 插入这些新条目
const insertStmt = db.prepare("INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')");
const insertStmt = db.prepare(
"INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')",
);
newAids.forEach((aid) => insertStmt.run(aid));
}
@ -88,7 +105,9 @@ async function insertAidsToDB() {
}
const aidsInDB = db
.prepare("SELECT aid FROM bili_info_crawl WHERE status = 'pending' OR status = 'failed'")
.prepare(
"SELECT aid FROM bili_info_crawl WHERE status = 'pending' OR status = 'failed'",
)
.all()
.map((row) => row.aid) as number[];
@ -98,13 +117,21 @@ async function insertAidsToDB() {
const processAid = async (aid: number) => {
try {
const res = await getBiliBiliVideoInfo(aid, regions[processedAids % regions.length]);
const res = await getBiliBiliVideoInfo(
aid,
regions[processedAids % regions.length],
);
if (res === null) {
updateAidStatus(aid, "failed");
} else {
const rawData = JSON.parse(res);
if (rawData.code === 0) {
updateAidStatus(aid, "success", rawData.data.View.bvid, JSON.stringify(rawData.data));
updateAidStatus(
aid,
"success",
rawData.data.View.bvid,
JSON.stringify(rawData.data),
);
} else {
updateAidStatus(aid, "error", undefined, res);
}
@ -136,7 +163,12 @@ async function insertAidsToDB() {
console.log("Starting to process aids...");
}
function updateAidStatus(aid: number, status: string, bvid?: string, data?: string) {
function updateAidStatus(
aid: number,
status: string,
bvid?: string,
data?: string,
) {
const stmt = db.prepare(`
UPDATE bili_info_crawl
SET status = ?,
@ -145,11 +177,22 @@ function updateAidStatus(aid: number, status: string, bvid?: string, data?: stri
timestamp = ?
WHERE aid = ?
`);
const params = [status, ...(bvid ? [bvid] : []), ...(data ? [data] : []), Date.now() / 1000, aid];
const params = [
status,
...(bvid ? [bvid] : []),
...(data ? [data] : []),
Date.now() / 1000,
aid,
];
stmt.run(...params);
}
function logProgress(aid: number, processedAids: number, totalAids: number, startTime: number) {
function logProgress(
aid: number,
processedAids: number,
totalAids: number,
startTime: number,
) {
const elapsedTime = Date.now() - startTime;
const elapsedSeconds = Math.floor(elapsedTime / 1000);
const elapsedMinutes = Math.floor(elapsedSeconds / 60);

View File

@ -1,4 +1,7 @@
export async function getBiliBiliVideoInfo(bvidORaid?: string | number, region: string = "hangzhou") {
export async function getBiliBiliVideoInfo(
bvidORaid?: string | number,
region: string = "hangzhou",
) {
const bvid = typeof bvidORaid === "string" ? bvidORaid : undefined;
const aid = typeof bvidORaid === "number" ? bvidORaid : undefined;
@ -18,7 +21,10 @@ export async function getBiliBiliVideoInfo(bvidORaid?: string | number, region:
}
}
async function proxyRequestWithRegion(url: string, region: string): Promise<any | null> {
async function proxyRequestWithRegion(
url: string,
region: string,
): Promise<any | null> {
const td = new TextDecoder();
// aliyun configure set --access-key-id $ALIYUN_AK --access-key-secret $ALIYUN_SK --region cn-shenzhen --profile CVSA-shenzhen --mode AK
const p = await new Deno.Command("aliyun", {
@ -40,7 +46,9 @@ async function proxyRequestWithRegion(url: string, region: string): Promise<any
const out = td.decode(p.stdout);
const rawData = JSON.parse(out);
if (rawData.statusCode !== 200) {
console.error(`Error proxying request ${url} to ${region} , statusCode: ${rawData.statusCode}`);
console.error(
`Error proxying request ${url} to ${region} , statusCode: ${rawData.statusCode}`,
);
return null;
} else {
return JSON.parse(rawData.body);