ref: format
This commit is contained in:
parent
291a21d82a
commit
e0776a452e
18
deno.json
18
deno.json
@ -1,8 +1,20 @@
|
||||
{
|
||||
"lock": false,
|
||||
"workspace": ["./packages/crawler", "./packages/frontend", "./packages/backend", "./packages/core"],
|
||||
"nodeModulesDir": "auto",
|
||||
"lock": false,
|
||||
"workspace": [
|
||||
"./packages/crawler",
|
||||
"./packages/frontend",
|
||||
"./packages/backend",
|
||||
"./packages/core"
|
||||
],
|
||||
"nodeModulesDir": "auto",
|
||||
"tasks": {
|
||||
"crawler": "deno task --filter 'crawler' all"
|
||||
},
|
||||
"fmt": {
|
||||
"useTabs": true,
|
||||
"lineWidth": 120,
|
||||
"indentWidth": 4,
|
||||
"semiColons": true,
|
||||
"proseWrap": "always"
|
||||
}
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ export async function refreshSnapshotWindowCounts(client: Client, redisClient: R
|
||||
WHERE started_at >= NOW() AND status = 'pending' AND started_at <= NOW() + INTERVAL '10 days'
|
||||
GROUP BY 1
|
||||
ORDER BY window_start
|
||||
`
|
||||
`;
|
||||
|
||||
await redisClient.del(REDIS_KEY);
|
||||
|
||||
@ -36,7 +36,7 @@ export async function refreshSnapshotWindowCounts(client: Client, redisClient: R
|
||||
|
||||
for (const row of result.rows) {
|
||||
const targetOffset = Math.floor((row.window_start.getTime() - startTime) / (5 * MINUTE));
|
||||
const offset = (currentWindow + targetOffset);
|
||||
const offset = currentWindow + targetOffset;
|
||||
if (offset >= 0) {
|
||||
await redisClient.hset(REDIS_KEY, offset.toString(), Number(row.count));
|
||||
}
|
||||
@ -186,7 +186,13 @@ export async function getSnapshotScheduleCountWithinRange(client: Client, start:
|
||||
* @param aid The aid of the video.
|
||||
* @param targetTime Scheduled time for snapshot. (Timestamp in milliseconds)
|
||||
*/
|
||||
export async function scheduleSnapshot(client: Client, aid: number, type: string, targetTime: number, force: boolean = false) {
|
||||
export async function scheduleSnapshot(
|
||||
client: Client,
|
||||
aid: number,
|
||||
type: string,
|
||||
targetTime: number,
|
||||
force: boolean = false,
|
||||
) {
|
||||
if (await videoHasActiveSchedule(client, aid) && !force) return;
|
||||
let adjustedTime = new Date(targetTime);
|
||||
if (type !== "milestone" && type !== "new") {
|
||||
@ -199,7 +205,13 @@ export async function scheduleSnapshot(client: Client, aid: number, type: string
|
||||
);
|
||||
}
|
||||
|
||||
export async function bulkScheduleSnapshot(client: Client, aids: number[], type: string, targetTime: number, force: boolean = false) {
|
||||
export async function bulkScheduleSnapshot(
|
||||
client: Client,
|
||||
aids: number[],
|
||||
type: string,
|
||||
targetTime: number,
|
||||
force: boolean = false,
|
||||
) {
|
||||
for (const aid of aids) {
|
||||
await scheduleSnapshot(client, aid, type, targetTime, force);
|
||||
}
|
||||
@ -237,12 +249,12 @@ export async function adjustSnapshotTime(
|
||||
|
||||
if (delayedDate.getTime() < now.getTime()) {
|
||||
const elapsed = performance.now() - t;
|
||||
timePerIteration = elapsed / (i+1);
|
||||
timePerIteration = elapsed / (i + 1);
|
||||
logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime");
|
||||
return now;
|
||||
}
|
||||
const elapsed = performance.now() - t;
|
||||
timePerIteration = elapsed / (i+1);
|
||||
timePerIteration = elapsed / (i + 1);
|
||||
logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime");
|
||||
return delayedDate;
|
||||
}
|
||||
@ -253,7 +265,6 @@ export async function adjustSnapshotTime(
|
||||
return expectedStartTime;
|
||||
}
|
||||
|
||||
|
||||
export async function getSnapshotsInNextSecond(client: Client) {
|
||||
const query = `
|
||||
SELECT *
|
||||
@ -272,7 +283,7 @@ export async function getSnapshotsInNextSecond(client: Client) {
|
||||
}
|
||||
|
||||
export async function getBulkSnapshotsInNextSecond(client: Client) {
|
||||
const query = `
|
||||
const query = `
|
||||
SELECT *
|
||||
FROM snapshot_schedule
|
||||
WHERE started_at <= NOW() + INTERVAL '15 seconds' AND status = 'pending' AND type = 'normal'
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"name": "@cvsa/crawler",
|
||||
"name": "@cvsa/crawler",
|
||||
"tasks": {
|
||||
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
|
||||
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
|
||||
@ -26,11 +26,11 @@
|
||||
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
|
||||
"bullmq": "npm:bullmq",
|
||||
"mq/": "./mq/",
|
||||
"db/": "./db/",
|
||||
"log/": "./log/",
|
||||
"net/": "./net/",
|
||||
"ml/": "./ml/",
|
||||
"utils/": "./utils/",
|
||||
"db/": "./db/",
|
||||
"log/": "./log/",
|
||||
"net/": "./net/",
|
||||
"ml/": "./ml/",
|
||||
"utils/": "./utils/",
|
||||
"ioredis": "npm:ioredis",
|
||||
"@bull-board/api": "npm:@bull-board/api",
|
||||
"@bull-board/express": "npm:@bull-board/express",
|
||||
@ -39,12 +39,5 @@
|
||||
"onnxruntime": "npm:onnxruntime-node@1.19.2",
|
||||
"chalk": "npm:chalk"
|
||||
},
|
||||
"fmt": {
|
||||
"useTabs": true,
|
||||
"lineWidth": 120,
|
||||
"indentWidth": 4,
|
||||
"semiColons": true,
|
||||
"proseWrap": "always"
|
||||
},
|
||||
"exports": "./main.ts"
|
||||
"exports": "./main.ts"
|
||||
}
|
@ -7,6 +7,7 @@ import {
|
||||
bulkSetSnapshotStatus,
|
||||
findClosestSnapshot,
|
||||
findSnapshotBefore,
|
||||
getBulkSnapshotsInNextSecond,
|
||||
getLatestSnapshot,
|
||||
getSnapshotsInNextSecond,
|
||||
getVideosWithoutActiveSnapshotSchedule,
|
||||
@ -15,7 +16,6 @@ import {
|
||||
setSnapshotStatus,
|
||||
snapshotScheduleExists,
|
||||
videoHasProcessingSchedule,
|
||||
getBulkSnapshotsInNextSecond
|
||||
} from "db/snapshotSchedule.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts";
|
||||
@ -282,8 +282,7 @@ export const takeBulkSnapshotForVideosWorker = async (job: Job) => {
|
||||
}
|
||||
logger.error(e as Error, "mq", "fn:takeBulkSnapshotForVideosWorker");
|
||||
await bulkSetSnapshotStatus(client, ids, "failed");
|
||||
}
|
||||
finally {
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
5
packages/crawler/net/bilibili.d.ts
vendored
5
packages/crawler/net/bilibili.d.ts
vendored
@ -13,10 +13,9 @@ export type MediaListInfoResponse = BaseResponse<MediaListInfoData>;
|
||||
|
||||
export type MediaListInfoData = MediaListInfoItem[];
|
||||
|
||||
|
||||
export interface MediaListInfoItem {
|
||||
attr: number;
|
||||
bvid: string;
|
||||
bvid: string;
|
||||
id: number;
|
||||
cnt_info: {
|
||||
coin: number;
|
||||
@ -26,7 +25,7 @@ export interface MediaListInfoItem {
|
||||
reply: number;
|
||||
share: number;
|
||||
thumb_up: number;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
interface VideoInfoData {
|
||||
|
@ -13,7 +13,7 @@ import logger from "log/logger.ts";
|
||||
*/
|
||||
export async function bulkGetVideoStats(aids: number[]): Promise<MediaListInfoData | number> {
|
||||
const baseURL = `https://api.bilibili.com/medialist/gateway/base/resource/infos?resources=`;
|
||||
let url = baseURL;
|
||||
let url = baseURL;
|
||||
for (const aid of aids) {
|
||||
url += `${aid}:2,`;
|
||||
}
|
||||
|
@ -6,13 +6,13 @@ import { lockManager } from "mq/lockManager.ts";
|
||||
import { WorkerError } from "mq/schema.ts";
|
||||
import { getVideoInfoWorker } from "mq/exec/getLatestVideos.ts";
|
||||
import {
|
||||
bulkSnapshotTickWorker,
|
||||
collectMilestoneSnapshotsWorker,
|
||||
regularSnapshotsWorker,
|
||||
snapshotTickWorker,
|
||||
takeSnapshotForVideoWorker,
|
||||
scheduleCleanupWorker,
|
||||
snapshotTickWorker,
|
||||
takeBulkSnapshotForVideosWorker,
|
||||
bulkSnapshotTickWorker
|
||||
takeSnapshotForVideoWorker,
|
||||
} from "mq/exec/snapshotTick.ts";
|
||||
|
||||
Deno.addSignalListener("SIGINT", async () => {
|
||||
|
@ -28,15 +28,16 @@ Inside of your Astro project, you'll see the following folders and files:
|
||||
└── package.json
|
||||
```
|
||||
|
||||
To learn more about the folder structure of an Astro project, refer to [our guide on project structure](https://docs.astro.build/en/basics/project-structure/).
|
||||
To learn more about the folder structure of an Astro project, refer to
|
||||
[our guide on project structure](https://docs.astro.build/en/basics/project-structure/).
|
||||
|
||||
## 🧞 Commands
|
||||
|
||||
All commands are run from the root of the project, from a terminal:
|
||||
|
||||
| Command | Action |
|
||||
| :------------------------ | :----------------------------------------------- |
|
||||
| `deno install` | Installs dependencies |
|
||||
| Command | Action |
|
||||
| :--------------------- | :----------------------------------------------- |
|
||||
| `deno install` | Installs dependencies |
|
||||
| `deno dev` | Starts local dev server at `localhost:4321` |
|
||||
| `deno build` | Build your production site to `./dist/` |
|
||||
| `deno preview` | Preview your build locally, before deploying |
|
||||
@ -45,4 +46,5 @@ All commands are run from the root of the project, from a terminal:
|
||||
|
||||
## 👀 Want to learn more?
|
||||
|
||||
Feel free to check [our documentation](https://docs.astro.build) or jump into our [Discord server](https://astro.build/chat).
|
||||
Feel free to check [our documentation](https://docs.astro.build) or jump into our
|
||||
[Discord server](https://astro.build/chat).
|
||||
|
@ -1,5 +1,13 @@
|
||||
// @ts-check
|
||||
import { defineConfig } from 'astro/config';
|
||||
import { defineConfig } from "astro/config";
|
||||
|
||||
// https://astro.build/config
|
||||
export default defineConfig({});
|
||||
export default defineConfig({
|
||||
vite: {
|
||||
server: {
|
||||
fs: {
|
||||
allow: [".", "../../"],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
@ -1,14 +1,14 @@
|
||||
{
|
||||
"name": "frontend",
|
||||
"type": "module",
|
||||
"version": "0.0.1",
|
||||
"scripts": {
|
||||
"dev": "astro dev",
|
||||
"build": "astro build",
|
||||
"preview": "astro preview",
|
||||
"astro": "astro"
|
||||
},
|
||||
"dependencies": {
|
||||
"astro": "^5.5.5"
|
||||
}
|
||||
"name": "frontend",
|
||||
"type": "module",
|
||||
"version": "0.0.1",
|
||||
"scripts": {
|
||||
"dev": "astro dev",
|
||||
"build": "astro build",
|
||||
"preview": "astro preview",
|
||||
"astro": "astro"
|
||||
},
|
||||
"dependencies": {
|
||||
"astro": "^5.5.5"
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"extends": "astro/tsconfigs/strict",
|
||||
"include": [".astro/types.d.ts", "**/*"],
|
||||
"exclude": ["dist"]
|
||||
"extends": "astro/tsconfigs/strict",
|
||||
"include": [".astro/types.d.ts", "**/*"],
|
||||
"exclude": ["dist"]
|
||||
}
|
||||
|
@ -14,14 +14,20 @@ const db = new Database(DATABASE_PATH, { int64: true });
|
||||
// 设置日志
|
||||
async function setupLogging() {
|
||||
await ensureDir(LOG_DIR);
|
||||
const logStream = await Deno.open(LOG_FILE, { write: true, create: true, append: true });
|
||||
const logStream = await Deno.open(LOG_FILE, {
|
||||
write: true,
|
||||
create: true,
|
||||
append: true,
|
||||
});
|
||||
|
||||
const redirectConsole =
|
||||
// deno-lint-ignore no-explicit-any
|
||||
(originalConsole: (...args: any[]) => void) =>
|
||||
// deno-lint-ignore no-explicit-any
|
||||
(...args: any[]) => {
|
||||
const message = args.map((arg) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" ");
|
||||
const message = args.map((
|
||||
arg,
|
||||
) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" ");
|
||||
originalConsole(message);
|
||||
logStream.write(new TextEncoder().encode(message + "\n"));
|
||||
};
|
||||
@ -38,14 +44,17 @@ interface Metadata {
|
||||
|
||||
// 获取最后一次更新的时间
|
||||
function getLastUpdate(): Date {
|
||||
const result = db.prepare("SELECT value FROM metadata WHERE key = 'fetchAid-lastUpdate'").get() as Metadata;
|
||||
const result = db.prepare(
|
||||
"SELECT value FROM metadata WHERE key = 'fetchAid-lastUpdate'",
|
||||
).get() as Metadata;
|
||||
return result ? new Date(result.value as string) : new Date(0);
|
||||
}
|
||||
|
||||
// 更新最后更新时间
|
||||
function updateLastUpdate() {
|
||||
const now = new Date().toISOString();
|
||||
db.prepare("UPDATE metadata SET value = ? WHERE key = 'fetchAid-lastUpdate'").run(now);
|
||||
db.prepare("UPDATE metadata SET value = ? WHERE key = 'fetchAid-lastUpdate'")
|
||||
.run(now);
|
||||
}
|
||||
|
||||
// 辅助函数:获取数据
|
||||
@ -66,7 +75,9 @@ async function fetchData(pn: number, retries = MAX_RETRIES): Promise<any> {
|
||||
|
||||
// 插入 aid 到数据库
|
||||
function insertAid(aid: number) {
|
||||
db.prepare("INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')").run(aid);
|
||||
db.prepare(
|
||||
"INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')",
|
||||
).run(aid);
|
||||
}
|
||||
|
||||
// 主函数
|
||||
|
@ -5,7 +5,16 @@ import { ensureDir } from "https://deno.land/std@0.113.0/fs/mod.ts";
|
||||
|
||||
const aidPath = "./data/2025010104_c30_aids.txt";
|
||||
const db = new Database("./data/main.db", { int64: true });
|
||||
const regions = ["shanghai", "hangzhou", "qingdao", "beijing", "zhangjiakou", "chengdu", "shenzhen", "hohhot"];
|
||||
const regions = [
|
||||
"shanghai",
|
||||
"hangzhou",
|
||||
"qingdao",
|
||||
"beijing",
|
||||
"zhangjiakou",
|
||||
"chengdu",
|
||||
"shenzhen",
|
||||
"hohhot",
|
||||
];
|
||||
const logDir = "./logs/bili-info-crawl";
|
||||
const logFile = path.join(logDir, `run-${Date.now() / 1000}.log`);
|
||||
const shouldReadTextFile = false;
|
||||
@ -26,14 +35,20 @@ const requestQueue: number[] = [];
|
||||
|
||||
async function setupLogging() {
|
||||
await ensureDir(logDir);
|
||||
const logStream = await Deno.open(logFile, { write: true, create: true, append: true });
|
||||
const logStream = await Deno.open(logFile, {
|
||||
write: true,
|
||||
create: true,
|
||||
append: true,
|
||||
});
|
||||
|
||||
const redirectConsole =
|
||||
// deno-lint-ignore no-explicit-any
|
||||
(originalConsole: (...args: any[]) => void) =>
|
||||
// deno-lint-ignore no-explicit-any
|
||||
(...args: any[]) => {
|
||||
const message = args.map((arg) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" ");
|
||||
const message = args.map((
|
||||
arg,
|
||||
) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" ");
|
||||
originalConsole(message);
|
||||
logStream.write(new TextEncoder().encode(message + "\n"));
|
||||
};
|
||||
@ -78,7 +93,9 @@ async function readFromText() {
|
||||
const newAids = aids.filter((aid) => !existingAidsSet.has(aid));
|
||||
|
||||
// 插入这些新条目
|
||||
const insertStmt = db.prepare("INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')");
|
||||
const insertStmt = db.prepare(
|
||||
"INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')",
|
||||
);
|
||||
newAids.forEach((aid) => insertStmt.run(aid));
|
||||
}
|
||||
|
||||
@ -88,7 +105,9 @@ async function insertAidsToDB() {
|
||||
}
|
||||
|
||||
const aidsInDB = db
|
||||
.prepare("SELECT aid FROM bili_info_crawl WHERE status = 'pending' OR status = 'failed'")
|
||||
.prepare(
|
||||
"SELECT aid FROM bili_info_crawl WHERE status = 'pending' OR status = 'failed'",
|
||||
)
|
||||
.all()
|
||||
.map((row) => row.aid) as number[];
|
||||
|
||||
@ -98,13 +117,21 @@ async function insertAidsToDB() {
|
||||
|
||||
const processAid = async (aid: number) => {
|
||||
try {
|
||||
const res = await getBiliBiliVideoInfo(aid, regions[processedAids % regions.length]);
|
||||
const res = await getBiliBiliVideoInfo(
|
||||
aid,
|
||||
regions[processedAids % regions.length],
|
||||
);
|
||||
if (res === null) {
|
||||
updateAidStatus(aid, "failed");
|
||||
} else {
|
||||
const rawData = JSON.parse(res);
|
||||
if (rawData.code === 0) {
|
||||
updateAidStatus(aid, "success", rawData.data.View.bvid, JSON.stringify(rawData.data));
|
||||
updateAidStatus(
|
||||
aid,
|
||||
"success",
|
||||
rawData.data.View.bvid,
|
||||
JSON.stringify(rawData.data),
|
||||
);
|
||||
} else {
|
||||
updateAidStatus(aid, "error", undefined, res);
|
||||
}
|
||||
@ -136,7 +163,12 @@ async function insertAidsToDB() {
|
||||
console.log("Starting to process aids...");
|
||||
}
|
||||
|
||||
function updateAidStatus(aid: number, status: string, bvid?: string, data?: string) {
|
||||
function updateAidStatus(
|
||||
aid: number,
|
||||
status: string,
|
||||
bvid?: string,
|
||||
data?: string,
|
||||
) {
|
||||
const stmt = db.prepare(`
|
||||
UPDATE bili_info_crawl
|
||||
SET status = ?,
|
||||
@ -145,11 +177,22 @@ function updateAidStatus(aid: number, status: string, bvid?: string, data?: stri
|
||||
timestamp = ?
|
||||
WHERE aid = ?
|
||||
`);
|
||||
const params = [status, ...(bvid ? [bvid] : []), ...(data ? [data] : []), Date.now() / 1000, aid];
|
||||
const params = [
|
||||
status,
|
||||
...(bvid ? [bvid] : []),
|
||||
...(data ? [data] : []),
|
||||
Date.now() / 1000,
|
||||
aid,
|
||||
];
|
||||
stmt.run(...params);
|
||||
}
|
||||
|
||||
function logProgress(aid: number, processedAids: number, totalAids: number, startTime: number) {
|
||||
function logProgress(
|
||||
aid: number,
|
||||
processedAids: number,
|
||||
totalAids: number,
|
||||
startTime: number,
|
||||
) {
|
||||
const elapsedTime = Date.now() - startTime;
|
||||
const elapsedSeconds = Math.floor(elapsedTime / 1000);
|
||||
const elapsedMinutes = Math.floor(elapsedSeconds / 60);
|
||||
|
@ -1,4 +1,7 @@
|
||||
export async function getBiliBiliVideoInfo(bvidORaid?: string | number, region: string = "hangzhou") {
|
||||
export async function getBiliBiliVideoInfo(
|
||||
bvidORaid?: string | number,
|
||||
region: string = "hangzhou",
|
||||
) {
|
||||
const bvid = typeof bvidORaid === "string" ? bvidORaid : undefined;
|
||||
const aid = typeof bvidORaid === "number" ? bvidORaid : undefined;
|
||||
|
||||
@ -18,7 +21,10 @@ export async function getBiliBiliVideoInfo(bvidORaid?: string | number, region:
|
||||
}
|
||||
}
|
||||
|
||||
async function proxyRequestWithRegion(url: string, region: string): Promise<any | null> {
|
||||
async function proxyRequestWithRegion(
|
||||
url: string,
|
||||
region: string,
|
||||
): Promise<any | null> {
|
||||
const td = new TextDecoder();
|
||||
// aliyun configure set --access-key-id $ALIYUN_AK --access-key-secret $ALIYUN_SK --region cn-shenzhen --profile CVSA-shenzhen --mode AK
|
||||
const p = await new Deno.Command("aliyun", {
|
||||
@ -40,7 +46,9 @@ async function proxyRequestWithRegion(url: string, region: string): Promise<any
|
||||
const out = td.decode(p.stdout);
|
||||
const rawData = JSON.parse(out);
|
||||
if (rawData.statusCode !== 200) {
|
||||
console.error(`Error proxying request ${url} to ${region} , statusCode: ${rawData.statusCode}`);
|
||||
console.error(
|
||||
`Error proxying request ${url} to ${region} , statusCode: ${rawData.statusCode}`,
|
||||
);
|
||||
return null;
|
||||
} else {
|
||||
return JSON.parse(rawData.body);
|
||||
|
Loading…
Reference in New Issue
Block a user