update: insertion of snapshot schedule

This commit is contained in:
alikia2x (寒寒) 2025-03-22 20:51:28 +08:00
parent 9f9ef800d1
commit e38dc96275
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
15 changed files with 301 additions and 142 deletions

View File

@ -3,7 +3,10 @@ import { AllDataType, BiliUserType } from "lib/db/schema.d.ts";
import Akari from "lib/ml/akari.ts";
export async function videoExistsInAllData(client: Client, aid: number) {
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bilibili_metadata WHERE aid = $1)`, [aid])
return await client.queryObject<{ exists: boolean }>(
`SELECT EXISTS(SELECT 1 FROM bilibili_metadata WHERE aid = $1)`,
[aid],
)
.then((result) => result.rows[0].exists);
}

10
lib/db/schema.d.ts vendored
View File

@ -31,3 +31,13 @@ export interface VideoSnapshotType {
aid: bigint;
replies: number;
}
// Row shape of the snapshot_schedule table (consumed by getSnapshotsInNextSecond
// and the snapshot tick worker).
export interface SnapshotScheduleType {
id: number;
aid: number;
// Schedule category, e.g. "milestone" — NOTE(review): presumably other types exist; confirm against callers.
type?: string;
// Timestamps are decoded as strings — assumed psql/ISO timestamp format; TODO confirm driver decoding.
created_at: string;
// Scheduled trigger time; unset until the schedule is planned.
started_at?: string;
// Completion time; unset while the schedule is still pending/processing.
finished_at?: string;
// e.g. "pending" / "processing" (statuses referenced by the queries in snapshotSchedule.ts).
status: string;
}

View File

@ -1,4 +1,7 @@
import { DAY, MINUTE } from "$std/datetime/constants.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
import { SnapshotScheduleType } from "./schema.d.ts";
/*
Returns true if the specified `aid` has at least one record with "pending" or "processing" status.
@ -22,9 +25,10 @@ export async function findClosestSnapshot(
targetTime: Date,
): Promise<Snapshot | null> {
const query = `
SELECT created_at, views FROM video_snapshot
SELECT created_at, views
FROM video_snapshot
WHERE aid = $1
ORDER BY ABS(EXTRACT(EPOCH FROM (created_at - $2::timestamptz))) ASC
ORDER BY ABS(EXTRACT(EPOCH FROM (created_at - $2::timestamptz)))
LIMIT 1
`;
const result = await client.queryObject<{ created_at: string; views: number }>(
@ -49,5 +53,112 @@ export async function getLatestSnapshot(client: Client, aid: number): Promise<Sn
return {
created_at: new Date(row.created_at).getTime(),
views: row.views,
};
}
/**
 * Returns the number of pending snapshot schedules within the specified range.
 * @param client The database client.
 * @param start The start time of the range. (Timestamp in milliseconds)
 * @param end The end time of the range. (Timestamp in milliseconds)
 * @returns Count of rows with status 'pending' whose started_at falls in [start, end].
 */
export async function getSnapshotScheduleCountWithinRange(client: Client, start: number, end: number) {
const startTimeString = formatTimestampToPsql(start);
const endTimeString = formatTimestampToPsql(end);
const query = `
SELECT COUNT(*) FROM snapshot_schedule
WHERE started_at BETWEEN $1 AND $2
AND status = 'pending'
`;
// NOTE(review): COUNT(*) is int8 in PostgreSQL; some driver versions decode int8 as
// bigint (or string), not number — confirm the declared row type matches the decoding.
const res = await client.queryObject<{ count: number }>(query, [startTimeString, endTimeString]);
return res.rows[0].count;
}
/**
 * Creates a new snapshot schedule record.
 * @param client The database client.
 * @param aid The aid of the video.
 * @param type The type of the schedule (e.g. "milestone"); stored in the `type` column.
 * @param targetTime Scheduled time for snapshot. (Timestamp in milliseconds)
 * @returns The result of the INSERT query.
 */
export async function scheduleSnapshot(client: Client, aid: number, type: string, targetTime: number) {
// Shift the requested time into the nearest 5-minute window that is under the frequency cap.
const adjustedTime = await adjustSnapshotTime(client, new Date(targetTime));
return client.queryObject(
`INSERT INTO snapshot_schedule (aid, type, started_at) VALUES ($1, $2, $3)`,
[aid, type, adjustedTime.toISOString()],
);
}
/**
 * Adjust the trigger time of the snapshot to ensure it does not exceed the frequency limit
 * @param client PostgreSQL client
 * @param expectedStartTime The expected snapshot time
 * @returns The adjusted actual snapshot time, or the unmodified expectedStartTime if no
 * under-capacity 5-minute window could be found.
 */
export async function adjustSnapshotTime(
client: Client,
expectedStartTime: Date,
): Promise<Date> {
// Finds the earliest 5-minute window between $1 and $2 that currently holds
// fewer than 2000 pending schedules.
const findWindowQuery = `
WITH windows AS (
SELECT generate_series(
$1::timestamp, -- Start time: current time truncated to the nearest 5-minute window
$2::timestamp, -- End time: 24 hours after the target time window starts
INTERVAL '5 MINUTES'
) AS window_start
)
SELECT w.window_start
FROM windows w
LEFT JOIN snapshot_schedule s ON s.started_at >= w.window_start
AND s.started_at < w.window_start + INTERVAL '5 MINUTES'
AND s.status = 'pending'
GROUP BY w.window_start
HAVING COUNT(s.*) < 2000
ORDER BY w.window_start
LIMIT 1;
`;
// NOTE(review): each iteration recomputes essentially identical inputs (`now` only
// advances by microseconds between attempts), so all 7 retries will almost always
// return the same result as the first — confirm whether a delay/backoff was intended.
for (let i = 0; i < 7; i++) {
// Search starts slightly in the future (now + 5 min) so a window that has already
// begun is never selected.
const now = new Date(new Date().getTime() + 5 * MINUTE);
const nowTruncated = truncateTo5MinInterval(now);
const currentWindowStart = truncateTo5MinInterval(expectedStartTime);
// Search horizon: 24 hours after the expected window starts.
// NOTE(review): if expectedStartTime is more than a day in the past, end < start and
// generate_series yields no rows — verify callers never pass past timestamps.
const end = new Date(currentWindowStart.getTime() + 1 * DAY);
const windowResult = await client.queryObject<{ window_start: Date }>(
findWindowQuery,
[nowTruncated, end],
);
const windowStart = windowResult.rows[0]?.window_start;
if (!windowStart) {
continue;
}
return windowStart;
}
// Every attempt failed to find capacity: fall back to the caller's requested time.
return expectedStartTime;
}
/**
 * Truncate a timestamp down to the start of its containing 5-minute interval,
 * in local time. Seconds and milliseconds are zeroed as well.
 * @param timestamp The timestamp to truncate
 * @returns A new Date at the start of the 5-minute window (input is not mutated)
 */
function truncateTo5MinInterval(timestamp: Date): Date {
const truncated = new Date(timestamp.getTime());
// setMinutes(min, 0, 0) also zeroes seconds and milliseconds in one call.
truncated.setMinutes(truncated.getMinutes() - (truncated.getMinutes() % 5), 0, 0);
return truncated;
}
/**
 * Fetches all snapshot schedules whose trigger time falls within the next second.
 * @param client The database client.
 * @returns Schedule rows that are due for dispatch.
 */
export async function getSnapshotsInNextSecond(client: Client) {
// Use the bare table name for consistency with every other query in this file
// (the original hard-coded the `cvsa.public.` schema prefix here only).
// NOTE(review): there is no status filter, so finished/processing schedules are
// returned again on every tick — confirm whether `AND status = 'pending'` is intended.
const query = `SELECT * FROM snapshot_schedule WHERE started_at <= NOW() + INTERVAL '1 second'`;
const res = await client.queryObject<SnapshotScheduleType>(query);
return res.rows;
}

View File

@ -1,7 +1,6 @@
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
import * as ort from "onnxruntime";
function softmax(logits: Float32Array): number[] {
const maxLogit = Math.max(...logits);
const exponents = logits.map((logit) => Math.exp(logit - maxLogit));

View File

@ -6,19 +6,16 @@ import { WorkerError } from "lib/mq/schema.ts";
const modelPath = "./model/model.onnx";
class MantisProto extends AIManager {
constructor() {
super();
this.models = {
"predictor": modelPath,
}
};
}
public override async init(): Promise<void> {
await super.init();
}
}
const Mantis = new MantisProto();

View File

@ -1,21 +1,37 @@
import { Job } from "bullmq";
import { db } from "lib/db/init.ts";
import { getVideosNearMilestone } from "lib/db/snapshot.ts";
import { findClosestSnapshot, getLatestSnapshot, videoHasActiveSchedule } from "lib/db/snapshotSchedule.ts";
import {
findClosestSnapshot,
getLatestSnapshot,
getSnapshotsInNextSecond,
scheduleSnapshot,
videoHasActiveSchedule,
} from "lib/db/snapshotSchedule.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { HOUR, MINUTE } from "$std/datetime/constants.ts";
import logger from "lib/log/logger.ts";
import { SnapshotQueue } from "lib/mq/index.ts";
const priorityMap: { [key: string]: number } = {
"milestone": 1,
};
export const snapshotTickWorker = async (_job: Job) => {
const client = await db.connect();
try {
// TODO: implement
const schedules = await getSnapshotsInNextSecond(client);
for (const schedule of schedules) {
let priority = 3;
if (schedule.type && priorityMap[schedule.type])
priority = priorityMap[schedule.type];
await SnapshotQueue.add("snapshotVideo", { aid: schedule.aid, priority });
}
} finally {
client.release();
}
};
const log = (a: number, b: number = 10) => Math.log(a) / Math.log(b);
export const closetMilestone = (views: number) => {
if (views < 100000) return 100000;
if (views < 1000000) return 1000000;
@ -24,6 +40,12 @@ export const closetMilestone = (views: number) => {
const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base);
/*
* Returns the minimum ETA in hours for the next snapshot
* @param client - Postgres client
* @param aid - aid of the video
* @returns ETA in hours
*/
const getAdjustedShortTermETA = async (client: Client, aid: number) => {
const latestSnapshot = await getLatestSnapshot(client, aid);
// Immediately dispatch a snapshot if there is no snapshot yet
@ -61,10 +83,12 @@ export const collectMilestoneSnapshotsWorker = async (_job: Job) => {
if (await videoHasActiveSchedule(client, video.aid)) continue;
const eta = await getAdjustedShortTermETA(client, video.aid);
if (eta > 72) continue;
// TODO: dispatch snapshot job
const now = Date.now();
const targetTime = now + eta * HOUR;
await scheduleSnapshot(client, video.aid, "milestone", targetTime);
}
} catch (_e) {
//
} catch (e) {
logger.error(e as Error, "mq", "fn:collectMilestoneSnapshotsWorker");
} finally {
client.release();
}

View File

@ -288,7 +288,11 @@ class NetScheduler {
const fileId = randomUUID();
await Deno.writeFile(`./logs/files/${fileId}.stdout`, output.stdout);
await Deno.writeFile(`./logs/files/${fileId}.stderr`, output.stderr);
logger.log(`Returned non-200 status code. Raw ouput saved to ./logs/files/${fileId}.stdout/stderr`, "net", "fn:alicloudFcRequest")
logger.log(
`Returned non-200 status code. Raw ouput saved to ./logs/files/${fileId}.stdout/stderr`,
"net",
"fn:alicloudFcRequest",
);
throw new NetSchedulerError(
`Error proxying ${url} to ali-fc region ${region}, code: ${rawData.statusCode}.`,
"ALICLOUD_PROXY_ERR",
@ -301,7 +305,11 @@ class NetScheduler {
const fileId = randomUUID();
rawOutput && await Deno.writeFile(`./logs/files/${fileId}.stdout`, rawOutput);
rawErr && await Deno.writeFile(`./logs/files/${fileId}.stderr`, rawErr);
logger.log(`Error occurred. Raw ouput saved to ./logs/files/${fileId}.stdout/stderr`, "net", "fn:alicloudFcRequest")
logger.log(
`Error occurred. Raw ouput saved to ./logs/files/${fileId}.stdout/stderr`,
"net",
"fn:alicloudFcRequest",
);
}
logger.error(e as Error, "net", "fn:alicloudFcRequest");
throw new NetSchedulerError(`Unhandled error: Cannot proxy ${url} to ali-fc.`, "ALICLOUD_PROXY_ERR", e);

View File

@ -4,7 +4,7 @@ import { getVideoInfo } from "lib/net/getVideoInfo.ts";
export async function insertVideoStats(client: Client, aid: number, task: string) {
const data = await getVideoInfo(aid, task);
const time = new Date().getTime();
if (typeof data == 'number') {
if (typeof data == "number") {
return data;
}
const views = data.stat.view;
@ -14,10 +14,13 @@ export async function insertVideoStats(client: Client, aid: number, task: string
const coins = data.stat.coin;
const shares = data.stat.share;
const favorites = data.stat.favorite;
await client.queryObject(`
await client.queryObject(
`
INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
`, [aid, views, danmakus, replies, likes, coins, shares, favorites]);
`,
[aid, views, danmakus, replies, likes, coins, shares, favorites],
);
return {
aid,
views,
@ -27,6 +30,6 @@ export async function insertVideoStats(client: Client, aid: number, task: string
coins,
shares,
favorites,
time
}
time,
};
}

View File

@ -26,8 +26,8 @@ interface VideoInfoData {
mid: number;
name: string;
face: string;
},
stat: VideoStats,
};
stat: VideoStats;
}
interface VideoDetailsData {

View File

@ -1,6 +1,6 @@
export const formatSeconds = (seconds: number) => {
if (seconds < 60) {
return `${(seconds).toFixed(1)}s`;
return `${seconds.toFixed(1)}s`;
}
if (seconds < 3600) {
return `${Math.floor(seconds / 60)}m${(seconds % 60).toFixed(1)}s`;

View File

@ -5,7 +5,11 @@ import logger from "lib/log/logger.ts";
import { lockManager } from "lib/mq/lockManager.ts";
import { WorkerError } from "lib/mq/schema.ts";
import { getVideoInfoWorker } from "lib/mq/exec/getLatestVideos.ts";
import { snapshotTickWorker, collectMilestoneSnapshotsWorker, takeSnapshotForVideoWorker } from "lib/mq/exec/snapshotTick.ts";
import {
collectMilestoneSnapshotsWorker,
snapshotTickWorker,
takeSnapshotForVideoWorker,
} from "lib/mq/exec/snapshotTick.ts";
Deno.addSignalListener("SIGINT", async () => {
logger.log("SIGINT Received: Shutting down workers...", "mq");
@ -75,4 +79,4 @@ const snapshotWorker = new Worker(
snapshotWorker.on("error", (err) => {
const e = err as WorkerError;
logger.error(e.rawError, e.service, e.codePath);
})
});

View File

@ -13,7 +13,7 @@ Deno.test("Akari AI - normal cases accuracy test", async () => {
const result = await Akari.classifyVideo(
testCase.title,
testCase.desc,
testCase.tags
testCase.tags,
);
assertEquals(result, testCase.label);
}
@ -35,12 +35,12 @@ Deno.test("Akari AI - performance test", async () => {
await Akari.classifyVideo(
title,
desc,
tags
tags,
);
}
const end = performance.now();
const elapsed = (end - time) / SECOND;
const throughput = N / elapsed;
assertGreaterOrEqual(throughput, 100);
console.log(`Akari AI throughput: ${throughput.toFixed(1)} samples / sec`)
console.log(`Akari AI throughput: ${throughput.toFixed(1)} samples / sec`);
});