update: insertion of snapshot schedule
This commit is contained in:
parent
9f9ef800d1
commit
e38dc96275
@ -3,7 +3,10 @@ import { AllDataType, BiliUserType } from "lib/db/schema.d.ts";
|
||||
import Akari from "lib/ml/akari.ts";
|
||||
|
||||
export async function videoExistsInAllData(client: Client, aid: number) {
|
||||
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bilibili_metadata WHERE aid = $1)`, [aid])
|
||||
return await client.queryObject<{ exists: boolean }>(
|
||||
`SELECT EXISTS(SELECT 1 FROM bilibili_metadata WHERE aid = $1)`,
|
||||
[aid],
|
||||
)
|
||||
.then((result) => result.rows[0].exists);
|
||||
}
|
||||
|
||||
|
10
lib/db/schema.d.ts
vendored
10
lib/db/schema.d.ts
vendored
@ -31,3 +31,13 @@ export interface VideoSnapshotType {
|
||||
aid: bigint;
|
||||
replies: number;
|
||||
}
|
||||
|
||||
export interface SnapshotScheduleType {
|
||||
id: number;
|
||||
aid: number;
|
||||
type?: string;
|
||||
created_at: string;
|
||||
started_at?: string;
|
||||
finished_at?: string;
|
||||
status: string;
|
||||
}
|
||||
|
@ -1,4 +1,7 @@
|
||||
import { DAY, MINUTE } from "$std/datetime/constants.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
|
||||
import { SnapshotScheduleType } from "./schema.d.ts";
|
||||
|
||||
/*
|
||||
Returns true if the specified `aid` has at least one record with "pending" or "processing" status.
|
||||
@ -22,9 +25,10 @@ export async function findClosestSnapshot(
|
||||
targetTime: Date,
|
||||
): Promise<Snapshot | null> {
|
||||
const query = `
|
||||
SELECT created_at, views FROM video_snapshot
|
||||
SELECT created_at, views
|
||||
FROM video_snapshot
|
||||
WHERE aid = $1
|
||||
ORDER BY ABS(EXTRACT(EPOCH FROM (created_at - $2::timestamptz))) ASC
|
||||
ORDER BY ABS(EXTRACT(EPOCH FROM (created_at - $2::timestamptz)))
|
||||
LIMIT 1
|
||||
`;
|
||||
const result = await client.queryObject<{ created_at: string; views: number }>(
|
||||
@ -49,5 +53,112 @@ export async function getLatestSnapshot(client: Client, aid: number): Promise<Sn
|
||||
return {
|
||||
created_at: new Date(row.created_at).getTime(),
|
||||
views: row.views,
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the number of snapshot schedules within the specified range.
|
||||
* @param client The database client.
|
||||
* @param start The start time of the range. (Timestamp in milliseconds)
|
||||
* @param end The end time of the range. (Timestamp in milliseconds)
|
||||
*/
|
||||
export async function getSnapshotScheduleCountWithinRange(client: Client, start: number, end: number) {
|
||||
const startTimeString = formatTimestampToPsql(start);
|
||||
const endTimeString = formatTimestampToPsql(end);
|
||||
const query = `
|
||||
SELECT COUNT(*) FROM snapshot_schedule
|
||||
WHERE started_at BETWEEN $1 AND $2
|
||||
AND status = 'pending'
|
||||
`;
|
||||
const res = await client.queryObject<{ count: number }>(query, [startTimeString, endTimeString]);
|
||||
return res.rows[0].count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Creates a new snapshot schedule record.
|
||||
* @param client The database client.
|
||||
* @param aid The aid of the video.
|
||||
* @param targetTime Scheduled time for snapshot. (Timestamp in milliseconds)
|
||||
*/
|
||||
export async function scheduleSnapshot(client: Client, aid: number, type: string, targetTime: number) {
|
||||
const ajustedTime = await adjustSnapshotTime(client, new Date(targetTime));
|
||||
return client.queryObject(
|
||||
`INSERT INTO snapshot_schedule (aid, type, started_at) VALUES ($1, $2, $3)`,
|
||||
[aid, type, ajustedTime.toISOString()],
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adjust the trigger time of the snapshot to ensure it does not exceed the frequency limit
|
||||
* @param client PostgreSQL client
|
||||
* @param expectedStartTime The expected snapshot time
|
||||
* @returns The adjusted actual snapshot time
|
||||
*/
|
||||
export async function adjustSnapshotTime(
|
||||
client: Client,
|
||||
expectedStartTime: Date,
|
||||
): Promise<Date> {
|
||||
const findWindowQuery = `
|
||||
WITH windows AS (
|
||||
SELECT generate_series(
|
||||
$1::timestamp, -- Start time: current time truncated to the nearest 5-minute window
|
||||
$2::timestamp, -- End time: 24 hours after the target time window starts
|
||||
INTERVAL '5 MINUTES'
|
||||
) AS window_start
|
||||
)
|
||||
SELECT w.window_start
|
||||
FROM windows w
|
||||
LEFT JOIN snapshot_schedule s ON s.started_at >= w.window_start
|
||||
AND s.started_at < w.window_start + INTERVAL '5 MINUTES'
|
||||
AND s.status = 'pending'
|
||||
GROUP BY w.window_start
|
||||
HAVING COUNT(s.*) < 2000
|
||||
ORDER BY w.window_start
|
||||
LIMIT 1;
|
||||
`;
|
||||
for (let i = 0; i < 7; i++) {
|
||||
const now = new Date(new Date().getTime() + 5 * MINUTE);
|
||||
const nowTruncated = truncateTo5MinInterval(now);
|
||||
const currentWindowStart = truncateTo5MinInterval(expectedStartTime);
|
||||
const end = new Date(currentWindowStart.getTime() + 1 * DAY);
|
||||
|
||||
const windowResult = await client.queryObject<{ window_start: Date }>(
|
||||
findWindowQuery,
|
||||
[nowTruncated, end],
|
||||
);
|
||||
|
||||
const windowStart = windowResult.rows[0]?.window_start;
|
||||
if (!windowStart) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return windowStart;
|
||||
}
|
||||
return expectedStartTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate the timestamp to the nearest 5-minute interval
|
||||
* @param timestamp The timestamp
|
||||
* @returns The truncated time
|
||||
*/
|
||||
function truncateTo5MinInterval(timestamp: Date): Date {
|
||||
const minutes = timestamp.getMinutes() - (timestamp.getMinutes() % 5);
|
||||
return new Date(
|
||||
timestamp.getFullYear(),
|
||||
timestamp.getMonth(),
|
||||
timestamp.getDate(),
|
||||
timestamp.getHours(),
|
||||
minutes,
|
||||
0,
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
export async function getSnapshotsInNextSecond(client: Client) {
|
||||
const res = await client.queryObject<SnapshotScheduleType>(
|
||||
`SELECT * FROM cvsa.public.snapshot_schedule WHERE started_at <= NOW() + INTERVAL '1 second'`,
|
||||
[],
|
||||
);
|
||||
return res.rows;
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
|
||||
import * as ort from "onnxruntime";
|
||||
|
||||
|
||||
function softmax(logits: Float32Array): number[] {
|
||||
const maxLogit = Math.max(...logits);
|
||||
const exponents = logits.map((logit) => Math.exp(logit - maxLogit));
|
||||
|
@ -6,19 +6,16 @@ import { WorkerError } from "lib/mq/schema.ts";
|
||||
const modelPath = "./model/model.onnx";
|
||||
|
||||
class MantisProto extends AIManager {
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
this.models = {
|
||||
"predictor": modelPath,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public override async init(): Promise<void> {
|
||||
await super.init();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
const Mantis = new MantisProto();
|
||||
|
@ -1,21 +1,37 @@
|
||||
import { Job } from "bullmq";
|
||||
import { db } from "lib/db/init.ts";
|
||||
import { getVideosNearMilestone } from "lib/db/snapshot.ts";
|
||||
import { findClosestSnapshot, getLatestSnapshot, videoHasActiveSchedule } from "lib/db/snapshotSchedule.ts";
|
||||
import {
|
||||
findClosestSnapshot,
|
||||
getLatestSnapshot,
|
||||
getSnapshotsInNextSecond,
|
||||
scheduleSnapshot,
|
||||
videoHasActiveSchedule,
|
||||
} from "lib/db/snapshotSchedule.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { HOUR, MINUTE } from "$std/datetime/constants.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import { SnapshotQueue } from "lib/mq/index.ts";
|
||||
|
||||
const priorityMap: { [key: string]: number } = {
|
||||
"milestone": 1,
|
||||
};
|
||||
|
||||
export const snapshotTickWorker = async (_job: Job) => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
// TODO: implement
|
||||
const schedules = await getSnapshotsInNextSecond(client);
|
||||
for (const schedule of schedules) {
|
||||
let priority = 3;
|
||||
if (schedule.type && priorityMap[schedule.type])
|
||||
priority = priorityMap[schedule.type];
|
||||
await SnapshotQueue.add("snapshotVideo", { aid: schedule.aid, priority });
|
||||
}
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
||||
const log = (a: number, b: number = 10) => Math.log(a) / Math.log(b);
|
||||
|
||||
export const closetMilestone = (views: number) => {
|
||||
if (views < 100000) return 100000;
|
||||
if (views < 1000000) return 1000000;
|
||||
@ -24,6 +40,12 @@ export const closetMilestone = (views: number) => {
|
||||
|
||||
const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base);
|
||||
|
||||
/*
|
||||
* Returns the minimum ETA in hours for the next snapshot
|
||||
* @param client - Postgres client
|
||||
* @param aid - aid of the video
|
||||
* @returns ETA in hours
|
||||
*/
|
||||
const getAdjustedShortTermETA = async (client: Client, aid: number) => {
|
||||
const latestSnapshot = await getLatestSnapshot(client, aid);
|
||||
// Immediately dispatch a snapshot if there is no snapshot yet
|
||||
@ -61,10 +83,12 @@ export const collectMilestoneSnapshotsWorker = async (_job: Job) => {
|
||||
if (await videoHasActiveSchedule(client, video.aid)) continue;
|
||||
const eta = await getAdjustedShortTermETA(client, video.aid);
|
||||
if (eta > 72) continue;
|
||||
// TODO: dispatch snapshot job
|
||||
const now = Date.now();
|
||||
const targetTime = now + eta * HOUR;
|
||||
await scheduleSnapshot(client, video.aid, "milestone", targetTime);
|
||||
}
|
||||
} catch (_e) {
|
||||
//
|
||||
} catch (e) {
|
||||
logger.error(e as Error, "mq", "fn:collectMilestoneSnapshotsWorker");
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
|
@ -288,7 +288,11 @@ class NetScheduler {
|
||||
const fileId = randomUUID();
|
||||
await Deno.writeFile(`./logs/files/${fileId}.stdout`, output.stdout);
|
||||
await Deno.writeFile(`./logs/files/${fileId}.stderr`, output.stderr);
|
||||
logger.log(`Returned non-200 status code. Raw ouput saved to ./logs/files/${fileId}.stdout/stderr`, "net", "fn:alicloudFcRequest")
|
||||
logger.log(
|
||||
`Returned non-200 status code. Raw ouput saved to ./logs/files/${fileId}.stdout/stderr`,
|
||||
"net",
|
||||
"fn:alicloudFcRequest",
|
||||
);
|
||||
throw new NetSchedulerError(
|
||||
`Error proxying ${url} to ali-fc region ${region}, code: ${rawData.statusCode}.`,
|
||||
"ALICLOUD_PROXY_ERR",
|
||||
@ -301,7 +305,11 @@ class NetScheduler {
|
||||
const fileId = randomUUID();
|
||||
rawOutput && await Deno.writeFile(`./logs/files/${fileId}.stdout`, rawOutput);
|
||||
rawErr && await Deno.writeFile(`./logs/files/${fileId}.stderr`, rawErr);
|
||||
logger.log(`Error occurred. Raw ouput saved to ./logs/files/${fileId}.stdout/stderr`, "net", "fn:alicloudFcRequest")
|
||||
logger.log(
|
||||
`Error occurred. Raw ouput saved to ./logs/files/${fileId}.stdout/stderr`,
|
||||
"net",
|
||||
"fn:alicloudFcRequest",
|
||||
);
|
||||
}
|
||||
logger.error(e as Error, "net", "fn:alicloudFcRequest");
|
||||
throw new NetSchedulerError(`Unhandled error: Cannot proxy ${url} to ali-fc.`, "ALICLOUD_PROXY_ERR", e);
|
||||
|
@ -4,7 +4,7 @@ import { getVideoInfo } from "lib/net/getVideoInfo.ts";
|
||||
export async function insertVideoStats(client: Client, aid: number, task: string) {
|
||||
const data = await getVideoInfo(aid, task);
|
||||
const time = new Date().getTime();
|
||||
if (typeof data == 'number') {
|
||||
if (typeof data == "number") {
|
||||
return data;
|
||||
}
|
||||
const views = data.stat.view;
|
||||
@ -14,10 +14,13 @@ export async function insertVideoStats(client: Client, aid: number, task: string
|
||||
const coins = data.stat.coin;
|
||||
const shares = data.stat.share;
|
||||
const favorites = data.stat.favorite;
|
||||
await client.queryObject(`
|
||||
await client.queryObject(
|
||||
`
|
||||
INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
`, [aid, views, danmakus, replies, likes, coins, shares, favorites]);
|
||||
`,
|
||||
[aid, views, danmakus, replies, likes, coins, shares, favorites],
|
||||
);
|
||||
return {
|
||||
aid,
|
||||
views,
|
||||
@ -27,6 +30,6 @@ export async function insertVideoStats(client: Client, aid: number, task: string
|
||||
coins,
|
||||
shares,
|
||||
favorites,
|
||||
time
|
||||
}
|
||||
time,
|
||||
};
|
||||
}
|
||||
|
4
lib/net/bilibili.d.ts
vendored
4
lib/net/bilibili.d.ts
vendored
@ -26,8 +26,8 @@ interface VideoInfoData {
|
||||
mid: number;
|
||||
name: string;
|
||||
face: string;
|
||||
},
|
||||
stat: VideoStats,
|
||||
};
|
||||
stat: VideoStats;
|
||||
}
|
||||
|
||||
interface VideoDetailsData {
|
||||
|
@ -1,6 +1,6 @@
|
||||
export const formatSeconds = (seconds: number) => {
|
||||
if (seconds < 60) {
|
||||
return `${(seconds).toFixed(1)}s`;
|
||||
return `${seconds.toFixed(1)}s`;
|
||||
}
|
||||
if (seconds < 3600) {
|
||||
return `${Math.floor(seconds / 60)}m${(seconds % 60).toFixed(1)}s`;
|
||||
|
@ -5,7 +5,11 @@ import logger from "lib/log/logger.ts";
|
||||
import { lockManager } from "lib/mq/lockManager.ts";
|
||||
import { WorkerError } from "lib/mq/schema.ts";
|
||||
import { getVideoInfoWorker } from "lib/mq/exec/getLatestVideos.ts";
|
||||
import { snapshotTickWorker, collectMilestoneSnapshotsWorker, takeSnapshotForVideoWorker } from "lib/mq/exec/snapshotTick.ts";
|
||||
import {
|
||||
collectMilestoneSnapshotsWorker,
|
||||
snapshotTickWorker,
|
||||
takeSnapshotForVideoWorker,
|
||||
} from "lib/mq/exec/snapshotTick.ts";
|
||||
|
||||
Deno.addSignalListener("SIGINT", async () => {
|
||||
logger.log("SIGINT Received: Shutting down workers...", "mq");
|
||||
@ -75,4 +79,4 @@ const snapshotWorker = new Worker(
|
||||
snapshotWorker.on("error", (err) => {
|
||||
const e = err as WorkerError;
|
||||
logger.error(e.rawError, e.service, e.codePath);
|
||||
})
|
||||
});
|
||||
|
@ -13,7 +13,7 @@ Deno.test("Akari AI - normal cases accuracy test", async () => {
|
||||
const result = await Akari.classifyVideo(
|
||||
testCase.title,
|
||||
testCase.desc,
|
||||
testCase.tags
|
||||
testCase.tags,
|
||||
);
|
||||
assertEquals(result, testCase.label);
|
||||
}
|
||||
@ -35,12 +35,12 @@ Deno.test("Akari AI - performance test", async () => {
|
||||
await Akari.classifyVideo(
|
||||
title,
|
||||
desc,
|
||||
tags
|
||||
tags,
|
||||
);
|
||||
}
|
||||
const end = performance.now();
|
||||
const elapsed = (end - time) / SECOND;
|
||||
const throughput = N / elapsed;
|
||||
assertGreaterOrEqual(throughput, 100);
|
||||
console.log(`Akari AI throughput: ${throughput.toFixed(1)} samples / sec`)
|
||||
console.log(`Akari AI throughput: ${throughput.toFixed(1)} samples / sec`);
|
||||
});
|
Loading…
Reference in New Issue
Block a user