add: insert labelled songs into songs table

This commit is contained in:
alikia2x (寒寒) 2025-03-08 00:55:29 +08:00
parent 2a2e65804f
commit fa414e89ce
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
12 changed files with 231 additions and 148 deletions

View File

@ -8,7 +8,9 @@ export async function videoExistsInAllData(client: Client, aid: number) {
} }
export async function userExistsInBiliUsers(client: Client, uid: number) { export async function userExistsInBiliUsers(client: Client, uid: number) {
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bili_user WHERE uid = $1)`, [uid]) return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bili_user WHERE uid = $1)`, [
uid,
]);
} }
export async function getUnlabelledVideos(client: Client) { export async function getUnlabelledVideos(client: Client) {
@ -36,16 +38,17 @@ export async function getVideoInfoFromAllData(client: Client, aid: number) {
const q = await client.queryObject<BiliUserType>( const q = await client.queryObject<BiliUserType>(
`SELECT * FROM bili_user WHERE uid = $1`, `SELECT * FROM bili_user WHERE uid = $1`,
[row.uid], [row.uid],
) );
const userRow = q.rows[0]; const userRow = q.rows[0];
if (userRow) if (userRow) {
authorInfo = userRow.desc; authorInfo = userRow.desc;
} }
}
return { return {
title: row.title, title: row.title,
description: row.description, description: row.description,
tags: row.tags, tags: row.tags,
author_info: authorInfo author_info: authorInfo,
}; };
} }
@ -57,7 +60,7 @@ export async function getUnArchivedBiliUsers(client: Client) {
LEFT JOIN bili_user bu ON ad.uid = bu.uid LEFT JOIN bili_user bu ON ad.uid = bu.uid
WHERE bu.uid IS NULL; WHERE bu.uid IS NULL;
`, `,
[] [],
); );
const rows = queryResult.rows; const rows = queryResult.rows;
return rows.map((row) => row.uid); return rows.map((row) => row.uid);

29
lib/db/songs.ts Normal file
View File

@ -0,0 +1,29 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
/**
 * Lists the aids found in `labelling_result` with a non-zero label that do not
 * yet have a matching row in `songs`.
 *
 * @param client - Postgres client used to run the query.
 * @returns The `aid` of every row returned by the query, in result order.
 */
export async function getNotCollectedSongs(client: Client) {
	// The SQL text is kept flush-left so the string sent to the server is unchanged.
	const sql = `
SELECT lr.aid
FROM labelling_result lr
WHERE lr.label != 0
AND NOT EXISTS (
SELECT 1
FROM songs s
WHERE s.aid = lr.aid
);
`;
	const { rows } = await client.queryObject<{ aid: number }>(sql);
	const aids: number[] = [];
	for (const row of rows) {
		aids.push(row.aid);
	}
	return aids;
}
/**
 * Checks whether the `songs` table already contains a row for the given aid.
 *
 * @param client - Postgres client used to run the query.
 * @param aid - Video aid to look up.
 * @returns The `exists` column of the first row returned by the `SELECT EXISTS` query.
 */
export async function aidExistsInSongs(client: Client, aid: number) {
	// The SQL text is kept flush-left so the string sent to the server is unchanged.
	const sql = `
SELECT EXISTS (
SELECT 1
FROM songs
WHERE aid = $1
);
`;
	const { rows } = await client.queryObject<{ exists: boolean }>(sql, [aid]);
	const firstRow = rows[0];
	return firstRow.exists;
}

View File

@ -70,13 +70,13 @@ function calculateMetrics(labels: number[], predictions: number[], elapsedTime:
speed: string; speed: string;
} { } {
// 输出label和prediction不一样的index列表 // 输出label和prediction不一样的index列表
const arr = [] const arr = [];
for (let i = 0; i < labels.length; i++) { for (let i = 0; i < labels.length; i++) {
if (labels[i] !== predictions[i] && predictions[i] == 0) { if (labels[i] !== predictions[i] && predictions[i] == 0) {
arr.push([i + 1, labels[i], predictions[i]]) arr.push([i + 1, labels[i], predictions[i]]);
} }
} }
console.log(arr) console.log(arr);
// 初始化混淆矩阵 // 初始化混淆矩阵
const classCount = Math.max(...labels, ...predictions) + 1; const classCount = Math.max(...labels, ...predictions) + 1;
const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0)); const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
@ -146,7 +146,7 @@ async function evaluateModel(session: ort.InferenceSession): Promise<{
const embeddings = await getONNXEmbeddings([ const embeddings = await getONNXEmbeddings([
sample.title, sample.title,
sample.description, sample.description,
sample.tags.join(",") sample.tags.join(","),
], session); ], session);
const probabilities = await runClassification(embeddings); const probabilities = await runClassification(embeddings);

View File

@ -5,6 +5,8 @@ import { classifyVideo } from "lib/ml/filter_inference.ts";
import { ClassifyVideoQueue } from "lib/mq/index.ts"; import { ClassifyVideoQueue } from "lib/mq/index.ts";
import logger from "lib/log/logger.ts"; import logger from "lib/log/logger.ts";
import { lockManager } from "lib/mq/lockManager.ts"; import { lockManager } from "lib/mq/lockManager.ts";
import { aidExistsInSongs } from "lib/db/songs.ts";
import { insertIntoSongs } from "lib/mq/task/collectSongs.ts";
export const classifyVideoWorker = async (job: Job) => { export const classifyVideoWorker = async (job: Job) => {
const client = await db.connect(); const client = await db.connect();
@ -23,6 +25,11 @@ export const classifyVideoWorker = async (job: Job) => {
} }
await insertVideoLabel(client, aid, label); await insertVideoLabel(client, aid, label);
const exists = await aidExistsInSongs(client, aid);
if (!exists) {
await insertIntoSongs(client, aid);
}
client.release(); client.release();
await job.updateData({ await job.updateData({

View File

@ -1,6 +1,8 @@
import { Job } from "bullmq"; import { Job } from "bullmq";
import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts"; import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
import { db } from "lib/db/init.ts"; import { db } from "lib/db/init.ts";
import { insertVideoInfo } from "lib/mq/task/getVideoInfo.ts";
import { collectSongs } from "lib/mq/task/collectSongs.ts";
export const getLatestVideosWorker = async (_job: Job): Promise<void> => { export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
const client = await db.connect(); const client = await db.connect();
@ -10,3 +12,26 @@ export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
client.release(); client.release();
} }
}; };
/**
 * Job handler that runs `collectSongs` on a client obtained from `db.connect()`.
 *
 * The client is released in the `finally` clause, so it is handed back whether
 * `collectSongs` resolves or throws.
 *
 * @param _job - The queue job; its contents are not read.
 */
export const collectSongsWorker = async (_job: Job): Promise<void> => {
	const connection = await db.connect();
	try {
		await collectSongs(connection);
	} finally {
		connection.release();
	}
};
/**
 * Job handler that calls `insertVideoInfo` for the aid carried in `job.data`.
 *
 * @param job - Queue job; `job.data.aid` is the video aid to process.
 * @returns `0` once `insertVideoInfo` has resolved, or `3` when `job.data.aid`
 * is falsy (nothing is inserted in that case).
 */
export const getVideoInfoWorker = async (job: Job): Promise<number> => {
	const connection = await db.connect();
	try {
		const { aid } = job.data;
		if (aid) {
			await insertVideoInfo(connection, aid);
			return 0;
		}
		// No usable aid on the job: skip the insert and report code 3.
		return 3;
	} finally {
		// Runs on every path above, including when insertVideoInfo throws.
		connection.release();
	}
};

View File

@ -1,17 +0,0 @@
import { Job } from "bullmq";
import { db } from "lib/db/init.ts";
import { insertVideoInfo } from "lib/mq/task/getVideoInfo.ts";
/**
 * Processes a "get video info" job by passing its aid to `insertVideoInfo`.
 *
 * @param job - Queue job whose `data.aid` names the video to process.
 * @returns `3` if `job.data.aid` is falsy, otherwise `0` after the insert completes.
 */
export const getVideoInfoWorker = async (job: Job): Promise<number> => {
	const dbClient = await db.connect();
	try {
		const videoAid = job.data.aid;
		const hasAid = Boolean(videoAid);
		if (!hasAid) {
			return 3;
		}
		await insertVideoInfo(dbClient, videoAid);
		return 0;
	} finally {
		// Release the client regardless of which branch returned or threw.
		dbClient.release();
	}
};

View File

@ -11,6 +11,10 @@ export async function initMQ() {
every: 5 * MINUTE, every: 5 * MINUTE,
immediately: true, immediately: true,
}); });
await LatestVideosQueue.upsertJobScheduler("collectSongs", {
every: 3 * MINUTE,
immediately: true,
});
logger.log("Message queue initialized."); logger.log("Message queue initialized.");
} }

View File

@ -0,0 +1,29 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { aidExistsInSongs, getNotCollectedSongs } from "lib/db/songs.ts";
import logger from "lib/log/logger.ts";
/**
 * Inserts into `songs` every aid reported by `getNotCollectedSongs`.
 *
 * Each aid is re-checked with `aidExistsInSongs` immediately before the insert;
 * aids that are already present are skipped without logging.
 *
 * @param client - Postgres client shared by all queries issued here.
 */
export async function collectSongs(client: Client) {
	const pendingAids = await getNotCollectedSongs(client);
	// Processed one at a time, in the order the query returned them.
	for (const aid of pendingAids) {
		const alreadyCollected = await aidExistsInSongs(client, aid);
		if (!alreadyCollected) {
			await insertIntoSongs(client, aid);
			logger.log(`Video ${aid} was added into the songs table.`, "mq", "fn:collectSongs");
		}
	}
}
/**
 * Inserts a row for `aid` into `songs`, taking `bvid`, `published_at` and
 * `duration` from the `all_data` row with the same aid.
 *
 * The statement ends with `ON CONFLICT DO NOTHING`, so a conflicting existing
 * row is left untouched.
 *
 * @param client - Postgres client used to run the statement.
 * @param aid - Video aid to insert.
 */
export async function insertIntoSongs(client: Client, aid: number) {
	// The SQL text is kept flush-left so the string sent to the server is unchanged.
	const statement = `
INSERT INTO songs (aid, bvid, published_at, duration)
VALUES (
$1,
(SELECT bvid FROM all_data WHERE aid = $1),
(SELECT published_at FROM all_data WHERE aid = $1),
(SELECT duration FROM all_data WHERE aid = $1)
)
ON CONFLICT DO NOTHING
`;
	const params = [aid];
	await client.queryObject(statement, params);
}

View File

@ -34,8 +34,7 @@ export async function insertVideoInfo(client: Client, aid: number) {
`INSERT INTO bili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`, `INSERT INTO bili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`,
[uid, data.View.owner.name, data.Card.card.sign, data.Card.follower], [uid, data.View.owner.name, data.Card.card.sign, data.Card.follower],
); );
} } else {
else {
await client.queryObject( await client.queryObject(
`UPDATE bili_user SET fans = $1 WHERE uid = $2`, `UPDATE bili_user SET fans = $1 WHERE uid = $2`,
[data.Card.follower, uid], [data.Card.follower, uid],

View File

@ -26,12 +26,13 @@ export async function queueLatestVideos(
if (videoExists) { if (videoExists) {
continue; continue;
} }
await LatestVideosQueue.add("getVideoInfo", { aid }, { delay, await LatestVideosQueue.add("getVideoInfo", { aid }, {
delay,
attempts: 100, attempts: 100,
backoff: { backoff: {
type: "fixed", type: "fixed",
delay: SECOND * 5 delay: SECOND * 5,
} },
}); });
videosFound.add(aid); videosFound.add(aid);
allExists = false; allExists = false;

View File

@ -1,10 +1,10 @@
import { Job, Worker } from "bullmq"; import { Job, Worker } from "bullmq";
import { getLatestVideosWorker } from "lib/mq/executors.ts"; import { collectSongsWorker, getLatestVideosWorker } from "lib/mq/executors.ts";
import { redis } from "lib/db/redis.ts"; import { redis } from "lib/db/redis.ts";
import logger from "lib/log/logger.ts"; import logger from "lib/log/logger.ts";
import { lockManager } from "lib/mq/lockManager.ts"; import { lockManager } from "lib/mq/lockManager.ts";
import { WorkerError } from "lib/mq/schema.ts"; import { WorkerError } from "lib/mq/schema.ts";
import { getVideoInfoWorker } from "lib/mq/exec/getVideoInfo.ts"; import { getVideoInfoWorker } from "lib/mq/exec/getLatestVideos.ts";
Deno.addSignalListener("SIGINT", async () => { Deno.addSignalListener("SIGINT", async () => {
logger.log("SIGINT Received: Shutting down workers...", "mq"); logger.log("SIGINT Received: Shutting down workers...", "mq");
@ -28,6 +28,9 @@ const latestVideoWorker = new Worker(
case "getVideoInfo": case "getVideoInfo":
await getVideoInfoWorker(job); await getVideoInfoWorker(job);
break; break;
case "collectSongs":
await collectSongsWorker(job);
break;
default: default:
break; break;
} }