add: insert labelled songs into songs table
This commit is contained in:
parent
2a2e65804f
commit
fa414e89ce
@ -1,6 +1,6 @@
|
|||||||
import {Client} from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||||
import {AllDataType, BiliUserType} from "lib/db/schema.d.ts";
|
import { AllDataType, BiliUserType } from "lib/db/schema.d.ts";
|
||||||
import {modelVersion} from "lib/ml/filter_inference.ts";
|
import { modelVersion } from "lib/ml/filter_inference.ts";
|
||||||
|
|
||||||
export async function videoExistsInAllData(client: Client, aid: number) {
|
export async function videoExistsInAllData(client: Client, aid: number) {
|
||||||
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM all_data WHERE aid = $1)`, [aid])
|
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM all_data WHERE aid = $1)`, [aid])
|
||||||
@ -8,7 +8,9 @@ export async function videoExistsInAllData(client: Client, aid: number) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Runs an `EXISTS` probe against the `bili_user` table for the given uid.
 *
 * NOTE(review): the awaited query result object is returned as-is, not the
 * unwrapped `exists` flag. Confirm that callers read `rows[0].exists` instead
 * of testing the return value for truthiness.
 *
 * @param client - Postgres client used to run the query.
 * @param uid - User id matched against `bili_user.uid`.
 * @returns The result object of the `SELECT EXISTS(...)` query.
 */
export async function userExistsInBiliUsers(client: Client, uid: number) {
	const sql = `SELECT EXISTS(SELECT 1 FROM bili_user WHERE uid = $1)`;
	const result = await client.queryObject<{ exists: boolean }>(sql, [uid]);
	return result;
}
|
||||||
|
|
||||||
export async function getUnlabelledVideos(client: Client) {
|
export async function getUnlabelledVideos(client: Client) {
|
||||||
@ -36,28 +38,29 @@ export async function getVideoInfoFromAllData(client: Client, aid: number) {
|
|||||||
const q = await client.queryObject<BiliUserType>(
|
const q = await client.queryObject<BiliUserType>(
|
||||||
`SELECT * FROM bili_user WHERE uid = $1`,
|
`SELECT * FROM bili_user WHERE uid = $1`,
|
||||||
[row.uid],
|
[row.uid],
|
||||||
)
|
);
|
||||||
const userRow = q.rows[0];
|
const userRow = q.rows[0];
|
||||||
if (userRow)
|
if (userRow) {
|
||||||
authorInfo = userRow.desc;
|
authorInfo = userRow.desc;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
title: row.title,
|
title: row.title,
|
||||||
description: row.description,
|
description: row.description,
|
||||||
tags: row.tags,
|
tags: row.tags,
|
||||||
author_info: authorInfo
|
author_info: authorInfo,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getUnArchivedBiliUsers(client: Client) {
|
export async function getUnArchivedBiliUsers(client: Client) {
|
||||||
const queryResult = await client.queryObject<{uid: number}>(
|
const queryResult = await client.queryObject<{ uid: number }>(
|
||||||
`
|
`
|
||||||
SELECT ad.uid
|
SELECT ad.uid
|
||||||
FROM all_data ad
|
FROM all_data ad
|
||||||
LEFT JOIN bili_user bu ON ad.uid = bu.uid
|
LEFT JOIN bili_user bu ON ad.uid = bu.uid
|
||||||
WHERE bu.uid IS NULL;
|
WHERE bu.uid IS NULL;
|
||||||
`,
|
`,
|
||||||
[]
|
[],
|
||||||
);
|
);
|
||||||
const rows = queryResult.rows;
|
const rows = queryResult.rows;
|
||||||
return rows.map((row) => row.uid);
|
return rows.map((row) => row.uid);
|
||||||
|
29
lib/db/songs.ts
Normal file
29
lib/db/songs.ts
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||||
|
|
||||||
|
/**
 * Lists the aids that carry a non-zero label in `labelling_result` but have
 * no matching row in `songs` yet.
 *
 * @param client - Postgres client used to run the query.
 * @returns The `aid` of every matching row, in the order the query returns them.
 */
export async function getNotCollectedSongs(client: Client) {
	const sql = `
		SELECT lr.aid
		FROM labelling_result lr
		WHERE lr.label != 0
		AND NOT EXISTS (
			SELECT 1
			FROM songs s
			WHERE s.aid = lr.aid
		);
	`;
	const { rows } = await client.queryObject<{ aid: number }>(sql);
	const aids: number[] = [];
	for (const { aid } of rows) {
		aids.push(aid);
	}
	return aids;
}
|
||||||
|
|
||||||
|
/**
 * Checks whether the `songs` table already contains a row for the given aid.
 *
 * @param client - Postgres client used to run the query.
 * @param aid - Video id matched against `songs.aid`.
 * @returns The `exists` flag of the first row returned by the `SELECT EXISTS` query.
 */
export async function aidExistsInSongs(client: Client, aid: number) {
	const sql = `
		SELECT EXISTS (
			SELECT 1
			FROM songs
			WHERE aid = $1
		);
	`;
	const { rows } = await client.queryObject<{ exists: boolean }>(sql, [aid]);
	const first = rows[0];
	return first.exists;
}
|
@ -70,13 +70,13 @@ function calculateMetrics(labels: number[], predictions: number[], elapsedTime:
|
|||||||
speed: string;
|
speed: string;
|
||||||
} {
|
} {
|
||||||
// 输出label和prediction不一样的index列表
|
// 输出label和prediction不一样的index列表
|
||||||
const arr = []
|
const arr = [];
|
||||||
for (let i = 0; i < labels.length; i++) {
|
for (let i = 0; i < labels.length; i++) {
|
||||||
if (labels[i] !== predictions[i] && predictions[i] == 0) {
|
if (labels[i] !== predictions[i] && predictions[i] == 0) {
|
||||||
arr.push([i + 1, labels[i], predictions[i]])
|
arr.push([i + 1, labels[i], predictions[i]]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
console.log(arr)
|
console.log(arr);
|
||||||
// 初始化混淆矩阵
|
// 初始化混淆矩阵
|
||||||
const classCount = Math.max(...labels, ...predictions) + 1;
|
const classCount = Math.max(...labels, ...predictions) + 1;
|
||||||
const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
|
const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
|
||||||
@ -146,7 +146,7 @@ async function evaluateModel(session: ort.InferenceSession): Promise<{
|
|||||||
const embeddings = await getONNXEmbeddings([
|
const embeddings = await getONNXEmbeddings([
|
||||||
sample.title,
|
sample.title,
|
||||||
sample.description,
|
sample.description,
|
||||||
sample.tags.join(",")
|
sample.tags.join(","),
|
||||||
], session);
|
], session);
|
||||||
|
|
||||||
const probabilities = await runClassification(embeddings);
|
const probabilities = await runClassification(embeddings);
|
||||||
|
@ -5,6 +5,8 @@ import { classifyVideo } from "lib/ml/filter_inference.ts";
|
|||||||
import { ClassifyVideoQueue } from "lib/mq/index.ts";
|
import { ClassifyVideoQueue } from "lib/mq/index.ts";
|
||||||
import logger from "lib/log/logger.ts";
|
import logger from "lib/log/logger.ts";
|
||||||
import { lockManager } from "lib/mq/lockManager.ts";
|
import { lockManager } from "lib/mq/lockManager.ts";
|
||||||
|
import { aidExistsInSongs } from "lib/db/songs.ts";
|
||||||
|
import { insertIntoSongs } from "lib/mq/task/collectSongs.ts";
|
||||||
|
|
||||||
export const classifyVideoWorker = async (job: Job) => {
|
export const classifyVideoWorker = async (job: Job) => {
|
||||||
const client = await db.connect();
|
const client = await db.connect();
|
||||||
@ -23,6 +25,11 @@ export const classifyVideoWorker = async (job: Job) => {
|
|||||||
}
|
}
|
||||||
await insertVideoLabel(client, aid, label);
|
await insertVideoLabel(client, aid, label);
|
||||||
|
|
||||||
|
const exists = await aidExistsInSongs(client, aid);
|
||||||
|
if (!exists) {
|
||||||
|
await insertIntoSongs(client, aid);
|
||||||
|
}
|
||||||
|
|
||||||
client.release();
|
client.release();
|
||||||
|
|
||||||
await job.updateData({
|
await job.updateData({
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
import { Job } from "bullmq";
|
import { Job } from "bullmq";
|
||||||
import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
|
import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
|
||||||
import { db } from "lib/db/init.ts";
|
import { db } from "lib/db/init.ts";
|
||||||
|
import { insertVideoInfo } from "lib/mq/task/getVideoInfo.ts";
|
||||||
|
import { collectSongs } from "lib/mq/task/collectSongs.ts";
|
||||||
|
|
||||||
export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
|
export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
|
||||||
const client = await db.connect();
|
const client = await db.connect();
|
||||||
@ -10,3 +12,26 @@ export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
|
|||||||
client.release();
|
client.release();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Job handler that runs `collectSongs` on a connection obtained from `db`.
 * The connection is released in `finally`, so it is handed back even when
 * `collectSongs` throws.
 *
 * @param _job - The triggering job; its payload is not read.
 */
export const collectSongsWorker = async (_job: Job): Promise<void> => {
	const connection = await db.connect();
	try {
		await collectSongs(connection);
	} finally {
		connection.release();
	}
};
|
||||||
|
|
||||||
|
/**
 * Job handler that stores video info for the aid carried in the job payload.
 *
 * @param job - Job whose `data.aid` names the video to process.
 * @returns `3` when `job.data.aid` is falsy (nothing is inserted), otherwise
 *          `0` once `insertVideoInfo` has completed.
 */
export const getVideoInfoWorker = async (job: Job): Promise<number> => {
	const connection = await db.connect();
	try {
		const videoId = job.data.aid;
		if (videoId) {
			await insertVideoInfo(connection, videoId);
			return 0;
		}
		// Falsy aid: report code 3 without touching the database.
		return 3;
	} finally {
		// Runs on every path, including the early return and thrown errors.
		connection.release();
	}
};
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
import { Job } from "bullmq";
|
|
||||||
import { db } from "lib/db/init.ts";
|
|
||||||
import { insertVideoInfo } from "lib/mq/task/getVideoInfo.ts";
|
|
||||||
|
|
||||||
/**
 * Job handler that stores video info for the aid carried in the job payload.
 *
 * @param job - Job whose `data.aid` names the video to process.
 * @returns `3` when `job.data.aid` is falsy (nothing is inserted), otherwise
 *          `0` once `insertVideoInfo` has completed.
 */
export const getVideoInfoWorker = async (job: Job): Promise<number> => {
	const connection = await db.connect();
	try {
		const videoId = job.data.aid;
		if (videoId) {
			await insertVideoInfo(connection, videoId);
			return 0;
		}
		// Falsy aid: report code 3 without touching the database.
		return 3;
	} finally {
		// Runs on every path, including the early return and thrown errors.
		connection.release();
	}
};
|
|
@ -11,6 +11,10 @@ export async function initMQ() {
|
|||||||
every: 5 * MINUTE,
|
every: 5 * MINUTE,
|
||||||
immediately: true,
|
immediately: true,
|
||||||
});
|
});
|
||||||
|
await LatestVideosQueue.upsertJobScheduler("collectSongs", {
|
||||||
|
every: 3 * MINUTE,
|
||||||
|
immediately: true,
|
||||||
|
});
|
||||||
|
|
||||||
logger.log("Message queue initialized.");
|
logger.log("Message queue initialized.");
|
||||||
}
|
}
|
||||||
|
29
lib/mq/task/collectSongs.ts
Normal file
29
lib/mq/task/collectSongs.ts
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||||
|
import { aidExistsInSongs, getNotCollectedSongs } from "lib/db/songs.ts";
|
||||||
|
import logger from "lib/log/logger.ts";
|
||||||
|
|
||||||
|
/**
 * Copies every not-yet-collected labelled video into the `songs` table.
 *
 * The candidate aids come from `getNotCollectedSongs`. Each one is re-checked
 * with `aidExistsInSongs` right before the insert, and aids are processed one
 * at a time on the given client.
 *
 * @param client - Postgres client used for all queries.
 */
export async function collectSongs(client: Client) {
	const pendingAids = await getNotCollectedSongs(client);
	for (const aid of pendingAids) {
		const alreadyCollected = await aidExistsInSongs(client, aid);
		if (!alreadyCollected) {
			await insertIntoSongs(client, aid);
			logger.log(`Video ${aid} was added into the songs table.`, "mq", "fn:collectSongs");
		}
	}
}
|
||||||
|
|
||||||
|
/**
 * Inserts the video identified by `aid` into `songs`, copying `bvid`,
 * `published_at` and `duration` from its `all_data` row.
 *
 * The row is produced by a single `INSERT ... SELECT` instead of three
 * separate scalar subqueries. When `all_data` has no row for `aid`, the
 * SELECT yields nothing and no row is inserted, rather than attempting to
 * insert a row whose copied columns are all NULL. `ON CONFLICT DO NOTHING`
 * keeps the call a no-op when the insert would violate a unique constraint.
 *
 * Assumes `aid` identifies at most one `all_data` row — TODO confirm against
 * the schema.
 *
 * @param client - Postgres client used to run the statement.
 * @param aid - Video id to copy from `all_data` into `songs`.
 */
export async function insertIntoSongs(client: Client, aid: number): Promise<void> {
	await client.queryObject(
		`
		INSERT INTO songs (aid, bvid, published_at, duration)
		SELECT aid, bvid, published_at, duration
		FROM all_data
		WHERE aid = $1
		ON CONFLICT DO NOTHING
		`,
		[aid],
	);
}
|
@ -34,8 +34,7 @@ export async function insertVideoInfo(client: Client, aid: number) {
|
|||||||
`INSERT INTO bili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`,
|
`INSERT INTO bili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`,
|
||||||
[uid, data.View.owner.name, data.Card.card.sign, data.Card.follower],
|
[uid, data.View.owner.name, data.Card.card.sign, data.Card.follower],
|
||||||
);
|
);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
await client.queryObject(
|
await client.queryObject(
|
||||||
`UPDATE bili_user SET fans = $1 WHERE uid = $2`,
|
`UPDATE bili_user SET fans = $1 WHERE uid = $2`,
|
||||||
[data.Card.follower, uid],
|
[data.Card.follower, uid],
|
||||||
|
@ -26,12 +26,13 @@ export async function queueLatestVideos(
|
|||||||
if (videoExists) {
|
if (videoExists) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
await LatestVideosQueue.add("getVideoInfo", { aid }, { delay,
|
await LatestVideosQueue.add("getVideoInfo", { aid }, {
|
||||||
|
delay,
|
||||||
attempts: 100,
|
attempts: 100,
|
||||||
backoff: {
|
backoff: {
|
||||||
type: "fixed",
|
type: "fixed",
|
||||||
delay: SECOND * 5
|
delay: SECOND * 5,
|
||||||
}
|
},
|
||||||
});
|
});
|
||||||
videosFound.add(aid);
|
videosFound.add(aid);
|
||||||
allExists = false;
|
allExists = false;
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
import { Job, Worker } from "bullmq";
|
import { Job, Worker } from "bullmq";
|
||||||
import { getLatestVideosWorker } from "lib/mq/executors.ts";
|
import { collectSongsWorker, getLatestVideosWorker } from "lib/mq/executors.ts";
|
||||||
import { redis } from "lib/db/redis.ts";
|
import { redis } from "lib/db/redis.ts";
|
||||||
import logger from "lib/log/logger.ts";
|
import logger from "lib/log/logger.ts";
|
||||||
import { lockManager } from "lib/mq/lockManager.ts";
|
import { lockManager } from "lib/mq/lockManager.ts";
|
||||||
import { WorkerError } from "lib/mq/schema.ts";
|
import { WorkerError } from "lib/mq/schema.ts";
|
||||||
import { getVideoInfoWorker } from "lib/mq/exec/getVideoInfo.ts";
|
import { getVideoInfoWorker } from "lib/mq/exec/getLatestVideos.ts";
|
||||||
|
|
||||||
Deno.addSignalListener("SIGINT", async () => {
|
Deno.addSignalListener("SIGINT", async () => {
|
||||||
logger.log("SIGINT Received: Shutting down workers...", "mq");
|
logger.log("SIGINT Received: Shutting down workers...", "mq");
|
||||||
@ -28,6 +28,9 @@ const latestVideoWorker = new Worker(
|
|||||||
case "getVideoInfo":
|
case "getVideoInfo":
|
||||||
await getVideoInfoWorker(job);
|
await getVideoInfoWorker(job);
|
||||||
break;
|
break;
|
||||||
|
case "collectSongs":
|
||||||
|
await collectSongsWorker(job);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user