diff --git a/lib/db/allData.ts b/lib/db/allData.ts index 26840fb..ddcb804 100644 --- a/lib/db/allData.ts +++ b/lib/db/allData.ts @@ -3,19 +3,19 @@ import { AllDataType, BiliUserType } from "lib/db/schema.d.ts"; import Akari from "lib/ml/akari.ts"; export async function videoExistsInAllData(client: Client, aid: number) { - return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM all_data WHERE aid = $1)`, [aid]) + return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bilibili_metadata WHERE aid = $1)`, [aid]) .then((result) => result.rows[0].exists); } export async function userExistsInBiliUsers(client: Client, uid: number) { - return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bili_user WHERE uid = $1)`, [ + return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bilibili_user WHERE uid = $1)`, [ uid, ]); } export async function getUnlabelledVideos(client: Client) { const queryResult = await client.queryObject<{ aid: number }>( - `SELECT a.aid FROM all_data a LEFT JOIN labelling_result l ON a.aid = l.aid WHERE l.aid IS NULL`, + `SELECT a.aid FROM bilibili_metadata a LEFT JOIN labelling_result l ON a.aid = l.aid WHERE l.aid IS NULL`, ); return queryResult.rows.map((row) => row.aid); } @@ -29,14 +29,14 @@ export async function insertVideoLabel(client: Client, aid: number, label: numbe export async function getVideoInfoFromAllData(client: Client, aid: number) { const queryResult = await client.queryObject( - `SELECT * FROM all_data WHERE aid = $1`, + `SELECT * FROM bilibili_metadata WHERE aid = $1`, [aid], ); const row = queryResult.rows[0]; let authorInfo = ""; if (row.uid && await userExistsInBiliUsers(client, row.uid)) { const q = await client.queryObject( - `SELECT * FROM bili_user WHERE uid = $1`, + `SELECT * FROM bilibili_user WHERE uid = $1`, [row.uid], ); const userRow = q.rows[0]; @@ -56,8 +56,8 @@ export async function getUnArchivedBiliUsers(client: Client) { const queryResult = await client.queryObject<{ uid: number }>( ` SELECT ad.uid - FROM all_data ad - LEFT JOIN bili_user bu ON ad.uid = bu.uid + FROM bilibili_metadata ad + LEFT JOIN bilibili_user bu ON ad.uid = bu.uid WHERE bu.uid IS NULL; `, [], diff --git a/lib/db/snapshot.ts b/lib/db/snapshot.ts index 81fe9a8..d838899 100644 --- a/lib/db/snapshot.ts +++ b/lib/db/snapshot.ts @@ -5,41 +5,31 @@ import { parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts"; export async function getVideosNearMilestone(client: Client) { const queryResult = await client.queryObject(` - WITH max_views_per_aid AS ( - -- 找出每个 aid 的最大 views 值,并确保 aid 存在于 songs 表中 - SELECT - vs.aid, - MAX(vs.views) AS max_views - FROM + WITH filtered_snapshots AS ( + SELECT + vs.* + FROM video_snapshot vs - INNER JOIN - songs s - ON - vs.aid = s.aid - GROUP BY - vs.aid + WHERE + (vs.views >= 90000 AND vs.views < 100000) OR + (vs.views >= 900000 AND vs.views < 1000000) ), - filtered_max_views AS ( - -- 筛选出满足条件的最大 views - SELECT - aid, - max_views - FROM - max_views_per_aid - WHERE - (max_views >= 90000 AND max_views < 100000) OR - (max_views >= 900000 AND max_views < 1000000) OR - (max_views >= 9900000 AND max_views < 10000000) + ranked_snapshots AS ( + SELECT + fs.*, + ROW_NUMBER() OVER (PARTITION BY fs.aid ORDER BY fs.created_at DESC) as rn, + MAX(fs.views) OVER (PARTITION BY fs.aid) as max_views_per_aid + FROM + filtered_snapshots fs + INNER JOIN + songs s ON fs.aid = s.aid ) - -- 获取符合条件的完整行数据 - SELECT - vs.* - FROM - video_snapshot vs - INNER JOIN - filtered_max_views fmv - ON - vs.aid = fmv.aid AND vs.views = fmv.max_views + SELECT + rs.id, rs.created_at, rs.views, rs.coins, rs.likes, rs.favorites, rs.shares, rs.danmakus, rs.aid, rs.replies + FROM + ranked_snapshots rs + WHERE + rs.rn = 1; `); return queryResult.rows.map((row) => { return { @@ -72,7 +62,7 @@ export async function getSongSnapshotCount(client: Client, aid: number) { } export async function getShortTermEtaPrediction(client: Client, aid: number) { - const queryResult = await client.queryObject<{eta: number}>( + const queryResult = await client.queryObject<{ eta: number }>( ` WITH old_snapshot AS ( SELECT created_at, views @@ -121,6 +111,23 @@ export async function getShortTermEtaPrediction(client: Client, aid: number) { return queryResult.rows[0].eta; } +export async function getIntervalFromLastSnapshotToNow(client: Client, aid: number) { + const queryResult = await client.queryObject<{ interval: number }>( + ` + SELECT EXTRACT(EPOCH FROM (NOW() - created_at)) AS interval + FROM video_snapshot + WHERE aid = $1 + ORDER BY created_at DESC + LIMIT 1; + `, + [aid], + ); + if (queryResult.rows.length === 0) { + return null; + } + return queryResult.rows[0].interval; +} + export async function songEligibleForMilestoneSnapshot(client: Client, aid: number) { const count = await getSongSnapshotCount(client, aid); if (count < 2) { diff --git a/lib/mq/exec/classifyVideo.ts b/lib/mq/exec/classifyVideo.ts index 6649931..b86a9a6 100644 --- a/lib/mq/exec/classifyVideo.ts +++ b/lib/mq/exec/classifyVideo.ts @@ -26,7 +26,7 @@ export const classifyVideoWorker = async (job: Job) => { await insertVideoLabel(client, aid, label); const exists = await aidExistsInSongs(client, aid); - if (!exists) { + if (!exists && label !== 0) { await insertIntoSongs(client, aid); } diff --git a/lib/mq/exec/snapshotTick.ts b/lib/mq/exec/snapshotTick.ts index 9fcc604..e72d11f 100644 --- a/lib/mq/exec/snapshotTick.ts +++ b/lib/mq/exec/snapshotTick.ts @@ -14,6 +14,8 @@ export const snapshotTickWorker = async (_job: Job) => { } }; +const log = (a: number, b: number = 10) => Math.log(a) / Math.log(b); + export const closetMilestone = (views: number) => { if (views < 100000) return 100000; if (views < 1000000) return 1000000; diff --git a/lib/mq/task/collectSongs.ts b/lib/mq/task/collectSongs.ts index 04e033d..9c49823 100644 --- a/lib/mq/task/collectSongs.ts +++ b/lib/mq/task/collectSongs.ts @@ -15,12 +15,11 @@ export async function collectSongs(client: Client) { export async function insertIntoSongs(client: Client, aid: number) { await client.queryObject( ` - INSERT INTO songs (aid, bvid, published_at, duration) + INSERT INTO songs (aid, published_at, duration) VALUES ( $1, - (SELECT bvid FROM all_data WHERE aid = $1), - (SELECT published_at FROM all_data WHERE aid = $1), - (SELECT duration FROM all_data WHERE aid = $1) + (SELECT published_at FROM bilibili_metadata WHERE aid = $1), + (SELECT duration FROM bilibili_metadata WHERE aid = $1) ) ON CONFLICT DO NOTHING `, diff --git a/lib/mq/task/getVideoDetails.ts b/lib/mq/task/getVideoDetails.ts index ead8dd0..ea5f903 100644 --- a/lib/mq/task/getVideoDetails.ts +++ b/lib/mq/task/getVideoDetails.ts @@ -4,6 +4,7 @@ import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts"; import logger from "lib/log/logger.ts"; import { ClassifyVideoQueue } from "lib/mq/index.ts"; import { userExistsInBiliUsers, videoExistsInAllData } from "lib/db/allData.ts"; +import { HOUR, SECOND } from "$std/datetime/constants.ts"; export async function insertVideoInfo(client: Client, aid: number) { const videoExists = await videoExistsInAllData(client, aid); @@ -18,25 +19,25 @@ export async function insertVideoInfo(client: Client, aid: number) { const desc = data.View.desc; const uid = data.View.owner.mid; const tags = data.Tags - .filter((tag) => tag.tag_type in ["old_channel", "topic"]) + .filter((tag) => !["old_channel", "topic"].indexOf(tag.tag_type)) .map((tag) => tag.tag_name).join(","); const title = data.View.title; - const published_at = formatTimestampToPsql(data.View.pubdate); + const published_at = formatTimestampToPsql(data.View.pubdate * SECOND + 8 * HOUR); const duration = data.View.duration; await client.queryObject( - `INSERT INTO all_data (aid, bvid, description, uid, tags, title, published_at, duration) + `INSERT INTO bilibili_metadata (aid, bvid, description, uid, tags, title, published_at, duration) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`, [aid, bvid, desc, uid, tags, title, published_at, duration], ); const userExists = await userExistsInBiliUsers(client, aid); if (!userExists) { await client.queryObject( - `INSERT INTO bili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`, + `INSERT INTO bilibili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`, [uid, data.View.owner.name, data.Card.card.sign, data.Card.follower], ); } else { await client.queryObject( - `UPDATE bili_user SET fans = $1 WHERE uid = $2`, + `UPDATE bilibili_user SET fans = $1 WHERE uid = $2`, [data.Card.follower, uid], ); } diff --git a/lib/utils/formatSeconds.ts b/lib/utils/formatSeconds.ts index ffabb22..694f94c 100644 --- a/lib/utils/formatSeconds.ts +++ b/lib/utils/formatSeconds.ts @@ -3,7 +3,7 @@ export const formatSeconds = (seconds: number) => { return `${(seconds).toFixed(1)}s`; } if (seconds < 3600) { - return `${Math.floor(seconds / 60)}m${seconds % 60}s`; + return `${Math.floor(seconds / 60)}m${(seconds % 60).toFixed(1)}s`; } return `${Math.floor(seconds / 3600)}h ${((seconds % 3600) / 60).toFixed(2)}m`; };