From cd8aa826e125ac54efee09b8d76471f6d3d05144 Mon Sep 17 00:00:00 2001 From: alikia2x Date: Mon, 17 Mar 2025 00:33:28 +0800 Subject: [PATCH] fix: prevent videos from being crawled for too long --- lib/db/snapshot.ts | 19 ++++++++++++++++++- lib/mq/exec/snapshotTick.ts | 20 +++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/lib/db/snapshot.ts b/lib/db/snapshot.ts index 663a628..5921059 100644 --- a/lib/db/snapshot.ts +++ b/lib/db/snapshot.ts @@ -71,7 +71,7 @@ export async function getSongSnapshotCount(client: Client, aid: number) { } export async function getShortTermEtaPrediction(client: Client, aid: number) { - const queryResult = await client.queryObject<{eta: number}>( + const queryResult = await client.queryObject<{ eta: number }>( ` WITH old_snapshot AS ( SELECT created_at, views @@ -120,6 +120,23 @@ export async function getShortTermEtaPrediction(client: Client, aid: number) { return queryResult.rows[0].eta; } +export async function getIntervalFromLastSnapshotToNow(client: Client, aid: number) { + const queryResult = await client.queryObject<{ interval: number }>( + ` + SELECT EXTRACT(EPOCH FROM (NOW() - created_at)) AS interval + FROM video_snapshot + WHERE aid = $1 + ORDER BY created_at DESC + LIMIT 1; + `, + [aid], + ); + if (queryResult.rows.length === 0) { + return null; + } + return queryResult.rows[0].interval; +} + export async function songEligibleForMilestoneSnapshot(client: Client, aid: number) { const count = await getSongSnapshotCount(client, aid); if (count < 2) { diff --git a/lib/mq/exec/snapshotTick.ts b/lib/mq/exec/snapshotTick.ts index 12443ff..bbc7205 100644 --- a/lib/mq/exec/snapshotTick.ts +++ b/lib/mq/exec/snapshotTick.ts @@ -1,8 +1,9 @@ import { Job } from "bullmq"; -import { MINUTE, SECOND } from "$std/datetime/constants.ts"; +import { HOUR, MINUTE, SECOND } from "$std/datetime/constants.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { db } from "lib/db/init.ts"; import { +getIntervalFromLastSnapshotToNow, getShortTermEtaPrediction, getSongsNearMilestone, getUnsnapshotedSongs, @@ -55,19 +56,12 @@ async function processMilestoneSnapshots(client: Client, vidoesNearMilestone: So let i = 0; for (const snapshot of vidoesNearMilestone) { if (await snapshotScheduled(snapshot.aid)) { - logger.silly( - `Video ${snapshot.aid} is already scheduled for snapshot`, - "mq", - "fn:processMilestoneSnapshots", - ); continue; } - if (await songEligibleForMilestoneSnapshot(client, snapshot.aid) === false) { - logger.silly( - `Video ${snapshot.aid} is not eligible for milestone snapshot`, - "mq", - "fn:processMilestoneSnapshots", - ); + const timeFromLastSnapshot = await getIntervalFromLastSnapshotToNow(client, snapshot.aid); + const lastSnapshotLessThan8Hrs = timeFromLastSnapshot && timeFromLastSnapshot * SECOND < 8 * HOUR; + const notEligible = await songEligibleForMilestoneSnapshot(client, snapshot.aid); + if (notEligible && lastSnapshotLessThan8Hrs) { continue; } const factor = Math.floor(i / 8); @@ -143,7 +137,7 @@ export const takeSnapshotForMilestoneVideoWorker = async (job: Job) => { eta = viewsToIncrease / (incrementSpeed + DELTA); } const scheduledNextSnapshotDelay = eta * SECOND / 3; - const maxInterval = 20 * MINUTE; + const maxInterval = 60 * MINUTE; const minInterval = 1 * SECOND; const delay = truncate(scheduledNextSnapshotDelay, minInterval, maxInterval); await SnapshotQueue.add("snapshotMilestoneVideo", {