add: more new snapshots for new songs, script for fullSnapshot
This commit is contained in:
parent
96b8cb27b8
commit
4851a69b91
@ -35,6 +35,9 @@ export const classifyVideoWorker = async (job: Job) => {
|
||||
|
||||
const exists = await aidExistsInSongs(sql, aid);
|
||||
if (!exists && label !== 0) {
|
||||
await scheduleSnapshot(sql, aid, "new", Date.now() + 1.5 * MINUTE, true);
|
||||
await scheduleSnapshot(sql, aid, "new", Date.now() + 3 * MINUTE, true);
|
||||
await scheduleSnapshot(sql, aid, "new", Date.now() + 5 * MINUTE, true);
|
||||
await scheduleSnapshot(sql, aid, "new", Date.now() + 10 * MINUTE, true);
|
||||
await insertIntoSongs(aid);
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { Job } from "bullmq";
|
||||
import { getVideoDetails } from "net/getVideoDetails";
|
||||
import { getVideoDetails } from "@core/net/getVideoDetails";
|
||||
import logger from "@core/log";
|
||||
import { ClassifyVideoQueue, latestVideosEventsProducer } from "mq/index";
|
||||
import {
|
||||
|
||||
@ -82,7 +82,7 @@ function SongResult({ result }: { result: Exclude<SearchResult, null>["data"][nu
|
||||
<img
|
||||
src={data.image}
|
||||
alt="歌曲封面"
|
||||
className="h-21 w-36 sm:w-42 sm:h-24 rounded-sm object-cover flex-shrink-0"
|
||||
className="h-21 w-36 sm:w-42 sm:h-24 rounded-sm object-cover shrink-0"
|
||||
referrerPolicy="no-referrer"
|
||||
/>
|
||||
)}
|
||||
|
||||
@ -28,11 +28,9 @@ async function fixTimezoneError() {
|
||||
const candidates = await pg`
|
||||
SELECT aid, published_at
|
||||
FROM
|
||||
bilibili_metadata
|
||||
songs
|
||||
WHERE
|
||||
published_at >= '2025-04-26'
|
||||
AND published_at <= '2025-06-01'
|
||||
AND status = 0
|
||||
published_at <= '2000-01-01'
|
||||
`;
|
||||
const query = sqlite.query(`SELECT data FROM bili_info_crawl WHERE aid = $aid`);
|
||||
for (const video of candidates) {
|
||||
|
||||
142
src/fullSnapshot.ts
Normal file
142
src/fullSnapshot.ts
Normal file
@ -0,0 +1,142 @@
|
||||
import arg from "arg";
|
||||
import logger from "@core/log";
|
||||
import { Database } from "bun:sqlite";
|
||||
import { getVideoDetails } from "@core/net/getVideoDetails";
|
||||
import { sql } from "@core/index";
|
||||
|
||||
/**
 * Terminates the script.
 *
 * @param reason - Optional failure description. When given, it is reported
 *   through `logger.error` and the process exits with status 1 so that shells
 *   and schedulers can detect the failure. Without it, the process exits
 *   normally (honouring any previously set `process.exitCode`).
 */
const quit = (reason?: string): never => {
	if (reason) {
		logger.error(reason);
		process.exit(1);
	}
	process.exit();
};
|
||||
|
||||
// Command-line options accepted by this script:
//   --db    path handed to the local SQLite `Database` constructor (required)
//   --aids  optional path of a file read by `getAids` instead of querying `sql`
const args = arg({ "--db": String, "--aids": String });

// Abort early when no database path was supplied.
const { "--db": dbPath } = args;
if (dbPath === undefined || dbPath === "") {
	quit("Missing --db <path>");
}

// Local SQLite database that stores crawl state and results.
const sqlite = new Database(dbPath);
// Alias for the shared `sql` client imported from "@core/index".
const pg = sql;
|
||||
|
||||
/**
 * Collects the aids that should be present in the local crawl table.
 *
 * When `--aids <file>` is given, the file is read as one aid per line. Blank
 * lines (including the one produced by a trailing newline) are skipped, and
 * lines that are not positive integers are dropped and reported, so that
 * neither `0` nor `NaN` leaks into the later INSERT statements.
 * Otherwise every `aid` from the `bilibili_metadata` table is returned.
 *
 * @returns The list of aids to consider.
 */
const getAids = async (): Promise<number[]> => {
	const aidsFile = args["--aids"];
	if (aidsFile) {
		const lines = (await Bun.file(aidsFile).text())
			.split("\n")
			// trim() also removes the "\r" left behind by CRLF line endings.
			.map((line) => line.trim())
			.filter((line) => line !== "");
		const parsed = lines.map(Number);
		const valid = parsed.filter((aid) => Number.isSafeInteger(aid) && aid > 0);
		if (valid.length !== parsed.length) {
			logger.log(`Ignored ${parsed.length - valid.length} invalid line(s) in ${aidsFile}.`);
		}
		return valid;
	}
	const aids = await sql<{ aid: number }[]>`SELECT aid FROM bilibili_metadata`;
	return aids.map((row: { aid: number }) => row.aid);
};
|
||||
|
||||
/**
 * Registers every aid returned by `getAids` that is not yet in the local
 * `bili_info_crawl` table, marking it as `'pending'`.
 *
 * The rows are inserted with a single parameterised statement executed inside
 * one transaction. Building one large multi-statement SQL string and handing
 * it to `prepare()` is unreliable (a prepared statement covers a single SQL
 * statement) and fails outright when there is nothing to insert.
 */
async function addCandidates(): Promise<void> {
	const aids = await getAids();

	logger.log(`Retrieved ${aids.length} from production DB.`);

	const existingAids = sqlite
		.prepare("SELECT aid FROM bili_info_crawl")
		.all()
		.map((row: unknown) => (row as { aid: number }).aid);
	logger.log(`We have ${existingAids.length} from local DB.`);

	const existingAidsSet = new Set(existingAids);

	const newAids = aids.filter((aid) => !existingAidsSet.has(aid));

	// Nothing to insert: skip preparing/running any statement at all.
	if (newAids.length > 0) {
		const insert = sqlite.prepare("INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')");
		// One transaction for the whole batch keeps the bulk insert atomic and fast.
		const insertAll = sqlite.transaction((pending: number[]) => {
			for (const aid of pending) {
				insert.run(aid);
			}
		});
		insertAll(newAids);
	}
	logger.log(`Added ${newAids.length} to local DB.`);
}
|
||||
|
||||
/**
 * Fetches details for every `'pending'` aid in the local `bili_info_crawl`
 * table and records the outcome.
 *
 * First calls `addCandidates` to register missing aids, then processes the
 * pending aids in groups of five: the requests inside a group run
 * concurrently, and the groups themselves run one after another. Each aid ends
 * up as `'success'` (with its bvid and the JSON-serialised response) or
 * `'failed'` (when `getVideoDetails` returns `null` or throws).
 */
async function insertAidsToDB(): Promise<void> {
	await addCandidates();

	const aidsInDB = sqlite
		.prepare("SELECT aid FROM bili_info_crawl WHERE status = 'pending'")
		.all()
		.map((row: any) => row.aid) as number[];

	const totalAids = aidsInDB.length;
	let processedAids = 0;
	const startTime = Date.now();

	// Handles a single aid; never rejects because of a failed request.
	const processAid = async (aid: number): Promise<void> => {
		try {
			const res = await getVideoDetails(aid);
			if (res === null) {
				updateAidStatus(aid, "failed");
			} else {
				updateAidStatus(aid, "success", res.View.bvid, JSON.stringify(res));
			}
		} catch (error) {
			// Report through the project logger, like every other message in this script.
			logger.error(`Error updating aid ${aid}: ${error}`);
			updateAidStatus(aid, "failed");
		} finally {
			processedAids++;
			logProgress(aid, processedAids, totalAids, startTime);
		}
	};

	const groupSize = 5;
	const groupCount = Math.ceil(totalAids / groupSize);

	logger.log(`Processing ${totalAids} aids in ${groupCount} groups.`);

	// Slice each group on demand instead of materialising all groups up front.
	for (let offset = 0; offset < totalAids; offset += groupSize) {
		const group = aidsInDB.slice(offset, offset + groupSize);
		await Promise.all(group.map((aid) => processAid(aid)));
	}
}
|
||||
|
||||
/**
 * Writes the crawl outcome of one aid to the `bili_info_crawl` table.
 *
 * @param aid - Row to update.
 * @param status - New value of the `status` column.
 * @param bvid - When truthy, also stored in the `bvid` column.
 * @param data - When truthy, also stored in the `data` column.
 *
 * The `timestamp` column is always set to the current time in seconds
 * (`Date.now() / 1000`, not rounded).
 */
function updateAidStatus(aid: number, status: string, bvid?: string, data?: string) {
	// Optional SET fragments; each one contributes exactly one placeholder.
	const bvidAssignment = bvid ? "bvid = ?," : "";
	const dataAssignment = data ? "data = ?," : "";

	const update = sqlite.prepare(`
		UPDATE bili_info_crawl
		SET status = ?,
			${bvidAssignment}
			${dataAssignment}
			timestamp = ?
		WHERE aid = ?
	`);

	// Bindings must follow the placeholder order of the statement above.
	const bindings: (string | number)[] = [status];
	if (bvid) {
		bindings.push(bvid);
	}
	if (data) {
		bindings.push(data);
	}
	bindings.push(Date.now() / 1000, aid);

	update.run(...bindings);
}
|
||||
|
||||
/**
 * Logs a one-line progress report after an aid has been handled.
 *
 * The line contains the processed/total counts, the completion percentage
 * (two decimals), the elapsed time as `HH:MM:SS`, and an ETA as `<h>h<mm>m`
 * extrapolated from the average time per processed aid so far.
 *
 * @param aid - The aid that was just handled.
 * @param processedAids - Number of aids handled so far.
 * @param totalAids - Number of aids in this run.
 * @param startTime - Run start, in milliseconds since the epoch.
 */
function logProgress(aid: number, processedAids: number, totalAids: number, startTime: number) {
	const pad2 = (value: number) => value.toString().padStart(2, "0");

	// Elapsed time, broken down into whole seconds / minutes / hours.
	const elapsedMs = Date.now() - startTime;
	const elapsedSec = Math.floor(elapsedMs / 1000);
	const elapsedMin = Math.floor(elapsedSec / 60);
	const elapsedHr = Math.floor(elapsedMin / 60);

	// Remaining time = remaining aids * average milliseconds per aid.
	const etaMs = (totalAids - processedAids) * (elapsedMs / processedAids);
	const etaMin = Math.floor(Math.floor(etaMs / 1000) / 60);
	const etaHr = Math.floor(etaMin / 60);

	const percent = ((processedAids / totalAids) * 100).toFixed(2);
	const elapsedText = `${pad2(elapsedHr)}:${pad2(elapsedMin % 60)}:${pad2(elapsedSec % 60)}`;
	const etaText = `${etaHr}h${pad2(etaMin % 60)}m`;

	logger.log(
		`Updated aid ${aid}, ${processedAids}/${totalAids}, ${percent}%, elapsed ${elapsedText}, ETA ${etaText}`
	);
}
|
||||
|
||||
// Entry point: run the crawl, always release the SQLite handle, and report any
// unexpected failure through `quit` instead of an unhandled rejection.
let failure: string | undefined;
try {
	await insertAidsToDB();
} catch (error) {
	failure = `Full snapshot failed: ${error instanceof Error ? (error.stack ?? error.message) : String(error)}`;
	// Ensure a non-zero status even if `quit` exits without an explicit code.
	process.exitCode = 1;
} finally {
	sqlite.close();
}

quit(failure);
|
||||
Loading…
Reference in New Issue
Block a user