diff --git a/src/db/raw/insertAidsToDB.ts b/src/db/raw/insertAidsToDB.ts index bbc8ffa..c58e286 100644 --- a/src/db/raw/insertAidsToDB.ts +++ b/src/db/raw/insertAidsToDB.ts @@ -4,7 +4,7 @@ import { getBiliBiliVideoInfo } from "./videoInfo.ts"; import { ensureDir } from "https://deno.land/std@0.113.0/fs/mod.ts"; const aidPath = "./data/2025010104_c30_aids.txt"; -const db = new Database("./data/main.db"); +const db = new Database("./data/main.db", { int64: true }); const regions = ["shanghai", "hangzhou", "qingdao", "beijing", "zhangjiakou", "chengdu", "shenzhen", "hohhot"]; const logDir = "./logs/bili-info-crawl"; const logFile = path.join(logDir, `run-${Date.now() / 1000}.log`); @@ -27,11 +27,13 @@ async function setupLogging() { await ensureDir(logDir); const logStream = await Deno.open(logFile, { write: true, create: true, append: true }); - const redirectConsole = (originalConsole: (...args: any[]) => void) => (...args: any[]) => { - const message = args.map((arg) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" "); - originalConsole(message); - logStream.write(new TextEncoder().encode(message + "\n")); - }; + const redirectConsole = + (originalConsole: (...args: any[]) => void) => + (...args: any[]) => { + const message = args.map((arg) => (typeof arg === "object" ? JSON.stringify(arg) : arg)).join(" "); + originalConsole(message); + logStream.write(new TextEncoder().encode(message + "\n")); + }; console.log = redirectConsole(console.log); console.error = redirectConsole(console.error); @@ -49,14 +51,36 @@ function isRateLimited(): boolean { async function insertAidsToDB() { const aidRawcontent = await Deno.readTextFile(aidPath); - const aids = aidRawcontent.split("\n").filter((line) => line.length > 0).map((line) => parseInt(line)); + const aids = aidRawcontent + .split("\n") + .filter((line) => line.length > 0) + .map((line) => parseInt(line)); - if (!db.prepare("SELECT COUNT(*) FROM bili_info_crawl").get()) { - const insertStmt = db.prepare("INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')"); - aids.forEach((aid) => insertStmt.run(aid)); - } + // if (!db.prepare("SELECT COUNT(*) FROM bili_info_crawl").get()) { + // const insertStmt = db.prepare("INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')"); + // aids.forEach((aid) => insertStmt.run(aid)); + // } - const aidsInDB = db.prepare("SELECT aid FROM bili_info_crawl WHERE status = 'pending' OR status = 'failed'") + // 查询数据库中已经存在的 aid + const existingAids = db + .prepare("SELECT aid FROM bili_info_crawl") + .all() + .map((row) => row.aid); + console.log(existingAids.length); + + // 将 existingAids 转换为 Set 以提高查找效率 + const existingAidsSet = new Set(existingAids); + + // 找出 aids 数组中不存在于数据库的条目 + const newAids = aids.filter((aid) => !existingAidsSet.has(aid)); + console.log(newAids.length); + + // 插入这些新条目 + const insertStmt = db.prepare("INSERT OR IGNORE INTO bili_info_crawl (aid, status) VALUES (?, 'pending')"); + newAids.forEach((aid) => insertStmt.run(aid)); + + const aidsInDB = db + .prepare("SELECT aid FROM bili_info_crawl WHERE status = 'pending' OR status = 'failed'") .all() .map((row) => row.aid) as number[]; @@ -68,18 +92,18 @@ async function insertAidsToDB() { try { const res = await getBiliBiliVideoInfo(aid, regions[processedAids % regions.length]); if (res === null) { - updateAidStatus(aid, 'failed'); + updateAidStatus(aid, "failed"); } else { const rawData = JSON.parse(res); if (rawData.code === 0) { - updateAidStatus(aid, 'success', rawData.data.View.bvid, JSON.stringify(rawData.data)); + updateAidStatus(aid, "success", rawData.data.View.bvid, JSON.stringify(rawData.data)); } else { - updateAidStatus(aid, 'error', undefined, res); + updateAidStatus(aid, "error", undefined, res); } } } catch (error) { console.error(`Error updating aid ${aid}: ${error}`); - updateAidStatus(aid, 'failed'); + updateAidStatus(aid, "failed"); } finally { processedAids++; logProgress(aid, processedAids, totalAids, startTime); @@ -108,8 +132,8 @@ function updateAidStatus(aid: number, status: string, bvid?: string, data?: stri const stmt = db.prepare(` UPDATE bili_info_crawl SET status = ?, - ${bvid ? 'bvid = ?,' : ''} - ${data ? 'data = ?,' : ''} + ${bvid ? "bvid = ?," : ""} + ${data ? "data = ?," : ""} timestamp = ? WHERE aid = ? `); @@ -130,9 +154,15 @@ function logProgress(aid: number, processedAids: number, totalAids: number, star const etaMinutes = Math.floor(etaSeconds / 60); const etaHours = Math.floor(etaMinutes / 60); - const progress = `${processedAids}/${totalAids}, ${((processedAids / totalAids) * 100).toFixed(2)}%, elapsed ${elapsedHours.toString().padStart(2, "0")}:${(elapsedMinutes % 60).toString().padStart(2, "0")}:${(elapsedSeconds % 60).toString().padStart(2, "0")}, ETA ${etaHours}h${(etaMinutes % 60).toString().padStart(2, "0")}m`; + const progress = `${processedAids}/${totalAids}, ${((processedAids / totalAids) * 100).toFixed( + 2 + )}%, elapsed ${elapsedHours.toString().padStart(2, "0")}:${(elapsedMinutes % 60).toString().padStart(2, "0")}:${( + elapsedSeconds % 60 + ) + .toString() + .padStart(2, "0")}, ETA ${etaHours}h${(etaMinutes % 60).toString().padStart(2, "0")}m`; console.log(`Updated aid ${aid}, ${progress}`); } await setupLogging(); -insertAidsToDB(); \ No newline at end of file +insertAidsToDB(); diff --git a/src/db/raw/videoInfo.ts b/src/db/raw/videoInfo.ts index 70b898e..dd0435f 100644 --- a/src/db/raw/videoInfo.ts +++ b/src/db/raw/videoInfo.ts @@ -47,7 +47,7 @@ async function proxyRequestWithRegion(url: string, region: string): Promise