update: the fullSnapshot script to support high QPS
This commit is contained in:
parent
e0227ce4fa
commit
09188f201d
@ -1,7 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="DataSourcePerFileMappings">
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/02601ccd-d0d3-42a7-9b8e-7596c6269559/console.sql" value="02601ccd-d0d3-42a7-9b8e-7596c6269559" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/388ec495-a827-4dfe-9fa2-219cf8dc32d2/console.sql" value="388ec495-a827-4dfe-9fa2-219cf8dc32d2" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/443af6ce-6251-4ab5-b7a6-e0783f8da37b/console.sql" value="443af6ce-6251-4ab5-b7a6-e0783f8da37b" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/443af6ce-6251-4ab5-b7a6-e0783f8da37b/console_1.sql" value="443af6ce-6251-4ab5-b7a6-e0783f8da37b" />
|
||||
<file url="file://$APPLICATION_CONFIG_DIR$/consoles/db/443af6ce-6251-4ab5-b7a6-e0783f8da37b/console_2.sql" value="443af6ce-6251-4ab5-b7a6-e0783f8da37b" />
|
||||
</component>
|
||||
</project>
|
||||
@ -1,6 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="db-tree-configuration">
|
||||
<option name="data" value="---------------------------------------- 1:0:388ec495-a827-4dfe-9fa2-219cf8dc32d2 2:0:02601ccd-d0d3-42a7-9b8e-7596c6269559 " />
|
||||
<option name="data" value="---------------------------------------- 1:0:443af6ce-6251-4ab5-b7a6-e0783f8da37b 2:0:197971bc-297c-47ff-b49b-fb1aa0294d47 " />
|
||||
</component>
|
||||
</project>
|
||||
@ -1,6 +1,6 @@
|
||||
import logger from "@core/log";
|
||||
import type { VideoDetailsData, VideoDetailsResponse } from "@core/net/bilibili.d";
|
||||
import networkDelegate from "@core/net/delegate";
|
||||
import networkDelegate, { type RequestTasks } from "@core/net/delegate";
|
||||
|
||||
/**
|
||||
* Fetch detailed video metadata from bilibili API
|
||||
@ -8,9 +8,12 @@ import networkDelegate from "@core/net/delegate";
|
||||
* @returns The detailed metadata of the video, or null if the video does not exist
|
||||
* @throws {NetSchedulerError} The caller would need to handle this error
|
||||
*/
|
||||
export async function getVideoDetails(aid: number): Promise<VideoDetailsData | null> {
|
||||
export async function getVideoDetails(
|
||||
aid: number,
|
||||
task: RequestTasks = "getVideoInfo"
|
||||
): Promise<VideoDetailsData | null> {
|
||||
const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`;
|
||||
const { data } = await networkDelegate.request<VideoDetailsResponse>(url, "getVideoInfo");
|
||||
const { data } = await networkDelegate.request<VideoDetailsResponse>(url, task);
|
||||
const errMessage = `Error fetching metadata for ${aid}:`;
|
||||
if (data.code !== 0) {
|
||||
logger.error(`${errMessage + data.code}-${data.message}`, "net", "fn:getVideoInfo");
|
||||
|
||||
@ -27,7 +27,7 @@ const getAids = async () => {
|
||||
return (await Bun.file(aidsFile).text()).split("\n").map(Number);
|
||||
}
|
||||
const aids = await sql<{ aid: number }[]>`SELECT aid FROM bilibili_metadata`;
|
||||
return aids.map((row: any) => row.aid);
|
||||
return aids.map((row) => row.aid);
|
||||
};
|
||||
|
||||
async function addCandidates() {
|
||||
@ -36,9 +36,9 @@ async function addCandidates() {
|
||||
logger.log(`Retrieved ${aids.length} from production DB.`);
|
||||
|
||||
const existingAids = sqlite
|
||||
.prepare("SELECT aid FROM bili_info_crawl")
|
||||
.prepare<{ aid: number }, []>("SELECT aid FROM bili_info_crawl")
|
||||
.all()
|
||||
.map((row: any) => row.aid);
|
||||
.map((row) => row.aid);
|
||||
logger.log(`We have ${existingAids.length} from local DB.`);
|
||||
|
||||
const existingAidsSet = new Set(existingAids);
|
||||
@ -46,30 +46,30 @@ async function addCandidates() {
|
||||
const newAids = aids.filter((aid) => !existingAidsSet.has(aid));
|
||||
|
||||
const insertStmt = sqlite.prepare(
|
||||
`INSERT INTO bili_info_crawl (aid, status) VALUES (?, 'pending') ON CONFLICT DO NOTHING;`
|
||||
);
|
||||
`INSERT INTO bili_info_crawl (aid, status) VALUES (?, 'pending') ON CONFLICT DO NOTHING;`
|
||||
);
|
||||
|
||||
const insertMany = sqlite.transaction((data) => {
|
||||
for (const aid of data) {
|
||||
insertStmt.run(aid);
|
||||
}
|
||||
});
|
||||
for (const aid of data) {
|
||||
insertStmt.run(aid);
|
||||
}
|
||||
});
|
||||
|
||||
try {
|
||||
insertMany(newAids);
|
||||
logger.log(`Successfully added ${newAids.length} to local DB.`);
|
||||
} catch (err) {
|
||||
logger.error(["Failed to insert candidates:", err]);
|
||||
}
|
||||
insertMany(newAids);
|
||||
logger.log(`Successfully added ${newAids.length} to local DB.`);
|
||||
} catch (err) {
|
||||
logger.error(["Failed to insert candidates:", err]);
|
||||
}
|
||||
}
|
||||
|
||||
async function insertAidsToDB() {
|
||||
await addCandidates();
|
||||
|
||||
const aidsInDB = sqlite
|
||||
.prepare("SELECT aid FROM bili_info_crawl WHERE status = 'pending'")
|
||||
.prepare<{ aid: number }, []>("SELECT aid FROM bili_info_crawl WHERE status = 'pending'")
|
||||
.all()
|
||||
.map((row: any) => row.aid) as number[];
|
||||
.map((row) => row.aid) as number[];
|
||||
|
||||
const totalAids = aidsInDB.length;
|
||||
let processedAids = 0;
|
||||
@ -77,7 +77,7 @@ async function insertAidsToDB() {
|
||||
|
||||
const processAid = async (aid: number) => {
|
||||
try {
|
||||
const res = await getVideoDetails(aid);
|
||||
const res = await getVideoDetails(aid, "annualArchive");
|
||||
if (res === null) {
|
||||
updateAidStatus(aid, "failed");
|
||||
} else {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user