81 lines
2.1 KiB
TypeScript
81 lines
2.1 KiB
TypeScript
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
|
import { getLatestVideos } from "lib/net/getLatestVideos.ts";
|
|
import { insertIntoAllData, videoExistsInAllData } from "lib/db/allData.ts";
|
|
import { sleep } from "lib/utils/sleep.ts";
|
|
|
|
// Environment variables that must be defined before the database
// connection settings can be assembled.
const requiredEnvVars = [
  "DB_HOST",
  "DB_NAME",
  "DB_USER",
  "DB_PASSWORD",
  "DB_PORT",
];

// Gather every missing variable first so a single error can report all of
// them at once instead of failing on the first one.
const unsetVars: string[] = [];
for (const name of requiredEnvVars) {
  if (Deno.env.get(name) === undefined) {
    unsetVars.push(name);
  }
}

// Abort module loading when anything is missing.
if (unsetVars.length !== 0) {
  const missingList = unsetVars.join(", ");
  throw new Error(`Missing required environment variables: ${missingList}`);
}
|
|
|
|
// Connection settings read from the environment. The non-null assertions
// rely on the presence check performed earlier in this module, which throws
// when any of these variables is undefined.
const databaseHost = Deno.env.get("DB_HOST")!;
const databaseName = Deno.env.get("DB_NAME")!;
const databaseUser = Deno.env.get("DB_USER")!;
const databasePassword = Deno.env.get("DB_PASSWORD")!;
const databasePort = Deno.env.get("DB_PORT")!;

// Parse the port with an explicit radix and fail fast on garbage: without
// this check a non-numeric DB_PORT would silently become `port: NaN` and only
// surface later as an obscure connection error.
const parsedDatabasePort = Number.parseInt(databasePort, 10);
if (
  !Number.isInteger(parsedDatabasePort) ||
  parsedDatabasePort < 1 ||
  parsedDatabasePort > 65535
) {
  throw new Error(`Invalid DB_PORT value: ${JSON.stringify(databasePort)}`);
}

// Options object handed to the postgres `Client` constructor.
const postgresConfig = {
  hostname: databaseHost,
  port: parsedDatabasePort,
  database: databaseName,
  user: databaseUser,
  password: databasePassword,
};
|
|
|
|
/**
 * Creates a postgres `Client` from the module-level `postgresConfig` and
 * opens its connection.
 *
 * This function never closes the client; whoever receives it owns it.
 *
 * @returns The client, after `connect()` has resolved.
 */
async function connectToPostgres() {
  const connection = new Client(postgresConfig);
  await connection.connect();
  return connection;
}
|
|
|
|
/**
 * Walks the "latest videos" listing page by page and stores every video that
 * `videoExistsInAllData` reports as missing.
 *
 * The crawl stops when any of the following happens:
 * - a page comes back as an empty array,
 * - every video on a page already exists,
 * - more than five failures (a `null`/`undefined` page or a thrown error)
 *   have accumulated over the whole run.
 *
 * A failed page is retried (the page number is only advanced after a page
 * has been processed successfully). After every iteration, including the
 * final one, the function waits via `sleep` with a random argument in
 * [1000, 5000) — presumably milliseconds.
 *
 * The database connection opened for the crawl is always closed before the
 * returned promise settles.
 *
 * @returns A promise that resolves once crawling has stopped.
 * @throws If the database connection cannot be opened or closed.
 */
export async function insertLatestVideos(): Promise<void> {
  // Total number of failures tolerated before the crawl gives up.
  const maxFailures = 5;

  const client = await connectToPostgres();
  try {
    // NOTE(review): the starting page is hard-coded to 334 — confirm this is
    // intentional and not a leftover from a manual resume.
    let page = 334;
    // Counts failures across the whole run; it is never reset on success.
    let failCount = 0;

    while (true) {
      try {
        const videos = await getLatestVideos(page, 10);

        if (videos == null) {
          // No usable result for this page: count it and retry the same page.
          failCount++;
          if (failCount > maxFailures) {
            break;
          }
          continue;
        }

        if (videos.length === 0) {
          console.warn("No more videos found");
          break;
        }

        let allExists = true;
        for (const video of videos) {
          const videoExists = await videoExistsInAllData(client, video.aid);
          if (!videoExists) {
            allExists = false;
            // Awaited so that insert failures land in the catch below and so
            // no insert is still in flight when the client is closed.
            await insertIntoAllData(client, video);
          }
        }

        if (allExists) {
          console.log("All videos already exist in all_data, stop crawling.");
          break;
        }

        // NOTE(review): the total assumes 20 videos per page while the call
        // above passes 10 — confirm which figure is correct.
        console.log(`Page ${page} crawled, total: ${(page - 1) * 20 + videos.length} videos.`);
        page++;
      } catch (error) {
        console.error(error);
        failCount++;
        if (failCount > maxFailures) {
          break;
        }
      } finally {
        // Runs after every iteration, including ones that `break`.
        await sleep(Math.random() * 4000 + 1000);
      }
    }
  } finally {
    // Release the connection no matter how the crawl ended.
    await client.end();
  }
}
|
|
|
|
|
|
// Start the crawl as soon as this module is loaded. The promise is handled
// explicitly so that a failure (for example, the database being unreachable)
// is logged and reflected in the exit code instead of surfacing as an
// unhandled rejection.
insertLatestVideos().catch((error: unknown) => {
  console.error(error);
  Deno.exit(1);
});