Compare commits
10 Commits
757cbbab7e
...
37fa6791d5

Author | SHA1 | Date
---|---|---
 | 37fa6791d5 |
 | a8292d7b6b |
 | 0923a34e16 |
 | f34633dc35 |
 | 94e19690d1 |
 | 20668609dd |
 | 33c6a3c1f8 |
 | f39fef0d9a |
 | 13ed20cf5c |
 | 22b1c337ac |
@ -14,6 +14,9 @@
      <excludeFolder url="file://$MODULE_DIR$/logs" />
      <excludeFolder url="file://$MODULE_DIR$/model" />
      <excludeFolder url="file://$MODULE_DIR$/src/db" />
      <excludeFolder url="file://$MODULE_DIR$/.idea" />
      <excludeFolder url="file://$MODULE_DIR$/.vscode" />
      <excludeFolder url="file://$MODULE_DIR$/.zed" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
6
.idea/deno.xml
Normal file
@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="DenoSettings">
    <option name="useDenoValue" value="ENABLE" />
  </component>
</project>
@ -16,7 +16,6 @@
  "imports": {
    "@astrojs/node": "npm:@astrojs/node@^9.1.3",
    "@astrojs/svelte": "npm:@astrojs/svelte@^7.0.8",
    "@core/db/": "./packages/core/db/",
    "date-fns": "npm:date-fns@^4.1.0"
  }
}
@ -1,21 +1,21 @@
# Table of contents

- [Welcome](README.md)
* [Welcome](README.md)

## About

- [About CVSA Project](about/this-project.md)
- [Scope of Inclusion](about/scope-of-inclusion.md)
* [About CVSA Project](about/this-project.md)
* [Scope of Inclusion](about/scope-of-inclusion.md)

## Architecure

* [Overview](architecure/overview.md)
* [Crawler](architecure/crawler.md)
* [Database Structure](architecure/database-structure/README.md)
* [Type of Song](architecure/database-structure/type-of-song.md)
* [Message Queue](architecure/message-queue.md)
* [Artificial Intelligence](architecure/artificial-intelligence.md)

## API Doc

- [Catalog](api-doc/catalog.md)
- [Songs](api-doc/songs.md)
* [Catalog](api-doc/catalog.md)
* [Songs](api-doc/songs.md)
@ -7,23 +7,34 @@ For a **song**, it must meet the following conditions to be included in CVSA:

### Category 30

In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in [Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our [automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been uploaded to bilibili / categorized under this category.
In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in
[Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our
[automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been
uploaded to bilibili / categorized under this category.

#### NEWS

Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the parent category "Music").
Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be
entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the
parent category "Music").

According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However, there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under the "Music General" sub-category.\
We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated program's crawling.
According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly
published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However,
there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under
the "Music General" sub-category.\
We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated
program's crawling.

### At Least One Line of Chinese / Chinese Virtual Singer

The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics do not contain Chinese, the song will be included in CVSA only if a Chinese virtual singer has been used.
The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics do not contain Chinese, the
song will be included in CVSA only if a Chinese virtual singer has been used.

We define a **Chinese virtual singer** as follows:

1. The singer primarily uses a Chinese voicebank (i.e. the most widely used voicebank for the singer is Chinese)
2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or Taiwan.
2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or
Taiwan.

### Using Vocal Synthesizer
@ -9,10 +9,13 @@ The AI systems we currently use are:
Located at `/filter/` under the project root dir, it classifies a video in the
[category 30](../about/scope-of-inclusion.md#category-30) into the following categories:

* 0: Not related to Chinese vocal synthesis
* 1: An original song with Chinese vocal synthesis
* 2: A cover/remix song with Chinese vocal synthesis
- 0: Not related to Chinese vocal synthesis
- 1: An original song with Chinese vocal synthesis
- 2: A cover/remix song with Chinese vocal synthesis

### The Predictor

Located at `/pred/` under the project root dir, it predicts the future views of a video. This is a regression model that takes historical view trends of a video, other contextual information (such as the current time), and future time points to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified future time point.
Located at `/pred/` under the project root dir, it predicts the future views of a video. This is a regression model that
takes historical view trends of a video, other contextual information (such as the current time), and future time points
to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified
future time point.
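To make that input/output contract concrete, here is a minimal TypeScript sketch. It only illustrates the interface described above: a naive linear extrapolation stands in for the real regression model, and the names (`SnapshotPoint`, `predictViewIncrement`) are hypothetical, not the actual `/pred/` API.

```ts
// Hypothetical sketch: naive linear extrapolation standing in for the /pred/ regression model.
interface SnapshotPoint {
  time: number; // ms since epoch
  views: number; // cumulative view count at `time`
}

function predictViewIncrement(history: SnapshotPoint[], now: number, targetTime: number): number {
  if (history.length < 2) return 0;
  const first = history[0];
  const last = history[history.length - 1];
  const viewsPerMs = (last.views - first.views) / (last.time - first.time);
  // Predicted increase in views from "now" to the requested future time point.
  return Math.max(0, viewsPerMs * (targetTime - now));
}

// Example: predict the increment over the next 24 hours from two historical snapshots.
const now = Date.now();
const delta = predictViewIncrement(
  [{ time: now - 3_600_000, views: 90_000 }, { time: now, views: 91_200 }],
  now,
  now + 24 * 3_600_000,
);
console.log(delta); // ≈ 28,800 with the sample data above
```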
66
doc/en/architecure/crawler.md
Normal file
@ -0,0 +1,66 @@
# Crawler

Automation is at the core of CVSA’s technical architecture. The `crawler` is built to efficiently orchestrate data collection tasks using a message queue system powered by [BullMQ](https://bullmq.io/). This design enables concurrent processing across multiple stages of the data collection lifecycle.

State management and data persistence are handled using a combination of Redis (for caching and real-time data) and PostgreSQL (as the primary database).

## `crawler/db`

This module handles all database interactions for the crawler, including creation, updates, and data retrieval.

- `init.ts`: Initializes the PostgreSQL connection pool.
- `redis.ts`: Sets up the Redis client.
- `withConnection.ts`: Exports `withDbConnection`, a helper that provides a database context to any function (see the usage sketch below).
- Other files: Contain table-specific functions, with each file corresponding to a database table.
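As a usage sketch of that helper, based on the `withDbConnection` signature introduced in `packages/crawler/db/withConnection.ts` later in this diff: the function `getSongCount` and its query are illustrative, not existing code.

```ts
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { withDbConnection } from "db/withConnection.ts";

// Hypothetical table-specific helper: the connection is acquired and released by
// withDbConnection, so the caller never touches the pool directly.
export const getSongCount = (): Promise<number | undefined> =>
  withDbConnection<number>(
    async (client: Client) => {
      const res = await client.queryObject<{ count: number }>("SELECT COUNT(*) FROM songs");
      return Number(res.rows[0].count);
    },
    (error) => {
      console.error("Failed to count songs:", error);
    },
  );
```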
## `crawler/ml`

This module handles machine learning tasks, such as content classification.

- `manager.ts`: Defines a base class `AIManager` for managing ML models.
- `akari.ts`: Implements our primary classification model, `AkariProto`, which extends `AIManager`. It filters videos to determine if they should be included as songs.

## `crawler/mq`

This module manages task queuing and processing through BullMQ.

## `crawler/mq/exec`

Contains the functions executed by BullMQ workers. Examples include `getVideoInfoWorker` and `takeBulkSnapshotForVideosWorker`.

> **Terminology note:**
> In this documentation:
> - Functions in `crawler/mq/exec` are called **workers**.
> - Functions in `crawler/mq/workers` are called **BullMQ workers**.

**Design detail:**
Since BullMQ requires one handler per queue, we use a `switch` statement inside each BullMQ worker to route jobs based on their name to the correct function in `crawler/mq/exec`.
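A hedged sketch of that routing pattern follows. The queue name, job names, import path, and Redis connection options below are assumptions for illustration; only the `switch`-on-`job.name` structure reflects the described design.

```ts
import { Job, Worker } from "bullmq";
// Assumed import path; the real exec functions live somewhere under crawler/mq/exec.
import { getVideoInfoWorker, takeBulkSnapshotForVideosWorker } from "mq/exec/snapshotTick.ts";

// One BullMQ worker handles the whole queue and routes each job by its name.
const snapshotWorker = new Worker(
  "snapshot", // assumed queue name
  async (job: Job) => {
    switch (job.name) {
      case "getVideoInfo":
        return await getVideoInfoWorker(job);
      case "takeBulkSnapshotForVideos":
        return await takeBulkSnapshotForVideosWorker(job);
      default:
        throw new Error(`Unknown job name: ${job.name}`);
    }
  },
  { connection: { host: "localhost", port: 6379 } }, // assumed Redis connection
);
```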
## `crawler/mq/workers`

Houses the BullMQ worker functions. Each function handles jobs for a specific queue.

## `crawler/mq/task`

To keep worker functions clean and focused, reusable logic is extracted into this directory as **tasks**. These tasks are then imported and used by the worker functions.

## `crawler/net`

This module handles all data fetching operations. Its core component is the `NetworkDelegate`, defined in `net/delegate.ts`.

## `crawler/net/delegate.ts`

Implements robust network request handling, including:

- Rate limiting by task type and proxy
- Support for serverless functions to dynamically rotate requesting IPs
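For example, callers never fetch directly; they go through the delegate and name their task so the matching task and proxy limiters apply, mirroring `net/getVideoInfo.ts` shown later in this diff (the `aid` value here is a sample):

```ts
import networkDelegate from "net/delegate.ts";
import { VideoInfoResponse } from "net/bilibili.d.ts";

// The task name ("getVideoInfo") selects the rate limiters and proxies registered in net/delegate.ts.
const aid = 170001; // sample video aid
const data = await networkDelegate.request<VideoInfoResponse>(
  `https://api.bilibili.com/x/web-interface/view?aid=${aid}`,
  "getVideoInfo",
);
if (data.code !== 0) {
  console.error(`Error fetching metadata for ${aid}: ${data.code}-${data.message}`);
}
```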
## `crawler/utils`

A collection of utility functions shared across the crawler modules.

## `crawler/src`

Contains the main entry point of the crawler.

We use [concurrently](https://www.npmjs.com/package/concurrently) to run multiple scripts in parallel, enabling efficient execution of various processes.
@ -5,10 +5,11 @@ CVSA uses [PostgreSQL](https://www.postgresql.org/) as our database.
All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the
following tables:

* songs: stores the main information of songs
* bili\_user: stores snapshots of Bilibili user information
* all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
* labelling\_result: Contains labels of videos in `all_data` tagged by our [AI system](../artificial-intelligence.md#the-filter).
* video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views, etc.); we call this fetch process a "snapshot".
* snapshot\_schedule: The scheduling information for video snapshots.

- songs: stores the main information of songs
- bili\_user: stores snapshots of Bilibili user information
- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
- labelling\_result: Contains labels of videos in `all_data` tagged by our
[AI system](../artificial-intelligence.md#the-filter).
- video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views, etc.); we call this
fetch process a "snapshot".
- snapshot\_schedule: The scheduling information for video snapshots.
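As an illustration of how the snapshot tables are consumed, the shared `getVideoSnapshots` helper (defined in the core package elsewhere in this diff) pages through `video_snapshot` rows for one video. The connection setup and import path below are a minimal sketch and assume the usual PostgreSQL environment variables are set.

```ts
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
// Assumed path: the helper lives in the core package's db module in this change.
import { getVideoSnapshots } from "@core/db/snapshot.ts";

const client = new Client(); // reads connection info from PG* environment variables
await client.connect();

// Ten most recent snapshots of video av170001 (page 1, newest first).
const rows = await getVideoSnapshots(client, 170001, 10, 1, false, "page");
console.log(rows.map((r) => r.views));

await client.end();
```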
@ -1,7 +0,0 @@
# Message Queue

We rely on message queues to manage the various tasks that [the crawler](overview.md#crawler) needs to perform.

### Code Path

Currently, the code related to message queues is located at `lib/mq` and `src`.
@ -14,14 +14,24 @@ layout:

# Overview

The whole CVSA system can be separated into three different parts:
The CVSA is a [monorepo](https://en.wikipedia.org/wiki/Monorepo) codebase, mainly using TypeScript as the development language. With [Deno workspace](https://docs.deno.com/runtime/fundamentals/workspaces/), the major part of the codebase is under `packages/`.

* Frontend
* API
* Crawler
**Project structure:**

The frontend is driven by [Astro](https://astro.build/) and is used to display the final CVSA page. The API is driven by [Hono](https://hono.dev) and is used to query the database and provide REST/GraphQL APIs that can be called by our website, applications, or third parties. The crawler is our automatic data collector, used to automatically collect new songs from bilibili, track their statistics, etc.
```
cvsa
├── deno.json
├── packages
│   ├── backend
│   ├── core
│   ├── crawler
│   └── frontend
└── README.md
```

### Crawler
**Package Breakdown:**

Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by [BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
- **`backend`**: This package houses the server-side logic, built with the [Hono](https://hono.dev/) web framework. It's responsible for interacting with the database and exposing data through REST and GraphQL APIs for consumption by the frontend, internal applications, and third-party developers.
- **`frontend`**: The user-facing web interface of CVSA is developed using [Astro](https://astro.build/). This package handles the presentation layer, displaying information fetched from the database.
- **`crawler`**: This automated data collection system is a key component of CVSA. It's designed to automatically discover and gather new song data from bilibili, as well as track relevant statistics over time.
- **`core`**: This package contains reusable and generic code that is utilized across multiple workspaces within the CVSA monorepo.
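As a small illustration of how packages share code through the workspace: the `"@core/": "../core/"` mapping added to `packages/backend/deno.json` later in this diff lets backend code import shared constants from `core` directly. The log line below is only an example.

```ts
// In packages/backend: "@core/" resolves to ../core/ via the import map,
// so the DB_VERSION constant exported from packages/core/main.ts can be imported as:
import { DB_VERSION } from "@core/main.ts";

console.log(`core DB_VERSION: ${DB_VERSION}`);
```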
@ -9,12 +9,12 @@
|
||||
|
||||
## 技术架构 <a href="#architecture" id="architecture"></a>
|
||||
|
||||
* [概览](architecture/overview.md)
|
||||
* [数据库结构](architecture/database-structure/README.md)
|
||||
* [歌曲类型](architecture/database-structure/type-of-song.md)
|
||||
* [人工智能](architecture/artificial-intelligence.md)
|
||||
* [消息队列](architecture/message-queue/README.md)
|
||||
* [LatestVideosQueue 队列](architecture/message-queue/latestvideosqueue-dui-lie.md)
|
||||
- [概览](architecture/overview.md)
|
||||
- [数据库结构](architecture/database-structure/README.md)
|
||||
- [歌曲类型](architecture/database-structure/type-of-song.md)
|
||||
- [人工智能](architecture/artificial-intelligence.md)
|
||||
- [消息队列](architecture/message-queue/README.md)
|
||||
- [LatestVideosQueue 队列](architecture/message-queue/latestvideosqueue-dui-lie.md)
|
||||
|
||||
## API 文档 <a href="#api-doc" id="api-doc"></a>
|
||||
|
||||
|
@ -2,13 +2,14 @@
|
||||
|
||||
CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。
|
||||
|
||||
CVSA 设计了两个
|
||||
|
||||
CVSA 的所有公开数据(不包括用户的个人数据)都存储在名为 `cvsa_main` 的数据库中,该数据库包含以下表:
|
||||
|
||||
* songs:存储歌曲的主要信息
|
||||
* bilibili\_user:存储 Bilibili 用户信息快照
|
||||
* bilibili\_metadata:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据
|
||||
* labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
|
||||
* latest\_video\_snapshot:存储视频最新的快照
|
||||
* video\_snapshot:存储视频的快照,包括特定时间下视频的统计信息(播放量、点赞数等)
|
||||
* snapshot\_schedule:视频快照的规划信息,为辅助表
|
||||
|
||||
- songs:存储歌曲的主要信息
|
||||
- bilibili\_user:存储 Bilibili 用户信息快照
|
||||
- bilibili\_metadata:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据
|
||||
- labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
|
||||
- latest\_video\_snapshot:存储视频最新的快照
|
||||
- video\_snapshot:存储视频的快照,包括特定时间下视频的统计信息(播放量、点赞数等)
|
||||
- snapshot\_schedule:视频快照的规划信息,为辅助表
|
||||
|
@ -1,2 +1 @@
|
||||
# LatestVideosQueue 队列
|
||||
|
||||
|
@ -20,8 +20,7 @@ layout:
|
||||
|
||||
位于项目目录`packages/crawler` 下,它负责以下工作:
|
||||
|
||||
* 抓取新的视频并收录作品
|
||||
* 持续监控视频的播放量等统计信息
|
||||
- 抓取新的视频并收录作品
|
||||
- 持续监控视频的播放量等统计信息
|
||||
|
||||
整个 crawler 由 BullMQ 消息队列驱动,使用 Redis 和 PostgreSQL 管理状态。
|
||||
|
||||
|
@ -9,18 +9,18 @@ export const db = pool;
|
||||
export const dbCred = poolCred;
|
||||
|
||||
export const dbMiddleware = createMiddleware(async (c, next) => {
|
||||
const connection = await pool.connect();
|
||||
const connection = await pool.connect();
|
||||
c.set("db", connection);
|
||||
await next();
|
||||
connection.release();
|
||||
});
|
||||
|
||||
export const dbCredMiddleware = createMiddleware(async (c, next) => {
|
||||
const connection = await poolCred.connect();
|
||||
const connection = await poolCred.connect();
|
||||
c.set("dbCred", connection);
|
||||
await next();
|
||||
connection.release();
|
||||
})
|
||||
});
|
||||
|
||||
declare module "hono" {
|
||||
interface ContextVariableMap {
|
||||
|
@ -4,7 +4,8 @@
|
||||
"@rabbit-company/argon2id": "jsr:@rabbit-company/argon2id@^2.1.0",
|
||||
"hono": "jsr:@hono/hono@^4.7.5",
|
||||
"zod": "npm:zod",
|
||||
"yup": "npm:yup"
|
||||
"yup": "npm:yup",
|
||||
"@core/": "../core/"
|
||||
},
|
||||
"tasks": {
|
||||
"dev": "deno serve --env-file=.env --allow-env --allow-net --watch main.ts",
|
||||
|
@ -6,13 +6,13 @@ import { registerHandler } from "./register.ts";
|
||||
|
||||
export const app = new Hono();
|
||||
|
||||
app.use('/video/*', dbMiddleware);
|
||||
app.use('/user', dbCredMiddleware);
|
||||
app.use("/video/*", dbMiddleware);
|
||||
app.use("/user", dbCredMiddleware);
|
||||
|
||||
app.get("/", ...rootHandler);
|
||||
|
||||
app.get('/video/:id/snapshots', ...getSnapshotsHanlder);
|
||||
app.post('/user', ...registerHandler);
|
||||
app.get("/video/:id/snapshots", ...getSnapshotsHanlder);
|
||||
app.post("/user", ...registerHandler);
|
||||
|
||||
const fetch = app.fetch;
|
||||
|
||||
@ -20,4 +20,4 @@ export default {
|
||||
fetch,
|
||||
} satisfies Deno.ServeDefaultExport;
|
||||
|
||||
export const VERSION = "0.3.0";
|
||||
export const VERSION = "0.3.0";
|
||||
|
@ -8,7 +8,7 @@ import type { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
const RegistrationBodySchema = object({
|
||||
username: string().trim().required("Username is required").max(50, "Username cannot exceed 50 characters"),
|
||||
password: string().required("Password is required"),
|
||||
nickname: string().optional(),
|
||||
nickname: string().optional(),
|
||||
});
|
||||
|
||||
type ContextType = Context<BlankEnv & { Bindings: Bindings }, "/user", BlankInput>;
|
||||
@ -19,7 +19,7 @@ export const userExists = async (username: string, client: Client) => {
|
||||
`;
|
||||
const result = await client.queryObject(query, [username]);
|
||||
return result.rows.length > 0;
|
||||
}
|
||||
};
|
||||
|
||||
export const registerHandler = createHandlers(async (c: ContextType) => {
|
||||
const client = c.get("dbCred");
|
||||
@ -28,11 +28,11 @@ export const registerHandler = createHandlers(async (c: ContextType) => {
|
||||
const body = await RegistrationBodySchema.validate(await c.req.json());
|
||||
const { username, password, nickname } = body;
|
||||
|
||||
if (await userExists(username, client)) {
|
||||
if (await userExists(username, client)) {
|
||||
return c.json({
|
||||
message: `User "${username}" already exists.`,
|
||||
}, 400);
|
||||
}
|
||||
}
|
||||
|
||||
const hash = await Argon2id.hashEncoded(password);
|
||||
|
||||
@ -49,7 +49,7 @@ export const registerHandler = createHandlers(async (c: ContextType) => {
|
||||
return c.json({
|
||||
message: "Invalid registration data.",
|
||||
errors: e.errors,
|
||||
}, 400);
|
||||
}, 400);
|
||||
} else if (e instanceof SyntaxError) {
|
||||
return c.json({
|
||||
message: "Invalid JSON in request body.",
|
||||
|
@ -5,27 +5,25 @@ import { createHandlers } from "./utils.ts";
|
||||
export const rootHandler = createHandlers((c) => {
|
||||
let singer: Singer | Singer[] | null = null;
|
||||
const shouldShowSpecialSinger = Math.random() < 0.016;
|
||||
if (getSingerForBirthday().length !== 0){
|
||||
if (getSingerForBirthday().length !== 0) {
|
||||
singer = getSingerForBirthday();
|
||||
for (const s of singer) {
|
||||
delete s.birthday;
|
||||
s.message = `祝${s.name}生日快乐~`
|
||||
s.message = `祝${s.name}生日快乐~`;
|
||||
}
|
||||
}
|
||||
else if (shouldShowSpecialSinger) {
|
||||
singer = pickSpecialSinger();
|
||||
}
|
||||
else {
|
||||
singer = pickSinger();
|
||||
} else if (shouldShowSpecialSinger) {
|
||||
singer = pickSpecialSinger();
|
||||
} else {
|
||||
singer = pickSinger();
|
||||
}
|
||||
return c.json({
|
||||
"project": {
|
||||
"name": "中V档案馆",
|
||||
"motto": "一起唱吧,心中的歌!"
|
||||
"motto": "一起唱吧,心中的歌!",
|
||||
},
|
||||
"status": 200,
|
||||
"version": VERSION,
|
||||
"time": Date.now(),
|
||||
"singer": singer
|
||||
})
|
||||
})
|
||||
"singer": singer,
|
||||
});
|
||||
});
|
||||
|
@ -70,7 +70,7 @@ export interface Singer {
|
||||
name: string;
|
||||
color?: string;
|
||||
birthday?: string;
|
||||
message?: string;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
export const specialSingers = [
|
||||
|
@ -17,7 +17,7 @@ const idSchema = mixed().test(
|
||||
'id must be a string starting with "av" followed by digits, or "BV" followed by 10 alphanumeric characters, or a positive integer',
|
||||
async (value) => {
|
||||
if (value && await number().integer().isValid(value)) {
|
||||
const v = parseInt(value as string);
|
||||
const v = parseInt(value as string);
|
||||
return Number.isInteger(v) && v > 0;
|
||||
}
|
||||
|
||||
@ -46,10 +46,9 @@ export const getSnapshotsHanlder = createHandlers(async (c: ContextType) => {
|
||||
let videoId: string | number = idParam as string;
|
||||
if (videoId.startsWith("av")) {
|
||||
videoId = parseInt(videoId.slice(2));
|
||||
}
|
||||
else if (await number().isValid(videoId)) {
|
||||
} else if (await number().isValid(videoId)) {
|
||||
videoId = parseInt(videoId);
|
||||
}
|
||||
}
|
||||
const queryParams = await SnapshotQueryParamsSchema.validate(c.req.query());
|
||||
const { ps, pn, offset, reverse = false } = queryParams;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { createFactory } from 'hono/factory'
|
||||
import { createFactory } from "hono/factory";
|
||||
|
||||
const factory = createFactory();
|
||||
|
||||
export const createHandlers = factory.createHandlers;
|
||||
export const createHandlers = factory.createHandlers;
|
||||
|
@ -1,33 +1,62 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { VideoSnapshotType } from "@core/db/schema.d.ts";
|
||||
import type { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import type { VideoSnapshotType } from "./schema.d.ts";
|
||||
|
||||
export async function getVideoSnapshots(client: Client, aid: number, limit: number, pageOrOffset: number, reverse: boolean, mode: 'page' | 'offset' = 'page') {
|
||||
const offset = mode === 'page' ? (pageOrOffset - 1) * limit : pageOrOffset;
|
||||
const order = reverse ? 'ASC' : 'DESC';
|
||||
const query = `
|
||||
export async function getVideoSnapshots(
|
||||
client: Client,
|
||||
aid: number,
|
||||
limit: number,
|
||||
pageOrOffset: number,
|
||||
reverse: boolean,
|
||||
mode: "page" | "offset" = "page",
|
||||
) {
|
||||
const offset = mode === "page" ? (pageOrOffset - 1) * limit : pageOrOffset;
|
||||
const queryDesc: string = `
|
||||
SELECT *
|
||||
FROM video_snapshot
|
||||
WHERE aid = $1
|
||||
ORDER BY created_at ${order}
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $2
|
||||
OFFSET $3
|
||||
`;
|
||||
const queryResult = await client.queryObject<VideoSnapshotType>(query, [aid, limit, offset]);
|
||||
return queryResult.rows;
|
||||
const queryAsc: string = `
|
||||
SELECT *
|
||||
FROM video_snapshot
|
||||
WHERE aid = $1
|
||||
ORDER BY created_at
|
||||
LIMIT $2 OFFSET $3
|
||||
`;
|
||||
const query = reverse ? queryAsc : queryDesc;
|
||||
const queryResult = await client.queryObject<VideoSnapshotType>(query, [aid, limit, offset]);
|
||||
return queryResult.rows;
|
||||
}
|
||||
|
||||
export async function getVideoSnapshotsByBV(client: Client, bv: string, limit: number, pageOrOffset: number, reverse: boolean, mode: 'page' | 'offset' = 'page') {
|
||||
const offset = mode === 'page' ? (pageOrOffset - 1) * limit : pageOrOffset;
|
||||
const order = reverse ? 'ASC' : 'DESC';
|
||||
const query = `
|
||||
export async function getVideoSnapshotsByBV(
|
||||
client: Client,
|
||||
bv: string,
|
||||
limit: number,
|
||||
pageOrOffset: number,
|
||||
reverse: boolean,
|
||||
mode: "page" | "offset" = "page",
|
||||
) {
|
||||
const offset = mode === "page" ? (pageOrOffset - 1) * limit : pageOrOffset;
|
||||
const queryAsc = `
|
||||
SELECT vs.*
|
||||
FROM video_snapshot vs
|
||||
JOIN bilibili_metadata bm ON vs.aid = bm.aid
|
||||
WHERE bm.bvid = $1
|
||||
ORDER BY vs.created_at ${order}
|
||||
ORDER BY vs.created_at
|
||||
LIMIT $2
|
||||
OFFSET $3
|
||||
`
|
||||
const queryResult = await client.queryObject<VideoSnapshotType>(query, [bv, limit, offset]);
|
||||
return queryResult.rows;
|
||||
}
|
||||
`;
|
||||
const queryDesc: string = `
|
||||
SELECT *
|
||||
FROM video_snapshot vs
|
||||
JOIN bilibili_metadata bm ON vs.aid = bm.aid
|
||||
WHERE bm.bvid = $1
|
||||
ORDER BY vs.created_at DESC
|
||||
LIMIT $2 OFFSET $3
|
||||
`;
|
||||
const query = reverse ? queryAsc : queryDesc;
|
||||
const queryResult = await client.queryObject<VideoSnapshotType>(query, [bv, limit, offset]);
|
||||
return queryResult.rows;
|
||||
}
|
||||
|
@ -0,0 +1,4 @@
|
||||
{
|
||||
"name": "@cvsa/core",
|
||||
"exports": "./main.ts"
|
||||
}
|
1
packages/core/main.ts
Normal file
@ -0,0 +1 @@
|
||||
export const DB_VERSION = 10;
|
@ -1,5 +1,5 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { AllDataType, BiliUserType } from "db/schema.d.ts";
|
||||
import { AllDataType, BiliUserType } from "@core/db/schema";
|
||||
import Akari from "ml/akari.ts";
|
||||
|
||||
export async function videoExistsInAllData(client: Client, aid: number) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { postgresConfig } from "@core/db/pgConfig.ts";
|
||||
import { postgresConfig } from "@core/db/pgConfig";
|
||||
|
||||
const pool = new Pool(postgresConfig, 12);
|
||||
|
||||
|
55
packages/crawler/db/schema.d.ts
vendored
@ -1,55 +0,0 @@
|
||||
export interface AllDataType {
|
||||
id: number;
|
||||
aid: number;
|
||||
bvid: string | null;
|
||||
description: string | null;
|
||||
uid: number | null;
|
||||
tags: string | null;
|
||||
title: string | null;
|
||||
published_at: string | null;
|
||||
duration: number;
|
||||
created_at: string | null;
|
||||
}
|
||||
|
||||
export interface BiliUserType {
|
||||
id: number;
|
||||
uid: number;
|
||||
username: string;
|
||||
desc: string;
|
||||
fans: number;
|
||||
}
|
||||
|
||||
export interface VideoSnapshotType {
|
||||
id: number;
|
||||
created_at: string;
|
||||
views: number;
|
||||
coins: number;
|
||||
likes: number;
|
||||
favorites: number;
|
||||
shares: number;
|
||||
danmakus: number;
|
||||
aid: bigint;
|
||||
replies: number;
|
||||
}
|
||||
|
||||
export interface LatestSnapshotType {
|
||||
aid: number;
|
||||
time: number;
|
||||
views: number;
|
||||
danmakus: number;
|
||||
replies: number;
|
||||
likes: number;
|
||||
coins: number;
|
||||
shares: number;
|
||||
favorites: number;
|
||||
}
|
||||
|
||||
export interface SnapshotScheduleType {
|
||||
id: number;
|
||||
aid: number;
|
||||
type?: string;
|
||||
created_at: string;
|
||||
started_at?: string;
|
||||
finished_at?: string;
|
||||
status: string;
|
||||
}
|
@ -1,15 +1,12 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { LatestSnapshotType } from "db/schema.d.ts";
|
||||
import { LatestSnapshotType } from "@core/db/schema";
|
||||
import { SnapshotNumber } from "mq/task/getVideoStats.ts";
|
||||
|
||||
export async function getVideosNearMilestone(client: Client) {
|
||||
const queryResult = await client.queryObject<LatestSnapshotType>(`
|
||||
SELECT ls.*
|
||||
SELECT ls
|
||||
FROM latest_video_snapshot ls
|
||||
INNER JOIN
|
||||
songs s ON ls.aid = s.aid
|
||||
AND s.deleted = false
|
||||
WHERE
|
||||
s.deleted = false AND
|
||||
(views >= 90000 AND views < 100000) OR
|
||||
(views >= 900000 AND views < 1000000) OR
|
||||
(views >= 9900000 AND views < 10000000)
|
||||
@ -22,7 +19,7 @@ export async function getVideosNearMilestone(client: Client) {
|
||||
});
|
||||
}
|
||||
|
||||
export async function getLatestVideoSnapshot(client: Client, aid: number): Promise<null | LatestSnapshotType> {
|
||||
export async function getLatestVideoSnapshot(client: Client, aid: number): Promise<null | SnapshotNumber> {
|
||||
const queryResult = await client.queryObject<LatestSnapshotType>(
|
||||
`
|
||||
SELECT *
|
||||
|
@ -1,6 +1,5 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { formatTimestampToPsql } from "utils/formatTimestampToPostgre.ts";
|
||||
import { SnapshotScheduleType } from "./schema.d.ts";
|
||||
import { SnapshotScheduleType } from "@core/db/schema";
|
||||
import logger from "log/logger.ts";
|
||||
import { MINUTE } from "$std/datetime/constants.ts";
|
||||
import { redis } from "db/redis.ts";
|
||||
@ -11,8 +10,7 @@ const REDIS_KEY = "cvsa:snapshot_window_counts";
|
||||
function getCurrentWindowIndex(): number {
|
||||
const now = new Date();
|
||||
const minutesSinceMidnight = now.getHours() * 60 + now.getMinutes();
|
||||
const currentWindow = Math.floor(minutesSinceMidnight / 5);
|
||||
return currentWindow;
|
||||
return Math.floor(minutesSinceMidnight / 5);
|
||||
}
|
||||
|
||||
export async function refreshSnapshotWindowCounts(client: Client, redisClient: Redis) {
|
||||
@ -162,24 +160,6 @@ export async function getLatestSnapshot(client: Client, aid: number): Promise<Sn
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the number of snapshot schedules within the specified range.
|
||||
* @param client The database client.
|
||||
* @param start The start time of the range. (Timestamp in milliseconds)
|
||||
* @param end The end time of the range. (Timestamp in milliseconds)
|
||||
*/
|
||||
export async function getSnapshotScheduleCountWithinRange(client: Client, start: number, end: number) {
|
||||
const startTimeString = formatTimestampToPsql(start);
|
||||
const endTimeString = formatTimestampToPsql(end);
|
||||
const query = `
|
||||
SELECT COUNT(*) FROM snapshot_schedule
|
||||
WHERE started_at BETWEEN $1 AND $2
|
||||
AND status = 'pending'
|
||||
`;
|
||||
const res = await client.queryObject<{ count: number }>(query, [startTimeString, endTimeString]);
|
||||
return res.rows[0].count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Creates a new snapshot schedule record.
|
||||
* @param client The database client.
|
||||
@ -227,7 +207,7 @@ export async function adjustSnapshotTime(
|
||||
|
||||
const initialOffset = currentWindow + Math.max(targetOffset, 0);
|
||||
|
||||
let timePerIteration = 0;
|
||||
let timePerIteration: number;
|
||||
const MAX_ITERATIONS = 2880;
|
||||
let iters = 0;
|
||||
const t = performance.now();
|
||||
|
32
packages/crawler/db/withConnection.ts
Normal file
@ -0,0 +1,32 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { db } from "db/init.ts";
|
||||
|
||||
/**
|
||||
* Executes a function with a database connection.
|
||||
* @param operation The function that accepts the `client` as the parameter.
|
||||
* @param errorHandling Optional function to handle errors.
|
||||
* If no error handling function is provided, the error will be re-thrown.
|
||||
* @param cleanup Optional function to execute after the operation.
|
||||
* @returns The result of the operation or undefined if an error occurred.
|
||||
*/
|
||||
export async function withDbConnection<T>(
|
||||
operation: (client: Client) => Promise<T>,
|
||||
errorHandling?: (error: unknown) => void,
|
||||
cleanup?: () => void,
|
||||
): Promise<T | undefined> {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
return await operation(client);
|
||||
} catch (error) {
|
||||
if (errorHandling) {
|
||||
errorHandling(error);
|
||||
return;
|
||||
}
|
||||
throw error;
|
||||
} finally {
|
||||
client.release();
|
||||
if (cleanup) {
|
||||
cleanup();
|
||||
}
|
||||
}
|
||||
}
|
@ -37,7 +37,9 @@
|
||||
"express": "npm:express",
|
||||
"src/": "./src/",
|
||||
"onnxruntime": "npm:onnxruntime-node@1.19.2",
|
||||
"chalk": "npm:chalk"
|
||||
"chalk": "npm:chalk",
|
||||
"@core/db/schema": "../core/db/schema.d.ts",
|
||||
"@core/db/pgConfig": "../core/db/pgConfig.ts"
|
||||
},
|
||||
"exports": "./main.ts"
|
||||
}
|
||||
|
@ -4,4 +4,4 @@
|
||||
// SO HERE'S A PLACHOLDER EXPORT FOR DENO:
|
||||
export const DENO = "FUCK YOU DENO";
|
||||
// Oh, maybe export the version is a good idea
|
||||
export const VERSION = "1.0.17";
|
||||
export const VERSION = "1.0.18";
|
||||
|
@ -11,7 +11,6 @@ import {
|
||||
getLatestSnapshot,
|
||||
getSnapshotsInNextSecond,
|
||||
getVideosWithoutActiveSnapshotSchedule,
|
||||
hasAtLeast2Snapshots,
|
||||
scheduleSnapshot,
|
||||
setSnapshotStatus,
|
||||
snapshotScheduleExists,
|
||||
@ -22,12 +21,13 @@ import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { SnapshotQueue } from "mq/index.ts";
|
||||
import { insertVideoSnapshot } from "mq/task/getVideoStats.ts";
|
||||
import { NetSchedulerError } from "mq/scheduler.ts";
|
||||
import { NetSchedulerError } from "net/delegate.ts";
|
||||
import { getBiliVideoStatus, setBiliVideoStatus } from "db/allData.ts";
|
||||
import { truncate } from "utils/truncate.ts";
|
||||
import { lockManager } from "mq/lockManager.ts";
|
||||
import { getSongsPublihsedAt } from "db/songs.ts";
|
||||
import { bulkGetVideoStats } from "net/bulkGetVideoStats.ts";
|
||||
import { getAdjustedShortTermETA } from "../scheduling.ts";
|
||||
|
||||
const priorityMap: { [key: string]: number } = {
|
||||
"milestone": 1,
|
||||
@ -101,52 +101,6 @@ export const closetMilestone = (views: number) => {
|
||||
return 10000000;
|
||||
};
|
||||
|
||||
const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base);
|
||||
|
||||
/*
|
||||
* Returns the minimum ETA in hours for the next snapshot
|
||||
* @param client - Postgres client
|
||||
* @param aid - aid of the video
|
||||
* @returns ETA in hours
|
||||
*/
|
||||
export const getAdjustedShortTermETA = async (client: Client, aid: number) => {
|
||||
const latestSnapshot = await getLatestSnapshot(client, aid);
|
||||
// Immediately dispatch a snapshot if there is no snapshot yet
|
||||
if (!latestSnapshot) return 0;
|
||||
const snapshotsEnough = await hasAtLeast2Snapshots(client, aid);
|
||||
if (!snapshotsEnough) return 0;
|
||||
|
||||
const currentTimestamp = new Date().getTime();
|
||||
const timeIntervals = [3 * MINUTE, 20 * MINUTE, 1 * HOUR, 3 * HOUR, 6 * HOUR, 72 * HOUR];
|
||||
const DELTA = 0.00001;
|
||||
let minETAHours = Infinity;
|
||||
|
||||
for (const timeInterval of timeIntervals) {
|
||||
const date = new Date(currentTimestamp - timeInterval);
|
||||
const snapshot = await findClosestSnapshot(client, aid, date);
|
||||
if (!snapshot) continue;
|
||||
const hoursDiff = (latestSnapshot.created_at - snapshot.created_at) / HOUR;
|
||||
const viewsDiff = latestSnapshot.views - snapshot.views;
|
||||
if (viewsDiff <= 0) continue;
|
||||
const speed = viewsDiff / (hoursDiff + DELTA);
|
||||
const target = closetMilestone(latestSnapshot.views);
|
||||
const viewsToIncrease = target - latestSnapshot.views;
|
||||
const eta = viewsToIncrease / (speed + DELTA);
|
||||
let factor = log(2.97 / log(viewsToIncrease + 1), 1.14);
|
||||
factor = truncate(factor, 3, 100);
|
||||
const adjustedETA = eta / factor;
|
||||
if (adjustedETA < minETAHours) {
|
||||
minETAHours = adjustedETA;
|
||||
}
|
||||
}
|
||||
|
||||
if (isNaN(minETAHours)) {
|
||||
minETAHours = Infinity;
|
||||
}
|
||||
|
||||
return minETAHours;
|
||||
};
|
||||
|
||||
export const collectMilestoneSnapshotsWorker = async (_job: Job) => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
@ -161,7 +115,7 @@ export const collectMilestoneSnapshotsWorker = async (_job: Job) => {
|
||||
const minInterval = 1 * SECOND;
|
||||
const delay = truncate(scheduledNextSnapshotDelay, minInterval, maxInterval);
|
||||
const targetTime = now + delay;
|
||||
await scheduleSnapshot(client, aid, "milestone", targetTime);
|
||||
await scheduleSnapshot(client, aid, "milestone", targetTime, true);
|
||||
}
|
||||
} catch (e) {
|
||||
logger.error(e as Error, "mq", "fn:collectMilestoneSnapshotsWorker");
|
||||
@ -216,7 +170,7 @@ export const regularSnapshotsWorker = async (_job: Job) => {
|
||||
} catch (e) {
|
||||
logger.error(e as Error, "mq", "fn:regularSnapshotsWorker");
|
||||
} finally {
|
||||
lockManager.releaseLock("dispatchRegularSnapshots");
|
||||
await lockManager.releaseLock("dispatchRegularSnapshots");
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
51
packages/crawler/mq/scheduling.ts
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Returns the minimum ETA in hours for the next snapshot
|
||||
* @param client - Postgres client
|
||||
* @param aid - aid of the video
|
||||
* @returns ETA in hours
|
||||
*/
|
||||
import { findClosestSnapshot, getLatestSnapshot, hasAtLeast2Snapshots } from "db/snapshotSchedule.ts";
|
||||
import { truncate } from "utils/truncate.ts";
|
||||
import { closetMilestone } from "./exec/snapshotTick.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { HOUR, MINUTE } from "$std/datetime/constants.ts";
|
||||
|
||||
const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base);
|
||||
|
||||
export const getAdjustedShortTermETA = async (client: Client, aid: number) => {
|
||||
const latestSnapshot = await getLatestSnapshot(client, aid);
|
||||
// Immediately dispatch a snapshot if there is no snapshot yet
|
||||
if (!latestSnapshot) return 0;
|
||||
const snapshotsEnough = await hasAtLeast2Snapshots(client, aid);
|
||||
if (!snapshotsEnough) return 0;
|
||||
|
||||
const currentTimestamp = new Date().getTime();
|
||||
const timeIntervals = [3 * MINUTE, 20 * MINUTE, 1 * HOUR, 3 * HOUR, 6 * HOUR, 72 * HOUR];
|
||||
const DELTA = 0.00001;
|
||||
let minETAHours = Infinity;
|
||||
|
||||
for (const timeInterval of timeIntervals) {
|
||||
const date = new Date(currentTimestamp - timeInterval);
|
||||
const snapshot = await findClosestSnapshot(client, aid, date);
|
||||
if (!snapshot) continue;
|
||||
const hoursDiff = (latestSnapshot.created_at - snapshot.created_at) / HOUR;
|
||||
const viewsDiff = latestSnapshot.views - snapshot.views;
|
||||
if (viewsDiff <= 0) continue;
|
||||
const speed = viewsDiff / (hoursDiff + DELTA);
|
||||
const target = closetMilestone(latestSnapshot.views);
|
||||
const viewsToIncrease = target - latestSnapshot.views;
|
||||
const eta = viewsToIncrease / (speed + DELTA);
|
||||
let factor = log(2.97 / log(viewsToIncrease + 1), 1.14);
|
||||
factor = truncate(factor, 3, 100);
|
||||
const adjustedETA = eta / factor;
|
||||
if (adjustedETA < minETAHours) {
|
||||
minETAHours = adjustedETA;
|
||||
}
|
||||
}
|
||||
|
||||
if (isNaN(minETAHours)) {
|
||||
minETAHours = Infinity;
|
||||
}
|
||||
|
||||
return minETAHours;
|
||||
};
|
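Written out, the adjustment computed per lookback interval in `getAdjustedShortTermETA` is, assuming `truncate(x, 3, 100)` clamps its argument to the range [3, 100] as its other call sites suggest:

$$
\mathrm{ETA}_{\mathrm{adjusted}} = \frac{\Delta V_{\mathrm{to\ milestone}} \,/\, (\mathrm{speed} + \delta)}{\mathrm{clamp}\!\left(\log_{1.14}\dfrac{2.97}{\log_{10}(\Delta V_{\mathrm{to\ milestone}} + 1)},\ 3,\ 100\right)},
\qquad
\mathrm{speed} = \frac{\Delta V_{\mathrm{lookback}}}{\Delta t_{\mathrm{hours}} + \delta},\quad \delta = 10^{-5}
$$

The worker evaluates this for lookback intervals of 3 min, 20 min, 1 h, 3 h, 6 h and 72 h and keeps the minimum, returning 0 immediately when there is no snapshot yet or fewer than two snapshots exist.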
@ -42,6 +42,18 @@ export async function insertVideoInfo(client: Client, aid: number) {
|
||||
[data.Card.follower, uid],
|
||||
);
|
||||
}
|
||||
|
||||
const stat = data.View.stat;
|
||||
|
||||
const query: string = `
|
||||
INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
`;
|
||||
await client.queryObject(
|
||||
query,
|
||||
[aid, stat.view, stat.danmaku, stat.reply, stat.like, stat.coin, stat.share, stat.favorite],
|
||||
);
|
||||
|
||||
logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
|
||||
await ClassifyVideoQueue.add("classifyVideo", { aid });
|
||||
}
|
||||
|
@ -1,8 +1,19 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { getVideoInfo } from "net/getVideoInfo.ts";
|
||||
import { LatestSnapshotType } from "db/schema.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
export interface SnapshotNumber {
|
||||
time: number;
|
||||
views: number;
|
||||
coins: number;
|
||||
likes: number;
|
||||
favorites: number;
|
||||
shares: number;
|
||||
danmakus: number;
|
||||
aid: number;
|
||||
replies: number;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch video stats from bilibili API and insert into database
|
||||
* @returns {Promise<number|VideoSnapshot>}
|
||||
@ -17,7 +28,7 @@ export async function insertVideoSnapshot(
|
||||
client: Client,
|
||||
aid: number,
|
||||
task: string,
|
||||
): Promise<number | LatestSnapshotType> {
|
||||
): Promise<number | SnapshotNumber> {
|
||||
const data = await getVideoInfo(aid, task);
|
||||
if (typeof data == "number") {
|
||||
return data;
|
||||
@ -42,7 +53,7 @@ export async function insertVideoSnapshot(
|
||||
|
||||
logger.log(`Taken snapshot for video ${aid}.`, "net", "fn:insertVideoSnapshot");
|
||||
|
||||
const snapshot: LatestSnapshotType = {
|
||||
return {
|
||||
aid,
|
||||
views,
|
||||
danmakus,
|
||||
@ -53,6 +64,4 @@ export async function insertVideoSnapshot(
|
||||
favorites,
|
||||
time,
|
||||
};
|
||||
|
||||
return snapshot;
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
import netScheduler from "mq/scheduler.ts";
|
||||
import networkDelegate from "./delegate.ts";
|
||||
import { MediaListInfoData, MediaListInfoResponse } from "net/bilibili.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
@ -12,12 +12,11 @@ import logger from "log/logger.ts";
|
||||
* - The alicloud-fc threw an error: with error code `ALICLOUD_FC_ERROR`
|
||||
*/
|
||||
export async function bulkGetVideoStats(aids: number[]): Promise<MediaListInfoData | number> {
|
||||
const baseURL = `https://api.bilibili.com/medialist/gateway/base/resource/infos?resources=`;
|
||||
let url = baseURL;
|
||||
let url = `https://api.bilibili.com/medialist/gateway/base/resource/infos?resources=`;
|
||||
for (const aid of aids) {
|
||||
url += `${aid}:2,`;
|
||||
}
|
||||
const data = await netScheduler.request<MediaListInfoResponse>(url, "bulkSnapshot");
|
||||
const data = await networkDelegate.request<MediaListInfoResponse>(url, "bulkSnapshot");
|
||||
const errMessage = `Error fetching metadata for aid list: ${aids.join(",")}:`;
|
||||
if (data.code !== 0) {
|
||||
logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
|
||||
|
@ -19,7 +19,7 @@ interface ProxiesMap {
|
||||
[name: string]: Proxy;
|
||||
}
|
||||
|
||||
type NetSchedulerErrorCode =
|
||||
type NetworkDelegateErrorCode =
|
||||
| "NO_PROXY_AVAILABLE"
|
||||
| "PROXY_RATE_LIMITED"
|
||||
| "PROXY_NOT_FOUND"
|
||||
@ -28,9 +28,9 @@ type NetSchedulerErrorCode =
|
||||
| "ALICLOUD_PROXY_ERR";
|
||||
|
||||
export class NetSchedulerError extends Error {
|
||||
public code: NetSchedulerErrorCode;
|
||||
public code: NetworkDelegateErrorCode;
|
||||
public rawError: unknown | undefined;
|
||||
constructor(message: string, errorCode: NetSchedulerErrorCode, rawError?: unknown) {
|
||||
constructor(message: string, errorCode: NetworkDelegateErrorCode, rawError?: unknown) {
|
||||
super(message);
|
||||
this.name = "NetSchedulerError";
|
||||
this.code = errorCode;
|
||||
@ -59,7 +59,7 @@ function shuffleArray<T>(array: T[]): T[] {
|
||||
return newArray;
|
||||
}
|
||||
|
||||
class NetScheduler {
|
||||
class NetworkDelegate {
|
||||
private proxies: ProxiesMap = {};
|
||||
private providerLimiters: LimiterMap = {};
|
||||
private proxyLimiters: OptionalLimiterMap = {};
|
||||
@ -69,15 +69,6 @@ class NetScheduler {
|
||||
this.proxies[proxyName] = { type, data };
|
||||
}
|
||||
|
||||
removeProxy(proxyName: string): void {
|
||||
if (!this.proxies[proxyName]) {
|
||||
throw new Error(`Proxy ${proxyName} not found`);
|
||||
}
|
||||
delete this.proxies[proxyName];
|
||||
// Clean up associated limiters
|
||||
this.cleanupProxyLimiters(proxyName);
|
||||
}
|
||||
|
||||
private cleanupProxyLimiters(proxyName: string): void {
|
||||
for (const limiterId in this.proxyLimiters) {
|
||||
if (limiterId.startsWith(`proxy-${proxyName}`)) {
|
||||
@ -217,8 +208,7 @@ class NetScheduler {
|
||||
const providerLimiterId = "provider-" + proxyName + "-" + provider;
|
||||
if (!this.proxyLimiters[proxyLimiterId]) {
|
||||
const providerLimiter = this.providerLimiters[providerLimiterId];
|
||||
const providerAvailable = await providerLimiter.getAvailability();
|
||||
return providerAvailable;
|
||||
return await providerLimiter.getAvailability();
|
||||
}
|
||||
const proxyLimiter = this.proxyLimiters[proxyLimiterId];
|
||||
const providerLimiter = this.providerLimiters[providerLimiterId];
|
||||
@ -295,7 +285,7 @@ class NetScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
const netScheduler = new NetScheduler();
|
||||
const networkDelegate = new NetworkDelegate();
|
||||
const videoInfoRateLimiterConfig: RateLimiterConfig[] = [
|
||||
{
|
||||
window: new SlidingWindow(redis, 0.3),
|
||||
@ -369,14 +359,14 @@ but both should come after addProxy and addTask to ensure proper setup and depen
|
||||
*/
|
||||
|
||||
const regions = ["shanghai", "hangzhou", "qingdao", "beijing", "zhangjiakou", "chengdu", "shenzhen", "hohhot"];
|
||||
netScheduler.addProxy("native", "native", "");
|
||||
networkDelegate.addProxy("native", "native", "");
|
||||
for (const region of regions) {
|
||||
netScheduler.addProxy(`alicloud-${region}`, "alicloud-fc", region);
|
||||
networkDelegate.addProxy(`alicloud-${region}`, "alicloud-fc", region);
|
||||
}
|
||||
netScheduler.addTask("getVideoInfo", "bilibili", "all");
|
||||
netScheduler.addTask("getLatestVideos", "bilibili", "all");
|
||||
netScheduler.addTask("snapshotMilestoneVideo", "bilibili", regions.map((region) => `alicloud-${region}`));
|
||||
netScheduler.addTask("snapshotVideo", "bili_test", [
|
||||
networkDelegate.addTask("getVideoInfo", "bilibili", "all");
|
||||
networkDelegate.addTask("getLatestVideos", "bilibili", "all");
|
||||
networkDelegate.addTask("snapshotMilestoneVideo", "bilibili", regions.map((region) => `alicloud-${region}`));
|
||||
networkDelegate.addTask("snapshotVideo", "bili_test", [
|
||||
"alicloud-qingdao",
|
||||
"alicloud-shanghai",
|
||||
"alicloud-zhangjiakou",
|
||||
@ -384,7 +374,7 @@ netScheduler.addTask("snapshotVideo", "bili_test", [
|
||||
"alicloud-shenzhen",
|
||||
"alicloud-hohhot",
|
||||
]);
|
||||
netScheduler.addTask("bulkSnapshot", "bili_strict", [
|
||||
networkDelegate.addTask("bulkSnapshot", "bili_strict", [
|
||||
"alicloud-qingdao",
|
||||
"alicloud-shanghai",
|
||||
"alicloud-zhangjiakou",
|
||||
@ -392,13 +382,13 @@ netScheduler.addTask("bulkSnapshot", "bili_strict", [
|
||||
"alicloud-shenzhen",
|
||||
"alicloud-hohhot",
|
||||
]);
|
||||
netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig);
|
||||
netScheduler.setTaskLimiter("getLatestVideos", null);
|
||||
netScheduler.setTaskLimiter("snapshotMilestoneVideo", null);
|
||||
netScheduler.setTaskLimiter("snapshotVideo", null);
|
||||
netScheduler.setTaskLimiter("bulkSnapshot", null);
|
||||
netScheduler.setProviderLimiter("bilibili", biliLimiterConfig);
|
||||
netScheduler.setProviderLimiter("bili_test", bili_test);
|
||||
netScheduler.setProviderLimiter("bili_strict", bili_strict);
|
||||
networkDelegate.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig);
|
||||
networkDelegate.setTaskLimiter("getLatestVideos", null);
|
||||
networkDelegate.setTaskLimiter("snapshotMilestoneVideo", null);
|
||||
networkDelegate.setTaskLimiter("snapshotVideo", null);
|
||||
networkDelegate.setTaskLimiter("bulkSnapshot", null);
|
||||
networkDelegate.setProviderLimiter("bilibili", biliLimiterConfig);
|
||||
networkDelegate.setProviderLimiter("bili_test", bili_test);
|
||||
networkDelegate.setProviderLimiter("bili_strict", bili_strict);
|
||||
|
||||
export default netScheduler;
|
||||
export default networkDelegate;
|
@ -1,6 +1,6 @@
|
||||
import { VideoListResponse } from "net/bilibili.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import netScheduler from "mq/scheduler.ts";
|
||||
import networkDelegate from "./delegate.ts";
|
||||
|
||||
export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[]> {
|
||||
const startFrom = 1 + pageSize * (page - 1);
|
||||
@ -8,7 +8,7 @@ export async function getLatestVideoAids(page: number = 1, pageSize: number = 10
|
||||
const range = `${startFrom}-${endTo}`;
|
||||
const errMessage = `Error fetching latest aid for ${range}:`;
|
||||
const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
|
||||
const data = await netScheduler.request<VideoListResponse>(url, "getLatestVideos");
|
||||
const data = await networkDelegate.request<VideoListResponse>(url, "getLatestVideos");
|
||||
if (data.code != 0) {
|
||||
logger.error(errMessage + data.message, "net", "getLastestVideos");
|
||||
return [];
|
||||
|
@ -1,10 +1,10 @@
|
||||
import netScheduler from "mq/scheduler.ts";
|
||||
import networkDelegate from "./delegate.ts";
|
||||
import { VideoDetailsData, VideoDetailsResponse } from "net/bilibili.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
export async function getVideoDetails(aid: number): Promise<VideoDetailsData | null> {
|
||||
const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`;
|
||||
const data = await netScheduler.request<VideoDetailsResponse>(url, "getVideoInfo");
|
||||
const data = await networkDelegate.request<VideoDetailsResponse>(url, "getVideoInfo");
|
||||
const errMessage = `Error fetching metadata for ${aid}:`;
|
||||
if (data.code !== 0) {
|
||||
logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
|
||||
|
@ -1,4 +1,4 @@
|
||||
import netScheduler from "mq/scheduler.ts";
|
||||
import networkDelegate from "./delegate.ts";
|
||||
import { VideoInfoData, VideoInfoResponse } from "net/bilibili.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
@ -17,7 +17,7 @@ import logger from "log/logger.ts";
|
||||
*/
|
||||
export async function getVideoInfo(aid: number, task: string): Promise<VideoInfoData | number> {
|
||||
const url = `https://api.bilibili.com/x/web-interface/view?aid=${aid}`;
|
||||
const data = await netScheduler.request<VideoInfoResponse>(url, task);
|
||||
const data = await networkDelegate.request<VideoInfoResponse>(url, task);
|
||||
const errMessage = `Error fetching metadata for ${aid}:`;
|
||||
if (data.code !== 0) {
|
||||
logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
|
||||
|
@ -18,7 +18,7 @@ Deno.addSignalListener("SIGTERM", async () => {
|
||||
Deno.exit();
|
||||
});
|
||||
|
||||
Akari.init();
|
||||
await Akari.init();
|
||||
|
||||
const filterWorker = new Worker(
|
||||
"classifyVideo",
|
||||
|
@ -19,6 +19,6 @@ export default defineConfig({
|
||||
allow: [".", "../../"],
|
||||
},
|
||||
},
|
||||
plugins: [tsconfigPaths()]
|
||||
plugins: [tsconfigPaths()],
|
||||
},
|
||||
});
|
||||
|
@ -1 +1 @@
|
||||
export const VERSION = "1.2.7";
|
||||
export const VERSION = "1.3.8";
|
||||
|
@ -1,25 +1,25 @@
|
||||
{
|
||||
"name": "frontend",
|
||||
"type": "module",
|
||||
"version": "0.0.1",
|
||||
"scripts": {
|
||||
"dev": "astro dev",
|
||||
"build": "astro build",
|
||||
"preview": "astro preview",
|
||||
"astro": "astro"
|
||||
},
|
||||
"dependencies": {
|
||||
"@astrojs/tailwind": "^6.0.2",
|
||||
"argon2id": "^1.0.1",
|
||||
"astro": "^5.5.5",
|
||||
"autoprefixer": "^10.4.21",
|
||||
"pg": "^8.11.11",
|
||||
"postcss": "^8.5.3",
|
||||
"tailwindcss": "^3.0.24",
|
||||
"vite-tsconfig-paths": "^5.1.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@rollup/plugin-wasm": "^6.2.2",
|
||||
"@types/pg": "^8.11.11"
|
||||
}
|
||||
"name": "frontend",
|
||||
"type": "module",
|
||||
"version": "0.0.1",
|
||||
"scripts": {
|
||||
"dev": "astro dev",
|
||||
"build": "astro build",
|
||||
"preview": "astro preview",
|
||||
"astro": "astro"
|
||||
},
|
||||
"dependencies": {
|
||||
"@astrojs/tailwind": "^6.0.2",
|
||||
"argon2id": "^1.0.1",
|
||||
"astro": "^5.5.5",
|
||||
"autoprefixer": "^10.4.21",
|
||||
"pg": "^8.11.11",
|
||||
"postcss": "^8.5.3",
|
||||
"tailwindcss": "^3.0.24",
|
||||
"vite-tsconfig-paths": "^5.1.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@rollup/plugin-wasm": "^6.2.2",
|
||||
"@types/pg": "^8.11.11"
|
||||
}
|
||||
}
|
||||
|
@ -1,13 +1,25 @@
|
||||
const N_1024 = BigInt("129023318876534346704360951712586568674758913224876821534686030409476129469193481910786173836188085930974906857867802234113909470848523288588793477904039083513378341278558405407018889387577114155572311708428733260891448259786041525189132461448841652472631435226032063278124857443496954605482776113964107326943")
|
||||
const N_1024 = BigInt(
|
||||
"129023318876534346704360951712586568674758913224876821534686030409476129469193481910786173836188085930974906857867802234113909470848523288588793477904039083513378341278558405407018889387577114155572311708428733260891448259786041525189132461448841652472631435226032063278124857443496954605482776113964107326943",
|
||||
);
|
||||
|
||||
const N_2048 = BigInt("23987552118069940970878653610463005981599204778388399885550631951871084945075866571231062435627294546200946516668493107358732376187241747090707087544153108117326163500579370560400058549184722138636116585329496684877258304519458316233517215780035360354808658620079068489084797380781488445517430961701007542207001544091884001098497324624368085682074645221148086075871342544591022944384890014176612259729018968864426602901247715051556212559854689574013699665035317257438297910516976812428036717668766321871780963854649899276251822244719887233041422346429752896925499321431273560130952088238625622570366815755926694833109")
|
||||
const N_2048 = BigInt(
|
||||
"23987552118069940970878653610463005981599204778388399885550631951871084945075866571231062435627294546200946516668493107358732376187241747090707087544153108117326163500579370560400058549184722138636116585329496684877258304519458316233517215780035360354808658620079068489084797380781488445517430961701007542207001544091884001098497324624368085682074645221148086075871342544591022944384890014176612259729018968864426602901247715051556212559854689574013699665035317257438297910516976812428036717668766321871780963854649899276251822244719887233041422346429752896925499321431273560130952088238625622570366815755926694833109",
|
||||
);
|
||||
|
||||
const N_1792 = BigInt("23987552118069940970878653610463005981599204778388399885550631951871084945075866571231062435627294546200946516668493107358732376187241747090707087544153108117326163500579370560400058549184722138636116585329496684877258304519458316233517215780035360354808658620079068489084797380781488445517430961701007542207001544091884001098497324624368085682074645221148086075871342544591022944384890014176612259729018968864426602901247715051556212559854689574013699665035317257438297910516976812428036717668766321871780963854649899276251822244719887233041422346429752896925499321431273560130952088238625622570366815755926694833109")
|
||||
const N_1792 = BigInt(
|
||||
"23987552118069940970878653610463005981599204778388399885550631951871084945075866571231062435627294546200946516668493107358732376187241747090707087544153108117326163500579370560400058549184722138636116585329496684877258304519458316233517215780035360354808658620079068489084797380781488445517430961701007542207001544091884001098497324624368085682074645221148086075871342544591022944384890014176612259729018968864426602901247715051556212559854689574013699665035317257438297910516976812428036717668766321871780963854649899276251822244719887233041422346429752896925499321431273560130952088238625622570366815755926694833109",
|
||||
);
|
||||
|
||||
const N_1536 = BigInt("1694330250214463438908848400950857073137355630337290254958754184668036770489801447652464038218330711288158361242955860326168191830448553710492926795708495297280933502917598985378231124113971732841791156356676046934277122699383776036675381503510992810963611269045078440132744168908318454891211962146563551929591147663448816841024591820348784855441153716551049843185172472891407933214238000452095646085222944171689449292644270516031799660928056315886939284985905227")
|
||||
const N_1536 = BigInt(
|
||||
"1694330250214463438908848400950857073137355630337290254958754184668036770489801447652464038218330711288158361242955860326168191830448553710492926795708495297280933502917598985378231124113971732841791156356676046934277122699383776036675381503510992810963611269045078440132744168908318454891211962146563551929591147663448816841024591820348784855441153716551049843185172472891407933214238000452095646085222944171689449292644270516031799660928056315886939284985905227",
|
||||
);
|
||||
|
||||
const N_3072 = BigInt("4432919939296042464443862503456460073874727648022810391370558006281079088795179408238989283371442564716849343712703672836423961818025813387453469700639513190304802553045342607888612037304066433501317127429264242784608682213025490491212489901736408833027611579294436675682774458141490718959615677971745638214649336218217578937534746160749039668886450447773018369168258067682196337978245372237157696236362344796867228581553446331915147012787367438751646936429739232247148712001806846526947508445039707404287951727838234648917450736371192435665040644040487427986702098273581288935278964444790007953559851323281510927332862225214878776790605026472021669614552481167977412450477230442015077669503312683966631454347169703030544483487968842349634064181183599641180349414682042575010303056241481622837185325228233789954078775053744988023738762706404546546146837242590884760044438874357295029411988267287001033032827035809135092270843")
|
||||
const N_3072 = BigInt(
|
||||
"4432919939296042464443862503456460073874727648022810391370558006281079088795179408238989283371442564716849343712703672836423961818025813387453469700639513190304802553045342607888612037304066433501317127429264242784608682213025490491212489901736408833027611579294436675682774458141490718959615677971745638214649336218217578937534746160749039668886450447773018369168258067682196337978245372237157696236362344796867228581553446331915147012787367438751646936429739232247148712001806846526947508445039707404287951727838234648917450736371192435665040644040487427986702098273581288935278964444790007953559851323281510927332862225214878776790605026472021669614552481167977412450477230442015077669503312683966631454347169703030544483487968842349634064181183599641180349414682042575010303056241481622837185325228233789954078775053744988023738762706404546546146837242590884760044438874357295029411988267287001033032827035809135092270843",
|
||||
);
|
||||
|
||||
const N_4096 = BigInt("703671044356805218391078271512201582198770553281951369783674142891088501340774249238173262580562112786670043634665390581120113644316651934154746357220932310140476300088580654571796404198410555061275065442553506658401183560336140989074165998202690496991174269748740565700402715364422506782445179963440819952745241176450402011121226863984008975377353558155910994380700267903933205531681076494639818328879475919332604951949178075254600102192323286738973253864238076198710173840170988339024438220034106150475640983877458155141500313471699516670799821379238743709125064098477109094533426340852518505385314780319279862586851512004686798362431227795743253799490998475141728082088984359237540124375439664236138519644100625154580910233437864328111620708697941949936338367445851449766581651338876219676721272448769082914348242483068204896479076062102236087066428603930888978596966798402915747531679758905013008059396214343112694563043918465373870648649652122703709658068801764236979191262744515840224548957285182453209028157886219424802426566456408109642062498413592155064289314088837031184200671561102160059065729282902863248815224399131391716503171191977463328439766546574118092303414702384104112719959325482439604572518549918705623086363111")
|
||||
const N_4096 = BigInt(
|
||||
"703671044356805218391078271512201582198770553281951369783674142891088501340774249238173262580562112786670043634665390581120113644316651934154746357220932310140476300088580654571796404198410555061275065442553506658401183560336140989074165998202690496991174269748740565700402715364422506782445179963440819952745241176450402011121226863984008975377353558155910994380700267903933205531681076494639818328879475919332604951949178075254600102192323286738973253864238076198710173840170988339024438220034106150475640983877458155141500313471699516670799821379238743709125064098477109094533426340852518505385314780319279862586851512004686798362431227795743253799490998475141728082088984359237540124375439664236138519644100625154580910233437864328111620708697941949936338367445851449766581651338876219676721272448769082914348242483068204896479076062102236087066428603930888978596966798402915747531679758905013008059396214343112694563043918465373870648649652122703709658068801764236979191262744515840224548957285182453209028157886219424802426566456408109642062498413592155064289314088837031184200671561102160059065729282902863248815224399131391716503171191977463328439766546574118092303414702384104112719959325482439604572518549918705623086363111",
|
||||
);
|
||||
|
||||
export const N_ARRAY = [N_1024, N_1536, N_1792, N_2048, N_3072, N_4096];
|
||||
export const N_ARRAY = [N_1024, N_1536, N_1792, N_2048, N_3072, N_4096];
|
||||
|
@ -33,7 +33,6 @@
|
||||
|
||||
参见[CVSA文档](https://docs.projectcvsa.com/)。
|
||||
|
||||
|
||||
## 开放许可
|
||||
|
||||
受本文以[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供。
|
||||
|