diff --git a/README.md b/README.md
index 9033ec6..6a46cde 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,12 @@
纵观整个互联网,对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站:
-- [萌娘百科](https://zh.moegirl.org.cn/): 收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
-- [VCPedia](https://vcpedia.cn/): 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
-- [VocaDB](https://vocadb.net/): 一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV 等[^2],其中包含大量中文歌声合成作品。
+- [萌娘百科](https://zh.moegirl.org.cn/):
+ 收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
+- [VCPedia](https://vcpedia.cn/):
+ 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
+- [VocaDB](https://vocadb.net/): 一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV
+ 等[^2],其中包含大量中文歌声合成作品。
- [天钿Daily](https://tdd.bunnyxt.com/):一个VC相关数据交流与分享的网站。致力于VC相关数据交流,定期抓取VC相关数据,选取有意义的纬度展示。[^3]
上述网站中,或多或少存在一些不足,例如:
@@ -36,19 +39,22 @@
### 数据库
-中V档案馆使用[PostgreSQL](https://postgresql.org)作为数据库,我们承诺定期导出数据库转储 (dump) 文件并公开,其内容遵从以下协议或条款:
+中V档案馆使用[PostgreSQL](https://postgresql.org)作为数据库,我们承诺定期导出数据库转储 (dump)
+文件并公开,其内容遵从以下协议或条款:
- 数据库中的事实性数据,根据适用法律,不构成受版权保护的内容。中V档案馆放弃一切可能的权利([CC0 1.0 Universal](https://creativecommons.org/publicdomain/zero/1.0/))。
- 对于数据库中有原创性的内容(如贡献者编辑的描述性内容),如无例外,以[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)提供。
-- 对于引用、摘编或改编自萌娘百科、VCPedia的内容,以与原始协议(CC BY-NC-SA 3.0 CN)兼容的协议[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供,并注明原始协议 。
- > 根据原始协议第四条第2项内容,CC BY-NC-SA 4.0协议为与原始协议具有相同授权要素的后续版本(“可适用的协议”)。
+- 对于引用、摘编或改编自萌娘百科、VCPedia的内容,以与原始协议(CC BY-NC-SA 3.0
+ CN)兼容的协议[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供,并注明原始协议 。
+ > 根据原始协议第四条第2项内容,CC BY-NC-SA 4.0协议为与原始协议具有相同授权要素的后续版本(“可适用的协议”)。
- 中V档案馆文档使用[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)。
### 软件代码
用于构建中V档案馆的软件代码在[AGPL 3.0](https://www.gnu.org/licenses/agpl-3.0.html)许可证下公开,参见[LICENSE](./LICENSE)
-
[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。
+
[^2]: 翻译自[VocaDB](https://vocadb.net/),于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。
-[^3]: 引用自[关于 - 天钿Daily](https://tdd.bunnyxt.com/about)
\ No newline at end of file
+
+[^3]: 引用自[关于 - 天钿Daily](https://tdd.bunnyxt.com/about)
diff --git a/deno.json b/deno.json
index c2fca5b..7392dae 100644
--- a/deno.json
+++ b/deno.json
@@ -1,60 +1,60 @@
{
- "lock": false,
- "tasks": {
- "crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
- "crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
- "check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
- "cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
- "manifest": "deno task cli manifest $(pwd)",
- "start": "deno run -A --watch=static/,routes/ dev.ts",
- "build": "deno run -A dev.ts build",
- "preview": "deno run -A main.ts",
- "update": "deno run -A -r https://fresh.deno.dev/update .",
- "worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/worker.ts",
- "worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
- "adder": "deno run --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
- "bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
- "all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
- "test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
- },
- "lint": {
- "rules": {
- "tags": ["fresh", "recommended"]
- }
- },
- "exclude": ["**/_fresh/*"],
- "imports": {
- "@std/assert": "jsr:@std/assert@1",
- "$fresh/": "https://deno.land/x/fresh@1.7.3/",
- "preact": "https://esm.sh/preact@10.22.0",
- "preact/": "https://esm.sh/preact@10.22.0/",
- "@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
- "@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
- "tailwindcss": "npm:tailwindcss@3.4.1",
- "tailwindcss/": "npm:/tailwindcss@3.4.1/",
- "tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
- "$std/": "https://deno.land/std@0.216.0/",
- "@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
- "bullmq": "npm:bullmq",
- "lib/": "./lib/",
- "ioredis": "npm:ioredis",
- "@bull-board/api": "npm:@bull-board/api",
- "@bull-board/express": "npm:@bull-board/express",
- "express": "npm:express",
- "src/": "./src/",
- "onnxruntime": "npm:onnxruntime-node@1.19.2",
- "chalk": "npm:chalk"
- },
- "compilerOptions": {
- "jsx": "react-jsx",
- "jsxImportSource": "preact"
- },
- "nodeModulesDir": "auto",
- "fmt": {
- "useTabs": true,
- "lineWidth": 120,
- "indentWidth": 4,
- "semiColons": true,
- "proseWrap": "always"
- }
+ "lock": false,
+ "tasks": {
+ "crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
+ "crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
+ "check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
+ "cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
+ "manifest": "deno task cli manifest $(pwd)",
+ "start": "deno run -A --watch=static/,routes/ dev.ts",
+ "build": "deno run -A dev.ts build",
+ "preview": "deno run -A main.ts",
+ "update": "deno run -A -r https://fresh.deno.dev/update .",
+ "worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/worker.ts",
+ "worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
+ "adder": "deno run --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
+ "bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
+ "all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
+ "test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
+ },
+ "lint": {
+ "rules": {
+ "tags": ["fresh", "recommended"]
+ }
+ },
+ "exclude": ["**/_fresh/*"],
+ "imports": {
+ "@std/assert": "jsr:@std/assert@1",
+ "$fresh/": "https://deno.land/x/fresh@1.7.3/",
+ "preact": "https://esm.sh/preact@10.22.0",
+ "preact/": "https://esm.sh/preact@10.22.0/",
+ "@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
+ "@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
+ "tailwindcss": "npm:tailwindcss@3.4.1",
+ "tailwindcss/": "npm:/tailwindcss@3.4.1/",
+ "tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
+ "$std/": "https://deno.land/std@0.216.0/",
+ "@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
+ "bullmq": "npm:bullmq",
+ "lib/": "./lib/",
+ "ioredis": "npm:ioredis",
+ "@bull-board/api": "npm:@bull-board/api",
+ "@bull-board/express": "npm:@bull-board/express",
+ "express": "npm:express",
+ "src/": "./src/",
+ "onnxruntime": "npm:onnxruntime-node@1.19.2",
+ "chalk": "npm:chalk"
+ },
+ "compilerOptions": {
+ "jsx": "react-jsx",
+ "jsxImportSource": "preact"
+ },
+ "nodeModulesDir": "auto",
+ "fmt": {
+ "useTabs": true,
+ "lineWidth": 120,
+ "indentWidth": 4,
+ "semiColons": true,
+ "proseWrap": "always"
+ }
}
diff --git a/doc/en/README.md b/doc/en/README.md
index f769d98..2eadf84 100644
--- a/doc/en/README.md
+++ b/doc/en/README.md
@@ -17,7 +17,8 @@ layout:
Welcome to the CVSA Documentation!
-This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors, etc.
+This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors,
+etc.
### Jump right in
diff --git a/doc/en/SUMMARY.md b/doc/en/SUMMARY.md
index 5137229..345f141 100644
--- a/doc/en/SUMMARY.md
+++ b/doc/en/SUMMARY.md
@@ -1,22 +1,22 @@
# Table of contents
-* [Welcome](README.md)
+- [Welcome](README.md)
## About
-* [About CVSA Project](about/this-project.md)
-* [Scope of Inclusion](about/scope-of-inclusion.md)
+- [About CVSA Project](about/this-project.md)
+- [Scope of Inclusion](about/scope-of-inclusion.md)
## Architecure
-* [Overview](architecure/overview.md)
-* [Database Structure](architecure/database-structure/README.md)
- * [Type of Song](architecure/database-structure/type-of-song.md)
-* [Message Queue](architecure/message-queue/README.md)
- * [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
-* [Artificial Intelligence](architecure/artificial-intelligence.md)
+- [Overview](architecure/overview.md)
+- [Database Structure](architecure/database-structure/README.md)
+ - [Type of Song](architecure/database-structure/type-of-song.md)
+- [Message Queue](architecure/message-queue/README.md)
+ - [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
+- [Artificial Intelligence](architecure/artificial-intelligence.md)
## API Doc
-* [Catalog](api-doc/catalog.md)
-* [Songs](api-doc/songs.md)
+- [Catalog](api-doc/catalog.md)
+- [Songs](api-doc/songs.md)
diff --git a/doc/en/about/scope-of-inclusion.md b/doc/en/about/scope-of-inclusion.md
index d893e33..f8e9765 100644
--- a/doc/en/about/scope-of-inclusion.md
+++ b/doc/en/about/scope-of-inclusion.md
@@ -1,19 +1,27 @@
# Scope of Inclusion
-CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publisher, manipulators, arranger, etc), singers and voice engines / voicebanks.
+CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publishers, manipulators,
+arrangers, etc.), singers and voice engines / voicebanks.
For a **song**, it must meet the following conditions to be included in CVSA:
### Category 30
-In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is posted on Bilibili. In some special cases, this rule may not be enforced.
+In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is
+posted on Bilibili. In some special cases, this rule may not be enforced.
### At Leats One Line of Chinese
-The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
+The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports
+Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
### Using Vocal Synthesizer
-To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony vocals).
+To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony
+vocals).
-We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics, encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio) approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g., [so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).
+We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically
+modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics,
+encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio)
+approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g.,
+[so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).
diff --git a/doc/en/about/this-project.md b/doc/en/about/this-project.md
index 1d2d610..4e386f9 100644
--- a/doc/en/about/this-project.md
+++ b/doc/en/about/this-project.md
@@ -1,11 +1,13 @@
# About CVSA Project
-CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis community in a highly automation-assisted way.
+CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis
+community in a highly automation-assisted way.
-Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an automated and manually edited way:
-
-* Metadata of songs (name, duration, publisher, singer, etc.)
-* Descriptive information of songs (content introduction, creation background, lyrics, etc.)
-* Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites, likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
-* Information about artists, albums, vocal synthesizers, and voicebanks.
+Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an
+automated and manually edited way:
+- Metadata of songs (name, duration, publisher, singer, etc.)
+- Descriptive information of songs (content introduction, creation background, lyrics, etc.)
+- Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites,
+ likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
+- Information about artists, albums, vocal synthesizers, and voicebanks.
diff --git a/doc/en/api-doc/catalog.md b/doc/en/api-doc/catalog.md
index 2a57a78..eea6596 100644
--- a/doc/en/api-doc/catalog.md
+++ b/doc/en/api-doc/catalog.md
@@ -1,4 +1,3 @@
# Catalog
-* [**Songs**](songs.md)
-
+- [**Songs**](songs.md)
diff --git a/doc/en/architecure/artificial-intelligence.md b/doc/en/architecure/artificial-intelligence.md
index 849cb27..6d52e54 100644
--- a/doc/en/architecure/artificial-intelligence.md
+++ b/doc/en/architecure/artificial-intelligence.md
@@ -6,8 +6,9 @@ The AI systems we currently use are:
### The Filter
-Located at `/filter/` under project root dir, it classifies a video in the [category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
+Located at `/filter/` under project root dir, it classifies a video in the
+[category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
-* 0: Not related to Chinese vocal synthesis
-* 1: A original song with Chinese vocal synthesis
-* 2: A cover/remix song with Chinese vocal synthesis
+- 0: Not related to Chinese vocal synthesis
+- 1: An original song with Chinese vocal synthesis
+- 2: A cover/remix song with Chinese vocal synthesis
diff --git a/doc/en/architecure/database-structure/README.md b/doc/en/architecure/database-structure/README.md
index 96704b7..93e164c 100644
--- a/doc/en/architecure/database-structure/README.md
+++ b/doc/en/architecure/database-structure/README.md
@@ -2,10 +2,11 @@
CVSA uses [PostgreSQL](https://www.postgresql.org/) as our database.
-All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the following tables:
-
-* songs: stores the main information of songs
-* bili\_user: stores snapshots of Bilibili user information
-* all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
-* labelling\_result: Contains label of videos in `all_data`tagged by our [AI system](../artificial-intelligence.md#the-filter).
+All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the
+following tables:
+- songs: stores the main information of songs
+- bili\_user: stores snapshots of Bilibili user information
+- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
+- labelling\_result: Contains labels of videos in `all_data` tagged by our
+ [AI system](../artificial-intelligence.md#the-filter).
diff --git a/doc/en/architecure/database-structure/type-of-song.md b/doc/en/architecure/database-structure/type-of-song.md
index c4af1aa..1855f4a 100644
--- a/doc/en/architecure/database-structure/type-of-song.md
+++ b/doc/en/architecure/database-structure/type-of-song.md
@@ -1,6 +1,7 @@
# Type of Song
-The **Unrelated type** refers specifically to videos that are not in our [Scope of Inclusion](../../about/scope-of-inclusion.md).
+The **Unrelated type** refers specifically to videos that are not in our
+[Scope of Inclusion](../../about/scope-of-inclusion.md).
### Table: `songs`
diff --git a/doc/en/architecure/message-queue/README.md b/doc/en/architecure/message-queue/README.md
index d0a8349..4fedf39 100644
--- a/doc/en/architecure/message-queue/README.md
+++ b/doc/en/architecure/message-queue/README.md
@@ -1,2 +1 @@
# Message Queue
-
diff --git a/doc/en/architecure/message-queue/videotagsqueue.md b/doc/en/architecure/message-queue/videotagsqueue.md
index bdddddb..fed620d 100644
--- a/doc/en/architecure/message-queue/videotagsqueue.md
+++ b/doc/en/architecure/message-queue/videotagsqueue.md
@@ -2,7 +2,8 @@
### Jobs
-The VideoTagsQueue contains two jobs: `getVideoTags`and `getVideosTags`. The former is used to fetch the tags of a video, and the latter is responsible for scheduling the former.
+The VideoTagsQueue contains two jobs: `getVideoTags` and `getVideosTags`. The former is used to fetch the tags of a
+video, and the latter is responsible for scheduling the former.
### Return value
diff --git a/doc/en/architecure/overview.md b/doc/en/architecure/overview.md
index d80036e..468180f 100644
--- a/doc/en/architecure/overview.md
+++ b/doc/en/architecure/overview.md
@@ -15,4 +15,5 @@ layout:
# Overview
-Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by [BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
+Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by
+[BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
diff --git a/doc/zh/SUMMARY.md b/doc/zh/SUMMARY.md
index 981ee4b..b8cd029 100644
--- a/doc/zh/SUMMARY.md
+++ b/doc/zh/SUMMARY.md
@@ -1,22 +1,22 @@
# Table of contents
-* [欢迎](README.md)
+- [欢迎](README.md)
## 关于
-* [关于本项目](about/this-project.md)
-* [收录范围](about/scope-of-inclusion.md)
+- [关于本项目](about/this-project.md)
+- [收录范围](about/scope-of-inclusion.md)
## 技术架构
-* [概览](architecture/overview.md)
-* [数据库结构](architecture/database-structure/README.md)
- * [歌曲类型](architecture/database-structure/type-of-song.md)
-* [人工智能](architecture/artificial-intelligence.md)
-* [消息队列](architecture/message-queue/README.md)
- * [VideoTagsQueue队列](architecture/message-queue/video-tags-queue.md)
+- [概览](architecture/overview.md)
+- [数据库结构](architecture/database-structure/README.md)
+ - [歌曲类型](architecture/database-structure/type-of-song.md)
+- [人工智能](architecture/artificial-intelligence.md)
+- [消息队列](architecture/message-queue/README.md)
+ - [VideoTagsQueue队列](architecture/message-queue/video-tags-queue.md)
## API 文档
-* [目录](api-doc/catalog.md)
-* [歌曲](api-doc/songs.md)
+- [目录](api-doc/catalog.md)
+- [歌曲](api-doc/songs.md)
diff --git a/doc/zh/about/scope-of-inclusion.md b/doc/zh/about/scope-of-inclusion.md
index c985544..92ff3be 100644
--- a/doc/zh/about/scope-of-inclusion.md
+++ b/doc/zh/about/scope-of-inclusion.md
@@ -6,7 +6,8 @@
#### VOCALOID·UATU 分区
-原则上,中V档案馆中收录的歌曲必须包含在哔哩哔哩 VOCALOID·UTAU 分区(分区ID为30)下的视频中。在某些特殊情况下,此规则可能不是强制的。
+原则上,中V档案馆中收录的歌曲必须包含在哔哩哔哩 VOCALOID·UTAU
+分区(分区ID为30)下的视频中。在某些特殊情况下,此规则可能不是强制的。
#### 至少一行中文
@@ -16,4 +17,6 @@
歌曲的至少一行必须由歌声合成器生成(包括和声部分),才能被收录到中V档案馆中。
-我们将歌声合成器定义为通过算法建模声音特征并根据输入的歌词、音高等参数生成音频的软件或系统,包括基于波形拼接的(如 VOCALOID、UTAU)和基于 AI 的(如 Synthesizer V、ACE Studio)方法,**但不包括仅改变现有歌声音色的AI声音转换器**(例如 [so-vits svc](https://github.com/svc-develop-team/so-vits-svc))。
+我们将歌声合成器定义为通过算法建模声音特征并根据输入的歌词、音高等参数生成音频的软件或系统,包括基于波形拼接的(如
+VOCALOID、UTAU)和基于 AI 的(如 Synthesizer V、ACE Studio)方法,**但不包括仅改变现有歌声音色的AI声音转换器**(例如
+[so-vits svc](https://github.com/svc-develop-team/so-vits-svc))。
diff --git a/doc/zh/about/this-project.md b/doc/zh/about/this-project.md
index 9459d8e..c78d6d2 100644
--- a/doc/zh/about/this-project.md
+++ b/doc/zh/about/this-project.md
@@ -6,34 +6,33 @@
纵观整个互联网,对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站:
-* [萌娘百科](https://zh.moegirl.org.cn/): 收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
-* [VCPedia](https://vcpedia.cn/): 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
-* [VocaDB](https://vocadb.net/): [一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV 等](#user-content-fn-2)[^2],其中包含大量中文歌声合成作品。
-* [天钿Daily](https://tdd.bunnyxt.com/):一个VC相关数据交流与分享的网站。致力于VC相关数据交流,定期抓取VC相关数据,选取有意义的纬度展示。
+- [萌娘百科](https://zh.moegirl.org.cn/):
+ 收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
+- [VCPedia](https://vcpedia.cn/):
+ 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
+- [VocaDB](https://vocadb.net/):
+ [一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV 等](#user-content-fn-2)[^2],其中包含大量中文歌声合成作品。
+- [天钿Daily](https://tdd.bunnyxt.com/):一个VC相关数据交流与分享的网站。致力于VC相关数据交流,定期抓取VC相关数据,选取有意义的维度展示。
上述网站中,或多或少存在一些不足,例如:
-* 萌娘百科、VCPedia受限于传统维基,绝大多数内容依赖人工编辑。
-* VocaDB基于结构化数据库构建,由此可以依赖程序生成一些信息,但**条目收录**仍然完全依赖人工完成。
-* VocaDB主要专注于元数据展示,少有关于歌曲、作者等的描述性的文字,也缺乏描述性的背景信息。
-* 天钿Daily只展示歌曲的统计数据及历史趋势,没有关于歌曲其它信息的收集。
+- 萌娘百科、VCPedia受限于传统维基,绝大多数内容依赖人工编辑。
+- VocaDB基于结构化数据库构建,由此可以依赖程序生成一些信息,但**条目收录**仍然完全依赖人工完成。
+- VocaDB主要专注于元数据展示,少有关于歌曲、作者等的描述性的文字,也缺乏描述性的背景信息。
+- 天钿Daily只展示歌曲的统计数据及历史趋势,没有关于歌曲其它信息的收集。
因此,**中V档案馆**吸取前人经验,克服上述网站的不足,希望做到:
-* 歌曲收录(指发现歌曲并创建条目)的完全自动化
-* 歌曲元信息提取的高度自动化
-* 歌曲统计数据收集的完全自动化
-* 在程序辅助的同时欢迎并鼓励贡献者参与编辑(主要为描述性内容)或纠错
-* 在适当的许可声明下,引用来自上述源的数据,使内容更加全面、丰富。
+- 歌曲收录(指发现歌曲并创建条目)的完全自动化
+- 歌曲元信息提取的高度自动化
+- 歌曲统计数据收集的完全自动化
+- 在程序辅助的同时欢迎并鼓励贡献者参与编辑(主要为描述性内容)或纠错
+- 在适当的许可声明下,引用来自上述源的数据,使内容更加全面、丰富。
-
-
-***
+---
本文在[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供。
-
-
[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。
[^2]: 翻译自[VocaDB](https://vocadb.net/),于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。
diff --git a/doc/zh/api-doc/catalog.md b/doc/zh/api-doc/catalog.md
index a2b70ae..b76ea7a 100644
--- a/doc/zh/api-doc/catalog.md
+++ b/doc/zh/api-doc/catalog.md
@@ -1,3 +1,3 @@
# 目录
-* [歌曲](songs.md)
+- [歌曲](songs.md)
diff --git a/doc/zh/architecture/artificial-intelligence.md b/doc/zh/architecture/artificial-intelligence.md
index 8d08f07..53caba1 100644
--- a/doc/zh/architecture/artificial-intelligence.md
+++ b/doc/zh/architecture/artificial-intelligence.md
@@ -6,8 +6,8 @@ CVSA 的自动化工作流高度依赖人工智能进行信息提取和分类。
#### Filter
-位于项目根目录下的 `/filter/`,它将 [30 分区](../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中的视频分为以下类别:
+位于项目根目录下的 `/filter/`,它将 [30 分区](../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中的视频分为以下类别:
-* 0:与中文人声合成无关
-* 1:中文人声合成原创曲
-* 2:中文人声合成的翻唱/混音歌曲
+- 0:与中文人声合成无关
+- 1:中文人声合成原创曲
+- 2:中文人声合成的翻唱/混音歌曲
diff --git a/doc/zh/architecture/database-structure/README.md b/doc/zh/architecture/database-structure/README.md
index 15d0a59..fbca8b1 100644
--- a/doc/zh/architecture/database-structure/README.md
+++ b/doc/zh/architecture/database-structure/README.md
@@ -4,7 +4,7 @@ CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。
CVSA 的所有公开数据(不包括用户的个人数据)都存储在名为 `cvsa_main` 的数据库中,该数据库包含以下表:
-* songs:存储歌曲的主要信息
-* bili\_user:存储 Bilibili 用户信息快照
-* all\_data:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据。
-* labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
+- songs:存储歌曲的主要信息
+- bili\_user:存储 Bilibili 用户信息快照
+- all\_data:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据。
+- labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
diff --git a/doc/zh/architecture/database-structure/type-of-song.md b/doc/zh/architecture/database-structure/type-of-song.md
index 22aef46..94630e7 100644
--- a/doc/zh/architecture/database-structure/type-of-song.md
+++ b/doc/zh/architecture/database-structure/type-of-song.md
@@ -7,18 +7,18 @@
`songs` 表格中使用的 `type` 列。
| 类型 | 说明 |
-| -- | ---------- |
-| 0 | 不相关 |
-| 1 | 原创 |
-| 2 | 翻唱 (Cover) |
-| 3 | 混音 (Remix) |
-| 4 | 纯音乐 |
-| 10 | 其他 |
+| ---- | ------------ |
+| 0 | 不相关 |
+| 1 | 原创 |
+| 2 | 翻唱 (Cover) |
+| 3 | 混音 (Remix) |
+| 4 | 纯音乐 |
+| 10 | 其他 |
#### 表格:`labelling_result`
-| 标签 | 说明 |
-| -- | ----------- |
-| 0 | AI 标记:不相关 |
-| 1 | AI 标记:原创 |
-| 2 | AI 标记:翻唱/混音 |
+| 标签 | 说明 |
+| ---- | ------------------ |
+| 0 | AI 标记:不相关 |
+| 1 | AI 标记:原创 |
+| 2 | AI 标记:翻唱/混音 |
diff --git a/doc/zh/architecture/message-queue/README.md b/doc/zh/architecture/message-queue/README.md
index 6493393..b2312f5 100644
--- a/doc/zh/architecture/message-queue/README.md
+++ b/doc/zh/architecture/message-queue/README.md
@@ -1,2 +1 @@
# 消息队列
-
diff --git a/lib/db/allData.ts b/lib/db/allData.ts
index 0c6db08..701c112 100644
--- a/lib/db/allData.ts
+++ b/lib/db/allData.ts
@@ -1,9 +1,5 @@
-import { Client, Transaction } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
+import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { AllDataType } from "lib/db/schema.d.ts";
-import logger from "lib/log/logger.ts";
-import { formatTimestampToPsql, parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts";
-import { VideoListVideo } from "lib/net/bilibili.d.ts";
-import { HOUR, SECOND } from "$std/datetime/constants.ts";
import { modelVersion } from "lib/ml/filter_inference.ts";
export async function videoExistsInAllData(client: Client, aid: number) {
@@ -11,70 +7,8 @@ export async function videoExistsInAllData(client: Client, aid: number) {
.then((result) => result.rows[0].exists);
}
-export async function biliUserExists(client: Client, uid: number) {
+export async function userExistsInBiliUsers(client: Client, uid: number) {
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bili_user WHERE uid = $1)`, [uid])
- .then((result) => result.rows[0].exists);
-}
-
-export async function insertIntoAllData(client: Client, data: VideoListVideo) {
- logger.log(`inserted ${data.aid}`, "db-all_data");
- await client.queryObject(
- `INSERT INTO all_data (aid, bvid, description, uid, tags, title, published_at, duration)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
- ON CONFLICT (aid) DO NOTHING`,
- [
- data.aid,
- data.bvid,
- data.desc,
- data.owner.mid,
- null,
- data.title,
- formatTimestampToPsql(data.pubdate * SECOND + 8 * HOUR),
- data.duration,
- ],
- );
-}
-
-export async function getLatestVideoTimestampFromAllData(client: Client) {
- return await client.queryObject<{ published_at: string }>(
- `SELECT published_at FROM all_data ORDER BY published_at DESC LIMIT 1`,
- )
- .then((result) => {
- const date = new Date(result.rows[0].published_at);
- if (isNaN(date.getTime())) {
- return null;
- }
- return date.getTime();
- });
-}
-
-export async function videoTagsIsNull(client: Client | Transaction, aid: number) {
- return await client.queryObject<{ exists: boolean }>(
- `SELECT EXISTS(SELECT 1 FROM all_data WHERE aid = $1 AND tags IS NULL)`,
- [aid],
- ).then((result) => result.rows[0].exists);
-}
-
-export async function updateVideoTags(client: Client | Transaction, aid: number, tags: string[]) {
- return await client.queryObject(
- `UPDATE all_data SET tags = $1 WHERE aid = $2`,
- [tags.join(","), aid],
- );
-}
-
-export async function getNullVideoTagsList(client: Client) {
- const queryResult = await client.queryObject<{ aid: number; published_at: string }>(
- `SELECT aid, published_at FROM all_data WHERE tags IS NULL`,
- );
- const rows = queryResult.rows;
- return rows.map(
- (row) => {
- return {
- aid: Number(row.aid),
- published_at: parseTimestampFromPsql(row.published_at),
- };
- },
- );
}
export async function getUnlabelledVideos(client: Client) {
diff --git a/lib/db/init.ts b/lib/db/init.ts
index 2c021e5..d206872 100644
--- a/lib/db/init.ts
+++ b/lib/db/init.ts
@@ -1,5 +1,5 @@
import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
-import {postgresConfig} from "lib/db/pgConfig.ts";
+import { postgresConfig } from "lib/db/pgConfig.ts";
const pool = new Pool(postgresConfig, 12);
diff --git a/lib/db/pgConfig.ts b/lib/db/pgConfig.ts
index 4c34ef4..5410760 100644
--- a/lib/db/pgConfig.ts
+++ b/lib/db/pgConfig.ts
@@ -3,7 +3,7 @@ const requiredEnvVars = ["DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_POR
const unsetVars = requiredEnvVars.filter((key) => Deno.env.get(key) === undefined);
if (unsetVars.length > 0) {
- throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
+ throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
}
const databaseHost = Deno.env.get("DB_HOST")!;
@@ -18,4 +18,4 @@ export const postgresConfig = {
database: databaseName,
user: databaseUser,
password: databasePassword,
-};
\ No newline at end of file
+};
diff --git a/lib/db/redis.ts b/lib/db/redis.ts
index 7e8152f..51ac02c 100644
--- a/lib/db/redis.ts
+++ b/lib/db/redis.ts
@@ -1,3 +1,3 @@
import { Redis } from "ioredis";
-export const redis = new Redis({ maxRetriesPerRequest: null });
\ No newline at end of file
+export const redis = new Redis({ maxRetriesPerRequest: null });
diff --git a/lib/db/schema.d.ts b/lib/db/schema.d.ts
index db8c9a4..068f084 100644
--- a/lib/db/schema.d.ts
+++ b/lib/db/schema.d.ts
@@ -1,9 +1,9 @@
export interface AllDataType {
- aid: number;
- bvid: string | null;
- description: string | null;
- uid: number | null;
- tags: string | null;
- title: string | null;
- published_at: string | null;
-}
\ No newline at end of file
+ aid: number;
+ bvid: string | null;
+ description: string | null;
+ uid: number | null;
+ tags: string | null;
+ title: string | null;
+ published_at: string | null;
+}
diff --git a/lib/log/test.ts b/lib/log/test.ts
index 49deb8c..71c719c 100644
--- a/lib/log/test.ts
+++ b/lib/log/test.ts
@@ -9,4 +9,4 @@ logger.log("foo", "service");
logger.log("foo", "db", "insert.ts");
logger.warn("warn");
logger.error("error");
-logger.verbose("error");
\ No newline at end of file
+logger.verbose("error");
diff --git a/lib/ml/filter_inference.ts b/lib/ml/filter_inference.ts
index da9ed4a..8758b4d 100644
--- a/lib/ml/filter_inference.ts
+++ b/lib/ml/filter_inference.ts
@@ -1,7 +1,7 @@
-import {AutoTokenizer, PreTrainedTokenizer} from "@huggingface/transformers";
+import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
import * as ort from "onnxruntime";
import logger from "lib/log/logger.ts";
-import {WorkerError} from "lib/mq/schema.ts";
+import { WorkerError } from "lib/mq/schema.ts";
const tokenizerModel = "alikia2x/jina-embedding-v3-m2v-1024";
const onnxClassifierPath = "./model/video_classifier_v3_11.onnx";
@@ -66,7 +66,6 @@ async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession)
return Array.from(embeddings.data as Float32Array);
}
-
async function runClassification(embeddings: number[]): Promise {
if (!sessionClassifier) {
throw new Error("Classifier session is not initialized. Call initializeModels() first.");
@@ -85,7 +84,7 @@ export async function classifyVideo(
description: string,
tags: string,
author_info: string,
- aid: number
+ aid: number,
): Promise {
if (!sessionEmbedding) {
throw new Error("Embedding session is not initialized. Call initializeModels() first.");
@@ -97,6 +96,6 @@ export async function classifyVideo(
author_info,
], sessionEmbedding);
const probabilities = await runClassification(embeddings);
- logger.log(`Prediction result for aid: ${aid}: [${probabilities.map((p) => p.toFixed(5))}]`, "ml")
+ logger.log(`Prediction result for aid: ${aid}: [${probabilities.map((p) => p.toFixed(5))}]`, "ml");
return probabilities.indexOf(Math.max(...probabilities));
}
diff --git a/lib/ml/quant_benchmark.ts b/lib/ml/quant_benchmark.ts
index f75bf9b..ced9f99 100644
--- a/lib/ml/quant_benchmark.ts
+++ b/lib/ml/quant_benchmark.ts
@@ -1,6 +1,6 @@
-import {AutoTokenizer, PreTrainedTokenizer} from "@huggingface/transformers";
+import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
import * as ort from "onnxruntime";
-import {softmax} from "lib/ml/filter_inference.ts";
+import { softmax } from "lib/ml/filter_inference.ts";
// 配置参数
const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
diff --git a/lib/mq/exec/classifyVideo.ts b/lib/mq/exec/classifyVideo.ts
index df45def..bc7f362 100644
--- a/lib/mq/exec/classifyVideo.ts
+++ b/lib/mq/exec/classifyVideo.ts
@@ -1,6 +1,6 @@
import { Job } from "bullmq";
import { db } from "lib/db/init.ts";
-import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel} from "lib/db/allData.ts";
+import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "lib/db/allData.ts";
import { classifyVideo } from "lib/ml/filter_inference.ts";
import { ClassifyVideoQueue } from "lib/mq/index.ts";
import logger from "lib/log/logger.ts";
@@ -27,7 +27,8 @@ export const classifyVideoWorker = async (job: Job) => {
client.release();
await job.updateData({
- ...job.data, label: label,
+ ...job.data,
+ label: label,
});
return 0;
@@ -38,12 +39,12 @@ export const classifyVideosWorker = async () => {
logger.log("job:classifyVideos is locked, skipping.", "mq");
return;
}
-
+
await lockManager.acquireLock("classifyVideos");
const client = await db.connect();
const videos = await getUnlabelledVideos(client);
- logger.log(`Found ${videos.length} unlabelled videos`)
+ logger.log(`Found ${videos.length} unlabelled videos`);
client.release();
let i = 0;
diff --git a/lib/mq/exec/getLatestVideos.ts b/lib/mq/exec/getLatestVideos.ts
index 17d7677..4f795e0 100644
--- a/lib/mq/exec/getLatestVideos.ts
+++ b/lib/mq/exec/getLatestVideos.ts
@@ -1,52 +1,12 @@
import { Job } from "bullmq";
-import { insertLatestVideos } from "lib/task/insertLatestVideo.ts";
-import { LatestVideosQueue } from "lib/mq/index.ts";
-import { MINUTE } from "$std/datetime/constants.ts";
+import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
import { db } from "lib/db/init.ts";
-import { truncate } from "lib/utils/truncate.ts";
-import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
-import logger from "lib/log/logger.ts";
-import { lockManager } from "lib/mq/lockManager.ts";
-const delayMap = [5, 10, 15, 30, 60, 60];
-
-const updateQueueInterval = async (failedCount: number, delay: number) => {
- logger.log(`job:getLatestVideos added to queue, delay: ${(delay / MINUTE).toFixed(2)} minutes.`, "mq");
- await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
- every: delay,
- }, {
- data: {
- failedCount: failedCount,
- },
- });
- return;
-};
-
-const executeTask = async (client: Client, failedCount: number) => {
- const result = await insertLatestVideos(client);
- failedCount = result !== 0 ? truncate(failedCount + 1, 0, 5) : 0;
- if (failedCount !== 0) {
- await updateQueueInterval(failedCount, delayMap[failedCount] * MINUTE);
- }
- return;
-};
-
-export const getLatestVideosWorker = async (job: Job) => {
- if (await lockManager.isLocked("getLatestVideos")) {
- logger.log("job:getLatestVideos is locked, skipping.", "mq");
- return;
- }
-
- await lockManager.acquireLock("getLatestVideos");
-
- const failedCount = (job.data.failedCount ?? 0) as number;
+export const getLatestVideosWorker = async (_job: Job): Promise<void> => { // _job is unused: nothing is read from the job
const client = await db.connect();
-
try {
- await executeTask(client, failedCount);
+ await queueLatestVideos(client);
} finally {
client.release();
- await lockManager.releaseLock("getLatestVideos");
}
- return;
};
diff --git a/lib/mq/exec/getVideoInfo.ts b/lib/mq/exec/getVideoInfo.ts
new file mode 100644
index 0000000..dfc5e89
--- /dev/null
+++ b/lib/mq/exec/getVideoInfo.ts
@@ -0,0 +1,25 @@
+import { Job } from "bullmq";
+import { db } from "lib/db/init.ts";
+import { insertVideoInfo } from "lib/mq/task/getVideoInfo.ts";
+
+/**
+ * Queue processor that fetches and stores the metadata of the video named by `job.data.aid`.
+ *
+ * @param job Job whose `data.aid` identifies the video.
+ * @returns `3` when the job carries no (or a falsy) `aid`, `0` once `insertVideoInfo` has resolved.
+ */
+export const getVideoInfoWorker = async (job: Job): Promise<number> => {
+	const aid = job.data.aid;
+	// Validate before connecting so a malformed job never checks out a database client.
+	if (!aid) {
+		return 3;
+	}
+	const client = await db.connect();
+	try {
+		await insertVideoInfo(client, aid);
+		return 0;
+	} finally {
+		// Always hand the client back, including when insertVideoInfo throws.
+		client.release();
+	}
+};
diff --git a/lib/mq/exec/getVideoTags.ts b/lib/mq/exec/getVideoTags.ts
deleted file mode 100644
index 83fe26f..0000000
--- a/lib/mq/exec/getVideoTags.ts
+++ /dev/null
@@ -1,100 +0,0 @@
-import { Job } from "bullmq";
-import { VideoTagsQueue } from "lib/mq/index.ts";
-import { DAY, HOUR, MINUTE, SECOND } from "$std/datetime/constants.ts";
-import { db } from "lib/db/init.ts";
-import { truncate } from "lib/utils/truncate.ts";
-import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
-import logger from "lib/log/logger.ts";
-import { getNullVideoTagsList, updateVideoTags } from "lib/db/allData.ts";
-import { getVideoTags } from "lib/net/getVideoTags.ts";
-import { NetSchedulerError } from "lib/mq/scheduler.ts";
-import { WorkerError } from "lib/mq/schema.ts";
-
-const delayMap = [0.5, 3, 5, 15, 30, 60];
-const getJobPriority = (diff: number) => {
- let priority;
- if (diff > 14 * DAY) {
- priority = 10;
- } else if (diff > 7 * DAY) {
- priority = 7;
- } else if (diff > DAY) {
- priority = 5;
- } else if (diff > 6 * HOUR) {
- priority = 3;
- } else if (diff > HOUR) {
- priority = 2;
- } else {
- priority = 1;
- }
- return priority;
-};
-
-const executeTask = async (client: Client, aid: number, failedCount: number, job: Job) => {
- try {
- const result = await getVideoTags(aid);
- if (!result) {
- failedCount = truncate(failedCount + 1, 0, 5);
- const delay = delayMap[failedCount] * MINUTE;
- logger.log(
- `job:getVideoTags added to queue, delay: ${delayMap[failedCount]} minutes.`,
- "mq",
- );
- await VideoTagsQueue.add("getVideoTags", { aid, failedCount }, { delay, priority: 6 - failedCount });
- return 1;
- }
- await updateVideoTags(client, aid, result);
- logger.log(`Fetched tags for aid: ${aid}`, "task");
- return 0;
- } catch (e) {
- if (!(e instanceof NetSchedulerError)) {
- throw new WorkerError( e, "task", "getVideoTags/fn:executeTask");
- }
- const err = e as NetSchedulerError;
- if (err.code === "NO_AVAILABLE_PROXY" || err.code === "PROXY_RATE_LIMITED") {
- logger.warn(`No available proxy for fetching tags, delayed. aid: ${aid}`, "task");
- await VideoTagsQueue.add("getVideoTags", { aid, failedCount }, {
- delay: 25 * SECOND * Math.random() + 5 * SECOND,
- priority: job.priority,
- });
- return 2;
- }
- throw new WorkerError(err, "task", "getVideoTags/fn:executeTask");
- }
-};
-
-export const getVideoTagsWorker = async (job: Job) => {
- const failedCount = (job.data.failedCount ?? 0) as number;
- const client = await db.connect();
- const aid = job.data.aid;
- if (!aid) {
- return 3;
- }
-
- const v = await executeTask(client, aid, failedCount, job);
- client.release();
- return v;
-};
-
-export const getVideoTagsInitializer = async () => {
- const client = await db.connect();
- const videos = await getNullVideoTagsList(client);
- client.release();
- if (videos.length == 0) {
- return 4;
- }
- const count = await VideoTagsQueue.getJobCounts("wait", "delayed", "active");
- const total = count.delayed + count.active + count.wait;
- const max = 15;
- const rest = truncate(max - total, 0, max);
-
- let i = 0;
- for (const video of videos) {
- if (i > rest) return 100 + i;
- const aid = video.aid;
- const timestamp = video.published_at;
- const diff = Date.now() - timestamp;
- await VideoTagsQueue.add("getVideoTags", { aid }, { priority: getJobPriority(diff) });
- i++;
- }
- return 0;
-};
diff --git a/lib/mq/executors.ts b/lib/mq/executors.ts
index 6af60b2..85c2cc1 100644
--- a/lib/mq/executors.ts
+++ b/lib/mq/executors.ts
@@ -1 +1 @@
-export * from "lib/mq/exec/getLatestVideos.ts";
\ No newline at end of file
+export * from "lib/mq/exec/getLatestVideos.ts";
diff --git a/lib/mq/index.ts b/lib/mq/index.ts
index 4189ae5..9a22495 100644
--- a/lib/mq/index.ts
+++ b/lib/mq/index.ts
@@ -2,6 +2,4 @@ import { Queue } from "bullmq";
export const LatestVideosQueue = new Queue("latestVideos");
-export const VideoTagsQueue = new Queue("videoTags");
-
export const ClassifyVideoQueue = new Queue("classifyVideo");
diff --git a/lib/mq/init.ts b/lib/mq/init.ts
index fbfaa54..3eb2d81 100644
--- a/lib/mq/init.ts
+++ b/lib/mq/init.ts
@@ -1,19 +1,16 @@
import { MINUTE } from "$std/datetime/constants.ts";
-import { ClassifyVideoQueue, LatestVideosQueue, VideoTagsQueue } from "lib/mq/index.ts";
+import { ClassifyVideoQueue, LatestVideosQueue } from "lib/mq/index.ts";
import logger from "lib/log/logger.ts";
export async function initMQ() {
await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
- every: 1 * MINUTE
- });
- await VideoTagsQueue.upsertJobScheduler("getVideosTags", {
- every: 5 * MINUTE,
+ every: 1 * MINUTE,
immediately: true,
});
await ClassifyVideoQueue.upsertJobScheduler("classifyVideos", {
every: 5 * MINUTE,
immediately: true,
- })
+ });
logger.log("Message queue initialized.");
}
diff --git a/lib/mq/lockManager.ts b/lib/mq/lockManager.ts
index 0aa989e..f83b148 100644
--- a/lib/mq/lockManager.ts
+++ b/lib/mq/lockManager.ts
@@ -23,12 +23,12 @@ class LockManager {
const result = await this.redis.set(key, "locked", "NX");
if (result !== "OK") {
- return false;
+ return false;
}
- if (timeout) {
- await this.redis.expire(key, timeout);
- }
- return true;
+ if (timeout) {
+ await this.redis.expire(key, timeout);
+ }
+ return true;
}
/*
diff --git a/lib/mq/rateLimiter.ts b/lib/mq/rateLimiter.ts
index 41a2f4f..7f62547 100644
--- a/lib/mq/rateLimiter.ts
+++ b/lib/mq/rateLimiter.ts
@@ -7,7 +7,7 @@ export interface RateLimiterConfig {
export class RateLimiter {
private readonly configs: RateLimiterConfig[];
- private readonly configEventNames: string[];
+ private readonly configEventNames: string[];
/*
* @param name The name of the rate limiter
@@ -17,7 +17,7 @@ export class RateLimiter {
*/
constructor(name: string, configs: RateLimiterConfig[]) {
this.configs = configs;
- this.configEventNames = configs.map((_, index) => `${name}_config_${index}`);
+ this.configEventNames = configs.map((_, index) => `${name}_config_${index}`);
}
/*
@@ -53,4 +53,4 @@ export class RateLimiter {
await config.window.clear(eventName);
}
}
-}
\ No newline at end of file
+}
diff --git a/lib/mq/scheduler.ts b/lib/mq/scheduler.ts
index ba9fbb2..7d6ed80 100644
--- a/lib/mq/scheduler.ts
+++ b/lib/mq/scheduler.ts
@@ -1,7 +1,7 @@
import logger from "lib/log/logger.ts";
-import {RateLimiter, RateLimiterConfig} from "lib/mq/rateLimiter.ts";
-import {SlidingWindow} from "lib/mq/slidingWindow.ts";
-import {redis} from "lib/db/redis.ts";
+import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts";
+import { SlidingWindow } from "lib/mq/slidingWindow.ts";
+import { redis } from "lib/db/redis.ts";
import Redis from "ioredis";
import { SECOND } from "$std/datetime/constants.ts";
@@ -152,7 +152,7 @@ class NetScheduler {
const proxiesNames = this.getTaskProxies(task);
for (const proxyName of shuffleArray(proxiesNames)) {
if (await this.getProxyAvailability(proxyName, task)) {
- return await this.proxyRequest(url, proxyName, method);
+ return await this.proxyRequest(url, proxyName, task, method);
}
}
throw new NetSchedulerError("No available proxy currently.", "NO_AVAILABLE_PROXY");
@@ -186,8 +186,9 @@ class NetScheduler {
if (!force) {
const isAvailable = await this.getProxyAvailability(proxyName, task);
+ const limiter = "proxy-" + proxyName + "-" + task
if (!isAvailable) {
- throw new NetSchedulerError(`Proxy "${proxyName}" is rate limited`, "PROXY_RATE_LIMITED");
+ throw new NetSchedulerError(`Proxy "${limiter}" is rate limited`, "PROXY_RATE_LIMITED");
}
}
@@ -225,7 +226,7 @@ class NetScheduler {
logger.error(error, "redis");
return false;
}
- logger.warn(`Unhandled error: ${error.message}`, "mq", "getProxyAvailability");
+ logger.error(error, "mq", "getProxyAvailability");
return false;
}
}
@@ -237,7 +238,7 @@ class NetScheduler {
const response = await fetch(url, {
method,
- signal: controller.signal
+ signal: controller.signal,
});
clearTimeout(timeout);
@@ -281,7 +282,7 @@ const biliLimiterConfig: RateLimiterConfig[] = [
netScheduler.addProxy("native", "native", "");
netScheduler.addTask("getVideoInfo", "bilibili", "all");
netScheduler.addTask("getLatestVideos", "bilibili", "all");
-netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig)
+netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig);
netScheduler.setTaskLimiter("getLatestVideos", null);
netScheduler.setProviderLimiter("bilibili", biliLimiterConfig);
diff --git a/lib/mq/schema.ts b/lib/mq/schema.ts
index 9b48e99..07e4033 100644
--- a/lib/mq/schema.ts
+++ b/lib/mq/schema.ts
@@ -9,4 +9,4 @@ export class WorkerError extends Error {
this.service = service;
this.rawError = rawError;
}
-}
\ No newline at end of file
+}
diff --git a/lib/mq/slidingWindow.ts b/lib/mq/slidingWindow.ts
index 049a9f0..499528f 100644
--- a/lib/mq/slidingWindow.ts
+++ b/lib/mq/slidingWindow.ts
@@ -21,7 +21,7 @@ export class SlidingWindow {
async event(eventName: string): Promise {
const now = Date.now();
const key = `cvsa:sliding_window:${eventName}`;
-
+
const uniqueMember = `${now}-${Math.random()}`;
// Add current timestamp to an ordered set
await this.redis.zadd(key, now, uniqueMember);
diff --git a/lib/mq/task/getVideoInfo.ts b/lib/mq/task/getVideoInfo.ts
new file mode 100644
index 0000000..4d1c615
--- /dev/null
+++ b/lib/mq/task/getVideoInfo.ts
@@ -0,0 +1,57 @@
+import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
+import { getVideoInfo } from "lib/net/getVideoInfo.ts";
+import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
+import logger from "lib/log/logger.ts";
+import { ClassifyVideoQueue } from "lib/mq/index.ts";
+import { userExistsInBiliUsers, videoExistsInAllData } from "lib/db/allData.ts";
+
+// Tag types kept in the `tags` column; tags of any other type are dropped.
+const STORED_TAG_TYPES = ["old_channel", "topic"];
+
+/**
+ * Fetch the metadata of a video, store it, and queue the video for classification.
+ *
+ * Returns early when `aid` is already in `all_data`. Otherwise inserts the video row, inserts the
+ * uploader into `bili_user` if it is not there yet, and adds a `classifyVideo` job for the video.
+ *
+ * @param client Database client used for every query issued here.
+ * @param aid The aid of the video to fetch.
+ * @returns `null` when `getVideoInfo` yields no data; otherwise `undefined`.
+ */
+export async function insertVideoInfo(client: Client, aid: number) {
+	const videoExists = await videoExistsInAllData(client, aid);
+	if (videoExists) {
+		return;
+	}
+	const data = await getVideoInfo(aid);
+	if (data === null) {
+		return null;
+	}
+	const bvid = data.View.bvid;
+	const desc = data.View.desc;
+	const uid = data.View.owner.mid;
+	// `includes` matches on the tag type itself; the `in` operator would test array indices
+	// ("0", "1") and therefore reject every real tag type, leaving `tags` empty.
+	const tags = data.Tags
+		.filter((tag) => STORED_TAG_TYPES.includes(tag.tag_type))
+		.map((tag) => tag.tag_name).join(",");
+	const title = data.View.title;
+	const published_at = formatTimestampToPsql(data.View.pubdate);
+	const duration = data.View.duration;
+	await client.queryObject(
+		`INSERT INTO all_data (aid, bvid, description, uid, tags, title, published_at, duration)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
+		[aid, bvid, desc, uid, tags, title, published_at, duration],
+	);
+	// Check the uploader by uid, the key of the row inserted below, rather than by the video's aid.
+	// NOTE(review): assumes userExistsInBiliUsers looks rows up by uid - confirm against lib/db/allData.ts.
+	const userExists = await userExistsInBiliUsers(client, uid);
+	if (!userExists) {
+		await client.queryObject(
+			`INSERT INTO bili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`,
+			[uid, data.View.owner.name, data.Card.card.sign, data.Card.follower],
+		);
+	}
+	logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
+	await ClassifyVideoQueue.add("classifyVideo", { aid });
+}
diff --git a/lib/mq/task/queueLatestVideo.ts b/lib/mq/task/queueLatestVideo.ts
new file mode 100644
index 0000000..f688c2f
--- /dev/null
+++ b/lib/mq/task/queueLatestVideo.ts
@@ -0,0 +1,84 @@
+import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
+import { getLatestVideoAids } from "lib/net/getLatestVideoAids.ts";
+import { videoExistsInAllData } from "lib/db/allData.ts";
+import { sleep } from "lib/utils/sleep.ts";
+import { SECOND } from "$std/datetime/constants.ts";
+import logger from "lib/log/logger.ts";
+import { LatestVideosQueue } from "lib/mq/index.ts";
+
+// Size of the first request, which only probes whether anything new has been published.
+const PROBE_PAGE_SIZE = 10;
+// Page size used for the crawl proper once the probe has found a new video.
+const CRAWL_PAGE_SIZE = 30;
+
+/**
+ * Walk the newest-video list and queue a `getVideoInfo` job for every aid missing from `all_data`.
+ *
+ * A small probe request is made first. If it contains a new video, the list is then read from page 1
+ * at the larger crawl size, so that no list position is skipped: moving straight from a 10-item page 1
+ * to a 30-item page 2 would jump from item 10 to item 31. Crawling stops at the first page on which
+ * nothing new is found, or when the list is exhausted.
+ *
+ * @param client Database client used to check which aids are already stored.
+ * @returns Always `0`.
+ */
+export async function queueLatestVideos(
+	client: Client,
+): Promise<number> {
+	let page = 1;
+	let probing = true;
+	let observed = 0;
+	const videosFound = new Set<number>();
+	while (true) {
+		const pageSize = probing ? PROBE_PAGE_SIZE : CRAWL_PAGE_SIZE;
+		const aids = await getLatestVideoAids(page, pageSize);
+		if (aids.length === 0) {
+			logger.verbose("No more videos found", "net", "fn:queueLatestVideos()");
+			break;
+		}
+		let allExists = true;
+		// Stagger the queued jobs so they do not all become runnable at the same instant.
+		let delay = 0;
+		for (const aid of aids) {
+			// Already queued during this run (the probe overlaps page 1, and the list can shift).
+			if (videosFound.has(aid)) {
+				continue;
+			}
+			const videoExists = await videoExistsInAllData(client, aid);
+			if (videoExists) {
+				continue;
+			}
+			await LatestVideosQueue.add("getVideoInfo", { aid }, {
+				delay,
+				attempts: 100,
+				backoff: {
+					type: "fixed",
+					delay: SECOND * 5,
+				},
+			});
+			videosFound.add(aid);
+			allExists = false;
+			delay += Math.random() * SECOND * 0.5;
+		}
+		observed += aids.length;
+		logger.log(
+			`Page ${page} crawled, total: ${videosFound.size}/${observed} videos added/observed.`,
+			"net",
+			"fn:queueLatestVideos()",
+		);
+		if (allExists) {
+			return 0;
+		}
+		if (probing) {
+			// Re-read from page 1 at the crawl size instead of advancing.
+			probing = false;
+		} else {
+			page++;
+		}
+		// Pause between list requests: SECOND plus a random extra of up to 4000.
+		const randomTime = Math.random() * 4000;
+		const delta = SECOND;
+		await sleep(randomTime + delta);
+	}
+	return 0;
+}
diff --git a/lib/net/bilibili.d.ts b/lib/net/bilibili.d.ts
index a0f682d..16c70a0 100644
--- a/lib/net/bilibili.d.ts
+++ b/lib/net/bilibili.d.ts
@@ -1,117 +1,224 @@
interface BaseResponse<T> {
- code: number;
- message: string;
- ttl: number;
- data: T;
+ code: number;
+ message: string;
+ ttl: number;
+ data: T; // endpoint-specific payload, supplied by each alias below
}
export type VideoListResponse = BaseResponse<VideoListData>;
+export type VideoDetailsResponse = BaseResponse<VideoDetailsData>;
export type VideoTagsResponse = BaseResponse<VideoTagsData>;
+interface VideoDetailsData { // payload type used for VideoDetailsResponse
+	View: {
+		bvid: string;
+		aid: number;
+		videos: number;
+		tid: number;
+		tid_v2: number;
+		tname: string;
+		tname_v2: string;
+		copyright: number;
+		pic: string;
+		title: string;
+		pubdate: number;
+		ctime: number;
+		desc: string;
+		desc_v2: string;
+		state: number;
+		duration: number;
+		mission_id: number;
+		rights: VideoRights;
+		owner: {
+			mid: number;
+			name: string;
+			face: string;
+		};
+		stat: VideoStats;
+		argue_info: {
+			argue_msg: string;
+			argue_type: number;
+			argue_link: string;
+		};
+		dynamic: string; // was the literal type ""; widened to match VideoListVideo.dynamic
+		cid: number;
+		dimension: VideoDimension;
+		pages: VideoPage[];
+		subtitle: {
+			allow_submit: number;
+			list: VideoSubTitle[];
+		};
+		staff: VideoStaff[];
+	};
+	Card: {
+		card: {
+			mid: number;
+			name: string;
+			sex: string;
+			face: string;
+			fans: number;
+			attention: number;
+			friend: number;
+			sign: string;
+			level_info: {
+				current_level: number;
+			};
+		};
+		archive_count: number;
+		article_count: number;
+		follower: number;
+		like_num: number;
+	};
+	Tags: VideoTagsLite[];
+}
+
+interface VideoTagsLite {
+ tag_id: number;
+ tag_name: string;
+ music_id: string;
+ tag_type: string;
+ jump_url: string;
+}
+
type VideoTagsData = VideoTags[];
+type VideoStaff = {
+ mid: number;
+ title: string;
+ name: string;
+ face: string;
+ follower: number;
+};
+
+type VideoSubTitle = {
+ id: number;
+ lan: string;
+ lan_doc: string;
+ is_lock: number;
+ subtitle_url: string;
+ type: number;
+ id_str: string;
+ ai_type: number;
+ ai_status: number;
+};
+
+type VideoDimension = {
+ width: number;
+ height: number;
+ rotate: number;
+};
+
+interface VideoPage {
+ cid: number;
+ page: number;
+ from: string;
+ part: string;
+ duration: number;
+ vid: string;
+ weblink: string;
+ dimension: VideoDimension;
+ first_frame: string;
+}
+
interface VideoTags {
- tag_id: number;
- tag_name: string;
- cover: string;
- head_cover: string;
- content: string;
- short_content: string;
- type: number;
- state: number;
- ctime: number;
- count: {
- view: number;
- use: number;
- atten: number;
- }
- is_atten: number;
- likes: number;
- hates: number;
- attribute: number;
- liked: number;
- hated: number;
- extra_attr: number;
+ tag_id: number;
+ tag_name: string;
+ cover: string;
+ head_cover: string;
+ content: string;
+ short_content: string;
+ type: number;
+ state: number;
+ ctime: number;
+ count: {
+ view: number;
+ use: number;
+ atten: number;
+ };
+ is_atten: number;
+ likes: number;
+ hates: number;
+ attribute: number;
+ liked: number;
+ hated: number;
+ extra_attr: number;
}
interface VideoListData {
- archives: VideoListVideo[];
- page: {
- num: number;
- size: number;
- count: number;
- };
+ archives: VideoListVideo[];
+ page: {
+ num: number;
+ size: number;
+ count: number;
+ };
}
+type VideoRights = {
+ bp: number;
+ elec: number;
+ download: number;
+ movie: number;
+ pay: number;
+ hd5: number;
+ no_reprint: number;
+ autoplay: number;
+ ugc_pay: number;
+ is_cooperation: number;
+ ugc_pay_preview: number;
+ no_background: number;
+ arc_pay: number;
+ pay_free_watch: number;
+};
+
+type VideoStats = {
+ aid: number;
+ view: number;
+ danmaku: number;
+ reply: number;
+ favorite: number;
+ coin: number;
+ share: number;
+ now_rank: number;
+ his_rank: number;
+ like: number;
+};
+
interface VideoListVideo {
- aid: number;
- videos: number;
- tid: number;
- tname: string;
- copyright: number;
- pic: string;
- title: string;
- pubdate: number;
- ctime: number;
- desc: string;
- state: number;
- duration: number;
- mission_id?: number;
- rights: {
- bp: number;
- elec: number;
- download: number;
- movie: number;
- pay: number;
- hd5: number;
- no_reprint: number;
- autoplay: number;
- ugc_pay: number;
- is_cooperation: number;
- ugc_pay_preview: number;
- no_background: number;
- arc_pay: number;
- pay_free_watch: number;
- },
- owner: {
- mid: number;
- name: string;
- face: string;
- },
- stat: {
- aid: number;
- view: number;
- danmaku: number;
- reply: number;
- favorite: number;
- coin: number;
- share: number;
- now_rank: number;
- his_rank: number;
- like: number;
- dislike: number;
- vt: number;
- vv: number;
- },
- dynamic: string;
- cid: number;
- dimension: {
- width: number;
- height: number;
- rotate: number;
- },
- season_id?: number;
- short_link_v2: string;
- first_frame: string;
- pub_location: string;
- cover43: string;
- tidv2: number;
- tname_v2: string;
- bvid: string;
- season_type: number;
- is_ogv: number;
- ovg_info: string | null;
- rcmd_season: string;
- enable_vt: number;
- ai_rcmd: null | string;
+ aid: number;
+ videos: number;
+ tid: number;
+ tname: string;
+ copyright: number;
+ pic: string;
+ title: string;
+ pubdate: number;
+ ctime: number;
+ desc: string;
+ state: number;
+ duration: number;
+ mission_id?: number;
+ rights: VideoRights;
+ owner: {
+ mid: number;
+ name: string;
+ face: string;
+ };
+ stat: VideoStats;
+ dynamic: string;
+ cid: number;
+ dimension: VideoDimension;
+ season_id?: number;
+ short_link_v2: string;
+ first_frame: string;
+ pub_location: string;
+ cover43: string;
+ tidv2: number;
+ tname_v2: string;
+ bvid: string;
+ season_type: number;
+ is_ogv: number;
+ ovg_info: string | null;
+ rcmd_season: string;
+ enable_vt: number;
+ ai_rcmd: null | string;
}
diff --git a/lib/net/bisectVideoStartFrom.ts b/lib/net/bisectVideoStartFrom.ts
deleted file mode 100644
index 66d9c27..0000000
--- a/lib/net/bisectVideoStartFrom.ts
+++ /dev/null
@@ -1,88 +0,0 @@
-import { getLatestVideos } from "lib/net/getLatestVideos.ts";
-import { SECOND } from "$std/datetime/constants.ts";
-import { VideoListVideo } from "lib/net/bilibili.d.ts";
-
-export async function getVideoPositionInNewList(timestamp: number): Promise {
- const virtualPageSize = 50;
-
- let lowPage = 1;
- let highPage = 1;
- let foundUpper = false;
- while (true) {
- const ps = highPage < 2 ? 50 : 1
- const pn = highPage < 2 ? 1 : highPage * virtualPageSize;
- const videos = await getLatestVideos(pn, ps);
- if (!videos || videos.length === 0) {
- break;
- }
- const lastVideo = videos[videos.length - 1];
- if (!lastVideo || !lastVideo.pubdate) {
- break;
- }
- const lastTime = lastVideo.pubdate * SECOND
- if (lastTime <= timestamp && highPage == 1) {
- return videos;
- }
- else if (lastTime <= timestamp) {
- foundUpper = true;
- break;
- } else {
- lowPage = highPage;
- highPage *= 2;
- }
- }
-
- if (!foundUpper) {
- return null;
- }
-
- let boundaryPage = highPage;
- let lo = lowPage;
- let hi = highPage;
- while (lo <= hi) {
- const mid = Math.floor((lo + hi) / 2);
- const videos = await getLatestVideos(mid * virtualPageSize, 1);
- if (!videos) {
- return null;
- }
- if (videos.length === 0) {
- hi = mid - 1;
- continue;
- }
- const lastVideo = videos[videos.length - 1];
- if (!lastVideo || !lastVideo.pubdate) {
- hi = mid - 1;
- continue;
- }
- const lastTime = lastVideo.pubdate * SECOND
- if (lastTime > timestamp) {
- lo = mid + 1;
- } else {
- boundaryPage = mid;
- hi = mid - 1;
- }
- }
-
- const boundaryVideos = await getLatestVideos(boundaryPage, virtualPageSize);
- let indexInPage = 0;
- if (boundaryVideos && boundaryVideos.length > 0) {
- for (let i = 0; i < boundaryVideos.length; i++) {
- const video = boundaryVideos[i];
- if (!video.pubdate) {
- continue;
- }
- const videoTime = video.pubdate * SECOND
- if (videoTime > timestamp) {
- indexInPage++;
- } else {
- break;
- }
- }
- }
-
- const count = (boundaryPage - 1) * virtualPageSize + indexInPage;
-
- const safetyMargin = 5;
-
- return count + safetyMargin;
-}
diff --git a/lib/net/getLatestVideoAids.ts b/lib/net/getLatestVideoAids.ts
new file mode 100644
index 0000000..2fb44be
--- /dev/null
+++ b/lib/net/getLatestVideoAids.ts
@@ -0,0 +1,31 @@
+import { VideoListResponse } from "lib/net/bilibili.d.ts";
+import logger from "lib/log/logger.ts";
+import netScheduler from "lib/mq/scheduler.ts";
+
+/**
+ * Fetch one page of the newest-video list and return the aids of the videos on it.
+ *
+ * Errors thrown by `netScheduler.request` are not caught here and reach the caller.
+ *
+ * @param page 1-based page number, sent as `pn`.
+ * @param pageSize Number of videos per page, sent as `ps`.
+ * @returns The aids on the page, or an empty array when the API reports a non-zero code or the page is empty.
+ */
+export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[]> {
+	const startFrom = 1 + pageSize * (page - 1);
+	const endTo = pageSize * page;
+	const range = `${startFrom}-${endTo}`;
+	// Trailing space keeps the API message from running into the colon.
+	const errMessage = `Error fetching latest aid for ${range}: `;
+	const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
+	const data = await netScheduler.request<VideoListResponse>(url, "getLatestVideos");
+	if (data.code !== 0) {
+		logger.error(errMessage + data.message, "net", "getLatestVideos");
+		return [];
+	}
+	if (data.data.archives.length === 0) {
+		logger.verbose("No more videos found", "net", "getLatestVideos");
+		return [];
+	}
+	return data.data.archives.map((video) => video.aid);
+}
diff --git a/lib/net/getLatestVideos.ts b/lib/net/getLatestVideos.ts
deleted file mode 100644
index b41eae5..0000000
--- a/lib/net/getLatestVideos.ts
+++ /dev/null
@@ -1,36 +0,0 @@
-import {VideoListResponse } from "lib/net/bilibili.d.ts";
-import logger from "lib/log/logger.ts";
-import netScheduler, {NetSchedulerError} from "lib/mq/scheduler.ts";
-
-export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise {
- const startFrom = 1 + pageSize * (page - 1);
- const endTo = pageSize * page;
- const range = `${startFrom}-${endTo}`
- const errMessage = `Error fetching latest aid for ${range}:`
- try {
- const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
- const data = await netScheduler.request(url, 'getLatestVideos');
- if (data.code != 0) {
- logger.error(errMessage + data.message, 'net', 'getLastestVideos');
- return [];
- }
- if (data.data.archives.length === 0) {
- logger.verbose("No more videos found", "net", "getLatestVideos");
- return [];
- }
- return data.data.archives.map(video => video.aid);
- }
- catch (e) {
- const error = e as NetSchedulerError;
- if (error.code == "FETCH_ERROR") {
- const rawError = error.rawError! as Error;
- rawError.message = errMessage + rawError.message;
- logger.error(rawError, 'net', 'getVideoTags');
- return null;
- }
- else {
- // Re-throw the error
- throw e;
- }
- }
-}
diff --git a/lib/net/getVideoInfo.ts b/lib/net/getVideoInfo.ts
new file mode 100644
index 0000000..e64a91b
--- /dev/null
+++ b/lib/net/getVideoInfo.ts
@@ -0,0 +1,24 @@
+import netScheduler from "lib/mq/scheduler.ts";
+import { VideoDetailsData, VideoDetailsResponse } from "lib/net/bilibili.d.ts";
+import logger from "lib/log/logger.ts";
+
+/**
+ * Fetch the detail record of a video.
+ *
+ * Errors thrown by `netScheduler.request` are not caught here and reach the caller.
+ *
+ * @param aid The aid of the video.
+ * @returns The `data` payload of the response, or `null` when the API reports a non-zero code.
+ */
+export async function getVideoInfo(aid: number): Promise<VideoDetailsData | null> {
+	const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`;
+	const data = await netScheduler.request<VideoDetailsResponse>(url, "getVideoInfo");
+	// Trailing space keeps the API message from running into the colon.
+	const errMessage = `Error fetching metadata for ${aid}: `;
+	logger.log("Fetching metadata for " + aid, "net", "fn:getVideoInfo");
+	if (data.code !== 0) {
+		logger.error(errMessage + data.message, "net", "fn:getVideoInfo");
+		return null;
+	}
+	return data.data;
+}
diff --git a/lib/net/getVideoTags.ts b/lib/net/getVideoTags.ts
deleted file mode 100644
index 4ec0af6..0000000
--- a/lib/net/getVideoTags.ts
+++ /dev/null
@@ -1,35 +0,0 @@
-import { VideoTagsResponse } from "lib/net/bilibili.d.ts";
-import netScheduler, {NetSchedulerError} from "lib/mq/scheduler.ts";
-import logger from "lib/log/logger.ts";
-
-/*
- * Fetch the tags for a video
- * @param {number} aid The video's aid
- * @return {Promise} A promise, which resolves to an array of tags,
- * or null if an `fetch` error occurred
- * @throws {NetSchedulerError} If the request failed.
- */
-export async function getVideoTags(aid: number): Promise {
- try {
- const url = `https://api.bilibili.com/x/tag/archive/tags?aid=${aid}`;
- const data = await netScheduler.request(url, 'getVideoTags');
- if (data.code != 0) {
- logger.error(`Error fetching tags for video ${aid}: ${data.message}`, 'net', 'getVideoTags');
- return [];
- }
- return data.data.map((tag) => tag.tag_name);
- }
- catch (e) {
- const error = e as NetSchedulerError;
- if (error.code == "FETCH_ERROR") {
- const rawError = error.rawError! as Error;
- rawError.message = `Error fetching tags for video ${aid}: ` + rawError.message;
- logger.error(rawError, 'net', 'getVideoTags');
- return null;
- }
- else {
- // Re-throw the error
- throw e;
- }
- }
-}
diff --git a/lib/task/insertLatestVideo.ts b/lib/task/insertLatestVideo.ts
deleted file mode 100644
index e6b750b..0000000
--- a/lib/task/insertLatestVideo.ts
+++ /dev/null
@@ -1,76 +0,0 @@
-import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
-import { getLatestVideos } from "lib/net/getLatestVideos.ts";
-import { getLatestVideoTimestampFromAllData, insertIntoAllData, videoExistsInAllData } from "lib/db/allData.ts";
-import { sleep } from "lib/utils/sleep.ts";
-import { getVideoPositionInNewList } from "lib/net/bisectVideoStartFrom.ts";
-import { SECOND } from "$std/datetime/constants.ts";
-import logger from "lib/log/logger.ts";
-
-export async function insertLatestVideos(
- client: Client,
- pageSize: number = 10,
- intervalRate: number = 4000,
-): Promise {
- const latestVideoTimestamp = await getLatestVideoTimestampFromAllData(client);
- if (latestVideoTimestamp == null) {
- logger.error("Cannot get latest video timestamp from current database.", "net", "fn:insertLatestVideos()");
- return null
- }
- logger.log(`Latest video in the database: ${new Date(latestVideoTimestamp).toISOString()}`, "net", "fn:insertLatestVideos()")
- const videoIndex = await getVideoPositionInNewList(latestVideoTimestamp);
- if (videoIndex == null) {
- logger.error("Cannot locate the video through bisect.", "net", "fn:insertLatestVideos()");
- return null
- }
- if (typeof videoIndex == "object") {
- for (const video of videoIndex) {
- const videoExists = await videoExistsInAllData(client, video.aid);
- if (!videoExists) {
- await insertIntoAllData(client, video);
- }
- }
- return 0;
- }
- let page = Math.floor(videoIndex / pageSize) + 1;
- let failCount = 0;
- const insertedVideos = new Set();
- while (true) {
- try {
- const videos = await getLatestVideos(page, pageSize);
- if (videos == null) {
- failCount++;
- if (failCount > 5) {
- return null;
- }
- continue;
- }
- failCount = 0;
- if (videos.length == 0) {
- logger.verbose("No more videos found", "net", "fn:insertLatestVideos()");
- break;
- }
- for (const video of videos) {
- const videoExists = await videoExistsInAllData(client, video.aid);
- if (!videoExists) {
- await insertIntoAllData(client, video);
- insertedVideos.add(video.aid);
- }
- }
- logger.log(`Page ${page} crawled, total: ${insertedVideos.size} videos.`, "net", "fn:insertLatestVideos()");
- page--;
- if (page < 1) {
- return 0;
- }
- } catch (error) {
- logger.error(error as Error, "net", "fn:insertLatestVideos()");
- failCount++;
- if (failCount > 5) {
- return null;
- }
-
- } finally {
- await sleep(Math.random() * intervalRate + failCount * 3 * SECOND + SECOND);
- }
- }
- return 0;
-}
diff --git a/lib/utils/sleep.ts b/lib/utils/sleep.ts
index 3a5dcb9..63e382d 100644
--- a/lib/utils/sleep.ts
+++ b/lib/utils/sleep.ts
@@ -1,3 +1,3 @@
export async function sleep(ms: number) {
await new Promise((resolve) => setTimeout(resolve, ms));
-}
\ No newline at end of file
+}
diff --git a/lib/utils/truncate.ts b/lib/utils/truncate.ts
index 677978d..3d5800d 100644
--- a/lib/utils/truncate.ts
+++ b/lib/utils/truncate.ts
@@ -1,3 +1,3 @@
export function truncate(num: number, min: number, max: number) {
- return Math.max(min, Math.min(num, max))
-}
\ No newline at end of file
+ return Math.max(min, Math.min(num, max));
+}
diff --git a/src/bullui.ts b/src/bullui.ts
index 1850bac..407d1c5 100644
--- a/src/bullui.ts
+++ b/src/bullui.ts
@@ -2,13 +2,16 @@ import express from "express";
import { createBullBoard } from "@bull-board/api";
import { BullMQAdapter } from "@bull-board/api/bullMQAdapter.js";
import { ExpressAdapter } from "@bull-board/express";
-import { ClassifyVideoQueue, LatestVideosQueue, VideoTagsQueue } from "lib/mq/index.ts";
+import { ClassifyVideoQueue, LatestVideosQueue } from "lib/mq/index.ts";
const serverAdapter = new ExpressAdapter();
serverAdapter.setBasePath("/");
createBullBoard({
- queues: [new BullMQAdapter(LatestVideosQueue), new BullMQAdapter(VideoTagsQueue), new BullMQAdapter(ClassifyVideoQueue)],
+ queues: [
+ new BullMQAdapter(LatestVideosQueue),
+ new BullMQAdapter(ClassifyVideoQueue),
+ ],
serverAdapter: serverAdapter,
});
@@ -16,8 +19,6 @@ const app = express();
app.use("/", serverAdapter.getRouter());
-// other configurations of your server
-
app.listen(3000, () => {
console.log("Running on 3000...");
console.log("For the UI, open http://localhost:3000/");
diff --git a/src/filterWorker.ts b/src/filterWorker.ts
index 9746477..8eb43d4 100644
--- a/src/filterWorker.ts
+++ b/src/filterWorker.ts
@@ -18,7 +18,6 @@ Deno.addSignalListener("SIGTERM", async () => {
Deno.exit();
});
-
await initializeModels();
const filterWorker = new Worker(
@@ -45,6 +44,6 @@ filterWorker.on("error", (err) => {
logger.error(e.rawError, e.service, e.codePath);
});
-filterWorker.on("closed", async() => {
+filterWorker.on("closed", async () => {
await lockManager.releaseLock("classifyVideos");
-})
+});
diff --git a/src/worker.ts b/src/worker.ts
index b24abed..fbe791c 100644
--- a/src/worker.ts
+++ b/src/worker.ts
@@ -2,22 +2,19 @@ import { Job, Worker } from "bullmq";
import { getLatestVideosWorker } from "lib/mq/executors.ts";
import { redis } from "lib/db/redis.ts";
import logger from "lib/log/logger.ts";
-import { getVideoTagsWorker } from "lib/mq/exec/getVideoTags.ts";
-import { getVideoTagsInitializer } from "lib/mq/exec/getVideoTags.ts";
import { lockManager } from "lib/mq/lockManager.ts";
import { WorkerError } from "lib/mq/schema.ts";
+import { getVideoInfoWorker } from "lib/mq/exec/getVideoInfo.ts";
Deno.addSignalListener("SIGINT", async () => {
logger.log("SIGINT Received: Shutting down workers...", "mq");
await latestVideoWorker.close(true);
- await videoTagsWorker.close(true);
Deno.exit();
});
Deno.addSignalListener("SIGTERM", async () => {
logger.log("SIGTERM Received: Shutting down workers...", "mq");
await latestVideoWorker.close(true);
- await videoTagsWorker.close(true);
Deno.exit();
});
@@ -28,11 +25,14 @@ const latestVideoWorker = new Worker(
case "getLatestVideos":
await getLatestVideosWorker(job);
break;
+ case "getVideoInfo":
+ await getVideoInfoWorker(job);
+ break;
default:
break;
}
},
- { connection: redis, concurrency: 1, removeOnComplete: { count: 1440 } },
+ { connection: redis, concurrency: 6, removeOnComplete: { count: 1440 } },
);
latestVideoWorker.on("active", () => {
@@ -47,33 +47,3 @@ latestVideoWorker.on("error", (err) => {
latestVideoWorker.on("closed", async () => {
await lockManager.releaseLock("getLatestVideos");
});
-
-const videoTagsWorker = new Worker(
- "videoTags",
- async (job: Job) => {
- switch (job.name) {
- case "getVideoTags":
- return await getVideoTagsWorker(job);
- case "getVideosTags":
- return await getVideoTagsInitializer();
- default:
- break;
- }
- },
- {
- connection: redis,
- concurrency: 6,
- removeOnComplete: {
- count: 1000,
- },
- },
-);
-
-videoTagsWorker.on("active", () => {
- logger.log("Worker (videoTags) activated.", "mq");
-});
-
-videoTagsWorker.on("error", (err) => {
- const e = err as WorkerError;
- logger.error(e.rawError, e.service, e.codePath);
-});
diff --git a/test/db/videoTagIsNull.test.ts b/test/db/videoTagIsNull.test.ts
deleted file mode 100644
index 7ffc8cc..0000000
--- a/test/db/videoTagIsNull.test.ts
+++ /dev/null
@@ -1,33 +0,0 @@
-import { assertEquals } from "jsr:@std/assert";
-import { videoTagsIsNull } from "lib/db/allData.ts";
-import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
-import { postgresConfig } from "lib/db/pgConfig.ts";
-
-// A minimal aid which has an empty tags field in our database
-const TEST_AID = 63569;
-
-Deno.test("videoTagsIsNull function", async () => {
- const client = new Client(postgresConfig);
-
- try {
- const transaction = client.createTransaction("test_transaction");
- await transaction.begin();
-
- const result1 = await videoTagsIsNull(transaction, TEST_AID);
- assertEquals(typeof result1, "boolean", "The result should be a boolean value.");
- assertEquals(result1, false, "The result should be false if tags is not NULL for the given aid.");
-
- await transaction.queryArray`UPDATE all_data SET tags = NULL WHERE aid = ${TEST_AID}`;
-
- const result2 = await videoTagsIsNull(transaction, TEST_AID);
- assertEquals(typeof result2, "boolean", "The result should be a boolean value.");
- assertEquals(result2, true, "The result should be true if tags is NULL for the given aid.");
-
- await transaction.rollback();
- } catch (error) {
- console.error("Error during test:", error);
- throw error;
- } finally {
- client.end();
- }
-});
diff --git a/test/mq/rateLimiter.test.ts b/test/mq/rateLimiter.test.ts
index 054e945..2f19723 100644
--- a/test/mq/rateLimiter.test.ts
+++ b/test/mq/rateLimiter.test.ts
@@ -1,7 +1,7 @@
-import {assertEquals} from "jsr:@std/assert";
-import {SlidingWindow} from "lib/mq/slidingWindow.ts";
-import {RateLimiter, RateLimiterConfig} from "lib/mq/rateLimiter.ts";
-import {Redis} from "npm:ioredis@5.5.0";
+import { assertEquals } from "jsr:@std/assert";
+import { SlidingWindow } from "lib/mq/slidingWindow.ts";
+import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts";
+import { Redis } from "npm:ioredis@5.5.0";
Deno.test("RateLimiter works correctly", async () => {
const redis = new Redis({ maxRetriesPerRequest: null });
@@ -71,7 +71,7 @@ Deno.test("Multiple configs work correctly", async () => {
await new Promise((resolve) => setTimeout(resolve, windowSize1 * 1000 + 500));
// Availability should now be true (due to config1)
- assertEquals(await rateLimiter.getAvailability(), true);
+ assertEquals(await rateLimiter.getAvailability(), true);
// Trigger events up to the limit of the second config
for (let i = maxRequests1; i < maxRequests2; i++) {
@@ -88,4 +88,4 @@ Deno.test("Multiple configs work correctly", async () => {
assertEquals(await rateLimiter.getAvailability(), true);
redis.quit();
-});
\ No newline at end of file
+});
diff --git a/test/mq/slidingWindow.test.ts b/test/mq/slidingWindow.test.ts
index cde8d11..a749edc 100644
--- a/test/mq/slidingWindow.test.ts
+++ b/test/mq/slidingWindow.test.ts
@@ -7,13 +7,13 @@ Deno.test("SlidingWindow - event and count", async () => {
const windowSize = 5000; // 5 seconds
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName = "test_event";
- await slidingWindow.clear(eventName);
+ await slidingWindow.clear(eventName);
await slidingWindow.event(eventName);
const count = await slidingWindow.count(eventName);
assertEquals(count, 1);
- redis.quit();
+ redis.quit();
});
Deno.test("SlidingWindow - multiple events", async () => {
@@ -21,7 +21,7 @@ Deno.test("SlidingWindow - multiple events", async () => {
const windowSize = 5000; // 5 seconds
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName = "test_event";
- await slidingWindow.clear(eventName);
+ await slidingWindow.clear(eventName);
await slidingWindow.event(eventName);
await slidingWindow.event(eventName);
@@ -29,7 +29,7 @@ Deno.test("SlidingWindow - multiple events", async () => {
const count = await slidingWindow.count(eventName);
assertEquals(count, 3);
- redis.quit();
+ redis.quit();
});
Deno.test("SlidingWindow - no events", async () => {
@@ -37,12 +37,12 @@ Deno.test("SlidingWindow - no events", async () => {
const windowSize = 5000; // 5 seconds
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName = "test_event";
- await slidingWindow.clear(eventName);
+ await slidingWindow.clear(eventName);
const count = await slidingWindow.count(eventName);
assertEquals(count, 0);
- redis.quit();
+ redis.quit();
});
Deno.test("SlidingWindow - different event names", async () => {
@@ -51,8 +51,8 @@ Deno.test("SlidingWindow - different event names", async () => {
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName1 = "test_event_1";
const eventName2 = "test_event_2";
- await slidingWindow.clear(eventName1);
- await slidingWindow.clear(eventName2);
+ await slidingWindow.clear(eventName1);
+ await slidingWindow.clear(eventName2);
await slidingWindow.event(eventName1);
await slidingWindow.event(eventName2);
@@ -62,7 +62,7 @@ Deno.test("SlidingWindow - different event names", async () => {
assertEquals(count1, 1);
assertEquals(count2, 1);
- redis.quit();
+ redis.quit();
});
Deno.test("SlidingWindow - large number of events", async () => {
@@ -70,7 +70,7 @@ Deno.test("SlidingWindow - large number of events", async () => {
const windowSize = 5000; // 5 seconds
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName = "test_event";
- await slidingWindow.clear(eventName);
+ await slidingWindow.clear(eventName);
const numEvents = 1000;
for (let i = 0; i < numEvents; i++) {
@@ -80,5 +80,5 @@ Deno.test("SlidingWindow - large number of events", async () => {
const count = await slidingWindow.count(eventName);
assertEquals(count, numEvents);
- redis.quit();
+ redis.quit();
});
diff --git a/test/net/getLatestVideos.test.ts b/test/net/getLatestVideos.test.ts
deleted file mode 100644
index b2daa4d..0000000
--- a/test/net/getLatestVideos.test.ts
+++ /dev/null
@@ -1,25 +0,0 @@
-import { assertEquals } from "jsr:@std/assert";
-import { getLatestVideos } from "lib/net/getLatestVideos.ts";
-
-Deno.test("Get latest videos", async () => {
- const videos = (await getLatestVideos(1, 5))!;
- assertEquals(videos.length, 5);
-
- videos.forEach((video) => {
- assertVideoProperties(video);
- });
-});
-
-function assertVideoProperties(video: object) {
- const aid = "aid" in video && typeof video.aid === "number";
- const bvid = "bvid" in video && typeof video.bvid === "string" &&
- video.bvid.length === 12 && video.bvid.startsWith("BV");
- const description = "description" in video && typeof video.description === "string";
- const uid = "uid" in video && typeof video.uid === "number";
- const tags = "tags" in video && (typeof video.tags === "string" || video.tags === null);
- const title = "title" in video && typeof video.title === "string";
- const publishedAt = "published_at" in video && typeof video.published_at === "string";
-
- const match = aid && bvid && description && uid && tags && title && publishedAt;
- assertEquals(match, true);
-}
diff --git a/test/net/getVideoTags.test.ts b/test/net/getVideoTags.test.ts
deleted file mode 100644
index 0487dfb..0000000
--- a/test/net/getVideoTags.test.ts
+++ /dev/null
@@ -1,28 +0,0 @@
-import { assertEquals } from "jsr:@std/assert";
-import { getVideoTags } from "lib/net/getVideoTags.ts";
-
-Deno.test("Get video tags - regular video", async () => {
- const tags = (await getVideoTags(826597951))!.sort();
- assertEquals(tags, [
- "纯白P",
- "中华墨水娘",
- "中华少女",
- "中华粘土娘",
- "中华缘木娘",
- "中华少女Project",
- "提糯Tino",
- "中华烛火娘",
- "中华烁金娘",
- "新世代音乐人计划女生季",
- ].sort());
-});
-
-Deno.test("Get video tags - non-existent video", async () => {
- const tags = (await getVideoTags(8265979511111111));
- assertEquals(tags, []);
-});
-
-Deno.test("Get video tags - video with no tag", async () => {
- const tags = (await getVideoTags(981001865));
- assertEquals(tags, []);
-});
\ No newline at end of file