ref: separate fetching of aid list and video metadata

This commit is contained in:
alikia2x (寒寒) 2025-03-02 02:38:50 +08:00
parent c67e3d8e36
commit 1838219f04
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
60 changed files with 620 additions and 905 deletions

View File

@ -6,9 +6,12 @@
纵观整个互联网对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站
- [萌娘百科](https://zh.moegirl.org.cn/): 收录了大量中V歌曲及歌姬的信息呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
- [VCPedia](https://vcpedia.cn/): 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
- [VocaDB](https://vocadb.net/): 一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库其中包含艺术家、唱片、PV 等[^2],其中包含大量中文歌声合成作品。
- [萌娘百科](https://zh.moegirl.org.cn/):
收录了大量中V歌曲及歌姬的信息呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
- [VCPedia](https://vcpedia.cn/):
由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
- [VocaDB](https://vocadb.net/): 一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库其中包含艺术家、唱片、PV
等[^2],其中包含大量中文歌声合成作品。
- [天钿Daily](https://tdd.bunnyxt.com/)一个VC相关数据交流与分享的网站。致力于VC相关数据交流定期抓取VC相关数据选取有意义的纬度展示。[^3]
上述网站中,或多或少存在一些不足,例如:
@ -36,19 +39,22 @@
### 数据库
中V档案馆使用[PostgreSQL](https://postgresql.org)作为数据库,我们承诺定期导出数据库转储 (dump) 文件并公开,其内容遵从以下协议或条款:
中V档案馆使用[PostgreSQL](https://postgresql.org)作为数据库,我们承诺定期导出数据库转储 (dump)
文件并公开,其内容遵从以下协议或条款:
- 数据库中的事实性数据根据适用法律不构成受版权保护的内容。中V档案馆放弃一切可能的权利([CC0 1.0 Universal](https://creativecommons.org/publicdomain/zero/1.0/))。
- 对于数据库中有原创性的内容(如贡献者编辑的描述性内容),如无例外,以[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)提供。
- 对于引用、摘编或改编自萌娘百科、VCPedia的内容以与原始协议(CC BY-NC-SA 3.0 CN)兼容的协议[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供,并注明原始协议 。
> 根据原始协议第四条第2项内容CC BY-NC-SA 4.0协议为与原始协议具有相同授权要素的后续版本(“可适用的协议”)。
- 对于引用、摘编或改编自萌娘百科、VCPedia的内容以与原始协议(CC BY-NC-SA 3.0
CN)兼容的协议[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供,并注明原始协议 。
> 根据原始协议第四条第2项内容CC BY-NC-SA 4.0协议为与原始协议具有相同授权要素的后续版本(“可适用的协议”)。
- 中V档案馆文档使用[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)。
### 软件代码
用于构建中V档案馆的软件代码在[AGPL 3.0](https://www.gnu.org/licenses/agpl-3.0.html)许可证下公开,参见[LICENSE](./LICENSE)
[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。
[^2]: 翻译自[VocaDB](https://vocadb.net/),于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。
[^3]: 引用自[关于 - 天钿Daily](https://tdd.bunnyxt.com/about)
[^3]: 引用自[关于 - 天钿Daily](https://tdd.bunnyxt.com/about)

deno.json
View File

@ -1,60 +1,60 @@
{
"lock": false,
"tasks": {
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
"cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
"manifest": "deno task cli manifest $(pwd)",
"start": "deno run -A --watch=static/,routes/ dev.ts",
"build": "deno run -A dev.ts build",
"preview": "deno run -A main.ts",
"update": "deno run -A -r https://fresh.deno.dev/update .",
"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/worker.ts",
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
"adder": "deno run --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
},
"lint": {
"rules": {
"tags": ["fresh", "recommended"]
}
},
"exclude": ["**/_fresh/*"],
"imports": {
"@std/assert": "jsr:@std/assert@1",
"$fresh/": "https://deno.land/x/fresh@1.7.3/",
"preact": "https://esm.sh/preact@10.22.0",
"preact/": "https://esm.sh/preact@10.22.0/",
"@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
"@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
"tailwindcss": "npm:tailwindcss@3.4.1",
"tailwindcss/": "npm:/tailwindcss@3.4.1/",
"tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
"$std/": "https://deno.land/std@0.216.0/",
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
"bullmq": "npm:bullmq",
"lib/": "./lib/",
"ioredis": "npm:ioredis",
"@bull-board/api": "npm:@bull-board/api",
"@bull-board/express": "npm:@bull-board/express",
"express": "npm:express",
"src/": "./src/",
"onnxruntime": "npm:onnxruntime-node@1.19.2",
"chalk": "npm:chalk"
},
"compilerOptions": {
"jsx": "react-jsx",
"jsxImportSource": "preact"
},
"nodeModulesDir": "auto",
"fmt": {
"useTabs": true,
"lineWidth": 120,
"indentWidth": 4,
"semiColons": true,
"proseWrap": "always"
}
"lock": false,
"tasks": {
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
"cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
"manifest": "deno task cli manifest $(pwd)",
"start": "deno run -A --watch=static/,routes/ dev.ts",
"build": "deno run -A dev.ts build",
"preview": "deno run -A main.ts",
"update": "deno run -A -r https://fresh.deno.dev/update .",
"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/worker.ts",
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
"adder": "deno run --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
},
"lint": {
"rules": {
"tags": ["fresh", "recommended"]
}
},
"exclude": ["**/_fresh/*"],
"imports": {
"@std/assert": "jsr:@std/assert@1",
"$fresh/": "https://deno.land/x/fresh@1.7.3/",
"preact": "https://esm.sh/preact@10.22.0",
"preact/": "https://esm.sh/preact@10.22.0/",
"@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
"@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
"tailwindcss": "npm:tailwindcss@3.4.1",
"tailwindcss/": "npm:/tailwindcss@3.4.1/",
"tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
"$std/": "https://deno.land/std@0.216.0/",
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
"bullmq": "npm:bullmq",
"lib/": "./lib/",
"ioredis": "npm:ioredis",
"@bull-board/api": "npm:@bull-board/api",
"@bull-board/express": "npm:@bull-board/express",
"express": "npm:express",
"src/": "./src/",
"onnxruntime": "npm:onnxruntime-node@1.19.2",
"chalk": "npm:chalk"
},
"compilerOptions": {
"jsx": "react-jsx",
"jsxImportSource": "preact"
},
"nodeModulesDir": "auto",
"fmt": {
"useTabs": true,
"lineWidth": 120,
"indentWidth": 4,
"semiColons": true,
"proseWrap": "always"
}
}
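
As a quick illustration of the import map above: project modules and the pinned std library resolve through bare aliases rather than relative paths. Both imports below already appear elsewhere in this repository; the snippet is only a usage sketch.

```ts
// Sketch: aliases from the deno.json import map resolve without relative paths.
import { MINUTE } from "$std/datetime/constants.ts";
import logger from "lib/log/logger.ts";

logger.log(`One minute is ${MINUTE} ms.`, "example");
```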

View File

@ -17,7 +17,8 @@ layout:
Welcome to the CVSA Documentation!
This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors, etc.
This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors,
etc.
### Jump right in

View File

@ -1,22 +1,22 @@
# Table of contents
* [Welcome](README.md)
- [Welcome](README.md)
## About
* [About CVSA Project](about/this-project.md)
* [Scope of Inclusion](about/scope-of-inclusion.md)
- [About CVSA Project](about/this-project.md)
- [Scope of Inclusion](about/scope-of-inclusion.md)
## Architecture
* [Overview](architecure/overview.md)
* [Database Structure](architecure/database-structure/README.md)
* [Type of Song](architecure/database-structure/type-of-song.md)
* [Message Queue](architecure/message-queue/README.md)
* [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
* [Artificial Intelligence](architecure/artificial-intelligence.md)
- [Overview](architecure/overview.md)
- [Database Structure](architecure/database-structure/README.md)
- [Type of Song](architecure/database-structure/type-of-song.md)
- [Message Queue](architecure/message-queue/README.md)
- [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
- [Artificial Intelligence](architecure/artificial-intelligence.md)
## API Doc
* [Catalog](api-doc/catalog.md)
* [Songs](api-doc/songs.md)
- [Catalog](api-doc/catalog.md)
- [Songs](api-doc/songs.md)

View File

@ -1,19 +1,27 @@
# Scope of Inclusion
CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publishers, manipulators, arrangers, etc.), singers, and voice engines / voicebanks.
CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publishers, manipulators,
arrangers, etc.), singers, and voice engines / voicebanks.
For a **song**, it must meet the following conditions to be included in CVSA:
### Category 30
In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is posted on Bilibili. In some special cases, this rule may not be enforced. 
In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is
posted on Bilibili. In some special cases, this rule may not be enforced. 
### At Least One Line of Chinese
The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports
Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
### Using Vocal Synthesizer
To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony vocals).
To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony
vocals).
We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics, encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio) approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g., [so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).
We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically
modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics,
encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio)
approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g.,
[so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).

View File

@ -1,11 +1,13 @@
# About CVSA Project
CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis community in a highly automation-assisted way. 
CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis
community in a highly automation-assisted way. 
Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an automated and manually edited way:
* Metadata of songs (name, duration, publisher, singer, etc.)
* Descriptive information of songs (content introduction, creation background, lyrics, etc.)
* Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites, likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
* Information about artists, albums, vocal synthesizers, and voicebanks.
Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an
automated and manually edited way:
- Metadata of songs (name, duration, publisher, singer, etc.)
- Descriptive information of songs (content introduction, creation background, lyrics, etc.)
- Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites,
likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
- Information about artists, albums, vocal synthesizers, and voicebanks.

View File

@ -1,4 +1,3 @@
# Catalog
* [**Songs**](songs.md)
- [**Songs**](songs.md)

View File

@ -6,8 +6,9 @@ The AI systems we currently use are:
### The Filter
Located at `/filter/` under the project root directory, it classifies a video in the [category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
Located at `/filter/` under the project root directory, it classifies a video in the
[category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
* 0: Not related to Chinese vocal synthesis
* 1: An original song with Chinese vocal synthesis
* 2: A cover/remix song with Chinese vocal synthesis
- 0: Not related to Chinese vocal synthesis
- 1: An original song with Chinese vocal synthesis
- 2: A cover/remix song with Chinese vocal synthesis
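
For illustration only, the numeric label returned by the filter could be mapped to these categories as follows; the helper below is hypothetical and not part of the codebase.

```ts
// Hypothetical mapping from the filter's output index to a readable description.
const filterLabels: Record<number, string> = {
    0: "Not related to Chinese vocal synthesis",
    1: "An original song with Chinese vocal synthesis",
    2: "A cover/remix song with Chinese vocal synthesis",
};

function describeLabel(label: number): string {
    return filterLabels[label] ?? "Unknown label";
}
```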

View File

@ -2,10 +2,11 @@
CVSA uses [PostgreSQL](https://www.postgresql.org/) as our database.
All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the following tables:
* songs: stores the main information of songs
* bili\_user: stores snapshots of Bilibili user information
* all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
* labelling\_result: Contains labels of videos in `all_data` tagged by our [AI system](../artificial-intelligence.md#the-filter).
All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the
following tables:
- songs: stores the main information of songs
- bili\_user: stores snapshots of Bilibili user information
- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
- labelling\_result: Contains labels of videos in `all_data` tagged by our
[AI system](../artificial-intelligence.md#the-filter).
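
A minimal sketch of reading from these tables with the Deno postgres client used in this repository; the helper name and query are illustrative, but the column names match the `AllDataType` shape in `lib/db/schema.d.ts`.

```ts
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { AllDataType } from "lib/db/schema.d.ts";

// Illustrative helper: fetch one video's metadata row from `all_data` by aid.
export async function getVideoMetadata(client: Client, aid: number): Promise<AllDataType | null> {
    const result = await client.queryObject<AllDataType>(
        `SELECT aid, bvid, description, uid, tags, title, published_at FROM all_data WHERE aid = $1`,
        [aid],
    );
    return result.rows[0] ?? null;
}
```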

View File

@ -1,6 +1,7 @@
# Type of Song
The **Unrelated type** refers specifically to videos that are not in our [Scope of Inclusion](../../about/scope-of-inclusion.md).
The **Unrelated type** refers specifically to videos that are not in our
[Scope of Inclusion](../../about/scope-of-inclusion.md).
### Table: `songs`

View File

@ -1,2 +1 @@
# Message Queue

View File

@ -2,7 +2,8 @@
### Jobs
The VideoTagsQueue contains two jobs: `getVideoTags` and `getVideosTags`. The former is used to fetch the tags of a video, and the latter is responsible for scheduling the former.
The VideoTagsQueue contains two jobs: `getVideoTags` and `getVideosTags`. The former is used to fetch the tags of a
video, and the latter is responsible for scheduling the former.
### Return value

View File

@ -15,4 +15,5 @@ layout:
# Overview
Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by [BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by
[BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
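
A minimal sketch of that pattern with BullMQ and ioredis; the queue and worker here are illustrative stand-ins (the real ones live in `lib/mq/index.ts` and `src/worker.ts`).

```ts
import { Queue, Worker } from "bullmq";
import { Redis } from "ioredis";

const connection = new Redis({ maxRetriesPerRequest: null });

// A named queue holds jobs; a repeatable scheduler enqueues one every minute.
const exampleQueue = new Queue("example", { connection });
await exampleQueue.upsertJobScheduler("tick", { every: 60_000 });

// A worker pulls jobs off the queue and processes them concurrently.
new Worker("example", async (job) => {
    console.log(`Processing job: ${job.name}`);
}, { connection, concurrency: 6 });
```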

View File

@ -1,22 +1,22 @@
# Table of contents
* [欢迎](README.md)
- [欢迎](README.md)
## 关于 <a href="#about" id="about"></a>
* [关于本项目](about/this-project.md)
* [收录范围](about/scope-of-inclusion.md)
- [关于本项目](about/this-project.md)
- [收录范围](about/scope-of-inclusion.md)
## 技术架构 <a href="#architecture" id="architecture"></a>
* [概览](architecture/overview.md)
* [数据库结构](architecture/database-structure/README.md)
* [歌曲类型](architecture/database-structure/type-of-song.md)
* [人工智能](architecture/artificial-intelligence.md)
* [消息队列](architecture/message-queue/README.md)
* [VideoTagsQueue队列](architecture/message-queue/video-tags-queue.md)
- [概览](architecture/overview.md)
- [数据库结构](architecture/database-structure/README.md)
- [歌曲类型](architecture/database-structure/type-of-song.md)
- [人工智能](architecture/artificial-intelligence.md)
- [消息队列](architecture/message-queue/README.md)
- [VideoTagsQueue队列](architecture/message-queue/video-tags-queue.md)
## API 文档 <a href="#api-doc" id="api-doc"></a>
* [目录](api-doc/catalog.md)
* [歌曲](api-doc/songs.md)
- [目录](api-doc/catalog.md)
- [歌曲](api-doc/songs.md)

View File

@ -6,7 +6,8 @@
#### VOCALOID·UATU 分区
原则上中V档案馆中收录的歌曲必须包含在哔哩哔哩 VOCALOID·UTAU 分区(分区ID为30)下的视频中。在某些特殊情况下此规则可能不是强制的。
原则上中V档案馆中收录的歌曲必须包含在哔哩哔哩 VOCALOID·UTAU
分区(分区ID为30)下的视频中。在某些特殊情况下此规则可能不是强制的。
#### 至少一行中文
@ -16,4 +17,6 @@
歌曲的至少一行必须由歌声合成器生成(包括和声部分)才能被收录到中V档案馆中。
我们将歌声合成器定义为通过算法建模声音特征并根据输入的歌词、音高等参数生成音频的软件或系统,包括基于波形拼接的(如 VOCALOID、UTAU)和基于 AI 的(如 Synthesizer V、ACE Studio)方法,**但不包括仅改变现有歌声音色的AI声音转换器**(例如 [so-vits svc](https://github.com/svc-develop-team/so-vits-svc))。
我们将歌声合成器定义为通过算法建模声音特征并根据输入的歌词、音高等参数生成音频的软件或系统,包括基于波形拼接的(如
VOCALOID、UTAU)和基于 AI 的(如 Synthesizer V、ACE Studio)方法,**但不包括仅改变现有歌声音色的AI声音转换器**(例如
[so-vits svc](https://github.com/svc-develop-team/so-vits-svc))。

View File

@ -6,34 +6,33 @@
纵观整个互联网对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站
* [萌娘百科](https://zh.moegirl.org.cn/): 收录了大量中V歌曲及歌姬的信息呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
* [VCPedia](https://vcpedia.cn/): 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
* [VocaDB](https://vocadb.net/): [一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库其中包含艺术家、唱片、PV 等](#user-content-fn-2)[^2],其中包含大量中文歌声合成作品。
* [天钿Daily](https://tdd.bunnyxt.com/)一个VC相关数据交流与分享的网站。致力于VC相关数据交流定期抓取VC相关数据选取有意义的纬度展示。
- [萌娘百科](https://zh.moegirl.org.cn/):
收录了大量中V歌曲及歌姬的信息呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
- [VCPedia](https://vcpedia.cn/):
由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
- [VocaDB](https://vocadb.net/):
[一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库其中包含艺术家、唱片、PV 等](#user-content-fn-2)[^2],其中包含大量中文歌声合成作品。
- [天钿Daily](https://tdd.bunnyxt.com/)一个VC相关数据交流与分享的网站。致力于VC相关数据交流定期抓取VC相关数据选取有意义的纬度展示。
上述网站中,或多或少存在一些不足,例如:
* 萌娘百科、VCPedia受限于传统维基绝大多数内容依赖人工编辑。
* VocaDB基于结构化数据库构建由此可以依赖程序生成一些信息但**条目收录**仍然完全依赖人工完成。
* VocaDB主要专注于元数据展示少有关于歌曲、作者等的描述性的文字也缺乏描述性的背景信息。
* 天钿Daily只展示歌曲的统计数据及历史趋势没有关于歌曲其它信息的收集。
- 萌娘百科、VCPedia受限于传统维基绝大多数内容依赖人工编辑。
- VocaDB基于结构化数据库构建由此可以依赖程序生成一些信息但**条目收录**仍然完全依赖人工完成。
- VocaDB主要专注于元数据展示少有关于歌曲、作者等的描述性的文字也缺乏描述性的背景信息。
- 天钿Daily只展示歌曲的统计数据及历史趋势没有关于歌曲其它信息的收集。
因此,**中V档案馆**吸取前人经验,克服上述网站的不足,希望做到:
* 歌曲收录(指发现歌曲并创建条目)的完全自动化
* 歌曲元信息提取的高度自动化
* 歌曲统计数据收集的完全自动化
* 在程序辅助的同时欢迎并鼓励贡献者参与编辑(主要为描述性内容)或纠错
* 在适当的许可声明下,引用来自上述源的数据,使内容更加全面、丰富。
- 歌曲收录(指发现歌曲并创建条目)的完全自动化
- 歌曲元信息提取的高度自动化
- 歌曲统计数据收集的完全自动化
- 在程序辅助的同时欢迎并鼓励贡献者参与编辑(主要为描述性内容)或纠错
- 在适当的许可声明下,引用来自上述源的数据,使内容更加全面、丰富。
***
---
本文在[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供。
[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。
[^2]: 翻译自[VocaDB](https://vocadb.net/),于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。

View File

@ -1,3 +1,3 @@
# 目录
* [歌曲](songs.md)
- [歌曲](songs.md)

View File

@ -6,8 +6,8 @@ CVSA 的自动化工作流高度依赖人工智能进行信息提取和分类。
#### Filter
位于项目根目录下的 `/filter/`,它将 [30 分区](../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中的视频分为以下类别:
位于项目根目录下的 `/filter/`,它将 [30 分区](../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中的视频分为以下类别:
* 0与中文人声合成无关
* 1中文人声合成原创曲
* 2中文人声合成的翻唱/混音歌曲
- 0与中文人声合成无关
- 1中文人声合成原创曲
- 2中文人声合成的翻唱/混音歌曲

View File

@ -4,7 +4,7 @@ CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。
CVSA 的所有公开数据(不包括用户的个人数据)都存储在名为 `cvsa_main` 的数据库中,该数据库包含以下表:
* songs存储歌曲的主要信息
* bili\_user存储 Bilibili 用户信息快照
* all\_data[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据。
* labelling\_result包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
- songs存储歌曲的主要信息
- bili\_user存储 Bilibili 用户信息快照
- all\_data[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据。
- labelling\_result包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。

View File

@ -7,18 +7,18 @@
`songs` 表格中使用的 `type` 列。
| 类型 | 说明 |
| -- | ---------- |
| 0 | 不相关 |
| 1 | 原创 |
| 2 | 翻唱 (Cover) |
| 3 | 混音 (Remix) |
| 4 | 纯音乐 |
| 10 | 其他 |
| ---- | ------------ |
| 0 | 不相关 |
| 1 | 原创 |
| 2 | 翻唱 (Cover) |
| 3 | 混音 (Remix) |
| 4 | 纯音乐 |
| 10 | 其他 |
#### 表格:`labelling_result`
| 标签 | 说明 |
| -- | ----------- |
| 0 | AI 标记:不相关 |
| 1 | AI 标记:原创 |
| 2 | AI 标记:翻唱/混音 |
| 标签 | 说明 |
| ---- | ------------------ |
| 0 | AI 标记:不相关 |
| 1 | AI 标记:原创 |
| 2 | AI 标记:翻唱/混音 |
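
For illustration, the `type` values of the `songs` table above could be represented as a TypeScript lookup map; the constant below is a hypothetical example (with labels translated), not project code.

```ts
// Hypothetical mapping for the `type` column of the `songs` table.
const songTypeLabels: Record<number, string> = {
    0: "Unrelated",
    1: "Original",
    2: "Cover",
    3: "Remix",
    4: "Instrumental",
    10: "Other",
};
```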

View File

@ -1,2 +1 @@
# 消息队列

View File

@ -1,9 +1,5 @@
import { Client, Transaction } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { AllDataType } from "lib/db/schema.d.ts";
import logger from "lib/log/logger.ts";
import { formatTimestampToPsql, parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts";
import { VideoListVideo } from "lib/net/bilibili.d.ts";
import { HOUR, SECOND } from "$std/datetime/constants.ts";
import { modelVersion } from "lib/ml/filter_inference.ts";
export async function videoExistsInAllData(client: Client, aid: number) {
@ -11,70 +7,8 @@ export async function videoExistsInAllData(client: Client, aid: number) {
.then((result) => result.rows[0].exists);
}
export async function biliUserExists(client: Client, uid: number) {
export async function userExistsInBiliUsers(client: Client, uid: number) {
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bili_user WHERE uid = $1)`, [uid])
.then((result) => result.rows[0].exists);
}
export async function insertIntoAllData(client: Client, data: VideoListVideo) {
logger.log(`inserted ${data.aid}`, "db-all_data");
await client.queryObject(
`INSERT INTO all_data (aid, bvid, description, uid, tags, title, published_at, duration)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (aid) DO NOTHING`,
[
data.aid,
data.bvid,
data.desc,
data.owner.mid,
null,
data.title,
formatTimestampToPsql(data.pubdate * SECOND + 8 * HOUR),
data.duration,
],
);
}
export async function getLatestVideoTimestampFromAllData(client: Client) {
return await client.queryObject<{ published_at: string }>(
`SELECT published_at FROM all_data ORDER BY published_at DESC LIMIT 1`,
)
.then((result) => {
const date = new Date(result.rows[0].published_at);
if (isNaN(date.getTime())) {
return null;
}
return date.getTime();
});
}
export async function videoTagsIsNull(client: Client | Transaction, aid: number) {
return await client.queryObject<{ exists: boolean }>(
`SELECT EXISTS(SELECT 1 FROM all_data WHERE aid = $1 AND tags IS NULL)`,
[aid],
).then((result) => result.rows[0].exists);
}
export async function updateVideoTags(client: Client | Transaction, aid: number, tags: string[]) {
return await client.queryObject(
`UPDATE all_data SET tags = $1 WHERE aid = $2`,
[tags.join(","), aid],
);
}
export async function getNullVideoTagsList(client: Client) {
const queryResult = await client.queryObject<{ aid: number; published_at: string }>(
`SELECT aid, published_at FROM all_data WHERE tags IS NULL`,
);
const rows = queryResult.rows;
return rows.map(
(row) => {
return {
aid: Number(row.aid),
published_at: parseTimestampFromPsql(row.published_at),
};
},
);
}
export async function getUnlabelledVideos(client: Client) {

View File

@ -1,5 +1,5 @@
import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import {postgresConfig} from "lib/db/pgConfig.ts";
import { postgresConfig } from "lib/db/pgConfig.ts";
const pool = new Pool(postgresConfig, 12);

View File

@ -3,7 +3,7 @@ const requiredEnvVars = ["DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_POR
const unsetVars = requiredEnvVars.filter((key) => Deno.env.get(key) === undefined);
if (unsetVars.length > 0) {
throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
}
const databaseHost = Deno.env.get("DB_HOST")!;
@ -18,4 +18,4 @@ export const postgresConfig = {
database: databaseName,
user: databaseUser,
password: databasePassword,
};
};

View File

@ -1,3 +1,3 @@
import { Redis } from "ioredis";
export const redis = new Redis({ maxRetriesPerRequest: null });
export const redis = new Redis({ maxRetriesPerRequest: null });

lib/db/schema.d.ts vendored
View File

@ -1,9 +1,9 @@
export interface AllDataType {
aid: number;
bvid: string | null;
description: string | null;
uid: number | null;
tags: string | null;
title: string | null;
published_at: string | null;
}
aid: number;
bvid: string | null;
description: string | null;
uid: number | null;
tags: string | null;
title: string | null;
published_at: string | null;
}

View File

@ -9,4 +9,4 @@ logger.log("foo", "service");
logger.log("foo", "db", "insert.ts");
logger.warn("warn");
logger.error("error");
logger.verbose("error");
logger.verbose("error");

View File

@ -1,7 +1,7 @@
import {AutoTokenizer, PreTrainedTokenizer} from "@huggingface/transformers";
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
import * as ort from "onnxruntime";
import logger from "lib/log/logger.ts";
import {WorkerError} from "lib/mq/schema.ts";
import { WorkerError } from "lib/mq/schema.ts";
const tokenizerModel = "alikia2x/jina-embedding-v3-m2v-1024";
const onnxClassifierPath = "./model/video_classifier_v3_11.onnx";
@ -66,7 +66,6 @@ async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession)
return Array.from(embeddings.data as Float32Array);
}
async function runClassification(embeddings: number[]): Promise<number[]> {
if (!sessionClassifier) {
throw new Error("Classifier session is not initialized. Call initializeModels() first.");
@ -85,7 +84,7 @@ export async function classifyVideo(
description: string,
tags: string,
author_info: string,
aid: number
aid: number,
): Promise<number> {
if (!sessionEmbedding) {
throw new Error("Embedding session is not initialized. Call initializeModels() first.");
@ -97,6 +96,6 @@ export async function classifyVideo(
author_info,
], sessionEmbedding);
const probabilities = await runClassification(embeddings);
logger.log(`Prediction result for aid: ${aid}: [${probabilities.map((p) => p.toFixed(5))}]`, "ml")
logger.log(`Prediction result for aid: ${aid}: [${probabilities.map((p) => p.toFixed(5))}]`, "ml");
return probabilities.indexOf(Math.max(...probabilities));
}

View File

@ -1,6 +1,6 @@
import {AutoTokenizer, PreTrainedTokenizer} from "@huggingface/transformers";
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
import * as ort from "onnxruntime";
import {softmax} from "lib/ml/filter_inference.ts";
import { softmax } from "lib/ml/filter_inference.ts";
// 配置参数
const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";

View File

@ -1,6 +1,6 @@
import { Job } from "bullmq";
import { db } from "lib/db/init.ts";
import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel} from "lib/db/allData.ts";
import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "lib/db/allData.ts";
import { classifyVideo } from "lib/ml/filter_inference.ts";
import { ClassifyVideoQueue } from "lib/mq/index.ts";
import logger from "lib/log/logger.ts";
@ -27,7 +27,8 @@ export const classifyVideoWorker = async (job: Job) => {
client.release();
await job.updateData({
...job.data, label: label,
...job.data,
label: label,
});
return 0;
@ -38,12 +39,12 @@ export const classifyVideosWorker = async () => {
logger.log("job:classifyVideos is locked, skipping.", "mq");
return;
}
await lockManager.acquireLock("classifyVideos");
const client = await db.connect();
const videos = await getUnlabelledVideos(client);
logger.log(`Found ${videos.length} unlabelled videos`)
logger.log(`Found ${videos.length} unlabelled videos`);
client.release();
let i = 0;

View File

@ -1,52 +1,12 @@
import { Job } from "bullmq";
import { insertLatestVideos } from "lib/task/insertLatestVideo.ts";
import { LatestVideosQueue } from "lib/mq/index.ts";
import { MINUTE } from "$std/datetime/constants.ts";
import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
import { db } from "lib/db/init.ts";
import { truncate } from "lib/utils/truncate.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import logger from "lib/log/logger.ts";
import { lockManager } from "lib/mq/lockManager.ts";
const delayMap = [5, 10, 15, 30, 60, 60];
const updateQueueInterval = async (failedCount: number, delay: number) => {
logger.log(`job:getLatestVideos added to queue, delay: ${(delay / MINUTE).toFixed(2)} minutes.`, "mq");
await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
every: delay,
}, {
data: {
failedCount: failedCount,
},
});
return;
};
const executeTask = async (client: Client, failedCount: number) => {
const result = await insertLatestVideos(client);
failedCount = result !== 0 ? truncate(failedCount + 1, 0, 5) : 0;
if (failedCount !== 0) {
await updateQueueInterval(failedCount, delayMap[failedCount] * MINUTE);
}
return;
};
export const getLatestVideosWorker = async (job: Job) => {
if (await lockManager.isLocked("getLatestVideos")) {
logger.log("job:getLatestVideos is locked, skipping.", "mq");
return;
}
await lockManager.acquireLock("getLatestVideos");
const failedCount = (job.data.failedCount ?? 0) as number;
export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
const client = await db.connect();
try {
await executeTask(client, failedCount);
await queueLatestVideos(client);
} finally {
client.release();
await lockManager.releaseLock("getLatestVideos");
}
return;
};

View File

@ -0,0 +1,17 @@
import { Job } from "bullmq";
import { db } from "lib/db/init.ts";
import { insertVideoInfo } from "lib/mq/task/getVideoInfo.ts";
export const getVideoInfoWorker = async (job: Job): Promise<number> => {
const client = await db.connect();
try {
const aid = job.data.aid;
if (!aid) {
return 3;
}
await insertVideoInfo(client, aid);
return 0;
} finally {
client.release();
}
};

View File

@ -1,100 +0,0 @@
import { Job } from "bullmq";
import { VideoTagsQueue } from "lib/mq/index.ts";
import { DAY, HOUR, MINUTE, SECOND } from "$std/datetime/constants.ts";
import { db } from "lib/db/init.ts";
import { truncate } from "lib/utils/truncate.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import logger from "lib/log/logger.ts";
import { getNullVideoTagsList, updateVideoTags } from "lib/db/allData.ts";
import { getVideoTags } from "lib/net/getVideoTags.ts";
import { NetSchedulerError } from "lib/mq/scheduler.ts";
import { WorkerError } from "lib/mq/schema.ts";
const delayMap = [0.5, 3, 5, 15, 30, 60];
const getJobPriority = (diff: number) => {
let priority;
if (diff > 14 * DAY) {
priority = 10;
} else if (diff > 7 * DAY) {
priority = 7;
} else if (diff > DAY) {
priority = 5;
} else if (diff > 6 * HOUR) {
priority = 3;
} else if (diff > HOUR) {
priority = 2;
} else {
priority = 1;
}
return priority;
};
const executeTask = async (client: Client, aid: number, failedCount: number, job: Job) => {
try {
const result = await getVideoTags(aid);
if (!result) {
failedCount = truncate(failedCount + 1, 0, 5);
const delay = delayMap[failedCount] * MINUTE;
logger.log(
`job:getVideoTags added to queue, delay: ${delayMap[failedCount]} minutes.`,
"mq",
);
await VideoTagsQueue.add("getVideoTags", { aid, failedCount }, { delay, priority: 6 - failedCount });
return 1;
}
await updateVideoTags(client, aid, result);
logger.log(`Fetched tags for aid: ${aid}`, "task");
return 0;
} catch (e) {
if (!(e instanceof NetSchedulerError)) {
throw new WorkerError(<Error> e, "task", "getVideoTags/fn:executeTask");
}
const err = e as NetSchedulerError;
if (err.code === "NO_AVAILABLE_PROXY" || err.code === "PROXY_RATE_LIMITED") {
logger.warn(`No available proxy for fetching tags, delayed. aid: ${aid}`, "task");
await VideoTagsQueue.add("getVideoTags", { aid, failedCount }, {
delay: 25 * SECOND * Math.random() + 5 * SECOND,
priority: job.priority,
});
return 2;
}
throw new WorkerError(err, "task", "getVideoTags/fn:executeTask");
}
};
export const getVideoTagsWorker = async (job: Job) => {
const failedCount = (job.data.failedCount ?? 0) as number;
const client = await db.connect();
const aid = job.data.aid;
if (!aid) {
return 3;
}
const v = await executeTask(client, aid, failedCount, job);
client.release();
return v;
};
export const getVideoTagsInitializer = async () => {
const client = await db.connect();
const videos = await getNullVideoTagsList(client);
client.release();
if (videos.length == 0) {
return 4;
}
const count = await VideoTagsQueue.getJobCounts("wait", "delayed", "active");
const total = count.delayed + count.active + count.wait;
const max = 15;
const rest = truncate(max - total, 0, max);
let i = 0;
for (const video of videos) {
if (i > rest) return 100 + i;
const aid = video.aid;
const timestamp = video.published_at;
const diff = Date.now() - timestamp;
await VideoTagsQueue.add("getVideoTags", { aid }, { priority: getJobPriority(diff) });
i++;
}
return 0;
};

View File

@ -1 +1 @@
export * from "lib/mq/exec/getLatestVideos.ts";
export * from "lib/mq/exec/getLatestVideos.ts";

View File

@ -2,6 +2,4 @@ import { Queue } from "bullmq";
export const LatestVideosQueue = new Queue("latestVideos");
export const VideoTagsQueue = new Queue("videoTags");
export const ClassifyVideoQueue = new Queue("classifyVideo");

View File

@ -1,19 +1,16 @@
import { MINUTE } from "$std/datetime/constants.ts";
import { ClassifyVideoQueue, LatestVideosQueue, VideoTagsQueue } from "lib/mq/index.ts";
import { ClassifyVideoQueue, LatestVideosQueue } from "lib/mq/index.ts";
import logger from "lib/log/logger.ts";
export async function initMQ() {
await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
every: 1 * MINUTE
});
await VideoTagsQueue.upsertJobScheduler("getVideosTags", {
every: 5 * MINUTE,
every: 1 * MINUTE,
immediately: true,
});
await ClassifyVideoQueue.upsertJobScheduler("classifyVideos", {
every: 5 * MINUTE,
immediately: true,
})
});
logger.log("Message queue initialized.");
}

View File

@ -23,12 +23,12 @@ class LockManager {
const result = await this.redis.set(key, "locked", "NX");
if (result !== "OK") {
return false;
return false;
}
if (timeout) {
await this.redis.expire(key, timeout);
}
return true;
if (timeout) {
await this.redis.expire(key, timeout);
}
return true;
}
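
For context, a sketch of how this lock guards a job so only one instance runs at a time; the calls below mirror usage elsewhere in this commit (e.g. in `filterWorker.ts`), and the job name is only an example.

```ts
import { lockManager } from "lib/mq/lockManager.ts";
import logger from "lib/log/logger.ts";

// Sketch: skip the job if another instance already holds the lock.
if (await lockManager.isLocked("exampleJob")) {
    logger.log("job:exampleJob is locked, skipping.", "mq");
} else {
    await lockManager.acquireLock("exampleJob");
    try {
        // ... do the actual work ...
    } finally {
        await lockManager.releaseLock("exampleJob");
    }
}
```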
/*

View File

@ -7,7 +7,7 @@ export interface RateLimiterConfig {
export class RateLimiter {
private readonly configs: RateLimiterConfig[];
private readonly configEventNames: string[];
private readonly configEventNames: string[];
/*
* @param name The name of the rate limiter
@ -17,7 +17,7 @@ export class RateLimiter {
*/
constructor(name: string, configs: RateLimiterConfig[]) {
this.configs = configs;
this.configEventNames = configs.map((_, index) => `${name}_config_${index}`);
this.configEventNames = configs.map((_, index) => `${name}_config_${index}`);
}
/*
@ -53,4 +53,4 @@ export class RateLimiter {
await config.window.clear(eventName);
}
}
}
}

View File

@ -1,7 +1,7 @@
import logger from "lib/log/logger.ts";
import {RateLimiter, RateLimiterConfig} from "lib/mq/rateLimiter.ts";
import {SlidingWindow} from "lib/mq/slidingWindow.ts";
import {redis} from "lib/db/redis.ts";
import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts";
import { SlidingWindow } from "lib/mq/slidingWindow.ts";
import { redis } from "lib/db/redis.ts";
import Redis from "ioredis";
import { SECOND } from "$std/datetime/constants.ts";
@ -152,7 +152,7 @@ class NetScheduler {
const proxiesNames = this.getTaskProxies(task);
for (const proxyName of shuffleArray(proxiesNames)) {
if (await this.getProxyAvailability(proxyName, task)) {
return await this.proxyRequest<R>(url, proxyName, method);
return await this.proxyRequest<R>(url, proxyName, task, method);
}
}
throw new NetSchedulerError("No available proxy currently.", "NO_AVAILABLE_PROXY");
@ -186,8 +186,9 @@ class NetScheduler {
if (!force) {
const isAvailable = await this.getProxyAvailability(proxyName, task);
const limiter = "proxy-" + proxyName + "-" + task
if (!isAvailable) {
throw new NetSchedulerError(`Proxy "${proxyName}" is rate limited`, "PROXY_RATE_LIMITED");
throw new NetSchedulerError(`Proxy "${limiter}" is rate limited`, "PROXY_RATE_LIMITED");
}
}
@ -225,7 +226,7 @@ class NetScheduler {
logger.error(error, "redis");
return false;
}
logger.warn(`Unhandled error: ${error.message}`, "mq", "getProxyAvailability");
logger.error(error, "mq", "getProxyAvailability");
return false;
}
}
@ -237,7 +238,7 @@ class NetScheduler {
const response = await fetch(url, {
method,
signal: controller.signal
signal: controller.signal,
});
clearTimeout(timeout);
@ -281,7 +282,7 @@ const biliLimiterConfig: RateLimiterConfig[] = [
netScheduler.addProxy("native", "native", "");
netScheduler.addTask("getVideoInfo", "bilibili", "all");
netScheduler.addTask("getLatestVideos", "bilibili", "all");
netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig)
netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig);
netScheduler.setTaskLimiter("getLatestVideos", null);
netScheduler.setProviderLimiter("bilibili", biliLimiterConfig);
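
For context, a sketch of how callers use the scheduler configured above; the request/task pairing mirrors `lib/net/getLatestVideoAids.ts` later in this commit, and the error handling shown is an illustrative pattern rather than the project's exact code.

```ts
import netScheduler, { NetSchedulerError } from "lib/mq/scheduler.ts";
import { VideoListResponse } from "lib/net/bilibili.d.ts";

// Sketch: issue a request under the "getLatestVideos" task so its rate limiter applies.
try {
    const url = "https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=10&pn=1";
    const data = await netScheduler.request<VideoListResponse>(url, "getLatestVideos");
    console.log(`Fetched ${data.data.archives.length} videos.`);
} catch (e) {
    if (e instanceof NetSchedulerError && e.code === "PROXY_RATE_LIMITED") {
        // Back off and retry later; other errors are left to the caller.
        console.warn("Rate limited, retry later.");
    } else {
        throw e;
    }
}
```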

View File

@ -9,4 +9,4 @@ export class WorkerError extends Error {
this.service = service;
this.rawError = rawError;
}
}
}

View File

@ -21,7 +21,7 @@ export class SlidingWindow {
async event(eventName: string): Promise<void> {
const now = Date.now();
const key = `cvsa:sliding_window:${eventName}`;
const uniqueMember = `${now}-${Math.random()}`;
// Add current timestamp to an ordered set
await this.redis.zadd(key, now, uniqueMember);
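
For context, a sliding-window count over such a sorted set is usually derived by pruning members older than the window and counting the rest; the helper below is a generic sketch with ioredis, not the project's actual implementation.

```ts
import { Redis } from "ioredis";

// Sketch: count events recorded for `eventName` within the last `windowMs` milliseconds.
async function countRecentEvents(redis: Redis, eventName: string, windowMs: number): Promise<number> {
    const key = `cvsa:sliding_window:${eventName}`;
    const now = Date.now();
    // Remove members whose score (timestamp) falls outside the window...
    await redis.zremrangebyscore(key, 0, now - windowMs);
    // ...then the remaining cardinality is the in-window event count.
    return await redis.zcard(key);
}
```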

View File

@ -0,0 +1,40 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { getVideoInfo } from "lib/net/getVideoInfo.ts";
import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
import logger from "lib/log/logger.ts";
import { ClassifyVideoQueue } from "lib/mq/index.ts";
import { userExistsInBiliUsers, videoExistsInAllData } from "lib/db/allData.ts";
export async function insertVideoInfo(client: Client, aid: number) {
const videoExists = await videoExistsInAllData(client, aid);
if (videoExists) {
return;
}
const data = await getVideoInfo(aid);
if (data === null) {
return null;
}
const bvid = data.View.bvid;
const desc = data.View.desc;
const uid = data.View.owner.mid;
const tags = data.Tags
.filter((tag) => ["old_channel", "topic"].includes(tag.tag_type))
.map((tag) => tag.tag_name).join(",");
const title = data.View.title;
const published_at = formatTimestampToPsql(data.View.pubdate);
const duration = data.View.duration;
await client.queryObject(
`INSERT INTO all_data (aid, bvid, description, uid, tags, title, published_at, duration)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
[aid, bvid, desc, uid, tags, title, published_at, duration],
);
const userExists = await userExistsInBiliUsers(client, uid);
if (!userExists) {
await client.queryObject(
`INSERT INTO bili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`,
[uid, data.View.owner.name, data.Card.card.sign, data.Card.follower],
);
}
logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
await ClassifyVideoQueue.add("classifyVideo", { aid });
}

View File

@ -0,0 +1,55 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { getLatestVideoAids } from "lib/net/getLatestVideoAids.ts";
import { videoExistsInAllData } from "lib/db/allData.ts";
import { sleep } from "lib/utils/sleep.ts";
import { SECOND } from "$std/datetime/constants.ts";
import logger from "lib/log/logger.ts";
import { LatestVideosQueue } from "lib/mq/index.ts";
export async function queueLatestVideos(
client: Client,
): Promise<number | null> {
let page = 1;
let i = 0;
const videosFound = new Set();
while (true) {
const pageSize = page == 1 ? 10 : 30;
const aids = await getLatestVideoAids(page, pageSize);
if (aids.length == 0) {
logger.verbose("No more videos found", "net", "fn:insertLatestVideos()");
break;
}
let allExists = true;
let delay = 0;
for (const aid of aids) {
const videoExists = await videoExistsInAllData(client, aid);
if (videoExists) {
continue;
}
await LatestVideosQueue.add("getVideoInfo", { aid }, { delay,
attempts: 100,
backoff: {
type: "fixed",
delay: SECOND * 5
}
});
videosFound.add(aid);
allExists = false;
delay += Math.random() * SECOND * 0.5;
}
i += aids.length;
logger.log(
`Page ${page} crawled, total: ${videosFound.size}/${i} videos added/observed.`,
"net",
"fn:queueLatestVideos()",
);
if (allExists) {
return 0;
}
page++;
const randomTime = Math.random() * 4000;
const delta = SECOND;
await sleep(randomTime + delta);
}
return 0;
}

lib/net/bilibili.d.ts vendored
View File

@ -1,117 +1,224 @@
interface BaseResponse<T> {
code: number;
message: string;
ttl: number;
data: T;
code: number;
message: string;
ttl: number;
data: T;
}
export type VideoListResponse = BaseResponse<VideoListData>;
export type VideoDetailsResponse = BaseResponse<VideoDetailsData>;
export type VideoTagsResponse = BaseResponse<VideoTagsData>;
interface VideoDetailsData {
View: {
bvid: string;
aid: number;
videos: number;
tid: number;
tid_v2: number;
tname: string;
tname_v2: string;
copyright: number;
pic: string;
title: string;
pubdate: number;
ctime: number;
desc: string;
desc_v2: string;
state: number;
duration: number;
mission_id: number;
rights: VideoRights;
owner: {
mid: number;
name: string;
face: string;
};
stat: VideoStats;
argue_info: {
argue_msg: string;
argue_type: number;
argue_link: string;
};
dynamic: "";
cid: number;
dimension: VideoDimension;
pages: VideoPage[];
subtitle: {
allow_submit: number;
list: VideoSubTitle[];
};
staff: VideoStaff[];
};
Card: {
card: {
mid: number;
name: string;
sex: string;
face: string;
fans: number;
attention: number;
friend: number;
sign: string;
level_info: {
current_level: number;
};
};
archive_count: number;
article_count: number;
follower: number;
like_num: number;
};
Tags: VideoTagsLite[];
}
interface VideoTagsLite {
tag_id: number;
tag_name: string;
music_id: string;
tag_type: string;
jump_url: string;
}
type VideoTagsData = VideoTags[];
type VideoStaff = {
mid: number;
title: string;
name: string;
face: string;
follower: number;
};
type VideoSubTitle = {
id: number;
lan: string;
lan_doc: string;
is_lock: number;
subtitle_url: string;
type: number;
id_str: string;
ai_type: number;
ai_status: number;
};
type VideoDimension = {
width: number;
height: number;
rotate: number;
};
interface VideoPage {
cid: number;
page: number;
from: string;
part: string;
duration: number;
vid: string;
weblink: string;
dimension: VideoDimension;
first_frame: string;
}
interface VideoTags {
tag_id: number;
tag_name: string;
cover: string;
head_cover: string;
content: string;
short_content: string;
type: number;
state: number;
ctime: number;
count: {
view: number;
use: number;
atten: number;
}
is_atten: number;
likes: number;
hates: number;
attribute: number;
liked: number;
hated: number;
extra_attr: number;
tag_id: number;
tag_name: string;
cover: string;
head_cover: string;
content: string;
short_content: string;
type: number;
state: number;
ctime: number;
count: {
view: number;
use: number;
atten: number;
};
is_atten: number;
likes: number;
hates: number;
attribute: number;
liked: number;
hated: number;
extra_attr: number;
}
interface VideoListData {
archives: VideoListVideo[];
page: {
num: number;
size: number;
count: number;
};
archives: VideoListVideo[];
page: {
num: number;
size: number;
count: number;
};
}
type VideoRights = {
bp: number;
elec: number;
download: number;
movie: number;
pay: number;
hd5: number;
no_reprint: number;
autoplay: number;
ugc_pay: number;
is_cooperation: number;
ugc_pay_preview: number;
no_background: number;
arc_pay: number;
pay_free_watch: number;
};
type VideoStats = {
aid: number;
view: number;
danmaku: number;
reply: number;
favorite: number;
coin: number;
share: number;
now_rank: number;
his_rank: number;
like: number;
};
interface VideoListVideo {
aid: number;
videos: number;
tid: number;
tname: string;
copyright: number;
pic: string;
title: string;
pubdate: number;
ctime: number;
desc: string;
state: number;
duration: number;
mission_id?: number;
rights: {
bp: number;
elec: number;
download: number;
movie: number;
pay: number;
hd5: number;
no_reprint: number;
autoplay: number;
ugc_pay: number;
is_cooperation: number;
ugc_pay_preview: number;
no_background: number;
arc_pay: number;
pay_free_watch: number;
},
owner: {
mid: number;
name: string;
face: string;
},
stat: {
aid: number;
view: number;
danmaku: number;
reply: number;
favorite: number;
coin: number;
share: number;
now_rank: number;
his_rank: number;
like: number;
dislike: number;
vt: number;
vv: number;
},
dynamic: string;
cid: number;
dimension: {
width: number;
height: number;
rotate: number;
},
season_id?: number;
short_link_v2: string;
first_frame: string;
pub_location: string;
cover43: string;
tidv2: number;
tname_v2: string;
bvid: string;
season_type: number;
is_ogv: number;
ovg_info: string | null;
rcmd_season: string;
enable_vt: number;
ai_rcmd: null | string;
aid: number;
videos: number;
tid: number;
tname: string;
copyright: number;
pic: string;
title: string;
pubdate: number;
ctime: number;
desc: string;
state: number;
duration: number;
mission_id?: number;
rights: VideoRights;
owner: {
mid: number;
name: string;
face: string;
};
stat: VideoStats;
dynamic: string;
cid: number;
dimension: VideoDimension;
season_id?: number;
short_link_v2: string;
first_frame: string;
pub_location: string;
cover43: string;
tidv2: number;
tname_v2: string;
bvid: string;
season_type: number;
is_ogv: number;
ovg_info: string | null;
rcmd_season: string;
enable_vt: number;
ai_rcmd: null | string;
}

View File

@ -1,88 +0,0 @@
import { getLatestVideos } from "lib/net/getLatestVideos.ts";
import { SECOND } from "$std/datetime/constants.ts";
import { VideoListVideo } from "lib/net/bilibili.d.ts";
export async function getVideoPositionInNewList(timestamp: number): Promise<number | null | VideoListVideo[]> {
const virtualPageSize = 50;
let lowPage = 1;
let highPage = 1;
let foundUpper = false;
while (true) {
const ps = highPage < 2 ? 50 : 1
const pn = highPage < 2 ? 1 : highPage * virtualPageSize;
const videos = await getLatestVideos(pn, ps);
if (!videos || videos.length === 0) {
break;
}
const lastVideo = videos[videos.length - 1];
if (!lastVideo || !lastVideo.pubdate) {
break;
}
const lastTime = lastVideo.pubdate * SECOND
if (lastTime <= timestamp && highPage == 1) {
return videos;
}
else if (lastTime <= timestamp) {
foundUpper = true;
break;
} else {
lowPage = highPage;
highPage *= 2;
}
}
if (!foundUpper) {
return null;
}
let boundaryPage = highPage;
let lo = lowPage;
let hi = highPage;
while (lo <= hi) {
const mid = Math.floor((lo + hi) / 2);
const videos = await getLatestVideos(mid * virtualPageSize, 1);
if (!videos) {
return null;
}
if (videos.length === 0) {
hi = mid - 1;
continue;
}
const lastVideo = videos[videos.length - 1];
if (!lastVideo || !lastVideo.pubdate) {
hi = mid - 1;
continue;
}
const lastTime = lastVideo.pubdate * SECOND
if (lastTime > timestamp) {
lo = mid + 1;
} else {
boundaryPage = mid;
hi = mid - 1;
}
}
const boundaryVideos = await getLatestVideos(boundaryPage, virtualPageSize);
let indexInPage = 0;
if (boundaryVideos && boundaryVideos.length > 0) {
for (let i = 0; i < boundaryVideos.length; i++) {
const video = boundaryVideos[i];
if (!video.pubdate) {
continue;
}
const videoTime = video.pubdate * SECOND
if (videoTime > timestamp) {
indexInPage++;
} else {
break;
}
}
}
const count = (boundaryPage - 1) * virtualPageSize + indexInPage;
const safetyMargin = 5;
return count + safetyMargin;
}

View File

@ -0,0 +1,21 @@
import { VideoListResponse } from "lib/net/bilibili.d.ts";
import logger from "lib/log/logger.ts";
import netScheduler from "lib/mq/scheduler.ts";
export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[]> {
const startFrom = 1 + pageSize * (page - 1);
const endTo = pageSize * page;
const range = `${startFrom}-${endTo}`;
const errMessage = `Error fetching latest aid for ${range}:`;
const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
const data = await netScheduler.request<VideoListResponse>(url, "getLatestVideos");
if (data.code != 0) {
logger.error(errMessage + data.message, "net", "getLastestVideos");
return [];
}
if (data.data.archives.length === 0) {
logger.verbose("No more videos found", "net", "getLatestVideos");
return [];
}
return data.data.archives.map((video) => video.aid);
}

View File

@ -1,36 +0,0 @@
import {VideoListResponse } from "lib/net/bilibili.d.ts";
import logger from "lib/log/logger.ts";
import netScheduler, {NetSchedulerError} from "lib/mq/scheduler.ts";
export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[] | null> {
const startFrom = 1 + pageSize * (page - 1);
const endTo = pageSize * page;
const range = `${startFrom}-${endTo}`
const errMessage = `Error fetching latest aid for ${range}:`
try {
const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
const data = await netScheduler.request<VideoListResponse>(url, 'getLatestVideos');
if (data.code != 0) {
logger.error(errMessage + data.message, 'net', 'getLastestVideos');
return [];
}
if (data.data.archives.length === 0) {
logger.verbose("No more videos found", "net", "getLatestVideos");
return [];
}
return data.data.archives.map(video => video.aid);
}
catch (e) {
const error = e as NetSchedulerError;
if (error.code == "FETCH_ERROR") {
const rawError = error.rawError! as Error;
rawError.message = errMessage + rawError.message;
logger.error(rawError, 'net', 'getVideoTags');
return null;
}
else {
// Re-throw the error
throw e;
}
}
}

lib/net/getVideoInfo.ts Normal file
View File

@ -0,0 +1,15 @@
import netScheduler from "lib/mq/scheduler.ts";
import { VideoDetailsData, VideoDetailsResponse } from "lib/net/bilibili.d.ts";
import logger from "lib/log/logger.ts";
export async function getVideoInfo(aid: number): Promise<VideoDetailsData | null> {
const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`;
const data = await netScheduler.request<VideoDetailsResponse>(url, "getVideoInfo");
const errMessage = `Error fetching metadata for ${aid}:`;
logger.log("Fetching metadata for " + aid, "net", "fn:getVideoInfo");
if (data.code !== 0) {
logger.error(errMessage + data.message, "net", "fn:getVideoInfo");
return null;
}
return data.data;
}

View File

@ -1,35 +0,0 @@
import { VideoTagsResponse } from "lib/net/bilibili.d.ts";
import netScheduler, {NetSchedulerError} from "lib/mq/scheduler.ts";
import logger from "lib/log/logger.ts";
/*
* Fetch the tags for a video
* @param {number} aid The video's aid
* @return {Promise<string[] | null>} A promise, which resolves to an array of tags,
* or null if an `fetch` error occurred
* @throws {NetSchedulerError} If the request failed.
*/
export async function getVideoTags(aid: number): Promise<string[] | null> {
try {
const url = `https://api.bilibili.com/x/tag/archive/tags?aid=${aid}`;
const data = await netScheduler.request<VideoTagsResponse>(url, 'getVideoTags');
if (data.code != 0) {
logger.error(`Error fetching tags for video ${aid}: ${data.message}`, 'net', 'getVideoTags');
return [];
}
return data.data.map((tag) => tag.tag_name);
}
catch (e) {
const error = e as NetSchedulerError;
if (error.code == "FETCH_ERROR") {
const rawError = error.rawError! as Error;
rawError.message = `Error fetching tags for video ${aid}: ` + rawError.message;
logger.error(rawError, 'net', 'getVideoTags');
return null;
}
else {
// Re-throw the error
throw e;
}
}
}

View File

@ -1,76 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { getLatestVideos } from "lib/net/getLatestVideos.ts";
import { getLatestVideoTimestampFromAllData, insertIntoAllData, videoExistsInAllData } from "lib/db/allData.ts";
import { sleep } from "lib/utils/sleep.ts";
import { getVideoPositionInNewList } from "lib/net/bisectVideoStartFrom.ts";
import { SECOND } from "$std/datetime/constants.ts";
import logger from "lib/log/logger.ts";
export async function insertLatestVideos(
client: Client,
pageSize: number = 10,
intervalRate: number = 4000,
): Promise<number | null> {
const latestVideoTimestamp = await getLatestVideoTimestampFromAllData(client);
if (latestVideoTimestamp == null) {
logger.error("Cannot get latest video timestamp from current database.", "net", "fn:insertLatestVideos()");
return null
}
logger.log(`Latest video in the database: ${new Date(latestVideoTimestamp).toISOString()}`, "net", "fn:insertLatestVideos()")
const videoIndex = await getVideoPositionInNewList(latestVideoTimestamp);
if (videoIndex == null) {
logger.error("Cannot locate the video through bisect.", "net", "fn:insertLatestVideos()");
return null
}
if (typeof videoIndex == "object") {
for (const video of videoIndex) {
const videoExists = await videoExistsInAllData(client, video.aid);
if (!videoExists) {
await insertIntoAllData(client, video);
}
}
return 0;
}
let page = Math.floor(videoIndex / pageSize) + 1;
let failCount = 0;
const insertedVideos = new Set();
while (true) {
try {
const videos = await getLatestVideos(page, pageSize);
if (videos == null) {
failCount++;
if (failCount > 5) {
return null;
}
continue;
}
failCount = 0;
if (videos.length == 0) {
logger.verbose("No more videos found", "net", "fn:insertLatestVideos()");
break;
}
for (const video of videos) {
const videoExists = await videoExistsInAllData(client, video.aid);
if (!videoExists) {
await insertIntoAllData(client, video);
insertedVideos.add(video.aid);
}
}
logger.log(`Page ${page} crawled, total: ${insertedVideos.size} videos.`, "net", "fn:insertLatestVideos()");
page--;
if (page < 1) {
return 0;
}
} catch (error) {
logger.error(error as Error, "net", "fn:insertLatestVideos()");
failCount++;
if (failCount > 5) {
return null;
}
} finally {
await sleep(Math.random() * intervalRate + failCount * 3 * SECOND + SECOND);
}
}
return 0;
}

View File

@ -1,3 +1,3 @@
export async function sleep(ms: number) {
await new Promise((resolve) => setTimeout(resolve, ms));
}
}

View File

@ -1,3 +1,3 @@
export function truncate(num: number, min: number, max: number) {
return Math.max(min, Math.min(num, max))
}
return Math.max(min, Math.min(num, max));
}

View File

@ -2,13 +2,16 @@ import express from "express";
import { createBullBoard } from "@bull-board/api";
import { BullMQAdapter } from "@bull-board/api/bullMQAdapter.js";
import { ExpressAdapter } from "@bull-board/express";
import { ClassifyVideoQueue, LatestVideosQueue, VideoTagsQueue } from "lib/mq/index.ts";
import { ClassifyVideoQueue, LatestVideosQueue } from "lib/mq/index.ts";
const serverAdapter = new ExpressAdapter();
serverAdapter.setBasePath("/");
createBullBoard({
queues: [new BullMQAdapter(LatestVideosQueue), new BullMQAdapter(VideoTagsQueue), new BullMQAdapter(ClassifyVideoQueue)],
queues: [
new BullMQAdapter(LatestVideosQueue),
new BullMQAdapter(ClassifyVideoQueue),
],
serverAdapter: serverAdapter,
});
@ -16,8 +19,6 @@ const app = express();
app.use("/", serverAdapter.getRouter());
// other configurations of your server
app.listen(3000, () => {
console.log("Running on 3000...");
console.log("For the UI, open http://localhost:3000/");

View File

@ -18,7 +18,6 @@ Deno.addSignalListener("SIGTERM", async () => {
Deno.exit();
});
await initializeModels();
const filterWorker = new Worker(
@ -45,6 +44,6 @@ filterWorker.on("error", (err) => {
logger.error(e.rawError, e.service, e.codePath);
});
filterWorker.on("closed", async() => {
filterWorker.on("closed", async () => {
await lockManager.releaseLock("classifyVideos");
})
});

View File

@ -2,22 +2,19 @@ import { Job, Worker } from "bullmq";
import { getLatestVideosWorker } from "lib/mq/executors.ts";
import { redis } from "lib/db/redis.ts";
import logger from "lib/log/logger.ts";
import { getVideoTagsWorker } from "lib/mq/exec/getVideoTags.ts";
import { getVideoTagsInitializer } from "lib/mq/exec/getVideoTags.ts";
import { lockManager } from "lib/mq/lockManager.ts";
import { WorkerError } from "lib/mq/schema.ts";
import { getVideoInfoWorker } from "lib/mq/exec/getVideoInfo.ts";
Deno.addSignalListener("SIGINT", async () => {
logger.log("SIGINT Received: Shutting down workers...", "mq");
await latestVideoWorker.close(true);
await videoTagsWorker.close(true);
Deno.exit();
});
Deno.addSignalListener("SIGTERM", async () => {
logger.log("SIGTERM Received: Shutting down workers...", "mq");
await latestVideoWorker.close(true);
await videoTagsWorker.close(true);
Deno.exit();
});
@ -28,11 +25,14 @@ const latestVideoWorker = new Worker(
case "getLatestVideos":
await getLatestVideosWorker(job);
break;
case "getVideoInfo":
await getVideoInfoWorker(job);
break;
default:
break;
}
},
{ connection: redis, concurrency: 1, removeOnComplete: { count: 1440 } },
{ connection: redis, concurrency: 6, removeOnComplete: { count: 1440 } },
);
latestVideoWorker.on("active", () => {
@ -47,33 +47,3 @@ latestVideoWorker.on("error", (err) => {
latestVideoWorker.on("closed", async () => {
await lockManager.releaseLock("getLatestVideos");
});
const videoTagsWorker = new Worker(
"videoTags",
async (job: Job) => {
switch (job.name) {
case "getVideoTags":
return await getVideoTagsWorker(job);
case "getVideosTags":
return await getVideoTagsInitializer();
default:
break;
}
},
{
connection: redis,
concurrency: 6,
removeOnComplete: {
count: 1000,
},
},
);
videoTagsWorker.on("active", () => {
logger.log("Worker (videoTags) activated.", "mq");
});
videoTagsWorker.on("error", (err) => {
const e = err as WorkerError;
logger.error(e.rawError, e.service, e.codePath);
});

View File

@ -1,33 +0,0 @@
import { assertEquals } from "jsr:@std/assert";
import { videoTagsIsNull } from "lib/db/allData.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { postgresConfig } from "lib/db/pgConfig.ts";
// A minimal aid which has an empty tags field in our database
const TEST_AID = 63569;
Deno.test("videoTagsIsNull function", async () => {
const client = new Client(postgresConfig);
try {
const transaction = client.createTransaction("test_transaction");
await transaction.begin();
const result1 = await videoTagsIsNull(transaction, TEST_AID);
assertEquals(typeof result1, "boolean", "The result should be a boolean value.");
assertEquals(result1, false, "The result should be false if tags is not NULL for the given aid.");
await transaction.queryArray`UPDATE all_data SET tags = NULL WHERE aid = ${TEST_AID}`;
const result2 = await videoTagsIsNull(transaction, TEST_AID);
assertEquals(typeof result2, "boolean", "The result should be a boolean value.");
assertEquals(result2, true, "The result should be true if tags is NULL for the given aid.");
await transaction.rollback();
} catch (error) {
console.error("Error during test:", error);
throw error;
} finally {
client.end();
}
});

View File

@ -1,7 +1,7 @@
import {assertEquals} from "jsr:@std/assert";
import {SlidingWindow} from "lib/mq/slidingWindow.ts";
import {RateLimiter, RateLimiterConfig} from "lib/mq/rateLimiter.ts";
import {Redis} from "npm:ioredis@5.5.0";
import { assertEquals } from "jsr:@std/assert";
import { SlidingWindow } from "lib/mq/slidingWindow.ts";
import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts";
import { Redis } from "npm:ioredis@5.5.0";
Deno.test("RateLimiter works correctly", async () => {
const redis = new Redis({ maxRetriesPerRequest: null });
@ -71,7 +71,7 @@ Deno.test("Multiple configs work correctly", async () => {
await new Promise((resolve) => setTimeout(resolve, windowSize1 * 1000 + 500));
// Availability should now be true (due to config1)
assertEquals(await rateLimiter.getAvailability(), true);
assertEquals(await rateLimiter.getAvailability(), true);
// Trigger events up to the limit of the second config
for (let i = maxRequests1; i < maxRequests2; i++) {
@ -88,4 +88,4 @@ Deno.test("Multiple configs work correctly", async () => {
assertEquals(await rateLimiter.getAvailability(), true);
redis.quit();
});
});

View File

@ -7,13 +7,13 @@ Deno.test("SlidingWindow - event and count", async () => {
const windowSize = 5000; // 5 seconds
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName = "test_event";
await slidingWindow.clear(eventName);
await slidingWindow.clear(eventName);
await slidingWindow.event(eventName);
const count = await slidingWindow.count(eventName);
assertEquals(count, 1);
redis.quit();
redis.quit();
});
Deno.test("SlidingWindow - multiple events", async () => {
@ -21,7 +21,7 @@ Deno.test("SlidingWindow - multiple events", async () => {
const windowSize = 5000; // 5 seconds
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName = "test_event";
await slidingWindow.clear(eventName);
await slidingWindow.clear(eventName);
await slidingWindow.event(eventName);
await slidingWindow.event(eventName);
@ -29,7 +29,7 @@ Deno.test("SlidingWindow - multiple events", async () => {
const count = await slidingWindow.count(eventName);
assertEquals(count, 3);
redis.quit();
redis.quit();
});
Deno.test("SlidingWindow - no events", async () => {
@ -37,12 +37,12 @@ Deno.test("SlidingWindow - no events", async () => {
const windowSize = 5000; // 5 seconds
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName = "test_event";
await slidingWindow.clear(eventName);
await slidingWindow.clear(eventName);
const count = await slidingWindow.count(eventName);
assertEquals(count, 0);
redis.quit();
redis.quit();
});
Deno.test("SlidingWindow - different event names", async () => {
@ -51,8 +51,8 @@ Deno.test("SlidingWindow - different event names", async () => {
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName1 = "test_event_1";
const eventName2 = "test_event_2";
await slidingWindow.clear(eventName1);
await slidingWindow.clear(eventName2);
await slidingWindow.clear(eventName1);
await slidingWindow.clear(eventName2);
await slidingWindow.event(eventName1);
await slidingWindow.event(eventName2);
@ -62,7 +62,7 @@ Deno.test("SlidingWindow - different event names", async () => {
assertEquals(count1, 1);
assertEquals(count2, 1);
redis.quit();
redis.quit();
});
Deno.test("SlidingWindow - large number of events", async () => {
@ -70,7 +70,7 @@ Deno.test("SlidingWindow - large number of events", async () => {
const windowSize = 5000; // 5 seconds
const slidingWindow = new SlidingWindow(redis, windowSize);
const eventName = "test_event";
await slidingWindow.clear(eventName);
await slidingWindow.clear(eventName);
const numEvents = 1000;
for (let i = 0; i < numEvents; i++) {
@ -80,5 +80,5 @@ Deno.test("SlidingWindow - large number of events", async () => {
const count = await slidingWindow.count(eventName);
assertEquals(count, numEvents);
redis.quit();
redis.quit();
});

View File

@ -1,25 +0,0 @@
import { assertEquals } from "jsr:@std/assert";
import { getLatestVideos } from "lib/net/getLatestVideos.ts";
Deno.test("Get latest videos", async () => {
const videos = (await getLatestVideos(1, 5))!;
assertEquals(videos.length, 5);
videos.forEach((video) => {
assertVideoProperties(video);
});
});
function assertVideoProperties(video: object) {
const aid = "aid" in video && typeof video.aid === "number";
const bvid = "bvid" in video && typeof video.bvid === "string" &&
video.bvid.length === 12 && video.bvid.startsWith("BV");
const description = "description" in video && typeof video.description === "string";
const uid = "uid" in video && typeof video.uid === "number";
const tags = "tags" in video && (typeof video.tags === "string" || video.tags === null);
const title = "title" in video && typeof video.title === "string";
const publishedAt = "published_at" in video && typeof video.published_at === "string";
const match = aid && bvid && description && uid && tags && title && publishedAt;
assertEquals(match, true);
}

View File

@ -1,28 +0,0 @@
import { assertEquals } from "jsr:@std/assert";
import { getVideoTags } from "lib/net/getVideoTags.ts";
Deno.test("Get video tags - regular video", async () => {
const tags = (await getVideoTags(826597951))!.sort();
assertEquals(tags, [
"纯白P",
"中华墨水娘",
"中华少女",
"中华粘土娘",
"中华缘木娘",
"中华少女Project",
"提糯Tino",
"中华烛火娘",
"中华烁金娘",
"新世代音乐人计划女生季",
].sort());
});
Deno.test("Get video tags - non-existent video", async () => {
const tags = (await getVideoTags(8265979511111111));
assertEquals(tags, []);
});
Deno.test("Get video tags - video with no tag", async () => {
const tags = (await getVideoTags(981001865));
assertEquals(tags, []);
});