ref: separate fetching of aid list and video metadata
This commit is contained in:
parent
c67e3d8e36
commit
1838219f04
22
README.md
22
README.md
@ -6,9 +6,12 @@
|
||||
|
||||
纵观整个互联网,对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站:
|
||||
|
||||
- [萌娘百科](https://zh.moegirl.org.cn/): 收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
- [VCPedia](https://vcpedia.cn/): 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
- [VocaDB](https://vocadb.net/): 一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV 等[^2],其中包含大量中文歌声合成作品。
|
||||
- [萌娘百科](https://zh.moegirl.org.cn/):
|
||||
收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
- [VCPedia](https://vcpedia.cn/):
|
||||
由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
- [VocaDB](https://vocadb.net/): 一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV
|
||||
等[^2],其中包含大量中文歌声合成作品。
|
||||
- [天钿Daily](https://tdd.bunnyxt.com/):一个VC相关数据交流与分享的网站。致力于VC相关数据交流,定期抓取VC相关数据,选取有意义的纬度展示。[^3]
|
||||
|
||||
上述网站中,或多或少存在一些不足,例如:
|
||||
@ -36,19 +39,22 @@
|
||||
|
||||
### 数据库
|
||||
|
||||
中V档案馆使用[PostgreSQL](https://postgresql.org)作为数据库,我们承诺定期导出数据库转储 (dump) 文件并公开,其内容遵从以下协议或条款:
|
||||
中V档案馆使用[PostgreSQL](https://postgresql.org)作为数据库,我们承诺定期导出数据库转储 (dump)
|
||||
文件并公开,其内容遵从以下协议或条款:
|
||||
|
||||
- 数据库中的事实性数据,根据适用法律,不构成受版权保护的内容。中V档案馆放弃一切可能的权利([CC0 1.0 Universal](https://creativecommons.org/publicdomain/zero/1.0/))。
|
||||
- 对于数据库中有原创性的内容(如贡献者编辑的描述性内容),如无例外,以[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)提供。
|
||||
- 对于引用、摘编或改编自萌娘百科、VCPedia的内容,以与原始协议(CC BY-NC-SA 3.0 CN)兼容的协议[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供,并注明原始协议 。
|
||||
> 根据原始协议第四条第2项内容,CC BY-NC-SA 4.0协议为与原始协议具有相同授权要素的后续版本(“可适用的协议”)。
|
||||
- 对于引用、摘编或改编自萌娘百科、VCPedia的内容,以与原始协议(CC BY-NC-SA 3.0
|
||||
CN)兼容的协议[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供,并注明原始协议 。
|
||||
> 根据原始协议第四条第2项内容,CC BY-NC-SA 4.0协议为与原始协议具有相同授权要素的后续版本(“可适用的协议”)。
|
||||
- 中V档案馆文档使用[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)。
|
||||
|
||||
### 软件代码
|
||||
|
||||
用于构建中V档案馆的软件代码在[AGPL 3.0](https://www.gnu.org/licenses/agpl-3.0.html)许可证下公开,参见[LICENSE](./LICENSE)
|
||||
|
||||
|
||||
[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。
|
||||
|
||||
[^2]: 翻译自[VocaDB](https://vocadb.net/),于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。
|
||||
[^3]: 引用自[关于 - 天钿Daily](https://tdd.bunnyxt.com/about)
|
||||
|
||||
[^3]: 引用自[关于 - 天钿Daily](https://tdd.bunnyxt.com/about)
|
||||
|
116
deno.json
116
deno.json
@ -1,60 +1,60 @@
|
||||
{
|
||||
"lock": false,
|
||||
"tasks": {
|
||||
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
|
||||
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
|
||||
"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
|
||||
"cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
|
||||
"manifest": "deno task cli manifest $(pwd)",
|
||||
"start": "deno run -A --watch=static/,routes/ dev.ts",
|
||||
"build": "deno run -A dev.ts build",
|
||||
"preview": "deno run -A main.ts",
|
||||
"update": "deno run -A -r https://fresh.deno.dev/update .",
|
||||
"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/worker.ts",
|
||||
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
|
||||
"adder": "deno run --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
|
||||
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
|
||||
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
|
||||
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
|
||||
},
|
||||
"lint": {
|
||||
"rules": {
|
||||
"tags": ["fresh", "recommended"]
|
||||
}
|
||||
},
|
||||
"exclude": ["**/_fresh/*"],
|
||||
"imports": {
|
||||
"@std/assert": "jsr:@std/assert@1",
|
||||
"$fresh/": "https://deno.land/x/fresh@1.7.3/",
|
||||
"preact": "https://esm.sh/preact@10.22.0",
|
||||
"preact/": "https://esm.sh/preact@10.22.0/",
|
||||
"@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
|
||||
"@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
|
||||
"tailwindcss": "npm:tailwindcss@3.4.1",
|
||||
"tailwindcss/": "npm:/tailwindcss@3.4.1/",
|
||||
"tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
|
||||
"$std/": "https://deno.land/std@0.216.0/",
|
||||
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
|
||||
"bullmq": "npm:bullmq",
|
||||
"lib/": "./lib/",
|
||||
"ioredis": "npm:ioredis",
|
||||
"@bull-board/api": "npm:@bull-board/api",
|
||||
"@bull-board/express": "npm:@bull-board/express",
|
||||
"express": "npm:express",
|
||||
"src/": "./src/",
|
||||
"onnxruntime": "npm:onnxruntime-node@1.19.2",
|
||||
"chalk": "npm:chalk"
|
||||
},
|
||||
"compilerOptions": {
|
||||
"jsx": "react-jsx",
|
||||
"jsxImportSource": "preact"
|
||||
},
|
||||
"nodeModulesDir": "auto",
|
||||
"fmt": {
|
||||
"useTabs": true,
|
||||
"lineWidth": 120,
|
||||
"indentWidth": 4,
|
||||
"semiColons": true,
|
||||
"proseWrap": "always"
|
||||
}
|
||||
"lock": false,
|
||||
"tasks": {
|
||||
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
|
||||
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
|
||||
"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
|
||||
"cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
|
||||
"manifest": "deno task cli manifest $(pwd)",
|
||||
"start": "deno run -A --watch=static/,routes/ dev.ts",
|
||||
"build": "deno run -A dev.ts build",
|
||||
"preview": "deno run -A main.ts",
|
||||
"update": "deno run -A -r https://fresh.deno.dev/update .",
|
||||
"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/worker.ts",
|
||||
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
|
||||
"adder": "deno run --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
|
||||
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
|
||||
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
|
||||
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
|
||||
},
|
||||
"lint": {
|
||||
"rules": {
|
||||
"tags": ["fresh", "recommended"]
|
||||
}
|
||||
},
|
||||
"exclude": ["**/_fresh/*"],
|
||||
"imports": {
|
||||
"@std/assert": "jsr:@std/assert@1",
|
||||
"$fresh/": "https://deno.land/x/fresh@1.7.3/",
|
||||
"preact": "https://esm.sh/preact@10.22.0",
|
||||
"preact/": "https://esm.sh/preact@10.22.0/",
|
||||
"@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
|
||||
"@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
|
||||
"tailwindcss": "npm:tailwindcss@3.4.1",
|
||||
"tailwindcss/": "npm:/tailwindcss@3.4.1/",
|
||||
"tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
|
||||
"$std/": "https://deno.land/std@0.216.0/",
|
||||
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
|
||||
"bullmq": "npm:bullmq",
|
||||
"lib/": "./lib/",
|
||||
"ioredis": "npm:ioredis",
|
||||
"@bull-board/api": "npm:@bull-board/api",
|
||||
"@bull-board/express": "npm:@bull-board/express",
|
||||
"express": "npm:express",
|
||||
"src/": "./src/",
|
||||
"onnxruntime": "npm:onnxruntime-node@1.19.2",
|
||||
"chalk": "npm:chalk"
|
||||
},
|
||||
"compilerOptions": {
|
||||
"jsx": "react-jsx",
|
||||
"jsxImportSource": "preact"
|
||||
},
|
||||
"nodeModulesDir": "auto",
|
||||
"fmt": {
|
||||
"useTabs": true,
|
||||
"lineWidth": 120,
|
||||
"indentWidth": 4,
|
||||
"semiColons": true,
|
||||
"proseWrap": "always"
|
||||
}
|
||||
}
|
||||
|
@ -17,7 +17,8 @@ layout:
|
||||
|
||||
Welcome to the CVSA Documentation!
|
||||
|
||||
This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors, etc.
|
||||
This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors,
|
||||
etc.
|
||||
|
||||
### Jump right in
|
||||
|
||||
|
@ -1,22 +1,22 @@
|
||||
# Table of contents
|
||||
|
||||
* [Welcome](README.md)
|
||||
- [Welcome](README.md)
|
||||
|
||||
## About
|
||||
|
||||
* [About CVSA Project](about/this-project.md)
|
||||
* [Scope of Inclusion](about/scope-of-inclusion.md)
|
||||
- [About CVSA Project](about/this-project.md)
|
||||
- [Scope of Inclusion](about/scope-of-inclusion.md)
|
||||
|
||||
## Architecure
|
||||
|
||||
* [Overview](architecure/overview.md)
|
||||
* [Database Structure](architecure/database-structure/README.md)
|
||||
* [Type of Song](architecure/database-structure/type-of-song.md)
|
||||
* [Message Queue](architecure/message-queue/README.md)
|
||||
* [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
|
||||
* [Artificial Intelligence](architecure/artificial-intelligence.md)
|
||||
- [Overview](architecure/overview.md)
|
||||
- [Database Structure](architecure/database-structure/README.md)
|
||||
- [Type of Song](architecure/database-structure/type-of-song.md)
|
||||
- [Message Queue](architecure/message-queue/README.md)
|
||||
- [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
|
||||
- [Artificial Intelligence](architecure/artificial-intelligence.md)
|
||||
|
||||
## API Doc
|
||||
|
||||
* [Catalog](api-doc/catalog.md)
|
||||
* [Songs](api-doc/songs.md)
|
||||
- [Catalog](api-doc/catalog.md)
|
||||
- [Songs](api-doc/songs.md)
|
||||
|
@ -1,19 +1,27 @@
|
||||
# Scope of Inclusion
|
||||
|
||||
CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publisher, manipulators, arranger, etc), singers and voice engines / voicebanks. 
|
||||
CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publisher, manipulators,
|
||||
arranger, etc), singers and voice engines / voicebanks. 
|
||||
|
||||
For a **song**, it must meet the following conditions to be included in CVSA:
|
||||
|
||||
### Category 30
|
||||
|
||||
In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is posted on Bilibili. In some special cases, this rule may not be enforced. 
|
||||
In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is
|
||||
posted on Bilibili. In some special cases, this rule may not be enforced. 
|
||||
|
||||
### At Leats One Line of Chinese
|
||||
|
||||
The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
|
||||
The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports
|
||||
Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
|
||||
|
||||
### Using Vocal Synthesizer
|
||||
|
||||
To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony vocals).
|
||||
To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony
|
||||
vocals).
|
||||
|
||||
We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics, encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio) approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g., [so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).
|
||||
We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically
|
||||
modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics,
|
||||
encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio)
|
||||
approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g.,
|
||||
[so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).
|
||||
|
@ -1,11 +1,13 @@
|
||||
# About CVSA Project
|
||||
|
||||
CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis community in a highly automation-assisted way. 
|
||||
CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis
|
||||
community in a highly automation-assisted way. 
|
||||
|
||||
Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an automated and manually edited way:
|
||||
|
||||
* Metadata of songs (name, duration, publisher, singer, etc.)
|
||||
* Descriptive information of songs (content introduction, creation background, lyrics, etc.)
|
||||
* Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites, likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
|
||||
* Information about artists, albums, vocal synthesizers, and voicebanks.
|
||||
Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an
|
||||
automated and manually edited way:
|
||||
|
||||
- Metadata of songs (name, duration, publisher, singer, etc.)
|
||||
- Descriptive information of songs (content introduction, creation background, lyrics, etc.)
|
||||
- Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites,
|
||||
likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
|
||||
- Information about artists, albums, vocal synthesizers, and voicebanks.
|
||||
|
@ -1,4 +1,3 @@
|
||||
# Catalog
|
||||
|
||||
* [**Songs**](songs.md)
|
||||
|
||||
- [**Songs**](songs.md)
|
||||
|
@ -6,8 +6,9 @@ The AI systems we currently use are:
|
||||
|
||||
### The Filter
|
||||
|
||||
Located at `/filter/` under project root dir, it classifies a video in the [category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
|
||||
Located at `/filter/` under project root dir, it classifies a video in the
|
||||
[category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
|
||||
|
||||
* 0: Not related to Chinese vocal synthesis
|
||||
* 1: A original song with Chinese vocal synthesis
|
||||
* 2: A cover/remix song with Chinese vocal synthesis
|
||||
- 0: Not related to Chinese vocal synthesis
|
||||
- 1: A original song with Chinese vocal synthesis
|
||||
- 2: A cover/remix song with Chinese vocal synthesis
|
||||
|
@ -2,10 +2,11 @@
|
||||
|
||||
CVSA uses [PostgreSQL](https://www.postgresql.org/) as our database.
|
||||
|
||||
All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the following tables:
|
||||
|
||||
* songs: stores the main information of songs
|
||||
* bili\_user: stores snapshots of Bilibili user information
|
||||
* all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
|
||||
* labelling\_result: Contains label of videos in `all_data`tagged by our [AI system](../artificial-intelligence.md#the-filter).
|
||||
All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the
|
||||
following tables:
|
||||
|
||||
- songs: stores the main information of songs
|
||||
- bili\_user: stores snapshots of Bilibili user information
|
||||
- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
|
||||
- labelling\_result: Contains label of videos in `all_data`tagged by our
|
||||
[AI system](../artificial-intelligence.md#the-filter).
|
||||
|
@ -1,6 +1,7 @@
|
||||
# Type of Song
|
||||
|
||||
The **Unrelated type** refers specifically to videos that are not in our [Scope of Inclusion](../../about/scope-of-inclusion.md).
|
||||
The **Unrelated type** refers specifically to videos that are not in our
|
||||
[Scope of Inclusion](../../about/scope-of-inclusion.md).
|
||||
|
||||
### Table: `songs`
|
||||
|
||||
|
@ -1,2 +1 @@
|
||||
# Message Queue
|
||||
|
||||
|
@ -2,7 +2,8 @@
|
||||
|
||||
### Jobs
|
||||
|
||||
The VideoTagsQueue contains two jobs: `getVideoTags`and `getVideosTags`. The former is used to fetch the tags of a video, and the latter is responsible for scheduling the former.
|
||||
The VideoTagsQueue contains two jobs: `getVideoTags`and `getVideosTags`. The former is used to fetch the tags of a
|
||||
video, and the latter is responsible for scheduling the former.
|
||||
|
||||
### Return value
|
||||
|
||||
|
@ -15,4 +15,5 @@ layout:
|
||||
|
||||
# Overview
|
||||
|
||||
Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by [BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
|
||||
Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by
|
||||
[BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
|
||||
|
@ -1,22 +1,22 @@
|
||||
# Table of contents
|
||||
|
||||
* [欢迎](README.md)
|
||||
- [欢迎](README.md)
|
||||
|
||||
## 关于 <a href="#about" id="about"></a>
|
||||
|
||||
* [关于本项目](about/this-project.md)
|
||||
* [收录范围](about/scope-of-inclusion.md)
|
||||
- [关于本项目](about/this-project.md)
|
||||
- [收录范围](about/scope-of-inclusion.md)
|
||||
|
||||
## 技术架构 <a href="#architecture" id="architecture"></a>
|
||||
|
||||
* [概览](architecture/overview.md)
|
||||
* [数据库结构](architecture/database-structure/README.md)
|
||||
* [歌曲类型](architecture/database-structure/type-of-song.md)
|
||||
* [人工智能](architecture/artificial-intelligence.md)
|
||||
* [消息队列](architecture/message-queue/README.md)
|
||||
* [VideoTagsQueue队列](architecture/message-queue/video-tags-queue.md)
|
||||
- [概览](architecture/overview.md)
|
||||
- [数据库结构](architecture/database-structure/README.md)
|
||||
- [歌曲类型](architecture/database-structure/type-of-song.md)
|
||||
- [人工智能](architecture/artificial-intelligence.md)
|
||||
- [消息队列](architecture/message-queue/README.md)
|
||||
- [VideoTagsQueue队列](architecture/message-queue/video-tags-queue.md)
|
||||
|
||||
## API 文档 <a href="#api-doc" id="api-doc"></a>
|
||||
|
||||
* [目录](api-doc/catalog.md)
|
||||
* [歌曲](api-doc/songs.md)
|
||||
- [目录](api-doc/catalog.md)
|
||||
- [歌曲](api-doc/songs.md)
|
||||
|
@ -6,7 +6,8 @@
|
||||
|
||||
#### VOCALOID·UATU 分区
|
||||
|
||||
原则上,中V档案馆中收录的歌曲必须包含在哔哩哔哩 VOCALOID·UTAU 分区(分区ID为30)下的视频中。在某些特殊情况下,此规则可能不是强制的。
|
||||
原则上,中V档案馆中收录的歌曲必须包含在哔哩哔哩 VOCALOID·UTAU
|
||||
分区(分区ID为30)下的视频中。在某些特殊情况下,此规则可能不是强制的。
|
||||
|
||||
#### 至少一行中文
|
||||
|
||||
@ -16,4 +17,6 @@
|
||||
|
||||
歌曲的至少一行必须由歌声合成器生成(包括和声部分),才能被收录到中V档案馆中。
|
||||
|
||||
我们将歌声合成器定义为通过算法建模声音特征并根据输入的歌词、音高等参数生成音频的软件或系统,包括基于波形拼接的(如 VOCALOID、UTAU)和基于 AI 的(如 Synthesizer V、ACE Studio)方法,**但不包括仅改变现有歌声音色的AI声音转换器**(例如 [so-vits svc](https://github.com/svc-develop-team/so-vits-svc))。
|
||||
我们将歌声合成器定义为通过算法建模声音特征并根据输入的歌词、音高等参数生成音频的软件或系统,包括基于波形拼接的(如
|
||||
VOCALOID、UTAU)和基于 AI 的(如 Synthesizer V、ACE Studio)方法,**但不包括仅改变现有歌声音色的AI声音转换器**(例如
|
||||
[so-vits svc](https://github.com/svc-develop-team/so-vits-svc))。
|
||||
|
@ -6,34 +6,33 @@
|
||||
|
||||
纵观整个互联网,对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站:
|
||||
|
||||
* [萌娘百科](https://zh.moegirl.org.cn/): 收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
* [VCPedia](https://vcpedia.cn/): 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
* [VocaDB](https://vocadb.net/): [一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV 等](#user-content-fn-2)[^2],其中包含大量中文歌声合成作品。
|
||||
* [天钿Daily](https://tdd.bunnyxt.com/):一个VC相关数据交流与分享的网站。致力于VC相关数据交流,定期抓取VC相关数据,选取有意义的纬度展示。
|
||||
- [萌娘百科](https://zh.moegirl.org.cn/):
|
||||
收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
- [VCPedia](https://vcpedia.cn/):
|
||||
由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
- [VocaDB](https://vocadb.net/):
|
||||
[一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV 等](#user-content-fn-2)[^2],其中包含大量中文歌声合成作品。
|
||||
- [天钿Daily](https://tdd.bunnyxt.com/):一个VC相关数据交流与分享的网站。致力于VC相关数据交流,定期抓取VC相关数据,选取有意义的纬度展示。
|
||||
|
||||
上述网站中,或多或少存在一些不足,例如:
|
||||
|
||||
* 萌娘百科、VCPedia受限于传统维基,绝大多数内容依赖人工编辑。
|
||||
* VocaDB基于结构化数据库构建,由此可以依赖程序生成一些信息,但**条目收录**仍然完全依赖人工完成。
|
||||
* VocaDB主要专注于元数据展示,少有关于歌曲、作者等的描述性的文字,也缺乏描述性的背景信息。
|
||||
* 天钿Daily只展示歌曲的统计数据及历史趋势,没有关于歌曲其它信息的收集。
|
||||
- 萌娘百科、VCPedia受限于传统维基,绝大多数内容依赖人工编辑。
|
||||
- VocaDB基于结构化数据库构建,由此可以依赖程序生成一些信息,但**条目收录**仍然完全依赖人工完成。
|
||||
- VocaDB主要专注于元数据展示,少有关于歌曲、作者等的描述性的文字,也缺乏描述性的背景信息。
|
||||
- 天钿Daily只展示歌曲的统计数据及历史趋势,没有关于歌曲其它信息的收集。
|
||||
|
||||
因此,**中V档案馆**吸取前人经验,克服上述网站的不足,希望做到:
|
||||
|
||||
* 歌曲收录(指发现歌曲并创建条目)的完全自动化
|
||||
* 歌曲元信息提取的高度自动化
|
||||
* 歌曲统计数据收集的完全自动化
|
||||
* 在程序辅助的同时欢迎并鼓励贡献者参与编辑(主要为描述性内容)或纠错
|
||||
* 在适当的许可声明下,引用来自上述源的数据,使内容更加全面、丰富。
|
||||
- 歌曲收录(指发现歌曲并创建条目)的完全自动化
|
||||
- 歌曲元信息提取的高度自动化
|
||||
- 歌曲统计数据收集的完全自动化
|
||||
- 在程序辅助的同时欢迎并鼓励贡献者参与编辑(主要为描述性内容)或纠错
|
||||
- 在适当的许可声明下,引用来自上述源的数据,使内容更加全面、丰富。
|
||||
|
||||
|
||||
|
||||
***
|
||||
---
|
||||
|
||||
本文在[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供。
|
||||
|
||||
|
||||
|
||||
[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。
|
||||
|
||||
[^2]: 翻译自[VocaDB](https://vocadb.net/),于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。
|
||||
|
@ -1,3 +1,3 @@
|
||||
# 目录
|
||||
|
||||
* [歌曲](songs.md)
|
||||
- [歌曲](songs.md)
|
||||
|
@ -6,8 +6,8 @@ CVSA 的自动化工作流高度依赖人工智能进行信息提取和分类。
|
||||
|
||||
#### Filter
|
||||
|
||||
位于项目根目录下的 `/filter/`,它将 [30 分区](../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中的视频分为以下类别:
|
||||
位于项目根目录下的 `/filter/`,它将 [30 分区](../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中的视频分为以下类别:
|
||||
|
||||
* 0:与中文人声合成无关
|
||||
* 1:中文人声合成原创曲
|
||||
* 2:中文人声合成的翻唱/混音歌曲
|
||||
- 0:与中文人声合成无关
|
||||
- 1:中文人声合成原创曲
|
||||
- 2:中文人声合成的翻唱/混音歌曲
|
||||
|
@ -4,7 +4,7 @@ CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。
|
||||
|
||||
CVSA 的所有公开数据(不包括用户的个人数据)都存储在名为 `cvsa_main` 的数据库中,该数据库包含以下表:
|
||||
|
||||
* songs:存储歌曲的主要信息
|
||||
* bili\_user:存储 Bilibili 用户信息快照
|
||||
* all\_data:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据。
|
||||
* labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
|
||||
- songs:存储歌曲的主要信息
|
||||
- bili\_user:存储 Bilibili 用户信息快照
|
||||
- all\_data:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据。
|
||||
- labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
|
||||
|
@ -7,18 +7,18 @@
|
||||
`songs` 表格中使用的 `type` 列。
|
||||
|
||||
| 类型 | 说明 |
|
||||
| -- | ---------- |
|
||||
| 0 | 不相关 |
|
||||
| 1 | 原创 |
|
||||
| 2 | 翻唱 (Cover) |
|
||||
| 3 | 混音 (Remix) |
|
||||
| 4 | 纯音乐 |
|
||||
| 10 | 其他 |
|
||||
| ---- | ------------ |
|
||||
| 0 | 不相关 |
|
||||
| 1 | 原创 |
|
||||
| 2 | 翻唱 (Cover) |
|
||||
| 3 | 混音 (Remix) |
|
||||
| 4 | 纯音乐 |
|
||||
| 10 | 其他 |
|
||||
|
||||
#### 表格:`labelling_result`
|
||||
|
||||
| 标签 | 说明 |
|
||||
| -- | ----------- |
|
||||
| 0 | AI 标记:不相关 |
|
||||
| 1 | AI 标记:原创 |
|
||||
| 2 | AI 标记:翻唱/混音 |
|
||||
| 标签 | 说明 |
|
||||
| ---- | ------------------ |
|
||||
| 0 | AI 标记:不相关 |
|
||||
| 1 | AI 标记:原创 |
|
||||
| 2 | AI 标记:翻唱/混音 |
|
||||
|
@ -1,2 +1 @@
|
||||
# 消息队列
|
||||
|
||||
|
@ -1,9 +1,5 @@
|
||||
import { Client, Transaction } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { AllDataType } from "lib/db/schema.d.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import { formatTimestampToPsql, parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts";
|
||||
import { VideoListVideo } from "lib/net/bilibili.d.ts";
|
||||
import { HOUR, SECOND } from "$std/datetime/constants.ts";
|
||||
import { modelVersion } from "lib/ml/filter_inference.ts";
|
||||
|
||||
export async function videoExistsInAllData(client: Client, aid: number) {
|
||||
@ -11,70 +7,8 @@ export async function videoExistsInAllData(client: Client, aid: number) {
|
||||
.then((result) => result.rows[0].exists);
|
||||
}
|
||||
|
||||
export async function biliUserExists(client: Client, uid: number) {
|
||||
export async function userExistsInBiliUsers(client: Client, uid: number) {
|
||||
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bili_user WHERE uid = $1)`, [uid])
|
||||
.then((result) => result.rows[0].exists);
|
||||
}
|
||||
|
||||
export async function insertIntoAllData(client: Client, data: VideoListVideo) {
|
||||
logger.log(`inserted ${data.aid}`, "db-all_data");
|
||||
await client.queryObject(
|
||||
`INSERT INTO all_data (aid, bvid, description, uid, tags, title, published_at, duration)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
ON CONFLICT (aid) DO NOTHING`,
|
||||
[
|
||||
data.aid,
|
||||
data.bvid,
|
||||
data.desc,
|
||||
data.owner.mid,
|
||||
null,
|
||||
data.title,
|
||||
formatTimestampToPsql(data.pubdate * SECOND + 8 * HOUR),
|
||||
data.duration,
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
export async function getLatestVideoTimestampFromAllData(client: Client) {
|
||||
return await client.queryObject<{ published_at: string }>(
|
||||
`SELECT published_at FROM all_data ORDER BY published_at DESC LIMIT 1`,
|
||||
)
|
||||
.then((result) => {
|
||||
const date = new Date(result.rows[0].published_at);
|
||||
if (isNaN(date.getTime())) {
|
||||
return null;
|
||||
}
|
||||
return date.getTime();
|
||||
});
|
||||
}
|
||||
|
||||
export async function videoTagsIsNull(client: Client | Transaction, aid: number) {
|
||||
return await client.queryObject<{ exists: boolean }>(
|
||||
`SELECT EXISTS(SELECT 1 FROM all_data WHERE aid = $1 AND tags IS NULL)`,
|
||||
[aid],
|
||||
).then((result) => result.rows[0].exists);
|
||||
}
|
||||
|
||||
export async function updateVideoTags(client: Client | Transaction, aid: number, tags: string[]) {
|
||||
return await client.queryObject(
|
||||
`UPDATE all_data SET tags = $1 WHERE aid = $2`,
|
||||
[tags.join(","), aid],
|
||||
);
|
||||
}
|
||||
|
||||
export async function getNullVideoTagsList(client: Client) {
|
||||
const queryResult = await client.queryObject<{ aid: number; published_at: string }>(
|
||||
`SELECT aid, published_at FROM all_data WHERE tags IS NULL`,
|
||||
);
|
||||
const rows = queryResult.rows;
|
||||
return rows.map(
|
||||
(row) => {
|
||||
return {
|
||||
aid: Number(row.aid),
|
||||
published_at: parseTimestampFromPsql(row.published_at),
|
||||
};
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
export async function getUnlabelledVideos(client: Client) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import {postgresConfig} from "lib/db/pgConfig.ts";
|
||||
import { postgresConfig } from "lib/db/pgConfig.ts";
|
||||
|
||||
const pool = new Pool(postgresConfig, 12);
|
||||
|
||||
|
@ -3,7 +3,7 @@ const requiredEnvVars = ["DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_POR
|
||||
const unsetVars = requiredEnvVars.filter((key) => Deno.env.get(key) === undefined);
|
||||
|
||||
if (unsetVars.length > 0) {
|
||||
throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
|
||||
throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
|
||||
}
|
||||
|
||||
const databaseHost = Deno.env.get("DB_HOST")!;
|
||||
@ -18,4 +18,4 @@ export const postgresConfig = {
|
||||
database: databaseName,
|
||||
user: databaseUser,
|
||||
password: databasePassword,
|
||||
};
|
||||
};
|
||||
|
@ -1,3 +1,3 @@
|
||||
import { Redis } from "ioredis";
|
||||
|
||||
export const redis = new Redis({ maxRetriesPerRequest: null });
|
||||
export const redis = new Redis({ maxRetriesPerRequest: null });
|
||||
|
16
lib/db/schema.d.ts
vendored
16
lib/db/schema.d.ts
vendored
@ -1,9 +1,9 @@
|
||||
export interface AllDataType {
|
||||
aid: number;
|
||||
bvid: string | null;
|
||||
description: string | null;
|
||||
uid: number | null;
|
||||
tags: string | null;
|
||||
title: string | null;
|
||||
published_at: string | null;
|
||||
}
|
||||
aid: number;
|
||||
bvid: string | null;
|
||||
description: string | null;
|
||||
uid: number | null;
|
||||
tags: string | null;
|
||||
title: string | null;
|
||||
published_at: string | null;
|
||||
}
|
||||
|
@ -9,4 +9,4 @@ logger.log("foo", "service");
|
||||
logger.log("foo", "db", "insert.ts");
|
||||
logger.warn("warn");
|
||||
logger.error("error");
|
||||
logger.verbose("error");
|
||||
logger.verbose("error");
|
||||
|
@ -1,7 +1,7 @@
|
||||
import {AutoTokenizer, PreTrainedTokenizer} from "@huggingface/transformers";
|
||||
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
|
||||
import * as ort from "onnxruntime";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import {WorkerError} from "lib/mq/schema.ts";
|
||||
import { WorkerError } from "lib/mq/schema.ts";
|
||||
|
||||
const tokenizerModel = "alikia2x/jina-embedding-v3-m2v-1024";
|
||||
const onnxClassifierPath = "./model/video_classifier_v3_11.onnx";
|
||||
@ -66,7 +66,6 @@ async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession)
|
||||
return Array.from(embeddings.data as Float32Array);
|
||||
}
|
||||
|
||||
|
||||
async function runClassification(embeddings: number[]): Promise<number[]> {
|
||||
if (!sessionClassifier) {
|
||||
throw new Error("Classifier session is not initialized. Call initializeModels() first.");
|
||||
@ -85,7 +84,7 @@ export async function classifyVideo(
|
||||
description: string,
|
||||
tags: string,
|
||||
author_info: string,
|
||||
aid: number
|
||||
aid: number,
|
||||
): Promise<number> {
|
||||
if (!sessionEmbedding) {
|
||||
throw new Error("Embedding session is not initialized. Call initializeModels() first.");
|
||||
@ -97,6 +96,6 @@ export async function classifyVideo(
|
||||
author_info,
|
||||
], sessionEmbedding);
|
||||
const probabilities = await runClassification(embeddings);
|
||||
logger.log(`Prediction result for aid: ${aid}: [${probabilities.map((p) => p.toFixed(5))}]`, "ml")
|
||||
logger.log(`Prediction result for aid: ${aid}: [${probabilities.map((p) => p.toFixed(5))}]`, "ml");
|
||||
return probabilities.indexOf(Math.max(...probabilities));
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
import {AutoTokenizer, PreTrainedTokenizer} from "@huggingface/transformers";
|
||||
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
|
||||
import * as ort from "onnxruntime";
|
||||
import {softmax} from "lib/ml/filter_inference.ts";
|
||||
import { softmax } from "lib/ml/filter_inference.ts";
|
||||
|
||||
// 配置参数
|
||||
const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
|
||||
|
@ -1,6 +1,6 @@
|
||||
import { Job } from "bullmq";
|
||||
import { db } from "lib/db/init.ts";
|
||||
import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel} from "lib/db/allData.ts";
|
||||
import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "lib/db/allData.ts";
|
||||
import { classifyVideo } from "lib/ml/filter_inference.ts";
|
||||
import { ClassifyVideoQueue } from "lib/mq/index.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
@ -27,7 +27,8 @@ export const classifyVideoWorker = async (job: Job) => {
|
||||
client.release();
|
||||
|
||||
await job.updateData({
|
||||
...job.data, label: label,
|
||||
...job.data,
|
||||
label: label,
|
||||
});
|
||||
|
||||
return 0;
|
||||
@ -38,12 +39,12 @@ export const classifyVideosWorker = async () => {
|
||||
logger.log("job:classifyVideos is locked, skipping.", "mq");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
await lockManager.acquireLock("classifyVideos");
|
||||
|
||||
const client = await db.connect();
|
||||
const videos = await getUnlabelledVideos(client);
|
||||
logger.log(`Found ${videos.length} unlabelled videos`)
|
||||
logger.log(`Found ${videos.length} unlabelled videos`);
|
||||
client.release();
|
||||
|
||||
let i = 0;
|
||||
|
@ -1,52 +1,12 @@
|
||||
import { Job } from "bullmq";
|
||||
import { insertLatestVideos } from "lib/task/insertLatestVideo.ts";
|
||||
import { LatestVideosQueue } from "lib/mq/index.ts";
|
||||
import { MINUTE } from "$std/datetime/constants.ts";
|
||||
import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
|
||||
import { db } from "lib/db/init.ts";
|
||||
import { truncate } from "lib/utils/truncate.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import { lockManager } from "lib/mq/lockManager.ts";
|
||||
|
||||
const delayMap = [5, 10, 15, 30, 60, 60];
|
||||
|
||||
const updateQueueInterval = async (failedCount: number, delay: number) => {
|
||||
logger.log(`job:getLatestVideos added to queue, delay: ${(delay / MINUTE).toFixed(2)} minutes.`, "mq");
|
||||
await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
|
||||
every: delay,
|
||||
}, {
|
||||
data: {
|
||||
failedCount: failedCount,
|
||||
},
|
||||
});
|
||||
return;
|
||||
};
|
||||
|
||||
const executeTask = async (client: Client, failedCount: number) => {
|
||||
const result = await insertLatestVideos(client);
|
||||
failedCount = result !== 0 ? truncate(failedCount + 1, 0, 5) : 0;
|
||||
if (failedCount !== 0) {
|
||||
await updateQueueInterval(failedCount, delayMap[failedCount] * MINUTE);
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
export const getLatestVideosWorker = async (job: Job) => {
|
||||
if (await lockManager.isLocked("getLatestVideos")) {
|
||||
logger.log("job:getLatestVideos is locked, skipping.", "mq");
|
||||
return;
|
||||
}
|
||||
|
||||
await lockManager.acquireLock("getLatestVideos");
|
||||
|
||||
const failedCount = (job.data.failedCount ?? 0) as number;
|
||||
export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
|
||||
const client = await db.connect();
|
||||
|
||||
try {
|
||||
await executeTask(client, failedCount);
|
||||
await queueLatestVideos(client);
|
||||
} finally {
|
||||
client.release();
|
||||
await lockManager.releaseLock("getLatestVideos");
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
17
lib/mq/exec/getVideoInfo.ts
Normal file
17
lib/mq/exec/getVideoInfo.ts
Normal file
@ -0,0 +1,17 @@
|
||||
import { Job } from "bullmq";
|
||||
import { db } from "lib/db/init.ts";
|
||||
import { insertVideoInfo } from "lib/mq/task/getVideoInfo.ts";
|
||||
|
||||
export const getVideoInfoWorker = async (job: Job): Promise<number> => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
const aid = job.data.aid;
|
||||
if (!aid) {
|
||||
return 3;
|
||||
}
|
||||
await insertVideoInfo(client, aid);
|
||||
return 0;
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
@ -1,100 +0,0 @@
|
||||
import { Job } from "bullmq";
|
||||
import { VideoTagsQueue } from "lib/mq/index.ts";
|
||||
import { DAY, HOUR, MINUTE, SECOND } from "$std/datetime/constants.ts";
|
||||
import { db } from "lib/db/init.ts";
|
||||
import { truncate } from "lib/utils/truncate.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import { getNullVideoTagsList, updateVideoTags } from "lib/db/allData.ts";
|
||||
import { getVideoTags } from "lib/net/getVideoTags.ts";
|
||||
import { NetSchedulerError } from "lib/mq/scheduler.ts";
|
||||
import { WorkerError } from "lib/mq/schema.ts";
|
||||
|
||||
const delayMap = [0.5, 3, 5, 15, 30, 60];
|
||||
const getJobPriority = (diff: number) => {
|
||||
let priority;
|
||||
if (diff > 14 * DAY) {
|
||||
priority = 10;
|
||||
} else if (diff > 7 * DAY) {
|
||||
priority = 7;
|
||||
} else if (diff > DAY) {
|
||||
priority = 5;
|
||||
} else if (diff > 6 * HOUR) {
|
||||
priority = 3;
|
||||
} else if (diff > HOUR) {
|
||||
priority = 2;
|
||||
} else {
|
||||
priority = 1;
|
||||
}
|
||||
return priority;
|
||||
};
|
||||
|
||||
const executeTask = async (client: Client, aid: number, failedCount: number, job: Job) => {
|
||||
try {
|
||||
const result = await getVideoTags(aid);
|
||||
if (!result) {
|
||||
failedCount = truncate(failedCount + 1, 0, 5);
|
||||
const delay = delayMap[failedCount] * MINUTE;
|
||||
logger.log(
|
||||
`job:getVideoTags added to queue, delay: ${delayMap[failedCount]} minutes.`,
|
||||
"mq",
|
||||
);
|
||||
await VideoTagsQueue.add("getVideoTags", { aid, failedCount }, { delay, priority: 6 - failedCount });
|
||||
return 1;
|
||||
}
|
||||
await updateVideoTags(client, aid, result);
|
||||
logger.log(`Fetched tags for aid: ${aid}`, "task");
|
||||
return 0;
|
||||
} catch (e) {
|
||||
if (!(e instanceof NetSchedulerError)) {
|
||||
throw new WorkerError(<Error> e, "task", "getVideoTags/fn:executeTask");
|
||||
}
|
||||
const err = e as NetSchedulerError;
|
||||
if (err.code === "NO_AVAILABLE_PROXY" || err.code === "PROXY_RATE_LIMITED") {
|
||||
logger.warn(`No available proxy for fetching tags, delayed. aid: ${aid}`, "task");
|
||||
await VideoTagsQueue.add("getVideoTags", { aid, failedCount }, {
|
||||
delay: 25 * SECOND * Math.random() + 5 * SECOND,
|
||||
priority: job.priority,
|
||||
});
|
||||
return 2;
|
||||
}
|
||||
throw new WorkerError(err, "task", "getVideoTags/fn:executeTask");
|
||||
}
|
||||
};
|
||||
|
||||
export const getVideoTagsWorker = async (job: Job) => {
|
||||
const failedCount = (job.data.failedCount ?? 0) as number;
|
||||
const client = await db.connect();
|
||||
const aid = job.data.aid;
|
||||
if (!aid) {
|
||||
return 3;
|
||||
}
|
||||
|
||||
const v = await executeTask(client, aid, failedCount, job);
|
||||
client.release();
|
||||
return v;
|
||||
};
|
||||
|
||||
export const getVideoTagsInitializer = async () => {
|
||||
const client = await db.connect();
|
||||
const videos = await getNullVideoTagsList(client);
|
||||
client.release();
|
||||
if (videos.length == 0) {
|
||||
return 4;
|
||||
}
|
||||
const count = await VideoTagsQueue.getJobCounts("wait", "delayed", "active");
|
||||
const total = count.delayed + count.active + count.wait;
|
||||
const max = 15;
|
||||
const rest = truncate(max - total, 0, max);
|
||||
|
||||
let i = 0;
|
||||
for (const video of videos) {
|
||||
if (i > rest) return 100 + i;
|
||||
const aid = video.aid;
|
||||
const timestamp = video.published_at;
|
||||
const diff = Date.now() - timestamp;
|
||||
await VideoTagsQueue.add("getVideoTags", { aid }, { priority: getJobPriority(diff) });
|
||||
i++;
|
||||
}
|
||||
return 0;
|
||||
};
|
@ -1 +1 @@
|
||||
export * from "lib/mq/exec/getLatestVideos.ts";
|
||||
export * from "lib/mq/exec/getLatestVideos.ts";
|
||||
|
@ -2,6 +2,4 @@ import { Queue } from "bullmq";
|
||||
|
||||
export const LatestVideosQueue = new Queue("latestVideos");
|
||||
|
||||
export const VideoTagsQueue = new Queue("videoTags");
|
||||
|
||||
export const ClassifyVideoQueue = new Queue("classifyVideo");
|
||||
|
@ -1,19 +1,16 @@
|
||||
import { MINUTE } from "$std/datetime/constants.ts";
|
||||
import { ClassifyVideoQueue, LatestVideosQueue, VideoTagsQueue } from "lib/mq/index.ts";
|
||||
import { ClassifyVideoQueue, LatestVideosQueue } from "lib/mq/index.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
|
||||
export async function initMQ() {
|
||||
await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
|
||||
every: 1 * MINUTE
|
||||
});
|
||||
await VideoTagsQueue.upsertJobScheduler("getVideosTags", {
|
||||
every: 5 * MINUTE,
|
||||
every: 1 * MINUTE,
|
||||
immediately: true,
|
||||
});
|
||||
await ClassifyVideoQueue.upsertJobScheduler("classifyVideos", {
|
||||
every: 5 * MINUTE,
|
||||
immediately: true,
|
||||
})
|
||||
});
|
||||
|
||||
logger.log("Message queue initialized.");
|
||||
}
|
||||
|
@ -23,12 +23,12 @@ class LockManager {
|
||||
const result = await this.redis.set(key, "locked", "NX");
|
||||
|
||||
if (result !== "OK") {
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
if (timeout) {
|
||||
await this.redis.expire(key, timeout);
|
||||
}
|
||||
return true;
|
||||
if (timeout) {
|
||||
await this.redis.expire(key, timeout);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -7,7 +7,7 @@ export interface RateLimiterConfig {
|
||||
|
||||
export class RateLimiter {
|
||||
private readonly configs: RateLimiterConfig[];
|
||||
private readonly configEventNames: string[];
|
||||
private readonly configEventNames: string[];
|
||||
|
||||
/*
|
||||
* @param name The name of the rate limiter
|
||||
@ -17,7 +17,7 @@ export class RateLimiter {
|
||||
*/
|
||||
constructor(name: string, configs: RateLimiterConfig[]) {
|
||||
this.configs = configs;
|
||||
this.configEventNames = configs.map((_, index) => `${name}_config_${index}`);
|
||||
this.configEventNames = configs.map((_, index) => `${name}_config_${index}`);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -53,4 +53,4 @@ export class RateLimiter {
|
||||
await config.window.clear(eventName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
import logger from "lib/log/logger.ts";
|
||||
import {RateLimiter, RateLimiterConfig} from "lib/mq/rateLimiter.ts";
|
||||
import {SlidingWindow} from "lib/mq/slidingWindow.ts";
|
||||
import {redis} from "lib/db/redis.ts";
|
||||
import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts";
|
||||
import { SlidingWindow } from "lib/mq/slidingWindow.ts";
|
||||
import { redis } from "lib/db/redis.ts";
|
||||
import Redis from "ioredis";
|
||||
import { SECOND } from "$std/datetime/constants.ts";
|
||||
|
||||
@ -152,7 +152,7 @@ class NetScheduler {
|
||||
const proxiesNames = this.getTaskProxies(task);
|
||||
for (const proxyName of shuffleArray(proxiesNames)) {
|
||||
if (await this.getProxyAvailability(proxyName, task)) {
|
||||
return await this.proxyRequest<R>(url, proxyName, method);
|
||||
return await this.proxyRequest<R>(url, proxyName, task, method);
|
||||
}
|
||||
}
|
||||
throw new NetSchedulerError("No available proxy currently.", "NO_AVAILABLE_PROXY");
|
||||
@ -186,8 +186,9 @@ class NetScheduler {
|
||||
|
||||
if (!force) {
|
||||
const isAvailable = await this.getProxyAvailability(proxyName, task);
|
||||
const limiter = "proxy-" + proxyName + "-" + task
|
||||
if (!isAvailable) {
|
||||
throw new NetSchedulerError(`Proxy "${proxyName}" is rate limited`, "PROXY_RATE_LIMITED");
|
||||
throw new NetSchedulerError(`Proxy "${limiter}" is rate limited`, "PROXY_RATE_LIMITED");
|
||||
}
|
||||
}
|
||||
|
||||
@ -225,7 +226,7 @@ class NetScheduler {
|
||||
logger.error(error, "redis");
|
||||
return false;
|
||||
}
|
||||
logger.warn(`Unhandled error: ${error.message}`, "mq", "getProxyAvailability");
|
||||
logger.error(error, "mq", "getProxyAvailability");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -237,7 +238,7 @@ class NetScheduler {
|
||||
|
||||
const response = await fetch(url, {
|
||||
method,
|
||||
signal: controller.signal
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
clearTimeout(timeout);
|
||||
@ -281,7 +282,7 @@ const biliLimiterConfig: RateLimiterConfig[] = [
|
||||
netScheduler.addProxy("native", "native", "");
|
||||
netScheduler.addTask("getVideoInfo", "bilibili", "all");
|
||||
netScheduler.addTask("getLatestVideos", "bilibili", "all");
|
||||
netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig)
|
||||
netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig);
|
||||
netScheduler.setTaskLimiter("getLatestVideos", null);
|
||||
netScheduler.setProviderLimiter("bilibili", biliLimiterConfig);
|
||||
|
||||
|
@ -9,4 +9,4 @@ export class WorkerError extends Error {
|
||||
this.service = service;
|
||||
this.rawError = rawError;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ export class SlidingWindow {
|
||||
async event(eventName: string): Promise<void> {
|
||||
const now = Date.now();
|
||||
const key = `cvsa:sliding_window:${eventName}`;
|
||||
|
||||
|
||||
const uniqueMember = `${now}-${Math.random()}`;
|
||||
// Add current timestamp to an ordered set
|
||||
await this.redis.zadd(key, now, uniqueMember);
|
||||
|
40
lib/mq/task/getVideoInfo.ts
Normal file
40
lib/mq/task/getVideoInfo.ts
Normal file
@ -0,0 +1,40 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { getVideoInfo } from "lib/net/getVideoInfo.ts";
|
||||
import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import { ClassifyVideoQueue } from "lib/mq/index.ts";
|
||||
import { userExistsInBiliUsers, videoExistsInAllData } from "lib/db/allData.ts";
|
||||
|
||||
export async function insertVideoInfo(client: Client, aid: number) {
|
||||
const videoExists = await videoExistsInAllData(client, aid);
|
||||
if (videoExists) {
|
||||
return;
|
||||
}
|
||||
const data = await getVideoInfo(aid);
|
||||
if (data === null) {
|
||||
return null;
|
||||
}
|
||||
const bvid = data.View.bvid;
|
||||
const desc = data.View.desc;
|
||||
const uid = data.View.owner.mid;
|
||||
const tags = data.Tags
|
||||
.filter((tag) => tag.tag_type in ["old_channel", "topic"])
|
||||
.map((tag) => tag.tag_name).join(",");
|
||||
const title = data.View.title;
|
||||
const published_at = formatTimestampToPsql(data.View.pubdate);
|
||||
const duration = data.View.duration;
|
||||
await client.queryObject(
|
||||
`INSERT INTO all_data (aid, bvid, description, uid, tags, title, published_at, duration)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
|
||||
[aid, bvid, desc, uid, tags, title, published_at, duration],
|
||||
);
|
||||
const userExists = await userExistsInBiliUsers(client, aid);
|
||||
if (!userExists) {
|
||||
await client.queryObject(
|
||||
`INSERT INTO bili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`,
|
||||
[uid, data.View.owner.name, data.Card.card.sign, data.Card.follower],
|
||||
);
|
||||
}
|
||||
logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
|
||||
await ClassifyVideoQueue.add("classifyVideo", { aid });
|
||||
}
|
55
lib/mq/task/queueLatestVideo.ts
Normal file
55
lib/mq/task/queueLatestVideo.ts
Normal file
@ -0,0 +1,55 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { getLatestVideoAids } from "lib/net/getLatestVideoAids.ts";
|
||||
import { videoExistsInAllData } from "lib/db/allData.ts";
|
||||
import { sleep } from "lib/utils/sleep.ts";
|
||||
import { SECOND } from "$std/datetime/constants.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import { LatestVideosQueue } from "lib/mq/index.ts";
|
||||
|
||||
export async function queueLatestVideos(
|
||||
client: Client,
|
||||
): Promise<number | null> {
|
||||
let page = 1;
|
||||
let i = 0;
|
||||
const videosFound = new Set();
|
||||
while (true) {
|
||||
const pageSize = page == 1 ? 10 : 30;
|
||||
const aids = await getLatestVideoAids(page, pageSize);
|
||||
if (aids.length == 0) {
|
||||
logger.verbose("No more videos found", "net", "fn:insertLatestVideos()");
|
||||
break;
|
||||
}
|
||||
let allExists = true;
|
||||
let delay = 0;
|
||||
for (const aid of aids) {
|
||||
const videoExists = await videoExistsInAllData(client, aid);
|
||||
if (videoExists) {
|
||||
continue;
|
||||
}
|
||||
await LatestVideosQueue.add("getVideoInfo", { aid }, { delay,
|
||||
attempts: 100,
|
||||
backoff: {
|
||||
type: "fixed",
|
||||
delay: SECOND * 5
|
||||
}
|
||||
});
|
||||
videosFound.add(aid);
|
||||
allExists = false;
|
||||
delay += Math.random() * SECOND * 0.5;
|
||||
}
|
||||
i += aids.length;
|
||||
logger.log(
|
||||
`Page ${page} crawled, total: ${videosFound.size}/${i} videos added/observed.`,
|
||||
"net",
|
||||
"fn:queueLatestVideos()",
|
||||
);
|
||||
if (allExists) {
|
||||
return 0;
|
||||
}
|
||||
page++;
|
||||
const randomTime = Math.random() * 4000;
|
||||
const delta = SECOND;
|
||||
await sleep(randomTime + delta);
|
||||
}
|
||||
return 0;
|
||||
}
|
309
lib/net/bilibili.d.ts
vendored
309
lib/net/bilibili.d.ts
vendored
@ -1,117 +1,224 @@
|
||||
interface BaseResponse<T> {
|
||||
code: number;
|
||||
message: string;
|
||||
ttl: number;
|
||||
data: T;
|
||||
code: number;
|
||||
message: string;
|
||||
ttl: number;
|
||||
data: T;
|
||||
}
|
||||
|
||||
export type VideoListResponse = BaseResponse<VideoListData>;
|
||||
export type VideoDetailsResponse = BaseResponse<VideoDetailsData>;
|
||||
export type VideoTagsResponse = BaseResponse<VideoTagsData>;
|
||||
|
||||
interface VideoDetailsData {
|
||||
View: {
|
||||
bvid: string;
|
||||
aid: number;
|
||||
videos: number;
|
||||
tid: number;
|
||||
tid_v2: number;
|
||||
tname: string;
|
||||
tname_v2: string;
|
||||
copyright: number;
|
||||
pic: string;
|
||||
title: string;
|
||||
pubdate: number;
|
||||
ctime: number;
|
||||
desc: string;
|
||||
desc_v2: string;
|
||||
state: number;
|
||||
duration: number;
|
||||
mission_id: number;
|
||||
rights: VideoRights;
|
||||
owner: {
|
||||
mid: number;
|
||||
name: string;
|
||||
face: string;
|
||||
};
|
||||
stat: VideoStats;
|
||||
argue_info: {
|
||||
argue_msg: string;
|
||||
argue_type: number;
|
||||
argue_link: string;
|
||||
};
|
||||
dynamic: "";
|
||||
cid: number;
|
||||
dimension: VideoDimension;
|
||||
pages: VideoPage[];
|
||||
subtitle: {
|
||||
allow_submit: number;
|
||||
list: VideoSubTitle[];
|
||||
};
|
||||
staff: VideoStaff[];
|
||||
};
|
||||
Card: {
|
||||
card: {
|
||||
mid: number;
|
||||
name: string;
|
||||
sex: string;
|
||||
face: string;
|
||||
fans: number;
|
||||
attention: number;
|
||||
friend: number;
|
||||
sign: string;
|
||||
level_info: {
|
||||
current_level: number;
|
||||
};
|
||||
};
|
||||
archive_count: number;
|
||||
article_count: number;
|
||||
follower: number;
|
||||
like_num: number;
|
||||
};
|
||||
Tags: VideoTagsLite[];
|
||||
}
|
||||
|
||||
interface VideoTagsLite {
|
||||
tag_id: number;
|
||||
tag_name: string;
|
||||
music_id: string;
|
||||
tag_type: string;
|
||||
jump_url: string;
|
||||
}
|
||||
|
||||
type VideoTagsData = VideoTags[];
|
||||
|
||||
type VideoStaff = {
|
||||
mid: number;
|
||||
title: string;
|
||||
name: string;
|
||||
face: string;
|
||||
follower: number;
|
||||
};
|
||||
|
||||
type VideoSubTitle = {
|
||||
id: number;
|
||||
lan: string;
|
||||
lan_doc: string;
|
||||
is_lock: number;
|
||||
subtitle_url: string;
|
||||
type: number;
|
||||
id_str: string;
|
||||
ai_type: number;
|
||||
ai_status: number;
|
||||
};
|
||||
|
||||
type VideoDimension = {
|
||||
width: number;
|
||||
height: number;
|
||||
rotate: number;
|
||||
};
|
||||
|
||||
interface VideoPage {
|
||||
cid: number;
|
||||
page: number;
|
||||
from: string;
|
||||
part: string;
|
||||
duration: number;
|
||||
vid: string;
|
||||
weblink: string;
|
||||
dimension: VideoDimension;
|
||||
first_frame: string;
|
||||
}
|
||||
|
||||
interface VideoTags {
|
||||
tag_id: number;
|
||||
tag_name: string;
|
||||
cover: string;
|
||||
head_cover: string;
|
||||
content: string;
|
||||
short_content: string;
|
||||
type: number;
|
||||
state: number;
|
||||
ctime: number;
|
||||
count: {
|
||||
view: number;
|
||||
use: number;
|
||||
atten: number;
|
||||
}
|
||||
is_atten: number;
|
||||
likes: number;
|
||||
hates: number;
|
||||
attribute: number;
|
||||
liked: number;
|
||||
hated: number;
|
||||
extra_attr: number;
|
||||
tag_id: number;
|
||||
tag_name: string;
|
||||
cover: string;
|
||||
head_cover: string;
|
||||
content: string;
|
||||
short_content: string;
|
||||
type: number;
|
||||
state: number;
|
||||
ctime: number;
|
||||
count: {
|
||||
view: number;
|
||||
use: number;
|
||||
atten: number;
|
||||
};
|
||||
is_atten: number;
|
||||
likes: number;
|
||||
hates: number;
|
||||
attribute: number;
|
||||
liked: number;
|
||||
hated: number;
|
||||
extra_attr: number;
|
||||
}
|
||||
|
||||
interface VideoListData {
|
||||
archives: VideoListVideo[];
|
||||
page: {
|
||||
num: number;
|
||||
size: number;
|
||||
count: number;
|
||||
};
|
||||
archives: VideoListVideo[];
|
||||
page: {
|
||||
num: number;
|
||||
size: number;
|
||||
count: number;
|
||||
};
|
||||
}
|
||||
|
||||
type VideoRights = {
|
||||
bp: number;
|
||||
elec: number;
|
||||
download: number;
|
||||
movie: number;
|
||||
pay: number;
|
||||
hd5: number;
|
||||
no_reprint: number;
|
||||
autoplay: number;
|
||||
ugc_pay: number;
|
||||
is_cooperation: number;
|
||||
ugc_pay_preview: number;
|
||||
no_background: number;
|
||||
arc_pay: number;
|
||||
pay_free_watch: number;
|
||||
};
|
||||
|
||||
type VideoStats = {
|
||||
aid: number;
|
||||
view: number;
|
||||
danmaku: number;
|
||||
reply: number;
|
||||
favorite: number;
|
||||
coin: number;
|
||||
share: number;
|
||||
now_rank: number;
|
||||
his_rank: number;
|
||||
like: number;
|
||||
};
|
||||
|
||||
interface VideoListVideo {
|
||||
aid: number;
|
||||
videos: number;
|
||||
tid: number;
|
||||
tname: string;
|
||||
copyright: number;
|
||||
pic: string;
|
||||
title: string;
|
||||
pubdate: number;
|
||||
ctime: number;
|
||||
desc: string;
|
||||
state: number;
|
||||
duration: number;
|
||||
mission_id?: number;
|
||||
rights: {
|
||||
bp: number;
|
||||
elec: number;
|
||||
download: number;
|
||||
movie: number;
|
||||
pay: number;
|
||||
hd5: number;
|
||||
no_reprint: number;
|
||||
autoplay: number;
|
||||
ugc_pay: number;
|
||||
is_cooperation: number;
|
||||
ugc_pay_preview: number;
|
||||
no_background: number;
|
||||
arc_pay: number;
|
||||
pay_free_watch: number;
|
||||
},
|
||||
owner: {
|
||||
mid: number;
|
||||
name: string;
|
||||
face: string;
|
||||
},
|
||||
stat: {
|
||||
aid: number;
|
||||
view: number;
|
||||
danmaku: number;
|
||||
reply: number;
|
||||
favorite: number;
|
||||
coin: number;
|
||||
share: number;
|
||||
now_rank: number;
|
||||
his_rank: number;
|
||||
like: number;
|
||||
dislike: number;
|
||||
vt: number;
|
||||
vv: number;
|
||||
},
|
||||
dynamic: string;
|
||||
cid: number;
|
||||
dimension: {
|
||||
width: number;
|
||||
height: number;
|
||||
rotate: number;
|
||||
},
|
||||
season_id?: number;
|
||||
short_link_v2: string;
|
||||
first_frame: string;
|
||||
pub_location: string;
|
||||
cover43: string;
|
||||
tidv2: number;
|
||||
tname_v2: string;
|
||||
bvid: string;
|
||||
season_type: number;
|
||||
is_ogv: number;
|
||||
ovg_info: string | null;
|
||||
rcmd_season: string;
|
||||
enable_vt: number;
|
||||
ai_rcmd: null | string;
|
||||
aid: number;
|
||||
videos: number;
|
||||
tid: number;
|
||||
tname: string;
|
||||
copyright: number;
|
||||
pic: string;
|
||||
title: string;
|
||||
pubdate: number;
|
||||
ctime: number;
|
||||
desc: string;
|
||||
state: number;
|
||||
duration: number;
|
||||
mission_id?: number;
|
||||
rights: VideoRights;
|
||||
owner: {
|
||||
mid: number;
|
||||
name: string;
|
||||
face: string;
|
||||
};
|
||||
stat: VideoStats;
|
||||
dynamic: string;
|
||||
cid: number;
|
||||
dimension: VideoDimension;
|
||||
season_id?: number;
|
||||
short_link_v2: string;
|
||||
first_frame: string;
|
||||
pub_location: string;
|
||||
cover43: string;
|
||||
tidv2: number;
|
||||
tname_v2: string;
|
||||
bvid: string;
|
||||
season_type: number;
|
||||
is_ogv: number;
|
||||
ovg_info: string | null;
|
||||
rcmd_season: string;
|
||||
enable_vt: number;
|
||||
ai_rcmd: null | string;
|
||||
}
|
||||
|
@ -1,88 +0,0 @@
|
||||
import { getLatestVideos } from "lib/net/getLatestVideos.ts";
|
||||
import { SECOND } from "$std/datetime/constants.ts";
|
||||
import { VideoListVideo } from "lib/net/bilibili.d.ts";
|
||||
|
||||
export async function getVideoPositionInNewList(timestamp: number): Promise<number | null | VideoListVideo[]> {
|
||||
const virtualPageSize = 50;
|
||||
|
||||
let lowPage = 1;
|
||||
let highPage = 1;
|
||||
let foundUpper = false;
|
||||
while (true) {
|
||||
const ps = highPage < 2 ? 50 : 1
|
||||
const pn = highPage < 2 ? 1 : highPage * virtualPageSize;
|
||||
const videos = await getLatestVideos(pn, ps);
|
||||
if (!videos || videos.length === 0) {
|
||||
break;
|
||||
}
|
||||
const lastVideo = videos[videos.length - 1];
|
||||
if (!lastVideo || !lastVideo.pubdate) {
|
||||
break;
|
||||
}
|
||||
const lastTime = lastVideo.pubdate * SECOND
|
||||
if (lastTime <= timestamp && highPage == 1) {
|
||||
return videos;
|
||||
}
|
||||
else if (lastTime <= timestamp) {
|
||||
foundUpper = true;
|
||||
break;
|
||||
} else {
|
||||
lowPage = highPage;
|
||||
highPage *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundUpper) {
|
||||
return null;
|
||||
}
|
||||
|
||||
let boundaryPage = highPage;
|
||||
let lo = lowPage;
|
||||
let hi = highPage;
|
||||
while (lo <= hi) {
|
||||
const mid = Math.floor((lo + hi) / 2);
|
||||
const videos = await getLatestVideos(mid * virtualPageSize, 1);
|
||||
if (!videos) {
|
||||
return null;
|
||||
}
|
||||
if (videos.length === 0) {
|
||||
hi = mid - 1;
|
||||
continue;
|
||||
}
|
||||
const lastVideo = videos[videos.length - 1];
|
||||
if (!lastVideo || !lastVideo.pubdate) {
|
||||
hi = mid - 1;
|
||||
continue;
|
||||
}
|
||||
const lastTime = lastVideo.pubdate * SECOND
|
||||
if (lastTime > timestamp) {
|
||||
lo = mid + 1;
|
||||
} else {
|
||||
boundaryPage = mid;
|
||||
hi = mid - 1;
|
||||
}
|
||||
}
|
||||
|
||||
const boundaryVideos = await getLatestVideos(boundaryPage, virtualPageSize);
|
||||
let indexInPage = 0;
|
||||
if (boundaryVideos && boundaryVideos.length > 0) {
|
||||
for (let i = 0; i < boundaryVideos.length; i++) {
|
||||
const video = boundaryVideos[i];
|
||||
if (!video.pubdate) {
|
||||
continue;
|
||||
}
|
||||
const videoTime = video.pubdate * SECOND
|
||||
if (videoTime > timestamp) {
|
||||
indexInPage++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const count = (boundaryPage - 1) * virtualPageSize + indexInPage;
|
||||
|
||||
const safetyMargin = 5;
|
||||
|
||||
return count + safetyMargin;
|
||||
}
|
21
lib/net/getLatestVideoAids.ts
Normal file
21
lib/net/getLatestVideoAids.ts
Normal file
@ -0,0 +1,21 @@
|
||||
import { VideoListResponse } from "lib/net/bilibili.d.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import netScheduler from "lib/mq/scheduler.ts";
|
||||
|
||||
export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[]> {
|
||||
const startFrom = 1 + pageSize * (page - 1);
|
||||
const endTo = pageSize * page;
|
||||
const range = `${startFrom}-${endTo}`;
|
||||
const errMessage = `Error fetching latest aid for ${range}:`;
|
||||
const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
|
||||
const data = await netScheduler.request<VideoListResponse>(url, "getLatestVideos");
|
||||
if (data.code != 0) {
|
||||
logger.error(errMessage + data.message, "net", "getLastestVideos");
|
||||
return [];
|
||||
}
|
||||
if (data.data.archives.length === 0) {
|
||||
logger.verbose("No more videos found", "net", "getLatestVideos");
|
||||
return [];
|
||||
}
|
||||
return data.data.archives.map((video) => video.aid);
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
import {VideoListResponse } from "lib/net/bilibili.d.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import netScheduler, {NetSchedulerError} from "lib/mq/scheduler.ts";
|
||||
|
||||
export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[] | null> {
|
||||
const startFrom = 1 + pageSize * (page - 1);
|
||||
const endTo = pageSize * page;
|
||||
const range = `${startFrom}-${endTo}`
|
||||
const errMessage = `Error fetching latest aid for ${range}:`
|
||||
try {
|
||||
const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
|
||||
const data = await netScheduler.request<VideoListResponse>(url, 'getLatestVideos');
|
||||
if (data.code != 0) {
|
||||
logger.error(errMessage + data.message, 'net', 'getLastestVideos');
|
||||
return [];
|
||||
}
|
||||
if (data.data.archives.length === 0) {
|
||||
logger.verbose("No more videos found", "net", "getLatestVideos");
|
||||
return [];
|
||||
}
|
||||
return data.data.archives.map(video => video.aid);
|
||||
}
|
||||
catch (e) {
|
||||
const error = e as NetSchedulerError;
|
||||
if (error.code == "FETCH_ERROR") {
|
||||
const rawError = error.rawError! as Error;
|
||||
rawError.message = errMessage + rawError.message;
|
||||
logger.error(rawError, 'net', 'getVideoTags');
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
// Re-throw the error
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
15
lib/net/getVideoInfo.ts
Normal file
15
lib/net/getVideoInfo.ts
Normal file
@ -0,0 +1,15 @@
|
||||
import netScheduler from "lib/mq/scheduler.ts";
|
||||
import { VideoDetailsData, VideoDetailsResponse } from "lib/net/bilibili.d.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
|
||||
export async function getVideoInfo(aid: number): Promise<VideoDetailsData | null> {
|
||||
const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`;
|
||||
const data = await netScheduler.request<VideoDetailsResponse>(url, "getVideoInfo");
|
||||
const errMessage = `Error fetching metadata for ${aid}:`;
|
||||
logger.log("Fetching metadata for " + aid, "net", "fn:getVideoInfo");
|
||||
if (data.code !== 0) {
|
||||
logger.error(errMessage + data.message, "net", "fn:getVideoInfo");
|
||||
return null;
|
||||
}
|
||||
return data.data;
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
import { VideoTagsResponse } from "lib/net/bilibili.d.ts";
|
||||
import netScheduler, {NetSchedulerError} from "lib/mq/scheduler.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
|
||||
/*
|
||||
* Fetch the tags for a video
|
||||
* @param {number} aid The video's aid
|
||||
* @return {Promise<string[] | null>} A promise, which resolves to an array of tags,
|
||||
* or null if an `fetch` error occurred
|
||||
* @throws {NetSchedulerError} If the request failed.
|
||||
*/
|
||||
export async function getVideoTags(aid: number): Promise<string[] | null> {
|
||||
try {
|
||||
const url = `https://api.bilibili.com/x/tag/archive/tags?aid=${aid}`;
|
||||
const data = await netScheduler.request<VideoTagsResponse>(url, 'getVideoTags');
|
||||
if (data.code != 0) {
|
||||
logger.error(`Error fetching tags for video ${aid}: ${data.message}`, 'net', 'getVideoTags');
|
||||
return [];
|
||||
}
|
||||
return data.data.map((tag) => tag.tag_name);
|
||||
}
|
||||
catch (e) {
|
||||
const error = e as NetSchedulerError;
|
||||
if (error.code == "FETCH_ERROR") {
|
||||
const rawError = error.rawError! as Error;
|
||||
rawError.message = `Error fetching tags for video ${aid}: ` + rawError.message;
|
||||
logger.error(rawError, 'net', 'getVideoTags');
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
// Re-throw the error
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,76 +0,0 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { getLatestVideos } from "lib/net/getLatestVideos.ts";
|
||||
import { getLatestVideoTimestampFromAllData, insertIntoAllData, videoExistsInAllData } from "lib/db/allData.ts";
|
||||
import { sleep } from "lib/utils/sleep.ts";
|
||||
import { getVideoPositionInNewList } from "lib/net/bisectVideoStartFrom.ts";
|
||||
import { SECOND } from "$std/datetime/constants.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
|
||||
/**
 * Catch the database up with bilibili's latest-videos list.
 *
 * Strategy: find the timestamp of the newest video already stored, bisect the
 * remote "newlist" to locate where that video sits, then crawl pages backwards
 * (page N → 1) inserting anything not yet present.
 *
 * @param client Postgres client used for all reads/writes
 * @param pageSize Videos per page when crawling (default 10)
 * @param intervalRate Upper bound (ms) of the random delay between pages (default 4000)
 * @returns 0 on success, null on unrecoverable failure (no timestamp, bisect
 *          failure, or more than 5 consecutive fetch failures)
 */
export async function insertLatestVideos(
	client: Client,
	pageSize: number = 10,
	intervalRate: number = 4000,
): Promise<number | null> {
	const latestVideoTimestamp = await getLatestVideoTimestampFromAllData(client);
	if (latestVideoTimestamp == null) {
		logger.error("Cannot get latest video timestamp from current database.", "net", "fn:insertLatestVideos()");
		return null
	}
	logger.log(`Latest video in the database: ${new Date(latestVideoTimestamp).toISOString()}`, "net", "fn:insertLatestVideos()")
	const videoIndex = await getVideoPositionInNewList(latestVideoTimestamp);
	if (videoIndex == null) {
		logger.error("Cannot locate the video through bisect.", "net", "fn:insertLatestVideos()");
		return null
	}
	// NOTE(review): presumably the bisect helper returns the new videos directly
	// (an array) when the gap is small, and a numeric position otherwise — confirm
	// against getVideoPositionInNewList.
	if (typeof videoIndex == "object") {
		for (const video of videoIndex) {
			const videoExists = await videoExistsInAllData(client, video.aid);
			if (!videoExists) {
				await insertIntoAllData(client, video);
			}
		}
		return 0;
	}
	// Start from the page containing the newest already-stored video and walk
	// backwards toward page 1 (the most recent page).
	let page = Math.floor(videoIndex / pageSize) + 1;
	let failCount = 0;
	const insertedVideos = new Set();
	while (true) {
		try {
			const videos = await getLatestVideos(page, pageSize);
			if (videos == null) {
				// Fetch failed; give up after 5 consecutive failures.
				failCount++;
				if (failCount > 5) {
					return null;
				}
				// `continue` still runs the finally-block sleep below (backoff).
				continue;
			}
			failCount = 0;
			if (videos.length == 0) {
				logger.verbose("No more videos found", "net", "fn:insertLatestVideos()");
				break;
			}
			for (const video of videos) {
				// Insert only videos we have not seen before.
				const videoExists = await videoExistsInAllData(client, video.aid);
				if (!videoExists) {
					await insertIntoAllData(client, video);
					insertedVideos.add(video.aid);
				}
			}
			logger.log(`Page ${page} crawled, total: ${insertedVideos.size} videos.`, "net", "fn:insertLatestVideos()");
			page--;
			if (page < 1) {
				return 0;
			}
		} catch (error) {
			logger.error(error as Error, "net", "fn:insertLatestVideos()");
			failCount++;
			if (failCount > 5) {
				return null;
			}
		} finally {
			// Rate-limit every iteration; consecutive failures widen the delay
			// (failCount * 3s) on top of a random base interval.
			await sleep(Math.random() * intervalRate + failCount * 3 * SECOND + SECOND);
		}
	}
	return 0;
}
|
@ -1,3 +1,3 @@
|
||||
export async function sleep(ms: number) {
|
||||
await new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +1,3 @@
|
||||
export function truncate(num: number, min: number, max: number) {
|
||||
return Math.max(min, Math.min(num, max))
|
||||
}
|
||||
return Math.max(min, Math.min(num, max));
|
||||
}
|
||||
|
@ -2,13 +2,16 @@ import express from "express";
|
||||
import { createBullBoard } from "@bull-board/api";
|
||||
import { BullMQAdapter } from "@bull-board/api/bullMQAdapter.js";
|
||||
import { ExpressAdapter } from "@bull-board/express";
|
||||
import { ClassifyVideoQueue, LatestVideosQueue, VideoTagsQueue } from "lib/mq/index.ts";
|
||||
import { ClassifyVideoQueue, LatestVideosQueue } from "lib/mq/index.ts";
|
||||
|
||||
const serverAdapter = new ExpressAdapter();
|
||||
serverAdapter.setBasePath("/");
|
||||
|
||||
createBullBoard({
|
||||
queues: [new BullMQAdapter(LatestVideosQueue), new BullMQAdapter(VideoTagsQueue), new BullMQAdapter(ClassifyVideoQueue)],
|
||||
queues: [
|
||||
new BullMQAdapter(LatestVideosQueue),
|
||||
new BullMQAdapter(ClassifyVideoQueue),
|
||||
],
|
||||
serverAdapter: serverAdapter,
|
||||
});
|
||||
|
||||
@ -16,8 +19,6 @@ const app = express();
|
||||
|
||||
app.use("/", serverAdapter.getRouter());
|
||||
|
||||
// other configurations of your server
|
||||
|
||||
app.listen(3000, () => {
|
||||
console.log("Running on 3000...");
|
||||
console.log("For the UI, open http://localhost:3000/");
|
||||
|
@ -18,7 +18,6 @@ Deno.addSignalListener("SIGTERM", async () => {
|
||||
Deno.exit();
|
||||
});
|
||||
|
||||
|
||||
await initializeModels();
|
||||
|
||||
const filterWorker = new Worker(
|
||||
@ -45,6 +44,6 @@ filterWorker.on("error", (err) => {
|
||||
logger.error(e.rawError, e.service, e.codePath);
|
||||
});
|
||||
|
||||
filterWorker.on("closed", async() => {
|
||||
filterWorker.on("closed", async () => {
|
||||
await lockManager.releaseLock("classifyVideos");
|
||||
})
|
||||
});
|
||||
|
@ -2,22 +2,19 @@ import { Job, Worker } from "bullmq";
|
||||
import { getLatestVideosWorker } from "lib/mq/executors.ts";
|
||||
import { redis } from "lib/db/redis.ts";
|
||||
import logger from "lib/log/logger.ts";
|
||||
import { getVideoTagsWorker } from "lib/mq/exec/getVideoTags.ts";
|
||||
import { getVideoTagsInitializer } from "lib/mq/exec/getVideoTags.ts";
|
||||
import { lockManager } from "lib/mq/lockManager.ts";
|
||||
import { WorkerError } from "lib/mq/schema.ts";
|
||||
import { getVideoInfoWorker } from "lib/mq/exec/getVideoInfo.ts";
|
||||
|
||||
Deno.addSignalListener("SIGINT", async () => {
|
||||
logger.log("SIGINT Received: Shutting down workers...", "mq");
|
||||
await latestVideoWorker.close(true);
|
||||
await videoTagsWorker.close(true);
|
||||
Deno.exit();
|
||||
});
|
||||
|
||||
Deno.addSignalListener("SIGTERM", async () => {
|
||||
logger.log("SIGTERM Received: Shutting down workers...", "mq");
|
||||
await latestVideoWorker.close(true);
|
||||
await videoTagsWorker.close(true);
|
||||
Deno.exit();
|
||||
});
|
||||
|
||||
@ -28,11 +25,14 @@ const latestVideoWorker = new Worker(
|
||||
case "getLatestVideos":
|
||||
await getLatestVideosWorker(job);
|
||||
break;
|
||||
case "getVideoInfo":
|
||||
await getVideoInfoWorker(job);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
},
|
||||
{ connection: redis, concurrency: 1, removeOnComplete: { count: 1440 } },
|
||||
{ connection: redis, concurrency: 6, removeOnComplete: { count: 1440 } },
|
||||
);
|
||||
|
||||
latestVideoWorker.on("active", () => {
|
||||
@ -47,33 +47,3 @@ latestVideoWorker.on("error", (err) => {
|
||||
latestVideoWorker.on("closed", async () => {
|
||||
await lockManager.releaseLock("getLatestVideos");
|
||||
});
|
||||
|
||||
// Worker consuming the "videoTags" queue. Dispatches by job name:
// - "getVideoTags": fetch tags for one video
// - "getVideosTags": kick off the batch initializer
// Unknown job names fall through and resolve with undefined.
const videoTagsWorker = new Worker(
	"videoTags",
	async (job: Job) => {
		switch (job.name) {
			case "getVideoTags":
				return await getVideoTagsWorker(job);
			case "getVideosTags":
				return await getVideoTagsInitializer();
			default:
				break;
		}
	},
	{
		connection: redis,
		// Up to 6 tag jobs in flight at once.
		concurrency: 6,
		removeOnComplete: {
			count: 1000,
		},
	},
);

videoTagsWorker.on("active", () => {
	logger.log("Worker (videoTags) activated.", "mq");
});

// Route worker errors through the structured logger; errors are expected to
// carry WorkerError fields (rawError/service/codePath).
videoTagsWorker.on("error", (err) => {
	const e = err as WorkerError;
	logger.error(e.rawError, e.service, e.codePath);
});
|
||||
|
@ -1,33 +0,0 @@
|
||||
import { assertEquals } from "jsr:@std/assert";
|
||||
import { videoTagsIsNull } from "lib/db/allData.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { postgresConfig } from "lib/db/pgConfig.ts";
|
||||
|
||||
// A minimal aid which has an empty tags field in our database
|
||||
const TEST_AID = 63569;
|
||||
|
||||
Deno.test("videoTagsIsNull function", async () => {
|
||||
const client = new Client(postgresConfig);
|
||||
|
||||
try {
|
||||
const transaction = client.createTransaction("test_transaction");
|
||||
await transaction.begin();
|
||||
|
||||
const result1 = await videoTagsIsNull(transaction, TEST_AID);
|
||||
assertEquals(typeof result1, "boolean", "The result should be a boolean value.");
|
||||
assertEquals(result1, false, "The result should be false if tags is not NULL for the given aid.");
|
||||
|
||||
await transaction.queryArray`UPDATE all_data SET tags = NULL WHERE aid = ${TEST_AID}`;
|
||||
|
||||
const result2 = await videoTagsIsNull(transaction, TEST_AID);
|
||||
assertEquals(typeof result2, "boolean", "The result should be a boolean value.");
|
||||
assertEquals(result2, true, "The result should be true if tags is NULL for the given aid.");
|
||||
|
||||
await transaction.rollback();
|
||||
} catch (error) {
|
||||
console.error("Error during test:", error);
|
||||
throw error;
|
||||
} finally {
|
||||
client.end();
|
||||
}
|
||||
});
|
@ -1,7 +1,7 @@
|
||||
import {assertEquals} from "jsr:@std/assert";
|
||||
import {SlidingWindow} from "lib/mq/slidingWindow.ts";
|
||||
import {RateLimiter, RateLimiterConfig} from "lib/mq/rateLimiter.ts";
|
||||
import {Redis} from "npm:ioredis@5.5.0";
|
||||
import { assertEquals } from "jsr:@std/assert";
|
||||
import { SlidingWindow } from "lib/mq/slidingWindow.ts";
|
||||
import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts";
|
||||
import { Redis } from "npm:ioredis@5.5.0";
|
||||
|
||||
Deno.test("RateLimiter works correctly", async () => {
|
||||
const redis = new Redis({ maxRetriesPerRequest: null });
|
||||
@ -71,7 +71,7 @@ Deno.test("Multiple configs work correctly", async () => {
|
||||
await new Promise((resolve) => setTimeout(resolve, windowSize1 * 1000 + 500));
|
||||
|
||||
// Availability should now be true (due to config1)
|
||||
assertEquals(await rateLimiter.getAvailability(), true);
|
||||
assertEquals(await rateLimiter.getAvailability(), true);
|
||||
|
||||
// Trigger events up to the limit of the second config
|
||||
for (let i = maxRequests1; i < maxRequests2; i++) {
|
||||
@ -88,4 +88,4 @@ Deno.test("Multiple configs work correctly", async () => {
|
||||
assertEquals(await rateLimiter.getAvailability(), true);
|
||||
|
||||
redis.quit();
|
||||
});
|
||||
});
|
||||
|
@ -7,13 +7,13 @@ Deno.test("SlidingWindow - event and count", async () => {
|
||||
const windowSize = 5000; // 5 seconds
|
||||
const slidingWindow = new SlidingWindow(redis, windowSize);
|
||||
const eventName = "test_event";
|
||||
await slidingWindow.clear(eventName);
|
||||
await slidingWindow.clear(eventName);
|
||||
|
||||
await slidingWindow.event(eventName);
|
||||
const count = await slidingWindow.count(eventName);
|
||||
|
||||
assertEquals(count, 1);
|
||||
redis.quit();
|
||||
redis.quit();
|
||||
});
|
||||
|
||||
Deno.test("SlidingWindow - multiple events", async () => {
|
||||
@ -21,7 +21,7 @@ Deno.test("SlidingWindow - multiple events", async () => {
|
||||
const windowSize = 5000; // 5 seconds
|
||||
const slidingWindow = new SlidingWindow(redis, windowSize);
|
||||
const eventName = "test_event";
|
||||
await slidingWindow.clear(eventName);
|
||||
await slidingWindow.clear(eventName);
|
||||
|
||||
await slidingWindow.event(eventName);
|
||||
await slidingWindow.event(eventName);
|
||||
@ -29,7 +29,7 @@ Deno.test("SlidingWindow - multiple events", async () => {
|
||||
const count = await slidingWindow.count(eventName);
|
||||
|
||||
assertEquals(count, 3);
|
||||
redis.quit();
|
||||
redis.quit();
|
||||
});
|
||||
|
||||
Deno.test("SlidingWindow - no events", async () => {
|
||||
@ -37,12 +37,12 @@ Deno.test("SlidingWindow - no events", async () => {
|
||||
const windowSize = 5000; // 5 seconds
|
||||
const slidingWindow = new SlidingWindow(redis, windowSize);
|
||||
const eventName = "test_event";
|
||||
await slidingWindow.clear(eventName);
|
||||
await slidingWindow.clear(eventName);
|
||||
|
||||
const count = await slidingWindow.count(eventName);
|
||||
|
||||
assertEquals(count, 0);
|
||||
redis.quit();
|
||||
redis.quit();
|
||||
});
|
||||
|
||||
Deno.test("SlidingWindow - different event names", async () => {
|
||||
@ -51,8 +51,8 @@ Deno.test("SlidingWindow - different event names", async () => {
|
||||
const slidingWindow = new SlidingWindow(redis, windowSize);
|
||||
const eventName1 = "test_event_1";
|
||||
const eventName2 = "test_event_2";
|
||||
await slidingWindow.clear(eventName1);
|
||||
await slidingWindow.clear(eventName2);
|
||||
await slidingWindow.clear(eventName1);
|
||||
await slidingWindow.clear(eventName2);
|
||||
|
||||
await slidingWindow.event(eventName1);
|
||||
await slidingWindow.event(eventName2);
|
||||
@ -62,7 +62,7 @@ Deno.test("SlidingWindow - different event names", async () => {
|
||||
|
||||
assertEquals(count1, 1);
|
||||
assertEquals(count2, 1);
|
||||
redis.quit();
|
||||
redis.quit();
|
||||
});
|
||||
|
||||
Deno.test("SlidingWindow - large number of events", async () => {
|
||||
@ -70,7 +70,7 @@ Deno.test("SlidingWindow - large number of events", async () => {
|
||||
const windowSize = 5000; // 5 seconds
|
||||
const slidingWindow = new SlidingWindow(redis, windowSize);
|
||||
const eventName = "test_event";
|
||||
await slidingWindow.clear(eventName);
|
||||
await slidingWindow.clear(eventName);
|
||||
const numEvents = 1000;
|
||||
|
||||
for (let i = 0; i < numEvents; i++) {
|
||||
@ -80,5 +80,5 @@ Deno.test("SlidingWindow - large number of events", async () => {
|
||||
const count = await slidingWindow.count(eventName);
|
||||
|
||||
assertEquals(count, numEvents);
|
||||
redis.quit();
|
||||
redis.quit();
|
||||
});
|
||||
|
@ -1,25 +0,0 @@
|
||||
import { assertEquals } from "jsr:@std/assert";
|
||||
import { getLatestVideos } from "lib/net/getLatestVideos.ts";
|
||||
|
||||
Deno.test("Get latest videos", async () => {
|
||||
const videos = (await getLatestVideos(1, 5))!;
|
||||
assertEquals(videos.length, 5);
|
||||
|
||||
videos.forEach((video) => {
|
||||
assertVideoProperties(video);
|
||||
});
|
||||
});
|
||||
|
||||
function assertVideoProperties(video: object) {
|
||||
const aid = "aid" in video && typeof video.aid === "number";
|
||||
const bvid = "bvid" in video && typeof video.bvid === "string" &&
|
||||
video.bvid.length === 12 && video.bvid.startsWith("BV");
|
||||
const description = "description" in video && typeof video.description === "string";
|
||||
const uid = "uid" in video && typeof video.uid === "number";
|
||||
const tags = "tags" in video && (typeof video.tags === "string" || video.tags === null);
|
||||
const title = "title" in video && typeof video.title === "string";
|
||||
const publishedAt = "published_at" in video && typeof video.published_at === "string";
|
||||
|
||||
const match = aid && bvid && description && uid && tags && title && publishedAt;
|
||||
assertEquals(match, true);
|
||||
}
|
@ -1,28 +0,0 @@
|
||||
import { assertEquals } from "jsr:@std/assert";
|
||||
import { getVideoTags } from "lib/net/getVideoTags.ts";
|
||||
|
||||
// Tag order from the API is not guaranteed, so both sides are sorted
// before comparison.
Deno.test("Get video tags - regular video", async () => {
	const tags = (await getVideoTags(826597951))!.sort();
	assertEquals(tags, [
		"纯白P",
		"中华墨水娘",
		"中华少女",
		"中华粘土娘",
		"中华缘木娘",
		"中华少女Project",
		"提糯Tino",
		"中华烛火娘",
		"中华烁金娘",
		"新世代音乐人计划女生季",
	].sort());
});

// Non-existent aid: the API answers with an error code, which getVideoTags
// maps to an empty array (not null — null is reserved for fetch errors).
Deno.test("Get video tags - non-existent video", async () => {
	const tags = (await getVideoTags(8265979511111111));
	assertEquals(tags, []);
});

// An existing video with no tags at all also yields an empty array.
Deno.test("Get video tags - video with no tag", async () => {
	const tags = (await getVideoTags(981001865));
	assertEquals(tags, []);
});
|
Loading…
Reference in New Issue
Block a user