Compare commits
57 Commits
22b1c337ac
...
d9c8253019
Author | SHA1 | Date | |
---|---|---|---|
d9c8253019 | |||
244298913a | |||
2c47105913 | |||
6eaaf921d6 | |||
288e4f9571 | |||
907c0a6976 | |||
7689e687ff | |||
651eef0b9e | |||
68bd46fd8a | |||
13ea8fec8b | |||
3d9e98c949 | |||
c7dd1cfc2e | |||
e0a19499e1 | |||
0930bbe6f4 | |||
054d28e796 | |||
0614067278 | |||
6df6345ec1 | |||
bae1f84bea | |||
21c918f1fa | |||
f1651fee30 | |||
d0b7d93e5b | |||
7a7c5cada9 | |||
10b761e3db | |||
1f6411b512 | |||
9ef513eed7 | |||
d80a6bfcd9 | |||
7a6892ae8e | |||
b080c51c3e | |||
f4d08e944a | |||
a9582722f4 | |||
4ee4d2ede9 | |||
f21ff45dd3 | |||
b5dbf293a2 | |||
fc90dad185 | |||
0b36f52c6c | |||
445886815a | |||
8e7a1c3076 | |||
71ed0bd66b | |||
b76d8e589c | |||
69fb3604b1 | |||
d98e24b62f | |||
c4c9a3a440 | |||
da1bea7f41 | |||
38c0cbd371 | |||
a90747878e | |||
dd720b18fa | |||
3a83df7954 | |||
a8292d7b6b | |||
0923a34e16 | |||
f34633dc35 | |||
94e19690d1 | |||
20668609dd | |||
33c6a3c1f8 | |||
f39fef0d9a | |||
13ed20cf5c | |||
757cbbab7e | |||
b53366dbab |
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.woff2 filter=lfs diff=lfs merge=lfs -text
|
@ -14,6 +14,11 @@
|
||||
<excludeFolder url="file://$MODULE_DIR$/logs" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/model" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/src/db" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/.idea" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/.vscode" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/.zed" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/packages/frontend/.astro" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/scripts" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
|
6
.idea/deno.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="DenoSettings">
|
||||
<option name="denoInit" value="{ "enable": true, "lint": true, "unstable": true, "importMap": "import_map.json", "config": "deno.json", "fmt": { "useTabs": true, "lineWidth": 120, "indentWidth": 4, "semiColons": true, "proseWrap": "always" } }" />
|
||||
</component>
|
||||
</project>
|
@ -1,6 +1,17 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="CssUnknownProperty" enabled="false" level="WARNING" enabled_by_default="false">
|
||||
<option name="myCustomPropertiesEnabled" value="true" />
|
||||
<option name="myIgnoreVendorSpecificProperties" value="false" />
|
||||
<option name="myCustomPropertiesList">
|
||||
<value>
|
||||
<list size="1">
|
||||
<item index="0" class="java.lang.String" itemvalue="lc-l-with-tail" />
|
||||
</list>
|
||||
</value>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="GrazieInspection" enabled="false" level="GRAMMAR_ERROR" enabled_by_default="false" />
|
||||
<inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
||||
|
@ -3,33 +3,33 @@
|
||||
// For a full list of overridable settings, and general information on folder-specific settings,
|
||||
// see the documentation: https://zed.dev/docs/configuring-zed#settings-files
|
||||
{
|
||||
"lsp": {
|
||||
"deno": {
|
||||
"settings": {
|
||||
"deno": {
|
||||
"enable": true
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"languages": {
|
||||
"TypeScript": {
|
||||
"language_servers": [
|
||||
"deno",
|
||||
"!typescript-language-server",
|
||||
"!vtsls",
|
||||
"!eslint"
|
||||
],
|
||||
"formatter": "language_server"
|
||||
},
|
||||
"TSX": {
|
||||
"language_servers": [
|
||||
"deno",
|
||||
"!typescript-language-server",
|
||||
"!vtsls",
|
||||
"!eslint"
|
||||
],
|
||||
"formatter": "language_server"
|
||||
}
|
||||
}
|
||||
"lsp": {
|
||||
"deno": {
|
||||
"settings": {
|
||||
"deno": {
|
||||
"enable": true
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"languages": {
|
||||
"TypeScript": {
|
||||
"language_servers": [
|
||||
"deno",
|
||||
"!typescript-language-server",
|
||||
"!vtsls",
|
||||
"!eslint"
|
||||
],
|
||||
"formatter": "language_server"
|
||||
},
|
||||
"TSX": {
|
||||
"language_servers": [
|
||||
"deno",
|
||||
"!typescript-language-server",
|
||||
"!vtsls",
|
||||
"!eslint"
|
||||
],
|
||||
"formatter": "language_server"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +16,6 @@
|
||||
"imports": {
|
||||
"@astrojs/node": "npm:@astrojs/node@^9.1.3",
|
||||
"@astrojs/svelte": "npm:@astrojs/svelte@^7.0.8",
|
||||
"@core/db/": "./packages/core/db/",
|
||||
"date-fns": "npm:date-fns@^4.1.0"
|
||||
}
|
||||
}
|
||||
|
@ -1,21 +1,21 @@
|
||||
# Table of contents
|
||||
|
||||
- [Welcome](README.md)
|
||||
* [Welcome](README.md)
|
||||
|
||||
## About
|
||||
|
||||
- [About CVSA Project](about/this-project.md)
|
||||
- [Scope of Inclusion](about/scope-of-inclusion.md)
|
||||
* [About CVSA Project](about/this-project.md)
|
||||
* [Scope of Inclusion](about/scope-of-inclusion.md)
|
||||
|
||||
## Architecture
|
||||
|
||||
* [Overview](architecure/overview.md)
|
||||
* [Crawler](architecure/crawler.md)
|
||||
* [Database Structure](architecure/database-structure/README.md)
|
||||
* [Type of Song](architecure/database-structure/type-of-song.md)
|
||||
* [Message Queue](architecure/message-queue.md)
|
||||
* [Artificial Intelligence](architecure/artificial-intelligence.md)
|
||||
|
||||
## API Doc
|
||||
|
||||
- [Catalog](api-doc/catalog.md)
|
||||
- [Songs](api-doc/songs.md)
|
||||
* [Catalog](api-doc/catalog.md)
|
||||
* [Songs](api-doc/songs.md)
|
||||
|
@ -7,23 +7,34 @@ For a **song**, it must meet the following conditions to be included in CVSA:
|
||||
|
||||
### Category 30
|
||||
|
||||
In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in [Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our [automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been uploaded to bilibili / categorized under this category.
|
||||
In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in
|
||||
[Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our
|
||||
[automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been
|
||||
uploaded to bilibili / categorized under this category.
|
||||
|
||||
#### NEWS
|
||||
|
||||
Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the parent category "Music"). 
|
||||
Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be
|
||||
entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the
|
||||
parent category "Music"). 
|
||||
|
||||
According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However, there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under the "Music General" sub-category.\
|
||||
We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated program's crawling.
|
||||
According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly
|
||||
published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However,
|
||||
there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under
|
||||
the "Music General" sub-category.\
|
||||
We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated
|
||||
program's crawling.
|
||||
|
||||
### At Least One Line of Chinese / Chinese Virtual Singer
|
||||
|
||||
The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics of the song do not contain Chinese, it will be included in the CVSA only if a Chinese virtual singer has been used.
|
||||
The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics of the song do not contain
|
||||
Chinese, it will be included in the CVSA only if a Chinese virtual singer has been used.
|
||||
|
||||
We define a **Chinese virtual singer** as follows:
|
||||
|
||||
1. The singer primarily uses a Chinese voicebank (i.e. the most widely used voicebank for the singer is Chinese)
|
||||
2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or Taiwan.
|
||||
2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or
|
||||
Taiwan.
|
||||
|
||||
### Using Vocal Synthesizer
|
||||
|
||||
|
@ -9,10 +9,13 @@ The AI systems we currently use are:
|
||||
Located at `/filter/` under project root dir, it classifies a video in the
|
||||
[category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
|
||||
|
||||
* 0: Not related to Chinese vocal synthesis
|
||||
* 1: An original song with Chinese vocal synthesis
|
||||
* 2: A cover/remix song with Chinese vocal synthesis
|
||||
- 0: Not related to Chinese vocal synthesis
|
||||
- 1: An original song with Chinese vocal synthesis
|
||||
- 2: A cover/remix song with Chinese vocal synthesis
|
||||
|
||||
### The Predictor
|
||||
|
||||
Located at `/pred/` under the project root dir, it predicts the future views of a video. This is a regression model that takes historical view trends of a video, other contextual information (such as the current time), and future time points to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified future time point.
|
||||
Located at `/pred/` under the project root dir, it predicts the future views of a video. This is a regression model that
|
||||
takes historical view trends of a video, other contextual information (such as the current time), and future time points
|
||||
to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified
|
||||
future time point.
|
||||
|
4
doc/en/architecure/crawler.md
Normal file
@ -0,0 +1,4 @@
|
||||
# Crawler
|
||||
|
||||
A central aspect of CVSA's technical design is its emphasis on automation. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
|
||||
|
@ -5,10 +5,11 @@ CVSA uses [PostgreSQL](https://www.postgresql.org/) as our database.
|
||||
All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the
|
||||
following tables:
|
||||
|
||||
* songs: stores the main information of songs
|
||||
* bili\_user: stores snapshots of Bilibili user information
|
||||
* all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
|
||||
* labelling\_result: Contains labels of videos in `all_data` tagged by our [AI system](../artificial-intelligence.md#the-filter).
|
||||
* video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views); we call this fetch process a "snapshot".
|
||||
* snapshot\_schedule: The scheduling information for video snapshots.
|
||||
|
||||
- songs: stores the main information of songs
|
||||
- bili\_user: stores snapshots of Bilibili user information
|
||||
- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
|
||||
- labelling\_result: Contains labels of videos in `all_data` tagged by our
|
||||
[AI system](../artificial-intelligence.md#the-filter).
|
||||
- video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views, etc.), we call this
|
||||
fetch process a "snapshot".
|
||||
- snapshot\_schedule: The scheduling information for video snapshots.
|
||||
|
@ -1,7 +0,0 @@
|
||||
# Message Queue
|
||||
|
||||
We rely on message queues to manage the various tasks that [the crawler](overview.md#crawler) needs to perform.
|
||||
|
||||
### Code Path
|
||||
|
||||
Currently, the code related to message queues is located at `lib/mq` and `src`.
|
@ -14,14 +14,29 @@ layout:
|
||||
|
||||
# Overview
|
||||
|
||||
The whole CVSA system can be separated into three different parts:
|
||||
The CVSA is a [monorepo](https://en.wikipedia.org/wiki/Monorepo) codebase, mainly using TypeScript as the development language. With [Deno workspace](https://docs.deno.com/runtime/fundamentals/workspaces/), the major part of the codebase is under `packages/`. 
|
||||
|
||||
* Frontend
|
||||
* API
|
||||
* Crawler
|
||||
**Project structure:**
|
||||
|
||||
The frontend is driven by [Astro](https://astro.build/) and is used to display the final CVSA page. The API is driven by [Hono](https://hono.dev) and is used to query the database and provide REST/GraphQL APIs that can be called by our website, applications, or third parties. The crawler is our automatic data collector, used to automatically collect new songs from bilibili, track their statistics, etc.
|
||||
```
|
||||
cvsa
|
||||
├── deno.json
|
||||
├── packages
|
||||
│ ├── backend
|
||||
│ ├── core
|
||||
│ ├── crawler
|
||||
│ └── frontend
|
||||
└── README.md
|
||||
```
|
||||
|
||||
**Package Breakdown:**
|
||||
|
||||
* **`backend`**: This package houses the server-side logic, built with the [Hono](https://hono.dev/) web framework. It's responsible for interacting with the database and exposing data through REST and GraphQL APIs for consumption by the frontend, internal applications, and third-party developers.
|
||||
* **`frontend`**: The user-facing web interface of CVSA is developed using [Astro](https://astro.build/). This package handles the presentation layer, displaying information fetched from the database.
|
||||
* **`crawler`**: This automated data collection system is a key component of CVSA. It's designed to automatically discover and gather new song data from bilibili, as well as track relevant statistics over time.
|
||||
* **`core`**: This package contains reusable and generic code that is utilized across multiple workspaces within the CVSA monorepo.
|
||||
|
||||
### Crawler
|
||||
|
||||
Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by [BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
|
||||
Automation is the biggest highlight of CVSA's technical design. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data collection lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
|
||||
|
||||
|
106
doc/zh/.gitbook/assets/1.yaml
Normal file
@ -0,0 +1,106 @@
|
||||
openapi: 3.0.0
|
||||
info:
|
||||
title: CVSA API
|
||||
version: v1
|
||||
|
||||
servers:
|
||||
- url: https://api.projectcvsa.com
|
||||
|
||||
paths:
|
||||
/video/{id}/snapshots:
|
||||
get:
|
||||
summary: 获取视频快照列表
|
||||
description: 根据视频 ID 获取视频的快照列表。视频 ID 可以是以 "av" 开头的数字,以 "BV" 开头的 12 位字母数字,或者一个正整数。
|
||||
parameters:
|
||||
- in: path
|
||||
name: id
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: "视频 ID (如: av78977256, BV1KJ411C7CW, 78977256)"
|
||||
- in: query
|
||||
name: ps
|
||||
schema:
|
||||
type: integer
|
||||
minimum: 1
|
||||
description: 每页返回的快照数量 (pageSize),默认为 1000。
|
||||
- in: query
|
||||
name: pn
|
||||
schema:
|
||||
type: integer
|
||||
minimum: 1
|
||||
description: 页码 (pageNumber),用于分页查询。offset 与 pn 只能选择一个。
|
||||
- in: query
|
||||
name: offset
|
||||
schema:
|
||||
type: integer
|
||||
minimum: 1
|
||||
description: 偏移量,用于基于偏移量的查询。offset 与 pn 只能选择一个。
|
||||
- in: query
|
||||
name: reverse
|
||||
schema:
|
||||
type: boolean
|
||||
description: 是否反向排序(从旧到新),默认为 false。
|
||||
responses:
|
||||
'200':
|
||||
description: 成功获取快照列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
description: 快照 ID
|
||||
aid:
|
||||
type: integer
|
||||
description: 视频的 av 号
|
||||
views:
|
||||
type: integer
|
||||
description: 视频播放量
|
||||
coins:
|
||||
type: integer
|
||||
description: 视频投币数
|
||||
likes:
|
||||
type: integer
|
||||
description: 视频点赞数
|
||||
favorites:
|
||||
type: integer
|
||||
description: 视频收藏数
|
||||
shares:
|
||||
type: integer
|
||||
description: 视频分享数
|
||||
danmakus:
|
||||
type: integer
|
||||
description: 视频弹幕数
|
||||
replies:
|
||||
type: integer
|
||||
description: 视频评论数
|
||||
'400':
|
||||
description: 无效的查询参数
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
message:
|
||||
type: string
|
||||
description: 错误消息
|
||||
errors:
|
||||
type: object
|
||||
description: 详细的错误信息
|
||||
'500':
|
||||
description: 服务器内部错误
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
message:
|
||||
type: string
|
||||
description: 错误消息
|
||||
error:
|
||||
type: object
|
||||
description: 详细的错误信息
|
@ -1,22 +1,22 @@
|
||||
# Table of contents
|
||||
|
||||
- [欢迎](README.md)
|
||||
* [欢迎](README.md)
|
||||
|
||||
## 关于 <a href="#about" id="about"></a>
|
||||
|
||||
- [关于本项目](about/this-project.md)
|
||||
- [收录范围](about/scope-of-inclusion.md)
|
||||
* [关于本项目](about/this-project.md)
|
||||
* [收录范围](about/scope-of-inclusion.md)
|
||||
|
||||
## 技术架构 <a href="#architecture" id="architecture"></a>
|
||||
|
||||
* [概览](architecture/overview.md)
|
||||
* [数据库结构](architecture/database-structure/README.md)
|
||||
* [歌曲类型](architecture/database-structure/type-of-song.md)
|
||||
* [人工智能](architecture/artificial-intelligence.md)
|
||||
* [消息队列](architecture/message-queue/README.md)
|
||||
* [LatestVideosQueue 队列](architecture/message-queue/latestvideosqueue-dui-lie.md)
|
||||
- [概览](architecture/overview.md)
|
||||
- [数据库结构](architecture/database-structure/README.md)
|
||||
- [歌曲类型](architecture/database-structure/type-of-song.md)
|
||||
- [人工智能](architecture/artificial-intelligence.md)
|
||||
- [消息队列](architecture/message-queue/README.md)
|
||||
- [LatestVideosQueue 队列](architecture/message-queue/latestvideosqueue-dui-lie.md)
|
||||
|
||||
## API 文档 <a href="#api-doc" id="api-doc"></a>
|
||||
|
||||
- [目录](api-doc/catalog.md)
|
||||
- [歌曲](api-doc/songs.md)
|
||||
* [目录](api-doc/catalog.md)
|
||||
* [视频快照](api-doc/video-snapshot.md)
|
||||
|
@ -1,3 +1,4 @@
|
||||
# 目录
|
||||
|
||||
- [歌曲](songs.md)
|
||||
* [视频快照](video-snapshot.md)
|
||||
|
||||
|
@ -1,3 +0,0 @@
|
||||
# 歌曲
|
||||
|
||||
暂未实现。
|
6
doc/zh/api-doc/video-snapshot.md
Normal file
@ -0,0 +1,6 @@
|
||||
# 视频快照
|
||||
|
||||
{% openapi src="../.gitbook/assets/1.yaml" path="/video/{id}/snapshots" method="get" %}
|
||||
[1.yaml](../.gitbook/assets/1.yaml)
|
||||
{% endopenapi %}
|
||||
|
@ -2,13 +2,14 @@
|
||||
|
||||
CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。
|
||||
|
||||
CVSA 设计了两个
|
||||
|
||||
CVSA 的所有公开数据(不包括用户的个人数据)都存储在名为 `cvsa_main` 的数据库中,该数据库包含以下表:
|
||||
|
||||
* songs:存储歌曲的主要信息
|
||||
* bilibili\_user:存储 Bilibili 用户信息快照
|
||||
* bilibili\_metadata:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据
|
||||
* labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
|
||||
* latest\_video\_snapshot:存储视频最新的快照
|
||||
* video\_snapshot:存储视频的快照,包括特定时间下视频的统计信息(播放量、点赞数等)
|
||||
* snapshot\_schedule:视频快照的规划信息,为辅助表
|
||||
|
||||
- songs:存储歌曲的主要信息
|
||||
- bilibili\_user:存储 Bilibili 用户信息快照
|
||||
- bilibili\_metadata:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据
|
||||
- labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
|
||||
- latest\_video\_snapshot:存储视频最新的快照
|
||||
- video\_snapshot:存储视频的快照,包括特定时间下视频的统计信息(播放量、点赞数等)
|
||||
- snapshot\_schedule:视频快照的规划信息,为辅助表
|
||||
|
@ -1,2 +1 @@
|
||||
# LatestVideosQueue 队列
|
||||
|
||||
|
@ -20,8 +20,7 @@ layout:
|
||||
|
||||
位于项目目录`packages/crawler` 下,它负责以下工作:
|
||||
|
||||
* 抓取新的视频并收录作品
|
||||
* 持续监控视频的播放量等统计信息
|
||||
- 抓取新的视频并收录作品
|
||||
- 持续监控视频的播放量等统计信息
|
||||
|
||||
整个 crawler 由 BullMQ 消息队列驱动,使用 Redis 和 PostgreSQL 管理状态。
|
||||
|
||||
|
@ -9,18 +9,18 @@ export const db = pool;
|
||||
export const dbCred = poolCred;
|
||||
|
||||
export const dbMiddleware = createMiddleware(async (c, next) => {
|
||||
const connection = await pool.connect();
|
||||
const connection = await pool.connect();
|
||||
c.set("db", connection);
|
||||
await next();
|
||||
connection.release();
|
||||
});
|
||||
|
||||
export const dbCredMiddleware = createMiddleware(async (c, next) => {
|
||||
const connection = await poolCred.connect();
|
||||
const connection = await poolCred.connect();
|
||||
c.set("dbCred", connection);
|
||||
await next();
|
||||
connection.release();
|
||||
})
|
||||
});
|
||||
|
||||
declare module "hono" {
|
||||
interface ContextVariableMap {
|
||||
|
@ -4,11 +4,15 @@
|
||||
"@rabbit-company/argon2id": "jsr:@rabbit-company/argon2id@^2.1.0",
|
||||
"hono": "jsr:@hono/hono@^4.7.5",
|
||||
"zod": "npm:zod",
|
||||
"yup": "npm:yup"
|
||||
"yup": "npm:yup",
|
||||
"@core/": "../core/",
|
||||
"log/": "../core/log/",
|
||||
"@crawler/net/videoInfo": "../crawler/net/getVideoInfo.ts",
|
||||
"ioredis": "npm:ioredis"
|
||||
},
|
||||
"tasks": {
|
||||
"dev": "deno serve --env-file=.env --allow-env --allow-net --watch main.ts",
|
||||
"start": "deno serve --env-file=.env --allow-env --allow-net --host 127.0.0.1 main.ts"
|
||||
"dev": "deno serve --env-file=.env --allow-env --allow-net --allow-read --allow-write --allow-run --watch main.ts",
|
||||
"start": "deno serve --env-file=.env --allow-env --allow-net --allow-read --allow-write --allow-run --host 127.0.0.1 main.ts"
|
||||
},
|
||||
"compilerOptions": {
|
||||
"jsx": "precompile",
|
||||
|
@ -3,16 +3,19 @@ import { dbCredMiddleware, dbMiddleware } from "./database.ts";
|
||||
import { rootHandler } from "./root.ts";
|
||||
import { getSnapshotsHanlder } from "./snapshots.ts";
|
||||
import { registerHandler } from "./register.ts";
|
||||
import { videoInfoHandler } from "./videoInfo.ts";
|
||||
|
||||
export const app = new Hono();
|
||||
|
||||
app.use('/video/*', dbMiddleware);
|
||||
app.use('/user', dbCredMiddleware);
|
||||
app.use("/video/*", dbMiddleware);
|
||||
app.use("/user", dbCredMiddleware);
|
||||
|
||||
app.get("/", ...rootHandler);
|
||||
|
||||
app.get('/video/:id/snapshots', ...getSnapshotsHanlder);
|
||||
app.post('/user', ...registerHandler);
|
||||
app.get("/video/:id/snapshots", ...getSnapshotsHanlder);
|
||||
app.post("/user", ...registerHandler);
|
||||
|
||||
app.get("/video/:id/info", ...videoInfoHandler);
|
||||
|
||||
const fetch = app.fetch;
|
||||
|
||||
@ -20,4 +23,4 @@ export default {
|
||||
fetch,
|
||||
} satisfies Deno.ServeDefaultExport;
|
||||
|
||||
export const VERSION = "0.3.0";
|
||||
export const VERSION = "0.4.2";
|
||||
|
@ -8,7 +8,7 @@ import type { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
const RegistrationBodySchema = object({
|
||||
username: string().trim().required("Username is required").max(50, "Username cannot exceed 50 characters"),
|
||||
password: string().required("Password is required"),
|
||||
nickname: string().optional(),
|
||||
nickname: string().optional(),
|
||||
});
|
||||
|
||||
type ContextType = Context<BlankEnv & { Bindings: Bindings }, "/user", BlankInput>;
|
||||
@ -19,7 +19,7 @@ export const userExists = async (username: string, client: Client) => {
|
||||
`;
|
||||
const result = await client.queryObject(query, [username]);
|
||||
return result.rows.length > 0;
|
||||
}
|
||||
};
|
||||
|
||||
export const registerHandler = createHandlers(async (c: ContextType) => {
|
||||
const client = c.get("dbCred");
|
||||
@ -28,11 +28,11 @@ export const registerHandler = createHandlers(async (c: ContextType) => {
|
||||
const body = await RegistrationBodySchema.validate(await c.req.json());
|
||||
const { username, password, nickname } = body;
|
||||
|
||||
if (await userExists(username, client)) {
|
||||
if (await userExists(username, client)) {
|
||||
return c.json({
|
||||
message: `User "${username}" already exists.`,
|
||||
}, 400);
|
||||
}
|
||||
}
|
||||
|
||||
const hash = await Argon2id.hashEncoded(password);
|
||||
|
||||
|
@ -3,29 +3,27 @@ import { VERSION } from "./main.ts";
|
||||
import { createHandlers } from "./utils.ts";
|
||||
|
||||
export const rootHandler = createHandlers((c) => {
|
||||
let singer: Singer | Singer[] | null = null;
|
||||
let singer: Singer | Singer[];
|
||||
const shouldShowSpecialSinger = Math.random() < 0.016;
|
||||
if (getSingerForBirthday().length !== 0){
|
||||
singer = getSingerForBirthday();
|
||||
if (getSingerForBirthday().length !== 0) {
|
||||
singer = JSON.parse(JSON.stringify(getSingerForBirthday())) as Singer[];
|
||||
for (const s of singer) {
|
||||
delete s.birthday;
|
||||
s.message = `祝${s.name}生日快乐~`
|
||||
s.message = `祝${s.name}生日快乐~`;
|
||||
}
|
||||
}
|
||||
else if (shouldShowSpecialSinger) {
|
||||
singer = pickSpecialSinger();
|
||||
}
|
||||
else {
|
||||
singer = pickSinger();
|
||||
} else if (shouldShowSpecialSinger) {
|
||||
singer = pickSpecialSinger();
|
||||
} else {
|
||||
singer = pickSinger();
|
||||
}
|
||||
return c.json({
|
||||
"project": {
|
||||
"name": "中V档案馆",
|
||||
"motto": "一起唱吧,心中的歌!"
|
||||
"motto": "一起唱吧,心中的歌!",
|
||||
},
|
||||
"status": 200,
|
||||
"version": VERSION,
|
||||
"time": Date.now(),
|
||||
"singer": singer
|
||||
})
|
||||
})
|
||||
"singer": singer,
|
||||
});
|
||||
});
|
||||
|
@ -70,7 +70,7 @@ export interface Singer {
|
||||
name: string;
|
||||
color?: string;
|
||||
birthday?: string;
|
||||
message?: string;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
export const specialSingers = [
|
||||
|
@ -12,12 +12,12 @@ const SnapshotQueryParamsSchema = object({
|
||||
reverse: boolean().optional(),
|
||||
});
|
||||
|
||||
const idSchema = mixed().test(
|
||||
export const idSchema = mixed().test(
|
||||
"is-valid-id",
|
||||
'id must be a string starting with "av" followed by digits, or "BV" followed by 10 alphanumeric characters, or a positive integer',
|
||||
async (value) => {
|
||||
if (value && await number().integer().isValid(value)) {
|
||||
const v = parseInt(value as string);
|
||||
const v = parseInt(value as string);
|
||||
return Number.isInteger(v) && v > 0;
|
||||
}
|
||||
|
||||
@ -46,10 +46,9 @@ export const getSnapshotsHanlder = createHandlers(async (c: ContextType) => {
|
||||
let videoId: string | number = idParam as string;
|
||||
if (videoId.startsWith("av")) {
|
||||
videoId = parseInt(videoId.slice(2));
|
||||
}
|
||||
else if (await number().isValid(videoId)) {
|
||||
} else if (await number().isValid(videoId)) {
|
||||
videoId = parseInt(videoId);
|
||||
}
|
||||
}
|
||||
const queryParams = await SnapshotQueryParamsSchema.validate(c.req.query());
|
||||
const { ps, pn, offset, reverse = false } = queryParams;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { createFactory } from 'hono/factory'
|
||||
import { createFactory } from "hono/factory";
|
||||
|
||||
const factory = createFactory();
|
||||
|
||||
|
84
packages/backend/videoInfo.ts
Normal file
@ -0,0 +1,84 @@
|
||||
import logger from "log/logger.ts";
|
||||
import { Redis } from "ioredis";
|
||||
import { number, ValidationError } from "yup";
|
||||
import { createHandlers } from "./utils.ts";
|
||||
import { getVideoInfo, getVideoInfoByBV } from "@crawler/net/videoInfo";
|
||||
import { idSchema } from "./snapshots.ts";
|
||||
import { NetSchedulerError } from "@core/net/delegate.ts";
|
||||
import type { Context } from "hono";
|
||||
import type { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import type { BlankEnv, BlankInput } from "hono/types";
|
||||
import type { VideoInfoData } from "@core/net/bilibili.d.ts";
|
||||
|
||||
const redis = new Redis({ maxRetriesPerRequest: null });
|
||||
const CACHE_EXPIRATION_SECONDS = 60;
|
||||
|
||||
type ContextType = Context<BlankEnv, "/video/:id/info", BlankInput>;
|
||||
|
||||
async function insertVideoSnapshot(client: Client, data: VideoInfoData) {
|
||||
const views = data.stat.view;
|
||||
const danmakus = data.stat.danmaku;
|
||||
const replies = data.stat.reply;
|
||||
const likes = data.stat.like;
|
||||
const coins = data.stat.coin;
|
||||
const shares = data.stat.share;
|
||||
const favorites = data.stat.favorite;
|
||||
const aid = data.aid;
|
||||
|
||||
const query: string = `
|
||||
INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
`;
|
||||
|
||||
await client.queryObject(
|
||||
query,
|
||||
[aid, views, danmakus, replies, likes, coins, shares, favorites],
|
||||
);
|
||||
|
||||
logger.log(`Inserted into snapshot for video ${aid} by videoInfo API.`, "api", "fn:insertVideoSnapshot");
|
||||
}
|
||||
|
||||
export const videoInfoHandler = createHandlers(async (c: ContextType) => {
|
||||
const client = c.get("db");
|
||||
try {
|
||||
const id = await idSchema.validate(c.req.param("id"));
|
||||
let videoId: string | number = id as string;
|
||||
if (videoId.startsWith("av")) {
|
||||
videoId = parseInt(videoId.slice(2));
|
||||
} else if (await number().isValid(videoId)) {
|
||||
videoId = parseInt(videoId);
|
||||
}
|
||||
|
||||
const cacheKey = `cvsa:videoInfo:${videoId}`;
|
||||
const cachedData = await redis.get(cacheKey);
|
||||
|
||||
if (cachedData) {
|
||||
return c.json(JSON.parse(cachedData));
|
||||
}
|
||||
|
||||
let result: VideoInfoData | number;
|
||||
if (typeof videoId === "number") {
|
||||
result = await getVideoInfo(videoId, "getVideoInfo");
|
||||
} else {
|
||||
result = await getVideoInfoByBV(videoId, "getVideoInfo");
|
||||
}
|
||||
|
||||
if (typeof result === "number") {
|
||||
return c.json({ message: "Error fetching video info", code: result }, 500);
|
||||
}
|
||||
|
||||
await redis.setex(cacheKey, CACHE_EXPIRATION_SECONDS, JSON.stringify(result));
|
||||
|
||||
await insertVideoSnapshot(client, result);
|
||||
|
||||
return c.json(result);
|
||||
} catch (e) {
|
||||
if (e instanceof ValidationError) {
|
||||
return c.json({ message: "Invalid query parameters", errors: e.errors }, 400);
|
||||
} else if (e instanceof NetSchedulerError) {
|
||||
return c.json({ message: "Error fetching video info", code: e.code }, 500);
|
||||
} else {
|
||||
return c.json({ message: "Unhandled error", error: e }, 500);
|
||||
}
|
||||
}
|
||||
});
|
@ -1,33 +1,62 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { VideoSnapshotType } from "@core/db/schema.d.ts";
|
||||
import type { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import type { VideoSnapshotType } from "./schema.d.ts";
|
||||
|
||||
/**
 * Reads one window of `video_snapshot` rows for the given `aid`.
 *
 * @param client       Database client used to run the query.
 * @param aid          Video id matched against `video_snapshot.aid`.
 * @param limit        Maximum number of rows to return.
 * @param pageOrOffset 1-based page number when `mode` is "page", otherwise a
 *                     raw row offset.
 * @param reverse      When true rows are ordered by `created_at` ascending;
 *                     otherwise descending (newest first).
 * @param mode         How `pageOrOffset` is interpreted. Defaults to "page".
 * @returns The rows returned by the query.
 */
export async function getVideoSnapshots(
	client: Client,
	aid: number,
	limit: number,
	pageOrOffset: number,
	reverse: boolean,
	mode: "page" | "offset" = "page",
) {
	// Both statements are kept as static text; only the bound values vary.
	const newestFirst: string = `
		SELECT *
		FROM video_snapshot
		WHERE aid = $1
		ORDER BY created_at DESC
		LIMIT $2
		OFFSET $3
	`;
	const oldestFirst: string = `
		SELECT *
		FROM video_snapshot
		WHERE aid = $1
		ORDER BY created_at
		LIMIT $2 OFFSET $3
	`;

	let skip = pageOrOffset;
	if (mode === "page") {
		skip = (pageOrOffset - 1) * limit;
	}

	const statement = reverse ? oldestFirst : newestFirst;
	const { rows } = await client.queryObject<VideoSnapshotType>(statement, [aid, limit, skip]);
	return rows;
}
|
||||
|
||||
/**
 * Reads one window of `video_snapshot` rows for the video whose
 * `bilibili_metadata.bvid` equals `bv`.
 *
 * @param client       Database client used to run the query.
 * @param bv           BV id matched against `bilibili_metadata.bvid`.
 * @param limit        Maximum number of rows to return.
 * @param pageOrOffset 1-based page number when `mode` is "page", otherwise a
 *                     raw row offset.
 * @param reverse      When true rows are ordered by `vs.created_at`
 *                     ascending; otherwise descending (newest first).
 * @param mode         How `pageOrOffset` is interpreted. Defaults to "page".
 * @returns The snapshot rows returned by the query.
 */
export async function getVideoSnapshotsByBV(
	client: Client,
	bv: string,
	limit: number,
	pageOrOffset: number,
	reverse: boolean,
	mode: "page" | "offset" = "page",
) {
	const offset = mode === "page" ? (pageOrOffset - 1) * limit : pageOrOffset;
	const queryAsc: string = `
		SELECT vs.*
		FROM video_snapshot vs
		JOIN bilibili_metadata bm ON vs.aid = bm.aid
		WHERE bm.bvid = $1
		ORDER BY vs.created_at
		LIMIT $2
		OFFSET $3
	`;
	// Select only the snapshot columns, exactly like the ascending query.
	// A bare `SELECT *` over the JOIN also returns the bilibili_metadata
	// columns, so same-named columns collide in the row object and the
	// result no longer matches VideoSnapshotType.
	const queryDesc: string = `
		SELECT vs.*
		FROM video_snapshot vs
		JOIN bilibili_metadata bm ON vs.aid = bm.aid
		WHERE bm.bvid = $1
		ORDER BY vs.created_at DESC
		LIMIT $2 OFFSET $3
	`;
	const query = reverse ? queryAsc : queryDesc;
	const queryResult = await client.queryObject<VideoSnapshotType>(query, [bv, limit, offset]);
	return queryResult.rows;
}
|
@ -0,0 +1,12 @@
|
||||
{
|
||||
"name": "@cvsa/core",
|
||||
"exports": "./main.ts",
|
||||
"imports": {
|
||||
"ioredis": "npm:ioredis",
|
||||
"log/": "./log/",
|
||||
"db/": "./db/",
|
||||
"$std/": "https://deno.land/std@0.216.0/",
|
||||
"mq/": "./mq/",
|
||||
"chalk": "npm:chalk"
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
import winston, { format, transports } from "npm:winston";
|
||||
import { TransformableInfo } from "npm:logform";
|
||||
import type { TransformableInfo } from "npm:logform";
|
||||
import chalk from "chalk";
|
||||
|
||||
const customFormat = format.printf((info: TransformableInfo) => {
|
1
packages/core/main.ts
Normal file
@ -0,0 +1 @@
|
||||
export const DB_VERSION = 10;
|
@ -1,4 +1,4 @@
|
||||
import { SlidingWindow } from "mq/slidingWindow.ts";
|
||||
import { SlidingWindow } from "./slidingWindow.ts";
|
||||
|
||||
export interface RateLimiterConfig {
|
||||
window: SlidingWindow;
|
@ -1,5 +1,5 @@
|
||||
import logger from "log/logger.ts";
|
||||
import { RateLimiter, RateLimiterConfig } from "mq/rateLimiter.ts";
|
||||
import { RateLimiter, type RateLimiterConfig } from "mq/rateLimiter.ts";
|
||||
import { SlidingWindow } from "mq/slidingWindow.ts";
|
||||
import { redis } from "db/redis.ts";
|
||||
import Redis from "ioredis";
|
||||
@ -19,7 +19,7 @@ interface ProxiesMap {
|
||||
[name: string]: Proxy;
|
||||
}
|
||||
|
||||
type NetSchedulerErrorCode =
|
||||
type NetworkDelegateErrorCode =
|
||||
| "NO_PROXY_AVAILABLE"
|
||||
| "PROXY_RATE_LIMITED"
|
||||
| "PROXY_NOT_FOUND"
|
||||
@ -28,9 +28,9 @@ type NetSchedulerErrorCode =
|
||||
| "ALICLOUD_PROXY_ERR";
|
||||
|
||||
export class NetSchedulerError extends Error {
|
||||
public code: NetSchedulerErrorCode;
|
||||
public code: NetworkDelegateErrorCode;
|
||||
public rawError: unknown | undefined;
|
||||
constructor(message: string, errorCode: NetSchedulerErrorCode, rawError?: unknown) {
|
||||
constructor(message: string, errorCode: NetworkDelegateErrorCode, rawError?: unknown) {
|
||||
super(message);
|
||||
this.name = "NetSchedulerError";
|
||||
this.code = errorCode;
|
||||
@ -59,7 +59,7 @@ function shuffleArray<T>(array: T[]): T[] {
|
||||
return newArray;
|
||||
}
|
||||
|
||||
class NetScheduler {
|
||||
class NetworkDelegate {
|
||||
private proxies: ProxiesMap = {};
|
||||
private providerLimiters: LimiterMap = {};
|
||||
private proxyLimiters: OptionalLimiterMap = {};
|
||||
@ -69,23 +69,6 @@ class NetScheduler {
|
||||
this.proxies[proxyName] = { type, data };
|
||||
}
|
||||
|
||||
removeProxy(proxyName: string): void {
|
||||
if (!this.proxies[proxyName]) {
|
||||
throw new Error(`Proxy ${proxyName} not found`);
|
||||
}
|
||||
delete this.proxies[proxyName];
|
||||
// Clean up associated limiters
|
||||
this.cleanupProxyLimiters(proxyName);
|
||||
}
|
||||
|
||||
private cleanupProxyLimiters(proxyName: string): void {
|
||||
for (const limiterId in this.proxyLimiters) {
|
||||
if (limiterId.startsWith(`proxy-${proxyName}`)) {
|
||||
delete this.proxyLimiters[limiterId];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
addTask(taskName: string, provider: string, proxies: string[] | "all"): void {
|
||||
this.tasks[taskName] = { provider, proxies };
|
||||
}
|
||||
@ -217,8 +200,7 @@ class NetScheduler {
|
||||
const providerLimiterId = "provider-" + proxyName + "-" + provider;
|
||||
if (!this.proxyLimiters[proxyLimiterId]) {
|
||||
const providerLimiter = this.providerLimiters[providerLimiterId];
|
||||
const providerAvailable = await providerLimiter.getAvailability();
|
||||
return providerAvailable;
|
||||
return await providerLimiter.getAvailability();
|
||||
}
|
||||
const proxyLimiter = this.proxyLimiters[proxyLimiterId];
|
||||
const providerLimiter = this.providerLimiters[providerLimiterId];
|
||||
@ -281,6 +263,7 @@ class NetScheduler {
|
||||
const out = decoder.decode(output.stdout);
|
||||
const rawData = JSON.parse(out);
|
||||
if (rawData.statusCode !== 200) {
|
||||
// noinspection ExceptionCaughtLocallyJS
|
||||
throw new NetSchedulerError(
|
||||
`Error proxying ${url} to ali-fc region ${region}, code: ${rawData.statusCode}.`,
|
||||
"ALICLOUD_PROXY_ERR",
|
||||
@ -295,7 +278,7 @@ class NetScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
const netScheduler = new NetScheduler();
|
||||
const networkDelegate = new NetworkDelegate();
|
||||
const videoInfoRateLimiterConfig: RateLimiterConfig[] = [
|
||||
{
|
||||
window: new SlidingWindow(redis, 0.3),
|
||||
@ -369,14 +352,14 @@ but both should come after addProxy and addTask to ensure proper setup and depen
|
||||
*/
|
||||
|
||||
const regions = ["shanghai", "hangzhou", "qingdao", "beijing", "zhangjiakou", "chengdu", "shenzhen", "hohhot"];
|
||||
netScheduler.addProxy("native", "native", "");
|
||||
networkDelegate.addProxy("native", "native", "");
|
||||
for (const region of regions) {
|
||||
netScheduler.addProxy(`alicloud-${region}`, "alicloud-fc", region);
|
||||
networkDelegate.addProxy(`alicloud-${region}`, "alicloud-fc", region);
|
||||
}
|
||||
netScheduler.addTask("getVideoInfo", "bilibili", "all");
|
||||
netScheduler.addTask("getLatestVideos", "bilibili", "all");
|
||||
netScheduler.addTask("snapshotMilestoneVideo", "bilibili", regions.map((region) => `alicloud-${region}`));
|
||||
netScheduler.addTask("snapshotVideo", "bili_test", [
|
||||
networkDelegate.addTask("getVideoInfo", "bilibili", "all");
|
||||
networkDelegate.addTask("getLatestVideos", "bilibili", "all");
|
||||
networkDelegate.addTask("snapshotMilestoneVideo", "bilibili", regions.map((region) => `alicloud-${region}`));
|
||||
networkDelegate.addTask("snapshotVideo", "bili_test", [
|
||||
"alicloud-qingdao",
|
||||
"alicloud-shanghai",
|
||||
"alicloud-zhangjiakou",
|
||||
@ -384,7 +367,7 @@ netScheduler.addTask("snapshotVideo", "bili_test", [
|
||||
"alicloud-shenzhen",
|
||||
"alicloud-hohhot",
|
||||
]);
|
||||
netScheduler.addTask("bulkSnapshot", "bili_strict", [
|
||||
networkDelegate.addTask("bulkSnapshot", "bili_strict", [
|
||||
"alicloud-qingdao",
|
||||
"alicloud-shanghai",
|
||||
"alicloud-zhangjiakou",
|
||||
@ -392,13 +375,13 @@ netScheduler.addTask("bulkSnapshot", "bili_strict", [
|
||||
"alicloud-shenzhen",
|
||||
"alicloud-hohhot",
|
||||
]);
|
||||
netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig);
|
||||
netScheduler.setTaskLimiter("getLatestVideos", null);
|
||||
netScheduler.setTaskLimiter("snapshotMilestoneVideo", null);
|
||||
netScheduler.setTaskLimiter("snapshotVideo", null);
|
||||
netScheduler.setTaskLimiter("bulkSnapshot", null);
|
||||
netScheduler.setProviderLimiter("bilibili", biliLimiterConfig);
|
||||
netScheduler.setProviderLimiter("bili_test", bili_test);
|
||||
netScheduler.setProviderLimiter("bili_strict", bili_strict);
|
||||
networkDelegate.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig);
|
||||
networkDelegate.setTaskLimiter("getLatestVideos", null);
|
||||
networkDelegate.setTaskLimiter("snapshotMilestoneVideo", null);
|
||||
networkDelegate.setTaskLimiter("snapshotVideo", null);
|
||||
networkDelegate.setTaskLimiter("bulkSnapshot", null);
|
||||
networkDelegate.setProviderLimiter("bilibili", biliLimiterConfig);
|
||||
networkDelegate.setProviderLimiter("bili_test", bili_test);
|
||||
networkDelegate.setProviderLimiter("bili_strict", bili_strict);
|
||||
|
||||
export default netScheduler;
|
||||
export default networkDelegate;
|
@ -1,5 +1,5 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { AllDataType, BiliUserType } from "db/schema.d.ts";
|
||||
import { AllDataType, BiliUserType } from "@core/db/schema";
|
||||
import Akari from "ml/akari.ts";
|
||||
|
||||
export async function videoExistsInAllData(client: Client, aid: number) {
|
@ -1,5 +1,5 @@
|
||||
import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { postgresConfig } from "@core/db/pgConfig.ts";
|
||||
import { postgresConfig } from "@core/db/pgConfig";
|
||||
|
||||
const pool = new Pool(postgresConfig, 12);
|
||||
|
||||
|
55
packages/crawler/db/schema.d.ts
vendored
@ -1,55 +0,0 @@
|
||||
export interface AllDataType {
|
||||
id: number;
|
||||
aid: number;
|
||||
bvid: string | null;
|
||||
description: string | null;
|
||||
uid: number | null;
|
||||
tags: string | null;
|
||||
title: string | null;
|
||||
published_at: string | null;
|
||||
duration: number;
|
||||
created_at: string | null;
|
||||
}
|
||||
|
||||
export interface BiliUserType {
|
||||
id: number;
|
||||
uid: number;
|
||||
username: string;
|
||||
desc: string;
|
||||
fans: number;
|
||||
}
|
||||
|
||||
export interface VideoSnapshotType {
|
||||
id: number;
|
||||
created_at: string;
|
||||
views: number;
|
||||
coins: number;
|
||||
likes: number;
|
||||
favorites: number;
|
||||
shares: number;
|
||||
danmakus: number;
|
||||
aid: bigint;
|
||||
replies: number;
|
||||
}
|
||||
|
||||
export interface LatestSnapshotType {
|
||||
aid: number;
|
||||
time: number;
|
||||
views: number;
|
||||
danmakus: number;
|
||||
replies: number;
|
||||
likes: number;
|
||||
coins: number;
|
||||
shares: number;
|
||||
favorites: number;
|
||||
}
|
||||
|
||||
export interface SnapshotScheduleType {
|
||||
id: number;
|
||||
aid: number;
|
||||
type?: string;
|
||||
created_at: string;
|
||||
started_at?: string;
|
||||
finished_at?: string;
|
||||
status: string;
|
||||
}
|
@ -1,15 +1,20 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { LatestSnapshotType } from "db/schema.d.ts";
|
||||
import { LatestSnapshotType } from "@core/db/schema";
|
||||
import { SnapshotNumber } from "mq/task/getVideoStats.ts";
|
||||
|
||||
export async function getVideosNearMilestone(client: Client) {
|
||||
const queryResult = await client.queryObject<LatestSnapshotType>(`
|
||||
SELECT ls.*
|
||||
FROM latest_video_snapshot ls
|
||||
INNER JOIN
|
||||
songs s ON ls.aid = s.aid
|
||||
AND s.deleted = false
|
||||
RIGHT JOIN songs ON songs.aid = ls.aid
|
||||
WHERE
|
||||
(views >= 50000 AND views < 100000) OR
|
||||
(views >= 900000 AND views < 1000000) OR
|
||||
(views >= 9900000 AND views < 10000000)
|
||||
UNION
|
||||
SELECT ls.*
|
||||
FROM latest_video_snapshot ls
|
||||
WHERE
|
||||
s.deleted = false AND
|
||||
(views >= 90000 AND views < 100000) OR
|
||||
(views >= 900000 AND views < 1000000) OR
|
||||
(views >= 9900000 AND views < 10000000)
|
||||
@ -22,7 +27,7 @@ export async function getVideosNearMilestone(client: Client) {
|
||||
});
|
||||
}
|
||||
|
||||
export async function getLatestVideoSnapshot(client: Client, aid: number): Promise<null | LatestSnapshotType> {
|
||||
export async function getLatestVideoSnapshot(client: Client, aid: number): Promise<null | SnapshotNumber> {
|
||||
const queryResult = await client.queryObject<LatestSnapshotType>(
|
||||
`
|
||||
SELECT *
|
||||
|
@ -1,18 +1,17 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { formatTimestampToPsql } from "utils/formatTimestampToPostgre.ts";
|
||||
import { SnapshotScheduleType } from "./schema.d.ts";
|
||||
import { SnapshotScheduleType } from "@core/db/schema";
|
||||
import logger from "log/logger.ts";
|
||||
import { MINUTE } from "$std/datetime/constants.ts";
|
||||
import { redis } from "db/redis.ts";
|
||||
import { redis } from "@core/db/redis.ts";
|
||||
import { Redis } from "ioredis";
|
||||
import {parseTimestampFromPsql} from "../utils/formatTimestampToPostgre.ts";
|
||||
|
||||
const REDIS_KEY = "cvsa:snapshot_window_counts";
|
||||
|
||||
function getCurrentWindowIndex(): number {
|
||||
const now = new Date();
|
||||
const minutesSinceMidnight = now.getHours() * 60 + now.getMinutes();
|
||||
const currentWindow = Math.floor(minutesSinceMidnight / 5);
|
||||
return currentWindow;
|
||||
return Math.floor(minutesSinceMidnight / 5);
|
||||
}
|
||||
|
||||
export async function refreshSnapshotWindowCounts(client: Client, redisClient: Redis) {
|
||||
@ -71,6 +70,14 @@ export async function videoHasActiveSchedule(client: Client, aid: number) {
|
||||
return res.rows.length > 0;
|
||||
}
|
||||
|
||||
/**
 * Tells whether `snapshot_schedule` holds at least one row for `aid` with the
 * given `type` whose status is 'pending' or 'processing'.
 *
 * @param client Database client used to run the query.
 * @param aid    Video id to look up.
 * @param type   Schedule type to match.
 * @returns true when at least one matching row exists.
 */
export async function videoHasActiveScheduleWithType(client: Client, aid: number, type: string) {
	const { rows } = await client.queryObject<{ status: string }>(
		`SELECT status FROM snapshot_schedule WHERE aid = $1 AND (status = 'pending' OR status = 'processing') AND type = $2`,
		[aid, type],
	);
	const matches = rows.length;
	return matches > 0;
}
|
||||
|
||||
export async function videoHasProcessingSchedule(client: Client, aid: number) {
|
||||
const res = await client.queryObject<{ status: string }>(
|
||||
`SELECT status FROM snapshot_schedule WHERE aid = $1 AND status = 'processing'`,
|
||||
@ -162,22 +169,18 @@ export async function getLatestSnapshot(client: Client, aid: number): Promise<Sn
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the number of snapshot schedules within the specified range.
|
||||
* @param client The database client.
|
||||
* @param start The start time of the range. (Timestamp in milliseconds)
|
||||
* @param end The end time of the range. (Timestamp in milliseconds)
|
||||
*/
|
||||
export async function getSnapshotScheduleCountWithinRange(client: Client, start: number, end: number) {
|
||||
const startTimeString = formatTimestampToPsql(start);
|
||||
const endTimeString = formatTimestampToPsql(end);
|
||||
const query = `
|
||||
SELECT COUNT(*) FROM snapshot_schedule
|
||||
WHERE started_at BETWEEN $1 AND $2
|
||||
AND status = 'pending'
|
||||
`;
|
||||
const res = await client.queryObject<{ count: number }>(query, [startTimeString, endTimeString]);
|
||||
return res.rows[0].count;
|
||||
export async function getLatestActiveScheduleWithType(client: Client, aid: number, type: string) {
|
||||
const query: string = `
|
||||
SELECT *
|
||||
FROM snapshot_schedule
|
||||
WHERE aid = $1
|
||||
AND type = $2
|
||||
AND (status = 'pending' OR status = 'processing')
|
||||
ORDER BY started_at DESC
|
||||
LIMIT 1
|
||||
`
|
||||
const res = await client.queryObject<SnapshotScheduleType>(query, [aid, type]);
|
||||
return res.rows[0];
|
||||
}
|
||||
|
||||
/*
|
||||
@ -193,10 +196,28 @@ export async function scheduleSnapshot(
|
||||
targetTime: number,
|
||||
force: boolean = false,
|
||||
) {
|
||||
if (await videoHasActiveSchedule(client, aid) && !force) return;
|
||||
let adjustedTime = new Date(targetTime);
|
||||
const hashActiveSchedule = await videoHasActiveScheduleWithType(client, aid, type);
|
||||
if (type == "milestone" && hashActiveSchedule) {
|
||||
const latestActiveSchedule = await getLatestActiveScheduleWithType(client, aid, type);
|
||||
const latestScheduleStartedAt = new Date(parseTimestampFromPsql(latestActiveSchedule.started_at!));
|
||||
if (latestScheduleStartedAt > adjustedTime) {
|
||||
await client.queryObject(`
|
||||
UPDATE snapshot_schedule
|
||||
SET started_at = $1
|
||||
WHERE id = $2
|
||||
`, [adjustedTime, latestActiveSchedule.id]);
|
||||
logger.log(
|
||||
`Updated snapshot schedule for ${aid} at ${adjustedTime.toISOString()}`,
|
||||
"mq",
|
||||
"fn:scheduleSnapshot",
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (hashActiveSchedule && !force) return;
|
||||
if (type !== "milestone" && type !== "new") {
|
||||
adjustedTime = await adjustSnapshotTime(new Date(targetTime), 1000, redis);
|
||||
adjustedTime = await adjustSnapshotTime(new Date(targetTime), 2000, redis);
|
||||
}
|
||||
logger.log(`Scheduled snapshot for ${aid} at ${adjustedTime.toISOString()}`, "mq", "fn:scheduleSnapshot");
|
||||
return client.queryObject(
|
||||
@ -227,7 +248,7 @@ export async function adjustSnapshotTime(
|
||||
|
||||
const initialOffset = currentWindow + Math.max(targetOffset, 0);
|
||||
|
||||
let timePerIteration = 0;
|
||||
let timePerIteration: number;
|
||||
const MAX_ITERATIONS = 2880;
|
||||
let iters = 0;
|
||||
const t = performance.now();
|
||||
@ -284,11 +305,17 @@ export async function getSnapshotsInNextSecond(client: Client) {
|
||||
|
||||
export async function getBulkSnapshotsInNextSecond(client: Client) {
|
||||
const query = `
|
||||
SELECT *
|
||||
FROM snapshot_schedule
|
||||
WHERE started_at <= NOW() + INTERVAL '15 seconds' AND status = 'pending' AND type = 'normal'
|
||||
ORDER BY started_at
|
||||
LIMIT 1000;
|
||||
SELECT *
|
||||
FROM snapshot_schedule
|
||||
WHERE (started_at <= NOW() + INTERVAL '15 seconds')
|
||||
AND status = 'pending'
|
||||
AND (type = 'normal' OR type = 'archive')
|
||||
ORDER BY CASE
|
||||
WHEN type = 'normal' THEN 1
|
||||
WHEN type = 'archive' THEN 2
|
||||
END,
|
||||
started_at
|
||||
LIMIT 1000;
|
||||
`;
|
||||
const res = await client.queryObject<SnapshotScheduleType>(query, []);
|
||||
return res.rows;
|
||||
@ -318,3 +345,14 @@ export async function getVideosWithoutActiveSnapshotSchedule(client: Client) {
|
||||
const res = await client.queryObject<{ aid: number }>(query, []);
|
||||
return res.rows.map((r) => Number(r.aid));
|
||||
}
|
||||
|
||||
/**
 * Lists the `aid` of every `bilibili_metadata` row that has no
 * `snapshot_schedule` row in status 'pending' or 'processing'.
 *
 * @param client Database client used to run the query.
 * @returns The matching aids, each converted with `Number()`.
 */
export async function getAllVideosWithoutActiveSnapshotSchedule(client: Client) {
	// Anti-join: the LEFT JOIN keeps only active schedules, so a NULL on the
	// schedule side means the video has none.
	const statement: string = `
		SELECT s.aid
		FROM bilibili_metadata s
		LEFT JOIN snapshot_schedule ss ON s.aid = ss.aid AND (ss.status = 'pending' OR ss.status = 'processing')
		WHERE ss.aid IS NULL
	`;
	const { rows } = await client.queryObject<{ aid: number }>(statement, []);
	const aids: number[] = [];
	for (const row of rows) {
		aids.push(Number(row.aid));
	}
	return aids;
}
|
||||
|
32
packages/crawler/db/withConnection.ts
Normal file
@ -0,0 +1,32 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { db } from "db/init.ts";
|
||||
|
||||
/**
 * Executes a function with a database connection obtained from `db.connect()`.
 * The connection is always released afterwards, whether the operation
 * succeeds or throws.
 *
 * @param operation The function that accepts the `client` as the parameter.
 * @param errorHandling Optional function to handle errors. May be async; it is
 * awaited before this function resolves.
 * If no error handling function is provided, the error will be re-thrown.
 * @param cleanup Optional function to execute after the operation, once the
 * connection has been released. May be async; it is awaited, so a rejection
 * surfaces to the caller instead of becoming an unhandled rejection.
 * @returns The result of the operation or undefined if an error occurred
 * and was passed to `errorHandling`.
 */
export async function withDbConnection<T>(
	operation: (client: Client) => Promise<T>,
	errorHandling?: (error: unknown, client: Client) => void | Promise<void>,
	cleanup?: () => void | Promise<void>,
): Promise<T | undefined> {
	const client = await db.connect();
	try {
		return await operation(client);
	} catch (error) {
		if (!errorHandling) {
			throw error;
		}
		await errorHandling(error, client);
		return undefined;
	} finally {
		try {
			client.release();
		} finally {
			// Run the cleanup even if releasing the client throws, and wait
			// for async cleanups (e.g. releasing a lock) to finish.
			if (cleanup) {
				await cleanup();
			}
		}
	}
}
|
@ -12,7 +12,7 @@
|
||||
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
|
||||
"adder": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
|
||||
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
|
||||
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
|
||||
"all": "concurrently --restart-tries -1 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
|
||||
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
|
||||
},
|
||||
"lint": {
|
||||
@ -23,11 +23,13 @@
|
||||
"imports": {
|
||||
"@std/assert": "jsr:@std/assert@1",
|
||||
"$std/": "https://deno.land/std@0.216.0/",
|
||||
"@std/datetime": "jsr:@std/datetime@^0.225.4",
|
||||
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
|
||||
"bullmq": "npm:bullmq",
|
||||
"mq/": "./mq/",
|
||||
"db/": "./db/",
|
||||
"log/": "./log/",
|
||||
"@core/": "../core/",
|
||||
"log/": "../core/log/",
|
||||
"net/": "./net/",
|
||||
"ml/": "./ml/",
|
||||
"utils/": "./utils/",
|
||||
@ -37,7 +39,9 @@
|
||||
"express": "npm:express",
|
||||
"src/": "./src/",
|
||||
"onnxruntime": "npm:onnxruntime-node@1.19.2",
|
||||
"chalk": "npm:chalk"
|
||||
"chalk": "npm:chalk",
|
||||
"@core/db/schema": "../core/db/schema.d.ts",
|
||||
"@core/db/pgConfig": "../core/db/pgConfig.ts"
|
||||
},
|
||||
"exports": "./main.ts"
|
||||
}
|
||||
|
@ -4,4 +4,4 @@
|
||||
// SO HERE'S A PLACEHOLDER EXPORT FOR DENO:
|
||||
export const DENO = "FUCK YOU DENO";
|
||||
// Oh, maybe exporting the version is a good idea
|
||||
export const VERSION = "1.0.17";
|
||||
export const VERSION = "1.0.26";
|
||||
|
40
packages/crawler/mq/exec/archiveSnapshots.ts
Normal file
@ -0,0 +1,40 @@
|
||||
import { Job } from "npm:bullmq@5.45.2";
|
||||
import { getAllVideosWithoutActiveSnapshotSchedule, scheduleSnapshot } from "db/snapshotSchedule.ts";
|
||||
import { withDbConnection } from "db/withConnection.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { lockManager } from "mq/lockManager.ts";
|
||||
import { getLatestVideoSnapshot } from "db/snapshot.ts";
|
||||
import { HOUR, MINUTE } from "$std/datetime/constants.ts";
|
||||
|
||||
/**
 * Job executor that schedules an "archive" snapshot for every video returned
 * by `getAllVideosWithoutActiveSnapshotSchedule`.
 *
 * Each video is scheduled 168 hours after its latest snapshot time, or 168
 * hours from now when it has no snapshot yet. The run is guarded by the
 * "dispatchArchiveSnapshots" lock; if the lock is already held the job logs
 * and exits without touching it.
 *
 * @param _job The queue job (unused).
 */
export const archiveSnapshotsWorker = async (_job: Job) => {
	// Only the run that actually took the lock may release it. Without this
	// guard the "already running" early return would release the lock owned
	// by the run that is still in progress.
	let lockAcquired = false;
	return await withDbConnection<void>(async (client: Client) => {
		const startedAt = Date.now();
		if (await lockManager.isLocked("dispatchArchiveSnapshots")) {
			logger.log("dispatchArchiveSnapshots is already running", "mq");
			return;
		}
		// NOTE(review): 30 * 60 is presumably a TTL in seconds (30 minutes) - confirm against lockManager.
		await lockManager.acquireLock("dispatchArchiveSnapshots", 30 * 60);
		lockAcquired = true;
		const aids = await getAllVideosWithoutActiveSnapshotSchedule(client);
		for (const rawAid of aids) {
			const aid = Number(rawAid);
			const latestSnapshot = await getLatestVideoSnapshot(client, aid);
			const now = Date.now();
			const lastSnapshotedAt = latestSnapshot?.time ?? now;
			const interval = 168;
			logger.log(
				`Scheduled archive snapshot for aid ${aid} in ${interval} hours.`,
				"mq",
				"fn:archiveSnapshotsWorker",
			);
			const targetTime = lastSnapshotedAt + interval * HOUR;
			await scheduleSnapshot(client, aid, "archive", targetTime);
			// Stop while the lock should still be held: the budget must stay
			// below the 30 * 60 lock duration (the regular-snapshot
			// dispatcher uses the same 25-minute budget). The previous
			// 250-minute budget let the loop outlive its own lock.
			if (now - startedAt > 25 * MINUTE) {
				return;
			}
		}
	}, (e) => {
		logger.error(e as Error, "mq", "fn:archiveSnapshotsWorker");
	}, async () => {
		if (lockAcquired) {
			await lockManager.releaseLock("dispatchArchiveSnapshots");
		}
	});
};
|
@ -1,6 +1,6 @@
|
||||
import { Job } from "bullmq";
|
||||
import { db } from "db/init.ts";
|
||||
import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "db/allData.ts";
|
||||
import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "../../db/bilibili_metadata.ts";
|
||||
import Akari from "ml/akari.ts";
|
||||
import { ClassifyVideoQueue } from "mq/index.ts";
|
||||
import logger from "log/logger.ts";
|
||||
@ -8,7 +8,7 @@ import { lockManager } from "mq/lockManager.ts";
|
||||
import { aidExistsInSongs } from "db/songs.ts";
|
||||
import { insertIntoSongs } from "mq/task/collectSongs.ts";
|
||||
import { scheduleSnapshot } from "db/snapshotSchedule.ts";
|
||||
import { MINUTE } from "$std/datetime/constants.ts";
|
||||
import { MINUTE } from "@std/datetime";
|
||||
|
||||
export const classifyVideoWorker = async (job: Job) => {
|
||||
const client = await db.connect();
|
||||
|
9
packages/crawler/mq/exec/collectSongs.ts
Normal file
@ -0,0 +1,9 @@
|
||||
import { Job } from "npm:bullmq@5.45.2";
|
||||
import { collectSongs } from "mq/task/collectSongs.ts";
|
||||
import { withDbConnection } from "db/withConnection.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
|
||||
/**
 * Job executor that runs `collectSongs` with a database connection supplied
 * by `withDbConnection`.
 *
 * @param _job The queue job (unused).
 */
export const collectSongsWorker = (_job: Job): Promise<void> => {
	const run = async (client: Client) => {
		await collectSongs(client);
	};
	return withDbConnection(run);
};
|
29
packages/crawler/mq/exec/dispatchMilestoneSnapshots.ts
Normal file
@ -0,0 +1,29 @@
|
||||
import { Job } from "npm:bullmq@5.45.2";
|
||||
import { withDbConnection } from "db/withConnection.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { getVideosNearMilestone } from "db/snapshot.ts";
|
||||
import { getAdjustedShortTermETA } from "mq/scheduling.ts";
|
||||
import { truncate } from "utils/truncate.ts";
|
||||
import { scheduleSnapshot } from "db/snapshotSchedule.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { HOUR, MINUTE, SECOND } from "@std/datetime";
|
||||
|
||||
/**
 * Job executor that schedules a "milestone" snapshot for each video returned
 * by `getVideosNearMilestone`.
 *
 * For every video the ETA from `getAdjustedShortTermETA` is read (in hours);
 * videos with an ETA above 144 are skipped. The delay until the next snapshot
 * is the ETA bounded by `truncate` to the range of one second to one hour.
 * Errors are logged rather than re-thrown.
 *
 * @param _job The queue job (unused).
 */
export const dispatchMilestoneSnapshotsWorker = (_job: Job): Promise<void> => {
	const shortestDelay = 1 * SECOND;
	const longestDelay = 1 * HOUR;

	const dispatch = async (client: Client) => {
		const candidates = await getVideosNearMilestone(client);
		for (const candidate of candidates) {
			const aid = Number(candidate.aid);
			const etaHours = await getAdjustedShortTermETA(client, aid);
			if (etaHours > 144) continue;
			const now = Date.now();
			const delay = truncate(etaHours * HOUR, shortestDelay, longestDelay);
			await scheduleSnapshot(client, aid, "milestone", now + delay);
			logger.log(`Scheduled milestone snapshot for aid ${aid} in ${(delay / MINUTE).toFixed(2)} mins.`, "mq");
		}
	};

	const report = (e: unknown) => {
		logger.error(e as Error, "mq", "fn:dispatchMilestoneSnapshotsWorker");
	};

	return withDbConnection(dispatch, report);
};
|
39
packages/crawler/mq/exec/dispatchRegularSnapshots.ts
Normal file
@ -0,0 +1,39 @@
|
||||
import { Job } from "npm:bullmq@5.45.2";
|
||||
import { withDbConnection } from "db/withConnection.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { getLatestVideoSnapshot } from "db/snapshot.ts";
|
||||
import { truncate } from "utils/truncate.ts";
|
||||
import { getVideosWithoutActiveSnapshotSchedule, scheduleSnapshot } from "db/snapshotSchedule.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { HOUR, MINUTE, WEEK } from "@std/datetime";
|
||||
import { lockManager } from "../lockManager.ts";
|
||||
import { getRegularSnapshotInterval } from "../task/regularSnapshotInterval.ts";
|
||||
|
||||
/**
 * Job executor that schedules a "normal" snapshot for every video returned by
 * `getVideosWithoutActiveSnapshotSchedule`.
 *
 * The target time is the latest snapshot time (or now, when there is none)
 * plus the interval from `getRegularSnapshotInterval` in hours, bounded by
 * `truncate` so it is never earlier than `now + 1`. The run is guarded by the
 * "dispatchRegularSnapshots" lock and stops once it has been running for more
 * than 25 minutes. Errors are logged rather than re-thrown.
 *
 * @param _job The queue job (unused).
 */
export const dispatchRegularSnapshotsWorker = async (_job: Job): Promise<void> => {
	// Only the run that actually took the lock may release it. Without this
	// guard the "already running" early return would release the lock owned
	// by the run that is still in progress.
	let lockAcquired = false;
	await withDbConnection(async (client: Client) => {
		const startedAt = Date.now();
		if (await lockManager.isLocked("dispatchRegularSnapshots")) {
			logger.log("dispatchRegularSnapshots is already running", "mq");
			return;
		}
		await lockManager.acquireLock("dispatchRegularSnapshots", 30 * 60);
		lockAcquired = true;

		const aids = await getVideosWithoutActiveSnapshotSchedule(client);
		for (const rawAid of aids) {
			const aid = Number(rawAid);
			const latestSnapshot = await getLatestVideoSnapshot(client, aid);
			const now = Date.now();
			const lastSnapshotedAt = latestSnapshot?.time ?? now;
			const interval = await getRegularSnapshotInterval(client, aid);
			logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
			const targetTime = truncate(lastSnapshotedAt + interval * HOUR, now + 1, now + 100000 * WEEK);
			await scheduleSnapshot(client, aid, "normal", targetTime);
			if (now - startedAt > 25 * MINUTE) {
				return;
			}
		}
	}, (e) => {
		logger.error(e as Error, "mq", "fn:regularSnapshotsWorker");
	}, async () => {
		if (lockAcquired) {
			await lockManager.releaseLock("dispatchRegularSnapshots");
		}
	});
};
|
10
packages/crawler/mq/exec/executors.ts
Normal file
@ -0,0 +1,10 @@
|
||||
export * from "mq/exec/getLatestVideos.ts";
|
||||
export * from "./getVideoInfo.ts";
|
||||
export * from "./collectSongs.ts";
|
||||
export * from "./takeBulkSnapshot.ts";
|
||||
export * from "./archiveSnapshots.ts";
|
||||
export * from "./dispatchMilestoneSnapshots.ts";
|
||||
export * from "./dispatchRegularSnapshots.ts";
|
||||
export * from "./snapshotVideo.ts";
|
||||
export * from "./scheduleCleanup.ts";
|
||||
export * from "./snapshotTick.ts";
|
@ -1,37 +1,9 @@
|
||||
import { Job } from "bullmq";
|
||||
import { queueLatestVideos } from "mq/task/queueLatestVideo.ts";
|
||||
import { db } from "db/init.ts";
|
||||
import { insertVideoInfo } from "mq/task/getVideoDetails.ts";
|
||||
import { collectSongs } from "mq/task/collectSongs.ts";
|
||||
import { withDbConnection } from "db/withConnection.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
|
||||
export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
export const getLatestVideosWorker = (_job: Job): Promise<void> =>
|
||||
withDbConnection(async (client: Client) => {
|
||||
await queueLatestVideos(client);
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
||||
export const collectSongsWorker = async (_job: Job): Promise<void> => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
await collectSongs(client);
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
||||
export const getVideoInfoWorker = async (job: Job): Promise<number> => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
const aid = job.data.aid;
|
||||
if (!aid) {
|
||||
return 3;
|
||||
}
|
||||
await insertVideoInfo(client, aid);
|
||||
return 0;
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
});
|
||||
|
15
packages/crawler/mq/exec/getVideoInfo.ts
Normal file
@ -0,0 +1,15 @@
|
||||
import { Job } from "npm:bullmq@5.45.2";
|
||||
import { insertVideoInfo } from "mq/task/getVideoDetails.ts";
|
||||
import { withDbConnection } from "db/withConnection.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
/**
 * Job handler that stores the metadata of a single video.
 *
 * Reads `aid` from `job.data`; when it is missing (falsy) a warning is logged and nothing
 * else happens. Otherwise `insertVideoInfo` is invoked with a client supplied by
 * `withDbConnection`.
 *
 * @param job - Queue job whose `data.aid` identifies the video.
 * @returns A promise that settles once `withDbConnection` has finished.
 */
export const getVideoInfoWorker = async (job: Job): Promise<void> =>
	await withDbConnection<void>(async (client: Client) => {
		const { aid } = job.data;
		if (aid) {
			await insertVideoInfo(client, aid);
			return;
		}
		// No aid in the payload: there is nothing to fetch for this job.
		logger.warn("aid does not exists", "mq", "job:getVideoInfo");
	});
|
45
packages/crawler/mq/exec/scheduleCleanup.ts
Normal file
@ -0,0 +1,45 @@
|
||||
import { Job } from "npm:bullmq@5.45.2";
|
||||
import { withDbConnection } from "db/withConnection.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { scheduleSnapshot, setSnapshotStatus } from "db/snapshotSchedule.ts";
|
||||
import { SECOND } from "@std/datetime";
|
||||
import { getTimeoutSchedulesCount } from "mq/task/getTimeoutSchedulesCount.ts";
|
||||
import { removeAllTimeoutSchedules } from "mq/task/removeAllTimeoutSchedules.ts";
|
||||
|
||||
/**
 * Job handler that recovers snapshot schedules which have been 'pending' or 'processing'
 * for too long.
 *
 * - If `getTimeoutSchedulesCount` reports more than 2000 entries, the handler delegates to
 *   `removeAllTimeoutSchedules` and stops.
 * - Otherwise it selects every 'pending'/'processing' schedule whose `started_at` is older
 *   than 30 minutes (2 minutes for schedules of type 'milestone'), marks each one as
 *   "timeout" and schedules a replacement of the same type 10 seconds from now.
 *
 * Errors raised by the main callback are passed to the second callback, which logs them.
 *
 * @param _job - Unused queue job.
 */
export const scheduleCleanupWorker = async (_job: Job): Promise<void> =>
	await withDbConnection<void>(async (client: Client) => {
		const timedOutCount = await getTimeoutSchedulesCount(client);
		if (timedOutCount > 2000) {
			await removeAllTimeoutSchedules(client);
			return;
		}

		const staleSchedulesQuery: string = `
			SELECT id, aid, type
			FROM snapshot_schedule
			WHERE status IN ('pending', 'processing')
			  AND started_at < NOW() - INTERVAL '30 minutes'
			UNION
			SELECT id, aid, type
			FROM snapshot_schedule
			WHERE status IN ('pending', 'processing')
			  AND started_at < NOW() - INTERVAL '2 minutes'
			  AND type = 'milestone'
		`;
		const result = await client.queryObject<{ id: bigint; aid: bigint; type: string }>(staleSchedulesQuery);

		// An empty result simply skips the loop.
		for (const stale of result.rows) {
			const scheduleId = Number(stale.id);
			const videoAid = Number(stale.aid);
			await setSnapshotStatus(client, scheduleId, "timeout");
			await scheduleSnapshot(client, videoAid, stale.type, Date.now() + 10 * SECOND);
			logger.log(
				`Schedule ${scheduleId} has not received any response in a while, rescheduled.`,
				"mq",
				"fn:scheduleCleanupWorker",
			);
		}
	}, (e) => {
		logger.error(e as Error, "mq", "fn:scheduleCleanupWorker");
	});
|
@ -1,45 +1,21 @@
|
||||
import { Job } from "bullmq";
|
||||
import { db } from "db/init.ts";
|
||||
import { getLatestVideoSnapshot, getVideosNearMilestone } from "db/snapshot.ts";
|
||||
import {
|
||||
bulkGetVideosWithoutProcessingSchedules,
|
||||
bulkScheduleSnapshot,
|
||||
bulkSetSnapshotStatus,
|
||||
findClosestSnapshot,
|
||||
findSnapshotBefore,
|
||||
getBulkSnapshotsInNextSecond,
|
||||
getLatestSnapshot,
|
||||
getSnapshotsInNextSecond,
|
||||
getVideosWithoutActiveSnapshotSchedule,
|
||||
hasAtLeast2Snapshots,
|
||||
scheduleSnapshot,
|
||||
setSnapshotStatus,
|
||||
snapshotScheduleExists,
|
||||
videoHasProcessingSchedule,
|
||||
} from "db/snapshotSchedule.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { SnapshotQueue } from "mq/index.ts";
|
||||
import { insertVideoSnapshot } from "mq/task/getVideoStats.ts";
|
||||
import { NetSchedulerError } from "mq/scheduler.ts";
|
||||
import { getBiliVideoStatus, setBiliVideoStatus } from "db/allData.ts";
|
||||
import { truncate } from "utils/truncate.ts";
|
||||
import { lockManager } from "mq/lockManager.ts";
|
||||
import { getSongsPublihsedAt } from "db/songs.ts";
|
||||
import { bulkGetVideoStats } from "net/bulkGetVideoStats.ts";
|
||||
|
||||
const priorityMap: { [key: string]: number } = {
|
||||
"milestone": 1,
|
||||
"normal": 3,
|
||||
};
|
||||
|
||||
const snapshotTypeToTaskMap: { [key: string]: string } = {
|
||||
"milestone": "snapshotMilestoneVideo",
|
||||
"normal": "snapshotVideo",
|
||||
"new": "snapshotMilestoneVideo",
|
||||
};
|
||||
|
||||
export const bulkSnapshotTickWorker = async (_job: Job) => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
@ -52,15 +28,22 @@ export const bulkSnapshotTickWorker = async (_job: Job) => {
|
||||
const filteredAids = await bulkGetVideosWithoutProcessingSchedules(client, aids);
|
||||
if (filteredAids.length === 0) continue;
|
||||
await bulkSetSnapshotStatus(client, filteredAids, "processing");
|
||||
const dataMap: { [key: number]: number } = {};
|
||||
for (const schedule of group) {
|
||||
const id = Number(schedule.id);
|
||||
dataMap[id] = Number(schedule.aid);
|
||||
}
|
||||
const schedulesData = group.map((schedule) => {
|
||||
return {
|
||||
aid: Number(schedule.aid),
|
||||
id: Number(schedule.id),
|
||||
type: schedule.type,
|
||||
created_at: schedule.created_at,
|
||||
started_at: schedule.started_at,
|
||||
finished_at: schedule.finished_at,
|
||||
status: schedule.status,
|
||||
};
|
||||
});
|
||||
await SnapshotQueue.add("bulkSnapshotVideo", {
|
||||
map: dataMap,
|
||||
schedules: schedulesData,
|
||||
}, { priority: 3 });
|
||||
}
|
||||
return `OK`;
|
||||
} catch (e) {
|
||||
logger.error(e as Error);
|
||||
} finally {
|
||||
@ -74,7 +57,7 @@ export const snapshotTickWorker = async (_job: Job) => {
|
||||
const schedules = await getSnapshotsInNextSecond(client);
|
||||
for (const schedule of schedules) {
|
||||
if (await videoHasProcessingSchedule(client, Number(schedule.aid))) {
|
||||
return `ALREADY_PROCESSING`;
|
||||
continue;
|
||||
}
|
||||
let priority = 3;
|
||||
if (schedule.type && priorityMap[schedule.type]) {
|
||||
@ -83,11 +66,12 @@ export const snapshotTickWorker = async (_job: Job) => {
|
||||
const aid = Number(schedule.aid);
|
||||
await setSnapshotStatus(client, schedule.id, "processing");
|
||||
await SnapshotQueue.add("snapshotVideo", {
|
||||
aid: aid,
|
||||
aid: Number(aid),
|
||||
id: Number(schedule.id),
|
||||
type: schedule.type ?? "normal",
|
||||
}, { priority });
|
||||
}
|
||||
return `OK`;
|
||||
} catch (e) {
|
||||
logger.error(e as Error);
|
||||
} finally {
|
||||
@ -100,297 +84,3 @@ export const closetMilestone = (views: number) => {
|
||||
if (views < 1000000) return 1000000;
|
||||
return 10000000;
|
||||
};
|
||||
|
||||
const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base);
|
||||
|
||||
/*
|
||||
* Returns the minimum ETA in hours for the next snapshot
|
||||
* @param client - Postgres client
|
||||
* @param aid - aid of the video
|
||||
* @returns ETA in hours
|
||||
*/
|
||||
export const getAdjustedShortTermETA = async (client: Client, aid: number) => {
|
||||
const latestSnapshot = await getLatestSnapshot(client, aid);
|
||||
// Immediately dispatch a snapshot if there is no snapshot yet
|
||||
if (!latestSnapshot) return 0;
|
||||
const snapshotsEnough = await hasAtLeast2Snapshots(client, aid);
|
||||
if (!snapshotsEnough) return 0;
|
||||
|
||||
const currentTimestamp = new Date().getTime();
|
||||
const timeIntervals = [3 * MINUTE, 20 * MINUTE, 1 * HOUR, 3 * HOUR, 6 * HOUR, 72 * HOUR];
|
||||
const DELTA = 0.00001;
|
||||
let minETAHours = Infinity;
|
||||
|
||||
for (const timeInterval of timeIntervals) {
|
||||
const date = new Date(currentTimestamp - timeInterval);
|
||||
const snapshot = await findClosestSnapshot(client, aid, date);
|
||||
if (!snapshot) continue;
|
||||
const hoursDiff = (latestSnapshot.created_at - snapshot.created_at) / HOUR;
|
||||
const viewsDiff = latestSnapshot.views - snapshot.views;
|
||||
if (viewsDiff <= 0) continue;
|
||||
const speed = viewsDiff / (hoursDiff + DELTA);
|
||||
const target = closetMilestone(latestSnapshot.views);
|
||||
const viewsToIncrease = target - latestSnapshot.views;
|
||||
const eta = viewsToIncrease / (speed + DELTA);
|
||||
let factor = log(2.97 / log(viewsToIncrease + 1), 1.14);
|
||||
factor = truncate(factor, 3, 100);
|
||||
const adjustedETA = eta / factor;
|
||||
if (adjustedETA < minETAHours) {
|
||||
minETAHours = adjustedETA;
|
||||
}
|
||||
}
|
||||
|
||||
if (isNaN(minETAHours)) {
|
||||
minETAHours = Infinity;
|
||||
}
|
||||
|
||||
return minETAHours;
|
||||
};
|
||||
|
||||
export const collectMilestoneSnapshotsWorker = async (_job: Job) => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
const videos = await getVideosNearMilestone(client);
|
||||
for (const video of videos) {
|
||||
const aid = Number(video.aid);
|
||||
const eta = await getAdjustedShortTermETA(client, aid);
|
||||
if (eta > 72) continue;
|
||||
const now = Date.now();
|
||||
const scheduledNextSnapshotDelay = eta * HOUR;
|
||||
const maxInterval = 4 * HOUR;
|
||||
const minInterval = 1 * SECOND;
|
||||
const delay = truncate(scheduledNextSnapshotDelay, minInterval, maxInterval);
|
||||
const targetTime = now + delay;
|
||||
await scheduleSnapshot(client, aid, "milestone", targetTime);
|
||||
}
|
||||
} catch (e) {
|
||||
logger.error(e as Error, "mq", "fn:collectMilestoneSnapshotsWorker");
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
||||
const getRegularSnapshotInterval = async (client: Client, aid: number) => {
|
||||
const now = Date.now();
|
||||
const date = new Date(now - 24 * HOUR);
|
||||
let oldSnapshot = await findSnapshotBefore(client, aid, date);
|
||||
if (!oldSnapshot) oldSnapshot = await findClosestSnapshot(client, aid, date);
|
||||
const latestSnapshot = await getLatestSnapshot(client, aid);
|
||||
if (!oldSnapshot || !latestSnapshot) return 0;
|
||||
if (oldSnapshot.created_at === latestSnapshot.created_at) return 0;
|
||||
const hoursDiff = (latestSnapshot.created_at - oldSnapshot.created_at) / HOUR;
|
||||
if (hoursDiff < 8) return 24;
|
||||
const viewsDiff = latestSnapshot.views - oldSnapshot.views;
|
||||
if (viewsDiff === 0) return 72;
|
||||
const speedPerDay = viewsDiff / (hoursDiff + 0.001) * 24;
|
||||
if (speedPerDay < 6) return 36;
|
||||
if (speedPerDay < 120) return 24;
|
||||
if (speedPerDay < 320) return 12;
|
||||
return 6;
|
||||
};
|
||||
|
||||
export const regularSnapshotsWorker = async (_job: Job) => {
|
||||
const client = await db.connect();
|
||||
const startedAt = Date.now();
|
||||
if (await lockManager.isLocked("dispatchRegularSnapshots")) {
|
||||
logger.log("dispatchRegularSnapshots is already running", "mq");
|
||||
client.release();
|
||||
return;
|
||||
}
|
||||
await lockManager.acquireLock("dispatchRegularSnapshots", 30 * 60);
|
||||
try {
|
||||
const aids = await getVideosWithoutActiveSnapshotSchedule(client);
|
||||
for (const rawAid of aids) {
|
||||
const aid = Number(rawAid);
|
||||
const latestSnapshot = await getLatestVideoSnapshot(client, aid);
|
||||
const now = Date.now();
|
||||
const lastSnapshotedAt = latestSnapshot?.time ?? now;
|
||||
const interval = await getRegularSnapshotInterval(client, aid);
|
||||
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
|
||||
const targetTime = truncate(lastSnapshotedAt + interval * HOUR, now + 1, now + 100000 * WEEK);
|
||||
await scheduleSnapshot(client, aid, "normal", targetTime);
|
||||
if (now - startedAt > 25 * MINUTE) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
logger.error(e as Error, "mq", "fn:regularSnapshotsWorker");
|
||||
} finally {
|
||||
lockManager.releaseLock("dispatchRegularSnapshots");
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
||||
export const takeBulkSnapshotForVideosWorker = async (job: Job) => {
|
||||
const dataMap: { [key: number]: number } = job.data.map;
|
||||
const ids = Object.keys(dataMap).map((id) => Number(id));
|
||||
const aidsToFetch: number[] = [];
|
||||
const client = await db.connect();
|
||||
try {
|
||||
for (const id of ids) {
|
||||
const aid = Number(dataMap[id]);
|
||||
const exists = await snapshotScheduleExists(client, id);
|
||||
if (!exists) {
|
||||
continue;
|
||||
}
|
||||
aidsToFetch.push(aid);
|
||||
}
|
||||
const data = await bulkGetVideoStats(aidsToFetch);
|
||||
if (typeof data === "number") {
|
||||
await bulkSetSnapshotStatus(client, ids, "failed");
|
||||
await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 15 * SECOND);
|
||||
return `GET_BILI_STATUS_${data}`;
|
||||
}
|
||||
for (const video of data) {
|
||||
const aid = video.id;
|
||||
const stat = video.cnt_info;
|
||||
const views = stat.play;
|
||||
const danmakus = stat.danmaku;
|
||||
const replies = stat.reply;
|
||||
const likes = stat.thumb_up;
|
||||
const coins = stat.coin;
|
||||
const shares = stat.share;
|
||||
const favorites = stat.collect;
|
||||
const query: string = `
|
||||
INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
`;
|
||||
await client.queryObject(
|
||||
query,
|
||||
[aid, views, danmakus, replies, likes, coins, shares, favorites],
|
||||
);
|
||||
|
||||
logger.log(`Taken snapshot for video ${aid} in bulk.`, "net", "fn:takeBulkSnapshotForVideosWorker");
|
||||
}
|
||||
await bulkSetSnapshotStatus(client, ids, "completed");
|
||||
for (const aid of aidsToFetch) {
|
||||
const interval = await getRegularSnapshotInterval(client, aid);
|
||||
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
|
||||
await scheduleSnapshot(client, aid, "normal", Date.now() + interval * HOUR);
|
||||
}
|
||||
return `DONE`;
|
||||
} catch (e) {
|
||||
if (e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE") {
|
||||
logger.warn(
|
||||
`No available proxy for bulk request now.`,
|
||||
"mq",
|
||||
"fn:takeBulkSnapshotForVideosWorker",
|
||||
);
|
||||
await bulkSetSnapshotStatus(client, ids, "completed");
|
||||
await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 2 * MINUTE);
|
||||
return;
|
||||
}
|
||||
logger.error(e as Error, "mq", "fn:takeBulkSnapshotForVideosWorker");
|
||||
await bulkSetSnapshotStatus(client, ids, "failed");
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
||||
export const takeSnapshotForVideoWorker = async (job: Job) => {
|
||||
const id = job.data.id;
|
||||
const aid = Number(job.data.aid);
|
||||
const type = job.data.type;
|
||||
const task = snapshotTypeToTaskMap[type] ?? "snapshotVideo";
|
||||
const client = await db.connect();
|
||||
const retryInterval = type === "milestone" ? 5 * SECOND : 2 * MINUTE;
|
||||
const exists = await snapshotScheduleExists(client, id);
|
||||
if (!exists) {
|
||||
client.release();
|
||||
return;
|
||||
}
|
||||
const status = await getBiliVideoStatus(client, aid);
|
||||
if (status !== 0) {
|
||||
client.release();
|
||||
return `REFUSE_WORKING_BILI_STATUS_${status}`;
|
||||
}
|
||||
try {
|
||||
await setSnapshotStatus(client, id, "processing");
|
||||
const stat = await insertVideoSnapshot(client, aid, task);
|
||||
if (typeof stat === "number") {
|
||||
await setBiliVideoStatus(client, aid, stat);
|
||||
await setSnapshotStatus(client, id, "completed");
|
||||
return `GET_BILI_STATUS_${stat}`;
|
||||
}
|
||||
await setSnapshotStatus(client, id, "completed");
|
||||
if (type === "normal") {
|
||||
const interval = await getRegularSnapshotInterval(client, aid);
|
||||
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
|
||||
await scheduleSnapshot(client, aid, type, Date.now() + interval * HOUR);
|
||||
return `DONE`;
|
||||
} else if (type === "new") {
|
||||
const publihsedAt = await getSongsPublihsedAt(client, aid);
|
||||
const timeSincePublished = stat.time - publihsedAt!;
|
||||
const viewsPerHour = stat.views / timeSincePublished * HOUR;
|
||||
if (timeSincePublished > 48 * HOUR) {
|
||||
return `DONE`;
|
||||
}
|
||||
if (timeSincePublished > 2 * HOUR && viewsPerHour < 10) {
|
||||
return `DONE`;
|
||||
}
|
||||
let intervalMins = 240;
|
||||
if (viewsPerHour > 50) {
|
||||
intervalMins = 120;
|
||||
}
|
||||
if (viewsPerHour > 100) {
|
||||
intervalMins = 60;
|
||||
}
|
||||
if (viewsPerHour > 1000) {
|
||||
intervalMins = 15;
|
||||
}
|
||||
await scheduleSnapshot(client, aid, type, Date.now() + intervalMins * MINUTE, true);
|
||||
}
|
||||
if (type !== "milestone") return `DONE`;
|
||||
const eta = await getAdjustedShortTermETA(client, aid);
|
||||
if (eta > 72) return "ETA_TOO_LONG";
|
||||
const now = Date.now();
|
||||
const targetTime = now + eta * HOUR;
|
||||
await scheduleSnapshot(client, aid, type, targetTime);
|
||||
return `DONE`;
|
||||
} catch (e) {
|
||||
if (e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE") {
|
||||
logger.warn(
|
||||
`No available proxy for aid ${job.data.aid}.`,
|
||||
"mq",
|
||||
"fn:takeSnapshotForVideoWorker",
|
||||
);
|
||||
await setSnapshotStatus(client, id, "completed");
|
||||
await scheduleSnapshot(client, aid, type, Date.now() + retryInterval);
|
||||
return;
|
||||
}
|
||||
logger.error(e as Error, "mq", "fn:takeSnapshotForVideoWorker");
|
||||
await setSnapshotStatus(client, id, "failed");
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
||||
export const scheduleCleanupWorker = async (_job: Job) => {
|
||||
const client = await db.connect();
|
||||
try {
|
||||
const query = `
|
||||
SELECT id, aid, type
|
||||
FROM snapshot_schedule
|
||||
WHERE status IN ('pending', 'processing')
|
||||
AND started_at < NOW() - INTERVAL '30 minutes'
|
||||
`;
|
||||
const { rows } = await client.queryObject<{ id: bigint; aid: bigint; type: string }>(query);
|
||||
if (rows.length === 0) return;
|
||||
for (const row of rows) {
|
||||
const id = Number(row.id);
|
||||
const aid = Number(row.aid);
|
||||
const type = row.type;
|
||||
await setSnapshotStatus(client, id, "timeout");
|
||||
await scheduleSnapshot(client, aid, type, Date.now() + 10 * SECOND);
|
||||
logger.log(
|
||||
`Schedule ${id} has no response received for 5 minutes, rescheduled.`,
|
||||
"mq",
|
||||
"fn:scheduleCleanupWorker",
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
logger.error(e as Error, "mq", "fn:scheduleCleanupWorker");
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
};
|
||||
|
107
packages/crawler/mq/exec/snapshotVideo.ts
Normal file
@ -0,0 +1,107 @@
|
||||
import { Job } from "npm:bullmq@5.45.2";
|
||||
import { withDbConnection } from "db/withConnection.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { scheduleSnapshot, setSnapshotStatus, snapshotScheduleExists } from "db/snapshotSchedule.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { HOUR, MINUTE, SECOND } from "@std/datetime";
|
||||
import { lockManager } from "mq/lockManager.ts";
|
||||
import { getBiliVideoStatus, setBiliVideoStatus } from "../../db/bilibili_metadata.ts";
|
||||
import { insertVideoSnapshot } from "mq/task/getVideoStats.ts";
|
||||
import { getSongsPublihsedAt } from "db/songs.ts";
|
||||
import { getAdjustedShortTermETA } from "mq/scheduling.ts";
|
||||
import { NetSchedulerError } from "@core/net/delegate.ts";
|
||||
|
||||
const snapshotTypeToTaskMap: { [key: string]: string } = {
|
||||
"milestone": "snapshotMilestoneVideo",
|
||||
"normal": "snapshotVideo",
|
||||
"new": "snapshotMilestoneVideo",
|
||||
};
|
||||
|
||||
/**
 * Job handler that takes one scheduled snapshot of a video and, depending on the schedule
 * type, queues the follow-up snapshot.
 *
 * Steps performed with a client supplied by `withDbConnection`:
 * 1. Return silently if the schedule `id` no longer exists.
 * 2. Return with a warning if the stored Bilibili status of the video is not 0.
 * 3. Mark the schedule "processing" and call `insertVideoSnapshot`. A numeric result is
 *    treated as a Bilibili status code: it is persisted via `setBiliVideoStatus`, the
 *    schedule is marked "bili_error" and the handler stops.
 * 4. Mark the schedule "completed".
 * 5. For type "new": schedule the next snapshot 15-240 minutes ahead based on views per hour,
 *    unless the video is older than 48 hours or (older than 2 hours and below 10 views/hour).
 * 6. For type "milestone": schedule the next snapshot `getAdjustedShortTermETA` hours ahead,
 *    unless that ETA exceeds 144 hours.
 *
 * On `NetSchedulerError` with code "NO_PROXY_AVAILABLE" the schedule is marked "no_proxy" and
 * retried after 5 seconds (milestone) or 2 minutes (other types); any other error marks the
 * schedule "failed".
 *
 * @param job - Queue job; `job.data` carries `id` (schedule id), `aid` (video id) and `type`.
 */
export const snapshotVideoWorker = async (job: Job): Promise<void> => {
	const id = job.data.id;
	const aid = Number(job.data.aid);
	const type = job.data.type;
	const task = snapshotTypeToTaskMap[type] ?? "snapshotVideo";
	const retryInterval = type === "milestone" ? 5 * SECOND : 2 * MINUTE;
	// NOTE: no cleanup callback is passed. This worker never acquires the
	// "dispatchRegularSnapshots" lock, so it must not release it either.
	await withDbConnection(async (client: Client) => {
		const exists = await snapshotScheduleExists(client, id);
		if (!exists) {
			return;
		}
		const status = await getBiliVideoStatus(client, aid);
		if (status !== 0) {
			logger.warn(
				`Video ${aid} has status ${status} in the database. Abort snapshoting.`,
				"mq",
				"fn:snapshotVideoWorker",
			);
			return;
		}
		await setSnapshotStatus(client, id, "processing");
		const stat = await insertVideoSnapshot(client, aid, task);
		if (typeof stat === "number") {
			await setBiliVideoStatus(client, aid, stat);
			await setSnapshotStatus(client, id, "bili_error");
			// Report the code that was just returned (`stat`); `status` is always 0 here.
			logger.warn(
				`Bilibili return status ${stat} when snapshoting for ${aid}.`,
				"mq",
				"fn:snapshotVideoWorker",
			);
			return;
		}
		await setSnapshotStatus(client, id, "completed");
		if (type === "new") {
			const publihsedAt = await getSongsPublihsedAt(client, aid);
			const timeSincePublished = stat.time - publihsedAt!;
			const viewsPerHour = stat.views / timeSincePublished * HOUR;
			if (timeSincePublished > 48 * HOUR) {
				return;
			}
			if (timeSincePublished > 2 * HOUR && viewsPerHour < 10) {
				return;
			}
			// The faster the video gains views, the sooner the next snapshot is taken.
			let intervalMins = 240;
			if (viewsPerHour > 50) {
				intervalMins = 120;
			}
			if (viewsPerHour > 100) {
				intervalMins = 60;
			}
			if (viewsPerHour > 1000) {
				intervalMins = 15;
			}
			// NOTE(review): the meaning of the trailing `true` is defined by scheduleSnapshot - confirm there.
			await scheduleSnapshot(client, aid, type, Date.now() + intervalMins * MINUTE, true);
		}
		if (type !== "milestone") return;
		const eta = await getAdjustedShortTermETA(client, aid);
		if (eta > 144) {
			const etaHoursString = eta.toFixed(2) + " hrs";
			logger.warn(
				`ETA (${etaHoursString}) too long for milestone snapshot. aid: ${aid}.`,
				"mq",
				"fn:snapshotVideoWorker",
			);
			// Stop here: the ETA may be Infinity, which would yield an invalid target time and
			// would otherwise flip this already completed schedule to "failed" in the error handler.
			return;
		}
		const now = Date.now();
		const targetTime = now + eta * HOUR;
		await scheduleSnapshot(client, aid, type, targetTime);
		await setSnapshotStatus(client, id, "completed");
		return;
	}, async (e, client) => {
		if (e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE") {
			logger.warn(
				`No available proxy for aid ${job.data.aid}.`,
				"mq",
				"fn:snapshotVideoWorker",
			);
			await setSnapshotStatus(client, id, "no_proxy");
			await scheduleSnapshot(client, aid, type, Date.now() + retryInterval);
			return;
		}
		logger.error(e as Error, "mq", "fn:snapshotVideoWorker");
		await setSnapshotStatus(client, id, "failed");
	});
	return;
};
|
85
packages/crawler/mq/exec/takeBulkSnapshot.ts
Normal file
@ -0,0 +1,85 @@
|
||||
import { Job } from "npm:bullmq@5.45.2";
|
||||
import { db } from "db/init.ts";
|
||||
import {
|
||||
bulkScheduleSnapshot,
|
||||
bulkSetSnapshotStatus,
|
||||
scheduleSnapshot,
|
||||
snapshotScheduleExists,
|
||||
} from "db/snapshotSchedule.ts";
|
||||
import { bulkGetVideoStats } from "net/bulkGetVideoStats.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { NetSchedulerError } from "@core/net/delegate.ts";
|
||||
import { HOUR, MINUTE, SECOND } from "@std/datetime";
|
||||
import { getRegularSnapshotInterval } from "../task/regularSnapshotInterval.ts";
|
||||
import { SnapshotScheduleType } from "@core/db/schema";
|
||||
|
||||
/**
 * Job handler that snapshots a batch of videos with a single bulk stats request.
 *
 * `job.data.schedules` lists the schedules to serve. Only videos whose schedule still exists
 * are requested from `bulkGetVideoStats`.
 *
 * - Numeric result from `bulkGetVideoStats`: all schedules are marked "failed", the requested
 *   videos are rescheduled as "normal" 15 seconds ahead and `GET_BILI_STATUS_<code>` is returned.
 * - Otherwise one `video_snapshot` row is inserted per returned video, all schedules are marked
 *   "completed", and every schedule that is not of type "archive" gets a new "normal" schedule
 *   at the interval given by `getRegularSnapshotInterval`; `DONE` is returned.
 * - `NetSchedulerError` with code "NO_PROXY_AVAILABLE": schedules are marked "no_proxy" and the
 *   requested videos are rescheduled at a random point within the next 20 minutes.
 * - Any other error: logged, schedules marked "failed".
 *
 * The database client is always released.
 *
 * @param job - Queue job carrying `data.schedules`.
 */
export const takeBulkSnapshotForVideosWorker = async (job: Job) => {
	const schedules: SnapshotScheduleType[] = job.data.schedules;
	const ids = schedules.map(({ id }) => Number(id));
	const aidsToFetch: number[] = [];
	const client = await db.connect();
	try {
		// Keep only the videos whose schedule row is still present.
		for (const { id, aid } of schedules) {
			if (await snapshotScheduleExists(client, Number(id))) {
				aidsToFetch.push(Number(aid));
			}
		}

		const data = await bulkGetVideoStats(aidsToFetch);
		if (typeof data === "number") {
			await bulkSetSnapshotStatus(client, ids, "failed");
			await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 15 * SECOND);
			return `GET_BILI_STATUS_${data}`;
		}

		const insertSnapshotQuery: string = `
			INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
			VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
		`;
		for (const { id: videoAid, cnt_info: counters } of data) {
			await client.queryObject(
				insertSnapshotQuery,
				[
					videoAid,
					counters.play,
					counters.danmaku,
					counters.reply,
					counters.thumb_up,
					counters.coin,
					counters.share,
					counters.collect,
				],
			);
			logger.log(`Taken snapshot for video ${videoAid} in bulk.`, "net", "fn:takeBulkSnapshotForVideosWorker");
		}
		await bulkSetSnapshotStatus(client, ids, "completed");

		// Queue the next regular snapshot for everything except archive schedules.
		for (const schedule of schedules) {
			if (schedule.type != "archive") {
				const nextAid = Number(schedule.aid);
				const interval = await getRegularSnapshotInterval(client, nextAid);
				logger.log(`Scheduled regular snapshot for aid ${nextAid} in ${interval} hours.`, "mq");
				await scheduleSnapshot(client, nextAid, "normal", Date.now() + interval * HOUR);
			}
		}
		return `DONE`;
	} catch (e) {
		const noProxy = e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE";
		if (!noProxy) {
			logger.error(e as Error, "mq", "fn:takeBulkSnapshotForVideosWorker");
			await bulkSetSnapshotStatus(client, ids, "failed");
			return;
		}
		logger.warn(
			`No available proxy for bulk request now.`,
			"mq",
			"fn:takeBulkSnapshotForVideosWorker",
		);
		await bulkSetSnapshotStatus(client, ids, "no_proxy");
		await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 20 * MINUTE * Math.random());
		return;
	} finally {
		client.release();
	}
};
|
@ -1 +0,0 @@
|
||||
export * from "mq/exec/getLatestVideos.ts";
|
@ -1,9 +1,9 @@
|
||||
import { MINUTE, SECOND } from "$std/datetime/constants.ts";
|
||||
import { HOUR, MINUTE, SECOND } from "$std/datetime/constants.ts";
|
||||
import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "mq/index.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { initSnapshotWindowCounts } from "db/snapshotSchedule.ts";
|
||||
import { db } from "db/init.ts";
|
||||
import { redis } from "db/redis.ts";
|
||||
import { redis } from "@core/db/redis.ts";
|
||||
|
||||
export async function initMQ() {
|
||||
const client = await db.connect();
|
||||
@ -30,8 +30,8 @@ export async function initMQ() {
|
||||
immediately: true,
|
||||
}, {
|
||||
opts: {
|
||||
removeOnComplete: 1,
|
||||
removeOnFail: 1,
|
||||
removeOnComplete: 300,
|
||||
removeOnFail: 600,
|
||||
},
|
||||
});
|
||||
|
||||
@ -40,12 +40,12 @@ export async function initMQ() {
|
||||
immediately: true,
|
||||
}, {
|
||||
opts: {
|
||||
removeOnComplete: 1,
|
||||
removeOnFail: 1,
|
||||
removeOnComplete: 60,
|
||||
removeOnFail: 600,
|
||||
},
|
||||
});
|
||||
|
||||
await SnapshotQueue.upsertJobScheduler("collectMilestoneSnapshots", {
|
||||
await SnapshotQueue.upsertJobScheduler("dispatchMilestoneSnapshots", {
|
||||
every: 5 * MINUTE,
|
||||
immediately: true,
|
||||
});
|
||||
@ -55,8 +55,13 @@ export async function initMQ() {
|
||||
immediately: true,
|
||||
});
|
||||
|
||||
await SnapshotQueue.upsertJobScheduler("dispatchArchiveSnapshots", {
|
||||
every: 6 * HOUR,
|
||||
immediately: true,
|
||||
});
|
||||
|
||||
await SnapshotQueue.upsertJobScheduler("scheduleCleanup", {
|
||||
every: 30 * MINUTE,
|
||||
every: 2 * MINUTE,
|
||||
immediately: true,
|
||||
});
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { Redis } from "ioredis";
|
||||
import { redis } from "db/redis.ts";
|
||||
import { redis } from "../../core/db/redis.ts";
|
||||
|
||||
class LockManager {
|
||||
private redis: Redis;
|
||||
|
65
packages/crawler/mq/scheduling.ts
Normal file
@ -0,0 +1,65 @@
|
||||
import { findClosestSnapshot, getLatestSnapshot, hasAtLeast2Snapshots } from "db/snapshotSchedule.ts";
|
||||
import { truncate } from "utils/truncate.ts";
|
||||
import { closetMilestone } from "./exec/snapshotTick.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { HOUR, MINUTE } from "@std/datetime";
|
||||
|
||||
/** Logarithm of `value` in the given `base` (base 10 when omitted). */
const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base);

/**
 * Maps a view count `x` to a divisor factor.
 *
 * Two curves of the form log_1.054(4.5 / log10(...)) are used: above 455 the argument is
 * `x + 1`; at or below 455 it is `x + 100` and a constant 0.601 is added.
 *
 * @param x - Number of views (the caller passes the views still missing to the next milestone).
 * @returns The raw factor; it may be negative or NaN for extreme inputs.
 */
const getFactor = (x: number) => {
	const LOG_BASE = 1.054;
	const NUMERATOR = 4.5;
	const SMALL_X_SHIFT = 100;
	const SMALL_X_OFFSET = 0.601;
	const THRESHOLD = 455;
	if (x > THRESHOLD) {
		return log(NUMERATOR / log(x + 1), LOG_BASE);
	}
	return log(NUMERATOR / log(x + SMALL_X_SHIFT), LOG_BASE) + SMALL_X_OFFSET;
};
|
||||
|
||||
/**
 * Estimates how many hours remain until the video reaches its next view milestone and
 * returns the smallest (most urgent) adjusted estimate.
 *
 * The latest snapshot is compared against the snapshots closest to 3 min, 20 min, 1 h, 3 h,
 * 6 h and 72 h ago. For each window with a positive view gain, the raw ETA is divided by a
 * factor from `getFactor`, clamped via `truncate(factor, 4.5, 100)`.
 *
 * @param client - Postgres client
 * @param aid - aid of the video
 * @returns ETA in hours; 0 when the video has no snapshot or fewer than two snapshots,
 *          Infinity when no window produced an estimate.
 */
export const getAdjustedShortTermETA = async (client: Client, aid: number) => {
	const latestSnapshot = await getLatestSnapshot(client, aid);
	// Without enough history a snapshot should be dispatched immediately.
	if (!latestSnapshot || !(await hasAtLeast2Snapshots(client, aid))) return 0;

	const nowMs = new Date().getTime();
	const lookbackWindows = [3 * MINUTE, 20 * MINUTE, 1 * HOUR, 3 * HOUR, 6 * HOUR, 72 * HOUR];
	// Small constant that keeps the divisions below away from zero.
	const EPSILON = 0.00001;
	let bestETAHours = Infinity;

	for (const lookback of lookbackWindows) {
		const reference = await findClosestSnapshot(client, aid, new Date(nowMs - lookback));
		if (!reference) continue;

		const gainedViews = latestSnapshot.views - reference.views;
		if (gainedViews <= 0) continue;

		const elapsedHours = (latestSnapshot.created_at - reference.created_at) / HOUR;
		const viewsPerHour = gainedViews / (elapsedHours + EPSILON);
		const remainingViews = closetMilestone(latestSnapshot.views) - latestSnapshot.views;
		const rawETA = remainingViews / (viewsPerHour + EPSILON);
		const factor = truncate(getFactor(remainingViews), 4.5, 100);
		const candidate = rawETA / factor;

		// A plain comparison (not Math.min) so that a NaN candidate is ignored.
		if (candidate < bestETAHours) {
			bestETAHours = candidate;
		}
	}

	return isNaN(bestETAHours) ? Infinity : bestETAHours;
};
|
@ -2,7 +2,7 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { aidExistsInSongs, getNotCollectedSongs } from "db/songs.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { scheduleSnapshot } from "db/snapshotSchedule.ts";
|
||||
import { MINUTE } from "$std/datetime/constants.ts";
|
||||
import { MINUTE } from "@std/datetime";
|
||||
|
||||
export async function collectSongs(client: Client) {
|
||||
const aids = await getNotCollectedSongs(client);
|
||||
|
13
packages/crawler/mq/task/getTimeoutSchedulesCount.ts
Normal file
@ -0,0 +1,13 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
|
||||
/**
 * Counts the snapshot schedules that are still 'pending' or 'processing' although their
 * `started_at` lies more than 30 minutes in the past.
 *
 * @param client - Postgres client used to run the query.
 * @returns The number of such schedules as a plain JavaScript number.
 */
export async function getTimeoutSchedulesCount(client: Client) {
	const query: string = `
		SELECT COUNT(id)
		FROM snapshot_schedule
		WHERE status IN ('pending', 'processing')
		  AND started_at < NOW() - INTERVAL '30 minutes'
	`;

	// COUNT() is a Postgres bigint, which the driver hands back as a JS bigint; convert it so
	// the value really is the `number` callers expect (same pattern as Number(row.id) elsewhere).
	const { rows } = await client.queryObject<{ count: bigint | number }>(query);
	return Number(rows[0].count);
}
|
@ -3,8 +3,8 @@ import { getVideoDetails } from "net/getVideoDetails.ts";
|
||||
import { formatTimestampToPsql } from "utils/formatTimestampToPostgre.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { ClassifyVideoQueue } from "mq/index.ts";
|
||||
import { userExistsInBiliUsers, videoExistsInAllData } from "db/allData.ts";
|
||||
import { HOUR, SECOND } from "$std/datetime/constants.ts";
|
||||
import { userExistsInBiliUsers, videoExistsInAllData } from "../../db/bilibili_metadata.ts";
|
||||
import { HOUR, SECOND } from "@std/datetime";
|
||||
|
||||
export async function insertVideoInfo(client: Client, aid: number) {
|
||||
const videoExists = await videoExistsInAllData(client, aid);
|
||||
@ -42,6 +42,18 @@ export async function insertVideoInfo(client: Client, aid: number) {
|
||||
[data.Card.follower, uid],
|
||||
);
|
||||
}
|
||||
|
||||
const stat = data.View.stat;
|
||||
|
||||
const query: string = `
|
||||
INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||
`;
|
||||
await client.queryObject(
|
||||
query,
|
||||
[aid, stat.view, stat.danmaku, stat.reply, stat.like, stat.coin, stat.share, stat.favorite],
|
||||
);
|
||||
|
||||
logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
|
||||
await ClassifyVideoQueue.add("classifyVideo", { aid });
|
||||
}
|
||||
|
@ -1,8 +1,19 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { getVideoInfo } from "net/getVideoInfo.ts";
|
||||
import { LatestSnapshotType } from "db/schema.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
/*
 * Numeric statistics of a single video snapshot.
 * Every member is a plain number.
 */
export interface SnapshotNumber {
	// Which video the figures belong to.
	aid: number;
	// NOTE(review): presumably the moment the snapshot was taken — confirm unit with the producer.
	time: number;

	// Counters.
	views: number;
	danmakus: number;
	replies: number;
	likes: number;
	coins: number;
	shares: number;
	favorites: number;
}
|
||||
|
||||
/*
|
||||
* Fetch video stats from bilibili API and insert into database
|
||||
* @returns {Promise<number|SnapshotNumber>}
|
||||
@ -17,7 +28,7 @@ export async function insertVideoSnapshot(
|
||||
client: Client,
|
||||
aid: number,
|
||||
task: string,
|
||||
): Promise<number | LatestSnapshotType> {
|
||||
): Promise<number | SnapshotNumber> {
|
||||
const data = await getVideoInfo(aid, task);
|
||||
if (typeof data == "number") {
|
||||
return data;
|
||||
@ -42,7 +53,7 @@ export async function insertVideoSnapshot(
|
||||
|
||||
logger.log(`Taken snapshot for video ${aid}.`, "net", "fn:insertVideoSnapshot");
|
||||
|
||||
const snapshot: LatestSnapshotType = {
|
||||
return {
|
||||
aid,
|
||||
views,
|
||||
danmakus,
|
||||
@ -53,6 +64,4 @@ export async function insertVideoSnapshot(
|
||||
favorites,
|
||||
time,
|
||||
};
|
||||
|
||||
return snapshot;
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { getLatestVideoAids } from "net/getLatestVideoAids.ts";
|
||||
import { videoExistsInAllData } from "db/allData.ts";
|
||||
import { videoExistsInAllData } from "../../db/bilibili_metadata.ts";
|
||||
import { sleep } from "utils/sleep.ts";
|
||||
import { SECOND } from "$std/datetime/constants.ts";
|
||||
import { SECOND } from "@std/datetime";
|
||||
import logger from "log/logger.ts";
|
||||
import { LatestVideosQueue } from "mq/index.ts";
|
||||
|
||||
|
22
packages/crawler/mq/task/regularSnapshotInterval.ts
Normal file
@ -0,0 +1,22 @@
|
||||
import { findClosestSnapshot, findSnapshotBefore, getLatestSnapshot } from "db/snapshotSchedule.ts";
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import { HOUR } from "@std/datetime";
|
||||
|
||||
/*
 * Choose the regular snapshot interval for a video from its recent view growth.
 *
 * The latest snapshot is compared with a reference snapshot from about 24 hours
 * ago (the one before that moment, or else the closest one to it).
 *
 * @param {Client} client - PostgreSQL client passed to the snapshot lookups
 * @param {number} aid - The video's AID
 * @returns 0 when either snapshot is missing or both are the same snapshot;
 *   otherwise one of 6, 12, 24, 36 or 72 (presumably hours — confirm with the caller)
 */
export const getRegularSnapshotInterval = async (client: Client, aid: number) => {
	const dayAgo = new Date(Date.now() - 24 * HOUR);

	// Prefer a snapshot taken before the reference moment; fall back to the closest one.
	const baseline = (await findSnapshotBefore(client, aid, dayAgo)) ||
		(await findClosestSnapshot(client, aid, dayAgo));
	const newest = await getLatestSnapshot(client, aid);

	if (!baseline || !newest || baseline.created_at === newest.created_at) {
		return 0;
	}

	// Too little history to estimate a daily rate.
	const elapsedHours = (newest.created_at - baseline.created_at) / HOUR;
	if (elapsedHours < 8) {
		return 24;
	}

	const gainedViews = newest.views - baseline.views;
	if (gainedViews === 0) {
		return 72;
	}

	// The small constant keeps the divisor away from zero.
	const viewsPerDay = gainedViews / (elapsedHours + 0.001) * 24;
	if (viewsPerDay < 6) {
		return 36;
	}
	if (viewsPerDay < 120) {
		return 24;
	}
	if (viewsPerDay < 320) {
		return 12;
	}
	return 6;
};
|
16
packages/crawler/mq/task/removeAllTimeoutSchedules.ts
Normal file
@ -0,0 +1,16 @@
|
||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
/*
 * Delete every snapshot schedule whose status is 'pending' or 'processing' and
 * whose `started_at` is more than 30 minutes old. A log line is written first.
 *
 * @param {Client} client - PostgreSQL client used to run the DELETE
 */
export async function removeAllTimeoutSchedules(client: Client) {
	logger.log("Too many timeout schedules, directly removing these schedules...", "mq", "fn:scheduleCleanupWorker");

	const deleteTimedOut: string = `
DELETE FROM snapshot_schedule
WHERE status IN ('pending', 'processing')
AND started_at < NOW() - INTERVAL '30 minutes'
`;
	await client.queryObject(deleteTimedOut);
}
|
@ -1,5 +1,5 @@
|
||||
import netScheduler from "mq/scheduler.ts";
|
||||
import { MediaListInfoData, MediaListInfoResponse } from "net/bilibili.d.ts";
|
||||
import networkDelegate from "@core/net/delegate.ts";
|
||||
import { MediaListInfoData, MediaListInfoResponse } from "@core/net/bilibili.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
/*
|
||||
@ -12,12 +12,11 @@ import logger from "log/logger.ts";
|
||||
* - The alicloud-fc threw an error: with error code `ALICLOUD_FC_ERROR`
|
||||
*/
|
||||
export async function bulkGetVideoStats(aids: number[]): Promise<MediaListInfoData | number> {
|
||||
const baseURL = `https://api.bilibili.com/medialist/gateway/base/resource/infos?resources=`;
|
||||
let url = baseURL;
|
||||
let url = `https://api.bilibili.com/medialist/gateway/base/resource/infos?resources=`;
|
||||
for (const aid of aids) {
|
||||
url += `${aid}:2,`;
|
||||
}
|
||||
const data = await netScheduler.request<MediaListInfoResponse>(url, "bulkSnapshot");
|
||||
const data = await networkDelegate.request<MediaListInfoResponse>(url, "bulkSnapshot");
|
||||
const errMessage = `Error fetching metadata for aid list: ${aids.join(",")}:`;
|
||||
if (data.code !== 0) {
|
||||
logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
|
||||
|
@ -1,6 +1,6 @@
|
||||
import { VideoListResponse } from "net/bilibili.d.ts";
|
||||
import { VideoListResponse } from "@core/net/bilibili.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import netScheduler from "mq/scheduler.ts";
|
||||
import networkDelegate from "@core/net/delegate.ts";
|
||||
|
||||
export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[]> {
|
||||
const startFrom = 1 + pageSize * (page - 1);
|
||||
@ -8,7 +8,7 @@ export async function getLatestVideoAids(page: number = 1, pageSize: number = 10
|
||||
const range = `${startFrom}-${endTo}`;
|
||||
const errMessage = `Error fetching latest aid for ${range}:`;
|
||||
const url = `https://api.bilibili.com/x/web-interface/newlist?rid=30&ps=${pageSize}&pn=${page}`;
|
||||
const data = await netScheduler.request<VideoListResponse>(url, "getLatestVideos");
|
||||
const data = await networkDelegate.request<VideoListResponse>(url, "getLatestVideos");
|
||||
if (data.code != 0) {
|
||||
logger.error(errMessage + data.message, "net", "getLastestVideos");
|
||||
return [];
|
||||
|
@ -1,10 +1,10 @@
|
||||
import netScheduler from "mq/scheduler.ts";
|
||||
import { VideoDetailsData, VideoDetailsResponse } from "net/bilibili.d.ts";
|
||||
import networkDelegate from "@core/net/delegate.ts";
|
||||
import { VideoDetailsData, VideoDetailsResponse } from "@core/net/bilibili.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
export async function getVideoDetails(aid: number): Promise<VideoDetailsData | null> {
|
||||
const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`;
|
||||
const data = await netScheduler.request<VideoDetailsResponse>(url, "getVideoInfo");
|
||||
const data = await networkDelegate.request<VideoDetailsResponse>(url, "getVideoInfo");
|
||||
const errMessage = `Error fetching metadata for ${aid}:`;
|
||||
if (data.code !== 0) {
|
||||
logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
|
||||
|
@ -1,5 +1,5 @@
|
||||
import netScheduler from "mq/scheduler.ts";
|
||||
import { VideoInfoData, VideoInfoResponse } from "net/bilibili.d.ts";
|
||||
import networkDelegate from "@core/net/delegate.ts";
|
||||
import { VideoInfoData, VideoInfoResponse } from "@core/net/bilibili.d.ts";
|
||||
import logger from "log/logger.ts";
|
||||
|
||||
/*
|
||||
@ -17,7 +17,7 @@ import logger from "log/logger.ts";
|
||||
*/
|
||||
export async function getVideoInfo(aid: number, task: string): Promise<VideoInfoData | number> {
|
||||
const url = `https://api.bilibili.com/x/web-interface/view?aid=${aid}`;
|
||||
const data = await netScheduler.request<VideoInfoResponse>(url, task);
|
||||
const data = await networkDelegate.request<VideoInfoResponse>(url, task);
|
||||
const errMessage = `Error fetching metadata for ${aid}:`;
|
||||
if (data.code !== 0) {
|
||||
logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
|
||||
@ -25,3 +25,27 @@ export async function getVideoInfo(aid: number, task: string): Promise<VideoInfo
|
||||
}
|
||||
return data.data;
|
||||
}
|
||||
|
||||
/*
 * Look up a video's metadata through the bilibili API, addressed by BVID
 * @param {string} bvid - BVID of the video
 * @param {string} task - Task name handed to the scheduler. One of:
 * - snapshotVideo
 * - getVideoInfo
 * - snapshotMilestoneVideo
 * @returns {Promise<VideoInfoData | number>} The video data, or the error code the bilibili API answered with
 * @throws {NetSchedulerError} - Thrown in the following cases:
 * - No proxy is available currently: with error code `NO_PROXY_AVAILABLE`
 * - The native `fetch` function threw an error: with error code `FETCH_ERROR`
 * - The alicloud-fc threw an error: with error code `ALICLOUD_FC_ERROR`
 */
export async function getVideoInfoByBV(bvid: string, task: string): Promise<VideoInfoData | number> {
	const response = await networkDelegate.request<VideoInfoResponse>(
		`https://api.bilibili.com/x/web-interface/view?bvid=${bvid}`,
		task,
	);

	// Code 0 is the only success case; return the payload directly.
	if (response.code === 0) {
		return response.data;
	}

	const prefix = `Error fetching metadata for ${bvid}:`;
	logger.error(prefix + response.code + "-" + response.message, "net", "fn:getVideoInfoByBV");
	return response.code;
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { ConnectionOptions, Job, Worker } from "bullmq";
|
||||
import { redis } from "db/redis.ts";
|
||||
import { redis } from "../../core/db/redis.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { classifyVideosWorker, classifyVideoWorker } from "mq/exec/classifyVideo.ts";
|
||||
import { WorkerError } from "mq/schema.ts";
|
||||
@ -18,7 +18,7 @@ Deno.addSignalListener("SIGTERM", async () => {
|
||||
Deno.exit();
|
||||
});
|
||||
|
||||
Akari.init();
|
||||
await Akari.init();
|
||||
|
||||
const filterWorker = new Worker(
|
||||
"classifyVideo",
|
||||
|
@ -1,22 +1,37 @@
|
||||
import { ConnectionOptions, Job, Worker } from "bullmq";
|
||||
import { collectSongsWorker, getLatestVideosWorker } from "mq/executors.ts";
|
||||
import { redis } from "db/redis.ts";
|
||||
import {
|
||||
archiveSnapshotsWorker,
|
||||
bulkSnapshotTickWorker,
|
||||
collectSongsWorker,
|
||||
dispatchMilestoneSnapshotsWorker,
|
||||
dispatchRegularSnapshotsWorker,
|
||||
getLatestVideosWorker,
|
||||
getVideoInfoWorker,
|
||||
scheduleCleanupWorker,
|
||||
snapshotTickWorker,
|
||||
snapshotVideoWorker,
|
||||
takeBulkSnapshotForVideosWorker,
|
||||
} from "mq/exec/executors.ts";
|
||||
import { redis } from "@core/db/redis.ts";
|
||||
import logger from "log/logger.ts";
|
||||
import { lockManager } from "mq/lockManager.ts";
|
||||
import { WorkerError } from "mq/schema.ts";
|
||||
import { getVideoInfoWorker } from "mq/exec/getLatestVideos.ts";
|
||||
import {
|
||||
bulkSnapshotTickWorker,
|
||||
collectMilestoneSnapshotsWorker,
|
||||
regularSnapshotsWorker,
|
||||
scheduleCleanupWorker,
|
||||
snapshotTickWorker,
|
||||
takeBulkSnapshotForVideosWorker,
|
||||
takeSnapshotForVideoWorker,
|
||||
} from "mq/exec/snapshotTick.ts";
|
||||
|
||||
/*
 * Release one named lock through the lock manager, then log that it was released.
 * @param {string} name - Name of the lock to release
 */
const releaseLockForJob = async (name: string) => {
	await lockManager.releaseLock(name);

	const message = `Released lock: ${name}`;
	logger.log(message, "mq");
};
|
||||
|
||||
/*
 * Release the fixed set of job locks, one after another, in the listed order.
 */
const releaseAllLocks = async () => {
	for (const lockName of ["dispatchRegularSnapshots", "dispatchArchiveSnapshots", "getLatestVideos"]) {
		await releaseLockForJob(lockName);
	}
};
|
||||
|
||||
Deno.addSignalListener("SIGINT", async () => {
|
||||
logger.log("SIGINT Received: Shutting down workers...", "mq");
|
||||
await releaseAllLocks();
|
||||
await latestVideoWorker.close(true);
|
||||
await snapshotWorker.close(true);
|
||||
Deno.exit();
|
||||
@ -24,6 +39,7 @@ Deno.addSignalListener("SIGINT", async () => {
|
||||
|
||||
Deno.addSignalListener("SIGTERM", async () => {
|
||||
logger.log("SIGTERM Received: Shutting down workers...", "mq");
|
||||
await releaseAllLocks();
|
||||
await latestVideoWorker.close(true);
|
||||
await snapshotWorker.close(true);
|
||||
Deno.exit();
|
||||
@ -34,14 +50,11 @@ const latestVideoWorker = new Worker(
|
||||
async (job: Job) => {
|
||||
switch (job.name) {
|
||||
case "getLatestVideos":
|
||||
await getLatestVideosWorker(job);
|
||||
break;
|
||||
return await getLatestVideosWorker(job);
|
||||
case "getVideoInfo":
|
||||
await getVideoInfoWorker(job);
|
||||
break;
|
||||
return await getVideoInfoWorker(job);
|
||||
case "collectSongs":
|
||||
await collectSongsWorker(job);
|
||||
break;
|
||||
return await collectSongsWorker(job);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -63,35 +76,26 @@ latestVideoWorker.on("error", (err) => {
|
||||
logger.error(e.rawError, e.service, e.codePath);
|
||||
});
|
||||
|
||||
latestVideoWorker.on("closed", async () => {
|
||||
await lockManager.releaseLock("getLatestVideos");
|
||||
});
|
||||
|
||||
const snapshotWorker = new Worker(
|
||||
"snapshot",
|
||||
async (job: Job) => {
|
||||
switch (job.name) {
|
||||
case "snapshotVideo":
|
||||
await takeSnapshotForVideoWorker(job);
|
||||
break;
|
||||
return await snapshotVideoWorker(job);
|
||||
case "snapshotTick":
|
||||
await snapshotTickWorker(job);
|
||||
break;
|
||||
case "collectMilestoneSnapshots":
|
||||
await collectMilestoneSnapshotsWorker(job);
|
||||
break;
|
||||
return await snapshotTickWorker(job);
|
||||
case "dispatchMilestoneSnapshots":
|
||||
return await dispatchMilestoneSnapshotsWorker(job);
|
||||
case "dispatchRegularSnapshots":
|
||||
await regularSnapshotsWorker(job);
|
||||
break;
|
||||
return await dispatchRegularSnapshotsWorker(job);
|
||||
case "scheduleCleanup":
|
||||
await scheduleCleanupWorker(job);
|
||||
break;
|
||||
return await scheduleCleanupWorker(job);
|
||||
case "bulkSnapshotVideo":
|
||||
await takeBulkSnapshotForVideosWorker(job);
|
||||
break;
|
||||
return await takeBulkSnapshotForVideosWorker(job);
|
||||
case "bulkSnapshotTick":
|
||||
await bulkSnapshotTickWorker(job);
|
||||
break;
|
||||
return await bulkSnapshotTickWorker(job);
|
||||
case "dispatchArchiveSnapshots":
|
||||
return await archiveSnapshotsWorker(job);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -103,7 +107,3 @@ snapshotWorker.on("error", (err) => {
|
||||
const e = err as WorkerError;
|
||||
logger.error(e.rawError, e.service, e.codePath);
|
||||
});
|
||||
|
||||
snapshotWorker.on("closed", async () => {
|
||||
await lockManager.releaseLock("dispatchRegularSnapshots");
|
||||
});
|
||||
|
@ -19,6 +19,9 @@ export default defineConfig({
|
||||
allow: [".", "../../"],
|
||||
},
|
||||
},
|
||||
plugins: [tsconfigPaths()]
|
||||
plugins: [tsconfigPaths()],
|
||||
},
|
||||
markdown: {
|
||||
remarkRehype: { footnoteLabel: "脚注", footnoteBackLabel: "回到引用 1" },
|
||||
}
|
||||
});
|
||||
|
1012
packages/frontend/bun.lock
Normal file
@ -9,12 +9,15 @@
|
||||
"astro": "astro"
|
||||
},
|
||||
"dependencies": {
|
||||
"@astrojs/node": "^9.1.3",
|
||||
"@astrojs/svelte": "^7.0.9",
|
||||
"@astrojs/tailwind": "^6.0.2",
|
||||
"argon2id": "^1.0.1",
|
||||
"astro": "^5.5.5",
|
||||
"autoprefixer": "^10.4.21",
|
||||
"pg": "^8.11.11",
|
||||
"postcss": "^8.5.3",
|
||||
"svelte": "^5.25.7",
|
||||
"tailwindcss": "^3.0.24",
|
||||
"vite-tsconfig-paths": "^5.1.4"
|
||||
},
|
||||
|
Before Width: | Height: | Size: 6.1 KiB After Width: | Height: | Size: 6.3 KiB |
Before Width: | Height: | Size: 6.1 KiB After Width: | Height: | Size: 6.4 KiB |
Before Width: | Height: | Size: 22 KiB After Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 22 KiB After Width: | Height: | Size: 22 KiB |
@ -1,30 +1,7 @@
|
||||
---
|
||||
import astroLogoLight from "@assets/标题-浅色.svg";
|
||||
import astroLogoDark from "@assets/标题-深色.svg";
|
||||
import DarkModeImage from "@components/DarkModeImage.svelte";
|
||||
import SearchBox from "@components/SearchBox.svelte";
|
||||
import TitleBarMobile from "@components/TitleBarMobile.svelte";
|
||||
import TitleBarDesktop from "./TitleBarDesktop.astro";
|
||||
---
|
||||
|
||||
<div class="hidden md:block fixed top-0 left-0 w-full h-28 bg-white/80 dark:bg-zinc-900/70 backdrop-blur-lg z-50">
|
||||
<div class="w-[305px] ml-8 inline-flex h-full items-center">
|
||||
<a href="/">
|
||||
<DarkModeImage
|
||||
lightSrc={astroLogoLight.src}
|
||||
darkSrc={astroLogoDark.src}
|
||||
alt="Logo"
|
||||
className="w-[305px] h-24 inline-block"
|
||||
client:load
|
||||
/>
|
||||
</a>
|
||||
</div>
|
||||
<SearchBox client:load />
|
||||
|
||||
<div
|
||||
class="inline-flex right-12 absolute gap-4 h-full text-xl dark:text-[#C6DCF2] font-medium items-center w-48 justify-end"
|
||||
>
|
||||
<a href="/about" class="hover:dark:text-[#B1C5DA]">关于</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<TitleBarDesktop/>
|
||||
<TitleBarMobile client:load />
|
||||
|
26
packages/frontend/src/components/TitleBarDesktop.astro
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
import astroLogoLight from "@assets/标题-浅色.svg";
|
||||
import astroLogoDark from "@assets/标题-深色.svg";
|
||||
import DarkModeImage from "@components/DarkModeImage.svelte";
|
||||
import SearchBox from "@components/SearchBox.svelte";
|
||||
---
|
||||
|
||||
<div class="hidden md:block relative top-0 left-0 w-full h-28 bg-white/80 dark:bg-zinc-900/70 backdrop-blur-lg z-50">
|
||||
<div class="w-[305px] ml-8 inline-flex h-full items-center">
|
||||
<a href="/">
|
||||
<DarkModeImage
|
||||
lightSrc={astroLogoLight.src}
|
||||
darkSrc={astroLogoDark.src}
|
||||
alt="Logo"
|
||||
className="w-[305px] h-24 inline-block"
|
||||
client:load
|
||||
/>
|
||||
</a>
|
||||
</div>
|
||||
<SearchBox client:load/>
|
||||
|
||||
<div class="inline-flex right-12 absolute gap-4 h-full
|
||||
text-xl font-medium items-center w-48 justify-end">
|
||||
<a href="/about">关于</a>
|
||||
</div>
|
||||
</div>
|
@ -15,7 +15,7 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="md:hidden fixed top-0 left-0 w-full h-16 bg-white/80 dark:bg-zinc-800/70 backdrop-blur-lg z-50">
|
||||
<div class="md:hidden relative top-0 left-0 w-full h-16 bg-white/80 dark:bg-zinc-800/70 backdrop-blur-lg z-50">
|
||||
{#if !showSearchBox}
|
||||
<button class="inline-block ml-4 mt-4 dark:text-white">
|
||||
<MenuIcon />
|
||||
|
@ -4,7 +4,7 @@ import TitleBar from "@components/TitleBar.astro";
|
||||
|
||||
<TitleBar/>
|
||||
|
||||
<main class="flex flex-col items-center justify-center min-h-screen gap-8">
|
||||
<h1 class="text-4xl font-bold text-center">正在施工中……</h1>
|
||||
<main class="flex flex-col items-center justify-center h-full flex-grow gap-8 px-4">
|
||||
<h1 class="text-4xl font-medium text-center">正在施工中……</h1>
|
||||
<p>在搜索栏输入BV号或AV号,可以查询目前数据库收集到的信息~</p>
|
||||
</main>
|
||||
|
@ -1,13 +1,25 @@
|
||||
const N_1024 = BigInt("129023318876534346704360951712586568674758913224876821534686030409476129469193481910786173836188085930974906857867802234113909470848523288588793477904039083513378341278558405407018889387577114155572311708428733260891448259786041525189132461448841652472631435226032063278124857443496954605482776113964107326943")
|
||||
const N_1024 = BigInt(
|
||||
"129023318876534346704360951712586568674758913224876821534686030409476129469193481910786173836188085930974906857867802234113909470848523288588793477904039083513378341278558405407018889387577114155572311708428733260891448259786041525189132461448841652472631435226032063278124857443496954605482776113964107326943",
|
||||
);
|
||||
|
||||
const N_2048 = BigInt("23987552118069940970878653610463005981599204778388399885550631951871084945075866571231062435627294546200946516668493107358732376187241747090707087544153108117326163500579370560400058549184722138636116585329496684877258304519458316233517215780035360354808658620079068489084797380781488445517430961701007542207001544091884001098497324624368085682074645221148086075871342544591022944384890014176612259729018968864426602901247715051556212559854689574013699665035317257438297910516976812428036717668766321871780963854649899276251822244719887233041422346429752896925499321431273560130952088238625622570366815755926694833109")
|
||||
const N_2048 = BigInt(
|
||||
"23987552118069940970878653610463005981599204778388399885550631951871084945075866571231062435627294546200946516668493107358732376187241747090707087544153108117326163500579370560400058549184722138636116585329496684877258304519458316233517215780035360354808658620079068489084797380781488445517430961701007542207001544091884001098497324624368085682074645221148086075871342544591022944384890014176612259729018968864426602901247715051556212559854689574013699665035317257438297910516976812428036717668766321871780963854649899276251822244719887233041422346429752896925499321431273560130952088238625622570366815755926694833109",
|
||||
);
|
||||
|
||||
const N_1792 = BigInt("23987552118069940970878653610463005981599204778388399885550631951871084945075866571231062435627294546200946516668493107358732376187241747090707087544153108117326163500579370560400058549184722138636116585329496684877258304519458316233517215780035360354808658620079068489084797380781488445517430961701007542207001544091884001098497324624368085682074645221148086075871342544591022944384890014176612259729018968864426602901247715051556212559854689574013699665035317257438297910516976812428036717668766321871780963854649899276251822244719887233041422346429752896925499321431273560130952088238625622570366815755926694833109")
|
||||
const N_1792 = BigInt(
|
||||
"23987552118069940970878653610463005981599204778388399885550631951871084945075866571231062435627294546200946516668493107358732376187241747090707087544153108117326163500579370560400058549184722138636116585329496684877258304519458316233517215780035360354808658620079068489084797380781488445517430961701007542207001544091884001098497324624368085682074645221148086075871342544591022944384890014176612259729018968864426602901247715051556212559854689574013699665035317257438297910516976812428036717668766321871780963854649899276251822244719887233041422346429752896925499321431273560130952088238625622570366815755926694833109",
|
||||
);
|
||||
|
||||
const N_1536 = BigInt("1694330250214463438908848400950857073137355630337290254958754184668036770489801447652464038218330711288158361242955860326168191830448553710492926795708495297280933502917598985378231124113971732841791156356676046934277122699383776036675381503510992810963611269045078440132744168908318454891211962146563551929591147663448816841024591820348784855441153716551049843185172472891407933214238000452095646085222944171689449292644270516031799660928056315886939284985905227")
|
||||
const N_1536 = BigInt(
|
||||
"1694330250214463438908848400950857073137355630337290254958754184668036770489801447652464038218330711288158361242955860326168191830448553710492926795708495297280933502917598985378231124113971732841791156356676046934277122699383776036675381503510992810963611269045078440132744168908318454891211962146563551929591147663448816841024591820348784855441153716551049843185172472891407933214238000452095646085222944171689449292644270516031799660928056315886939284985905227",
|
||||
);
|
||||
|
||||
const N_3072 = BigInt("4432919939296042464443862503456460073874727648022810391370558006281079088795179408238989283371442564716849343712703672836423961818025813387453469700639513190304802553045342607888612037304066433501317127429264242784608682213025490491212489901736408833027611579294436675682774458141490718959615677971745638214649336218217578937534746160749039668886450447773018369168258067682196337978245372237157696236362344796867228581553446331915147012787367438751646936429739232247148712001806846526947508445039707404287951727838234648917450736371192435665040644040487427986702098273581288935278964444790007953559851323281510927332862225214878776790605026472021669614552481167977412450477230442015077669503312683966631454347169703030544483487968842349634064181183599641180349414682042575010303056241481622837185325228233789954078775053744988023738762706404546546146837242590884760044438874357295029411988267287001033032827035809135092270843")
|
||||
const N_3072 = BigInt(
|
||||
"4432919939296042464443862503456460073874727648022810391370558006281079088795179408238989283371442564716849343712703672836423961818025813387453469700639513190304802553045342607888612037304066433501317127429264242784608682213025490491212489901736408833027611579294436675682774458141490718959615677971745638214649336218217578937534746160749039668886450447773018369168258067682196337978245372237157696236362344796867228581553446331915147012787367438751646936429739232247148712001806846526947508445039707404287951727838234648917450736371192435665040644040487427986702098273581288935278964444790007953559851323281510927332862225214878776790605026472021669614552481167977412450477230442015077669503312683966631454347169703030544483487968842349634064181183599641180349414682042575010303056241481622837185325228233789954078775053744988023738762706404546546146837242590884760044438874357295029411988267287001033032827035809135092270843",
|
||||
);
|
||||
|
||||
const N_4096 = BigInt("703671044356805218391078271512201582198770553281951369783674142891088501340774249238173262580562112786670043634665390581120113644316651934154746357220932310140476300088580654571796404198410555061275065442553506658401183560336140989074165998202690496991174269748740565700402715364422506782445179963440819952745241176450402011121226863984008975377353558155910994380700267903933205531681076494639818328879475919332604951949178075254600102192323286738973253864238076198710173840170988339024438220034106150475640983877458155141500313471699516670799821379238743709125064098477109094533426340852518505385314780319279862586851512004686798362431227795743253799490998475141728082088984359237540124375439664236138519644100625154580910233437864328111620708697941949936338367445851449766581651338876219676721272448769082914348242483068204896479076062102236087066428603930888978596966798402915747531679758905013008059396214343112694563043918465373870648649652122703709658068801764236979191262744515840224548957285182453209028157886219424802426566456408109642062498413592155064289314088837031184200671561102160059065729282902863248815224399131391716503171191977463328439766546574118092303414702384104112719959325482439604572518549918705623086363111")
|
||||
const N_4096 = BigInt(
|
||||
"703671044356805218391078271512201582198770553281951369783674142891088501340774249238173262580562112786670043634665390581120113644316651934154746357220932310140476300088580654571796404198410555061275065442553506658401183560336140989074165998202690496991174269748740565700402715364422506782445179963440819952745241176450402011121226863984008975377353558155910994380700267903933205531681076494639818328879475919332604951949178075254600102192323286738973253864238076198710173840170988339024438220034106150475640983877458155141500313471699516670799821379238743709125064098477109094533426340852518505385314780319279862586851512004686798362431227795743253799490998475141728082088984359237540124375439664236138519644100625154580910233437864328111620708697941949936338367445851449766581651338876219676721272448769082914348242483068204896479076062102236087066428603930888978596966798402915747531679758905013008059396214343112694563043918465373870648649652122703709658068801764236979191262744515840224548957285182453209028157886219424802426566456408109642062498413592155064289314088837031184200671561102160059065729282902863248815224399131391716503171191977463328439766546574118092303414702384104112719959325482439604572518549918705623086363111",
|
||||
);
|
||||
|
||||
export const N_ARRAY = [N_1024, N_1536, N_1792, N_2048, N_3072, N_4096];
|
@ -1,27 +1,27 @@
|
||||
# 关于「中V档案馆」
|
||||
# 关于「中 V 档案馆」
|
||||
|
||||
「中V档案馆」是一个旨在收录与展示「中文歌声合成作品」及有关信息的网站。
|
||||
「中 V 档案馆」是一个旨在收录与展示「中文歌声合成作品」及有关信息的网站。
|
||||
|
||||
## 创建背景与关联工作
|
||||
|
||||
纵观整个互联网,对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站:
|
||||
纵观整个互联网,对于「中文歌声合成」或「中文虚拟歌手」(常简称为中 V 或 VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站:
|
||||
|
||||
- [萌娘百科](https://zh.moegirl.org.cn/):
|
||||
收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
收录了大量中 V 歌曲及歌姬的信息,呈现形式为传统维基(基于 [MediaWiki](https://www.mediawiki.org/))。
|
||||
- [VCPedia](https://vcpedia.cn/):
|
||||
由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。
|
||||
由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点 [^1],呈现形式为传统维基(基于 [MediaWiki](https://www.mediawiki.org/))。
|
||||
- [VocaDB](https://vocadb.net/): 一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV
|
||||
等[^2],其中包含大量中文歌声合成作品。
|
||||
- [天钿Daily](https://tdd.bunnyxt.com/):一个VC相关数据交流与分享的网站。致力于VC相关数据交流,定期抓取VC相关数据,选取有意义的纬度展示。[^3]
|
||||
等 [^2],其中包含大量中文歌声合成作品。
|
||||
- [天钿 Daily](https://tdd.bunnyxt.com/):一个 VC 相关数据交流与分享的网站。致力于 VC 相关数据交流,定期抓取 VC 相关数据,选取有意义的纬度展示。[^3]
|
||||
|
||||
上述网站中,或多或少存在一些不足,例如:
|
||||
|
||||
- 萌娘百科、VCPedia受限于传统维基,绝大多数内容依赖人工编辑。
|
||||
- VocaDB基于结构化数据库构建,由此可以依赖程序生成一些信息,但**条目收录**仍然完全依赖人工完成。
|
||||
- VocaDB主要专注于元数据展示,少有关于歌曲、作者等的描述性的文字,也缺乏描述性的背景信息。
|
||||
- 天钿Daily只展示歌曲的统计数据及历史趋势,没有关于歌曲其它信息的收集。
|
||||
- 萌娘百科、VCPedia 受限于传统维基,绝大多数内容依赖人工编辑。
|
||||
- VocaDB 基于结构化数据库构建,由此可以依赖程序生成一些信息,但 **条目收录** 仍然完全依赖人工完成。
|
||||
- VocaDB 主要专注于元数据展示,少有关于歌曲、作者等的描述性的文字,也缺乏描述性的背景信息。
|
||||
- 天钿 Daily 只展示歌曲的统计数据及历史趋势,没有关于歌曲其它信息的收集。
|
||||
|
||||
因此,**中V档案馆**吸取前人经验,克服上述网站的不足,希望做到:
|
||||
因此,**中 V 档案馆** 吸取前人经验,克服上述网站的不足,希望做到:
|
||||
|
||||
- 歌曲收录(指发现歌曲并创建条目)的完全自动化
|
||||
- 歌曲元信息提取的高度自动化
|
||||
@ -31,31 +31,30 @@
|
||||
|
||||
## 技术架构
|
||||
|
||||
参见[CVSA文档](https://docs.projectcvsa.com/)。
|
||||
|
||||
参见 [CVSA 文档](https://docs.projectcvsa.com/)。
|
||||
|
||||
## 开放许可
|
||||
|
||||
受本文以[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供。
|
||||
本文以 [CC BY-NC-SA 4.0 协议](https://creativecommons.org/licenses/by-nc-sa/4.0/) 提供。
|
||||
|
||||
### 数据库
|
||||
|
||||
中V档案馆使用[PostgreSQL](https://postgresql.org)作为数据库,我们承诺定期导出数据库转储 (dump)
|
||||
中 V 档案馆使用 [PostgreSQL](https://postgresql.org) 作为数据库,我们承诺定期导出数据库转储 (dump)
|
||||
文件并公开,其内容遵从以下协议或条款:
|
||||
|
||||
- 数据库中的事实性数据,根据适用法律,不构成受版权保护的内容。中V档案馆放弃一切可能的权利([CC0 1.0 Universal](https://creativecommons.org/publicdomain/zero/1.0/))。
|
||||
- 对于数据库中有原创性的内容(如贡献者编辑的描述性内容),如无例外,以[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)提供。
|
||||
- 对于引用、摘编或改编自萌娘百科、VCPedia的内容,以与原始协议(CC BY-NC-SA 3.0
|
||||
CN)兼容的协议[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供,并注明原始协议 。
|
||||
> 根据原始协议第四条第2项内容,CC BY-NC-SA 4.0协议为与原始协议具有相同授权要素的后续版本(“可适用的协议”)。
|
||||
- 中V档案馆文档使用[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)。
|
||||
- 数据库中的事实性数据,根据适用法律,不构成受版权保护的内容。中 V 档案馆放弃一切可能的权利([CC0 1.0 Universal](https://creativecommons.org/publicdomain/zero/1.0/))。
|
||||
- 对于数据库中有原创性的内容(如贡献者编辑的描述性内容),如无例外,以 [CC BY 4.0 协议](https://creativecommons.org/licenses/by/4.0/) 提供。
|
||||
- 对于引用、摘编或改编自萌娘百科、VCPedia 的内容,以与原始协议(CC BY-NC-SA 3.0
|
||||
CN)兼容的协议 [CC BY-NC-SA 4.0 协议](https://creativecommons.org/licenses/by-nc-sa/4.0/) 提供,并注明原始协议。
|
||||
> 根据原始协议第四条第 2 项内容,CC BY-NC-SA 4.0 协议为与原始协议具有相同授权要素的后续版本(“可适用的协议”)。
|
||||
- 中 V 档案馆文档使用 [CC BY 4.0 协议](https://creativecommons.org/licenses/by/4.0/)。
|
||||
|
||||
### 软件代码
|
||||
|
||||
用于构建中V档案馆的软件代码在[AGPL 3.0](https://www.gnu.org/licenses/agpl-3.0.html)许可证下公开,参见[LICENSE](./LICENSE)
|
||||
用于构建中 V 档案馆的软件代码在 [AGPL 3.0](https://www.gnu.org/licenses/agpl-3.0.html) 许可证下公开,参见 [LICENSE](./LICENSE)。
|
||||
|
||||
[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。
|
||||
[^1]: 引用自 [VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于 [知识共享 署名-非商业性使用-相同方式共享 3.0 中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/) 下提供。
|
||||
|
||||
[^2]: 翻译自[VocaDB](https://vocadb.net/),于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。
|
||||
[^2]: 翻译自 [VocaDB](https://vocadb.net/),于 [CC BY 4.0 协议](https://creativecommons.org/licenses/by/4.0/) 下提供。
|
||||
|
||||
[^3]: 引用自[关于 - 天钿Daily](https://tdd.bunnyxt.com/about)
|
||||
[^3]: 引用自 [关于 - 天钿 Daily](https://tdd.bunnyxt.com/about)
|
||||
|
15
packages/frontend/src/layouts/Content.astro
Normal file
@ -0,0 +1,15 @@
|
||||
---
|
||||
import TitleBar from "@components/TitleBar.astro";
|
||||
import Layout from '@layouts/Layout.astro';
|
||||
import Footer from "./Footer.astro";
|
||||
---
|
||||
|
||||
<Layout>
|
||||
<TitleBar/>
|
||||
<main class="flex flex-col items-center flex-grow gap-8 md:mt-12 relative z-0">
|
||||
<div class="w-full lg:w-2/3 xl:w-1/2 content px-8 md:px-12 lg:px-0">
|
||||
<slot/>
|
||||
</div>
|
||||
</main>
|
||||
<Footer/>
|
||||
</Layout>
|
10
packages/frontend/src/layouts/Footer.astro
Normal file
@ -0,0 +1,10 @@
|
||||
<footer class="py-6">
|
||||
<div class="container mx-auto text-center">
|
||||
<ul class="flex justify-center space-x-4">
|
||||
<li><a href="/about">关于</a></li>
|
||||
<li><a href="#">服务</a></li>
|
||||
<li><a href="#">隐私政策</a></li>
|
||||
<li><a href="#">联系我们</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</footer>
|
3
packages/frontend/src/layouts/Header.astro
Normal file
@ -0,0 +1,3 @@
|
||||
---
|
||||
|
||||
---
|
@ -1,5 +1,7 @@
|
||||
---
|
||||
import "../styles/global.css";
|
||||
const { title } = Astro.props;
|
||||
const pageTitle = title ? title + ' - 中V档案馆' :'中V档案馆';
|
||||
---
|
||||
|
||||
<!doctype html>
|
||||
@ -7,9 +9,9 @@ import "../styles/global.css";
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>中V档案馆</title>
|
||||
<title>{pageTitle}</title>
|
||||
</head>
|
||||
<body class="dark:bg-zinc-900 dark:text-zinc-100">
|
||||
<body class="dark:bg-zinc-900 dark:text-zinc-100 min-h-screen flex flex-col">
|
||||
<slot />
|
||||
</body>
|
||||
</html>
|
||||
|
12
packages/frontend/src/pages/LICENSE.astro
Normal file
@ -0,0 +1,12 @@
|
||||
---
|
||||
import Layout from "@layouts/Layout.astro";
|
||||
import Content from "@layouts/Content.astro";
|
||||
---
|
||||
|
||||
<Layout title="开源许可">
|
||||
<Content>
|
||||
中V档案馆运行着开源软件,它的前端、后端和其它组件的代码在
|
||||
<a href="https://www.gnu.org/licenses/agpl-3.0.html">AGPL 3.0</a>
|
||||
开源许可下授权。
|
||||
</Content>
|
||||
</Layout>
|
@ -7,8 +7,8 @@ import "../styles/content.css";
|
||||
|
||||
<Layout>
|
||||
<TitleBar/>
|
||||
<main class="flex flex-col items-center min-h-screen gap-8 mt-36 relative z-0">
|
||||
<div class="lg:w-1/2 content">
|
||||
<main class="flex flex-col items-center min-h-screen gap-8 md:mt-12 relative z-0">
|
||||
<div class="w-full lg:w-2/3 xl:w-1/2 content px-8 md:px-12 lg:px-0">
|
||||
<AboutContent/>
|
||||
</div>
|
||||
</main>
|
||||
|
9
packages/frontend/src/pages/register/index.astro
Normal file
@ -0,0 +1,9 @@
|
||||
---
|
||||
import Layout from "@layouts/Layout.astro";
|
||||
---
|
||||
|
||||
<Layout title="注册">
|
||||
<main class="relative flex-grow pt-36">
|
||||
<h1>欢迎</h1>
|
||||
</main>
|
||||
</Layout>
|
@ -100,57 +100,57 @@ interface Snapshot {
|
||||
|
||||
<Layout>
|
||||
<TitleBar />
|
||||
<main class="flex flex-col items-center min-h-screen gap-8 mt-36 relative z-0">
|
||||
<main class="flex flex-col items-center min-h-screen gap-8 mt-6 relative z-0 overflow-x-auto">
|
||||
<div class="max-w-4xl mx-auto rounded-lg p-6">
|
||||
<h1 class="text-2xl font-bold mb-4">视频信息: <a href={`https://www.bilibili.com/video/av${aid}`} class="underline">av{aid}</a></h1>
|
||||
<h1 class="text-2xl font-medium mb-4">视频信息: <a href={`https://www.bilibili.com/video/av${aid}`} class="underline ">av{aid}</a></h1>
|
||||
|
||||
<div class="mb-6 p-4 rounded-lg">
|
||||
<h2 class="text-xl font-semibold mb-8">基本信息</h2>
|
||||
<div class="overflow-x-auto">
|
||||
<h2 class="text-xl font-medium mb-8">基本信息</h2>
|
||||
<div class="overflow-x-auto max-w-full">
|
||||
<table class="table-auto w-full">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">ID</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-semibold">ID</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.id}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">AID</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-semibold">AID</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.aid}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">BVID</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-semibold">BVID</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.bvid}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">标题</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-[470]">标题</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.title}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">描述</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-[470]">描述</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.description}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">UID</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-semibold">UID</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.uid}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">标签</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-[470]">标签</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.tags}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">发布时间</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-[470]">发布时间</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.published_at ? format(new Date(videoInfo.published_at), 'yyyy-MM-dd HH:mm:ss', { locale: zhCN }) : '-'}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">时长 (秒)</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-[470]">时长 (秒)</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.duration}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">创建时间</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-[470]">创建时间</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.created_at ? format(new Date(videoInfo.created_at), 'yyyy-MM-dd HH:mm:ss', { locale: zhCN }) : '-'}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-bold">封面</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2 font-[470]">封面</td>
|
||||
<td class="border dark:border-zinc-500 px-4 py-2">{videoInfo?.cover_url ? videoInfo.cover_url : '-'}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
@ -159,20 +159,20 @@ interface Snapshot {
|
||||
</div>
|
||||
|
||||
<div class="p-4 rounded-lg">
|
||||
<h2 class="text-xl font-semibold mb-4">播放量历史数据</h2>
|
||||
<h2 class="text-xl font-medium mb-4">播放量历史数据</h2>
|
||||
{snapshots && snapshots.length > 0 ? (
|
||||
<div class="overflow-x-auto">
|
||||
<table class="table-auto w-full">
|
||||
<thead>
|
||||
<tr>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2">创建时间</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2">观看</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2">硬币</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2">点赞</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2">收藏</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2">分享</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2">弹幕</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2">评论</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2 font-medium">创建时间</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2 font-medium">观看</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2 font-medium">硬币</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2 font-medium">点赞</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2 font-medium">收藏</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2 font-medium">分享</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2 font-medium">弹幕</th>
|
||||
<th class="border dark:border-zinc-500 px-4 py-2 font-medium">评论</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
|