From 126752a28862657aa125f76b9b64e00a1e312d68 Mon Sep 17 00:00:00 2001 From: alikia2x Date: Tue, 9 Dec 2025 23:47:40 +0800 Subject: [PATCH] ref: remove useless files --- .gitignore | 4 +- .kilocodeignore | 1 + doc/en/README.md | 25 ----- doc/en/SUMMARY.md | 21 ---- doc/en/about/scope-of-inclusion.md | 48 -------- doc/en/about/this-project.md | 13 --- doc/en/api-doc/catalog.md | 3 - doc/en/api-doc/songs.md | 3 - doc/en/architecure/artificial-intelligence.md | 21 ---- doc/en/architecure/crawler.md | 4 - .../architecure/database-structure/README.md | 15 --- .../database-structure/type-of-song.md | 25 ----- doc/en/architecure/overview.md | 42 ------- doc/zh/.gitbook/assets/1.yaml | 106 ------------------ doc/zh/README.md | 25 ----- doc/zh/SUMMARY.md | 22 ---- doc/zh/about/scope-of-inclusion.md | 22 ---- doc/zh/about/this-project.md | 38 ------- doc/zh/api-doc/catalog.md | 4 - doc/zh/api-doc/video-snapshot.md | 6 - .../architecture/artificial-intelligence.md | 13 --- .../architecture/database-structure/README.md | 15 --- .../database-structure/type-of-song.md | 24 ---- doc/zh/architecture/message-queue/README.md | 1 - .../latestvideosqueue-dui-lie.md | 1 - doc/zh/architecture/overview.md | 26 ----- ml_new/.gitignore | 1 + 27 files changed, 5 insertions(+), 524 deletions(-) create mode 100644 .kilocodeignore delete mode 100644 doc/en/README.md delete mode 100644 doc/en/SUMMARY.md delete mode 100644 doc/en/about/scope-of-inclusion.md delete mode 100644 doc/en/about/this-project.md delete mode 100644 doc/en/api-doc/catalog.md delete mode 100644 doc/en/api-doc/songs.md delete mode 100644 doc/en/architecure/artificial-intelligence.md delete mode 100644 doc/en/architecure/crawler.md delete mode 100644 doc/en/architecure/database-structure/README.md delete mode 100644 doc/en/architecure/database-structure/type-of-song.md delete mode 100644 doc/en/architecure/overview.md delete mode 100644 doc/zh/.gitbook/assets/1.yaml delete mode 100644 doc/zh/README.md delete mode 100644 doc/zh/SUMMARY.md delete mode 100644 doc/zh/about/scope-of-inclusion.md delete mode 100644 doc/zh/about/this-project.md delete mode 100644 doc/zh/api-doc/catalog.md delete mode 100644 doc/zh/api-doc/video-snapshot.md delete mode 100644 doc/zh/architecture/artificial-intelligence.md delete mode 100644 doc/zh/architecture/database-structure/README.md delete mode 100644 doc/zh/architecture/database-structure/type-of-song.md delete mode 100644 doc/zh/architecture/message-queue/README.md delete mode 100644 doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md delete mode 100644 doc/zh/architecture/overview.md create mode 100644 ml_new/.gitignore diff --git a/.gitignore b/.gitignore index 3719e98..4ea3796 100644 --- a/.gitignore +++ b/.gitignore @@ -45,4 +45,6 @@ ucaptcha-config.yaml temp/ -meili \ No newline at end of file +meili + +.turbo \ No newline at end of file diff --git a/.kilocodeignore b/.kilocodeignore new file mode 100644 index 0000000..329e85b --- /dev/null +++ b/.kilocodeignore @@ -0,0 +1 @@ +packages/core/drizzle/main/meta \ No newline at end of file diff --git a/doc/en/README.md b/doc/en/README.md deleted file mode 100644 index 2eadf84..0000000 --- a/doc/en/README.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -icon: hand-wave -layout: - title: - visible: true - description: - visible: false - tableOfContents: - visible: false - outline: - visible: false - pagination: - visible: false ---- - -# Welcome - -Welcome to the CVSA Documentation! - -This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors, -etc. - -### Jump right in - -
About CVSASome information you might want to know about.this-project.md
ArchitectureThe technical details about how CVSA was built.Broken link
API DocDocumentation about APIs provided by CVSA.Broken link
diff --git a/doc/en/SUMMARY.md b/doc/en/SUMMARY.md deleted file mode 100644 index 99ca460..0000000 --- a/doc/en/SUMMARY.md +++ /dev/null @@ -1,21 +0,0 @@ -# Table of contents - -* [Welcome](README.md) - -## About - -* [About CVSA Project](about/this-project.md) -* [Scope of Inclusion](about/scope-of-inclusion.md) - -## Architecure - -* [Overview](architecure/overview.md) -* [Crawler](architecure/crawler.md) -* [Database Structure](architecure/database-structure/README.md) - * [Type of Song](architecure/database-structure/type-of-song.md) -* [Artificial Intelligence](architecure/artificial-intelligence.md) - -## API Doc - -* [Catalog](api-doc/catalog.md) -* [Songs](api-doc/songs.md) diff --git a/doc/en/about/scope-of-inclusion.md b/doc/en/about/scope-of-inclusion.md deleted file mode 100644 index 136061c..0000000 --- a/doc/en/about/scope-of-inclusion.md +++ /dev/null @@ -1,48 +0,0 @@ -# Scope of Inclusion - -CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publisher, manipulators, -arranger, etc), singers and voice engines / voicebanks. - -For a **song**, it must meet the following conditions to be included in CVSA: - -### Category 30 - -In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in -[Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our -[automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been -uploaded to bilibili / categorized under this category. - -#### NEWS - -Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be -entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the -parent category "Music"). - -According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly -published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However, -there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under -the "Music General" sub-category.\ -We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated -program's crawling. - -### At Leats One Line of Chinese / Chinese Virtual Singer - -The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics of the song do not contain -Chinese, it will only be included in the CVSA only if a Chinese virtual singer has been used. - -We define a **Chinese virtual singer** as follows: - -1. The singer primarily uses Chinese voicebank (i.e. the most widely used voickbank for the singer is Chinese) -2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or - Taiwan. - -### Using Vocal Synthesizer - -To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony -vocals). - -We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically -modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics, -encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio) -approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g., -[so-vits svc](https://github.com/svc-develop-team/so-vits-svc)). diff --git a/doc/en/about/this-project.md b/doc/en/about/this-project.md deleted file mode 100644 index 4e386f9..0000000 --- a/doc/en/about/this-project.md +++ /dev/null @@ -1,13 +0,0 @@ -# About CVSA Project - -CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis -community in a highly automation-assisted way. - -Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an -automated and manually edited way: - -- Metadata of songs (name, duration, publisher, singer, etc.) -- Descriptive information of songs (content introduction, creation background, lyrics, etc.) -- Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites, - likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website. -- Information about artists, albums, vocal synthesizers, and voicebanks. diff --git a/doc/en/api-doc/catalog.md b/doc/en/api-doc/catalog.md deleted file mode 100644 index eea6596..0000000 --- a/doc/en/api-doc/catalog.md +++ /dev/null @@ -1,3 +0,0 @@ -# Catalog - -- [**Songs**](songs.md) diff --git a/doc/en/api-doc/songs.md b/doc/en/api-doc/songs.md deleted file mode 100644 index 914c266..0000000 --- a/doc/en/api-doc/songs.md +++ /dev/null @@ -1,3 +0,0 @@ -# Songs - -Not implemented yet. diff --git a/doc/en/architecure/artificial-intelligence.md b/doc/en/architecure/artificial-intelligence.md deleted file mode 100644 index e9da3dc..0000000 --- a/doc/en/architecure/artificial-intelligence.md +++ /dev/null @@ -1,21 +0,0 @@ -# Artificial Intelligence - -CVSA's automated workflow relies heavily on artificial intelligence for information extraction and classification. - -The AI ​​systems we currently use are: - -### The Filter - -Located at `/filter/` under project root dir, it classifies a video in the -[category 30](../about/scope-of-inclusion.md#category-30) into the following categories: - -- 0: Not related to Chinese vocal synthesis -- 1: A original song with Chinese vocal synthesis -- 2: A cover/remix song with Chinese vocal synthesis - -### The Predictor - -Located at `/pred/`under the project root dir, it predicts the future views of a video. This is a regression model that -takes historical view trends of a video, other contextual information (such as the current time), and future time points -to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified -future time point. diff --git a/doc/en/architecure/crawler.md b/doc/en/architecure/crawler.md deleted file mode 100644 index e60f132..0000000 --- a/doc/en/architecure/crawler.md +++ /dev/null @@ -1,4 +0,0 @@ -# Crawler - -A central aspect of CVSA's technical design is its emphasis on automation. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database. - diff --git a/doc/en/architecure/database-structure/README.md b/doc/en/architecure/database-structure/README.md deleted file mode 100644 index 84d2ba5..0000000 --- a/doc/en/architecure/database-structure/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Database Structure - -CVSA uses [PostgreSQL](https://www.postgresql.org/) as our database. - -All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the -following tables: - -- songs: stores the main information of songs -- bili\_user: stores snapshots of Bilibili user information -- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30). -- labelling\_result: Contains label of videos in `all_data`tagged by our - [AI system](../artificial-intelligence.md#the-filter). -- video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views, etc.), we call this - fetch process as "snapshot". -- snapshot\_schedule: The scheduling information for video snapshots. diff --git a/doc/en/architecure/database-structure/type-of-song.md b/doc/en/architecure/database-structure/type-of-song.md deleted file mode 100644 index 1855f4a..0000000 --- a/doc/en/architecure/database-structure/type-of-song.md +++ /dev/null @@ -1,25 +0,0 @@ -# Type of Song - -The **Unrelated type** refers specifically to videos that are not in our -[Scope of Inclusion](../../about/scope-of-inclusion.md). - -### Table: `songs` - -The `type` column used in the `songs` table. - -| Type | Description | -| ---- | ------------ | -| 0 | Unrelated | -| 1 | Original | -| 2 | Cover | -| 3 | Remix | -| 4 | Instrumental | -| 10 | Others | - -### Table: `labelling_result` - -| Label | Description | -| ----- | ---------------------- | -| 0 | AI tagged: Unrelated | -| 1 | AI tagged: Original | -| 2 | AI tagged: Cover/Remix | diff --git a/doc/en/architecure/overview.md b/doc/en/architecure/overview.md deleted file mode 100644 index fc694fe..0000000 --- a/doc/en/architecure/overview.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -layout: - title: - visible: true - description: - visible: false - tableOfContents: - visible: true - outline: - visible: true - pagination: - visible: true ---- - -# Overview - -The CVSA is a [monorepo](https://en.wikipedia.org/wiki/Monorepo) codebase, mainly using TypeScript as the development language. With [Deno workspace](https://docs.deno.com/runtime/fundamentals/workspaces/), the major part of the codebase is under `packages/`. - -**Project structure:** - -``` -cvsa -├── deno.json -├── packages -│ ├── backend -│ ├── core -│ ├── crawler -│ └── frontend -└── README.md -``` - -**Package Breakdown:** - -* **`backend`**: This package houses the server-side logic, built with the [Hono](https://hono.dev/) web framework. It's responsible for interacting with the database and exposing data through REST and GraphQL APIs for consumption by the frontend, internal applications, and third-party developers. -* **`frontend`**: The user-facing web interface of CVSA is developed using [Astro](https://astro.build/). This package handles the presentation layer, displaying information fetched from the database. -* **`crawler`**: This automated data collection system is a key component of CVSA. It's designed to automatically discover and gather new song data from bilibili, as well as track relevant statistics over time. -* **`core`**: This package contains reusable and generic code that is utilized across multiple workspaces within the CVSA monorepo. - -### Crawler - -Automation is the biggest highlight of CVSA's technical design. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data collection lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database. - diff --git a/doc/zh/.gitbook/assets/1.yaml b/doc/zh/.gitbook/assets/1.yaml deleted file mode 100644 index 29eb6dc..0000000 --- a/doc/zh/.gitbook/assets/1.yaml +++ /dev/null @@ -1,106 +0,0 @@ -openapi: 3.0.0 -info: - title: CVSA API - version: v1 - -servers: - - url: https://api.projectcvsa.com - -paths: - /video/{id}/snapshots: - get: - summary: 获取视频快照列表 - description: 根据视频 ID 获取视频的快照列表。视频 ID 可以是以 "av" 开头的数字,以 "BV" 开头的 12 位字母数字,或者一个正整数。 - parameters: - - in: path - name: id - required: true - schema: - type: string - description: "视频 ID (如: av78977256, BV1KJ411C7CW, 78977256)" - - in: query - name: ps - schema: - type: integer - minimum: 1 - description: 每页返回的快照数量 (pageSize),默认为 1000。 - - in: query - name: pn - schema: - type: integer - minimum: 1 - description: 页码 (pageNumber),用于分页查询。offset 与 pn 只能选择一个。 - - in: query - name: offset - schema: - type: integer - minimum: 1 - description: 偏移量,用于基于偏移量的查询。offset 与 pn 只能选择一个。 - - in: query - name: reverse - schema: - type: boolean - description: 是否反向排序(从旧到新),默认为 false。 - responses: - '200': - description: 成功获取快照列表 - content: - application/json: - schema: - type: array - items: - type: object - properties: - id: - type: integer - description: 快照 ID - aid: - type: integer - description: 视频的 av 号 - views: - type: integer - description: 视频播放量 - coins: - type: integer - description: 视频投币数 - likes: - type: integer - description: 视频点赞数 - favorites: - type: integer - description: 视频收藏数 - shares: - type: integer - description: 视频分享数 - danmakus: - type: integer - description: 视频弹幕数 - replies: - type: integer - description: 视频评论数 - '400': - description: 无效的查询参数 - content: - application/json: - schema: - type: object - properties: - message: - type: string - description: 错误消息 - errors: - type: object - description: 详细的错误信息 - '500': - description: 服务器内部错误 - content: - application/json: - schema: - type: object - properties: - message: - type: string - description: 错误消息 - error: - type: object - description: 详细的错误信息 \ No newline at end of file diff --git a/doc/zh/README.md b/doc/zh/README.md deleted file mode 100644 index 70369d2..0000000 --- a/doc/zh/README.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -icon: hand-wave -description: 「中V档案馆」 (CVSA) 是一个收录中文歌声合成文化圈有关信息的网站。 -layout: - title: - visible: true - description: - visible: true - tableOfContents: - visible: false - outline: - visible: false - pagination: - visible: false ---- - -# 欢迎 - -欢迎阅读CVSA文档! - -该文档包含有关中V档案馆项目的各种信息,包括本项目的有关信息、技术架构、访客指南、API文档等。 - -### 导航 - -
关于本项目一些你可能想知道的…this-project.md
技术架构关于本项目的技术细节Broken link
API 文档 中V档案馆公开 API 的文档Broken link
项目地址GitHubGitee 上查看本项目https://gitee.com/alikia/cvsa
🇺🇸 English VersionHint: There's a language switcher on the top-left corner, just to the right of the logo.CVSA Doc English
diff --git a/doc/zh/SUMMARY.md b/doc/zh/SUMMARY.md deleted file mode 100644 index c44766c..0000000 --- a/doc/zh/SUMMARY.md +++ /dev/null @@ -1,22 +0,0 @@ -# Table of contents - -* [欢迎](README.md) - -## 关于 - -* [关于本项目](about/this-project.md) -* [收录范围](about/scope-of-inclusion.md) - -## 技术架构 - -- [概览](architecture/overview.md) -- [数据库结构](architecture/database-structure/README.md) - - [歌曲类型](architecture/database-structure/type-of-song.md) -- [人工智能](architecture/artificial-intelligence.md) -- [消息队列](architecture/message-queue/README.md) - - [LatestVideosQueue 队列](architecture/message-queue/latestvideosqueue-dui-lie.md) - -## API 文档 - -* [目录](api-doc/catalog.md) -* [视频快照](api-doc/video-snapshot.md) diff --git a/doc/zh/about/scope-of-inclusion.md b/doc/zh/about/scope-of-inclusion.md deleted file mode 100644 index 92ff3be..0000000 --- a/doc/zh/about/scope-of-inclusion.md +++ /dev/null @@ -1,22 +0,0 @@ -# 收录范围 - -中V档案馆收录许多有关中文歌声合成的内容,包括歌曲、专辑、艺术家(发布者、调校师、编曲者等)、歌手以及引擎/声库。 - -对于一首**歌曲**,必须满足以下条件才能被收录到中V档案馆中: - -#### VOCALOID·UATU 分区 - -原则上,中V档案馆中收录的歌曲必须包含在哔哩哔哩 VOCALOID·UTAU -分区(分区ID为30)下的视频中。在某些特殊情况下,此规则可能不是强制的。 - -#### 至少一行中文 - -歌曲的歌词必须包含至少一行中文。这意味着,即使使用了仅支持中文的声库,如果歌曲的歌词中没有中文,也不会被收录到中V档案馆中(例如,跨语种调校)。 - -#### 使用歌声合成器 - -歌曲的至少一行必须由歌声合成器生成(包括和声部分),才能被收录到中V档案馆中。 - -我们将歌声合成器定义为通过算法建模声音特征并根据输入的歌词、音高等参数生成音频的软件或系统,包括基于波形拼接的(如 -VOCALOID、UTAU)和基于 AI 的(如 Synthesizer V、ACE Studio)方法,**但不包括仅改变现有歌声音色的AI声音转换器**(例如 -[so-vits svc](https://github.com/svc-develop-team/so-vits-svc))。 diff --git a/doc/zh/about/this-project.md b/doc/zh/about/this-project.md deleted file mode 100644 index c78d6d2..0000000 --- a/doc/zh/about/this-project.md +++ /dev/null @@ -1,38 +0,0 @@ -# 关于本项目 - -「中V档案馆」是一个旨在收录与展示「中文歌声合成作品」及有关信息的网站。 - -### 创建背景与关联工作 - -纵观整个互联网,对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站: - -- [萌娘百科](https://zh.moegirl.org.cn/): - 收录了大量中V歌曲及歌姬的信息,呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。 -- [VCPedia](https://vcpedia.cn/): - 由原萌娘百科中文歌声合成编辑团队的部分成员搭建,专属于中文歌声合成相关内容的信息集成站点[^1],呈现形式为传统维基(基于[MediaWiki](https://www.mediawiki.org/))。 -- [VocaDB](https://vocadb.net/): - [一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库,其中包含艺术家、唱片、PV 等](#user-content-fn-2)[^2],其中包含大量中文歌声合成作品。 -- [天钿Daily](https://tdd.bunnyxt.com/):一个VC相关数据交流与分享的网站。致力于VC相关数据交流,定期抓取VC相关数据,选取有意义的纬度展示。 - -上述网站中,或多或少存在一些不足,例如: - -- 萌娘百科、VCPedia受限于传统维基,绝大多数内容依赖人工编辑。 -- VocaDB基于结构化数据库构建,由此可以依赖程序生成一些信息,但**条目收录**仍然完全依赖人工完成。 -- VocaDB主要专注于元数据展示,少有关于歌曲、作者等的描述性的文字,也缺乏描述性的背景信息。 -- 天钿Daily只展示歌曲的统计数据及历史趋势,没有关于歌曲其它信息的收集。 - -因此,**中V档案馆**吸取前人经验,克服上述网站的不足,希望做到: - -- 歌曲收录(指发现歌曲并创建条目)的完全自动化 -- 歌曲元信息提取的高度自动化 -- 歌曲统计数据收集的完全自动化 -- 在程序辅助的同时欢迎并鼓励贡献者参与编辑(主要为描述性内容)或纠错 -- 在适当的许可声明下,引用来自上述源的数据,使内容更加全面、丰富。 - ---- - -本文在[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供。 - -[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5),于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。 - -[^2]: 翻译自[VocaDB](https://vocadb.net/),于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。 diff --git a/doc/zh/api-doc/catalog.md b/doc/zh/api-doc/catalog.md deleted file mode 100644 index 5298b49..0000000 --- a/doc/zh/api-doc/catalog.md +++ /dev/null @@ -1,4 +0,0 @@ -# 目录 - -* [视频快照](video-snapshot.md) - diff --git a/doc/zh/api-doc/video-snapshot.md b/doc/zh/api-doc/video-snapshot.md deleted file mode 100644 index c143151..0000000 --- a/doc/zh/api-doc/video-snapshot.md +++ /dev/null @@ -1,6 +0,0 @@ -# 视频快照 - -{% openapi src="../.gitbook/assets/1.yaml" path="/video/{id}/snapshots" method="get" %} -[1.yaml](../.gitbook/assets/1.yaml) -{% endopenapi %} - diff --git a/doc/zh/architecture/artificial-intelligence.md b/doc/zh/architecture/artificial-intelligence.md deleted file mode 100644 index 53caba1..0000000 --- a/doc/zh/architecture/artificial-intelligence.md +++ /dev/null @@ -1,13 +0,0 @@ -# 人工智能 - -CVSA 的自动化工作流高度依赖人工智能进行信息提取和分类。 - -我们目前使用的 AI 系统有: - -#### Filter - -位于项目根目录下的 `/filter/`,它将 [30 分区](../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中的视频分为以下类别: - -- 0:与中文人声合成无关 -- 1:中文人声合成原创曲 -- 2:中文人声合成的翻唱/混音歌曲 diff --git a/doc/zh/architecture/database-structure/README.md b/doc/zh/architecture/database-structure/README.md deleted file mode 100644 index 44a5b5d..0000000 --- a/doc/zh/architecture/database-structure/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# 数据库结构 - -CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。 - -CVSA 设计了两个 - -CVSA 的所有公开数据(不包括用户的个人数据)都存储在名为 `cvsa_main` 的数据库中,该数据库包含以下表: - -- songs:存储歌曲的主要信息 -- bilibili\_user:存储 Bilibili 用户信息快照 -- bilibili\_metadata:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据 -- labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。 -- latest\_video\_snapshot:存储视频最新的快照 -- video\_snapshot:存储视频的快照,包括特定时间下视频的统计信息(播放量、点赞数等) -- snapshot\_schedule:视频快照的规划信息,为辅助表 diff --git a/doc/zh/architecture/database-structure/type-of-song.md b/doc/zh/architecture/database-structure/type-of-song.md deleted file mode 100644 index 94630e7..0000000 --- a/doc/zh/architecture/database-structure/type-of-song.md +++ /dev/null @@ -1,24 +0,0 @@ -# 歌曲类型 - -**不相关** 特指不在我们的 [收录范围](../../about/scope-of-inclusion.md) 中的视频。 - -#### 表格:`songs` - -`songs` 表格中使用的 `type` 列。 - -| 类型 | 说明 | -| ---- | ------------ | -| 0 | 不相关 | -| 1 | 原创 | -| 2 | 翻唱 (Cover) | -| 3 | 混音 (Remix) | -| 4 | 纯音乐 | -| 10 | 其他 | - -#### 表格:`labelling_result` - -| 标签 | 说明 | -| ---- | ------------------ | -| 0 | AI 标记:不相关 | -| 1 | AI 标记:原创 | -| 2 | AI 标记:翻唱/混音 | diff --git a/doc/zh/architecture/message-queue/README.md b/doc/zh/architecture/message-queue/README.md deleted file mode 100644 index b2312f5..0000000 --- a/doc/zh/architecture/message-queue/README.md +++ /dev/null @@ -1 +0,0 @@ -# 消息队列 diff --git a/doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md b/doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md deleted file mode 100644 index 744e878..0000000 --- a/doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md +++ /dev/null @@ -1 +0,0 @@ -# LatestVideosQueue 队列 diff --git a/doc/zh/architecture/overview.md b/doc/zh/architecture/overview.md deleted file mode 100644 index 636b5a9..0000000 --- a/doc/zh/architecture/overview.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -layout: - title: - visible: true - description: - visible: false - tableOfContents: - visible: true - outline: - visible: true - pagination: - visible: true ---- - -# 概览 - -整个CVSA项目分为三个组件:**crawler**, **frontend** 和 **backend。** - -### **crawler** - -位于项目目录`packages/crawler` 下,它负责以下工作: - -- 抓取新的视频并收录作品 -- 持续监控视频的播放量等统计信息 - -整个 crawler 由 BullMQ 消息队列驱动,使用 Redis 和 PostgreSQL 管理状态。 diff --git a/ml_new/.gitignore b/ml_new/.gitignore new file mode 100644 index 0000000..c0190e1 --- /dev/null +++ b/ml_new/.gitignore @@ -0,0 +1 @@ +datasets \ No newline at end of file