From 126752a28862657aa125f76b9b64e00a1e312d68 Mon Sep 17 00:00:00 2001
From: alikia2x <alikia2x@outlook.com>
Date: Tue, 9 Dec 2025 23:47:40 +0800
Subject: [PATCH] ref: remove doc/en and doc/zh GitBook docs, add ignore rules

---
 .gitignore                                    |   4 +-
 .kilocodeignore                               |   1 +
 doc/en/README.md                              |  25 -----
 doc/en/SUMMARY.md                             |  21 ----
 doc/en/about/scope-of-inclusion.md            |  48 --------
 doc/en/about/this-project.md                  |  13 ---
 doc/en/api-doc/catalog.md                     |   3 -
 doc/en/api-doc/songs.md                       |   3 -
 doc/en/architecure/artificial-intelligence.md |  21 ----
 doc/en/architecure/crawler.md                 |   4 -
 .../architecure/database-structure/README.md  |  15 ---
 .../database-structure/type-of-song.md        |  25 -----
 doc/en/architecure/overview.md                |  42 -------
 doc/zh/.gitbook/assets/1.yaml                 | 106 ------------------
 doc/zh/README.md                              |  25 -----
 doc/zh/SUMMARY.md                             |  22 ----
 doc/zh/about/scope-of-inclusion.md            |  22 ----
 doc/zh/about/this-project.md                  |  38 -------
 doc/zh/api-doc/catalog.md                     |   4 -
 doc/zh/api-doc/video-snapshot.md              |   6 -
 .../architecture/artificial-intelligence.md   |  13 ---
 .../architecture/database-structure/README.md |  15 ---
 .../database-structure/type-of-song.md        |  24 ----
 doc/zh/architecture/message-queue/README.md   |   1 -
 .../latestvideosqueue-dui-lie.md              |   1 -
 doc/zh/architecture/overview.md               |  26 -----
 ml_new/.gitignore                             |   1 +
 27 files changed, 5 insertions(+), 524 deletions(-)
 create mode 100644 .kilocodeignore
 delete mode 100644 doc/en/README.md
 delete mode 100644 doc/en/SUMMARY.md
 delete mode 100644 doc/en/about/scope-of-inclusion.md
 delete mode 100644 doc/en/about/this-project.md
 delete mode 100644 doc/en/api-doc/catalog.md
 delete mode 100644 doc/en/api-doc/songs.md
 delete mode 100644 doc/en/architecure/artificial-intelligence.md
 delete mode 100644 doc/en/architecure/crawler.md
 delete mode 100644 doc/en/architecure/database-structure/README.md
 delete mode 100644 doc/en/architecure/database-structure/type-of-song.md
 delete mode 100644 doc/en/architecure/overview.md
 delete mode 100644 doc/zh/.gitbook/assets/1.yaml
 delete mode 100644 doc/zh/README.md
 delete mode 100644 doc/zh/SUMMARY.md
 delete mode 100644 doc/zh/about/scope-of-inclusion.md
 delete mode 100644 doc/zh/about/this-project.md
 delete mode 100644 doc/zh/api-doc/catalog.md
 delete mode 100644 doc/zh/api-doc/video-snapshot.md
 delete mode 100644 doc/zh/architecture/artificial-intelligence.md
 delete mode 100644 doc/zh/architecture/database-structure/README.md
 delete mode 100644 doc/zh/architecture/database-structure/type-of-song.md
 delete mode 100644 doc/zh/architecture/message-queue/README.md
 delete mode 100644 doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md
 delete mode 100644 doc/zh/architecture/overview.md
 create mode 100644 ml_new/.gitignore

diff --git a/.gitignore b/.gitignore
index 3719e98..4ea3796 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,4 +45,6 @@ ucaptcha-config.yaml
 
 temp/
 
-meili
\ No newline at end of file
+meili
+
+.turbo
\ No newline at end of file
diff --git a/.kilocodeignore b/.kilocodeignore
new file mode 100644
index 0000000..329e85b
--- /dev/null
+++ b/.kilocodeignore
@@ -0,0 +1 @@
+packages/core/drizzle/main/meta
\ No newline at end of file
diff --git a/doc/en/README.md b/doc/en/README.md
deleted file mode 100644
index 2eadf84..0000000
--- a/doc/en/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
----
-icon: hand-wave
-layout:
-  title:
-    visible: true
-  description:
-    visible: false
-  tableOfContents:
-    visible: false
-  outline:
-    visible: false
-  pagination:
-    visible: false
----
-
-# Welcome
-
-Welcome to the CVSA Documentation!
-
-This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors,
-etc.
-
-### Jump right in
-
-<table data-view="cards"><thead><tr><th></th><th></th><th data-hidden data-card-cover data-type="files"></th><th data-hidden></th><th data-hidden data-card-target data-type="content-ref"></th></tr></thead><tbody><tr><td><strong>About CVSA</strong></td><td>Some information you might want to know about.</td><td></td><td></td><td><a href="about/this-project.md">this-project.md</a></td></tr><tr><td><strong>Architecture</strong></td><td>The technical details about how CVSA was built.</td><td></td><td></td><td><a href="broken-reference">Broken link</a></td></tr><tr><td><strong>API Doc</strong></td><td>Documentation about APIs provided by CVSA.</td><td></td><td></td><td><a href="broken-reference">Broken link</a></td></tr></tbody></table>
diff --git a/doc/en/SUMMARY.md b/doc/en/SUMMARY.md
deleted file mode 100644
index 99ca460..0000000
--- a/doc/en/SUMMARY.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Table of contents
-
-* [Welcome](README.md)
-
-## About
-
-* [About CVSA Project](about/this-project.md)
-* [Scope of Inclusion](about/scope-of-inclusion.md)
-
-## Architecure
-
-* [Overview](architecure/overview.md)
-* [Crawler](architecure/crawler.md)
-* [Database Structure](architecure/database-structure/README.md)
-  * [Type of Song](architecure/database-structure/type-of-song.md)
-* [Artificial Intelligence](architecure/artificial-intelligence.md)
-
-## API Doc
-
-* [Catalog](api-doc/catalog.md)
-* [Songs](api-doc/songs.md)
diff --git a/doc/en/about/scope-of-inclusion.md b/doc/en/about/scope-of-inclusion.md
deleted file mode 100644
index 136061c..0000000
--- a/doc/en/about/scope-of-inclusion.md
+++ /dev/null
@@ -1,48 +0,0 @@
-# Scope of Inclusion
-
-CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publisher, manipulators,
-arranger, etc), singers and voice engines / voicebanks.&#x20;
-
-For a **song**, it must meet the following conditions to be included in CVSA:
-
-### Category 30
-
-In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in
-[Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our
-[automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been
-uploaded to bilibili / categorized under this category.
-
-#### NEWS
-
-Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be
-entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the
-parent category "Music").&#x20;
-
-According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly
-published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However,
-there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under
-the "Music General" sub-category.\
-We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated
-program's crawling.
-
-### At Leats One Line of Chinese / Chinese Virtual Singer
-
-The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics of the song do not contain
-Chinese, it will only be included in the CVSA only if a Chinese virtual singer has been used.
-
-We define a **Chinese virtual singer** as follows:
-
-1. The singer primarily uses Chinese voicebank (i.e. the most widely used voickbank for the singer is Chinese)
-2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or
-   Taiwan.
-
-### Using Vocal Synthesizer
-
-To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony
-vocals).
-
-We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically
-modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics,
-encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio)
-approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g.,
-[so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).
diff --git a/doc/en/about/this-project.md b/doc/en/about/this-project.md
deleted file mode 100644
index 4e386f9..0000000
--- a/doc/en/about/this-project.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# About CVSA Project
-
-CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis
-community in a highly automation-assisted way.&#x20;
-
-Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an
-automated and manually edited way:
-
-- Metadata of songs (name, duration, publisher, singer, etc.)
-- Descriptive information of songs (content introduction, creation background, lyrics, etc.)
-- Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites,
-  likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
-- Information about artists, albums, vocal synthesizers, and voicebanks.
diff --git a/doc/en/api-doc/catalog.md b/doc/en/api-doc/catalog.md
deleted file mode 100644
index eea6596..0000000
--- a/doc/en/api-doc/catalog.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Catalog
-
-- [**Songs**](songs.md)
diff --git a/doc/en/api-doc/songs.md b/doc/en/api-doc/songs.md
deleted file mode 100644
index 914c266..0000000
--- a/doc/en/api-doc/songs.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Songs
-
-Not implemented yet.
diff --git a/doc/en/architecure/artificial-intelligence.md b/doc/en/architecure/artificial-intelligence.md
deleted file mode 100644
index e9da3dc..0000000
--- a/doc/en/architecure/artificial-intelligence.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Artificial Intelligence
-
-CVSA's automated workflow relies heavily on artificial intelligence for information extraction and classification.
-
-The AI ​​systems we currently use are:
-
-### The Filter
-
-Located at `/filter/` under project root dir, it classifies a video in the
-[category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
-
-- 0: Not related to Chinese vocal synthesis
-- 1: A original song with Chinese vocal synthesis
-- 2: A cover/remix song with Chinese vocal synthesis
-
-### The Predictor
-
-Located at `/pred/`under the project root dir, it predicts the future views of a video. This is a regression model that
-takes historical view trends of a video, other contextual information (such as the current time), and future time points
-to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified
-future time point.
diff --git a/doc/en/architecure/crawler.md b/doc/en/architecure/crawler.md
deleted file mode 100644
index e60f132..0000000
--- a/doc/en/architecure/crawler.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Crawler
-
-A central aspect of CVSA's technical design is its emphasis on automation. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
-
diff --git a/doc/en/architecure/database-structure/README.md b/doc/en/architecure/database-structure/README.md
deleted file mode 100644
index 84d2ba5..0000000
--- a/doc/en/architecure/database-structure/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Database Structure
-
-CVSA uses [PostgreSQL](https://www.postgresql.org/) as our database.
-
-All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the
-following tables:
-
-- songs: stores the main information of songs
-- bili\_user: stores snapshots of Bilibili user information
-- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
-- labelling\_result: Contains label of videos in `all_data`tagged by our
-  [AI system](../artificial-intelligence.md#the-filter).
-- video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views, etc.), we call this
-  fetch process as "snapshot".
-- snapshot\_schedule: The scheduling information for video snapshots.
diff --git a/doc/en/architecure/database-structure/type-of-song.md b/doc/en/architecure/database-structure/type-of-song.md
deleted file mode 100644
index 1855f4a..0000000
--- a/doc/en/architecure/database-structure/type-of-song.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Type of Song
-
-The **Unrelated type** refers specifically to videos that are not in our
-[Scope of Inclusion](../../about/scope-of-inclusion.md).
-
-### Table: `songs`
-
-The `type` column used in the `songs` table.
-
-| Type | Description  |
-| ---- | ------------ |
-| 0    | Unrelated    |
-| 1    | Original     |
-| 2    | Cover        |
-| 3    | Remix        |
-| 4    | Instrumental |
-| 10   | Others       |
-
-### Table: `labelling_result`
-
-| Label | Description            |
-| ----- | ---------------------- |
-| 0     | AI tagged: Unrelated   |
-| 1     | AI tagged: Original    |
-| 2     | AI tagged: Cover/Remix |
diff --git a/doc/en/architecure/overview.md b/doc/en/architecure/overview.md
deleted file mode 100644
index fc694fe..0000000
--- a/doc/en/architecure/overview.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-layout:
-  title:
-    visible: true
-  description:
-    visible: false
-  tableOfContents:
-    visible: true
-  outline:
-    visible: true
-  pagination:
-    visible: true
----
-
-# Overview
-
-The CVSA is a [monorepo](https://en.wikipedia.org/wiki/Monorepo) codebase, mainly using TypeScript as the development language. With [Deno workspace](https://docs.deno.com/runtime/fundamentals/workspaces/), the major part of the codebase is under `packages/`.&#x20;
-
-**Project structure:**
-
-```
-cvsa
-├── deno.json
-├── packages
-│   ├── backend
-│   ├── core
-│   ├── crawler
-│   └── frontend
-└── README.md
-```
-
-**Package Breakdown:**
-
-* **`backend`**: This package houses the server-side logic, built with the [Hono](https://hono.dev/) web framework. It's responsible for interacting with the database and exposing data through REST and GraphQL APIs for consumption by the frontend, internal applications, and third-party developers.
-* **`frontend`**: The user-facing web interface of CVSA is developed using [Astro](https://astro.build/). This package handles the presentation layer, displaying information fetched from the database.
-* **`crawler`**: This automated data collection system is a key component of CVSA. It's designed to automatically discover and gather new song data from bilibili, as well as track relevant statistics over time.
-* **`core`**: This package contains reusable and generic code that is utilized across multiple workspaces within the CVSA monorepo.
-
-### Crawler
-
-Automation is the biggest highlight of CVSA's technical design. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data collection lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
-
diff --git a/doc/zh/.gitbook/assets/1.yaml b/doc/zh/.gitbook/assets/1.yaml
deleted file mode 100644
index 29eb6dc..0000000
--- a/doc/zh/.gitbook/assets/1.yaml
+++ /dev/null
@@ -1,106 +0,0 @@
-openapi: 3.0.0
-info:
-  title: CVSA API
-  version: v1
-
-servers:
-  - url: https://api.projectcvsa.com
-
-paths:
-  /video/{id}/snapshots:
-    get:
-      summary: 获取视频快照列表
-      description: 根据视频 ID 获取视频的快照列表。视频 ID 可以是以 "av" 开头的数字，以 "BV" 开头的 12 位字母数字，或者一个正整数。
-      parameters:
-        - in: path
-          name: id
-          required: true
-          schema:
-            type: string
-          description: "视频 ID (如: av78977256, BV1KJ411C7CW, 78977256)"
-        - in: query
-          name: ps
-          schema:
-            type: integer
-            minimum: 1
-          description: 每页返回的快照数量 (pageSize)，默认为 1000。
-        - in: query
-          name: pn
-          schema:
-            type: integer
-            minimum: 1
-          description: 页码 (pageNumber)，用于分页查询。offset 与 pn 只能选择一个。
-        - in: query
-          name: offset
-          schema:
-            type: integer
-            minimum: 1
-          description: 偏移量，用于基于偏移量的查询。offset 与 pn 只能选择一个。
-        - in: query
-          name: reverse
-          schema:
-            type: boolean
-          description: 是否反向排序（从旧到新），默认为 false。
-      responses:
-        '200':
-          description: 成功获取快照列表
-          content:
-            application/json:
-              schema:
-                type: array
-                items:
-                  type: object
-                  properties:
-                    id:
-                      type: integer
-                      description: 快照 ID
-                    aid:
-                      type: integer
-                      description: 视频的 av 号
-                    views:
-                      type: integer
-                      description: 视频播放量
-                    coins:
-                      type: integer
-                      description: 视频投币数
-                    likes:
-                      type: integer
-                      description: 视频点赞数
-                    favorites:
-                      type: integer
-                      description: 视频收藏数
-                    shares:
-                      type: integer
-                      description: 视频分享数
-                    danmakus:
-                      type: integer
-                      description: 视频弹幕数
-                    replies:
-                      type: integer
-                      description: 视频评论数
-        '400':
-          description: 无效的查询参数
-          content:
-            application/json:
-              schema:
-                type: object
-                properties:
-                  message:
-                    type: string
-                    description: 错误消息
-                  errors:
-                    type: object
-                    description: 详细的错误信息
-        '500':
-          description: 服务器内部错误
-          content:
-            application/json:
-              schema:
-                type: object
-                properties:
-                  message:
-                    type: string
-                    description: 错误消息
-                  error:
-                    type: object
-                    description: 详细的错误信息
\ No newline at end of file
diff --git a/doc/zh/README.md b/doc/zh/README.md
deleted file mode 100644
index 70369d2..0000000
--- a/doc/zh/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
----
-icon: hand-wave
-description: 「中V档案馆」 (CVSA) 是一个收录中文歌声合成文化圈有关信息的网站。
-layout:
-  title:
-    visible: true
-  description:
-    visible: true
-  tableOfContents:
-    visible: false
-  outline:
-    visible: false
-  pagination:
-    visible: false
----
-
-# 欢迎
-
-欢迎阅读CVSA文档！
-
-该文档包含有关中V档案馆项目的各种信息，包括本项目的有关信息、技术架构、访客指南、API文档等。
-
-### 导航
-
-<table data-view="cards"><thead><tr><th></th><th></th><th data-hidden data-card-cover data-type="files"></th><th data-hidden></th><th data-hidden data-card-target data-type="content-ref"></th></tr></thead><tbody><tr><td><strong>关于本项目</strong></td><td>一些你可能想知道的…</td><td></td><td></td><td><a href="about/this-project.md">this-project.md</a></td></tr><tr><td><strong>技术架构</strong></td><td>关于本项目的技术细节</td><td></td><td></td><td><a href="broken-reference">Broken link</a></td></tr><tr><td><strong>API 文档</strong> </td><td>中V档案馆公开 API 的文档</td><td></td><td></td><td><a href="broken-reference">Broken link</a></td></tr><tr><td><strong>项目地址</strong></td><td>在 <a href="https://github.com/alikia2x/cvsa">GitHub</a> 或 <a href="https://gitee.com/alikia/cvsa">Gitee</a> 上查看本项目</td><td></td><td></td><td><a href="https://gitee.com/alikia/cvsa">https://gitee.com/alikia/cvsa</a></td></tr><tr><td>🇺🇸 English Version</td><td>Hint: There's a language switcher on the top-left corner, just to the right of the logo.</td><td></td><td></td><td><a href="https://app.gitbook.com/o/ZRcyqFK0ovlJduZb50X0/s/89Gi0XfqMigoQkEYJZZl/">CVSA Doc English</a></td></tr></tbody></table>
diff --git a/doc/zh/SUMMARY.md b/doc/zh/SUMMARY.md
deleted file mode 100644
index c44766c..0000000
--- a/doc/zh/SUMMARY.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Table of contents
-
-* [欢迎](README.md)
-
-## 关于 <a href="#about" id="about"></a>
-
-* [关于本项目](about/this-project.md)
-* [收录范围](about/scope-of-inclusion.md)
-
-## 技术架构 <a href="#architecture" id="architecture"></a>
-
-- [概览](architecture/overview.md)
-- [数据库结构](architecture/database-structure/README.md)
-  - [歌曲类型](architecture/database-structure/type-of-song.md)
-- [人工智能](architecture/artificial-intelligence.md)
-- [消息队列](architecture/message-queue/README.md)
-  - [LatestVideosQueue 队列](architecture/message-queue/latestvideosqueue-dui-lie.md)
-
-## API 文档 <a href="#api-doc" id="api-doc"></a>
-
-* [目录](api-doc/catalog.md)
-* [视频快照](api-doc/video-snapshot.md)
diff --git a/doc/zh/about/scope-of-inclusion.md b/doc/zh/about/scope-of-inclusion.md
deleted file mode 100644
index 92ff3be..0000000
--- a/doc/zh/about/scope-of-inclusion.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# 收录范围
-
-中V档案馆收录许多有关中文歌声合成的内容，包括歌曲、专辑、艺术家（发布者、调校师、编曲者等）、歌手以及引擎/声库。&#x20;
-
-对于一首**歌曲**，必须满足以下条件才能被收录到中V档案馆中：
-
-#### VOCALOID·UATU 分区
-
-原则上，中V档案馆中收录的歌曲必须包含在哔哩哔哩 VOCALOID·UTAU
-分区（分区ID为30）下的视频中。在某些特殊情况下，此规则可能不是强制的。
-
-#### 至少一行中文
-
-歌曲的歌词必须包含至少一行中文。这意味着，即使使用了仅支持中文的声库，如果歌曲的歌词中没有中文，也不会被收录到中V档案馆中（例如，跨语种调校）。
-
-#### 使用歌声合成器
-
-歌曲的至少一行必须由歌声合成器生成（包括和声部分），才能被收录到中V档案馆中。
-
-我们将歌声合成器定义为通过算法建模声音特征并根据输入的歌词、音高等参数生成音频的软件或系统，包括基于波形拼接的（如
-VOCALOID、UTAU）和基于 AI 的（如 Synthesizer V、ACE Studio）方法，**但不包括仅改变现有歌声音色的AI声音转换器**（例如
-[so-vits svc](https://github.com/svc-develop-team/so-vits-svc)）。
diff --git a/doc/zh/about/this-project.md b/doc/zh/about/this-project.md
deleted file mode 100644
index c78d6d2..0000000
--- a/doc/zh/about/this-project.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# 关于本项目
-
-「中V档案馆」是一个旨在收录与展示「中文歌声合成作品」及有关信息的网站。
-
-### 创建背景与关联工作
-
-纵观整个互联网，对于「中文歌声合成」或「中文虚拟歌手」（常简称为中V或VC）相关信息进行较为系统、全面地整理收集的主要有以下几个网站：
-
-- [萌娘百科](https://zh.moegirl.org.cn/):
-  收录了大量中V歌曲及歌姬的信息，呈现形式为传统维基（基于[MediaWiki](https://www.mediawiki.org/)）。
-- [VCPedia](https://vcpedia.cn/):
-  由原萌娘百科中文歌声合成编辑团队的部分成员搭建，专属于中文歌声合成相关内容的信息集成站点[^1]，呈现形式为传统维基（基于[MediaWiki](https://www.mediawiki.org/)）。
-- [VocaDB](https://vocadb.net/):
-  [一个围绕 Vocaloid、UTAU 和其他歌声合成器的协作数据库，其中包含艺术家、唱片、PV 等](#user-content-fn-2)[^2]，其中包含大量中文歌声合成作品。
-- [天钿Daily](https://tdd.bunnyxt.com/)：一个VC相关数据交流与分享的网站。致力于VC相关数据交流，定期抓取VC相关数据，选取有意义的纬度展示。
-
-上述网站中，或多或少存在一些不足，例如：
-
-- 萌娘百科、VCPedia受限于传统维基，绝大多数内容依赖人工编辑。
-- VocaDB基于结构化数据库构建，由此可以依赖程序生成一些信息，但**条目收录**仍然完全依赖人工完成。
-- VocaDB主要专注于元数据展示，少有关于歌曲、作者等的描述性的文字，也缺乏描述性的背景信息。
-- 天钿Daily只展示歌曲的统计数据及历史趋势，没有关于歌曲其它信息的收集。
-
-因此，**中V档案馆**吸取前人经验，克服上述网站的不足，希望做到：
-
-- 歌曲收录（指发现歌曲并创建条目）的完全自动化
-- 歌曲元信息提取的高度自动化
-- 歌曲统计数据收集的完全自动化
-- 在程序辅助的同时欢迎并鼓励贡献者参与编辑（主要为描述性内容）或纠错
-- 在适当的许可声明下，引用来自上述源的数据，使内容更加全面、丰富。
-
----
-
-本文在[CC BY-NC-SA 4.0协议](https://creativecommons.org/licenses/by-nc-sa/4.0/)提供。
-
-[^1]: 引用自[VCPedia](https://vcpedia.cn/%E9%A6%96%E9%A1%B5)，于[知识共享 署名-非商业性使用-相同方式共享 3.0中国大陆 (CC BY-NC-SA 3.0 CN) 许可协议](https://creativecommons.org/licenses/by-nc-sa/3.0/cn/)下提供。
-
-[^2]: 翻译自[VocaDB](https://vocadb.net/)，于[CC BY 4.0协议](https://creativecommons.org/licenses/by/4.0/)下提供。
diff --git a/doc/zh/api-doc/catalog.md b/doc/zh/api-doc/catalog.md
deleted file mode 100644
index 5298b49..0000000
--- a/doc/zh/api-doc/catalog.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# 目录
-
-* [视频快照](video-snapshot.md)
-
diff --git a/doc/zh/api-doc/video-snapshot.md b/doc/zh/api-doc/video-snapshot.md
deleted file mode 100644
index c143151..0000000
--- a/doc/zh/api-doc/video-snapshot.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# 视频快照
-
-{% openapi src="../.gitbook/assets/1.yaml" path="/video/{id}/snapshots" method="get" %}
-[1.yaml](../.gitbook/assets/1.yaml)
-{% endopenapi %}
-
diff --git a/doc/zh/architecture/artificial-intelligence.md b/doc/zh/architecture/artificial-intelligence.md
deleted file mode 100644
index 53caba1..0000000
--- a/doc/zh/architecture/artificial-intelligence.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# 人工智能
-
-CVSA 的自动化工作流高度依赖人工智能进行信息提取和分类。
-
-我们目前使用的 AI 系统有：
-
-#### Filter
-
-位于项目根目录下的 `/filter/`，它将 [30 分区](../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中的视频分为以下类别：
-
-- 0：与中文人声合成无关
-- 1：中文人声合成原创曲
-- 2：中文人声合成的翻唱/混音歌曲
diff --git a/doc/zh/architecture/database-structure/README.md b/doc/zh/architecture/database-structure/README.md
deleted file mode 100644
index 44a5b5d..0000000
--- a/doc/zh/architecture/database-structure/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# 数据库结构
-
-CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。
-
-CVSA 设计了两个
-
-CVSA 的所有公开数据（不包括用户的个人数据）都存储在名为 `cvsa_main` 的数据库中，该数据库包含以下表：
-
-- songs：存储歌曲的主要信息
-- bilibili\_user：存储 Bilibili 用户信息快照
-- bilibili\_metadata：[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据
-- labelling\_result：包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
-- latest\_video\_snapshot：存储视频最新的快照
-- video\_snapshot：存储视频的快照，包括特定时间下视频的统计信息（播放量、点赞数等）
-- snapshot\_schedule：视频快照的规划信息，为辅助表
diff --git a/doc/zh/architecture/database-structure/type-of-song.md b/doc/zh/architecture/database-structure/type-of-song.md
deleted file mode 100644
index 94630e7..0000000
--- a/doc/zh/architecture/database-structure/type-of-song.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# 歌曲类型
-
-**不相关** 特指不在我们的 [收录范围](../../about/scope-of-inclusion.md) 中的视频。
-
-#### 表格：`songs`
-
-`songs` 表格中使用的 `type` 列。
-
-| 类型 | 说明         |
-| ---- | ------------ |
-| 0    | 不相关       |
-| 1    | 原创         |
-| 2    | 翻唱 (Cover) |
-| 3    | 混音 (Remix) |
-| 4    | 纯音乐       |
-| 10   | 其他         |
-
-#### 表格：`labelling_result`
-
-| 标签 | 说明               |
-| ---- | ------------------ |
-| 0    | AI 标记：不相关    |
-| 1    | AI 标记：原创      |
-| 2    | AI 标记：翻唱/混音 |
diff --git a/doc/zh/architecture/message-queue/README.md b/doc/zh/architecture/message-queue/README.md
deleted file mode 100644
index b2312f5..0000000
--- a/doc/zh/architecture/message-queue/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# 消息队列
diff --git a/doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md b/doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md
deleted file mode 100644
index 744e878..0000000
--- a/doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md
+++ /dev/null
@@ -1 +0,0 @@
-# LatestVideosQueue 队列
diff --git a/doc/zh/architecture/overview.md b/doc/zh/architecture/overview.md
deleted file mode 100644
index 636b5a9..0000000
--- a/doc/zh/architecture/overview.md
+++ /dev/null
@@ -1,26 +0,0 @@
----
-layout:
-  title:
-    visible: true
-  description:
-    visible: false
-  tableOfContents:
-    visible: true
-  outline:
-    visible: true
-  pagination:
-    visible: true
----
-
-# 概览
-
-整个CVSA项目分为三个组件：**crawler**, **frontend** 和 **backend。**
-
-### **crawler**
-
-位于项目目录`packages/crawler` 下，它负责以下工作：
-
-- 抓取新的视频并收录作品
-- 持续监控视频的播放量等统计信息
-
-整个 crawler 由 BullMQ 消息队列驱动，使用 Redis 和 PostgreSQL 管理状态。
diff --git a/ml_new/.gitignore b/ml_new/.gitignore
new file mode 100644
index 0000000..c0190e1
--- /dev/null
+++ b/ml_new/.gitignore
@@ -0,0 +1 @@
+datasets
\ No newline at end of file