diff --git a/doc/en/.gitbook/assets/api-doc.yaml b/doc/en/.gitbook/assets/api-doc.yaml
new file mode 100644
index 0000000..b710850
--- /dev/null
+++ b/doc/en/.gitbook/assets/api-doc.yaml
@@ -0,0 +1,107 @@
+openapi: 3.1.0
+info:
+  title: CVSA API
+  version: v1
+
+servers:
+  - url: https://api.projectcvsa.com
+
+paths:
+  /video/{id}/snapshots:
+    get:
+      summary: Get list of video snapshots
+      description: Get a list of video snapshots by video ID. The ID can be "av" + number, "BV" + a 10-character alphanumeric string, or an integer (the av number on bilibili).
+      parameters:
+        - in: path
+          name: id
+          required: true
+          schema:
+            type: string
+          description: "The ID of the video (e.g. av78977256, BV1KJ411C7CW, 78977256)"
+        - in: query
+          name: ps
+          schema:
+            type: integer
+            minimum: 1
+            default: 1000
+          description: The number of snapshots returned per page (pageSize), the default is 1000.
+        - in: query
+          name: pn
+          schema:
+            type: integer
+            minimum: 1
+          description: The page number, used for pagination. Only one of offset and pn can be specified.
+        - in: query
+          name: offset
+          schema:
+            type: integer
+            minimum: 1
+          description: The offset for offset-based queries. Only one of offset and pn can be specified.
+        - in: query
+          name: reverse
+          schema:
+            type: boolean
+          description: Reverse snapshots from old to new if set to true. Default is false.
+      responses:
+        "200":
+          description: Successfully retrieved snapshots
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  type: object
+                  properties:
+                    id:
+                      type: integer
+                      description: Snapshot ID (Not the same as the video ID)
+                    aid:
+                      type: integer
+                      description: The av number of the video
+                    views:
+                      type: integer
+                      description: The number of views the video has
+                    coins:
+                      type: integer
+                      description: The number of coins the video has
+                    likes:
+                      type: integer
+                      description: The number of likes the video has
+                    favorites:
+                      type: integer
+                      description: The number of favorites the video has
+                    shares:
+                      type: integer
+                      description: The number of shares the video has
+                    danmakus:
+                      type: integer
+                      description: The number of danmakus the video has
+                    replies:
+                      type: integer
+                      description: The number of replies the video has
+        "400":
+          description: Invalid query parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  message:
+                    type: string
+                    description: Error message
+                  errors:
+                    type: object
+                    description: Detailed error information
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  message:
+                    type: string
+                    description: Error message
+                  error:
+                    type: object
+                    description: Detailed error information
diff --git a/doc/en/README.md b/doc/en/README.md
index 2eadf84..64ca250 100644
--- a/doc/en/README.md
+++ b/doc/en/README.md
@@ -17,9 +17,8 @@ layout:
 
 Welcome to the CVSA Documentation!
 
-This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors,
-etc.
+This doc contains various information about the CVSA project, including technical architecture, tutorials for visitors, etc.
 
 ### Jump right in
 
-<table data-view="cards"><thead><tr><th></th><th></th><th data-hidden data-card-cover data-type="files"></th><th data-hidden></th><th data-hidden data-card-target data-type="content-ref"></th></tr></thead><tbody><tr><td><strong>About CVSA</strong></td><td>Some information you might want to know about.</td><td></td><td></td><td><a href="about/this-project.md">this-project.md</a></td></tr><tr><td><strong>Architecture</strong></td><td>The technical details about how CVSA was built.</td><td></td><td></td><td><a href="broken-reference">Broken link</a></td></tr><tr><td><strong>API Doc</strong></td><td>Documentation about APIs provided by CVSA.</td><td></td><td></td><td><a href="broken-reference">Broken link</a></td></tr></tbody></table>
+<table data-view="cards"><thead><tr><th></th><th></th><th data-hidden data-card-cover data-type="files"></th><th data-hidden></th><th data-hidden data-card-target data-type="content-ref"></th></tr></thead><tbody><tr><td><strong>About this project</strong></td><td>Some information you might want to know about.</td><td></td><td></td><td><a href="about/this-project.md">this-project.md</a></td></tr><tr><td><strong>Architecture</strong></td><td>The technical details about how CVSA was built.</td><td></td><td></td><td><a href="broken-reference">Broken link</a></td></tr><tr><td><strong>API Doc</strong></td><td>Documentation about APIs provided by CVSA.</td><td></td><td></td><td><a href="broken-reference/">broken-reference</a></td></tr><tr><td><strong>Source Code</strong></td><td>View this project on GitHub</td><td></td><td></td><td><a href="https://github.com/alikia2x/cvsa">https://github.com/alikia2x/cvsa</a></td></tr><tr><td>🇨🇳 中文版本</td><td>浏览本文档的中文版本</td><td></td><td></td><td><a href="https://app.gitbook.com/s/pv6AFgCPzXeRmP9slTBR/">欢迎</a></td></tr></tbody></table>
diff --git a/doc/en/SUMMARY.md b/doc/en/SUMMARY.md
index 99ca460..de4daf1 100644
--- a/doc/en/SUMMARY.md
+++ b/doc/en/SUMMARY.md
@@ -4,7 +4,7 @@
 
 ## About
 
-* [About CVSA Project](about/this-project.md)
+* [About the CVSA Project](about/this-project.md)
 * [Scope of Inclusion](about/scope-of-inclusion.md)
 
 ## Architecure
@@ -12,7 +12,7 @@
 * [Overview](architecure/overview.md)
 * [Crawler](architecure/crawler.md)
 * [Database Structure](architecure/database-structure/README.md)
-  * [Type of Song](architecure/database-structure/type-of-song.md)
+  * [Type of a Song](architecure/database-structure/type-of-song.md)
 * [Artificial Intelligence](architecure/artificial-intelligence.md)
 
 ## API Doc
diff --git a/doc/en/about/scope-of-inclusion.md b/doc/en/about/scope-of-inclusion.md
index 136061c..1c15541 100644
--- a/doc/en/about/scope-of-inclusion.md
+++ b/doc/en/about/scope-of-inclusion.md
@@ -1,48 +1,34 @@
 # Scope of Inclusion
 
-CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publisher, manipulators,
-arranger, etc), singers and voice engines / voicebanks.&#x20;
+CVSA contains many aspects of Chinese Vocal Synthesis, including songs, albums, artists (publisher, manipulators, arranger, etc), singers and voice engines / voicebanks.
 
-For a **song**, it must meet the following conditions to be included in CVSA:
-
-### Category 30
-
-In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in
-[Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our
-[automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been
-uploaded to bilibili / categorized under this category.
-
-#### NEWS
-
-Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be
-entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the
-parent category "Music").&#x20;
-
-According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly
-published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However,
-there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under
-the "Music General" sub-category.\
-We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated
-program's crawling.
+For a **song**, it must meet the following two conditions to be included in CVSA:
 
 ### At Leats One Line of Chinese / Chinese Virtual Singer
 
-The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics of the song do not contain
-Chinese, it will only be included in the CVSA only if a Chinese virtual singer has been used.
+The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics of the song do not contain Chinese, it will be included in CVSA only if a Chinese virtual singer has been used.
 
 We define a **Chinese virtual singer** as follows:
 
 1. The singer primarily uses Chinese voicebank (i.e. the most widely used voickbank for the singer is Chinese)
-2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or
+2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or
    Taiwan.
 
 ### Using Vocal Synthesizer
 
-To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony
-vocals).
+To be included in CVSA, at least one line of the song must be produced by a Vocal Synthesizer (including harmony vocals).
 
-We define a vocal synthesizer as a software or system that generates synthesized singing voices by algorithmically
-modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics,
-encompassing both waveform-concatenation-based (e.g., VOCALOID, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio)
-approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g.,
-[so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).
+We define a vocal synthesizer as software or a system that generates synthesized singing voices by algorithmically modeling vocal characteristics and producing audio from input parameters such as lyrics, pitch, and dynamics, encompassing both waveform-concatenation-based (e.g., VOCALOID 1\~5, UTAU) and AI-based (e.g., Synthesizer V, ACE Studio) approaches, **but excluding voice conversion tools that solely alter the timbre of pre-existing recordings** (e.g., [so-vits svc](https://github.com/svc-develop-team/so-vits-svc)).
+
+
+
+In addition, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in [Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our [automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been uploaded to bilibili / categorized under this category.
+
+#### NEWS
+
+Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the parent category "Music").
+
+According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However, there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under
+the "Music General" sub-category.
+
+We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated program's crawling.
diff --git a/doc/en/about/this-project.md b/doc/en/about/this-project.md
index 4e386f9..57dabc0 100644
--- a/doc/en/about/this-project.md
+++ b/doc/en/about/this-project.md
@@ -1,13 +1,13 @@
-# About CVSA Project
+# About the CVSA Project
 
-CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis
-community in a highly automation-assisted way.&#x20;
+CVSA (Chinese Vocal Synthesis Archive) aims to collect as much content as possible about the Chinese Vocal Synthesis
+community in a highly automation-assisted way.
 
-Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an
+Unlike existing projects such as [VocaDB](https://vocadb.net), CVSA collects and displays the following content in an
 automated and manually edited way:
 
-- Metadata of songs (name, duration, publisher, singer, etc.)
-- Descriptive information of songs (content introduction, creation background, lyrics, etc.)
-- Engagement data snapshots of songs, i.e. historical snapshots of their engagement data (including views, favorites,
-  likes, etc.) on the [Bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
-- Information about artists, albums, vocal synthesizers, and voicebanks.
+* Metadata of songs (name, duration, publisher, singer, etc.)
+* Descriptive information of songs (content introduction, creation background, lyrics, etc.)
+* Statistical data snapshots of songs, i.e. historical snapshots of their statistical data (including the number of views, favorites, likes, etc.) on the [bilibili](https://en.wikipedia.org/wiki/Bilibili) website.
+* Information about artists, albums, vocal synthesizers, and voicebanks.
+
diff --git a/doc/en/api-doc/songs.md b/doc/en/api-doc/songs.md
index 914c266..4ee3632 100644
--- a/doc/en/api-doc/songs.md
+++ b/doc/en/api-doc/songs.md
@@ -1,3 +1,6 @@
 # Songs
 
-Not implemented yet.
+{% openapi src="../.gitbook/assets/api-doc.yaml" path="/video/{id}/snapshots" method="get" %}
+[api-doc.yaml](../.gitbook/assets/api-doc.yaml)
+{% endopenapi %}
+
diff --git a/doc/en/architecure/artificial-intelligence.md b/doc/en/architecure/artificial-intelligence.md
index e9da3dc..7133614 100644
--- a/doc/en/architecure/artificial-intelligence.md
+++ b/doc/en/architecure/artificial-intelligence.md
@@ -4,18 +4,20 @@ CVSA's automated workflow relies heavily on artificial intelligence for informat
 
 The AI ​​systems we currently use are:
 
-### The Filter
+### The Filter (codename Akari)
 
-Located at `/filter/` under project root dir, it classifies a video in the
-[category 30](../about/scope-of-inclusion.md#category-30) into the following categories:
+Located at `/ml/filter/` under the project root dir, it classifies a video in [category 30](../about/scope-of-inclusion.md) into the following categories:
 
-- 0: Not related to Chinese vocal synthesis
-- 1: A original song with Chinese vocal synthesis
-- 2: A cover/remix song with Chinese vocal synthesis
+* 0: Not related to Chinese vocal synthesis
+* 1: An original song with Chinese vocal synthesis
+* 2: A cover/remix song with Chinese vocal synthesis
+
+We also have some experimental work that is not yet in production:
 
 ### The Predictor
 
-Located at `/pred/`under the project root dir, it predicts the future views of a video. This is a regression model that
-takes historical view trends of a video, other contextual information (such as the current time), and future time points
-to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified
-future time point.
+Located at `/ml/pred/` under the project root dir, it predicts the future views of a video. This is a regression model that takes historical view trends of a video, other contextual information (such as the current time), and future time points to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified future time point.
+
+### Lyrics Alignment
+
+Located at `/ml/lab/` under the project root dir, it uses [MMS wav2vec](https://huggingface.co/docs/transformers/en/model_doc/mms) and [Whisper](https://github.com/openai/whisper) models for phoneme-level and line-level alignment, respectively. The original purpose of this work is to drive the live lyrics feature in our other project: [AquaVox](https://github.com/alikia2x/aquavox).
diff --git a/doc/en/architecure/database-structure/README.md b/doc/en/architecure/database-structure/README.md
index 84d2ba5..c3f4659 100644
--- a/doc/en/architecure/database-structure/README.md
+++ b/doc/en/architecure/database-structure/README.md
@@ -2,14 +2,13 @@
 
 CVSA uses [PostgreSQL](https://www.postgresql.org/) as our database.
 
-All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the
+All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the
 following tables:
 
-- songs: stores the main information of songs
-- bili\_user: stores snapshots of Bilibili user information
-- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
-- labelling\_result: Contains label of videos in `all_data`tagged by our
-  [AI system](../artificial-intelligence.md#the-filter).
-- video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views, etc.), we call this
-  fetch process as "snapshot".
-- snapshot\_schedule: The scheduling information for video snapshots.
+* songs: stores the main information of songs
+* bilibili\_user: stores snapshots of Bilibili user information
+* bilibili\_metadata: metadata of all videos we collected from bilibili.
+* labelling\_result: Contains labels of videos in `bilibili_metadata` tagged by our [AI system](../artificial-intelligence.md#the-filter).
+* video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views, etc.); we call this fetch process a "snapshot".
+* snapshot\_schedule: The scheduling information for video snapshots.
+