fix: incorrect condition for filter the tags

2025-03-23 13:44:03 +08:00
493 changed files with 2666 additions and 25028 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1,96 +0,0 @@
 built/*
 tests/cases/rwc/*
 tests/cases/perf/*
 !tests/cases/webharness/compilerToString.js
 test-args.txt
 ~*.docx
 \#*\#
 .\#*
 tests/baselines/local/*
 tests/baselines/local.old/*
 tests/services/baselines/local/*
 tests/baselines/prototyping/local/*
 tests/baselines/rwc/*
 tests/baselines/reference/projectOutput/*
 tests/baselines/local/projectOutput/*
 tests/baselines/reference/testresults.tap
 tests/baselines/symlinks/*
 tests/services/baselines/prototyping/local/*
 tests/services/browser/typescriptServices.js
 src/harness/*.js
 src/compiler/diagnosticInformationMap.generated.ts
 src/compiler/diagnosticMessages.generated.json
 src/parser/diagnosticInformationMap.generated.ts
 src/parser/diagnosticMessages.generated.json
 rwc-report.html
 *.swp
 build.json
 *.actual
 tests/webTestServer.js
 tests/webTestServer.js.map
 tests/webhost/*.d.ts
 tests/webhost/webtsc.js
 tests/cases/**/*.js
 tests/cases/**/*.js.map
 *.config
 scripts/eslint/built/
 scripts/debug.bat
 scripts/run.bat
 scripts/**/*.js
 scripts/**/*.js.map
 coverage/
 internal/
 **/.DS_Store
 .settings
 **/.vs
 **/.vscode/*
 !**/.vscode/tasks.json
 !**/.vscode/settings.template.json
 !**/.vscode/launch.template.json
 !**/.vscode/extensions.json
 !tests/cases/projects/projectOption/**/node_modules
 !tests/cases/projects/NodeModulesSearch/**/*
 !tests/baselines/reference/project/nodeModules*/**/*
 yarn.lock
 yarn-error.log
 .parallelperf.*
 tests/baselines/reference/dt
 .failed-tests
 TEST-results.xml
 package-lock.json
 .eslintcache
 *v8.log
 # dotenv environment variable files
 .env
 .env.development.local
 .env.test.local
 .env.production.local
 .env.local
 # npm dependencies
 node_modules/
 # project specific
 logs/
 __pycache__
 ml/filter/runs
 ml/pred/runs
 ml/pred/checkpoints
 ml/pred/observed
 ml/data/
 ml/filter/checkpoints
 scripts
 model/
 .astro
 # Database
 *.dump
 *.db
 *.sqlite
 *.sqlite3
 data/
 docker-compose.yml
--- a/.gitattributes
+++ b/.gitattributes
@ -1 +0,0 @@
 *.woff2 filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@ -1,44 +1,87 @@
 built/*
 tests/cases/rwc/*
 tests/cases/perf/*
 !tests/cases/webharness/compilerToString.js
 test-args.txt
 ~*.docx
 \#*\#
 .\#*
 tests/baselines/local/*
 tests/baselines/local.old/*
 tests/services/baselines/local/*
 tests/baselines/prototyping/local/*
 tests/baselines/rwc/*
 tests/baselines/reference/projectOutput/*
 tests/baselines/local/projectOutput/*
 tests/baselines/reference/testresults.tap
 tests/baselines/symlinks/*
 tests/services/baselines/prototyping/local/*
 tests/services/browser/typescriptServices.js
 src/harness/*.js
 src/compiler/diagnosticInformationMap.generated.ts
 src/compiler/diagnosticMessages.generated.json
 src/parser/diagnosticInformationMap.generated.ts
 src/parser/diagnosticMessages.generated.json
 rwc-report.html
 *.swp
 build.json
 *.actual
 tests/webTestServer.js
 tests/webTestServer.js.map
 tests/webhost/*.d.ts
 tests/webhost/webtsc.js
 tests/cases/**/*.js
 tests/cases/**/*.js.map
 *.config
 scripts/eslint/built/
 scripts/debug.bat
 scripts/run.bat
 scripts/**/*.js
 scripts/**/*.js.map
 coverage/
 internal/
 **/.DS_Store
 .settings
 **/.vs
 **/.vscode/*
-
+!**/.vscode/tasks.json
 !**/.vscode/settings.template.json
 !**/.vscode/launch.template.json
 !**/.vscode/extensions.json
 !tests/cases/projects/projectOption/**/node_modules
 !tests/cases/projects/NodeModulesSearch/**/*
 !tests/baselines/reference/project/nodeModules*/**/*
 .idea
 yarn.lock
 yarn-error.log
 .parallelperf.*
 tests/baselines/reference/dt
 .failed-tests
 TEST-results.xml
 package-lock.json
 .eslintcache
 *v8.log
 # dotenv environment variable files
 .env
-.env.*
+.env.development.local
 .env.test.local
 .env.production.local
 .env.local
 # Fresh build directory
 _fresh/
 # npm dependencies
 node_modules/
 # project specific
 logs/
 __pycache__
-ml/filter/runs
+filter/runs
-ml/pred/runs
+pred/runs
-ml/pred/checkpoints
+pred/checkpoints
-ml/pred/observed
+data/
-ml/data/
+filter/checkpoints
 ml/filter/checkpoints
 scripts
 model/
 .astro
 # Database
 *.dump
 *.db
 *.sqlite
 *.sqlite3
 data/
 redis/
 # Build 
 dist/
 build/
 docker-compose.yml
 ucaptcha-config.yaml
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@ -1,10 +0,0 @@
 # Default ignored files
 /shelf/
 /workspace.xml
 # Editor-based HTTP Client requests
 /httpRequests/
 # Datasource local storage ignored files
 /dataSources/
 /dataSources.local.xml
 dataSources.xml
 MarsCodeWorkspaceAppSettings.xml
--- a/.idea/bun.xml
+++ b/.idea/bun.xml
@ -1,6 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="BunSettings">
    <option name="bunPath" value="$USER_HOME$/.bun/bin/bun" />
  </component>
 </project>
--- a/.idea/codeStyles/Project.xml
+++ b/.idea/codeStyles/Project.xml
@ -1,55 +0,0 @@
 <component name="ProjectCodeStyleConfiguration">
  <code_scheme name="Project" version="173">
    <option name="LINE_SEPARATOR" value="&#10;" />
    <HTMLCodeStyleSettings>
      <option name="HTML_SPACE_INSIDE_EMPTY_TAG" value="true" />
    </HTMLCodeStyleSettings>
    <JSCodeStyleSettings version="0">
      <option name="FORCE_SEMICOLON_STYLE" value="true" />
      <option name="SPACE_BEFORE_FUNCTION_LEFT_PARENTH" value="false" />
      <option name="FORCE_QUOTE_STYlE" value="true" />
      <option name="ENFORCE_TRAILING_COMMA" value="Remove" />
      <option name="SPACES_WITHIN_OBJECT_LITERAL_BRACES" value="true" />
      <option name="SPACES_WITHIN_IMPORTS" value="true" />
    </JSCodeStyleSettings>
    <TypeScriptCodeStyleSettings version="0">
      <option name="FORCE_SEMICOLON_STYLE" value="true" />
      <option name="SPACE_BEFORE_FUNCTION_LEFT_PARENTH" value="false" />
      <option name="FORCE_QUOTE_STYlE" value="true" />
      <option name="ENFORCE_TRAILING_COMMA" value="Remove" />
      <option name="SPACES_WITHIN_OBJECT_LITERAL_BRACES" value="true" />
      <option name="SPACES_WITHIN_IMPORTS" value="true" />
    </TypeScriptCodeStyleSettings>
    <VueCodeStyleSettings>
      <option name="INTERPOLATION_NEW_LINE_AFTER_START_DELIMITER" value="false" />
      <option name="INTERPOLATION_NEW_LINE_BEFORE_END_DELIMITER" value="false" />
    </VueCodeStyleSettings>
    <codeStyleSettings language="HTML">
      <option name="SOFT_MARGINS" value="120" />
      <indentOptions>
        <option name="CONTINUATION_INDENT_SIZE" value="4" />
        <option name="USE_TAB_CHARACTER" value="true" />
      </indentOptions>
    </codeStyleSettings>
    <codeStyleSettings language="JavaScript">
      <option name="SOFT_MARGINS" value="120" />
      <indentOptions>
        <option name="USE_TAB_CHARACTER" value="true" />
      </indentOptions>
    </codeStyleSettings>
    <codeStyleSettings language="TypeScript">
      <option name="SOFT_MARGINS" value="120" />
      <indentOptions>
        <option name="USE_TAB_CHARACTER" value="true" />
      </indentOptions>
    </codeStyleSettings>
    <codeStyleSettings language="Vue">
      <option name="SOFT_MARGINS" value="120" />
      <indentOptions>
        <option name="INDENT_SIZE" value="4" />
        <option name="TAB_SIZE" value="4" />
        <option name="USE_TAB_CHARACTER" value="true" />
      </indentOptions>
    </codeStyleSettings>
  </code_scheme>
 </component>
--- a/.idea/codeStyles/codeStyleConfig.xml
+++ b/.idea/codeStyles/codeStyleConfig.xml
@ -1,5 +0,0 @@
 <component name="ProjectCodeStyleConfiguration">
  <state>
    <option name="USE_PER_PROJECT_SETTINGS" value="true" />
  </state>
 </component>
--- a/.idea/cvsa.iml
+++ b/.idea/cvsa.iml
@ -1,37 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="WEB_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/.tmp" />
      <excludeFolder url="file://$MODULE_DIR$/temp" />
      <excludeFolder url="file://$MODULE_DIR$/tmp" />
      <excludeFolder url="file://$MODULE_DIR$/ml/data" />
      <excludeFolder url="file://$MODULE_DIR$/doc" />
      <excludeFolder url="file://$MODULE_DIR$/ml/filter/checkpoints" />
      <excludeFolder url="file://$MODULE_DIR$/ml/filter/runs" />
      <excludeFolder url="file://$MODULE_DIR$/ml/lab/data" />
      <excludeFolder url="file://$MODULE_DIR$/ml/lab/temp" />
      <excludeFolder url="file://$MODULE_DIR$/logs" />
      <excludeFolder url="file://$MODULE_DIR$/model" />
      <excludeFolder url="file://$MODULE_DIR$/src/db" />
      <excludeFolder url="file://$MODULE_DIR$/.idea" />
      <excludeFolder url="file://$MODULE_DIR$/.vscode" />
      <excludeFolder url="file://$MODULE_DIR$/.zed" />
      <excludeFolder url="file://$MODULE_DIR$/packages/frontend/.astro" />
      <excludeFolder url="file://$MODULE_DIR$/scripts" />
      <excludeFolder url="file://$MODULE_DIR$/.astro" />
      <excludeFolder url="file://$MODULE_DIR$/ml/pred/checkpoints" />
      <excludeFolder url="file://$MODULE_DIR$/ml/pred/observed" />
      <excludeFolder url="file://$MODULE_DIR$/ml/pred/runs" />
      <excludeFolder url="file://$MODULE_DIR$/packages/backend/logs" />
      <excludeFolder url="file://$MODULE_DIR$/packages/core/net/logs" />
      <excludeFolder url="file://$MODULE_DIR$/packages/crawler/logs" />
      <excludeFolder url="file://$MODULE_DIR$/data" />
      <excludeFolder url="file://$MODULE_DIR$/redis" />
      <excludeFolder url="file://$MODULE_DIR$/ml" />
      <excludeFolder url="file://$MODULE_DIR$/src" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
--- a/.idea/deno.xml
+++ b/.idea/deno.xml
@ -1,7 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="DenoSettings">
    <option name="denoInit" value="{&#10;  &quot;enable&quot;: true,&#10;  &quot;lint&quot;: true,&#10;  &quot;unstable&quot;: true,&#10;  &quot;importMap&quot;: &quot;import_map.json&quot;,&#10;  &quot;config&quot;: &quot;deno.json&quot;,&#10;  &quot;fmt&quot;: {&#10;    &quot;useTabs&quot;: true,&#10;    &quot;lineWidth&quot;: 120,&#10;    &quot;indentWidth&quot;: 4,&#10;    &quot;semiColons&quot;: true,&#10;    &quot;proseWrap&quot;: &quot;always&quot;&#10;  }&#10;}" />
    <option name="useDenoValue" value="DISABLE" />
  </component>
 </project>
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@ -1,36 +0,0 @@
 <component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <option name="scopesOrder">
      <list>
        <option value="Astro" />
        <option value="All Changed Files" />
        <option value="Open Files" />
        <option value="Project Files" />
        <option value="Scratches and Consoles" />
        <option value="Tests" />
      </list>
    </option>
    <inspection_tool class="ES6UnusedImports" enabled="true" level="WARNING" enabled_by_default="true">
      <scope name="Astro" level="INFORMATION" enabled="false" editorAttributes="INFORMATION_ATTRIBUTES" />
    </inspection_tool>
    <inspection_tool class="GrazieInspection" enabled="false" level="GRAMMAR_ERROR" enabled_by_default="false" />
    <inspection_tool class="HtmlUnknownAttribute" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="myValues">
        <value>
          <list size="1">
            <item index="0" class="java.lang.String" itemvalue="autocorrect" />
          </list>
        </value>
      </option>
      <option name="myCustomValuesEnabled" value="true" />
    </inspection_tool>
    <inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
    <inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
      <option name="processCode" value="true" />
      <option name="processLiterals" value="true" />
      <option name="processComments" value="true" />
    </inspection_tool>
    <inspection_tool class="TsLint" enabled="true" level="WARNING" enabled_by_default="true" />
  </profile>
 </component>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -1,8 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/cvsa.iml" filepath="$PROJECT_DIR$/.idea/cvsa.iml" />
    </modules>
  </component>
 </project>
--- a/.idea/scopes/Astro.xml
+++ b/.idea/scopes/Astro.xml
@ -1,3 +0,0 @@
 <component name="DependencyValidationManager">
  <scope name="Astro" pattern="file:*.astro" />
 </component>
--- a/.idea/sqldialects.xml
+++ b/.idea/sqldialects.xml
@ -1,6 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="SqlDialectMappings">
    <file url="PROJECT" dialect="PostgreSQL" />
  </component>
 </project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -1,6 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="Git" />
  </component>
 </project>
--- a/.prettierrc
+++ b/.prettierrc
@ -1,8 +0,0 @@
 {
 	"useTabs": true,
 	"tabWidth": 4,
 	"trailingComma": "none",
 	"singleQuote": false,
 	"printWidth": 120,
 	"endOfLine": "lf"
 }
--- a/.tokeignore
+++ b/.tokeignore
@ -2,10 +2,4 @@ data
 *.json
 *.svg
 *.txt
-*.md
+*.md
 *config*
 Inter.css
 MiSans.css
 *.yaml
 *.yml
 *.mdx
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@ -0,0 +1,6 @@
 {
 	"recommendations": [
 		"denoland.vscode-deno",
 		"bradlc.vscode-tailwindcss"
 	]
 }
--- a/.zed/settings.json
+++ b/.zed/settings.json
@ -0,0 +1,35 @@
 // Folder-specific settings
 //
 // For a full list of overridable settings, and general information on folder-specific settings,
 // see the documentation: https://zed.dev/docs/configuring-zed#settings-files
 {
    "lsp": {
        "deno": {
            "settings": {
                "deno": {
                    "enable": true
                }
            }
        }
    },
    "languages": {
        "TypeScript": {
            "language_servers": [
                "deno",
                "!typescript-language-server",
                "!vtsls",
                "!eslint"
            ],
            "formatter": "language_server"
        },
        "TSX": {
            "language_servers": [
                "deno",
                "!typescript-language-server",
                "!vtsls",
                "!eslint"
            ],
            "formatter": "language_server"
        }
    }
 }
--- a/Dockerfile.backend
+++ b/Dockerfile.backend
@ -1,23 +0,0 @@
 FROM oven/bun:1.2.8-debian
 WORKDIR /app
 COPY ./packages/core ./core
 COPY ./packages/backend/package.json ./packages/backend/bun.lock ./backend/
 RUN apt update && apt install -y curl
 RUN ln -s /bin/uname /usr/bin/uname
 RUN /bin/bash -c "$(curl -fsSL https://aliyuncli.alicdn.com/install.sh)"
 WORKDIR backend
 RUN bun install
 COPY ./packages/backend/ .
 RUN mkdir -p /app/logs
 CMD ["bun", "start"]
--- a/Dockerfile.crawler
+++ b/Dockerfile.crawler
@ -1,19 +0,0 @@
 FROM oven/bun:1.2.8-debian
 WORKDIR /app
 COPY . .
 RUN bun i
 RUN mkdir -p /app/logs
 RUN apt update && apt install -y curl
 RUN ln -s /bin/uname /usr/bin/uname
 RUN /bin/bash -c "$(curl -fsSL https://aliyuncli.alicdn.com/install.sh)"
 WORKDIR packages/crawler
 CMD ["bun", "all"]
--- a/Dockerfile.frontend
+++ b/Dockerfile.frontend
@ -1,23 +0,0 @@
 FROM oven/bun
 ARG BACKEND_URL
 WORKDIR /app
 COPY . .
 RUN bun install
 WORKDIR packages/frontend
 RUN bun run build
 ENV HOST=0.0.0.0
 ENV PORT=4321
 ENV BACKEND_URL=${BACKEND_URL}
 EXPOSE 4321
 RUN mkdir -p /app/logs
 CMD ["bun", "/app/packages/frontend/dist/server/entry.mjs"]
--- a/Dockerfile.next
+++ b/Dockerfile.next
@ -1,14 +0,0 @@
 FROM node:lts-slim AS production
 WORKDIR /app
 COPY ./packages/next/.next ./.next
 COPY ./packages/next/public ./public
 COPY ./packages/next/package.json ./package.json
 COPY ./packages/next/node_modules ./node_modules
 ENV NODE_ENV production
 EXPOSE 7400
 CMD ["npm", "start"]
--- a/README.md
+++ b/README.md
@ -2,11 +2,6 @@
 「中V档案馆」是一个旨在收录与展示「中文歌声合成作品」及有关信息的网站。
 ## 新闻 - 测试版本上线
 目前，中V档案馆上线了用于测试的前端网页和API接口，它们分别位于[projectcvsa.com](https://projectcvsa.com)和[api.projectcvsa.com](https://api.projectcvsa.com)。
 API调用方法请参见[接口文档](https://docs.projectcvsa.com/api-doc/)。
 ## 创建背景与关联工作
 纵观整个互联网，对于「中文歌声合成」或「中文虚拟歌手」（常简称为中V或VC）相关信息进行较为系统、全面地整理收集的主要有以下几个网站：
@ -36,7 +31,7 @@ API调用方法请参见[接口文档](https://docs.projectcvsa.com/api-doc/)。
 ## 技术架构
-参见[CVSA文档](https://docs.projectcvsa.com/)。
+参见[CVSA文档](https://cvsa.gitbook.io/)。
 ## 开放许可
--- a/bun.lock
+++ b/bun.lock
--- a/components/Button.tsx
+++ b/components/Button.tsx
@ -0,0 +1,12 @@
 import { JSX } from "preact";
 import { IS_BROWSER } from "$fresh/runtime.ts";
 export function Button(props: JSX.HTMLAttributes<HTMLButtonElement>) {
 	return (
 		<button
 			{...props}
 			disabled={!IS_BROWSER || props.disabled}
 			class="px-2 py-1 border-gray-500 border-2 rounded bg-white hover:bg-gray-200 transition-colors"
 		/>
 	);
 }
--- a/data/filter/1.py
+++ b/data/filter/1.py
@ -0,0 +1,55 @@
 import json
 import random
 def process_data(input_file, output_file):
    """
    从输入文件中读取数据，找出model和human不一致的行，
    删除"model"键，将"human"键重命名为"label"，
    然后将处理后的数据添加到输出文件中。
    在写入之前，它会加载output_file中的所有样本，
    并使用aid键进行去重过滤。
    Args:
        input_file (str): 输入文件的路径。
        output_file (str): 输出文件的路径。
    """
    # 加载output_file中已有的数据，用于去重
    existing_data = set()
    try:
        with open(output_file, 'r', encoding='utf-8') as f_out:
            for line in f_out:
                try:
                    data = json.loads(line)
                    existing_data.add(data['aid'])
                except json.JSONDecodeError:
                    pass  # 忽略JSON解码错误，继续读取下一行
    except FileNotFoundError:
        pass  # 如果文件不存在，则忽略
    with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'a', encoding='utf-8') as f_out:
        for line in f_in:
            try:
                data = json.loads(line)
                if data['model'] != data['human'] or random.random() < 0.2:
                    if data['aid'] not in existing_data:  # 检查aid是否已存在
                        del data['model']
                        data['label'] = data['human']
                        del data['human']
                        f_out.write(json.dumps(data, ensure_ascii=False) + '\n')
                        existing_data.add(data['aid'])  # 将新的aid添加到集合中
            except json.JSONDecodeError as e:
                print(f"JSON解码错误: {e}")
                print(f"错误行内容: {line.strip()}")
            except KeyError as e:
                print(f"KeyError: 键 '{e}' 不存在")
                print(f"错误行内容: {line.strip()}")
 # 调用函数处理数据
 input_file = 'real_test.jsonl'
 output_file = 'labeled_data.jsonl'
 process_data(input_file, output_file)
 print(f"处理完成，结果已写入 {output_file}")
--- a/deno.json
+++ b/deno.json
@ -0,0 +1,60 @@
 {
 	"lock": false,
 	"tasks": {
 		"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
 		"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
 		"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
 		"cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
 		"manifest": "deno task cli manifest $(pwd)",
 		"start": "deno run -A --watch=static/,routes/ dev.ts",
 		"build": "deno run -A dev.ts build",
 		"preview": "deno run -A main.ts",
 		"update": "deno run -A -r https://fresh.deno.dev/update .",
 		"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write --allow-run ./src/worker.ts",
 		"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
 		"adder": "deno run --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
 		"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
 		"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
 		"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
 	},
 	"lint": {
 		"rules": {
 			"tags": ["fresh", "recommended"]
 		}
 	},
 	"exclude": ["**/_fresh/*"],
 	"imports": {
 		"@std/assert": "jsr:@std/assert@1",
 		"$fresh/": "https://deno.land/x/fresh@1.7.3/",
 		"preact": "https://esm.sh/preact@10.22.0",
 		"preact/": "https://esm.sh/preact@10.22.0/",
 		"@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
 		"@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
 		"tailwindcss": "npm:tailwindcss@3.4.1",
 		"tailwindcss/": "npm:/tailwindcss@3.4.1/",
 		"tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
 		"$std/": "https://deno.land/std@0.216.0/",
 		"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
 		"bullmq": "npm:bullmq",
 		"lib/": "./lib/",
 		"ioredis": "npm:ioredis",
 		"@bull-board/api": "npm:@bull-board/api",
 		"@bull-board/express": "npm:@bull-board/express",
 		"express": "npm:express",
 		"src/": "./src/",
 		"onnxruntime": "npm:onnxruntime-node@1.19.2",
 		"chalk": "npm:chalk"
 	},
 	"compilerOptions": {
 		"jsx": "react-jsx",
 		"jsxImportSource": "preact"
 	},
 	"nodeModulesDir": "auto",
 	"fmt": {
 		"useTabs": true,
 		"lineWidth": 120,
 		"indentWidth": 4,
 		"semiColons": true,
 		"proseWrap": "always"
 	}
 }
--- a/dev.ts
+++ b/dev.ts
@ -0,0 +1,7 @@
 #!/usr/bin/env -S deno run -A --watch=static/,routes/
 import dev from "$fresh/dev.ts";
 import config from "./fresh.config.ts";
 import "$std/dotenv/load.ts";
 await dev(import.meta.url, "./main.ts", config);
--- a/doc/en/SUMMARY.md
+++ b/doc/en/SUMMARY.md
@ -1,21 +1,22 @@
 # Table of contents
-* [Welcome](README.md)
+- [Welcome](README.md)
 ## About
-* [About CVSA Project](about/this-project.md)
+- [About CVSA Project](about/this-project.md)
-* [Scope of Inclusion](about/scope-of-inclusion.md)
+- [Scope of Inclusion](about/scope-of-inclusion.md)
 ## Architecure
-* [Overview](architecure/overview.md)
+- [Overview](architecure/overview.md)
-* [Crawler](architecure/crawler.md)
+- [Database Structure](architecure/database-structure/README.md)
-* [Database Structure](architecure/database-structure/README.md)
+  - [Type of Song](architecure/database-structure/type-of-song.md)
-  * [Type of Song](architecure/database-structure/type-of-song.md)
+- [Message Queue](architecure/message-queue/README.md)
-* [Artificial Intelligence](architecure/artificial-intelligence.md)
+  - [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
 - [Artificial Intelligence](architecure/artificial-intelligence.md)
 ## API Doc
-* [Catalog](api-doc/catalog.md)
+- [Catalog](api-doc/catalog.md)
-* [Songs](api-doc/songs.md)
+- [Songs](api-doc/songs.md)
--- a/doc/en/about/scope-of-inclusion.md
+++ b/doc/en/about/scope-of-inclusion.md
@ -7,34 +7,13 @@ For a **song**, it must meet the following conditions to be included in CVSA:
 ### Category 30
-In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in
+In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is
-[Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our
+posted on Bilibili. In some special cases, this rule may not be enforced.&#x20;
 [automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been
 uploaded to bilibili / categorized under this category.
-#### NEWS
+### At Leats One Line of Chinese
-Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be
+The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports
-entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the
+Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
 parent category "Music").&#x20;
 According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly
 published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However,
 there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under
 the "Music General" sub-category.\
 We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated
 program's crawling.
 ### At Leats One Line of Chinese / Chinese Virtual Singer
 The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics of the song do not contain
 Chinese, it will only be included in the CVSA only if a Chinese virtual singer has been used.
 We define a **Chinese virtual singer** as follows:
 1. The singer primarily uses Chinese voicebank (i.e. the most widely used voickbank for the singer is Chinese)
 2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or
   Taiwan.
 ### Using Vocal Synthesizer
--- a/doc/en/architecure/artificial-intelligence.md
+++ b/doc/en/architecure/artificial-intelligence.md
@ -12,10 +12,3 @@ Located at `/filter/` under project root dir, it classifies a video in the
 - 0: Not related to Chinese vocal synthesis
 - 1: A original song with Chinese vocal synthesis
 - 2: A cover/remix song with Chinese vocal synthesis
 ### The Predictor
 Located at `/pred/`under the project root dir, it predicts the future views of a video. This is a regression model that
 takes historical view trends of a video, other contextual information (such as the current time), and future time points
 to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified
 future time point.
--- a/doc/en/architecure/crawler.md
+++ b/doc/en/architecure/crawler.md
@ -1,4 +0,0 @@
 # Crawler
 A central aspect of CVSA's technical design is its emphasis on automation. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
--- a/doc/en/architecure/database-structure/README.md
+++ b/doc/en/architecure/database-structure/README.md
@ -10,6 +10,3 @@ following tables:
 - all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
 - labelling\_result: Contains label of videos in `all_data`tagged by our
  [AI system](../artificial-intelligence.md#the-filter).
 - video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views, etc.), we call this
  fetch process as "snapshot".
 - snapshot\_schedule: The scheduling information for video snapshots.
--- a/doc/en/architecure/message-queue/README.md
+++ b/doc/en/architecure/message-queue/README.md
@ -0,0 +1 @@
 # Message Queue
--- a/doc/en/architecure/message-queue/videotagsqueue.md
+++ b/doc/en/architecure/message-queue/videotagsqueue.md
@ -0,0 +1,12 @@
 # VideoTagsQueue
 ### Jobs
 The VideoTagsQueue contains two jobs: `getVideoTags`and `getVideosTags`. The former is used to fetch the tags of a
 video, and the latter is responsible for scheduling the former.
 ### Return value
 The return values across two jobs follows the following table:
 <table><thead><tr><th width="168">Return Value</th><th>Description</th></tr></thead><tbody><tr><td>0</td><td>In <code>getVideoTags</code>: the tags was successfully fetched<br>In <code>getVideosTags</code>: all null-tags videos have a corresponding job successfully queued.</td></tr><tr><td>1</td><td>Used in <code>getVideoTags</code>: occured <code>fetch</code>error during the job</td></tr><tr><td>2</td><td>Used in <code>getVideoTags</code>: we've reached the rate limit set in NetScheduler</td></tr><tr><td>3</td><td>Used in <code>getVideoTags</code>: did't provide aid in the job data</td></tr><tr><td>4</td><td>Used in<code>getVideosTags</code>: There's no video with NULL as `tags`</td></tr><tr><td>1xx</td><td>Used in<code>getVideosTags</code>:  the number of tasks in the queue has exceeded the limit, thus <code>getVideosTags</code> stops adding tasks. <code>xx</code> is the number of jobs added to the queue during execution.</td></tr></tbody></table>
--- a/doc/en/architecure/overview.md
+++ b/doc/en/architecure/overview.md
@ -1,4 +1,5 @@
 ---
 icon: globe-pointer
 layout:
  title:
    visible: true
@ -14,29 +15,5 @@ layout:
 # Overview
-The CVSA is a [monorepo](https://en.wikipedia.org/wiki/Monorepo) codebase, mainly using TypeScript as the development language. With [Deno workspace](https://docs.deno.com/runtime/fundamentals/workspaces/), the major part of the codebase is under `packages/`.&#x20;
+Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by
-
+[BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
 **Project structure:**
 ```
 cvsa
 ├── deno.json
 ├── packages
 │   ├── backend
 │   ├── core
 │   ├── crawler
 │   └── frontend
 └── README.md
 ```
 **Package Breakdown:**
 * **`backend`**: This package houses the server-side logic, built with the [Hono](https://hono.dev/) web framework. It's responsible for interacting with the database and exposing data through REST and GraphQL APIs for consumption by the frontend, internal applications, and third-party developers.
 * **`frontend`**: The user-facing web interface of CVSA is developed using [Astro](https://astro.build/). This package handles the presentation layer, displaying information fetched from the database.
 * **`crawler`**: This automated data collection system is a key component of CVSA. It's designed to automatically discover and gather new song data from bilibili, as well as track relevant statistics over time.
 * **`core`**: This package contains reusable and generic code that is utilized across multiple workspaces within the CVSA monorepo.
 ### Crawler
 Automation is the biggest highlight of CVSA's technical design. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data collection lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
--- a/doc/zh/.gitbook/assets/1.yaml
+++ b/doc/zh/.gitbook/assets/1.yaml
@ -1,106 +0,0 @@
 openapi: 3.0.0
 info:
  title: CVSA API
  version: v1
 servers:
  - url: https://api.projectcvsa.com
 paths:
  /video/{id}/snapshots:
    get:
      summary: 获取视频快照列表
      description: 根据视频 ID 获取视频的快照列表。视频 ID 可以是以 "av" 开头的数字，以 "BV" 开头的 12 位字母数字，或者一个正整数。
      parameters:
        - in: path
          name: id
          required: true
          schema:
            type: string
          description: "视频 ID (如: av78977256, BV1KJ411C7CW, 78977256)"
        - in: query
          name: ps
          schema:
            type: integer
            minimum: 1
          description: 每页返回的快照数量 (pageSize)，默认为 1000。
        - in: query
          name: pn
          schema:
            type: integer
            minimum: 1
          description: 页码 (pageNumber)，用于分页查询。offset 与 pn 只能选择一个。
        - in: query
          name: offset
          schema:
            type: integer
            minimum: 1
          description: 偏移量，用于基于偏移量的查询。offset 与 pn 只能选择一个。
        - in: query
          name: reverse
          schema:
            type: boolean
          description: 是否反向排序（从旧到新），默认为 false。
      responses:
        '200':
          description: 成功获取快照列表
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object
                  properties:
                    id:
                      type: integer
                      description: 快照 ID
                    aid:
                      type: integer
                      description: 视频的 av 号
                    views:
                      type: integer
                      description: 视频播放量
                    coins:
                      type: integer
                      description: 视频投币数
                    likes:
                      type: integer
                      description: 视频点赞数
                    favorites:
                      type: integer
                      description: 视频收藏数
                    shares:
                      type: integer
                      description: 视频分享数
                    danmakus:
                      type: integer
                      description: 视频弹幕数
                    replies:
                      type: integer
                      description: 视频评论数
        '400':
          description: 无效的查询参数
          content:
            application/json:
              schema:
                type: object
                properties:
                  message:
                    type: string
                    description: 错误消息
                  errors:
                    type: object
                    description: 详细的错误信息
        '500':
          description: 服务器内部错误
          content:
            application/json:
              schema:
                type: object
                properties:
                  message:
                    type: string
                    description: 错误消息
                  error:
                    type: object
                    description: 详细的错误信息
--- a/doc/zh/SUMMARY.md
+++ b/doc/zh/SUMMARY.md
@ -1,11 +1,11 @@
 # Table of contents
-* [欢迎](README.md)
+- [欢迎](README.md)
 ## 关于 <a href="#about" id="about"></a>
-* [关于本项目](about/this-project.md)
+- [关于本项目](about/this-project.md)
-* [收录范围](about/scope-of-inclusion.md)
+- [收录范围](about/scope-of-inclusion.md)
 ## 技术架构 <a href="#architecture" id="architecture"></a>
@ -14,9 +14,9 @@
  - [歌曲类型](architecture/database-structure/type-of-song.md)
 - [人工智能](architecture/artificial-intelligence.md)
 - [消息队列](architecture/message-queue/README.md)
-  - [LatestVideosQueue 队列](architecture/message-queue/latestvideosqueue-dui-lie.md)
+  - [VideoTagsQueue队列](architecture/message-queue/video-tags-queue.md)
 ## API 文档 <a href="#api-doc" id="api-doc"></a>
-* [目录](api-doc/catalog.md)
+- [目录](api-doc/catalog.md)
-* [视频快照](api-doc/video-snapshot.md)
+- [歌曲](api-doc/songs.md)
--- a/doc/zh/api-doc/catalog.md
+++ b/doc/zh/api-doc/catalog.md
@ -1,4 +1,3 @@
 # 目录
-* [视频快照](video-snapshot.md)
+- [歌曲](songs.md)
--- a/doc/zh/api-doc/songs.md
+++ b/doc/zh/api-doc/songs.md
@ -0,0 +1,3 @@
 # 歌曲
 暂未实现。
--- a/doc/zh/api-doc/video-snapshot.md
+++ b/doc/zh/api-doc/video-snapshot.md
@ -1,6 +0,0 @@
 # 视频快照
 {% openapi src="../.gitbook/assets/1.yaml" path="/video/{id}/snapshots" method="get" %}
 [1.yaml](../.gitbook/assets/1.yaml)
 {% endopenapi %}
--- a/doc/zh/architecture/database-structure/README.md
+++ b/doc/zh/architecture/database-structure/README.md
@ -2,14 +2,9 @@
 CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。
 CVSA 设计了两个
 CVSA 的所有公开数据（不包括用户的个人数据）都存储在名为 `cvsa_main` 的数据库中，该数据库包含以下表：
 - songs：存储歌曲的主要信息
- bilibili\_user：存储 Bilibili 用户信息快照
+- bili\_user：存储 Bilibili 用户信息快照
- bilibili\_metadata：[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据
+- all\_data：[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据。
 - labelling\_result：包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
 - latest\_video\_snapshot：存储视频最新的快照
 - video\_snapshot：存储视频的快照，包括特定时间下视频的统计信息（播放量、点赞数等）
 - snapshot\_schedule：视频快照的规划信息，为辅助表
--- a/doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md
+++ b/doc/zh/architecture/message-queue/latestvideosqueue-dui-lie.md
@ -1 +0,0 @@
 # LatestVideosQueue 队列
--- a/doc/zh/architecture/message-queue/video-tags-queue.md
+++ b/doc/zh/architecture/message-queue/video-tags-queue.md
@ -0,0 +1,15 @@
 ---
 description: 关于VideoTagsQueue队列的信息。
 ---
 # VideoTagsQueue队列
 ### 任务
 视频标签队列包含两个任务：`getVideoTags`和`getVideosTags`。前者用于获取视频的标签，后者负责调度前者。
 ### 返回值
 两个任务的返回值遵循以下表格：
 <table><thead><tr><th width="168">返回值</th><th>描述</th></tr></thead><tbody><tr><td>0</td><td>在 <code>getVideoTags</code> 中：标签成功获取<br>在 <code>getVideosTags</code> 中：所有无标签视频的相应任务已成功排队。</td></tr><tr><td>1</td><td>在 <code>getVideoTags</code> 中：任务期间发生 <code>fetch</code> 错误</td></tr><tr><td>2</td><td>在 <code>getVideoTags</code> 中：已达到 NetScheduler 设置的速率限制</td></tr><tr><td>3</td><td>在 <code>getVideoTags</code> 中：未在任务数据中提供帮助</td></tr><tr><td>4</td><td>在 <code>getVideosTags</code> 中：没有视频的 `tags` 为 NULL</td></tr><tr><td>1xx</td><td>在 <code>getVideosTags</code> 中：队列中的任务数量超过了限制，因此 <code>getVideosTags</code> 停止添加任务。<code>xx</code> 是在执行期间添加到队列的任务数量。</td></tr></tbody></table>
--- a/doc/zh/architecture/overview.md
+++ b/doc/zh/architecture/overview.md
@ -1,4 +1,5 @@
 ---
 icon: globe-pointer
 layout:
  title:
    visible: true
@ -14,13 +15,4 @@ layout:
 # 概览
-整个CVSA项目分为三个组件：**crawler**, **frontend** 和 **backend。**
+自动化是 CVSA 技术设计的最大亮点，为了实现自动化，我们使用BullMQ驱动的消息队列来并发处理数据采集生命周期中的各项任务。
 ### **crawler**
 位于项目目录`packages/crawler` 下，它负责以下工作：
 - 抓取新的视频并收录作品
 - 持续监控视频的播放量等统计信息
 整个 crawler 由 BullMQ 消息队列驱动，使用 Redis 和 PostgreSQL 管理状态。
--- a/docker-compose.example.yml
+++ b/docker-compose.example.yml
@ -1,71 +0,0 @@
 version: '3.8'
 services:
  db:
    image: postgres:17
    ports:
      - "5431:5432"
    environment:
      POSTGRES_USER: cvsa
      POSTGRES_PASSWORD: ""
      POSTGRES_DB: cvsa_main
    volumes:
      - ./data:/var/lib/postgresql/data
  redis:
    image: redis:latest
    ports:
      - "6378:6379"
    volumes:
      - ./redis/data:/data
      - ./redis/redis.conf:/usr/local/etc/redis/redis.conf
      - ./redis/logs:/logs
  frontend:
    build:
      context: .
      dockerfile: Dockerfile.frontend
    ports:
      - "4321:4321"
    environment:
      - HOST=0.0.0.0
      - PORT=4321
      - DB_HOST=db
      - DB_NAME=cvsa_main
      - DB_NAME_CRED=cvsa_cred
      - DB_USER=cvsa
      - DB_PORT=5432
      - DB_PASSWORD=""
      - LOG_VERBOSE=/app/logs/verbose.log
      - LOG_WARN=/app/logs/warn.log
      - LOG_ERR=/app/logs/error.log
    depends_on:
      - db
    volumes:
      - /path/to/your/logs:/app/logs
  backend:
    build:
      context: .
      dockerfile: Dockerfile.backend
    ports:
      - "8000:8000"
    environment:
      - HOST=0.0.0.0
      - DB_HOST=db
      - DB_NAME=cvsa_main
      - DB_NAME_CRED=cvsa_cred
      - DB_USER=cvsa
      - DB_PORT=5432
      - DB_PASSWORD=""
      - LOG_VERBOSE=/app/logs/verbose.log
      - LOG_WARN=/app/logs/warn.log
      - LOG_ERR=/app/logs/error.log
      - REDIS_HOST=redis
      - REDIS_PORT=6379
    depends_on:
      - db
    volumes:
      - /path/to/your/logs:/app/logs
 volumes:
  db_data:
--- a/ml/filter/RunningLogs.txt
+++ b/ml/filter/RunningLogs.txt
--- a/ml/filter/checkpoint_conversion.py
+++ b/ml/filter/checkpoint_conversion.py
--- a/ml/filter/clean_dataset.py
+++ b/ml/filter/clean_dataset.py
--- a/ml/filter/dataset.py
+++ b/ml/filter/dataset.py
--- a/ml/filter/db_utils.py
+++ b/ml/filter/db_utils.py
--- a/ml/filter/embedding.py
+++ b/ml/filter/embedding.py
--- a/ml/filter/embedding_range.py
+++ b/ml/filter/embedding_range.py
--- a/ml/filter/embedding_visualization.py
+++ b/ml/filter/embedding_visualization.py
--- a/ml/filter/labeling_system.py
+++ b/ml/filter/labeling_system.py
--- a/ml/filter/model.py
+++ b/ml/filter/model.py
--- a/ml/filter/modelV3_10.py
+++ b/ml/filter/modelV3_10.py
--- a/ml/filter/modelV3_12.py
+++ b/ml/filter/modelV3_12.py
--- a/ml/filter/modelV3_15.py
+++ b/ml/filter/modelV3_15.py
--- a/ml/filter/modelV6_0.py
+++ b/ml/filter/modelV6_0.py
--- a/ml/filter/onnx_export.py
+++ b/ml/filter/onnx_export.py
--- a/ml/filter/predict.py
+++ b/ml/filter/predict.py
--- a/ml/filter/quantize.py
+++ b/ml/filter/quantize.py
--- a/ml/filter/tag.py
+++ b/ml/filter/tag.py
--- a/ml/filter/test.py
+++ b/ml/filter/test.py
--- a/ml/filter/train.py
+++ b/ml/filter/train.py
--- a/fresh.config.ts
+++ b/fresh.config.ts
@ -0,0 +1,6 @@
 import { defineConfig } from "$fresh/server.ts";
 import tailwind from "$fresh/plugins/tailwind.ts";
 export default defineConfig({
 	plugins: [tailwind()],
 });
--- a/fresh.gen.ts
+++ b/fresh.gen.ts
@ -0,0 +1,27 @@
 // DO NOT EDIT. This file is generated by Fresh.
 // This file SHOULD be checked into source version control.
 // This file is automatically updated during development when running `dev.ts`.
 import * as $_404 from "./routes/_404.tsx";
 import * as $_app from "./routes/_app.tsx";
 import * as $api_joke from "./routes/api/joke.ts";
 import * as $greet_name_ from "./routes/greet/[name].tsx";
 import * as $index from "./routes/index.tsx";
 import * as $Counter from "./islands/Counter.tsx";
 import type { Manifest } from "$fresh/server.ts";
 const manifest = {
 	routes: {
 		"./routes/_404.tsx": $_404,
 		"./routes/_app.tsx": $_app,
 		"./routes/api/joke.ts": $api_joke,
 		"./routes/greet/[name].tsx": $greet_name_,
 		"./routes/index.tsx": $index,
 	},
 	islands: {
 		"./islands/Counter.tsx": $Counter,
 	},
 	baseUrl: import.meta.url,
 } satisfies Manifest;
 export default manifest;
--- a/islands/Counter.tsx
+++ b/islands/Counter.tsx
@ -0,0 +1,16 @@
 import type { Signal } from "@preact/signals";
 import { Button } from "../components/Button.tsx";
 interface CounterProps {
 	count: Signal<number>;
 }
 export default function Counter(props: CounterProps) {
 	return (
 		<div class="flex gap-8 py-6">
 			<Button onClick={() => props.count.value -= 1}>-1</Button>
 			<p class="text-3xl tabular-nums">{props.count}</p>
 			<Button onClick={() => props.count.value += 1}>+1</Button>
 		</div>
 	);
 }
--- a/ml/lab/.gitignore
+++ b/ml/lab/.gitignore
--- a/ml/lab/align-pipeline.md
+++ b/ml/lab/align-pipeline.md
--- a/ml/lab/mmsAlignment/align2LRC.py
+++ b/ml/lab/mmsAlignment/align2LRC.py
--- a/ml/lab/mmsAlignment/alignWithMMS.py
+++ b/ml/lab/mmsAlignment/alignWithMMS.py
--- a/ml/lab/mmsAlignment/splitSong.py
+++ b/ml/lab/mmsAlignment/splitSong.py
--- a/ml/lab/utils/audio.py
+++ b/ml/lab/utils/audio.py
--- a/ml/lab/utils/cleanTempDir.py
+++ b/ml/lab/utils/cleanTempDir.py
--- a/ml/lab/utils/ttml.py
+++ b/ml/lab/utils/ttml.py
--- a/ml/lab/whisperAlignment/align2srt.py
+++ b/ml/lab/whisperAlignment/align2srt.py
--- a/ml/lab/whisperAlignment/alignWithGroup.py
+++ b/ml/lab/whisperAlignment/alignWithGroup.py
--- a/ml/lab/whisperAlignment/splitGroups.py
+++ b/ml/lab/whisperAlignment/splitGroups.py
--- a/ml/lab/whisperAlignment/srt2lrc.py
+++ b/ml/lab/whisperAlignment/srt2lrc.py
--- a/lib/db/allData.ts
+++ b/lib/db/allData.ts
@ -0,0 +1,67 @@
 import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
 import { AllDataType, BiliUserType } from "lib/db/schema.d.ts";
 import { modelVersion } from "lib/ml/filter_inference.ts";
 export async function videoExistsInAllData(client: Client, aid: number) {
 	return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bilibili_metadata WHERE aid = $1)`, [aid])
 		.then((result) => result.rows[0].exists);
 }
 export async function userExistsInBiliUsers(client: Client, uid: number) {
 	return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bilibili_user WHERE uid = $1)`, [
 		uid,
 	]);
 }
 export async function getUnlabelledVideos(client: Client) {
 	const queryResult = await client.queryObject<{ aid: number }>(
 		`SELECT a.aid FROM bilibili_metadata a LEFT JOIN labelling_result l ON a.aid = l.aid WHERE l.aid IS NULL`,
 	);
 	return queryResult.rows.map((row) => row.aid);
 }
 export async function insertVideoLabel(client: Client, aid: number, label: number) {
 	return await client.queryObject(
 		`INSERT INTO labelling_result (aid, label, model_version) VALUES ($1, $2, $3) ON CONFLICT (aid, model_version) DO NOTHING`,
 		[aid, label, modelVersion],
 	);
 }
 export async function getVideoInfoFromAllData(client: Client, aid: number) {
 	const queryResult = await client.queryObject<AllDataType>(
 		`SELECT * FROM bilibili_metadata WHERE aid = $1`,
 		[aid],
 	);
 	const row = queryResult.rows[0];
 	let authorInfo = "";
 	if (row.uid && await userExistsInBiliUsers(client, row.uid)) {
 		const q = await client.queryObject<BiliUserType>(
 			`SELECT * FROM bilibili_user WHERE uid = $1`,
 			[row.uid],
 		);
 		const userRow = q.rows[0];
 		if (userRow) {
 			authorInfo = userRow.desc;
 		}
 	}
 	return {
 		title: row.title,
 		description: row.description,
 		tags: row.tags,
 		author_info: authorInfo,
 	};
 }
 export async function getUnArchivedBiliUsers(client: Client) {
 	const queryResult = await client.queryObject<{ uid: number }>(
 		`
 		SELECT ad.uid
 		FROM bilibili_metadata ad
 		LEFT JOIN bilibili_user bu ON ad.uid = bu.uid
 		WHERE bu.uid IS NULL;
 		`,
 		[],
 	);
 	const rows = queryResult.rows;
 	return rows.map((row) => row.uid);
 }
--- a/lib/db/init.ts
+++ b/lib/db/init.ts
@ -0,0 +1,6 @@
 import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
 import { postgresConfig } from "lib/db/pgConfig.ts";
 const pool = new Pool(postgresConfig, 12);
 export const db = pool;
--- a/lib/db/pgConfig.ts
+++ b/lib/db/pgConfig.ts
@ -0,0 +1,21 @@
 const requiredEnvVars = ["DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_PORT"];
 const unsetVars = requiredEnvVars.filter((key) => Deno.env.get(key) === undefined);
 if (unsetVars.length > 0) {
 	throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
 }
 const databaseHost = Deno.env.get("DB_HOST")!;
 const databaseName = Deno.env.get("DB_NAME")!;
 const databaseUser = Deno.env.get("DB_USER")!;
 const databasePassword = Deno.env.get("DB_PASSWORD")!;
 const databasePort = Deno.env.get("DB_PORT")!;
 export const postgresConfig = {
 	hostname: databaseHost,
 	port: parseInt(databasePort),
 	database: databaseName,
 	user: databaseUser,
 	password: databasePassword,
 };
--- a/lib/db/redis.ts
+++ b/lib/db/redis.ts
@ -0,0 +1,3 @@
 import { Redis } from "ioredis";
 export const redis = new Redis({ maxRetriesPerRequest: null });
--- a/lib/db/schema.d.ts
+++ b/lib/db/schema.d.ts
@ -0,0 +1,33 @@
 export interface AllDataType {
 	id: number;
 	aid: number;
 	bvid: string | null;
 	description: string | null;
 	uid: number | null;
 	tags: string | null;
 	title: string | null;
 	published_at: string | null;
 	duration: number;
 	created_at: string | null;
 }
 export interface BiliUserType {
 	id: number;
 	uid: number;
 	username: string;
 	desc: string;
 	fans: number;
 }
 export interface VideoSnapshotType {
 	id: number;
 	created_at: string;
 	views: number;
 	coins: number;
 	likes: number;
 	favorites: number;
 	shares: number;
 	danmakus: number;
 	aid: bigint;
 	replies: number;
 }
--- a/lib/db/snapshot.ts
+++ b/lib/db/snapshot.ts
@ -0,0 +1,198 @@
 import { DAY, SECOND } from "$std/datetime/constants.ts";
 import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
 import { VideoSnapshotType } from "lib/db/schema.d.ts";
 import { parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts";
 export async function getSongsNearMilestone(client: Client) {
 	const queryResult = await client.queryObject<VideoSnapshotType>(`
    	WITH filtered_snapshots AS (
 			SELECT
 				vs.*
 			FROM
 				video_snapshot vs
 			WHERE
 				(vs.views >= 90000 AND vs.views < 100000) OR
 				(vs.views >= 900000 AND vs.views < 1000000)
 		),
 		ranked_snapshots AS (
 			SELECT
 				fs.*,
 				ROW_NUMBER() OVER (PARTITION BY fs.aid ORDER BY fs.created_at DESC) as rn,
 				MAX(fs.views) OVER (PARTITION BY fs.aid) as max_views_per_aid
 			FROM
 				filtered_snapshots fs
 			INNER JOIN
 				songs s ON fs.aid = s.aid
 		)
 		SELECT
 			rs.id, rs.created_at, rs.views, rs.coins, rs.likes, rs.favorites, rs.shares, rs.danmakus, rs.aid, rs.replies
 		FROM
 			ranked_snapshots rs
 		WHERE
 			rs.rn = 1;
    `);
 	return queryResult.rows.map((row) => {
 		return {
 			...row,
 			aid: Number(row.aid),
 		};
 	});
 }
 export async function getUnsnapshotedSongs(client: Client) {
 	const queryResult = await client.queryObject<{ aid: bigint }>(`
 		SELECT DISTINCT s.aid
 		FROM songs s
 		LEFT JOIN video_snapshot v ON s.aid = v.aid
 		WHERE v.aid IS NULL;
 	`);
 	return queryResult.rows.map((row) => Number(row.aid));
 }
 export async function getSongSnapshotCount(client: Client, aid: number) {
 	const queryResult = await client.queryObject<{ count: number }>(
 		`
 		SELECT COUNT(*) AS count
 		FROM video_snapshot
 		WHERE aid = $1;
 	`,
 		[aid],
 	);
 	return queryResult.rows[0].count;
 }
 export async function getShortTermEtaPrediction(client: Client, aid: number) {
 	const queryResult = await client.queryObject<{ eta: number }>(
 		`
 		WITH old_snapshot AS (
 			SELECT created_at, views 
 			FROM video_snapshot 
 			WHERE aid = $1 AND
 			NOW() - created_at > '20 min'
 			ORDER BY created_at DESC 
 			LIMIT 1
 		),
 		new_snapshot AS (
 			SELECT created_at, views
 			FROM video_snapshot
 			WHERE aid = $1
 			ORDER BY created_at DESC
 			LIMIT 1
 		)
 		SELECT
 			CASE 
 				WHEN n.views > 100000 
 				THEN 
 					(1000000 - n.views) -- Views remaining
 					/ 
 					(
 						(n.views - o.views)  -- Views delta
 						/
 						(EXTRACT(EPOCH FROM (n.created_at - o.created_at)) + 0.001) -- Time delta in seconds
 					    + 0.001
 					) -- Increment per second
 				ELSE 
 					(100000 - n.views) -- Views remaining
 			        /
 					(
 					    (n.views - o.views) -- Views delta
 			            /
 					    (EXTRACT(EPOCH FROM (n.created_at - o.created_at)) + 0.001) -- Time delta in seconds
 					    + 0.001
 			        ) -- Increment per second
 			END AS eta
 		FROM old_snapshot o, new_snapshot n;
 		`,
 		[aid],
 	);
 	if (queryResult.rows.length === 0) {
 		return null;
 	}
 	return queryResult.rows[0].eta;
 }
 export async function getIntervalFromLastSnapshotToNow(client: Client, aid: number) {
 	const queryResult = await client.queryObject<{ interval: number }>(
 		`
 		SELECT EXTRACT(EPOCH FROM (NOW() - created_at)) AS interval
 		FROM video_snapshot
 		WHERE aid = $1
 		ORDER BY created_at DESC
 		LIMIT 1;
 		`,
 		[aid],
 	);
 	if (queryResult.rows.length === 0) {
 		return null;
 	}
 	return queryResult.rows[0].interval;
 }
 export async function songEligibleForMilestoneSnapshot(client: Client, aid: number) {
 	const count = await getSongSnapshotCount(client, aid);
 	if (count < 2) {
 		return true;
 	}
 	const queryResult = await client.queryObject<
 		{ views1: number; created_at1: string; views2: number; created_at2: string }
 	>(
 		`
 			WITH latest_snapshot AS (
 				SELECT 
 					aid,
 					views,
 					created_at
 				FROM video_snapshot
 				WHERE aid = $1
 				ORDER BY created_at DESC
 				LIMIT 1
 			),
 			pairs AS (
 				SELECT 
 					a.views AS views1,
 					a.created_at AS created_at1,
 					b.views AS views2,
 					b.created_at AS created_at2,
 					(b.created_at - a.created_at) AS interval
 				FROM video_snapshot a
 				JOIN latest_snapshot b 
 					ON a.aid = b.aid 
 					AND a.created_at < b.created_at
 			)
 			SELECT 
 				views1,
 				created_at1,
 				views2,
 				created_at2
 			FROM (
 				SELECT 
 					*,
 					ROW_NUMBER() OVER (
 						ORDER BY 
 							CASE WHEN interval <= INTERVAL '3 days' THEN 0 ELSE 1 END,
 							CASE WHEN interval <= INTERVAL '3 days' THEN -interval ELSE interval END
 					) AS rn
 				FROM pairs
 			) ranked
 			WHERE rn = 1;
 		`,
 		[aid],
 	);
 	if (queryResult.rows.length === 0) {
 		return true;
 	}
 	const recentViewsData = queryResult.rows[0];
 	const time1 = parseTimestampFromPsql(recentViewsData.created_at1);
 	const time2 = parseTimestampFromPsql(recentViewsData.created_at2);
 	const intervalSec = (time2 - time1) / SECOND;
 	const views1 = recentViewsData.views1;
 	const views2 = recentViewsData.views2;
 	const viewsDiff = views2 - views1;
 	if (viewsDiff == 0) {
 		return false;
 	}
 	const nextMilestone = views2 >= 100000 ? 1000000 : 100000;
 	const expectedViewsDiff = nextMilestone - views2;
 	const expectedIntervalSec = expectedViewsDiff / viewsDiff * intervalSec;
 	return expectedIntervalSec <= 3 * DAY;
 }
--- a/lib/db/songs.ts
+++ b/lib/db/songs.ts
@ -0,0 +1,29 @@
 import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
 export async function getNotCollectedSongs(client: Client) {
 	const queryResult = await client.queryObject<{ aid: number }>(`
 		SELECT lr.aid
 		FROM labelling_result lr
 		WHERE lr.label != 0
 		AND NOT EXISTS (
 			SELECT 1
 			FROM songs s
 			WHERE s.aid = lr.aid
 		);
 	`);
 	return queryResult.rows.map((row) => row.aid);
 }
 export async function aidExistsInSongs(client: Client, aid: number) {
 	const queryResult = await client.queryObject<{ exists: boolean }>(
 		`
 	    SELECT EXISTS (
 	        SELECT 1
 	        FROM songs
 	        WHERE aid = $1
 	    );
 	`,
 		[aid],
 	);
 	return queryResult.rows[0].exists;
 }
--- a/packages/core/log/logger.ts
+++ b/packages/core/log/logger.ts
@ -1,5 +1,5 @@
-import winston, { format, transports } from "winston";
+import winston, { format, transports } from "npm:winston";
-import type { TransformableInfo } from "logform";
+import { TransformableInfo } from "npm:logform";
 import chalk from "chalk";
 const customFormat = format.printf((info: TransformableInfo) => {
@ -24,13 +24,13 @@ const createTransport = (level: string, filename: string) => {
 	let maxsize = undefined;
 	let maxFiles = undefined;
 	let tailable = undefined;
-	if (level === "silly") {
+	if (level === "verbose") {
-		maxsize = 500 * MB;
+		maxsize = 10 * MB;
-		maxFiles = undefined;
+		maxFiles = 10;
 		tailable = false;
 	} else if (level === "warn") {
 		maxsize = 10 * MB;
-		maxFiles = 5;
+		maxFiles = 1;
 		tailable = false;
 	}
 	function replacer(key: unknown, value: unknown) {
@ -52,9 +52,9 @@ const createTransport = (level: string, filename: string) => {
 	});
 };
-const sillyLogPath = process.env["LOG_VERBOSE"] ?? "logs/verbose.log";
+const sillyLogPath = Deno.env.get("LOG_VERBOSE") ?? "logs/verbose.log";
-const warnLogPath = process.env["LOG_WARN"] ?? "logs/warn.log";
+const warnLogPath = Deno.env.get("LOG_WARN") ?? "logs/warn.log";
-const errorLogPath = process.env["LOG_ERROR"] ?? "logs/error.log";
+const errorLogPath = Deno.env.get("LOG_ERROR") ?? "logs/error.log";
 const winstonLogger = winston.createLogger({
 	levels: winston.config.npm.levels,
@ -62,7 +62,7 @@ const winstonLogger = winston.createLogger({
 		new transports.Console({
 			level: "debug",
 			format: format.combine(
-				format.timestamp({ format: "YYYY-MM-DD HH:mm:ss.SSS" }),
+				format.timestamp({ format: "HH:mm:ss.SSS" }),
 				format.colorize(),
 				format.errors({ stack: true }),
 				customFormat,
--- a/packages/core/log/test.ts
+++ b/packages/core/log/test.ts
@ -1,4 +1,4 @@
-import logger from "@core/log/logger.ts";
+import logger from "lib/log/logger.ts";
 logger.error(Error("test error"), "test service");
 logger.debug(`some string`);
--- a/lib/ml/benchmark.ts
+++ b/lib/ml/benchmark.ts
@ -0,0 +1,173 @@
 import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
 import * as ort from "onnxruntime";
 import { softmax } from "lib/ml/filter_inference.ts";
 // 配置参数
 const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
 const onnxClassifierPath = "./model/video_classifier_v3_17.onnx";
 const onnxEmbeddingPath = "./model/embedding_original.onnx";
 const testDataPath = "./data/filter/test1.jsonl";
 // 初始化会话
 const [sessionClassifier, sessionEmbedding] = await Promise.all([
 	ort.InferenceSession.create(onnxClassifierPath),
 	ort.InferenceSession.create(onnxEmbeddingPath),
 ]);
 let tokenizer: PreTrainedTokenizer;
 // 初始化分词器
 async function loadTokenizer() {
 	const tokenizerConfig = { local_files_only: true };
 	tokenizer = await AutoTokenizer.from_pretrained(sentenceTransformerModelName, tokenizerConfig);
 }
 // 新的嵌入生成函数（使用ONNX）
 async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
 	const { input_ids } = await tokenizer(texts, {
 		add_special_tokens: false,
 		return_tensor: false,
 	});
 	// 构造输入参数
 	const cumsum = (arr: number[]): number[] =>
 		arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
 	const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: string) => x.length))];
 	const flattened_input_ids = input_ids.flat();
 	// 准备ONNX输入
 	const inputs = {
 		input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [
 			flattened_input_ids.length,
 		]),
 		offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length]),
 	};
 	// 执行推理
 	const { embeddings } = await session.run(inputs);
 	return Array.from(embeddings.data as Float32Array);
 }
 // 分类推理函数
 async function runClassification(embeddings: number[]): Promise<number[]> {
 	const inputTensor = new ort.Tensor(
 		Float32Array.from(embeddings),
 		[1, 3, 1024],
 	);
 	const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
 	return softmax(logits.data as Float32Array);
 }
 // 指标计算函数
 function calculateMetrics(labels: number[], predictions: number[], elapsedTime: number): {
 	accuracy: number;
 	precision: number;
 	recall: number;
 	f1: number;
 	"Class 0 Prec": number;
 	speed: string;
 } {
 	// 输出label和prediction不一样的index列表
 	const arr = [];
 	for (let i = 0; i < labels.length; i++) {
 		if (labels[i] !== predictions[i] && predictions[i] == 0) {
 			arr.push([i + 1, labels[i], predictions[i]]);
 		}
 	}
 	console.log(arr);
 	// 初始化混淆矩阵
 	const classCount = Math.max(...labels, ...predictions) + 1;
 	const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
 	// 填充矩阵
 	labels.forEach((trueLabel, i) => {
 		matrix[trueLabel][predictions[i]]++;
 	});
 	// 计算各指标
 	let totalTP = 0, totalFP = 0, totalFN = 0;
 	for (let c = 0; c < classCount; c++) {
 		const TP = matrix[c][c];
 		const FP = matrix.flatMap((row, i) => i === c ? [] : [row[c]]).reduce((a, b) => a + b, 0);
 		const FN = matrix[c].filter((_, i) => i !== c).reduce((a, b) => a + b, 0);
 		totalTP += TP;
 		totalFP += FP;
 		totalFN += FN;
 	}
 	const precision = totalTP / (totalTP + totalFP);
 	const recall = totalTP / (totalTP + totalFN);
 	const f1 = 2 * (precision * recall) / (precision + recall) || 0;
 	// 计算Class 0 Precision
 	const class0TP = matrix[0][0];
 	const class0FP = matrix.flatMap((row, i) => i === 0 ? [] : [row[0]]).reduce((a, b) => a + b, 0);
 	const class0Precision = class0TP / (class0TP + class0FP) || 0;
 	return {
 		accuracy: labels.filter((l, i) => l === predictions[i]).length / labels.length,
 		precision,
 		recall,
 		f1,
 		speed: `${(labels.length / (elapsedTime / 1000)).toFixed(1)} samples/sec`,
 		"Class 0 Prec": class0Precision,
 	};
 }
 // 改造后的评估函数
 async function evaluateModel(session: ort.InferenceSession): Promise<{
 	accuracy: number;
 	precision: number;
 	recall: number;
 	f1: number;
 	"Class 0 Prec": number;
 }> {
 	const data = await Deno.readTextFile(testDataPath);
 	const samples = data.split("\n")
 		.map((line) => {
 			try {
 				return JSON.parse(line);
 			} catch {
 				return null;
 			}
 		})
 		.filter(Boolean);
 	const allPredictions: number[] = [];
 	const allLabels: number[] = [];
 	const t = new Date().getTime();
 	for (const sample of samples) {
 		try {
 			const embeddings = await getONNXEmbeddings([
 				sample.title,
 				sample.description,
 				sample.tags.join(","),
 			], session);
 			const probabilities = await runClassification(embeddings);
 			allPredictions.push(probabilities.indexOf(Math.max(...probabilities)));
 			allLabels.push(sample.label);
 		} catch (error) {
 			console.error("Processing error:", error);
 		}
 	}
 	const elapsed = new Date().getTime() - t;
 	return calculateMetrics(allLabels, allPredictions, elapsed);
 }
 // 主函数
 async function main() {
 	await loadTokenizer();
 	const metrics = await evaluateModel(sessionEmbedding);
 	console.log("Model Metrics:");
 	console.table(metrics);
 }
 await main();
--- a/lib/ml/filter_inference.ts
+++ b/lib/ml/filter_inference.ts
@ -0,0 +1,99 @@
 import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
 import * as ort from "onnxruntime";
 import logger from "lib/log/logger.ts";
 import { WorkerError } from "lib/mq/schema.ts";
 const tokenizerModel = "alikia2x/jina-embedding-v3-m2v-1024";
 const onnxClassifierPath = "./model/video_classifier_v3_17.onnx";
 const onnxEmbeddingOriginalPath = "./model/model.onnx";
 export const modelVersion = "3.17";
 let sessionClassifier: ort.InferenceSession | null = null;
 let sessionEmbedding: ort.InferenceSession | null = null;
 let tokenizer: PreTrainedTokenizer | null = null;
 export async function initializeModels() {
 	if (tokenizer && sessionClassifier && sessionEmbedding) {
 		return;
 	}
 	try {
 		tokenizer = await AutoTokenizer.from_pretrained(tokenizerModel);
 		const [classifierSession, embeddingSession] = await Promise.all([
 			ort.InferenceSession.create(onnxClassifierPath),
 			ort.InferenceSession.create(onnxEmbeddingOriginalPath),
 		]);
 		sessionClassifier = classifierSession;
 		sessionEmbedding = embeddingSession;
 		logger.log("Filter models initialized", "ml");
 	} catch (error) {
 		throw new WorkerError(error as Error, "ml", "fn:initializeModels");
 	}
 }
 export function softmax(logits: Float32Array): number[] {
 	const maxLogit = Math.max(...logits);
 	const exponents = logits.map((logit) => Math.exp(logit - maxLogit));
 	const sumOfExponents = exponents.reduce((sum, exp) => sum + exp, 0);
 	return Array.from(exponents.map((exp) => exp / sumOfExponents));
 }
 async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
 	if (!tokenizer) {
 		throw new Error("Tokenizer is not initialized. Call initializeModels() first.");
 	}
 	const { input_ids } = await tokenizer(texts, {
 		add_special_tokens: false,
 		return_tensor: false,
 	});
 	const cumsum = (arr: number[]): number[] =>
 		arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
 	const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: string) => x.length))];
 	const flattened_input_ids = input_ids.flat();
 	const inputs = {
 		input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [
 			flattened_input_ids.length,
 		]),
 		offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length]),
 	};
 	const { embeddings } = await session.run(inputs);
 	return Array.from(embeddings.data as Float32Array);
 }
 async function runClassification(embeddings: number[]): Promise<number[]> {
 	if (!sessionClassifier) {
 		throw new Error("Classifier session is not initialized. Call initializeModels() first.");
 	}
 	const inputTensor = new ort.Tensor(
 		Float32Array.from(embeddings),
 		[1, 3, 1024],
 	);
 	const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
 	return softmax(logits.data as Float32Array);
 }
 export async function classifyVideo(
 	title: string,
 	description: string,
 	tags: string,
 	aid: number,
 ): Promise<number> {
 	if (!sessionEmbedding) {
 		throw new Error("Embedding session is not initialized. Call initializeModels() first.");
 	}
 	const embeddings = await getONNXEmbeddings([
 		title,
 		description,
 		tags,
 	], sessionEmbedding);
 	const probabilities = await runClassification(embeddings);
 	logger.log(`Prediction result for aid: ${aid}: [${probabilities.map((p) => p.toFixed(5))}]`, "ml");
 	return probabilities.indexOf(Math.max(...probabilities));
 }
--- a/lib/ml/quant_benchmark.ts
+++ b/lib/ml/quant_benchmark.ts
@ -0,0 +1,165 @@
 import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
 import * as ort from "onnxruntime";
 import { softmax } from "lib/ml/filter_inference.ts";
 // 配置参数
 const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
 const onnxClassifierPath = "./model/video_classifier_v3_11.onnx";
 const onnxEmbeddingOriginalPath = "./model/embedding_original.onnx";
 const onnxEmbeddingQuantizedPath = "./model/embedding_original.onnx";
 // 初始化会话
 const [sessionClassifier, sessionEmbeddingOriginal, sessionEmbeddingQuantized] = await Promise.all([
 	ort.InferenceSession.create(onnxClassifierPath),
 	ort.InferenceSession.create(onnxEmbeddingOriginalPath),
 	ort.InferenceSession.create(onnxEmbeddingQuantizedPath),
 ]);
 let tokenizer: PreTrainedTokenizer;
 // 初始化分词器
 async function loadTokenizer() {
 	const tokenizerConfig = { local_files_only: true };
 	tokenizer = await AutoTokenizer.from_pretrained(sentenceTransformerModelName, tokenizerConfig);
 }
 // 新的嵌入生成函数（使用ONNX）
 async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
 	const { input_ids } = await tokenizer(texts, {
 		add_special_tokens: false,
 		return_tensor: false,
 	});
 	// 构造输入参数
 	const cumsum = (arr: number[]): number[] =>
 		arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
 	const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: string) => x.length))];
 	const flattened_input_ids = input_ids.flat();
 	// 准备ONNX输入
 	const inputs = {
 		input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [
 			flattened_input_ids.length,
 		]),
 		offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length]),
 	};
 	// 执行推理
 	const { embeddings } = await session.run(inputs);
 	return Array.from(embeddings.data as Float32Array);
 }
 // 分类推理函数
 async function runClassification(embeddings: number[]): Promise<number[]> {
 	const inputTensor = new ort.Tensor(
 		Float32Array.from(embeddings),
 		[1, 4, 1024],
 	);
 	const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
 	return softmax(logits.data as Float32Array);
 }
 // 指标计算函数
 function calculateMetrics(labels: number[], predictions: number[], elapsedTime: number): {
 	accuracy: number;
 	precision: number;
 	recall: number;
 	f1: number;
 	speed: string;
 } {
 	// 初始化混淆矩阵
 	const classCount = Math.max(...labels, ...predictions) + 1;
 	const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
 	// 填充矩阵
 	labels.forEach((trueLabel, i) => {
 		matrix[trueLabel][predictions[i]]++;
 	});
 	// 计算各指标
 	let totalTP = 0, totalFP = 0, totalFN = 0;
 	for (let c = 0; c < classCount; c++) {
 		const TP = matrix[c][c];
 		const FP = matrix.flatMap((row, i) => i === c ? [] : [row[c]]).reduce((a, b) => a + b, 0);
 		const FN = matrix[c].filter((_, i) => i !== c).reduce((a, b) => a + b, 0);
 		totalTP += TP;
 		totalFP += FP;
 		totalFN += FN;
 	}
 	const precision = totalTP / (totalTP + totalFP);
 	const recall = totalTP / (totalTP + totalFN);
 	const f1 = 2 * (precision * recall) / (precision + recall) || 0;
 	return {
 		accuracy: labels.filter((l, i) => l === predictions[i]).length / labels.length,
 		precision,
 		recall,
 		f1,
 		speed: `${(labels.length / (elapsedTime / 1000)).toFixed(1)} samples/sec`,
 	};
 }
 // 改造后的评估函数
 async function evaluateModel(session: ort.InferenceSession): Promise<{
 	accuracy: number;
 	precision: number;
 	recall: number;
 	f1: number;
 }> {
 	const data = await Deno.readTextFile("./data/filter/test1.jsonl");
 	const samples = data.split("\n")
 		.map((line) => {
 			try {
 				return JSON.parse(line);
 			} catch {
 				return null;
 			}
 		})
 		.filter(Boolean);
 	const allPredictions: number[] = [];
 	const allLabels: number[] = [];
 	const t = new Date().getTime();
 	for (const sample of samples) {
 		try {
 			const embeddings = await getONNXEmbeddings([
 				sample.title,
 				sample.description,
 				sample.tags.join(","),
 				sample.author_info,
 			], session);
 			const probabilities = await runClassification(embeddings);
 			allPredictions.push(probabilities.indexOf(Math.max(...probabilities)));
 			allLabels.push(sample.label);
 		} catch (error) {
 			console.error("Processing error:", error);
 		}
 	}
 	const elapsed = new Date().getTime() - t;
 	return calculateMetrics(allLabels, allPredictions, elapsed);
 }
 // 主函数
 async function main() {
 	await loadTokenizer();
 	// 评估原始模型
 	const originalMetrics = await evaluateModel(sessionEmbeddingOriginal);
 	console.log("Original Model Metrics:");
 	console.table(originalMetrics);
 	// 评估量化模型
 	const quantizedMetrics = await evaluateModel(sessionEmbeddingQuantized);
 	console.log("Quantized Model Metrics:");
 	console.table(quantizedMetrics);
 }
 await main();
--- a/lib/mq/exec/classifyVideo.ts
+++ b/lib/mq/exec/classifyVideo.ts
@ -0,0 +1,67 @@
 import { Job } from "bullmq";
 import { db } from "lib/db/init.ts";
 import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "lib/db/allData.ts";
 import { classifyVideo } from "lib/ml/filter_inference.ts";
 import { ClassifyVideoQueue } from "lib/mq/index.ts";
 import logger from "lib/log/logger.ts";
 import { lockManager } from "lib/mq/lockManager.ts";
 import { aidExistsInSongs } from "lib/db/songs.ts";
 import { insertIntoSongs } from "lib/mq/task/collectSongs.ts";
 export const classifyVideoWorker = async (job: Job) => {
 	const client = await db.connect();
 	const aid = job.data.aid;
 	if (!aid) {
 		return 3;
 	}
 	const videoInfo = await getVideoInfoFromAllData(client, aid);
 	const title = videoInfo.title?.trim() || "untitled";
 	const description = videoInfo.description?.trim() || "N/A";
 	const tags = videoInfo.tags?.trim() || "empty";
 	const label = await classifyVideo(title, description, tags, aid);
 	if (label == -1) {
 		logger.warn(`Failed to classify video ${aid}`, "ml");
 	}
 	await insertVideoLabel(client, aid, label);
 	const exists = await aidExistsInSongs(client, aid);
 	if (!exists && label !== 0) {
 		await insertIntoSongs(client, aid);
 	}
 	client.release();
 	await job.updateData({
 		...job.data,
 		label: label,
 	});
 	return 0;
 };
 export const classifyVideosWorker = async () => {
 	if (await lockManager.isLocked("classifyVideos")) {
 		logger.log("job:classifyVideos is locked, skipping.", "mq");
 		return;
 	}
 	await lockManager.acquireLock("classifyVideos");
 	const client = await db.connect();
 	const videos = await getUnlabelledVideos(client);
 	logger.log(`Found ${videos.length} unlabelled videos`);
 	client.release();
 	let i = 0;
 	for (const aid of videos) {
 		if (i > 200) {
 			await lockManager.releaseLock("classifyVideos");
 			return 10000 + i;
 		}
 		await ClassifyVideoQueue.add("classifyVideo", { aid: Number(aid) });
 		i++;
 	}
 	await lockManager.releaseLock("classifyVideos");
 	return 0;
 };
--- a/lib/mq/exec/getLatestVideos.ts
+++ b/lib/mq/exec/getLatestVideos.ts
@ -0,0 +1,37 @@
 import { Job } from "bullmq";
 import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
 import { db } from "lib/db/init.ts";
 import { insertVideoInfo } from "lib/mq/task/getVideoDetails.ts";
 import { collectSongs } from "lib/mq/task/collectSongs.ts";
 export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
 	const client = await db.connect();
 	try {
 		await queueLatestVideos(client);
 	} finally {
 		client.release();
 	}
 };
 export const collectSongsWorker = async (_job: Job): Promise<void> => {
 	const client = await db.connect();
 	try {
 		await collectSongs(client);
 	} finally {
 		client.release();
 	}
 };
 export const getVideoInfoWorker = async (job: Job): Promise<number> => {
 	const client = await db.connect();
 	try {
 		const aid = job.data.aid;
 		if (!aid) {
 			return 3;
 		}
 		await insertVideoInfo(client, aid);
 		return 0;
 	} finally {
 		client.release();
 	}
 };
--- a/lib/mq/exec/snapshotTick.ts
+++ b/lib/mq/exec/snapshotTick.ts
@ -0,0 +1,227 @@
 import { Job } from "bullmq";
 import { HOUR, MINUTE, SECOND } from "$std/datetime/constants.ts";
 import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
 import { db } from "lib/db/init.ts";
 import {
 getIntervalFromLastSnapshotToNow,
 	getShortTermEtaPrediction,
 	getSongsNearMilestone,
 	getUnsnapshotedSongs,
 	songEligibleForMilestoneSnapshot,
 } from "lib/db/snapshot.ts";
 import { SnapshotQueue } from "lib/mq/index.ts";
 import { insertVideoStats } from "lib/mq/task/getVideoStats.ts";
 import { parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts";
 import { redis } from "lib/db/redis.ts";
 import { NetSchedulerError } from "lib/mq/scheduler.ts";
 import logger from "lib/log/logger.ts";
 import { formatSeconds } from "lib/utils/formatSeconds.ts";
 import { truncate } from "lib/utils/truncate.ts";
 async function snapshotScheduled(aid: number) {
 	try {
 		return await redis.exists(`cvsa:snapshot:${aid}`);
 	} catch {
 		logger.error(`Failed to check scheduled status for ${aid}`, "mq");
 		return false;
 	}
 }
 async function setSnapshotScheduled(aid: number, value: boolean, exp: number) {
 	try {
 		if (value) {
 			await redis.set(`cvsa:snapshot:${aid}`, 1, "EX", exp);
 		} else {
 			await redis.del(`cvsa:snapshot:${aid}`);
 		}
 	} catch {
 		logger.error(`Failed to set scheduled status to ${value} for ${aid}`, "mq");
 	}
 }
 interface SongNearMilestone {
 	aid: number;
 	id: number;
 	created_at: string;
 	views: number;
 	coins: number;
 	likes: number;
 	favorites: number;
 	shares: number;
 	danmakus: number;
 	replies: number;
 }
 async function processMilestoneSnapshots(client: Client, vidoesNearMilestone: SongNearMilestone[]) {
 	let i = 0;
 	for (const snapshot of vidoesNearMilestone) {
 		if (await snapshotScheduled(snapshot.aid)) {
 			continue;
 		}
 		const timeFromLastSnapshot = await getIntervalFromLastSnapshotToNow(client, snapshot.aid);
 		const lastSnapshotLessThan8Hrs = timeFromLastSnapshot && timeFromLastSnapshot * SECOND < 8 * HOUR;
 		const notEligible = await songEligibleForMilestoneSnapshot(client, snapshot.aid);
 		if (notEligible && lastSnapshotLessThan8Hrs) {
 			continue;
 		}
 		const factor = Math.floor(i / 8);
 		const delayTime = factor * SECOND * 2;
 		await SnapshotQueue.add("snapshotMilestoneVideo", {
 			aid: snapshot.aid,
 			currentViews: snapshot.views,
 			snapshotedAt: snapshot.created_at,
 		}, { delay: delayTime, priority: 1 });
 		await setSnapshotScheduled(snapshot.aid, true, 20 * 60);
 		i++;
 	}
 }
 async function processUnsnapshotedVideos(unsnapshotedVideos: number[]) {
 	let i = 0;
 	for (const aid of unsnapshotedVideos) {
 		if (await snapshotScheduled(aid)) {
 			logger.silly(`Video ${aid} is already scheduled for snapshot`, "mq", "fn:processUnsnapshotedVideos");
 			continue;
 		}
 		const factor = Math.floor(i / 5);
 		const delayTime = factor * SECOND * 4;
 		await SnapshotQueue.add("snapshotVideo", {
 			aid,
 		}, { delay: delayTime, priority: 3 });
 		await setSnapshotScheduled(aid, true, 6 * 60 * 60);
 		i++;
 	}
 }
 export const snapshotTickWorker = async (_job: Job) => {
 	const client = await db.connect();
 	try {
 		const vidoesNearMilestone = await getSongsNearMilestone(client);
 		await processMilestoneSnapshots(client, vidoesNearMilestone);
 		const unsnapshotedVideos = await getUnsnapshotedSongs(client);
 		await processUnsnapshotedVideos(unsnapshotedVideos);
 	} finally {
 		client.release();
 	}
 };
 const log = (a: number, b: number = 10) => Math.log(a) / Math.log(b);
 export const takeSnapshotForMilestoneVideoWorker = async (job: Job) => {
 	const client = await db.connect();
 	await setSnapshotScheduled(job.data.aid, true, 20 * 60);
 	try {
 		const aid: number = job.data.aid;
 		const currentViews: number = job.data.currentViews;
 		const lastSnapshoted: string = job.data.snapshotedAt;
 		const stat = await insertVideoStats(client, aid, "snapshotMilestoneVideo");
 		if (typeof stat === "number") {
 			if (stat === -404 || stat === 62002 || stat == 62012) {
 				await setSnapshotScheduled(aid, true, 6 * 60 * 60);
 			} else {
 				await setSnapshotScheduled(aid, false, 0);
 			}
 			return;
 		}
 		const nextMilestone = currentViews >= 100000 ? 1000000 : 100000;
 		if (stat.views >= nextMilestone) {
 			await setSnapshotScheduled(aid, false, 0);
 			return;
 		}
 		let eta = await getShortTermEtaPrediction(client, aid);
 		let factor = 3;
 		if (eta === null) {
 			const DELTA = 0.001;
 			const intervalSeconds = (Date.now() - parseTimestampFromPsql(lastSnapshoted)) / SECOND;
 			const viewsIncrement = stat.views - currentViews;
 			const incrementSpeed = viewsIncrement / (intervalSeconds + DELTA);
 			const viewsToIncrease = nextMilestone - stat.views;
 			eta = viewsToIncrease / (incrementSpeed + DELTA);
 			factor = log(2.97 / log(viewsToIncrease + 1), 1.14);
 		}
 		const scheduledNextSnapshotDelay = eta * SECOND / factor;
 		const maxInterval = 60 * MINUTE;
 		const minInterval = 1 * SECOND;
 		const delay = truncate(scheduledNextSnapshotDelay, minInterval, maxInterval);
 		await SnapshotQueue.add("snapshotMilestoneVideo", {
 			aid,
 			currentViews: stat.views,
 			snapshotedAt: stat.time,
 		}, { delay, priority: 1 });
 		await job.updateData({
 			...job.data,
 			updatedViews: stat.views,
 			updatedTime: new Date(stat.time).toISOString(),
 			etaInMins: eta / 60,
 		});
 		logger.log(
 			`Scheduled next milestone snapshot for ${aid} in ${
 				formatSeconds(delay / 1000)
 			}, current views: ${stat.views}`,
 			"mq",
 		);
 	} catch (e) {
 		if (e instanceof NetSchedulerError && e.code === "NO_AVAILABLE_PROXY") {
 			logger.warn(
 				`No available proxy for aid ${job.data.aid}.`,
 				"mq",
 				"fn:takeSnapshotForMilestoneVideoWorker",
 			);
 			await SnapshotQueue.add("snapshotMilestoneVideo", {
 				aid: job.data.aid,
 				currentViews: job.data.currentViews,
 				snapshotedAt: job.data.snapshotedAt,
 			}, { delay: 5 * SECOND, priority: 1 });
 			return;
 		}
 		throw e;
 	} finally {
 		client.release();
 	}
 };
 export const takeSnapshotForVideoWorker = async (job: Job) => {
 	const client = await db.connect();
 	await setSnapshotScheduled(job.data.aid, true, 6 * 60 * 60);
 	try {
 		const { aid } = job.data;
 		const stat = await insertVideoStats(client, aid, "getVideoInfo");
 		if (typeof stat === "number") {
 			if (stat === -404 || stat === 62002 || stat == 62012) {
 				await setSnapshotScheduled(aid, true, 6 * 60 * 60);
 			} else {
 				await setSnapshotScheduled(aid, false, 0);
 			}
 			return;
 		}
 		logger.log(`Taken snapshot for ${aid}`, "mq");
 		if (stat == null) {
 			setSnapshotScheduled(aid, false, 0);
 			return;
 		}
 		await job.updateData({
 			...job.data,
 			updatedViews: stat.views,
 			updatedTime: new Date(stat.time).toISOString(),
 		});
 		const nearMilestone = (stat.views >= 90000 && stat.views < 100000) ||
 			(stat.views >= 900000 && stat.views < 1000000);
 		if (nearMilestone) {
 			await SnapshotQueue.add("snapshotMilestoneVideo", {
 				aid,
 				currentViews: stat.views,
 				snapshotedAt: stat.time,
 			}, { delay: 0, priority: 1 });
 		}
 		await setSnapshotScheduled(aid, false, 0);
 	} catch (e) {
 		if (e instanceof NetSchedulerError && e.code === "NO_AVAILABLE_PROXY") {
 			await setSnapshotScheduled(job.data.aid, false, 0);
 			return;
 		}
 		throw e;
 	} finally {
 		client.release();
 	}
 };
--- a/lib/mq/executors.ts
+++ b/lib/mq/executors.ts
@ -0,0 +1 @@
 export * from "lib/mq/exec/getLatestVideos.ts";
--- a/lib/mq/index.ts
+++ b/lib/mq/index.ts
@ -0,0 +1,7 @@
 import { Queue } from "bullmq";
 export const LatestVideosQueue = new Queue("latestVideos");
 export const ClassifyVideoQueue = new Queue("classifyVideo");
 export const SnapshotQueue = new Queue("snapshot");
--- a/lib/mq/init.ts
+++ b/lib/mq/init.ts
@ -0,0 +1,24 @@
 import { MINUTE } from "$std/datetime/constants.ts";
 import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "lib/mq/index.ts";
 import logger from "lib/log/logger.ts";
 export async function initMQ() {
 	await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
 		every: 1 * MINUTE,
 		immediately: true,
 	});
 	await ClassifyVideoQueue.upsertJobScheduler("classifyVideos", {
 		every: 5 * MINUTE,
 		immediately: true,
 	});
 	await LatestVideosQueue.upsertJobScheduler("collectSongs", {
 		every: 3 * MINUTE,
 		immediately: true,
 	});
 	await SnapshotQueue.upsertJobScheduler("scheduleSnapshotTick", {
 		every: 3 * MINUTE,
 		immediately: true,
 	});
 	logger.log("Message queue initialized.");
 }
--- a/packages/core/mq/lockManager.ts
+++ b/packages/core/mq/lockManager.ts
@ -1,5 +1,5 @@
 import { Redis } from "ioredis";
-import { redis } from "@core/db/redis.ts";
+import { redis } from "lib/db/redis.ts";
 class LockManager {
 	private redis: Redis;
--- a/Show More
+++ b/Show More
		`@ -1,4 +0,0 @@`
			`# Crawler`

			A central aspect of CVSA's technical design is its emphasis on automation. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
`@ -1,4 +1,3 @@`
	`# 目录`	`# 目录`

	`* [视频快照](video-snapshot.md)`	`- [歌曲](songs.md)`
		`@ -0,0 +1,3 @@`
							`import { Redis } from "ioredis";`

							`export const redis = new Redis({ maxRetriesPerRequest: null });`
		`@ -0,0 +1 @@`
							`export * from "lib/mq/exec/getLatestVideos.ts";`