Compare commits: crawler/1. ... main (220 commits)
Author | SHA1 | Date | |
---|---|---|---|
92c3c8eefe | |||
497ea031d8 | |||
39ca394a56 | |||
0bd1771f35 | |||
328c73c209 | |||
5ac952ec13 | |||
2cf5923b28 | |||
75973c72ee | |||
b40d24721c | |||
0a6ecc6314 | |||
b4a0320e3e | |||
8cf9395354 | |||
1e8d28e194 | |||
c0340677a1 | |||
54a2de0a11 | |||
3abd6666c0 | |||
44e13724fc | |||
dd7e2242a0 | |||
503a93a09f | |||
507f2c331e | |||
a1a4abff46 | |||
c6b7736dac | |||
fa5ab258da | |||
9dd06fa7bc | |||
bb7f846305 | |||
7f9563a2a6 | |||
d0d9c21aba | |||
44bc99dd9d | |||
2c83b79881 | |||
1a20d5afe0 | |||
ae338f88ee | |||
96903dec2b | |||
58b4e2613c | |||
2b0497c83a | |||
3bc72720d1 | |||
557a013b42 | |||
16cfae8bad | |||
cbd46d4030 | |||
6b93a781b7 | |||
fe2fd4fe36 | |||
dd70543594 | |||
1ff71ab241 | |||
cf7a285f57 | |||
79a37d927a | |||
f003e77d52 | |||
4addadb035 | |||
23917b2976 | |||
6d946f74df | |||
c5ba673069 | |||
fa5ccce83f | |||
7786d66dbb | |||
b18b45078f | |||
1633e56b1e | |||
a063f2401b | |||
44f68993a0 | |||
980dd542ee | |||
c82a95d0bc | |||
137c19d74e | |||
5fb1355346 | |||
8456bb7485 | |||
01f5e57864 | |||
bf00918c00 | |||
44f4ee5b01 | |||
cd8fe502ba | |||
95681dcbf3 | |||
59f09ca5eb | |||
d686b6a369 | |||
d8c74a609a | |||
9d5c7cc47d | |||
7528dcdf81 | |||
811a8261b3 | |||
d0aa27b2ad | |||
280825cf67 | |||
e658135a81 | |||
4632bd5906 | |||
43b52dee0b | |||
8900ac7ec7 | |||
2772849933 | |||
784939074a | |||
94dd662c40 | |||
1ebc0d0c1b | |||
728a74f4d3 | |||
0c8a459d92 | |||
d7b6792d05 | |||
50bcb48bd6 | |||
fe386d2b02 | |||
e72b3008d1 | |||
a31f702499 | |||
5a112aeaee | |||
d675187f68 | |||
10de53b773 | |||
f97e42e7d0 | |||
be0ff294be | |||
d74ff02a3f | |||
92e00d033d | |||
6d1698fcb6 | |||
7d8361589c | |||
66b89eb562 | |||
d994e67036 | |||
ee40764fdc | |||
175ec047cf | |||
358dd1ee5e | |||
d9c8253019 | |||
1a86831e90 | |||
a67d896d86 | |||
244298913a | |||
2c47105913 | |||
6eaaf921d6 | |||
288e4f9571 | |||
907c0a6976 | |||
7689e687ff | |||
651eef0b9e | |||
68bd46fd8a | |||
13ea8fec8b | |||
3d9e98c949 | |||
c7dd1cfc2e | |||
e0a19499e1 | |||
0930bbe6f4 | |||
054d28e796 | |||
0614067278 | |||
6df6345ec1 | |||
bae1f84bea | |||
21c918f1fa | |||
f1651fee30 | |||
d0b7d93e5b | |||
7a7c5cada9 | |||
10b761e3db | |||
1f6411b512 | |||
9ef513eed7 | |||
d80a6bfcd9 | |||
7a6892ae8e | |||
b080c51c3e | |||
f4d08e944a | |||
a9582722f4 | |||
4ee4d2ede9 | |||
f21ff45dd3 | |||
b5dbf293a2 | |||
fc90dad185 | |||
0b36f52c6c | |||
445886815a | |||
8e7a1c3076 | |||
71ed0bd66b | |||
b76d8e589c | |||
69fb3604b1 | |||
d98e24b62f | |||
c4c9a3a440 | |||
da1bea7f41 | |||
38c0cbd371 | |||
a90747878e | |||
dd720b18fa | |||
3a83df7954 | |||
a8292d7b6b | |||
0923a34e16 | |||
f34633dc35 | |||
94e19690d1 | |||
20668609dd | |||
33c6a3c1f8 | |||
f39fef0d9a | |||
13ed20cf5c | |||
22b1c337ac | |||
757cbbab7e | |||
b4205049cb | |||
509c10ded0 | |||
99c7a34833 | |||
d808f36c58 | |||
984484cc3f | |||
46578db3e6 | |||
704b5106c6 | |||
2e702f23de | |||
cb5e24e542 | |||
b1e071930c | |||
c3f13cc6e3 | |||
dd829b203d | |||
41f8b42f1c | |||
fba56106cc | |||
8d4edd43bf | |||
2aead46b51 | |||
79af12e526 | |||
cfd4fc3d21 | |||
b53366dbab | |||
7a46f31d7f | |||
cf33c4922d | |||
aa75fdd63e | |||
9c0783c607 | |||
81847cc090 | |||
28772fcd9f | |||
4d2b002264 | |||
834f81eff0 | |||
35f4e0e0d4 | |||
9af7e52464 | |||
a2b55d0900 | |||
1322cc4671 | |||
b2edaf8fc4 | |||
da8b2d3b4d | |||
64a7f13da7 | |||
39acac09e7 | |||
7aa988f0fe | |||
6fd34a88e7 | |||
cded4cd825 | |||
1346700c35 | |||
91566fce83 | |||
db5ea97fae | |||
cc202fb3c6 | |||
8b17f8177c | |||
4fe266ce82 | |||
f585b49ee4 | |||
19d7276280 | |||
f7806c6a39 | |||
106049bfc6 | |||
e0776a452e | |||
291a21d82a | |||
f401417ce2 | |||
0d18c921cb | |||
be3ff00edc | |||
879a6604e5 | |||
d88ad099c4 | |||
636c5e25cb | |||
ba6b8bd5b3 | |||
2ed909268e | |||
35b84787ad |
96  .dockerignore (new file)
@@ -0,0 +1,96 @@
|
|||||||
|
built/*
|
||||||
|
tests/cases/rwc/*
|
||||||
|
tests/cases/perf/*
|
||||||
|
!tests/cases/webharness/compilerToString.js
|
||||||
|
test-args.txt
|
||||||
|
~*.docx
|
||||||
|
\#*\#
|
||||||
|
.\#*
|
||||||
|
tests/baselines/local/*
|
||||||
|
tests/baselines/local.old/*
|
||||||
|
tests/services/baselines/local/*
|
||||||
|
tests/baselines/prototyping/local/*
|
||||||
|
tests/baselines/rwc/*
|
||||||
|
tests/baselines/reference/projectOutput/*
|
||||||
|
tests/baselines/local/projectOutput/*
|
||||||
|
tests/baselines/reference/testresults.tap
|
||||||
|
tests/baselines/symlinks/*
|
||||||
|
tests/services/baselines/prototyping/local/*
|
||||||
|
tests/services/browser/typescriptServices.js
|
||||||
|
src/harness/*.js
|
||||||
|
src/compiler/diagnosticInformationMap.generated.ts
|
||||||
|
src/compiler/diagnosticMessages.generated.json
|
||||||
|
src/parser/diagnosticInformationMap.generated.ts
|
||||||
|
src/parser/diagnosticMessages.generated.json
|
||||||
|
rwc-report.html
|
||||||
|
*.swp
|
||||||
|
build.json
|
||||||
|
*.actual
|
||||||
|
tests/webTestServer.js
|
||||||
|
tests/webTestServer.js.map
|
||||||
|
tests/webhost/*.d.ts
|
||||||
|
tests/webhost/webtsc.js
|
||||||
|
tests/cases/**/*.js
|
||||||
|
tests/cases/**/*.js.map
|
||||||
|
*.config
|
||||||
|
scripts/eslint/built/
|
||||||
|
scripts/debug.bat
|
||||||
|
scripts/run.bat
|
||||||
|
scripts/**/*.js
|
||||||
|
scripts/**/*.js.map
|
||||||
|
coverage/
|
||||||
|
internal/
|
||||||
|
**/.DS_Store
|
||||||
|
.settings
|
||||||
|
**/.vs
|
||||||
|
**/.vscode/*
|
||||||
|
!**/.vscode/tasks.json
|
||||||
|
!**/.vscode/settings.template.json
|
||||||
|
!**/.vscode/launch.template.json
|
||||||
|
!**/.vscode/extensions.json
|
||||||
|
!tests/cases/projects/projectOption/**/node_modules
|
||||||
|
!tests/cases/projects/NodeModulesSearch/**/*
|
||||||
|
!tests/baselines/reference/project/nodeModules*/**/*
|
||||||
|
yarn.lock
|
||||||
|
yarn-error.log
|
||||||
|
.parallelperf.*
|
||||||
|
tests/baselines/reference/dt
|
||||||
|
.failed-tests
|
||||||
|
TEST-results.xml
|
||||||
|
package-lock.json
|
||||||
|
.eslintcache
|
||||||
|
*v8.log
|
||||||
|
|
||||||
|
# dotenv environment variable files
|
||||||
|
.env
|
||||||
|
.env.development.local
|
||||||
|
.env.test.local
|
||||||
|
.env.production.local
|
||||||
|
.env.local
|
||||||
|
|
||||||
|
# npm dependencies
|
||||||
|
node_modules/
|
||||||
|
|
||||||
|
# project specific
|
||||||
|
logs/
|
||||||
|
__pycache__
|
||||||
|
ml/filter/runs
|
||||||
|
ml/pred/runs
|
||||||
|
ml/pred/checkpoints
|
||||||
|
ml/pred/observed
|
||||||
|
ml/data/
|
||||||
|
ml/filter/checkpoints
|
||||||
|
scripts
|
||||||
|
model/
|
||||||
|
|
||||||
|
|
||||||
|
.astro
|
||||||
|
|
||||||
|
# Database
|
||||||
|
*.dump
|
||||||
|
*.db
|
||||||
|
*.sqlite
|
||||||
|
*.sqlite3
|
||||||
|
data/
|
||||||
|
|
||||||
|
docker-compose.yml
|
1  .gitattributes (vendored, new file)
@@ -0,0 +1 @@
+*.woff2 filter=lfs diff=lfs merge=lfs -text
95  .gitignore (vendored)
@@ -1,87 +1,44 @@
|
|||||||
built/*
|
|
||||||
tests/cases/rwc/*
|
|
||||||
tests/cases/perf/*
|
|
||||||
!tests/cases/webharness/compilerToString.js
|
|
||||||
test-args.txt
|
|
||||||
~*.docx
|
|
||||||
\#*\#
|
|
||||||
.\#*
|
|
||||||
tests/baselines/local/*
|
|
||||||
tests/baselines/local.old/*
|
|
||||||
tests/services/baselines/local/*
|
|
||||||
tests/baselines/prototyping/local/*
|
|
||||||
tests/baselines/rwc/*
|
|
||||||
tests/baselines/reference/projectOutput/*
|
|
||||||
tests/baselines/local/projectOutput/*
|
|
||||||
tests/baselines/reference/testresults.tap
|
|
||||||
tests/baselines/symlinks/*
|
|
||||||
tests/services/baselines/prototyping/local/*
|
|
||||||
tests/services/browser/typescriptServices.js
|
|
||||||
src/harness/*.js
|
|
||||||
src/compiler/diagnosticInformationMap.generated.ts
|
|
||||||
src/compiler/diagnosticMessages.generated.json
|
|
||||||
src/parser/diagnosticInformationMap.generated.ts
|
|
||||||
src/parser/diagnosticMessages.generated.json
|
|
||||||
rwc-report.html
|
|
||||||
*.swp
|
|
||||||
build.json
|
|
||||||
*.actual
|
|
||||||
tests/webTestServer.js
|
|
||||||
tests/webTestServer.js.map
|
|
||||||
tests/webhost/*.d.ts
|
|
||||||
tests/webhost/webtsc.js
|
|
||||||
tests/cases/**/*.js
|
|
||||||
tests/cases/**/*.js.map
|
|
||||||
*.config
|
|
||||||
scripts/eslint/built/
|
|
||||||
scripts/debug.bat
|
|
||||||
scripts/run.bat
|
|
||||||
scripts/**/*.js
|
|
||||||
scripts/**/*.js.map
|
|
||||||
coverage/
|
|
||||||
internal/
|
|
||||||
**/.DS_Store
|
**/.DS_Store
|
||||||
.settings
|
.settings
|
||||||
**/.vs
|
**/.vs
|
||||||
**/.vscode/*
|
**/.vscode/*
|
||||||
!**/.vscode/tasks.json
|
|
||||||
!**/.vscode/settings.template.json
|
|
||||||
!**/.vscode/launch.template.json
|
|
||||||
!**/.vscode/extensions.json
|
|
||||||
!tests/cases/projects/projectOption/**/node_modules
|
|
||||||
!tests/cases/projects/NodeModulesSearch/**/*
|
|
||||||
!tests/baselines/reference/project/nodeModules*/**/*
|
|
||||||
.idea
|
|
||||||
yarn.lock
|
|
||||||
yarn-error.log
|
|
||||||
.parallelperf.*
|
|
||||||
tests/baselines/reference/dt
|
|
||||||
.failed-tests
|
|
||||||
TEST-results.xml
|
|
||||||
package-lock.json
|
|
||||||
.eslintcache
|
.eslintcache
|
||||||
*v8.log
|
*v8.log
|
||||||
|
|
||||||
# dotenv environment variable files
|
# dotenv environment variable files
|
||||||
.env
|
.env
|
||||||
.env.development.local
|
.env.*
|
||||||
.env.test.local
|
|
||||||
.env.production.local
|
|
||||||
.env.local
|
|
||||||
|
|
||||||
# Fresh build directory
|
|
||||||
_fresh/
|
|
||||||
# npm dependencies
|
# npm dependencies
|
||||||
node_modules/
|
node_modules/
|
||||||
|
|
||||||
|
|
||||||
# project specific
|
# project specific
|
||||||
logs/
|
logs/
|
||||||
__pycache__
|
__pycache__
|
||||||
filter/runs
|
ml/filter/runs
|
||||||
pred/runs
|
ml/pred/runs
|
||||||
pred/checkpoints
|
ml/pred/checkpoints
|
||||||
data/
|
ml/pred/observed
|
||||||
filter/checkpoints
|
ml/data/
|
||||||
|
ml/filter/checkpoints
|
||||||
scripts
|
scripts
|
||||||
model/
|
model/
|
||||||
|
|
||||||
|
.astro
|
||||||
|
|
||||||
|
# Database
|
||||||
|
*.dump
|
||||||
|
*.db
|
||||||
|
*.sqlite
|
||||||
|
*.sqlite3
|
||||||
|
data/
|
||||||
|
redis/
|
||||||
|
|
||||||
|
# Build
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
|
||||||
|
docker-compose.yml
|
||||||
|
|
||||||
|
ucaptcha-config.yaml
|
10
.idea/.gitignore
vendored
Normal file
10
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
|
dataSources.xml
|
||||||
|
MarsCodeWorkspaceAppSettings.xml
|
6
.idea/bun.xml
Normal file
6
.idea/bun.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="BunSettings">
|
||||||
|
<option name="bunPath" value="$USER_HOME$/.bun/bin/bun" />
|
||||||
|
</component>
|
||||||
|
</project>
|
55
.idea/codeStyles/Project.xml
Normal file
55
.idea/codeStyles/Project.xml
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
<component name="ProjectCodeStyleConfiguration">
|
||||||
|
<code_scheme name="Project" version="173">
|
||||||
|
<option name="LINE_SEPARATOR" value=" " />
|
||||||
|
<HTMLCodeStyleSettings>
|
||||||
|
<option name="HTML_SPACE_INSIDE_EMPTY_TAG" value="true" />
|
||||||
|
</HTMLCodeStyleSettings>
|
||||||
|
<JSCodeStyleSettings version="0">
|
||||||
|
<option name="FORCE_SEMICOLON_STYLE" value="true" />
|
||||||
|
<option name="SPACE_BEFORE_FUNCTION_LEFT_PARENTH" value="false" />
|
||||||
|
<option name="FORCE_QUOTE_STYlE" value="true" />
|
||||||
|
<option name="ENFORCE_TRAILING_COMMA" value="Remove" />
|
||||||
|
<option name="SPACES_WITHIN_OBJECT_LITERAL_BRACES" value="true" />
|
||||||
|
<option name="SPACES_WITHIN_IMPORTS" value="true" />
|
||||||
|
</JSCodeStyleSettings>
|
||||||
|
<TypeScriptCodeStyleSettings version="0">
|
||||||
|
<option name="FORCE_SEMICOLON_STYLE" value="true" />
|
||||||
|
<option name="SPACE_BEFORE_FUNCTION_LEFT_PARENTH" value="false" />
|
||||||
|
<option name="FORCE_QUOTE_STYlE" value="true" />
|
||||||
|
<option name="ENFORCE_TRAILING_COMMA" value="Remove" />
|
||||||
|
<option name="SPACES_WITHIN_OBJECT_LITERAL_BRACES" value="true" />
|
||||||
|
<option name="SPACES_WITHIN_IMPORTS" value="true" />
|
||||||
|
</TypeScriptCodeStyleSettings>
|
||||||
|
<VueCodeStyleSettings>
|
||||||
|
<option name="INTERPOLATION_NEW_LINE_AFTER_START_DELIMITER" value="false" />
|
||||||
|
<option name="INTERPOLATION_NEW_LINE_BEFORE_END_DELIMITER" value="false" />
|
||||||
|
</VueCodeStyleSettings>
|
||||||
|
<codeStyleSettings language="HTML">
|
||||||
|
<option name="SOFT_MARGINS" value="120" />
|
||||||
|
<indentOptions>
|
||||||
|
<option name="CONTINUATION_INDENT_SIZE" value="4" />
|
||||||
|
<option name="USE_TAB_CHARACTER" value="true" />
|
||||||
|
</indentOptions>
|
||||||
|
</codeStyleSettings>
|
||||||
|
<codeStyleSettings language="JavaScript">
|
||||||
|
<option name="SOFT_MARGINS" value="120" />
|
||||||
|
<indentOptions>
|
||||||
|
<option name="USE_TAB_CHARACTER" value="true" />
|
||||||
|
</indentOptions>
|
||||||
|
</codeStyleSettings>
|
||||||
|
<codeStyleSettings language="TypeScript">
|
||||||
|
<option name="SOFT_MARGINS" value="120" />
|
||||||
|
<indentOptions>
|
||||||
|
<option name="USE_TAB_CHARACTER" value="true" />
|
||||||
|
</indentOptions>
|
||||||
|
</codeStyleSettings>
|
||||||
|
<codeStyleSettings language="Vue">
|
||||||
|
<option name="SOFT_MARGINS" value="120" />
|
||||||
|
<indentOptions>
|
||||||
|
<option name="INDENT_SIZE" value="4" />
|
||||||
|
<option name="TAB_SIZE" value="4" />
|
||||||
|
<option name="USE_TAB_CHARACTER" value="true" />
|
||||||
|
</indentOptions>
|
||||||
|
</codeStyleSettings>
|
||||||
|
</code_scheme>
|
||||||
|
</component>
|
5
.idea/codeStyles/codeStyleConfig.xml
Normal file
5
.idea/codeStyles/codeStyleConfig.xml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
<component name="ProjectCodeStyleConfiguration">
|
||||||
|
<state>
|
||||||
|
<option name="USE_PER_PROJECT_SETTINGS" value="true" />
|
||||||
|
</state>
|
||||||
|
</component>
|
37
.idea/cvsa.iml
Normal file
37
.idea/cvsa.iml
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="WEB_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.tmp" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/temp" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/tmp" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/data" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/doc" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/filter/checkpoints" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/filter/runs" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/lab/data" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/lab/temp" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/logs" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/model" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/src/db" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.idea" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.vscode" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.zed" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/packages/frontend/.astro" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/scripts" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.astro" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/pred/checkpoints" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/pred/observed" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/pred/runs" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/packages/backend/logs" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/packages/core/net/logs" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/packages/crawler/logs" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/data" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/redis" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/src" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
7
.idea/deno.xml
Normal file
7
.idea/deno.xml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="DenoSettings">
|
||||||
|
<option name="denoInit" value="{ "enable": true, "lint": true, "unstable": true, "importMap": "import_map.json", "config": "deno.json", "fmt": { "useTabs": true, "lineWidth": 120, "indentWidth": 4, "semiColons": true, "proseWrap": "always" } }" />
|
||||||
|
<option name="useDenoValue" value="DISABLE" />
|
||||||
|
</component>
|
||||||
|
</project>
|
36
.idea/inspectionProfiles/Project_Default.xml
Normal file
36
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
<option name="scopesOrder">
|
||||||
|
<list>
|
||||||
|
<option value="Astro" />
|
||||||
|
<option value="All Changed Files" />
|
||||||
|
<option value="Open Files" />
|
||||||
|
<option value="Project Files" />
|
||||||
|
<option value="Scratches and Consoles" />
|
||||||
|
<option value="Tests" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
<inspection_tool class="ES6UnusedImports" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<scope name="Astro" level="INFORMATION" enabled="false" editorAttributes="INFORMATION_ATTRIBUTES" />
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="GrazieInspection" enabled="false" level="GRAMMAR_ERROR" enabled_by_default="false" />
|
||||||
|
<inspection_tool class="HtmlUnknownAttribute" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="myValues">
|
||||||
|
<value>
|
||||||
|
<list size="1">
|
||||||
|
<item index="0" class="java.lang.String" itemvalue="autocorrect" />
|
||||||
|
</list>
|
||||||
|
</value>
|
||||||
|
</option>
|
||||||
|
<option name="myCustomValuesEnabled" value="true" />
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||||
|
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
||||||
|
<option name="processCode" value="true" />
|
||||||
|
<option name="processLiterals" value="true" />
|
||||||
|
<option name="processComments" value="true" />
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="TsLint" enabled="true" level="WARNING" enabled_by_default="true" />
|
||||||
|
</profile>
|
||||||
|
</component>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/cvsa.iml" filepath="$PROJECT_DIR$/.idea/cvsa.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
3
.idea/scopes/Astro.xml
Normal file
3
.idea/scopes/Astro.xml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
<component name="DependencyValidationManager">
|
||||||
|
<scope name="Astro" pattern="file:*.astro" />
|
||||||
|
</component>
|
6
.idea/sqldialects.xml
Normal file
6
.idea/sqldialects.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="SqlDialectMappings">
|
||||||
|
<file url="PROJECT" dialect="PostgreSQL" />
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
8  .prettierrc (new file)
@@ -0,0 +1,8 @@
+{
+    "useTabs": true,
+    "tabWidth": 4,
+    "trailingComma": "none",
+    "singleQuote": false,
+    "printWidth": 120,
+    "endOfLine": "lf"
+}
@@ -3,3 +3,9 @@ data
 *.svg
 *.txt
 *.md
+*config*
+Inter.css
+MiSans.css
+*.yaml
+*.yml
+*.mdx
6  .vscode/extensions.json (vendored, deleted)
@@ -1,6 +0,0 @@
-{
-    "recommendations": [
-        "denoland.vscode-deno",
-        "bradlc.vscode-tailwindcss"
-    ]
-}
@ -1,35 +0,0 @@
|
|||||||
// Folder-specific settings
|
|
||||||
//
|
|
||||||
// For a full list of overridable settings, and general information on folder-specific settings,
|
|
||||||
// see the documentation: https://zed.dev/docs/configuring-zed#settings-files
|
|
||||||
{
|
|
||||||
"lsp": {
|
|
||||||
"deno": {
|
|
||||||
"settings": {
|
|
||||||
"deno": {
|
|
||||||
"enable": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"languages": {
|
|
||||||
"TypeScript": {
|
|
||||||
"language_servers": [
|
|
||||||
"deno",
|
|
||||||
"!typescript-language-server",
|
|
||||||
"!vtsls",
|
|
||||||
"!eslint"
|
|
||||||
],
|
|
||||||
"formatter": "language_server"
|
|
||||||
},
|
|
||||||
"TSX": {
|
|
||||||
"language_servers": [
|
|
||||||
"deno",
|
|
||||||
"!typescript-language-server",
|
|
||||||
"!vtsls",
|
|
||||||
"!eslint"
|
|
||||||
],
|
|
||||||
"formatter": "language_server"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
23  Dockerfile.backend (new file)
@@ -0,0 +1,23 @@
+FROM oven/bun:1.2.8-debian
+
+WORKDIR /app
+
+COPY ./packages/core ./core
+
+COPY ./packages/backend/package.json ./packages/backend/bun.lock ./backend/
+
+RUN apt update && apt install -y curl
+
+RUN ln -s /bin/uname /usr/bin/uname
+
+RUN /bin/bash -c "$(curl -fsSL https://aliyuncli.alicdn.com/install.sh)"
+
+WORKDIR backend
+
+RUN bun install
+
+COPY ./packages/backend/ .
+
+RUN mkdir -p /app/logs
+
+CMD ["bun", "start"]
19  Dockerfile.crawler (new file)
@@ -0,0 +1,19 @@
+FROM oven/bun:1.2.8-debian
+
+WORKDIR /app
+
+COPY . .
+
+RUN bun i
+
+RUN mkdir -p /app/logs
+
+RUN apt update && apt install -y curl
+
+RUN ln -s /bin/uname /usr/bin/uname
+
+RUN /bin/bash -c "$(curl -fsSL https://aliyuncli.alicdn.com/install.sh)"
+
+WORKDIR packages/crawler
+
+CMD ["bun", "all"]
23  Dockerfile.frontend (new file)
@@ -0,0 +1,23 @@
+FROM oven/bun
+
+ARG BACKEND_URL
+
+WORKDIR /app
+
+COPY . .
+
+RUN bun install
+
+WORKDIR packages/frontend
+
+RUN bun run build
+
+ENV HOST=0.0.0.0
+ENV PORT=4321
+ENV BACKEND_URL=${BACKEND_URL}
+
+EXPOSE 4321
+
+RUN mkdir -p /app/logs
+
+CMD ["bun", "/app/packages/frontend/dist/server/entry.mjs"]
14  Dockerfile.next (new file)
@@ -0,0 +1,14 @@
+FROM node:lts-slim AS production
+
+WORKDIR /app
+
+COPY ./packages/next/.next ./.next
+COPY ./packages/next/public ./public
+COPY ./packages/next/package.json ./package.json
+COPY ./packages/next/node_modules ./node_modules
+
+ENV NODE_ENV production
+
+EXPOSE 7400
+
+CMD ["npm", "start"]
@@ -2,6 +2,11 @@
 
 「中V档案馆」是一个旨在收录与展示「中文歌声合成作品」及有关信息的网站。
 
+## 新闻 - 测试版本上线
+
+目前,中V档案馆上线了用于测试的前端网页和API接口,它们分别位于[projectcvsa.com](https://projectcvsa.com)和[api.projectcvsa.com](https://api.projectcvsa.com)。
+API调用方法请参见[接口文档](https://docs.projectcvsa.com/api-doc/)。
+
 ## 创建背景与关联工作
 
 纵观整个互联网,对于「中文歌声合成」或「中文虚拟歌手」(常简称为中V或VC)相关信息进行较为系统、全面地整理收集的主要有以下几个网站:
@@ -31,7 +36,7 @@
 
 ## 技术架构
 
-参见[CVSA文档](https://cvsa.gitbook.io/)。
+参见[CVSA文档](https://docs.projectcvsa.com/)。
 
 ## 开放许可
|
||||||
|
|
||||||
|
@ -1,12 +0,0 @@
|
|||||||
import { JSX } from "preact";
|
|
||||||
import { IS_BROWSER } from "$fresh/runtime.ts";
|
|
||||||
|
|
||||||
export function Button(props: JSX.HTMLAttributes<HTMLButtonElement>) {
|
|
||||||
return (
|
|
||||||
<button
|
|
||||||
{...props}
|
|
||||||
disabled={!IS_BROWSER || props.disabled}
|
|
||||||
class="px-2 py-1 border-gray-500 border-2 rounded bg-white hover:bg-gray-200 transition-colors"
|
|
||||||
/>
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,55 +0,0 @@
|
|||||||
import json
|
|
||||||
import random
|
|
||||||
|
|
||||||
def process_data(input_file, output_file):
|
|
||||||
"""
|
|
||||||
从输入文件中读取数据,找出model和human不一致的行,
|
|
||||||
删除"model"键,将"human"键重命名为"label",
|
|
||||||
然后将处理后的数据添加到输出文件中。
|
|
||||||
在写入之前,它会加载output_file中的所有样本,
|
|
||||||
并使用aid键进行去重过滤。
|
|
||||||
|
|
||||||
Args:
|
|
||||||
input_file (str): 输入文件的路径。
|
|
||||||
output_file (str): 输出文件的路径。
|
|
||||||
"""
|
|
||||||
|
|
||||||
# 加载output_file中已有的数据,用于去重
|
|
||||||
existing_data = set()
|
|
||||||
try:
|
|
||||||
with open(output_file, 'r', encoding='utf-8') as f_out:
|
|
||||||
for line in f_out:
|
|
||||||
try:
|
|
||||||
data = json.loads(line)
|
|
||||||
existing_data.add(data['aid'])
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass # 忽略JSON解码错误,继续读取下一行
|
|
||||||
except FileNotFoundError:
|
|
||||||
pass # 如果文件不存在,则忽略
|
|
||||||
|
|
||||||
with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'a', encoding='utf-8') as f_out:
|
|
||||||
for line in f_in:
|
|
||||||
try:
|
|
||||||
data = json.loads(line)
|
|
||||||
|
|
||||||
if data['model'] != data['human'] or random.random() < 0.2:
|
|
||||||
if data['aid'] not in existing_data: # 检查aid是否已存在
|
|
||||||
del data['model']
|
|
||||||
data['label'] = data['human']
|
|
||||||
del data['human']
|
|
||||||
f_out.write(json.dumps(data, ensure_ascii=False) + '\n')
|
|
||||||
existing_data.add(data['aid']) # 将新的aid添加到集合中
|
|
||||||
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
print(f"JSON解码错误: {e}")
|
|
||||||
print(f"错误行内容: {line.strip()}")
|
|
||||||
except KeyError as e:
|
|
||||||
print(f"KeyError: 键 '{e}' 不存在")
|
|
||||||
print(f"错误行内容: {line.strip()}")
|
|
||||||
|
|
||||||
# 调用函数处理数据
|
|
||||||
input_file = 'real_test.jsonl'
|
|
||||||
output_file = 'labeled_data.jsonl'
|
|
||||||
process_data(input_file, output_file)
|
|
||||||
print(f"处理完成,结果已写入 {output_file}")
|
|
||||||
|
|
60  deno.json (deleted)
@@ -1,60 +0,0 @@
|
|||||||
{
|
|
||||||
"lock": false,
|
|
||||||
"tasks": {
|
|
||||||
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
|
|
||||||
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
|
|
||||||
"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
|
|
||||||
"cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
|
|
||||||
"manifest": "deno task cli manifest $(pwd)",
|
|
||||||
"start": "deno run -A --watch=static/,routes/ dev.ts",
|
|
||||||
"build": "deno run -A dev.ts build",
|
|
||||||
"preview": "deno run -A main.ts",
|
|
||||||
"update": "deno run -A -r https://fresh.deno.dev/update .",
|
|
||||||
"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write --allow-run ./src/worker.ts",
|
|
||||||
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
|
|
||||||
"adder": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
|
|
||||||
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
|
|
||||||
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
|
|
||||||
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
|
|
||||||
},
|
|
||||||
"lint": {
|
|
||||||
"rules": {
|
|
||||||
"tags": ["fresh", "recommended"]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"exclude": ["**/_fresh/*"],
|
|
||||||
"imports": {
|
|
||||||
"@std/assert": "jsr:@std/assert@1",
|
|
||||||
"$fresh/": "https://deno.land/x/fresh@1.7.3/",
|
|
||||||
"preact": "https://esm.sh/preact@10.22.0",
|
|
||||||
"preact/": "https://esm.sh/preact@10.22.0/",
|
|
||||||
"@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
|
|
||||||
"@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
|
|
||||||
"tailwindcss": "npm:tailwindcss@3.4.1",
|
|
||||||
"tailwindcss/": "npm:/tailwindcss@3.4.1/",
|
|
||||||
"tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
|
|
||||||
"$std/": "https://deno.land/std@0.216.0/",
|
|
||||||
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
|
|
||||||
"bullmq": "npm:bullmq",
|
|
||||||
"lib/": "./lib/",
|
|
||||||
"ioredis": "npm:ioredis",
|
|
||||||
"@bull-board/api": "npm:@bull-board/api",
|
|
||||||
"@bull-board/express": "npm:@bull-board/express",
|
|
||||||
"express": "npm:express",
|
|
||||||
"src/": "./src/",
|
|
||||||
"onnxruntime": "npm:onnxruntime-node@1.19.2",
|
|
||||||
"chalk": "npm:chalk"
|
|
||||||
},
|
|
||||||
"compilerOptions": {
|
|
||||||
"jsx": "react-jsx",
|
|
||||||
"jsxImportSource": "preact"
|
|
||||||
},
|
|
||||||
"nodeModulesDir": "auto",
|
|
||||||
"fmt": {
|
|
||||||
"useTabs": true,
|
|
||||||
"lineWidth": 120,
|
|
||||||
"indentWidth": 4,
|
|
||||||
"semiColons": true,
|
|
||||||
"proseWrap": "always"
|
|
||||||
}
|
|
||||||
}
|
|
7  dev.ts (deleted)
@@ -1,7 +0,0 @@
-#!/usr/bin/env -S deno run -A --watch=static/,routes/
-
-import dev from "$fresh/dev.ts";
-import config from "./fresh.config.ts";
-
-import "$std/dotenv/load.ts";
-await dev(import.meta.url, "./main.ts", config);
@@ -1,22 +1,21 @@
 # Table of contents
 
-- [Welcome](README.md)
+* [Welcome](README.md)
 
 ## About
 
-- [About CVSA Project](about/this-project.md)
-- [Scope of Inclusion](about/scope-of-inclusion.md)
+* [About CVSA Project](about/this-project.md)
+* [Scope of Inclusion](about/scope-of-inclusion.md)
 
 ## Architecure
 
-- [Overview](architecure/overview.md)
-- [Database Structure](architecure/database-structure/README.md)
-- [Type of Song](architecure/database-structure/type-of-song.md)
-- [Message Queue](architecure/message-queue/README.md)
-- [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
-- [Artificial Intelligence](architecure/artificial-intelligence.md)
+* [Overview](architecure/overview.md)
+* [Crawler](architecure/crawler.md)
+* [Database Structure](architecure/database-structure/README.md)
+* [Type of Song](architecure/database-structure/type-of-song.md)
+* [Artificial Intelligence](architecure/artificial-intelligence.md)
 
 ## API Doc
 
-- [Catalog](api-doc/catalog.md)
-- [Songs](api-doc/songs.md)
+* [Catalog](api-doc/catalog.md)
+* [Songs](api-doc/songs.md)
@@ -7,13 +7,34 @@ For a **song**, it must meet the following conditions to be included in CVSA:
 
 ### Category 30
 
-In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is
-posted on Bilibili. In some special cases, this rule may not be enforced.
+In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in
+[Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our
+[automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been
+uploaded to bilibili / categorized under this category.
 
-### At Leats One Line of Chinese
+#### NEWS
 
-The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports
-Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
+Recently, Bilibili seems to be offlining the sub-category. This means the VOCALOID·UTAU category can no longer be
+entered from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the
+parent category "Music").
+
+According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly
+published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However,
+there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under
+the "Music General" sub-category.\
+We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated
+program's crawling.
+
+### At Least One Line of Chinese / Chinese Virtual Singer
+
+The lyrics of the song must contain at least one line in Chinese. Otherwise, if the lyrics of the song do not contain
+Chinese, it will only be included in the CVSA if a Chinese virtual singer has been used.
+
+We define a **Chinese virtual singer** as follows:
+
+1. The singer primarily uses a Chinese voicebank (i.e. the most widely used voicebank for the singer is Chinese)
+2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or
+   Taiwan.
 
 ### Using Vocal Synthesizer
 
|
@@ -12,3 +12,10 @@ Located at `/filter/` under project root dir, it classifies a video in the
 - 0: Not related to Chinese vocal synthesis
 - 1: An original song with Chinese vocal synthesis
 - 2: A cover/remix song with Chinese vocal synthesis
+
+### The Predictor
+
+Located at `/pred/` under the project root dir, it predicts the future views of a video. This is a regression model that
+takes historical view trends of a video, other contextual information (such as the current time), and future time points
+to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to the specified
+future time point.
|
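To make the predictor's input/output contract concrete, here is a minimal sketch of the shapes described above. All names are illustrative assumptions; the actual `/pred/` code (not shown in this diff) may model its features differently.

```ts
// Hypothetical shapes only — names are illustrative, not taken from the repository.
interface ViewSnapshot {
	time: number; // Unix timestamp of the observation
	views: number; // cumulative view count at that time
}

interface PredictorInput {
	history: ViewSnapshot[]; // historical view trend of the video
	now: number; // current time, used as a contextual feature
	target: number; // the future time point to predict for
}

// The regression model outputs the predicted *increment* in views
// between `now` and `target`, not the absolute view count.
type PredictorOutput = { viewsDelta: number };
```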
4  doc/en/architecure/crawler.md (new file)
@@ -0,0 +1,4 @@
+# Crawler
+
+A central aspect of CVSA's technical design is its emphasis on automation. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
+
|
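As a rough illustration of the orchestration described above, the sketch below shows the BullMQ producer/worker pattern with a Redis connection. Queue and job names are placeholders; the crawler's real queues, job payloads, and persistence logic are not shown in this diff.

```ts
import { Queue, Worker } from "bullmq";

// Placeholder connection and names — assumptions for illustration only.
const connection = { host: "127.0.0.1", port: 6379 };

const videoQueue = new Queue("latestVideos", { connection });

// Producer side: enqueue a task; many such tasks can be processed concurrently.
await videoQueue.add("getLatestVideos", { page: 1 });

// Consumer side: a worker pulls jobs off the queue, fetches data from bilibili,
// and would persist the result to PostgreSQL.
const worker = new Worker(
	"latestVideos",
	async (job) => {
		// ... fetch metadata for job.data.page and INSERT it into the database ...
		return 0; // a return value can signal the job's outcome
	},
	{ connection },
);
```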
@@ -10,3 +10,6 @@ following tables:
 - all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
 - labelling\_result: Contains labels of videos in `all_data` tagged by our
 [AI system](../artificial-intelligence.md#the-filter).
+- video\_snapshot: Statistical data of videos that is fetched regularly (e.g., number of views, etc.); we call this
+fetch process a "snapshot".
+- snapshot\_schedule: The scheduling information for video snapshots.
|
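For orientation, a `video_snapshot` row roughly corresponds to the snapshot fields exposed by the public API (see the OpenAPI file `doc/zh/.gitbook/assets/1.yaml` added later in this diff). The interface below is a hedged sketch assembled from those fields, not the actual table definition.

```ts
// Sketch only — derived from the API's snapshot fields; the real schema may differ.
interface VideoSnapshot {
	id: number; // snapshot ID
	aid: number; // the video's av number
	views: number;
	coins: number;
	likes: number;
	favorites: number;
	shares: number;
	danmakus: number;
	replies: number;
	time?: number; // capture time; assumed from the crawler's queries elsewhere in this diff
}
```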
@@ -1 +0,0 @@
-# Message Queue
@@ -1,12 +0,0 @@
-# VideoTagsQueue
-
-### Jobs
-
-The VideoTagsQueue contains two jobs: `getVideoTags`and `getVideosTags`. The former is used to fetch the tags of a
-video, and the latter is responsible for scheduling the former.
-
-### Return value
-
-The return values across two jobs follows the following table:
-
-<table><thead><tr><th width="168">Return Value</th><th>Description</th></tr></thead><tbody><tr><td>0</td><td>In <code>getVideoTags</code>: the tags was successfully fetched<br>In <code>getVideosTags</code>: all null-tags videos have a corresponding job successfully queued.</td></tr><tr><td>1</td><td>Used in <code>getVideoTags</code>: occured <code>fetch</code>error during the job</td></tr><tr><td>2</td><td>Used in <code>getVideoTags</code>: we've reached the rate limit set in NetScheduler</td></tr><tr><td>3</td><td>Used in <code>getVideoTags</code>: did't provide aid in the job data</td></tr><tr><td>4</td><td>Used in<code>getVideosTags</code>: There's no video with NULL as `tags`</td></tr><tr><td>1xx</td><td>Used in<code>getVideosTags</code>: the number of tasks in the queue has exceeded the limit, thus <code>getVideosTags</code> stops adding tasks. <code>xx</code> is the number of jobs added to the queue during execution.</td></tr></tbody></table>
@@ -1,5 +1,4 @@
 ---
-icon: globe-pointer
 layout:
   title:
     visible: true
@@ -15,5 +14,29 @@ layout:
 
 # Overview
 
-Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by
-[BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
+The CVSA is a [monorepo](https://en.wikipedia.org/wiki/Monorepo) codebase, mainly using TypeScript as the development language. With [Deno workspace](https://docs.deno.com/runtime/fundamentals/workspaces/), the major part of the codebase is under `packages/`.
+
+**Project structure:**
+
+```
+cvsa
+├── deno.json
+├── packages
+│   ├── backend
+│   ├── core
+│   ├── crawler
+│   └── frontend
+└── README.md
+```
+
+**Package Breakdown:**
+
+* **`backend`**: This package houses the server-side logic, built with the [Hono](https://hono.dev/) web framework. It's responsible for interacting with the database and exposing data through REST and GraphQL APIs for consumption by the frontend, internal applications, and third-party developers.
+* **`frontend`**: The user-facing web interface of CVSA is developed using [Astro](https://astro.build/). This package handles the presentation layer, displaying information fetched from the database.
+* **`crawler`**: This automated data collection system is a key component of CVSA. It's designed to automatically discover and gather new song data from bilibili, as well as track relevant statistics over time.
+* **`core`**: This package contains reusable and generic code that is utilized across multiple workspaces within the CVSA monorepo.
+
+### Crawler
+
+Automation is the biggest highlight of CVSA's technical design. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data collection lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
+
|
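The Deno workspace mentioned above is declared in the repository's root manifest, which is not shown in this diff. A hedged sketch of what such a declaration could look like, with the member list inferred from the tree above:

```jsonc
// Illustrative only — inferred from the project tree, not the actual root deno.json.
{
	"workspace": [
		"./packages/backend",
		"./packages/core",
		"./packages/crawler",
		"./packages/frontend"
	]
}
```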
106  doc/zh/.gitbook/assets/1.yaml (new file)
@@ -0,0 +1,106 @@
|
|||||||
|
openapi: 3.0.0
|
||||||
|
info:
|
||||||
|
title: CVSA API
|
||||||
|
version: v1
|
||||||
|
|
||||||
|
servers:
|
||||||
|
- url: https://api.projectcvsa.com
|
||||||
|
|
||||||
|
paths:
|
||||||
|
/video/{id}/snapshots:
|
||||||
|
get:
|
||||||
|
summary: 获取视频快照列表
|
||||||
|
description: 根据视频 ID 获取视频的快照列表。视频 ID 可以是以 "av" 开头的数字,以 "BV" 开头的 12 位字母数字,或者一个正整数。
|
||||||
|
parameters:
|
||||||
|
- in: path
|
||||||
|
name: id
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
description: "视频 ID (如: av78977256, BV1KJ411C7CW, 78977256)"
|
||||||
|
- in: query
|
||||||
|
name: ps
|
||||||
|
schema:
|
||||||
|
type: integer
|
||||||
|
minimum: 1
|
||||||
|
description: 每页返回的快照数量 (pageSize),默认为 1000。
|
||||||
|
- in: query
|
||||||
|
name: pn
|
||||||
|
schema:
|
||||||
|
type: integer
|
||||||
|
minimum: 1
|
||||||
|
description: 页码 (pageNumber),用于分页查询。offset 与 pn 只能选择一个。
|
||||||
|
- in: query
|
||||||
|
name: offset
|
||||||
|
schema:
|
||||||
|
type: integer
|
||||||
|
minimum: 1
|
||||||
|
description: 偏移量,用于基于偏移量的查询。offset 与 pn 只能选择一个。
|
||||||
|
- in: query
|
||||||
|
name: reverse
|
||||||
|
schema:
|
||||||
|
type: boolean
|
||||||
|
description: 是否反向排序(从旧到新),默认为 false。
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: 成功获取快照列表
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: integer
|
||||||
|
description: 快照 ID
|
||||||
|
aid:
|
||||||
|
type: integer
|
||||||
|
description: 视频的 av 号
|
||||||
|
views:
|
||||||
|
type: integer
|
||||||
|
description: 视频播放量
|
||||||
|
coins:
|
||||||
|
type: integer
|
||||||
|
description: 视频投币数
|
||||||
|
likes:
|
||||||
|
type: integer
|
||||||
|
description: 视频点赞数
|
||||||
|
favorites:
|
||||||
|
type: integer
|
||||||
|
description: 视频收藏数
|
||||||
|
shares:
|
||||||
|
type: integer
|
||||||
|
description: 视频分享数
|
||||||
|
danmakus:
|
||||||
|
type: integer
|
||||||
|
description: 视频弹幕数
|
||||||
|
replies:
|
||||||
|
type: integer
|
||||||
|
description: 视频评论数
|
||||||
|
'400':
|
||||||
|
description: 无效的查询参数
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
message:
|
||||||
|
type: string
|
||||||
|
description: 错误消息
|
||||||
|
errors:
|
||||||
|
type: object
|
||||||
|
description: 详细的错误信息
|
||||||
|
'500':
|
||||||
|
description: 服务器内部错误
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
message:
|
||||||
|
type: string
|
||||||
|
description: 错误消息
|
||||||
|
error:
|
||||||
|
type: object
|
||||||
|
description: 详细的错误信息
|
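A usage sketch for the endpoint documented in the spec above. The base URL comes from its `servers` entry and `av78977256` is the sample ID from its parameter description; error handling is kept minimal on purpose.

```ts
// Query the first page of snapshots for one video (10 per page).
const res = await fetch(
	"https://api.projectcvsa.com/video/av78977256/snapshots?ps=10&pn=1",
);
if (!res.ok) throw new Error(`request failed: ${res.status}`);

// On success the body is an array of objects with the fields listed above:
// { id, aid, views, coins, likes, favorites, shares, danmakus, replies }
const snapshots = await res.json();
console.log(snapshots[0]?.views);
```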
@@ -1,11 +1,11 @@
 # Table of contents
 
-- [欢迎](README.md)
+* [欢迎](README.md)
 
 ## 关于 <a href="#about" id="about"></a>
 
-- [关于本项目](about/this-project.md)
-- [收录范围](about/scope-of-inclusion.md)
+* [关于本项目](about/this-project.md)
+* [收录范围](about/scope-of-inclusion.md)
 
 ## 技术架构 <a href="#architecture" id="architecture"></a>
 
@@ -14,9 +14,9 @@
 - [歌曲类型](architecture/database-structure/type-of-song.md)
 - [人工智能](architecture/artificial-intelligence.md)
 - [消息队列](architecture/message-queue/README.md)
-- [VideoTagsQueue队列](architecture/message-queue/video-tags-queue.md)
+- [LatestVideosQueue 队列](architecture/message-queue/latestvideosqueue-dui-lie.md)
 
 ## API 文档 <a href="#api-doc" id="api-doc"></a>
 
-- [目录](api-doc/catalog.md)
-- [歌曲](api-doc/songs.md)
+* [目录](api-doc/catalog.md)
+* [视频快照](api-doc/video-snapshot.md)
@@ -1,3 +1,4 @@
 # 目录
 
-- [歌曲](songs.md)
+* [视频快照](video-snapshot.md)
|
||||||
|
|
||||||
|
@@ -1,3 +0,0 @@
-# 歌曲
-
-暂未实现。
6  doc/zh/api-doc/video-snapshot.md (new file)
@@ -0,0 +1,6 @@
+# 视频快照
+
+{% openapi src="../.gitbook/assets/1.yaml" path="/video/{id}/snapshots" method="get" %}
+[1.yaml](../.gitbook/assets/1.yaml)
+{% endopenapi %}
+
@@ -2,9 +2,14 @@
 
 CVSA 使用 [PostgreSQL](https://www.postgresql.org/) 作为数据库。
 
+CVSA 设计了两个
+
 CVSA 的所有公开数据(不包括用户的个人数据)都存储在名为 `cvsa_main` 的数据库中,该数据库包含以下表:
 
 - songs:存储歌曲的主要信息
-- bili\_user:存储 Bilibili 用户信息快照
-- all\_data:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据。
+- bilibili\_user:存储 Bilibili 用户信息快照
+- bilibili\_metadata:[分区 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu) 中所有视频的元数据
 - labelling\_result:包含由我们的 AI 系统 标记的 `all_data` 中视频的标签。
+- latest\_video\_snapshot:存储视频最新的快照
+- video\_snapshot:存储视频的快照,包括特定时间下视频的统计信息(播放量、点赞数等)
+- snapshot\_schedule:视频快照的规划信息,为辅助表
@@ -0,0 +1 @@
+# LatestVideosQueue 队列
|
@ -1,15 +0,0 @@
|
|||||||
---
|
|
||||||
description: 关于VideoTagsQueue队列的信息。
|
|
||||||
---
|
|
||||||
|
|
||||||
# VideoTagsQueue队列
|
|
||||||
|
|
||||||
### 任务
|
|
||||||
|
|
||||||
视频标签队列包含两个任务:`getVideoTags`和`getVideosTags`。前者用于获取视频的标签,后者负责调度前者。
|
|
||||||
|
|
||||||
### 返回值
|
|
||||||
|
|
||||||
两个任务的返回值遵循以下表格:
|
|
||||||
|
|
||||||
<table><thead><tr><th width="168">返回值</th><th>描述</th></tr></thead><tbody><tr><td>0</td><td>在 <code>getVideoTags</code> 中:标签成功获取<br>在 <code>getVideosTags</code> 中:所有无标签视频的相应任务已成功排队。</td></tr><tr><td>1</td><td>在 <code>getVideoTags</code> 中:任务期间发生 <code>fetch</code> 错误</td></tr><tr><td>2</td><td>在 <code>getVideoTags</code> 中:已达到 NetScheduler 设置的速率限制</td></tr><tr><td>3</td><td>在 <code>getVideoTags</code> 中:未在任务数据中提供帮助</td></tr><tr><td>4</td><td>在 <code>getVideosTags</code> 中:没有视频的 `tags` 为 NULL</td></tr><tr><td>1xx</td><td>在 <code>getVideosTags</code> 中:队列中的任务数量超过了限制,因此 <code>getVideosTags</code> 停止添加任务。<code>xx</code> 是在执行期间添加到队列的任务数量。</td></tr></tbody></table>
|
|
@@ -1,5 +1,4 @@
 ---
-icon: globe-pointer
 layout:
   title:
     visible: true
@@ -15,4 +14,13 @@ layout:
 
 # 概览
 
-自动化是 CVSA 技术设计的最大亮点,为了实现自动化,我们使用BullMQ驱动的消息队列来并发处理数据采集生命周期中的各项任务。
+整个CVSA项目分为三个组件:**crawler**, **frontend** 和 **backend。**
+
+### **crawler**
+
+位于项目目录`packages/crawler` 下,它负责以下工作:
+
+- 抓取新的视频并收录作品
+- 持续监控视频的播放量等统计信息
+
+整个 crawler 由 BullMQ 消息队列驱动,使用 Redis 和 PostgreSQL 管理状态。
|
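The continuous statistics monitoring described above maps naturally onto BullMQ's repeatable jobs. The sketch below is an assumption about how such scheduling could look; queue and job names, and the interval, are placeholders rather than the crawler's actual configuration.

```ts
import { Queue } from "bullmq";

// Placeholder names and interval — for illustration only.
const snapshotQueue = new Queue("snapshotTick", {
	connection: { host: "127.0.0.1", port: 6379 },
});

// Re-enqueue the dispatch job every minute so view counts keep being snapshotted.
await snapshotQueue.add(
	"dispatchSnapshots",
	{},
	{ repeat: { every: 60 * 1000 } },
);
```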
71  docker-compose.example.yml (new file)
@@ -0,0 +1,71 @@
|
|||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:17
|
||||||
|
ports:
|
||||||
|
- "5431:5432"
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: cvsa
|
||||||
|
POSTGRES_PASSWORD: ""
|
||||||
|
POSTGRES_DB: cvsa_main
|
||||||
|
volumes:
|
||||||
|
- ./data:/var/lib/postgresql/data
|
||||||
|
|
||||||
|
redis:
|
||||||
|
image: redis:latest
|
||||||
|
ports:
|
||||||
|
- "6378:6379"
|
||||||
|
volumes:
|
||||||
|
- ./redis/data:/data
|
||||||
|
- ./redis/redis.conf:/usr/local/etc/redis/redis.conf
|
||||||
|
- ./redis/logs:/logs
|
||||||
|
|
||||||
|
frontend:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile.frontend
|
||||||
|
ports:
|
||||||
|
- "4321:4321"
|
||||||
|
environment:
|
||||||
|
- HOST=0.0.0.0
|
||||||
|
- PORT=4321
|
||||||
|
- DB_HOST=db
|
||||||
|
- DB_NAME=cvsa_main
|
||||||
|
- DB_NAME_CRED=cvsa_cred
|
||||||
|
- DB_USER=cvsa
|
||||||
|
- DB_PORT=5432
|
||||||
|
- DB_PASSWORD=""
|
||||||
|
- LOG_VERBOSE=/app/logs/verbose.log
|
||||||
|
- LOG_WARN=/app/logs/warn.log
|
||||||
|
- LOG_ERR=/app/logs/error.log
|
||||||
|
depends_on:
|
||||||
|
- db
|
||||||
|
volumes:
|
||||||
|
- /path/to/your/logs:/app/logs
|
||||||
|
backend:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile.backend
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
environment:
|
||||||
|
- HOST=0.0.0.0
|
||||||
|
- DB_HOST=db
|
||||||
|
- DB_NAME=cvsa_main
|
||||||
|
- DB_NAME_CRED=cvsa_cred
|
||||||
|
- DB_USER=cvsa
|
||||||
|
- DB_PORT=5432
|
||||||
|
- DB_PASSWORD=""
|
||||||
|
- LOG_VERBOSE=/app/logs/verbose.log
|
||||||
|
- LOG_WARN=/app/logs/warn.log
|
||||||
|
- LOG_ERR=/app/logs/error.log
|
||||||
|
- REDIS_HOST=redis
|
||||||
|
- REDIS_PORT=6379
|
||||||
|
depends_on:
|
||||||
|
- db
|
||||||
|
volumes:
|
||||||
|
- /path/to/your/logs:/app/logs
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
db_data:
|
@ -1,6 +0,0 @@
|
|||||||
import { defineConfig } from "$fresh/server.ts";
|
|
||||||
import tailwind from "$fresh/plugins/tailwind.ts";
|
|
||||||
|
|
||||||
export default defineConfig({
|
|
||||||
plugins: [tailwind()],
|
|
||||||
});
|
|
27
fresh.gen.ts
27
fresh.gen.ts
@ -1,27 +0,0 @@
|
|||||||
// DO NOT EDIT. This file is generated by Fresh.
|
|
||||||
// This file SHOULD be checked into source version control.
|
|
||||||
// This file is automatically updated during development when running `dev.ts`.
|
|
||||||
|
|
||||||
import * as $_404 from "./routes/_404.tsx";
|
|
||||||
import * as $_app from "./routes/_app.tsx";
|
|
||||||
import * as $api_joke from "./routes/api/joke.ts";
|
|
||||||
import * as $greet_name_ from "./routes/greet/[name].tsx";
|
|
||||||
import * as $index from "./routes/index.tsx";
|
|
||||||
import * as $Counter from "./islands/Counter.tsx";
|
|
||||||
import type { Manifest } from "$fresh/server.ts";
|
|
||||||
|
|
||||||
const manifest = {
|
|
||||||
routes: {
|
|
||||||
"./routes/_404.tsx": $_404,
|
|
||||||
"./routes/_app.tsx": $_app,
|
|
||||||
"./routes/api/joke.ts": $api_joke,
|
|
||||||
"./routes/greet/[name].tsx": $greet_name_,
|
|
||||||
"./routes/index.tsx": $index,
|
|
||||||
},
|
|
||||||
islands: {
|
|
||||||
"./islands/Counter.tsx": $Counter,
|
|
||||||
},
|
|
||||||
baseUrl: import.meta.url,
|
|
||||||
} satisfies Manifest;
|
|
||||||
|
|
||||||
export default manifest;
|
|
@ -1,16 +0,0 @@
|
|||||||
import type { Signal } from "@preact/signals";
|
|
||||||
import { Button } from "../components/Button.tsx";
|
|
||||||
|
|
||||||
interface CounterProps {
|
|
||||||
count: Signal<number>;
|
|
||||||
}
|
|
||||||
|
|
||||||
export default function Counter(props: CounterProps) {
|
|
||||||
return (
|
|
||||||
<div class="flex gap-8 py-6">
|
|
||||||
<Button onClick={() => props.count.value -= 1}>-1</Button>
|
|
||||||
<p class="text-3xl tabular-nums">{props.count}</p>
|
|
||||||
<Button onClick={() => props.count.value += 1}>+1</Button>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,87 +0,0 @@
|
|||||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
|
||||||
import { AllDataType, BiliUserType } from "lib/db/schema.d.ts";
|
|
||||||
import Akari from "lib/ml/akari.ts";
|
|
||||||
|
|
||||||
export async function videoExistsInAllData(client: Client, aid: number) {
|
|
||||||
return await client.queryObject<{ exists: boolean }>(
|
|
||||||
`SELECT EXISTS(SELECT 1 FROM bilibili_metadata WHERE aid = $1)`,
|
|
||||||
[aid],
|
|
||||||
)
|
|
||||||
.then((result) => result.rows[0].exists);
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function userExistsInBiliUsers(client: Client, uid: number) {
|
|
||||||
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bilibili_user WHERE uid = $1)`, [
|
|
||||||
uid,
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function getUnlabelledVideos(client: Client) {
|
|
||||||
const queryResult = await client.queryObject<{ aid: number }>(
|
|
||||||
`SELECT a.aid FROM bilibili_metadata a LEFT JOIN labelling_result l ON a.aid = l.aid WHERE l.aid IS NULL`,
|
|
||||||
);
|
|
||||||
return queryResult.rows.map((row) => row.aid);
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function insertVideoLabel(client: Client, aid: number, label: number) {
|
|
||||||
return await client.queryObject(
|
|
||||||
`INSERT INTO labelling_result (aid, label, model_version) VALUES ($1, $2, $3) ON CONFLICT (aid, model_version) DO NOTHING`,
|
|
||||||
[aid, label, Akari.getModelVersion()],
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function getVideoInfoFromAllData(client: Client, aid: number) {
|
|
||||||
const queryResult = await client.queryObject<AllDataType>(
|
|
||||||
`SELECT * FROM bilibili_metadata WHERE aid = $1`,
|
|
||||||
[aid],
|
|
||||||
);
|
|
||||||
const row = queryResult.rows[0];
|
|
||||||
let authorInfo = "";
|
|
||||||
if (row.uid && await userExistsInBiliUsers(client, row.uid)) {
|
|
||||||
const q = await client.queryObject<BiliUserType>(
|
|
||||||
`SELECT * FROM bilibili_user WHERE uid = $1`,
|
|
||||||
[row.uid],
|
|
||||||
);
|
|
||||||
const userRow = q.rows[0];
|
|
||||||
if (userRow) {
|
|
||||||
authorInfo = userRow.desc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
title: row.title,
|
|
||||||
description: row.description,
|
|
||||||
tags: row.tags,
|
|
||||||
author_info: authorInfo,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function getUnArchivedBiliUsers(client: Client) {
|
|
||||||
const queryResult = await client.queryObject<{ uid: number }>(
|
|
||||||
`
|
|
||||||
SELECT ad.uid
|
|
||||||
FROM bilibili_metadata ad
|
|
||||||
LEFT JOIN bilibili_user bu ON ad.uid = bu.uid
|
|
||||||
WHERE bu.uid IS NULL;
|
|
||||||
`,
|
|
||||||
[],
|
|
||||||
);
|
|
||||||
const rows = queryResult.rows;
|
|
||||||
return rows.map((row) => row.uid);
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function setBiliVideoStatus(client: Client, aid: number, status: number) {
|
|
||||||
return await client.queryObject(
|
|
||||||
`UPDATE bilibili_metadata SET status = $1 WHERE aid = $2`,
|
|
||||||
[status, aid],
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function getBiliVideoStatus(client: Client, aid: number) {
|
|
||||||
const queryResult = await client.queryObject<{ status: number }>(
|
|
||||||
`SELECT status FROM bilibili_metadata WHERE aid = $1`,
|
|
||||||
[aid],
|
|
||||||
);
|
|
||||||
const rows = queryResult.rows;
|
|
||||||
if (rows.length === 0) return 0;
|
|
||||||
return rows[0].status;
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
|
||||||
import { postgresConfig } from "lib/db/pgConfig.ts";
|
|
||||||
|
|
||||||
const pool = new Pool(postgresConfig, 12);
|
|
||||||
|
|
||||||
export const db = pool;
|
|
@@ -1,21 +0,0 @@
const requiredEnvVars = ["DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_PORT"];

const unsetVars = requiredEnvVars.filter((key) => Deno.env.get(key) === undefined);

if (unsetVars.length > 0) {
	throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
}

const databaseHost = Deno.env.get("DB_HOST")!;
const databaseName = Deno.env.get("DB_NAME")!;
const databaseUser = Deno.env.get("DB_USER")!;
const databasePassword = Deno.env.get("DB_PASSWORD")!;
const databasePort = Deno.env.get("DB_PORT")!;

export const postgresConfig = {
	hostname: databaseHost,
	port: parseInt(databasePort),
	database: databaseName,
	user: databaseUser,
	password: databasePassword,
};
@@ -1,3 +0,0 @@
import { Redis } from "ioredis";

export const redis = new Redis({ maxRetriesPerRequest: null });
@@ -1,44 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { LatestSnapshotType } from "lib/db/schema.d.ts";

export async function getVideosNearMilestone(client: Client) {
	const queryResult = await client.queryObject<LatestSnapshotType>(`
		SELECT ls.*
		FROM latest_video_snapshot ls
		INNER JOIN songs s ON ls.aid = s.aid AND s.deleted = false
		WHERE s.deleted = false AND (
			(views >= 90000 AND views < 100000) OR
			(views >= 900000 AND views < 1000000) OR
			(views >= 9900000 AND views < 10000000)
		)
	`);
	return queryResult.rows.map((row) => {
		return {
			...row,
			aid: Number(row.aid),
		};
	});
}

export async function getLatestVideoSnapshot(client: Client, aid: number): Promise<null | LatestSnapshotType> {
	const queryResult = await client.queryObject<LatestSnapshotType>(
		`
		SELECT *
		FROM latest_video_snapshot
		WHERE aid = $1
		`,
		[aid],
	);
	if (queryResult.rows.length === 0) {
		return null;
	}
	return queryResult.rows.map((row) => {
		return {
			...row,
			aid: Number(row.aid),
			time: new Date(row.time).getTime(),
		};
	})[0];
}
@@ -1,309 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
import { SnapshotScheduleType } from "./schema.d.ts";
import logger from "lib/log/logger.ts";
import { MINUTE } from "$std/datetime/constants.ts";
import { redis } from "lib/db/redis.ts";
import { Redis } from "ioredis";

const REDIS_KEY = "cvsa:snapshot_window_counts";

function getCurrentWindowIndex(): number {
	const now = new Date();
	const minutesSinceMidnight = now.getHours() * 60 + now.getMinutes();
	const currentWindow = Math.floor(minutesSinceMidnight / 5);
	return currentWindow;
}

export async function refreshSnapshotWindowCounts(client: Client, redisClient: Redis) {
	const now = new Date();
	const startTime = now.getTime();

	const result = await client.queryObject<{ window_start: Date; count: number }>`
		SELECT
			date_trunc('hour', started_at) +
			(EXTRACT(minute FROM started_at)::int / 5 * INTERVAL '5 minutes') AS window_start,
			COUNT(*) AS count
		FROM snapshot_schedule
		WHERE started_at >= NOW() AND status = 'pending' AND started_at <= NOW() + INTERVAL '10 days'
		GROUP BY 1
		ORDER BY window_start
	`;

	await redisClient.del(REDIS_KEY);

	const currentWindow = getCurrentWindowIndex();

	for (const row of result.rows) {
		const targetOffset = Math.floor((row.window_start.getTime() - startTime) / (5 * MINUTE));
		const offset = currentWindow + targetOffset;
		if (offset >= 0) {
			await redisClient.hset(REDIS_KEY, offset.toString(), Number(row.count));
		}
	}
}

export async function initSnapshotWindowCounts(client: Client, redisClient: Redis) {
	await refreshSnapshotWindowCounts(client, redisClient);
	setInterval(async () => {
		await refreshSnapshotWindowCounts(client, redisClient);
	}, 5 * MINUTE);
}

async function getWindowCount(redisClient: Redis, offset: number): Promise<number> {
	const count = await redisClient.hget(REDIS_KEY, offset.toString());
	return count ? parseInt(count, 10) : 0;
}

export async function snapshotScheduleExists(client: Client, id: number) {
	const res = await client.queryObject<{ id: number }>(
		`SELECT id FROM snapshot_schedule WHERE id = $1`,
		[id],
	);
	return res.rows.length > 0;
}

export async function videoHasActiveSchedule(client: Client, aid: number) {
	const res = await client.queryObject<{ status: string }>(
		`SELECT status FROM snapshot_schedule WHERE aid = $1 AND (status = 'pending' OR status = 'processing')`,
		[aid],
	);
	return res.rows.length > 0;
}

export async function videoHasProcessingSchedule(client: Client, aid: number) {
	const res = await client.queryObject<{ status: string }>(
		`SELECT status FROM snapshot_schedule WHERE aid = $1 AND status = 'processing'`,
		[aid],
	);
	return res.rows.length > 0;
}

export async function bulkGetVideosWithoutProcessingSchedules(client: Client, aids: number[]) {
	const res = await client.queryObject<{ aid: number }>(
		`SELECT aid FROM snapshot_schedule WHERE aid = ANY($1) AND status != 'processing' GROUP BY aid`,
		[aids],
	);
	return res.rows.map((row) => row.aid);
}

interface Snapshot {
	created_at: number;
	views: number;
}

export async function findClosestSnapshot(
	client: Client,
	aid: number,
	targetTime: Date,
): Promise<Snapshot | null> {
	const query = `
		SELECT created_at, views
		FROM video_snapshot
		WHERE aid = $1
		ORDER BY ABS(EXTRACT(EPOCH FROM (created_at - $2::timestamptz)))
		LIMIT 1
	`;
	const result = await client.queryObject<{ created_at: string; views: number }>(
		query,
		[aid, targetTime.toISOString()],
	);
	if (result.rows.length === 0) return null;
	const row = result.rows[0];
	return {
		created_at: new Date(row.created_at).getTime(),
		views: row.views,
	};
}

export async function findSnapshotBefore(
	client: Client,
	aid: number,
	targetTime: Date,
): Promise<Snapshot | null> {
	const query = `
		SELECT created_at, views
		FROM video_snapshot
		WHERE aid = $1
		AND created_at <= $2::timestamptz
		ORDER BY created_at DESC
		LIMIT 1
	`;
	const result = await client.queryObject<{ created_at: string; views: number }>(
		query,
		[aid, targetTime.toISOString()],
	);
	if (result.rows.length === 0) return null;
	const row = result.rows[0];
	return {
		created_at: new Date(row.created_at).getTime(),
		views: row.views,
	};
}

export async function hasAtLeast2Snapshots(client: Client, aid: number) {
	const res = await client.queryObject<{ count: number }>(
		`SELECT COUNT(*) FROM video_snapshot WHERE aid = $1`,
		[aid],
	);
	return res.rows[0].count >= 2;
}

export async function getLatestSnapshot(client: Client, aid: number): Promise<Snapshot | null> {
	const res = await client.queryObject<{ created_at: string; views: number }>(
		`SELECT created_at, views FROM video_snapshot WHERE aid = $1 ORDER BY created_at DESC LIMIT 1`,
		[aid],
	);
	if (res.rows.length === 0) return null;
	const row = res.rows[0];
	return {
		created_at: new Date(row.created_at).getTime(),
		views: row.views,
	};
}

/*
 * Returns the number of snapshot schedules within the specified range.
 * @param client The database client.
 * @param start The start time of the range. (Timestamp in milliseconds)
 * @param end The end time of the range. (Timestamp in milliseconds)
 */
export async function getSnapshotScheduleCountWithinRange(client: Client, start: number, end: number) {
	const startTimeString = formatTimestampToPsql(start);
	const endTimeString = formatTimestampToPsql(end);
	const query = `
		SELECT COUNT(*) FROM snapshot_schedule
		WHERE started_at BETWEEN $1 AND $2
		AND status = 'pending'
	`;
	const res = await client.queryObject<{ count: number }>(query, [startTimeString, endTimeString]);
	return res.rows[0].count;
}

/*
 * Creates a new snapshot schedule record.
 * @param client The database client.
 * @param aid The aid of the video.
 * @param targetTime Scheduled time for snapshot. (Timestamp in milliseconds)
 */
export async function scheduleSnapshot(client: Client, aid: number, type: string, targetTime: number, force: boolean = false) {
	if (await videoHasActiveSchedule(client, aid) && !force) return;
	let adjustedTime = new Date(targetTime);
	if (type !== "milestone" && type !== "new") {
		adjustedTime = await adjustSnapshotTime(new Date(targetTime), 1000, redis);
	}
	logger.log(`Scheduled snapshot for ${aid} at ${adjustedTime.toISOString()}`, "mq", "fn:scheduleSnapshot");
	return client.queryObject(
		`INSERT INTO snapshot_schedule (aid, type, started_at) VALUES ($1, $2, $3)`,
		[aid, type, adjustedTime.toISOString()],
	);
}

export async function bulkScheduleSnapshot(client: Client, aids: number[], type: string, targetTime: number, force: boolean = false) {
	for (const aid of aids) {
		await scheduleSnapshot(client, aid, type, targetTime, force);
	}
}

export async function adjustSnapshotTime(
	expectedStartTime: Date,
	allowedCounts: number = 1000,
	redisClient: Redis,
): Promise<Date> {
	const currentWindow = getCurrentWindowIndex();
	const targetOffset = Math.floor((expectedStartTime.getTime() - Date.now()) / (5 * MINUTE)) - 6;

	const initialOffset = currentWindow + Math.max(targetOffset, 0);

	let timePerIteration = 0;
	const MAX_ITERATIONS = 2880;
	let iters = 0;
	const t = performance.now();
	for (let i = initialOffset; i < MAX_ITERATIONS; i++) {
		iters++;
		const offset = i;
		const count = await getWindowCount(redisClient, offset);

		if (count < allowedCounts) {
			await redisClient.hincrby(REDIS_KEY, offset.toString(), 1);

			const startPoint = new Date();
			startPoint.setHours(0, 0, 0, 0);
			const startTime = startPoint.getTime();
			const windowStart = startTime + offset * 5 * MINUTE;
			const randomDelay = Math.floor(Math.random() * 5 * MINUTE);
			const delayedDate = new Date(windowStart + randomDelay);
			const now = new Date();

			if (delayedDate.getTime() < now.getTime()) {
				const elapsed = performance.now() - t;
				timePerIteration = elapsed / (i + 1);
				logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime");
				return now;
			}
			const elapsed = performance.now() - t;
			timePerIteration = elapsed / (i + 1);
			logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime");
			return delayedDate;
		}
	}
	const elapsed = performance.now() - t;
	timePerIteration = elapsed / MAX_ITERATIONS;
	logger.log(`${timePerIteration.toFixed(3)}ms * ${MAX_ITERATIONS} iterations`, "perf", "fn:adjustSnapshotTime");
	return expectedStartTime;
}

export async function getSnapshotsInNextSecond(client: Client) {
	const query = `
		SELECT *
		FROM snapshot_schedule
		WHERE started_at <= NOW() + INTERVAL '1 seconds' AND status = 'pending' AND type != 'normal'
		ORDER BY
			CASE
				WHEN type = 'milestone' THEN 0
				ELSE 1
			END,
			started_at
		LIMIT 10;
	`;
	const res = await client.queryObject<SnapshotScheduleType>(query, []);
	return res.rows;
}

export async function getBulkSnapshotsInNextSecond(client: Client) {
	const query = `
		SELECT *
		FROM snapshot_schedule
		WHERE started_at <= NOW() + INTERVAL '15 seconds' AND status = 'pending' AND type = 'normal'
		ORDER BY started_at
		LIMIT 1000;
	`;
	const res = await client.queryObject<SnapshotScheduleType>(query, []);
	return res.rows;
}

export async function setSnapshotStatus(client: Client, id: number, status: string) {
	return await client.queryObject(
		`UPDATE snapshot_schedule SET status = $2 WHERE id = $1`,
		[id, status],
	);
}

export async function bulkSetSnapshotStatus(client: Client, ids: number[], status: string) {
	return await client.queryObject(
		`UPDATE snapshot_schedule SET status = $2 WHERE id = ANY($1)`,
		[ids, status],
	);
}

export async function getVideosWithoutActiveSnapshotSchedule(client: Client) {
	const query: string = `
		SELECT s.aid
		FROM songs s
		LEFT JOIN snapshot_schedule ss ON s.aid = ss.aid AND (ss.status = 'pending' OR ss.status = 'processing')
		WHERE ss.aid IS NULL
	`;
	const res = await client.queryObject<{ aid: number }>(query, []);
	return res.rows.map((r) => Number(r.aid));
}
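A small worked example of the 5-minute window arithmetic used by `getCurrentWindowIndex` and `adjustSnapshotTime` above; the time of day is illustrative only, and the `$std` import alias is assumed to resolve via this repo's import map.

import { MINUTE } from "$std/datetime/constants.ts";

// Illustrative only: redo the window maths above for "14:32 today".
const now = new Date();
now.setHours(14, 32, 0, 0);
const minutesSinceMidnight = now.getHours() * 60 + now.getMinutes(); // 872
const windowIndex = Math.floor(minutesSinceMidnight / 5); // 174

// Wall-clock start of that window, mirroring the calculation in adjustSnapshotTime.
const midnight = new Date(now);
midnight.setHours(0, 0, 0, 0);
const windowStart = new Date(midnight.getTime() + windowIndex * 5 * MINUTE);
console.log(windowIndex, windowStart); // 174, 14:30 local time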
@@ -1,45 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts";

export async function getNotCollectedSongs(client: Client) {
	const queryResult = await client.queryObject<{ aid: number }>(`
		SELECT lr.aid
		FROM labelling_result lr
		WHERE lr.label != 0
		AND NOT EXISTS (
			SELECT 1
			FROM songs s
			WHERE s.aid = lr.aid
		);
	`);
	return queryResult.rows.map((row) => row.aid);
}

export async function aidExistsInSongs(client: Client, aid: number) {
	const queryResult = await client.queryObject<{ exists: boolean }>(
		`
		SELECT EXISTS (
			SELECT 1
			FROM songs
			WHERE aid = $1
		);
		`,
		[aid],
	);
	return queryResult.rows[0].exists;
}

export async function getSongsPublihsedAt(client: Client, aid: number) {
	const queryResult = await client.queryObject<{ published_at: string }>(
		`
		SELECT published_at
		FROM songs
		WHERE aid = $1;
		`,
		[aid],
	);
	if (queryResult.rows.length === 0) {
		return null;
	}
	return parseTimestampFromPsql(queryResult.rows[0].published_at);
}
@ -1,179 +0,0 @@
|
|||||||
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
|
|
||||||
import * as ort from "onnxruntime";
|
|
||||||
|
|
||||||
function softmax(logits: Float32Array): number[] {
|
|
||||||
const maxLogit = Math.max(...logits);
|
|
||||||
const exponents = logits.map((logit) => Math.exp(logit - maxLogit));
|
|
||||||
const sumOfExponents = exponents.reduce((sum, exp) => sum + exp, 0);
|
|
||||||
return Array.from(exponents.map((exp) => exp / sumOfExponents));
|
|
||||||
}
|
|
||||||
|
|
||||||
// 配置参数
|
|
||||||
const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
|
|
||||||
const onnxClassifierPath = "./model/video_classifier_v3_17.onnx";
|
|
||||||
const onnxEmbeddingPath = "./model/embedding_original.onnx";
|
|
||||||
const testDataPath = "./data/filter/test1.jsonl";
|
|
||||||
|
|
||||||
// 初始化会话
|
|
||||||
const [sessionClassifier, sessionEmbedding] = await Promise.all([
|
|
||||||
ort.InferenceSession.create(onnxClassifierPath),
|
|
||||||
ort.InferenceSession.create(onnxEmbeddingPath),
|
|
||||||
]);
|
|
||||||
|
|
||||||
let tokenizer: PreTrainedTokenizer;
|
|
||||||
|
|
||||||
// 初始化分词器
|
|
||||||
async function loadTokenizer() {
|
|
||||||
const tokenizerConfig = { local_files_only: true };
|
|
||||||
tokenizer = await AutoTokenizer.from_pretrained(sentenceTransformerModelName, tokenizerConfig);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 新的嵌入生成函数(使用ONNX)
|
|
||||||
async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
|
|
||||||
const { input_ids } = await tokenizer(texts, {
|
|
||||||
add_special_tokens: false,
|
|
||||||
return_tensor: false,
|
|
||||||
});
|
|
||||||
|
|
||||||
// 构造输入参数
|
|
||||||
const cumsum = (arr: number[]): number[] =>
|
|
||||||
arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
|
|
||||||
|
|
||||||
const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: string) => x.length))];
|
|
||||||
const flattened_input_ids = input_ids.flat();
|
|
||||||
|
|
||||||
// 准备ONNX输入
|
|
||||||
const inputs = {
|
|
||||||
input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [
|
|
||||||
flattened_input_ids.length,
|
|
||||||
]),
|
|
||||||
offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length]),
|
|
||||||
};
|
|
||||||
|
|
||||||
// 执行推理
|
|
||||||
const { embeddings } = await session.run(inputs);
|
|
||||||
return Array.from(embeddings.data as Float32Array);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 分类推理函数
|
|
||||||
async function runClassification(embeddings: number[]): Promise<number[]> {
|
|
||||||
const inputTensor = new ort.Tensor(
|
|
||||||
Float32Array.from(embeddings),
|
|
||||||
[1, 3, 1024],
|
|
||||||
);
|
|
||||||
|
|
||||||
const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
|
|
||||||
return softmax(logits.data as Float32Array);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 指标计算函数
|
|
||||||
function calculateMetrics(labels: number[], predictions: number[], elapsedTime: number): {
|
|
||||||
accuracy: number;
|
|
||||||
precision: number;
|
|
||||||
recall: number;
|
|
||||||
f1: number;
|
|
||||||
"Class 0 Prec": number;
|
|
||||||
speed: string;
|
|
||||||
} {
|
|
||||||
// 输出label和prediction不一样的index列表
|
|
||||||
const arr = [];
|
|
||||||
for (let i = 0; i < labels.length; i++) {
|
|
||||||
if (labels[i] !== predictions[i] && predictions[i] == 0) {
|
|
||||||
arr.push([i + 1, labels[i], predictions[i]]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
console.log(arr);
|
|
||||||
// 初始化混淆矩阵
|
|
||||||
const classCount = Math.max(...labels, ...predictions) + 1;
|
|
||||||
const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
|
|
||||||
|
|
||||||
// 填充矩阵
|
|
||||||
labels.forEach((trueLabel, i) => {
|
|
||||||
matrix[trueLabel][predictions[i]]++;
|
|
||||||
});
|
|
||||||
|
|
||||||
// 计算各指标
|
|
||||||
let totalTP = 0, totalFP = 0, totalFN = 0;
|
|
||||||
|
|
||||||
for (let c = 0; c < classCount; c++) {
|
|
||||||
const TP = matrix[c][c];
|
|
||||||
const FP = matrix.flatMap((row, i) => i === c ? [] : [row[c]]).reduce((a, b) => a + b, 0);
|
|
||||||
const FN = matrix[c].filter((_, i) => i !== c).reduce((a, b) => a + b, 0);
|
|
||||||
|
|
||||||
totalTP += TP;
|
|
||||||
totalFP += FP;
|
|
||||||
totalFN += FN;
|
|
||||||
}
|
|
||||||
|
|
||||||
const precision = totalTP / (totalTP + totalFP);
|
|
||||||
const recall = totalTP / (totalTP + totalFN);
|
|
||||||
const f1 = 2 * (precision * recall) / (precision + recall) || 0;
|
|
||||||
|
|
||||||
// 计算Class 0 Precision
|
|
||||||
const class0TP = matrix[0][0];
|
|
||||||
const class0FP = matrix.flatMap((row, i) => i === 0 ? [] : [row[0]]).reduce((a, b) => a + b, 0);
|
|
||||||
const class0Precision = class0TP / (class0TP + class0FP) || 0;
|
|
||||||
|
|
||||||
return {
|
|
||||||
accuracy: labels.filter((l, i) => l === predictions[i]).length / labels.length,
|
|
||||||
precision,
|
|
||||||
recall,
|
|
||||||
f1,
|
|
||||||
speed: `${(labels.length / (elapsedTime / 1000)).toFixed(1)} samples/sec`,
|
|
||||||
"Class 0 Prec": class0Precision,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// 改造后的评估函数
|
|
||||||
async function evaluateModel(session: ort.InferenceSession): Promise<{
|
|
||||||
accuracy: number;
|
|
||||||
precision: number;
|
|
||||||
recall: number;
|
|
||||||
f1: number;
|
|
||||||
"Class 0 Prec": number;
|
|
||||||
}> {
|
|
||||||
const data = await Deno.readTextFile(testDataPath);
|
|
||||||
const samples = data.split("\n")
|
|
||||||
.map((line) => {
|
|
||||||
try {
|
|
||||||
return JSON.parse(line);
|
|
||||||
} catch {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.filter(Boolean);
|
|
||||||
|
|
||||||
const allPredictions: number[] = [];
|
|
||||||
const allLabels: number[] = [];
|
|
||||||
|
|
||||||
const t = new Date().getTime();
|
|
||||||
for (const sample of samples) {
|
|
||||||
try {
|
|
||||||
const embeddings = await getONNXEmbeddings([
|
|
||||||
sample.title,
|
|
||||||
sample.description,
|
|
||||||
sample.tags.join(","),
|
|
||||||
], session);
|
|
||||||
|
|
||||||
const probabilities = await runClassification(embeddings);
|
|
||||||
allPredictions.push(probabilities.indexOf(Math.max(...probabilities)));
|
|
||||||
allLabels.push(sample.label);
|
|
||||||
} catch (error) {
|
|
||||||
console.error("Processing error:", error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const elapsed = new Date().getTime() - t;
|
|
||||||
|
|
||||||
return calculateMetrics(allLabels, allPredictions, elapsed);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 主函数
|
|
||||||
async function main() {
|
|
||||||
await loadTokenizer();
|
|
||||||
|
|
||||||
const metrics = await evaluateModel(sessionEmbedding);
|
|
||||||
console.log("Model Metrics:");
|
|
||||||
console.table(metrics);
|
|
||||||
}
|
|
||||||
|
|
||||||
await main();
|
|
@@ -1,22 +0,0 @@
import { AIManager } from "lib/ml/manager.ts";
import * as ort from "onnxruntime";
import logger from "lib/log/logger.ts";
import { WorkerError } from "lib/mq/schema.ts";

const modelPath = "./model/model.onnx";

class MantisProto extends AIManager {
	constructor() {
		super();
		this.models = {
			"predictor": modelPath,
		};
	}

	public override async init(): Promise<void> {
		await super.init();
	}
}

const Mantis = new MantisProto();
export default Mantis;
@ -1,171 +0,0 @@
|
|||||||
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
|
|
||||||
import * as ort from "onnxruntime";
|
|
||||||
|
|
||||||
function softmax(logits: Float32Array): number[] {
|
|
||||||
const maxLogit = Math.max(...logits);
|
|
||||||
const exponents = logits.map((logit) => Math.exp(logit - maxLogit));
|
|
||||||
const sumOfExponents = exponents.reduce((sum, exp) => sum + exp, 0);
|
|
||||||
return Array.from(exponents.map((exp) => exp / sumOfExponents));
|
|
||||||
}
|
|
||||||
|
|
||||||
// 配置参数
|
|
||||||
const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
|
|
||||||
const onnxClassifierPath = "./model/video_classifier_v3_11.onnx";
|
|
||||||
const onnxEmbeddingOriginalPath = "./model/embedding_original.onnx";
|
|
||||||
const onnxEmbeddingQuantizedPath = "./model/embedding_original.onnx";
|
|
||||||
|
|
||||||
// 初始化会话
|
|
||||||
const [sessionClassifier, sessionEmbeddingOriginal, sessionEmbeddingQuantized] = await Promise.all([
|
|
||||||
ort.InferenceSession.create(onnxClassifierPath),
|
|
||||||
ort.InferenceSession.create(onnxEmbeddingOriginalPath),
|
|
||||||
ort.InferenceSession.create(onnxEmbeddingQuantizedPath),
|
|
||||||
]);
|
|
||||||
|
|
||||||
let tokenizer: PreTrainedTokenizer;
|
|
||||||
|
|
||||||
// 初始化分词器
|
|
||||||
async function loadTokenizer() {
|
|
||||||
const tokenizerConfig = { local_files_only: true };
|
|
||||||
tokenizer = await AutoTokenizer.from_pretrained(sentenceTransformerModelName, tokenizerConfig);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 新的嵌入生成函数(使用ONNX)
|
|
||||||
async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
|
|
||||||
const { input_ids } = await tokenizer(texts, {
|
|
||||||
add_special_tokens: false,
|
|
||||||
return_tensor: false,
|
|
||||||
});
|
|
||||||
|
|
||||||
// 构造输入参数
|
|
||||||
const cumsum = (arr: number[]): number[] =>
|
|
||||||
arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
|
|
||||||
|
|
||||||
const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: string) => x.length))];
|
|
||||||
const flattened_input_ids = input_ids.flat();
|
|
||||||
|
|
||||||
// 准备ONNX输入
|
|
||||||
const inputs = {
|
|
||||||
input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [
|
|
||||||
flattened_input_ids.length,
|
|
||||||
]),
|
|
||||||
offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length]),
|
|
||||||
};
|
|
||||||
|
|
||||||
// 执行推理
|
|
||||||
const { embeddings } = await session.run(inputs);
|
|
||||||
return Array.from(embeddings.data as Float32Array);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 分类推理函数
|
|
||||||
async function runClassification(embeddings: number[]): Promise<number[]> {
|
|
||||||
const inputTensor = new ort.Tensor(
|
|
||||||
Float32Array.from(embeddings),
|
|
||||||
[1, 4, 1024],
|
|
||||||
);
|
|
||||||
|
|
||||||
const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
|
|
||||||
return softmax(logits.data as Float32Array);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 指标计算函数
|
|
||||||
function calculateMetrics(labels: number[], predictions: number[], elapsedTime: number): {
|
|
||||||
accuracy: number;
|
|
||||||
precision: number;
|
|
||||||
recall: number;
|
|
||||||
f1: number;
|
|
||||||
speed: string;
|
|
||||||
} {
|
|
||||||
// 初始化混淆矩阵
|
|
||||||
const classCount = Math.max(...labels, ...predictions) + 1;
|
|
||||||
const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
|
|
||||||
|
|
||||||
// 填充矩阵
|
|
||||||
labels.forEach((trueLabel, i) => {
|
|
||||||
matrix[trueLabel][predictions[i]]++;
|
|
||||||
});
|
|
||||||
|
|
||||||
// 计算各指标
|
|
||||||
let totalTP = 0, totalFP = 0, totalFN = 0;
|
|
||||||
|
|
||||||
for (let c = 0; c < classCount; c++) {
|
|
||||||
const TP = matrix[c][c];
|
|
||||||
const FP = matrix.flatMap((row, i) => i === c ? [] : [row[c]]).reduce((a, b) => a + b, 0);
|
|
||||||
const FN = matrix[c].filter((_, i) => i !== c).reduce((a, b) => a + b, 0);
|
|
||||||
|
|
||||||
totalTP += TP;
|
|
||||||
totalFP += FP;
|
|
||||||
totalFN += FN;
|
|
||||||
}
|
|
||||||
|
|
||||||
const precision = totalTP / (totalTP + totalFP);
|
|
||||||
const recall = totalTP / (totalTP + totalFN);
|
|
||||||
const f1 = 2 * (precision * recall) / (precision + recall) || 0;
|
|
||||||
|
|
||||||
return {
|
|
||||||
accuracy: labels.filter((l, i) => l === predictions[i]).length / labels.length,
|
|
||||||
precision,
|
|
||||||
recall,
|
|
||||||
f1,
|
|
||||||
speed: `${(labels.length / (elapsedTime / 1000)).toFixed(1)} samples/sec`,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// 改造后的评估函数
|
|
||||||
async function evaluateModel(session: ort.InferenceSession): Promise<{
|
|
||||||
accuracy: number;
|
|
||||||
precision: number;
|
|
||||||
recall: number;
|
|
||||||
f1: number;
|
|
||||||
}> {
|
|
||||||
const data = await Deno.readTextFile("./data/filter/test1.jsonl");
|
|
||||||
const samples = data.split("\n")
|
|
||||||
.map((line) => {
|
|
||||||
try {
|
|
||||||
return JSON.parse(line);
|
|
||||||
} catch {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.filter(Boolean);
|
|
||||||
|
|
||||||
const allPredictions: number[] = [];
|
|
||||||
const allLabels: number[] = [];
|
|
||||||
|
|
||||||
const t = new Date().getTime();
|
|
||||||
for (const sample of samples) {
|
|
||||||
try {
|
|
||||||
const embeddings = await getONNXEmbeddings([
|
|
||||||
sample.title,
|
|
||||||
sample.description,
|
|
||||||
sample.tags.join(","),
|
|
||||||
sample.author_info,
|
|
||||||
], session);
|
|
||||||
|
|
||||||
const probabilities = await runClassification(embeddings);
|
|
||||||
allPredictions.push(probabilities.indexOf(Math.max(...probabilities)));
|
|
||||||
allLabels.push(sample.label);
|
|
||||||
} catch (error) {
|
|
||||||
console.error("Processing error:", error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const elapsed = new Date().getTime() - t;
|
|
||||||
|
|
||||||
return calculateMetrics(allLabels, allPredictions, elapsed);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 主函数
|
|
||||||
async function main() {
|
|
||||||
await loadTokenizer();
|
|
||||||
|
|
||||||
// 评估原始模型
|
|
||||||
const originalMetrics = await evaluateModel(sessionEmbeddingOriginal);
|
|
||||||
console.log("Original Model Metrics:");
|
|
||||||
console.table(originalMetrics);
|
|
||||||
|
|
||||||
// 评估量化模型
|
|
||||||
const quantizedMetrics = await evaluateModel(sessionEmbeddingQuantized);
|
|
||||||
console.log("Quantized Model Metrics:");
|
|
||||||
console.table(quantizedMetrics);
|
|
||||||
}
|
|
||||||
|
|
||||||
await main();
|
|
@@ -1,37 +0,0 @@
import { Job } from "bullmq";
import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
import { db } from "lib/db/init.ts";
import { insertVideoInfo } from "lib/mq/task/getVideoDetails.ts";
import { collectSongs } from "lib/mq/task/collectSongs.ts";

export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
	const client = await db.connect();
	try {
		await queueLatestVideos(client);
	} finally {
		client.release();
	}
};

export const collectSongsWorker = async (_job: Job): Promise<void> => {
	const client = await db.connect();
	try {
		await collectSongs(client);
	} finally {
		client.release();
	}
};

export const getVideoInfoWorker = async (job: Job): Promise<number> => {
	const client = await db.connect();
	try {
		const aid = job.data.aid;
		if (!aid) {
			return 3;
		}
		await insertVideoInfo(client, aid);
		return 0;
	} finally {
		client.release();
	}
};
@ -1,397 +0,0 @@
|
|||||||
import { Job } from "bullmq";
|
|
||||||
import { db } from "lib/db/init.ts";
|
|
||||||
import { getLatestVideoSnapshot, getVideosNearMilestone } from "lib/db/snapshot.ts";
|
|
||||||
import {
|
|
||||||
bulkGetVideosWithoutProcessingSchedules,
|
|
||||||
bulkScheduleSnapshot,
|
|
||||||
bulkSetSnapshotStatus,
|
|
||||||
findClosestSnapshot,
|
|
||||||
findSnapshotBefore,
|
|
||||||
getLatestSnapshot,
|
|
||||||
getSnapshotsInNextSecond,
|
|
||||||
getVideosWithoutActiveSnapshotSchedule,
|
|
||||||
hasAtLeast2Snapshots,
|
|
||||||
scheduleSnapshot,
|
|
||||||
setSnapshotStatus,
|
|
||||||
snapshotScheduleExists,
|
|
||||||
videoHasProcessingSchedule,
|
|
||||||
getBulkSnapshotsInNextSecond
|
|
||||||
} from "lib/db/snapshotSchedule.ts";
|
|
||||||
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
|
|
||||||
import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts";
|
|
||||||
import logger from "lib/log/logger.ts";
|
|
||||||
import { SnapshotQueue } from "lib/mq/index.ts";
|
|
||||||
import { insertVideoSnapshot } from "lib/mq/task/getVideoStats.ts";
|
|
||||||
import { NetSchedulerError } from "lib/mq/scheduler.ts";
|
|
||||||
import { getBiliVideoStatus, setBiliVideoStatus } from "lib/db/allData.ts";
|
|
||||||
import { truncate } from "lib/utils/truncate.ts";
|
|
||||||
import { lockManager } from "lib/mq/lockManager.ts";
|
|
||||||
import { getSongsPublihsedAt } from "lib/db/songs.ts";
|
|
||||||
import { bulkGetVideoStats } from "lib/net/bulkGetVideoStats.ts";
|
|
||||||
|
|
||||||
const priorityMap: { [key: string]: number } = {
|
|
||||||
"milestone": 1,
|
|
||||||
"normal": 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
const snapshotTypeToTaskMap: { [key: string]: string } = {
|
|
||||||
"milestone": "snapshotMilestoneVideo",
|
|
||||||
"normal": "snapshotVideo",
|
|
||||||
"new": "snapshotMilestoneVideo",
|
|
||||||
};
|
|
||||||
|
|
||||||
export const bulkSnapshotTickWorker = async (_job: Job) => {
|
|
||||||
const client = await db.connect();
|
|
||||||
try {
|
|
||||||
const schedules = await getBulkSnapshotsInNextSecond(client);
|
|
||||||
const count = schedules.length;
|
|
||||||
const groups = Math.ceil(count / 30);
|
|
||||||
for (let i = 0; i < groups; i++) {
|
|
||||||
const group = schedules.slice(i * 30, (i + 1) * 30);
|
|
||||||
const aids = group.map((schedule) => Number(schedule.aid));
|
|
||||||
const filteredAids = await bulkGetVideosWithoutProcessingSchedules(client, aids);
|
|
||||||
if (filteredAids.length === 0) continue;
|
|
||||||
await bulkSetSnapshotStatus(client, filteredAids, "processing");
|
|
||||||
const dataMap: { [key: number]: number } = {};
|
|
||||||
for (const schedule of group) {
|
|
||||||
const id = Number(schedule.id);
|
|
||||||
dataMap[id] = Number(schedule.aid);
|
|
||||||
}
|
|
||||||
await SnapshotQueue.add("bulkSnapshotVideo", {
|
|
||||||
map: dataMap,
|
|
||||||
}, { priority: 3 });
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
logger.error(e as Error);
|
|
||||||
} finally {
|
|
||||||
client.release();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
export const snapshotTickWorker = async (_job: Job) => {
|
|
||||||
const client = await db.connect();
|
|
||||||
try {
|
|
||||||
const schedules = await getSnapshotsInNextSecond(client);
|
|
||||||
for (const schedule of schedules) {
|
|
||||||
if (await videoHasProcessingSchedule(client, Number(schedule.aid))) {
|
|
||||||
return `ALREADY_PROCESSING`;
|
|
||||||
}
|
|
||||||
let priority = 3;
|
|
||||||
if (schedule.type && priorityMap[schedule.type]) {
|
|
||||||
priority = priorityMap[schedule.type];
|
|
||||||
}
|
|
||||||
const aid = Number(schedule.aid);
|
|
||||||
await setSnapshotStatus(client, schedule.id, "processing");
|
|
||||||
await SnapshotQueue.add("snapshotVideo", {
|
|
||||||
aid: aid,
|
|
||||||
id: Number(schedule.id),
|
|
||||||
type: schedule.type ?? "normal",
|
|
||||||
}, { priority });
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
logger.error(e as Error);
|
|
||||||
} finally {
|
|
||||||
client.release();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
export const closetMilestone = (views: number) => {
|
|
||||||
if (views < 100000) return 100000;
|
|
||||||
if (views < 1000000) return 1000000;
|
|
||||||
return 10000000;
|
|
||||||
};
|
|
||||||
|
|
||||||
const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns the minimum ETA in hours for the next snapshot
|
|
||||||
* @param client - Postgres client
|
|
||||||
* @param aid - aid of the video
|
|
||||||
* @returns ETA in hours
|
|
||||||
*/
|
|
||||||
const getAdjustedShortTermETA = async (client: Client, aid: number) => {
|
|
||||||
const latestSnapshot = await getLatestSnapshot(client, aid);
|
|
||||||
// Immediately dispatch a snapshot if there is no snapshot yet
|
|
||||||
if (!latestSnapshot) return 0;
|
|
||||||
const snapshotsEnough = await hasAtLeast2Snapshots(client, aid);
|
|
||||||
if (!snapshotsEnough) return 0;
|
|
||||||
|
|
||||||
const currentTimestamp = new Date().getTime();
|
|
||||||
const timeIntervals = [3 * MINUTE, 20 * MINUTE, 1 * HOUR, 3 * HOUR, 6 * HOUR];
|
|
||||||
const DELTA = 0.00001;
|
|
||||||
let minETAHours = Infinity;
|
|
||||||
|
|
||||||
for (const timeInterval of timeIntervals) {
|
|
||||||
const date = new Date(currentTimestamp - timeInterval);
|
|
||||||
const snapshot = await findClosestSnapshot(client, aid, date);
|
|
||||||
if (!snapshot) continue;
|
|
||||||
const hoursDiff = (latestSnapshot.created_at - snapshot.created_at) / HOUR;
|
|
||||||
const viewsDiff = latestSnapshot.views - snapshot.views;
|
|
||||||
if (viewsDiff <= 0) continue;
|
|
||||||
const speed = viewsDiff / (hoursDiff + DELTA);
|
|
||||||
const target = closetMilestone(latestSnapshot.views);
|
|
||||||
const viewsToIncrease = target - latestSnapshot.views;
|
|
||||||
const eta = viewsToIncrease / (speed + DELTA);
|
|
||||||
let factor = log(2.97 / log(viewsToIncrease + 1), 1.14);
|
|
||||||
factor = truncate(factor, 3, 100);
|
|
||||||
const adjustedETA = eta / factor;
|
|
||||||
if (adjustedETA < minETAHours) {
|
|
||||||
minETAHours = adjustedETA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isNaN(minETAHours)) {
|
|
||||||
minETAHours = Infinity;
|
|
||||||
}
|
|
||||||
|
|
||||||
return minETAHours;
|
|
||||||
};
|
|
||||||
|
|
||||||
export const collectMilestoneSnapshotsWorker = async (_job: Job) => {
|
|
||||||
const client = await db.connect();
|
|
||||||
try {
|
|
||||||
const videos = await getVideosNearMilestone(client);
|
|
||||||
for (const video of videos) {
|
|
||||||
const aid = Number(video.aid);
|
|
||||||
const eta = await getAdjustedShortTermETA(client, aid);
|
|
||||||
if (eta > 72) continue;
|
|
||||||
const now = Date.now();
|
|
||||||
const scheduledNextSnapshotDelay = eta * HOUR;
|
|
||||||
const maxInterval = 4 * HOUR;
|
|
||||||
const minInterval = 1 * SECOND;
|
|
||||||
const delay = truncate(scheduledNextSnapshotDelay, minInterval, maxInterval);
|
|
||||||
const targetTime = now + delay;
|
|
||||||
await scheduleSnapshot(client, aid, "milestone", targetTime);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
logger.error(e as Error, "mq", "fn:collectMilestoneSnapshotsWorker");
|
|
||||||
} finally {
|
|
||||||
client.release();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const getRegularSnapshotInterval = async (client: Client, aid: number) => {
|
|
||||||
const now = Date.now();
|
|
||||||
const date = new Date(now - 24 * HOUR);
|
|
||||||
let oldSnapshot = await findSnapshotBefore(client, aid, date);
|
|
||||||
if (!oldSnapshot) oldSnapshot = await findClosestSnapshot(client, aid, date);
|
|
||||||
const latestSnapshot = await getLatestSnapshot(client, aid);
|
|
||||||
if (!oldSnapshot || !latestSnapshot) return 0;
|
|
||||||
if (oldSnapshot.created_at === latestSnapshot.created_at) return 0;
|
|
||||||
const hoursDiff = (latestSnapshot.created_at - oldSnapshot.created_at) / HOUR;
|
|
||||||
if (hoursDiff < 8) return 24;
|
|
||||||
const viewsDiff = latestSnapshot.views - oldSnapshot.views;
|
|
||||||
if (viewsDiff === 0) return 72;
|
|
||||||
const speedPerDay = viewsDiff / (hoursDiff + 0.001) * 24;
|
|
||||||
if (speedPerDay < 6) return 36;
|
|
||||||
if (speedPerDay < 120) return 24;
|
|
||||||
if (speedPerDay < 320) return 12;
|
|
||||||
return 6;
|
|
||||||
};
|
|
||||||
|
|
||||||
export const regularSnapshotsWorker = async (_job: Job) => {
|
|
||||||
const client = await db.connect();
|
|
||||||
const startedAt = Date.now();
|
|
||||||
if (await lockManager.isLocked("dispatchRegularSnapshots")) {
|
|
||||||
logger.log("dispatchRegularSnapshots is already running", "mq");
|
|
||||||
client.release();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
await lockManager.acquireLock("dispatchRegularSnapshots", 30 * 60);
|
|
||||||
try {
|
|
||||||
const aids = await getVideosWithoutActiveSnapshotSchedule(client);
|
|
||||||
for (const rawAid of aids) {
|
|
||||||
const aid = Number(rawAid);
|
|
||||||
const latestSnapshot = await getLatestVideoSnapshot(client, aid);
|
|
||||||
const now = Date.now();
|
|
||||||
const lastSnapshotedAt = latestSnapshot?.time ?? now;
|
|
||||||
const interval = await getRegularSnapshotInterval(client, aid);
|
|
||||||
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
|
|
||||||
const targetTime = truncate(lastSnapshotedAt + interval * HOUR, now + 1, now + 100000 * WEEK);
|
|
||||||
await scheduleSnapshot(client, aid, "normal", targetTime);
|
|
||||||
if (now - startedAt > 25 * MINUTE) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
logger.error(e as Error, "mq", "fn:regularSnapshotsWorker");
|
|
||||||
} finally {
|
|
||||||
lockManager.releaseLock("dispatchRegularSnapshots");
|
|
||||||
client.release();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
export const takeBulkSnapshotForVideosWorker = async (job: Job) => {
|
|
||||||
const dataMap: { [key: number]: number } = job.data.map;
|
|
||||||
const ids = Object.keys(dataMap).map((id) => Number(id));
|
|
||||||
const aidsToFetch: number[] = [];
|
|
||||||
const client = await db.connect();
|
|
||||||
try {
|
|
||||||
for (const id of ids) {
|
|
||||||
const aid = Number(dataMap[id]);
|
|
||||||
const exists = await snapshotScheduleExists(client, id);
|
|
||||||
if (!exists) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
aidsToFetch.push(aid);
|
|
||||||
}
|
|
||||||
const data = await bulkGetVideoStats(aidsToFetch);
|
|
||||||
if (typeof data === "number") {
|
|
||||||
await bulkSetSnapshotStatus(client, ids, "failed");
|
|
||||||
await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 15 * SECOND);
|
|
||||||
return `GET_BILI_STATUS_${data}`;
|
|
||||||
}
|
|
||||||
for (const video of data) {
|
|
||||||
const aid = video.id;
|
|
||||||
const stat = video.cnt_info;
|
|
||||||
const views = stat.play;
|
|
||||||
const danmakus = stat.danmaku;
|
|
||||||
const replies = stat.reply;
|
|
||||||
const likes = stat.thumb_up;
|
|
||||||
const coins = stat.coin;
|
|
||||||
const shares = stat.share;
|
|
||||||
const favorites = stat.collect;
|
|
||||||
const query: string = `
|
|
||||||
INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
|
|
||||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
|
||||||
`;
|
|
||||||
await client.queryObject(
|
|
||||||
query,
|
|
||||||
[aid, views, danmakus, replies, likes, coins, shares, favorites],
|
|
||||||
);
|
|
||||||
|
|
||||||
logger.log(`Taken snapshot for video ${aid} in bulk.`, "net", "fn:takeBulkSnapshotForVideosWorker");
|
|
||||||
}
|
|
||||||
await bulkSetSnapshotStatus(client, ids, "completed");
|
|
||||||
for (const aid of aidsToFetch) {
|
|
||||||
const interval = await getRegularSnapshotInterval(client, aid);
|
|
||||||
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
|
|
||||||
await scheduleSnapshot(client, aid, "normal", Date.now() + interval * HOUR);
|
|
||||||
}
|
|
||||||
return `DONE`;
|
|
||||||
} catch (e) {
|
|
||||||
if (e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE") {
|
|
||||||
logger.warn(
|
|
||||||
`No available proxy for bulk request now.`,
|
|
||||||
"mq",
|
|
||||||
"fn:takeBulkSnapshotForVideosWorker",
|
|
||||||
);
|
|
||||||
await bulkSetSnapshotStatus(client, ids, "completed");
|
|
||||||
await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 2 * MINUTE);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
logger.error(e as Error, "mq", "fn:takeBulkSnapshotForVideosWorker");
|
|
||||||
await bulkSetSnapshotStatus(client, ids, "failed");
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
client.release();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
export const takeSnapshotForVideoWorker = async (job: Job) => {
|
|
||||||
const id = job.data.id;
|
|
||||||
const aid = Number(job.data.aid);
|
|
||||||
const type = job.data.type;
|
|
||||||
const task = snapshotTypeToTaskMap[type] ?? "snapshotVideo";
|
|
||||||
const client = await db.connect();
|
|
||||||
const retryInterval = type === "milestone" ? 5 * SECOND : 2 * MINUTE;
|
|
||||||
const exists = await snapshotScheduleExists(client, id);
|
|
||||||
if (!exists) {
|
|
||||||
client.release();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const status = await getBiliVideoStatus(client, aid);
|
|
||||||
if (status !== 0) {
|
|
||||||
client.release();
|
|
||||||
return `REFUSE_WORKING_BILI_STATUS_${status}`;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
await setSnapshotStatus(client, id, "processing");
|
|
||||||
const stat = await insertVideoSnapshot(client, aid, task);
|
|
||||||
if (typeof stat === "number") {
|
|
||||||
await setBiliVideoStatus(client, aid, stat);
|
|
||||||
await setSnapshotStatus(client, id, "completed");
|
|
||||||
return `GET_BILI_STATUS_${stat}`;
|
|
||||||
}
|
|
||||||
await setSnapshotStatus(client, id, "completed");
|
|
||||||
if (type === "normal") {
|
|
||||||
const interval = await getRegularSnapshotInterval(client, aid);
|
|
||||||
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
|
|
||||||
await scheduleSnapshot(client, aid, type, Date.now() + interval * HOUR);
|
|
||||||
return `DONE`;
|
|
||||||
} else if (type === "new") {
|
|
||||||
const publihsedAt = await getSongsPublihsedAt(client, aid);
|
|
||||||
const timeSincePublished = stat.time - publihsedAt!;
|
|
||||||
const viewsPerHour = stat.views / timeSincePublished * HOUR;
|
|
||||||
if (timeSincePublished > 48 * HOUR) {
|
|
||||||
return `DONE`;
|
|
||||||
}
|
|
||||||
if (timeSincePublished > 2 * HOUR && viewsPerHour < 10) {
|
|
||||||
return `DONE`;
|
|
||||||
}
|
|
||||||
let intervalMins = 240;
|
|
||||||
if (viewsPerHour > 50) {
|
|
||||||
intervalMins = 120;
|
|
||||||
}
|
|
||||||
if (viewsPerHour > 100) {
|
|
||||||
intervalMins = 60;
|
|
||||||
}
|
|
||||||
if (viewsPerHour > 1000) {
|
|
||||||
intervalMins = 15;
|
|
||||||
}
|
|
||||||
await scheduleSnapshot(client, aid, type, Date.now() + intervalMins * MINUTE, true);
|
|
||||||
}
|
|
||||||
if (type !== "milestone") return `DONE`;
|
|
||||||
const eta = await getAdjustedShortTermETA(client, aid);
|
|
||||||
if (eta > 72) return "ETA_TOO_LONG";
|
|
||||||
const now = Date.now();
|
|
||||||
const targetTime = now + eta * HOUR;
|
|
||||||
await scheduleSnapshot(client, aid, type, targetTime);
|
|
||||||
return `DONE`;
|
|
||||||
} catch (e) {
|
|
||||||
if (e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE") {
|
|
||||||
logger.warn(
|
|
||||||
`No available proxy for aid ${job.data.aid}.`,
|
|
||||||
"mq",
|
|
||||||
"fn:takeSnapshotForVideoWorker",
|
|
||||||
);
|
|
||||||
await setSnapshotStatus(client, id, "completed");
|
|
||||||
await scheduleSnapshot(client, aid, type, Date.now() + retryInterval);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
logger.error(e as Error, "mq", "fn:takeSnapshotForVideoWorker");
|
|
||||||
await setSnapshotStatus(client, id, "failed");
|
|
||||||
} finally {
|
|
||||||
client.release();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
export const scheduleCleanupWorker = async (_job: Job) => {
|
|
||||||
const client = await db.connect();
|
|
||||||
try {
|
|
||||||
const query = `
|
|
||||||
SELECT id, aid, type
|
|
||||||
FROM snapshot_schedule
|
|
||||||
WHERE status IN ('pending', 'processing')
|
|
||||||
AND started_at < NOW() - INTERVAL '5 minutes'
|
|
||||||
`;
|
|
||||||
const { rows } = await client.queryObject<{ id: bigint; aid: bigint; type: string }>(query);
|
|
||||||
if (rows.length === 0) return;
|
|
||||||
for (const row of rows) {
|
|
||||||
const id = Number(row.id);
|
|
||||||
const aid = Number(row.aid);
|
|
||||||
const type = row.type;
|
|
||||||
await setSnapshotStatus(client, id, "timeout");
|
|
||||||
await scheduleSnapshot(client, aid, type, Date.now() + 10 * SECOND);
|
|
||||||
logger.log(
|
|
||||||
`Schedule ${id} has no response received for 5 minutes, rescheduled.`,
|
|
||||||
"mq",
|
|
||||||
"fn:scheduleCleanupWorker",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
logger.error(e as Error, "mq", "fn:scheduleCleanupWorker");
|
|
||||||
} finally {
|
|
||||||
client.release();
|
|
||||||
}
|
|
||||||
};
|
|
@@ -1 +0,0 @@
export * from "lib/mq/exec/getLatestVideos.ts";
@@ -1,7 +0,0 @@
import { Queue } from "bullmq";

export const LatestVideosQueue = new Queue("latestVideos");

export const ClassifyVideoQueue = new Queue("classifyVideo");

export const SnapshotQueue = new Queue("snapshot");
@@ -1,67 +0,0 @@
import { MINUTE, SECOND } from "$std/datetime/constants.ts";
import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "lib/mq/index.ts";
import logger from "lib/log/logger.ts";
import { initSnapshotWindowCounts } from "lib/db/snapshotSchedule.ts";
import { db } from "lib/db/init.ts";
import { redis } from "lib/db/redis.ts";

export async function initMQ() {
	const client = await db.connect();
	try {
		await initSnapshotWindowCounts(client, redis);

		await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
			every: 1 * MINUTE,
			immediately: true,
		});

		await ClassifyVideoQueue.upsertJobScheduler("classifyVideos", {
			every: 5 * MINUTE,
			immediately: true,
		});

		await LatestVideosQueue.upsertJobScheduler("collectSongs", {
			every: 3 * MINUTE,
			immediately: true,
		});

		await SnapshotQueue.upsertJobScheduler("snapshotTick", {
			every: 1 * SECOND,
			immediately: true,
		}, {
			opts: {
				removeOnComplete: 1,
				removeOnFail: 1,
			},
		});

		await SnapshotQueue.upsertJobScheduler("bulkSnapshotTick", {
			every: 15 * SECOND,
			immediately: true,
		}, {
			opts: {
				removeOnComplete: 1,
				removeOnFail: 1,
			},
		});

		await SnapshotQueue.upsertJobScheduler("collectMilestoneSnapshots", {
			every: 5 * MINUTE,
			immediately: true,
		});

		await SnapshotQueue.upsertJobScheduler("dispatchRegularSnapshots", {
			every: 30 * MINUTE,
			immediately: true,
		});

		await SnapshotQueue.upsertJobScheduler("scheduleCleanup", {
			every: 30 * MINUTE,
			immediately: true,
		});

		logger.log("Message queue initialized.");
	} finally {
		client.release();
	}
}
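The schedulers above only enqueue repeatable jobs; a separate process has to consume them. A minimal sketch of such a consumer, assuming BullMQ's `Worker` is given the shared ioredis connection and routed to the worker functions exported from `lib/mq/exec/getLatestVideos.ts`; the project's real entry point and job-name routing may differ.

import { Worker } from "bullmq";
import { redis } from "lib/db/redis.ts";
import { collectSongsWorker, getLatestVideosWorker, getVideoInfoWorker } from "lib/mq/exec/getLatestVideos.ts";

// Illustrative wiring only.
const latestVideosWorker = new Worker("latestVideos", async (job) => {
	switch (job.name) {
		case "getLatestVideos":
			return await getLatestVideosWorker(job);
		case "collectSongs":
			return await collectSongsWorker(job);
		case "getVideoInfo":
			return await getVideoInfoWorker(job);
	}
}, { connection: redis });

latestVideosWorker.on("failed", (job, err) => console.error(job?.id, err));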
@@ -1,56 +0,0 @@
import { SlidingWindow } from "lib/mq/slidingWindow.ts";

export interface RateLimiterConfig {
	window: SlidingWindow;
	max: number;
}

export class RateLimiter {
	private readonly configs: RateLimiterConfig[];
	private readonly configEventNames: string[];

	/*
	 * @param name The name of the rate limiter
	 * @param configs The configuration of the rate limiter, containing:
	 * - window: The sliding window to use
	 * - max: The maximum number of events allowed in the window
	 */
	constructor(name: string, configs: RateLimiterConfig[]) {
		this.configs = configs;
		this.configEventNames = configs.map((_, index) => `${name}_config_${index}`);
	}

	/*
	 * Check if the event has reached the rate limit
	 */
	async getAvailability(): Promise<boolean> {
		for (let i = 0; i < this.configs.length; i++) {
			const config = this.configs[i];
			const eventName = this.configEventNames[i];
			const count = await config.window.count(eventName);
			if (count >= config.max) {
				return false;
			}
		}
		return true;
	}

	/*
	 * Trigger an event in the rate limiter
	 */
	async trigger(): Promise<void> {
		for (let i = 0; i < this.configs.length; i++) {
			const config = this.configs[i];
			const eventName = this.configEventNames[i];
			await config.window.event(eventName);
		}
	}

	async clear(): Promise<void> {
		for (let i = 0; i < this.configs.length; i++) {
			const config = this.configs[i];
			const eventName = this.configEventNames[i];
			await config.window.clear(eventName);
		}
	}
}
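A usage sketch for the class above. The `SlidingWindow` constructor arguments shown here (a Redis client and a window length in seconds) are an assumption; check `lib/mq/slidingWindow.ts` for the real signature.

import { SlidingWindow } from "lib/mq/slidingWindow.ts";
import { redis } from "lib/db/redis.ts";

// Assumed constructor: (redisClient, windowSizeInSeconds).
const apiLimiter = new RateLimiter("bili_api", [
	{ window: new SlidingWindow(redis, 1), max: 5 },    // at most 5 events per second
	{ window: new SlidingWindow(redis, 60), max: 100 }, // and 100 per minute
]);

if (await apiLimiter.getAvailability()) {
	await apiLimiter.trigger();
	// ...perform the rate-limited request here
}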
@@ -1,31 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { aidExistsInSongs, getNotCollectedSongs } from "lib/db/songs.ts";
import logger from "lib/log/logger.ts";
import { scheduleSnapshot } from "lib/db/snapshotSchedule.ts";
import { MINUTE } from "$std/datetime/constants.ts";

export async function collectSongs(client: Client) {
	const aids = await getNotCollectedSongs(client);
	for (const aid of aids) {
		const exists = await aidExistsInSongs(client, aid);
		if (exists) continue;
		await insertIntoSongs(client, aid);
		await scheduleSnapshot(client, aid, "new", Date.now() + 10 * MINUTE, true);
		logger.log(`Video ${aid} was added into the songs table.`, "mq", "fn:collectSongs");
	}
}

export async function insertIntoSongs(client: Client, aid: number) {
	await client.queryObject(
		`
		INSERT INTO songs (aid, published_at, duration)
		VALUES (
			$1,
			(SELECT published_at FROM bilibili_metadata WHERE aid = $1),
			(SELECT duration FROM bilibili_metadata WHERE aid = $1)
		)
		ON CONFLICT DO NOTHING
		`,
		[aid],
	);
}
@@ -1,46 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { getVideoDetails } from "lib/net/getVideoDetails.ts";
import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
import logger from "lib/log/logger.ts";
import { ClassifyVideoQueue } from "lib/mq/index.ts";
import { userExistsInBiliUsers, videoExistsInAllData } from "lib/db/allData.ts";
import { HOUR, SECOND } from "$std/datetime/constants.ts";

export async function insertVideoInfo(client: Client, aid: number) {
    const videoExists = await videoExistsInAllData(client, aid);
    if (videoExists) {
        return;
    }
    const data = await getVideoDetails(aid);
    if (data === null) {
        return null;
    }
    const bvid = data.View.bvid;
    const desc = data.View.desc;
    const uid = data.View.owner.mid;
    const tags = data.Tags
        .filter((tag) => !["old_channel", "topic"].includes(tag.tag_type))
        .map((tag) => tag.tag_name).join(",");
    const title = data.View.title;
    const published_at = formatTimestampToPsql(data.View.pubdate * SECOND + 8 * HOUR);
    const duration = data.View.duration;
    await client.queryObject(
        `INSERT INTO bilibili_metadata (aid, bvid, description, uid, tags, title, published_at, duration)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
        [aid, bvid, desc, uid, tags, title, published_at, duration],
    );
    const userExists = await userExistsInBiliUsers(client, aid);
    if (!userExists) {
        await client.queryObject(
            `INSERT INTO bilibili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`,
            [uid, data.View.owner.name, data.Card.card.sign, data.Card.follower],
        );
    } else {
        await client.queryObject(
            `UPDATE bilibili_user SET fans = $1 WHERE uid = $2`,
            [data.Card.follower, uid],
        );
    }
    logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
    await ClassifyVideoQueue.add("classifyVideo", { aid });
}
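One detail in the file above that is easy to misread: pubdate from the bilibili API is a Unix timestamp in seconds, so pubdate * SECOND converts it to milliseconds and + 8 * HOUR shifts it to UTC+8 before formatTimestampToPsql renders it. A worked example with an arbitrary pubdate value:

// SECOND = 1_000 ms and HOUR = 3_600_000 ms in $std/datetime/constants.
// pubdate = 1_700_000_000 s  ->  1_700_000_000_000 ms + 28_800_000 ms = 1_700_028_800_000 ms (shifted to UTC+8)
const exampleMillis = 1700000000 * SECOND + 8 * HOUR; // 1700028800000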
@@ -1,27 +0,0 @@
import netScheduler from "lib/mq/scheduler.ts";
import { VideoInfoData, VideoInfoResponse } from "lib/net/bilibili.d.ts";
import logger from "lib/log/logger.ts";

/*
 * Fetch video metadata from the bilibili API
 * @param {number} aid - The video's aid
 * @param {string} task - The task name used in the scheduler. It can be one of the following:
 * - snapshotVideo
 * - getVideoInfo
 * - snapshotMilestoneVideo
 * @returns {Promise<VideoInfoData | number>} VideoInfoData, or the error code returned by the bilibili API
 * @throws {NetSchedulerError} - The error will be thrown in the following cases:
 * - No proxy is currently available: with error code `NO_PROXY_AVAILABLE`
 * - The native `fetch` function threw an error: with error code `FETCH_ERROR`
 * - The alicloud-fc call threw an error: with error code `ALICLOUD_FC_ERROR`
 */
export async function getVideoInfo(aid: number, task: string): Promise<VideoInfoData | number> {
    const url = `https://api.bilibili.com/x/web-interface/view?aid=${aid}`;
    const data = await netScheduler.request<VideoInfoResponse>(url, task);
    const errMessage = `Error fetching metadata for ${aid}:`;
    if (data.code !== 0) {
        logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
        return data.code;
    }
    return data.data;
}
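A minimal caller sketch for the function above, showing how the VideoInfoData | number union is meant to be consumed; the aid value and the error-code comment are illustrative only.

// Sketch: a numeric result is the raw bilibili error code; anything else is the parsed data.
const result = await getVideoInfo(170001, "getVideoInfo"); // example aid
if (typeof result === "number") {
    // non-zero bilibili code (already logged inside getVideoInfo), e.g. -404 when the video does not exist
} else {
    console.log(result.View.title, result.View.duration);
}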
13 main.ts
@@ -1,13 +0,0 @@
/// <reference no-default-lib="true" />
/// <reference lib="dom" />
/// <reference lib="dom.iterable" />
/// <reference lib="dom.asynciterable" />
/// <reference lib="deno.ns" />

import "$std/dotenv/load.ts";

import { start } from "$fresh/server.ts";
import manifest from "./fresh.gen.ts";
import config from "./fresh.config.ts";

await start(manifest, config);
0 lab/.gitignore → ml/lab/.gitignore (vendored)
Some files were not shown because too many files have changed in this diff.