Compare commits

..

220 Commits

Author SHA1 Message Date
92c3c8eefe
update: dependency version 2025-06-10 15:08:08 +08:00
497ea031d8
fix: missing field in db schema 2025-06-10 15:03:39 +08:00
39ca394a56
fix: several bugs about type 2025-06-10 14:46:28 +08:00
0bd1771f35
improve: date formatting in frontend 2025-06-10 14:38:55 +08:00
328c73c209
fix: 'close' is missing in DialogButtonGroup in ErrorDialog 2025-06-08 18:24:15 +08:00
5ac952ec13
merge: branch 'feat/frontend' into main 2025-06-08 18:09:49 +08:00
2cf5923b28
add: logout in frontend 2025-06-08 18:06:46 +08:00
75973c72ee
add: login, user profile page & license 2025-06-07 19:42:36 +08:00
b40d24721c
add: milestone monitoring for videos close to N million views 2025-06-06 21:37:55 +08:00
0a6ecc6314
add: route for getting videos 2025-06-06 21:03:52 +08:00
b4a0320e3e
version: cvsa/3.15.34 2025-06-06 17:40:32 +08:00
8cf9395354
fix: don't adjust start time for schedule when no proxy is available 2025-06-06 17:11:44 +08:00
1e8d28e194
fix: incorrectly ignored type when collecting videos for archive snapshots 2025-06-06 16:52:27 +08:00
c0340677a1
merge: pull request #8 from ref/next 2025-06-05 22:31:06 +08:00
54a2de0a11
Merge branch 'main' into ref/next 2025-06-05 22:30:19 +08:00
3abd6666c0
add: missing dependency in backend 2025-06-02 05:33:50 +08:00
44e13724fc
fix: missing dependency in backend 2025-06-02 05:17:46 +08:00
dd7e2242a0
add: docker support for nextjs frontend 2025-06-02 05:04:26 +08:00
503a93a09f
update: .tokeignore 2025-06-02 04:06:56 +08:00
507f2c331e
add: several pages, synced with old frontend 2025-06-02 04:04:17 +08:00
a1a4abff46
fix: missing import in @cvsa/core 2025-06-01 21:31:37 +08:00
c6b7736dac
fix: import from the core package in next frontend 2025-06-01 18:48:50 +08:00
fa5ab258da
add: login status detection in frontend 2025-06-01 17:21:09 +08:00
9dd06fa7bc
add: set cookie after signing up 2025-06-01 16:18:01 +08:00
bb7f846305
improve: the structure of the error handling in sign up page 2025-06-01 14:36:55 +08:00
7f9563a2a6
ref: use axios for API requests
add: a useCaptcha hook
2025-06-01 01:53:06 +08:00
d0d9c21aba
update: as-needed URL prefix for i18n routing 2025-05-31 21:56:15 +08:00
44bc99dd9d
add: i18n 2025-05-31 21:29:47 +08:00
2c83b79881
update: termination condition to time-based in classifyVideosWorker 2025-05-31 12:23:01 +08:00
1a20d5afe0
update: schedule archive snapshots to next Saturday midnight
fix: no expire when acquiring lock for classifyVideos
ref: format
2025-05-31 12:13:56 +08:00
ae338f88ee
update: the error dialog 2025-05-31 11:47:45 +08:00
96903dec2b
fix: prevent scrolling of body when modal dialog is shown 2025-05-30 03:24:37 +08:00
58b4e2613c
add: button components in dialog 2025-05-28 23:01:43 +08:00
2b0497c83a
fix: incorrect timezone when adding to bilibili metadata 2025-05-27 22:40:53 +08:00
3bc72720d1
fix: remove bull-board from all scripts to prevent crashing 2025-05-27 22:35:44 +08:00
557a013b42
add: some components, fonts 2025-05-27 22:33:28 +08:00
16cfae8bad
fix: unexpectedly set prop variant of TextField to required 2025-05-25 03:15:51 +08:00
cbd46d4030
improve: code style and some subtle changes 2025-05-25 03:15:10 +08:00
6b93a781b7
add: sign up page 2025-05-24 17:13:26 +08:00
fe2fd4fe36
feat: home page with header 2025-05-24 03:08:32 +08:00
dd70543594
init: next for frontend 2025-05-22 03:15:07 +08:00
1ff71ab241
improve: code quality 2025-05-19 00:13:01 +08:00
cf7a285f57
fix: incorrect import for psql.d.ts, remove unnecessary benchmark files 2025-05-19 00:11:53 +08:00
79a37d927a
fix: forgot to specify type when collecting videos without active schedules;
missing return when eta too long for milestone snapshot
2025-05-19 00:10:33 +08:00
f003e77d52
add: text fields and button in signup page 2025-05-18 20:55:51 +08:00
4addadb035
update: the style in content.css 2025-05-18 15:34:12 +08:00
23917b2976
merge: branch 'feat/backend' into main 2025-05-18 03:53:22 +08:00
6d946f74df
update: rate limiter 2025-05-18 03:52:42 +08:00
c5ba673069
fix: several bugs in captcha rate limiting 2025-05-18 03:28:27 +08:00
fa5ccce83f
fix: incorrect import of type Psql 2025-05-18 00:36:39 +08:00
7786d66dbb
add: complete guarding under uCaptcha 2025-05-18 00:33:43 +08:00
b18b45078f
add: tokenID added to JWT in endpoint GET /captcha/:id/result 2025-05-17 02:15:05 +08:00
1633e56b1e
update: returns JWT in the /captcha/:id/result endpoint 2025-05-11 19:40:24 +08:00
a063f2401b
update: rename the captcha endpoint 2025-05-11 19:14:19 +08:00
44f68993a0
fix: missing env during building in frontend 2025-05-11 16:20:22 +08:00
980dd542ee
add: video info page in frontend 2025-05-11 03:57:26 +08:00
c82a95d0bc
fix: missing aliyun CLI installation process in backend Dockerfile 2025-05-11 02:38:46 +08:00
137c19d74e
ref: move from sliding window to token bucket in rate limiter 2025-05-11 01:50:02 +08:00
5fb1355346
ref: rename endpoint to POST /validation/session 2025-05-10 22:41:10 +08:00
8456bb7485
add: route for validation 2025-05-10 21:48:02 +08:00
01f5e57864
ref: dir structure of backend package 2025-05-10 00:20:20 +08:00
bf00918c00
merge: branch 'main' into feat/backend 2025-05-10 00:19:55 +08:00
44f4ee5b01
fix: incorrect path in build.ts in crawler 2025-05-06 03:42:42 +08:00
cd8fe502ba
update: Dockerfile for crawler
fix: incorrect condition in logger
2025-05-06 03:24:08 +08:00
95681dcbf3
add: Dockerfile for crawler 2025-05-06 03:10:20 +08:00
59f09ca5eb
ref: switch to bun 2025-05-05 01:53:33 +08:00
d686b6a369
add: example docker compose file 2025-05-04 02:59:04 +08:00
d8c74a609a
merge: branch 'main' into ref/deno 2025-05-03 05:02:21 +08:00
9d5c7cc47d
update: new delegate that fits to new infra 2025-05-02 20:14:05 +08:00
7528dcdf81
debug: add debug logging in alicloudFcRequest 2025-05-02 20:01:21 +08:00
811a8261b3
fix: BullMQ crashes when not setting Redis config maxRetriesPerRequest to null 2025-05-02 19:48:17 +08:00
d0aa27b2ad
ref: backend structure, make backend & frontend containerize
fix: some errors in dockerfile
2025-04-29 21:45:38 +08:00
280825cf67
fix: missing dependency in frontend 2025-04-29 06:42:24 +08:00
e658135a81
fix: missing name in packages/core/package.json 2025-04-29 06:23:11 +08:00
4632bd5906
fix: missing dependency that causes failure of resolving import 2025-04-29 06:20:22 +08:00
43b52dee0b
add: complete docker config 2025-04-29 06:12:44 +08:00
8900ac7ec7
update: .gitignore 2025-04-29 04:53:26 +08:00
2772849933
update: .gitignore 2025-04-29 04:52:58 +08:00
784939074a
merge: branch 'ref/deno' into ref/docker 2025-04-29 04:00:57 +08:00
94dd662c40
fix: module not found "net/getVideoInfo.ts" 2025-04-29 04:00:15 +08:00
1ebc0d0c1b
update: .gitignore 2025-04-29 03:52:20 +08:00
728a74f4d3
merge: branch 'feat/backend' into ref/docker 2025-04-29 03:49:27 +08:00
0c8a459d92
update: .gitignore 2025-04-29 03:49:17 +08:00
d7b6792d05
add: basic docker config 2025-04-29 03:45:43 +08:00
50bcb48bd6
fix: incorrect key causing RangeError in date-fns 2025-04-29 00:33:12 +08:00
fe386d2b02
debug: add logs for info.astro 2025-04-29 00:24:05 +08:00
e72b3008d1
ref: switch to bun & postgres.js for backend 2025-04-29 00:07:12 +08:00
a31f702499
fix: missing navigator object in server-side 2025-04-28 06:15:06 +08:00
5a112aeaee
fix: missing navigator object in server-side 2025-04-28 06:05:37 +08:00
d675187f68
add: showing browser version in the test result of VDF benchmark 2025-04-28 06:01:07 +08:00
10de53b773
add: title for test-vdf page 2025-04-28 05:51:07 +08:00
f97e42e7d0
fix: incorrect speedSample 2025-04-28 05:46:24 +08:00
be0ff294be
update: UI of VDF test page 2025-04-28 05:40:02 +08:00
d74ff02a3f
chores: .idea config & gitignore 2025-04-28 02:19:41 +08:00
92e00d033d
improve: UI/UX for info page and search box 2025-04-28 02:15:50 +08:00
6d1698fcb6
update: color system with tailwind theme 2025-04-28 02:02:52 +08:00
7d8361589c
add: link to homepage in login page 2025-04-27 01:07:35 +08:00
66b89eb562
fix: incorrectly hard-coded height for a p tag, resulting in overflow on small screens 2025-04-27 01:06:13 +08:00
d994e67036
fix: incorrect method of importing tailwindcss in content.css 2025-04-27 00:57:32 +08:00
ee40764fdc
merge: branch 'feat/frontend' into main 2025-04-27 00:53:15 +08:00
175ec047cf
update: login & register page 2025-04-27 00:51:25 +08:00
358dd1ee5e
improve: style for search bar & song info page 2025-04-26 23:51:40 +08:00
d9c8253019
add: custom fonts, register page
improve: style
2025-04-26 20:50:30 +08:00
1a86831e90
fix: handle ALICLOUD_PROXY_ERR error in snapshotVideoWorker 2025-04-24 03:42:25 +08:00
a67d896d86
update: WebStorm config 2025-04-19 03:20:15 +08:00
244298913a
fix: comparison condition error in scheduleSnapshot 2025-04-18 22:38:02 +08:00
2c47105913
fix: incorrectly delayed milestone schedule due to UPDATE 2025-04-17 02:18:25 +08:00
6eaaf921d6
fix: ignored the case of inserting schedule when type = 'milestone' in scheduleSnapshot 2025-04-15 22:06:14 +08:00
288e4f9571
fix: incorrect SQL in scheduleSnapshot 2025-04-15 22:00:02 +08:00
907c0a6976
fix: update started_at instead of returning for milestone snapshots in scheduleSnapshot 2025-04-15 21:42:02 +08:00
7689e687ff
ref: extract scheduleCleanup into individual file
improve: logic of scheduleCleanup
2025-04-15 03:50:03 +08:00
651eef0b9e
fix: incorrectly set snapshot status to 'completed' when bilibili returned other status code 2025-04-15 03:34:45 +08:00
68bd46fd8a
improve: logging text in snapshotVideo.ts 2025-04-14 05:39:27 +08:00
13ea8fec8b
improve: remove unused import in snapshotTick.ts 2025-04-14 05:37:06 +08:00
3d9e98c949
ref: extract snapshotVideoWorker into individual file 2025-04-14 05:36:07 +08:00
c7dd1cfc2e
improve: unused imports in snapshotTick.ts 2025-04-14 05:21:06 +08:00
e0a19499e1
ref: move some worker functions into individual files 2025-04-14 05:20:35 +08:00
0930bbe6f4
improve: remove unused import 2025-04-14 05:11:35 +08:00
054d28e796
merge: branch 'main' into ref/structure 2025-04-14 05:10:44 +08:00
0614067278
update: handler for SIGINT & SIGTERM 2025-04-14 00:49:57 +08:00
6df6345ec1
update: status of schedule when no proxy available 2025-04-14 00:46:43 +08:00
bae1f84bea
add: logging for releasing locks 2025-04-14 00:39:17 +08:00
21c918f1fa
fix: BigInt serialization in bulkSnapshotTickWorker 2025-04-14 00:32:06 +08:00
f1651fee30
fix: did not release lock for dispatchArchiveSnapshots when quitting 2025-04-14 00:28:45 +08:00
d0b7d93e5b
fix: incorrect logging text in archiveSnapshotsWorker 2025-04-14 00:26:59 +08:00
7a7c5cada9
feat: full snapshot archiving for all videos 2025-04-14 00:25:39 +08:00
10b761e3db
improve: query for getVideosNearMilestone 2025-04-14 00:07:50 +08:00
1f6411b512
fix: added lower limit in collecting videos near 100k milestone 2025-04-14 00:02:00 +08:00
9ef513eed7
update: add randomness in interval of re-scheduling when no proxy available 2025-04-13 23:55:43 +08:00
d80a6bfcd9
add: restarting when process quit 2025-04-13 23:15:09 +08:00
7a6892ae8e
fix: passing reference causes a const object to be incorrectly changed 2025-04-13 22:22:41 +08:00
b080c51c3e
merge: branch 'main' into ref/structure 2025-04-13 18:38:34 +08:00
f4d08e944a
fix: expand the candidate range of milestone snapshot 2025-04-13 16:50:19 +08:00
a9582722f4
ref: wrap some worker functions with withDbConnection 2025-04-13 05:06:52 +08:00
4ee4d2ede9
ref: move some functions into separate files 2025-04-12 02:08:04 +08:00
f21ff45dd3
version: crawler/1.0.26 2025-04-08 02:12:04 +08:00
b5dbf293a2
fix: missing import map for logger 2025-04-08 02:07:53 +08:00
fc90dad185
ref: move some common files to @core 2025-04-08 02:05:44 +08:00
0b36f52c6c
merge: branch 'feat/backend' into ref/structure 2025-04-08 01:43:46 +08:00
445886815a
add: insert snapshot when videoInfo API hit 2025-04-07 21:45:07 +08:00
8e7a1c3076
add: cache in videoInfo API 2025-04-07 21:37:59 +08:00
71ed0bd66b
feat: videoInfo API in backend 2025-04-07 21:29:53 +08:00
b76d8e589c
merge: branch 'main' into ref/structure 2025-04-07 02:44:21 +08:00
69fb3604b1
fix: missing setSnapshotStatus in takeSnapshotForVideoWorker 2025-04-07 00:41:27 +08:00
d98e24b62f
fix: incorrect return from snapshotTickWorker 2025-04-07 00:36:44 +08:00
c4c9a3a440
update: return the value from the worker in BullMQ worker 2025-04-07 00:35:36 +08:00
da1bea7f41
fix: unlimited addition of milestone tasks 2025-04-07 00:28:28 +08:00
38c0cbd371
fix: missing dependency in frontend 2025-04-06 21:39:11 +08:00
a90747878e
fix: incorrect SQL in getVideosNearMilestone 2025-04-06 16:21:46 +08:00
dd720b18fa
add: debug logging for collectMilestoneSnapshotsWorker 2025-04-06 16:19:05 +08:00
3a83df7954
add: log for collectMilestoneSnapshotsWorker 2025-04-06 15:51:21 +08:00
a8292d7b6b
merge: branch 'gitbook' into ref/structure 2025-04-06 02:01:05 +08:00
0923a34e16
doc: GitBook - No subject 2025-04-05 18:00:09 +00:00
f34633dc35
doc: GitBook - No subject 2025-04-05 17:59:42 +00:00
94e19690d1
ref: rename NetScheduler to NetworkDelegate 2025-04-06 01:38:45 +08:00
20668609dd
fix: SQL query to make WebStorm happy 2025-04-05 18:34:06 +08:00
33c6a3c1f8
ref: the imports of db schema 2025-04-05 18:26:01 +08:00
f39fef0d9a
ref: change all reference to db/schema to @core/db/schema 2025-04-05 15:54:40 +08:00
13ed20cf5c
fix: milestone snapshot being ignored when a pending schedule exists 2025-04-05 06:22:25 +08:00
22b1c337ac
version: frontend/1.3.8 2025-04-05 06:19:50 +08:00
757cbbab7e
merge: branch 'main' into gitbook 2025-04-05 06:15:44 +08:00
b4205049cb
merge: branch 'main' into lab/pred 2025-04-05 06:13:23 +08:00
509c10ded0
merge: branch 'feat/frontend' into feat/backend 2025-04-05 06:11:45 +08:00
99c7a34833
add: speed in benchmark result 2025-04-02 23:37:43 +08:00
d808f36c58
improve: number display in benchmark result 2025-04-02 23:32:26 +08:00
984484cc3f
improve: eliminate UI flickering between tests 2025-04-02 23:30:49 +08:00
46578db3e6
update: lower the difficulty of benchmark 2025-04-02 23:24:52 +08:00
704b5106c6
fix: difficulty too low in benchmark 2025-04-02 23:15:45 +08:00
2e702f23de
improve: UI of showing benchmark result 2025-04-02 23:13:53 +08:00
cb5e24e542
improve: UI of VDF benchmark 2025-04-02 22:55:55 +08:00
b1e071930c
fix: incorrect build config 2025-04-02 22:48:19 +08:00
c3f13cc6e3
update: VDF benchmark 2025-04-02 22:14:32 +08:00
dd829b203d
test: argon2id tester 2025-04-02 21:28:23 +08:00
41f8b42f1c
improve: the registration API 2025-04-01 10:30:44 +08:00
fba56106cc
add: user registration API 2025-03-31 20:07:58 +08:00
8d4edd43bf
update: link in about content 2025-03-31 06:39:52 +08:00
2aead46b51
update: title for frontend page 2025-03-31 06:36:36 +08:00
79af12e526
update: README 2025-03-31 06:35:44 +08:00
cfd4fc3d21
fix: incorrect API used in validation 2025-03-31 06:24:54 +08:00
b53366dbab
doc: GitBook - No subject 2025-03-30 22:20:54 +00:00
7a46f31d7f
version: backend/0.2.3 2025-03-31 06:09:49 +08:00
cf33c4922d
fix: incorrect parsing for id in API route /video/:id/snapshots 2025-03-31 06:09:28 +08:00
aa75fdd63e
fix: incorrect schema check for numbers 2025-03-31 06:05:59 +08:00
9c0783c607
fix: incorrect type check for number in request params 2025-03-31 06:02:07 +08:00
81847cc090
merge: branch 'main' into lab/pred 2025-03-31 05:38:17 +08:00
28772fcd9f
merge: branch 'gitbook' of https://github.com/alikia2x/cvsa into gitbook 2025-03-31 05:34:55 +08:00
4d2b002264
merge: branch 'main' into gitbook 2025-03-31 05:34:24 +08:00
834f81eff0
doc: GitBook - No subject 2025-03-30 21:29:02 +00:00
35f4e0e0d4
merge: branch 'main' into feat/backend 2025-03-31 05:13:24 +08:00
9af7e52464
version: crawler/1.0.17 2025-03-31 05:12:18 +08:00
a2b55d0900
feat: root path for API
improve: the GET video snapshots API
2025-03-31 05:07:22 +08:00
1322cc4671
init: backend 2025-03-31 02:47:33 +08:00
b2edaf8fc4
improve: prevent Infinity for getAdjustedShortTermETA 2025-03-30 21:16:05 +08:00
da8b2d3b4d
fix: timeout interval too short in cleanup 2025-03-30 20:48:40 +08:00
64a7f13da7
merge: branch 'feat/frontend' into hotfix 2025-03-30 20:48:03 +08:00
39acac09e7
version: frontend/1.2.6 2025-03-30 19:10:23 +08:00
7aa988f0fe
fix: remove deno from building & previewing 2025-03-30 19:07:02 +08:00
6fd34a88e7
improve: UI for song info page & titlebar on mobile 2025-03-30 18:54:10 +08:00
cded4cd825
fix: redundant 'key' attribute in HTML tag 2025-03-30 18:42:24 +08:00
1346700c35
improve: search bar supports bvid 2025-03-30 08:02:36 +08:00
91566fce83
update: table view for videoInfo in song page 2025-03-30 08:00:15 +08:00
db5ea97fae
update: song info page 2025-03-30 07:58:37 +08:00
cc202fb3c6
add: about page 2025-03-30 07:48:43 +08:00
8b17f8177c
update: song page 2025-03-30 07:31:24 +08:00
4fe266ce82
add: few pages 2025-03-30 06:45:27 +08:00
f585b49ee4
add: frontend with Astro 2025-03-30 03:57:21 +08:00
19d7276280
fix: missing param in SQL for inserting into bilibili_metadata 2025-03-30 03:24:11 +08:00
f7806c6a39
add: tailwind dependency 2025-03-30 03:23:38 +08:00
106049bfc6
update: insert cover into bilibili_metadata 2025-03-30 02:40:22 +08:00
e0776a452e
ref: format 2025-03-29 22:23:31 +08:00
291a21d82a
init: astro frontend 2025-03-29 19:10:52 +08:00
f401417ce2
update: remove refactor guide 2025-03-29 19:02:48 +08:00
0d18c921cb
merge: branch 'main' into lab/pred 2025-03-29 18:59:09 +08:00
be3ff00edc
fix: incorrect path for model file 2025-03-29 18:52:44 +08:00
879a6604e5
fix: missing export in deno.json 2025-03-29 18:43:47 +08:00
d88ad099c4
ref: monorepo support 2025-03-29 18:13:57 +08:00
636c5e25cb
ref: move ML stuff
add: .idea to VCS, the refactor guide
2025-03-29 14:13:15 +08:00
ba6b8bd5b3
temp: try to modify some features for the pred model 2025-03-20 00:53:59 +08:00
2ed909268e
add: a short-term feature in pred model 2025-03-16 15:10:32 +08:00
35b84787ad
doc: GitBook - No subject 2025-03-15 13:42:19 +00:00
492 changed files with 24689 additions and 3032 deletions

96
.dockerignore Normal file

@ -0,0 +1,96 @@
built/*
tests/cases/rwc/*
tests/cases/perf/*
!tests/cases/webharness/compilerToString.js
test-args.txt
~*.docx
\#*\#
.\#*
tests/baselines/local/*
tests/baselines/local.old/*
tests/services/baselines/local/*
tests/baselines/prototyping/local/*
tests/baselines/rwc/*
tests/baselines/reference/projectOutput/*
tests/baselines/local/projectOutput/*
tests/baselines/reference/testresults.tap
tests/baselines/symlinks/*
tests/services/baselines/prototyping/local/*
tests/services/browser/typescriptServices.js
src/harness/*.js
src/compiler/diagnosticInformationMap.generated.ts
src/compiler/diagnosticMessages.generated.json
src/parser/diagnosticInformationMap.generated.ts
src/parser/diagnosticMessages.generated.json
rwc-report.html
*.swp
build.json
*.actual
tests/webTestServer.js
tests/webTestServer.js.map
tests/webhost/*.d.ts
tests/webhost/webtsc.js
tests/cases/**/*.js
tests/cases/**/*.js.map
*.config
scripts/eslint/built/
scripts/debug.bat
scripts/run.bat
scripts/**/*.js
scripts/**/*.js.map
coverage/
internal/
**/.DS_Store
.settings
**/.vs
**/.vscode/*
!**/.vscode/tasks.json
!**/.vscode/settings.template.json
!**/.vscode/launch.template.json
!**/.vscode/extensions.json
!tests/cases/projects/projectOption/**/node_modules
!tests/cases/projects/NodeModulesSearch/**/*
!tests/baselines/reference/project/nodeModules*/**/*
yarn.lock
yarn-error.log
.parallelperf.*
tests/baselines/reference/dt
.failed-tests
TEST-results.xml
package-lock.json
.eslintcache
*v8.log
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# npm dependencies
node_modules/
# project specific
logs/
__pycache__
ml/filter/runs
ml/pred/runs
ml/pred/checkpoints
ml/pred/observed
ml/data/
ml/filter/checkpoints
scripts
model/
.astro
# Database
*.dump
*.db
*.sqlite
*.sqlite3
data/
docker-compose.yml

1
.gitattributes vendored Normal file

@ -0,0 +1 @@
*.woff2 filter=lfs diff=lfs merge=lfs -text

95
.gitignore vendored

@ -1,87 +1,44 @@
built/*
tests/cases/rwc/*
tests/cases/perf/*
!tests/cases/webharness/compilerToString.js
test-args.txt
~*.docx
\#*\#
.\#*
tests/baselines/local/*
tests/baselines/local.old/*
tests/services/baselines/local/*
tests/baselines/prototyping/local/*
tests/baselines/rwc/*
tests/baselines/reference/projectOutput/*
tests/baselines/local/projectOutput/*
tests/baselines/reference/testresults.tap
tests/baselines/symlinks/*
tests/services/baselines/prototyping/local/*
tests/services/browser/typescriptServices.js
src/harness/*.js
src/compiler/diagnosticInformationMap.generated.ts
src/compiler/diagnosticMessages.generated.json
src/parser/diagnosticInformationMap.generated.ts
src/parser/diagnosticMessages.generated.json
rwc-report.html
*.swp
build.json
*.actual
tests/webTestServer.js
tests/webTestServer.js.map
tests/webhost/*.d.ts
tests/webhost/webtsc.js
tests/cases/**/*.js
tests/cases/**/*.js.map
*.config
scripts/eslint/built/
scripts/debug.bat
scripts/run.bat
scripts/**/*.js
scripts/**/*.js.map
coverage/
internal/
**/.DS_Store
.settings
**/.vs
**/.vscode/*
!**/.vscode/tasks.json
!**/.vscode/settings.template.json
!**/.vscode/launch.template.json
!**/.vscode/extensions.json
!tests/cases/projects/projectOption/**/node_modules
!tests/cases/projects/NodeModulesSearch/**/*
!tests/baselines/reference/project/nodeModules*/**/*
.idea
yarn.lock
yarn-error.log
.parallelperf.*
tests/baselines/reference/dt
.failed-tests
TEST-results.xml
package-lock.json
.eslintcache
*v8.log
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
.env.*
# Fresh build directory
_fresh/
# npm dependencies
node_modules/
# project specific
logs/
__pycache__
filter/runs
pred/runs
pred/checkpoints
data/
filter/checkpoints
ml/filter/runs
ml/pred/runs
ml/pred/checkpoints
ml/pred/observed
ml/data/
ml/filter/checkpoints
scripts
model/
.astro
# Database
*.dump
*.db
*.sqlite
*.sqlite3
data/
redis/
# Build
dist/
build/
docker-compose.yml
ucaptcha-config.yaml

10
.idea/.gitignore vendored Normal file

@ -0,0 +1,10 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
dataSources.xml
MarsCodeWorkspaceAppSettings.xml

6
.idea/bun.xml Normal file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="BunSettings">
<option name="bunPath" value="$USER_HOME$/.bun/bin/bun" />
</component>
</project>


@ -0,0 +1,55 @@
<component name="ProjectCodeStyleConfiguration">
<code_scheme name="Project" version="173">
<option name="LINE_SEPARATOR" value="&#10;" />
<HTMLCodeStyleSettings>
<option name="HTML_SPACE_INSIDE_EMPTY_TAG" value="true" />
</HTMLCodeStyleSettings>
<JSCodeStyleSettings version="0">
<option name="FORCE_SEMICOLON_STYLE" value="true" />
<option name="SPACE_BEFORE_FUNCTION_LEFT_PARENTH" value="false" />
<option name="FORCE_QUOTE_STYlE" value="true" />
<option name="ENFORCE_TRAILING_COMMA" value="Remove" />
<option name="SPACES_WITHIN_OBJECT_LITERAL_BRACES" value="true" />
<option name="SPACES_WITHIN_IMPORTS" value="true" />
</JSCodeStyleSettings>
<TypeScriptCodeStyleSettings version="0">
<option name="FORCE_SEMICOLON_STYLE" value="true" />
<option name="SPACE_BEFORE_FUNCTION_LEFT_PARENTH" value="false" />
<option name="FORCE_QUOTE_STYlE" value="true" />
<option name="ENFORCE_TRAILING_COMMA" value="Remove" />
<option name="SPACES_WITHIN_OBJECT_LITERAL_BRACES" value="true" />
<option name="SPACES_WITHIN_IMPORTS" value="true" />
</TypeScriptCodeStyleSettings>
<VueCodeStyleSettings>
<option name="INTERPOLATION_NEW_LINE_AFTER_START_DELIMITER" value="false" />
<option name="INTERPOLATION_NEW_LINE_BEFORE_END_DELIMITER" value="false" />
</VueCodeStyleSettings>
<codeStyleSettings language="HTML">
<option name="SOFT_MARGINS" value="120" />
<indentOptions>
<option name="CONTINUATION_INDENT_SIZE" value="4" />
<option name="USE_TAB_CHARACTER" value="true" />
</indentOptions>
</codeStyleSettings>
<codeStyleSettings language="JavaScript">
<option name="SOFT_MARGINS" value="120" />
<indentOptions>
<option name="USE_TAB_CHARACTER" value="true" />
</indentOptions>
</codeStyleSettings>
<codeStyleSettings language="TypeScript">
<option name="SOFT_MARGINS" value="120" />
<indentOptions>
<option name="USE_TAB_CHARACTER" value="true" />
</indentOptions>
</codeStyleSettings>
<codeStyleSettings language="Vue">
<option name="SOFT_MARGINS" value="120" />
<indentOptions>
<option name="INDENT_SIZE" value="4" />
<option name="TAB_SIZE" value="4" />
<option name="USE_TAB_CHARACTER" value="true" />
</indentOptions>
</codeStyleSettings>
</code_scheme>
</component>


@ -0,0 +1,5 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="USE_PER_PROJECT_SETTINGS" value="true" />
</state>
</component>

37
.idea/cvsa.iml Normal file

@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.tmp" />
<excludeFolder url="file://$MODULE_DIR$/temp" />
<excludeFolder url="file://$MODULE_DIR$/tmp" />
<excludeFolder url="file://$MODULE_DIR$/ml/data" />
<excludeFolder url="file://$MODULE_DIR$/doc" />
<excludeFolder url="file://$MODULE_DIR$/ml/filter/checkpoints" />
<excludeFolder url="file://$MODULE_DIR$/ml/filter/runs" />
<excludeFolder url="file://$MODULE_DIR$/ml/lab/data" />
<excludeFolder url="file://$MODULE_DIR$/ml/lab/temp" />
<excludeFolder url="file://$MODULE_DIR$/logs" />
<excludeFolder url="file://$MODULE_DIR$/model" />
<excludeFolder url="file://$MODULE_DIR$/src/db" />
<excludeFolder url="file://$MODULE_DIR$/.idea" />
<excludeFolder url="file://$MODULE_DIR$/.vscode" />
<excludeFolder url="file://$MODULE_DIR$/.zed" />
<excludeFolder url="file://$MODULE_DIR$/packages/frontend/.astro" />
<excludeFolder url="file://$MODULE_DIR$/scripts" />
<excludeFolder url="file://$MODULE_DIR$/.astro" />
<excludeFolder url="file://$MODULE_DIR$/ml/pred/checkpoints" />
<excludeFolder url="file://$MODULE_DIR$/ml/pred/observed" />
<excludeFolder url="file://$MODULE_DIR$/ml/pred/runs" />
<excludeFolder url="file://$MODULE_DIR$/packages/backend/logs" />
<excludeFolder url="file://$MODULE_DIR$/packages/core/net/logs" />
<excludeFolder url="file://$MODULE_DIR$/packages/crawler/logs" />
<excludeFolder url="file://$MODULE_DIR$/data" />
<excludeFolder url="file://$MODULE_DIR$/redis" />
<excludeFolder url="file://$MODULE_DIR$/ml" />
<excludeFolder url="file://$MODULE_DIR$/src" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

7
.idea/deno.xml Normal file

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DenoSettings">
<option name="denoInit" value="{&#10; &quot;enable&quot;: true,&#10; &quot;lint&quot;: true,&#10; &quot;unstable&quot;: true,&#10; &quot;importMap&quot;: &quot;import_map.json&quot;,&#10; &quot;config&quot;: &quot;deno.json&quot;,&#10; &quot;fmt&quot;: {&#10; &quot;useTabs&quot;: true,&#10; &quot;lineWidth&quot;: 120,&#10; &quot;indentWidth&quot;: 4,&#10; &quot;semiColons&quot;: true,&#10; &quot;proseWrap&quot;: &quot;always&quot;&#10; }&#10;}" />
<option name="useDenoValue" value="DISABLE" />
</component>
</project>


@ -0,0 +1,36 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<option name="scopesOrder">
<list>
<option value="Astro" />
<option value="All Changed Files" />
<option value="Open Files" />
<option value="Project Files" />
<option value="Scratches and Consoles" />
<option value="Tests" />
</list>
</option>
<inspection_tool class="ES6UnusedImports" enabled="true" level="WARNING" enabled_by_default="true">
<scope name="Astro" level="INFORMATION" enabled="false" editorAttributes="INFORMATION_ATTRIBUTES" />
</inspection_tool>
<inspection_tool class="GrazieInspection" enabled="false" level="GRAMMAR_ERROR" enabled_by_default="false" />
<inspection_tool class="HtmlUnknownAttribute" enabled="true" level="WARNING" enabled_by_default="true">
<option name="myValues">
<value>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="autocorrect" />
</list>
</value>
</option>
<option name="myCustomValuesEnabled" value="true" />
</inspection_tool>
<inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
<inspection_tool class="TsLint" enabled="true" level="WARNING" enabled_by_default="true" />
</profile>
</component>

8
.idea/modules.xml Normal file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/cvsa.iml" filepath="$PROJECT_DIR$/.idea/cvsa.iml" />
</modules>
</component>
</project>

3
.idea/scopes/Astro.xml Normal file

@ -0,0 +1,3 @@
<component name="DependencyValidationManager">
<scope name="Astro" pattern="file:*.astro" />
</component>

6
.idea/sqldialects.xml Normal file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SqlDialectMappings">
<file url="PROJECT" dialect="PostgreSQL" />
</component>
</project>

6
.idea/vcs.xml Normal file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

8
.prettierrc Normal file

@ -0,0 +1,8 @@
{
"useTabs": true,
"tabWidth": 4,
"trailingComma": "none",
"singleQuote": false,
"printWidth": 120,
"endOfLine": "lf"
}


@ -2,4 +2,10 @@ data
*.json
*.svg
*.txt
*.md
*.md
*config*
Inter.css
MiSans.css
*.yaml
*.yml
*.mdx


@ -1,6 +0,0 @@
{
"recommendations": [
"denoland.vscode-deno",
"bradlc.vscode-tailwindcss"
]
}


@ -1,35 +0,0 @@
// Folder-specific settings
//
// For a full list of overridable settings, and general information on folder-specific settings,
// see the documentation: https://zed.dev/docs/configuring-zed#settings-files
{
"lsp": {
"deno": {
"settings": {
"deno": {
"enable": true
}
}
}
},
"languages": {
"TypeScript": {
"language_servers": [
"deno",
"!typescript-language-server",
"!vtsls",
"!eslint"
],
"formatter": "language_server"
},
"TSX": {
"language_servers": [
"deno",
"!typescript-language-server",
"!vtsls",
"!eslint"
],
"formatter": "language_server"
}
}
}

23
Dockerfile.backend Normal file

@ -0,0 +1,23 @@
FROM oven/bun:1.2.8-debian
WORKDIR /app
COPY ./packages/core ./core
COPY ./packages/backend/package.json ./packages/backend/bun.lock ./backend/
RUN apt update && apt install -y curl
RUN ln -s /bin/uname /usr/bin/uname
RUN /bin/bash -c "$(curl -fsSL https://aliyuncli.alicdn.com/install.sh)"
WORKDIR backend
RUN bun install
COPY ./packages/backend/ .
RUN mkdir -p /app/logs
CMD ["bun", "start"]

19
Dockerfile.crawler Normal file

@ -0,0 +1,19 @@
FROM oven/bun:1.2.8-debian
WORKDIR /app
COPY . .
RUN bun i
RUN mkdir -p /app/logs
RUN apt update && apt install -y curl
RUN ln -s /bin/uname /usr/bin/uname
RUN /bin/bash -c "$(curl -fsSL https://aliyuncli.alicdn.com/install.sh)"
WORKDIR packages/crawler
CMD ["bun", "all"]

23
Dockerfile.frontend Normal file

@ -0,0 +1,23 @@
FROM oven/bun
ARG BACKEND_URL
WORKDIR /app
COPY . .
RUN bun install
WORKDIR packages/frontend
RUN bun run build
ENV HOST=0.0.0.0
ENV PORT=4321
ENV BACKEND_URL=${BACKEND_URL}
EXPOSE 4321
RUN mkdir -p /app/logs
CMD ["bun", "/app/packages/frontend/dist/server/entry.mjs"]

14
Dockerfile.next Normal file

@ -0,0 +1,14 @@
FROM node:lts-slim AS production
WORKDIR /app
COPY ./packages/next/.next ./.next
COPY ./packages/next/public ./public
COPY ./packages/next/package.json ./package.json
COPY ./packages/next/node_modules ./node_modules
ENV NODE_ENV production
EXPOSE 7400
CMD ["npm", "start"]


@ -2,6 +2,11 @@
「中V档案馆」 (CVSA) is a website dedicated to collecting and presenting Chinese vocal synthesis works and related information.
## News - Test Version Online
CVSA has launched a test frontend and API, available at [projectcvsa.com](https://projectcvsa.com) and [api.projectcvsa.com](https://api.projectcvsa.com) respectively.
For how to call the API, see the [API documentation](https://docs.projectcvsa.com/api-doc/).
## Background and Related Work
Across the Internet, the sites that have organized and collected information on "Chinese vocal synthesis" or "Chinese virtual singers" (often abbreviated as 中V or VC) in a relatively systematic and comprehensive way are mainly the following:
@ -31,7 +36,7 @@
## Technical Architecture
See the [CVSA documentation](https://cvsa.gitbook.io/).
See the [CVSA documentation](https://docs.projectcvsa.com/).
## Open License

1732
bun.lock Normal file

File diff suppressed because it is too large


@ -1,12 +0,0 @@
import { JSX } from "preact";
import { IS_BROWSER } from "$fresh/runtime.ts";
export function Button(props: JSX.HTMLAttributes<HTMLButtonElement>) {
return (
<button
{...props}
disabled={!IS_BROWSER || props.disabled}
class="px-2 py-1 border-gray-500 border-2 rounded bg-white hover:bg-gray-200 transition-colors"
/>
);
}


@ -1,55 +0,0 @@
import json
import random


def process_data(input_file, output_file):
    """
    Read data from the input file and find the lines where "model" and "human" disagree.
    Delete the "model" key and rename "human" to "label",
    then append the processed data to the output file.
    Before writing, load all samples already present in output_file
    and deduplicate against them using the "aid" key.

    Args:
        input_file (str): path to the input file
        output_file (str): path to the output file
    """
    # Load the data already in output_file, for deduplication
    existing_data = set()
    try:
        with open(output_file, 'r', encoding='utf-8') as f_out:
            for line in f_out:
                try:
                    data = json.loads(line)
                    existing_data.add(data['aid'])
                except json.JSONDecodeError:
                    pass  # ignore JSON decode errors and continue with the next line
    except FileNotFoundError:
        pass  # ignore if the file does not exist yet

    with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'a', encoding='utf-8') as f_out:
        for line in f_in:
            try:
                data = json.loads(line)
                if data['model'] != data['human'] or random.random() < 0.2:
                    if data['aid'] not in existing_data:  # check whether the aid already exists
                        del data['model']
                        data['label'] = data['human']
                        del data['human']
                        f_out.write(json.dumps(data, ensure_ascii=False) + '\n')
                        existing_data.add(data['aid'])  # record the new aid in the set
            except json.JSONDecodeError as e:
                print(f"JSON decode error: {e}")
                print(f"Offending line: {line.strip()}")
            except KeyError as e:
                print(f"KeyError: key '{e}' does not exist")
                print(f"Offending line: {line.strip()}")


# Call the function to process the data
input_file = 'real_test.jsonl'
output_file = 'labeled_data.jsonl'
process_data(input_file, output_file)
print(f"Done; results written to {output_file}")


@ -1,60 +0,0 @@
{
"lock": false,
"tasks": {
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
"cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
"manifest": "deno task cli manifest $(pwd)",
"start": "deno run -A --watch=static/,routes/ dev.ts",
"build": "deno run -A dev.ts build",
"preview": "deno run -A main.ts",
"update": "deno run -A -r https://fresh.deno.dev/update .",
"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write --allow-run ./src/worker.ts",
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
"adder": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
},
"lint": {
"rules": {
"tags": ["fresh", "recommended"]
}
},
"exclude": ["**/_fresh/*"],
"imports": {
"@std/assert": "jsr:@std/assert@1",
"$fresh/": "https://deno.land/x/fresh@1.7.3/",
"preact": "https://esm.sh/preact@10.22.0",
"preact/": "https://esm.sh/preact@10.22.0/",
"@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
"@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
"tailwindcss": "npm:tailwindcss@3.4.1",
"tailwindcss/": "npm:/tailwindcss@3.4.1/",
"tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
"$std/": "https://deno.land/std@0.216.0/",
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
"bullmq": "npm:bullmq",
"lib/": "./lib/",
"ioredis": "npm:ioredis",
"@bull-board/api": "npm:@bull-board/api",
"@bull-board/express": "npm:@bull-board/express",
"express": "npm:express",
"src/": "./src/",
"onnxruntime": "npm:onnxruntime-node@1.19.2",
"chalk": "npm:chalk"
},
"compilerOptions": {
"jsx": "react-jsx",
"jsxImportSource": "preact"
},
"nodeModulesDir": "auto",
"fmt": {
"useTabs": true,
"lineWidth": 120,
"indentWidth": 4,
"semiColons": true,
"proseWrap": "always"
}
}

7
dev.ts

@ -1,7 +0,0 @@
#!/usr/bin/env -S deno run -A --watch=static/,routes/
import dev from "$fresh/dev.ts";
import config from "./fresh.config.ts";
import "$std/dotenv/load.ts";
await dev(import.meta.url, "./main.ts", config);


@ -1,22 +1,21 @@
# Table of contents
- [Welcome](README.md)
* [Welcome](README.md)
## About
- [About CVSA Project](about/this-project.md)
- [Scope of Inclusion](about/scope-of-inclusion.md)
* [About CVSA Project](about/this-project.md)
* [Scope of Inclusion](about/scope-of-inclusion.md)
## Architecture
- [Overview](architecure/overview.md)
- [Database Structure](architecure/database-structure/README.md)
- [Type of Song](architecure/database-structure/type-of-song.md)
- [Message Queue](architecure/message-queue/README.md)
- [VideoTagsQueue](architecure/message-queue/videotagsqueue.md)
- [Artificial Intelligence](architecure/artificial-intelligence.md)
* [Overview](architecure/overview.md)
* [Crawler](architecure/crawler.md)
* [Database Structure](architecure/database-structure/README.md)
* [Type of Song](architecure/database-structure/type-of-song.md)
* [Artificial Intelligence](architecure/artificial-intelligence.md)
## API Doc
- [Catalog](api-doc/catalog.md)
- [Songs](api-doc/songs.md)
* [Catalog](api-doc/catalog.md)
* [Songs](api-doc/songs.md)


@ -7,13 +7,34 @@ For a **song**, it must meet the following conditions to be included in CVSA:
### Category 30
In principle, the songs featured in CVSA must be included in a video categorized under VOCALOID·UTAU (ID 30) that is
posted on Bilibili. In some special cases, this rule may not be enforced.&#x20;
In principle, the songs must be featured in a video that is categorized under the VOCALOID·UTAU (ID 30) category in
[Bilibili](https://en.wikipedia.org/wiki/Bilibili) in order to be observed by our
[automation program](../architecure/overview.md#crawler). We welcome editors to manually add songs that have not been
uploaded to bilibili / categorized under this category.
### At Least One Line of Chinese
#### NEWS
The lyrics of the song must contain at least one line in Chinese. This means that even if a voicebank that only supports
Chinese is used, if the lyrics of the song do not contain Chinese, it will not be included in the CVSA.
Recently, Bilibili appears to be taking the sub-category offline. This means the VOCALOID·UTAU category can no longer be
reached from the frontend, and producers can no longer upload videos to this category (instead, they can only choose the
parent category "Music").&#x20;
According to our experiments, Bilibili still retains the code logic of sub-categories in the backend, and newly
published songs may still be in the VOCALOID·UTAU sub-category, and the related APIs can still work normally. However,
there are [reports](https://www.bilibili.com/opus/1041223385394184199) that some of the new songs have been placed under
the "Music General" sub-category.\
We are still waiting for Bilibili's follow-up actions, and in the future, we may adjust the scope of our automated
program's crawling.
### At Least One Line of Chinese / Chinese Virtual Singer
The lyrics of the song must contain at least one line in Chinese. Otherwise, the song will be included in CVSA only if a
Chinese virtual singer was used.
We define a **Chinese virtual singer** as follows:
1. The singer primarily uses a Chinese voicebank (i.e. the most widely used voicebank for the singer is Chinese)
2. The singer is operated by a company, organization, individual or group located in Mainland China, Hong Kong, Macau or
Taiwan.
### Using Vocal Synthesizer


@ -12,3 +12,10 @@ Located at `/filter/` under project root dir, it classifies a video in the
- 0: Not related to Chinese vocal synthesis
- 1: An original song with Chinese vocal synthesis
- 2: A cover/remix song with Chinese vocal synthesis
### The Predictor
Located at `/pred/` under the project root dir, it predicts the future views of a video. This is a regression model
that takes a video's historical view trend, other contextual information (such as the current time), and the future
time point to be predicted as feature inputs, and outputs the increment in the video's view count from "now" to that
future time point.
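To make that contract concrete, here is a minimal TypeScript sketch of the predictor's inputs and output. The type and function names, and the linear-extrapolation body, are illustrative stand-ins for the learned model, not code from `/pred/`.

```typescript
// Hypothetical types: the real predictor is a learned regression model, not this baseline.
interface SnapshotPoint {
	time: number; // Unix timestamp (ms) of the observation
	views: number; // view count observed at that time
}

// Stand-in for the predictor: linearly extrapolates the observed view rate to the
// target time and returns the predicted view-count increment from `now` onwards.
function predictViewIncrement(history: SnapshotPoint[], now: number, targetTime: number): number {
	if (history.length < 2) return 0;
	const first = history[0];
	const last = history[history.length - 1];
	const rate = (last.views - first.views) / (last.time - first.time); // views per ms
	return Math.max(0, rate * (targetTime - now));
}
```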


@ -0,0 +1,4 @@
# Crawler
A central aspect of CVSA's technical design is its emphasis on automation. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.
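As a minimal sketch of this queue-driven design, assuming a local Redis instance (the queue name, job name, and payload below are illustrative, not the crawler's actual ones):

```typescript
import { Queue, Worker } from "bullmq";

const connection = { host: "127.0.0.1", port: 6379 }; // assumed local Redis

// Producer: enqueue a job asking for a snapshot of one video.
const snapshotQueue = new Queue("snapshot", { connection });
await snapshotQueue.add("snapshotVideo", { aid: 78977256 });

// Consumer: a worker processes jobs concurrently; in CVSA the result would be
// persisted to PostgreSQL, which is elided here.
new Worker(
	"snapshot",
	async (job) => {
		console.log(`taking snapshot for aid ${job.data.aid}`);
	},
	{ connection, concurrency: 4 }
);
```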


@ -10,3 +10,6 @@ following tables:
- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#category-30).
- labelling\_result: Contains label of videos in `all_data`tagged by our
[AI system](../artificial-intelligence.md#the-filter).
- video\_snapshot: Statistical data of videos that are fetched regularly (e.g., number of views); we call this
fetching process a "snapshot". A sketch of the row shape follows this list.
- snapshot\_schedule: The scheduling information for video snapshots.
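For orientation, a `video_snapshot` row roughly corresponds to the TypeScript shape below; the fields are inferred from the snapshot API response documented elsewhere in this diff, so treat it as an assumption rather than the authoritative schema.

```typescript
// Assumed row shape, inferred from the snapshot API fields (not the exact DB schema).
interface VideoSnapshot {
	id: number; // snapshot ID
	aid: number; // the video's av number
	views: number;
	coins: number;
	likes: number;
	favorites: number;
	shares: number;
	danmakus: number;
	replies: number;
}
```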


@ -1 +0,0 @@
# Message Queue


@ -1,12 +0,0 @@
# VideoTagsQueue
### Jobs
The VideoTagsQueue contains two jobs: `getVideoTags` and `getVideosTags`. The former is used to fetch the tags of a
video, and the latter is responsible for scheduling the former.
### Return value
The return values of the two jobs follow the table below:
<table><thead><tr><th width="168">Return Value</th><th>Description</th></tr></thead><tbody><tr><td>0</td><td>In <code>getVideoTags</code>: the tags were successfully fetched<br>In <code>getVideosTags</code>: all null-tags videos have a corresponding job successfully queued.</td></tr><tr><td>1</td><td>Used in <code>getVideoTags</code>: a <code>fetch</code> error occurred during the job</td></tr><tr><td>2</td><td>Used in <code>getVideoTags</code>: we've reached the rate limit set in NetScheduler</td></tr><tr><td>3</td><td>Used in <code>getVideoTags</code>: didn't provide an aid in the job data</td></tr><tr><td>4</td><td>Used in <code>getVideosTags</code>: there's no video with NULL as `tags`</td></tr><tr><td>1xx</td><td>Used in <code>getVideosTags</code>: the number of tasks in the queue has exceeded the limit, thus <code>getVideosTags</code> stops adding tasks. <code>xx</code> is the number of jobs added to the queue during execution.</td></tr></tbody></table>


@ -1,5 +1,4 @@
---
icon: globe-pointer
layout:
title:
visible: true
@ -15,5 +14,29 @@ layout:
# Overview
Automation is the biggest highlight of CVSA's technical design. To achieve this, we use a message queue powered by
[BullMQ](https://bullmq.io/) to concurrently process various tasks in the data collection life cycle.
The CVSA is a [monorepo](https://en.wikipedia.org/wiki/Monorepo) codebase, mainly using TypeScript as the development language. With [Deno workspace](https://docs.deno.com/runtime/fundamentals/workspaces/), the major part of the codebase is under `packages/`.&#x20;
**Project structure:**
```
cvsa
├── deno.json
├── packages
│ ├── backend
│ ├── core
│ ├── crawler
│ └── frontend
└── README.md
```
**Package Breakdown:**
* **`backend`**: This package houses the server-side logic, built with the [Hono](https://hono.dev/) web framework. It's responsible for interacting with the database and exposing data through REST and GraphQL APIs for consumption by the frontend, internal applications, and third-party developers (see the route sketch after this list).
* **`frontend`**: The user-facing web interface of CVSA is developed using [Astro](https://astro.build/). This package handles the presentation layer, displaying information fetched from the database.
* **`crawler`**: This automated data collection system is a key component of CVSA. It's designed to automatically discover and gather new song data from bilibili, as well as track relevant statistics over time.
* **`core`**: This package contains reusable and generic code that is utilized across multiple workspaces within the CVSA monorepo.
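A minimal sketch of what such a Hono route can look like; the route shape mirrors the snapshot endpoint documented later in this diff, while the handler body is illustrative rather than the backend's actual implementation:

```typescript
import { Hono } from "hono";

const app = new Hono();

// Illustrative handler: the real backend queries PostgreSQL here.
app.get("/video/:id/snapshots", (c) => {
	const id = c.req.param("id"); // e.g. "av78977256"
	const ps = Number(c.req.query("ps") ?? 1000); // page size, defaulting as in the API spec
	return c.json({ id, ps, snapshots: [] });
});

export default app;
```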
### Crawler
Automation is the biggest highlight of CVSA's technical design. The data collection process within the `crawler` is orchestrated using a message queue powered by [BullMQ](https://bullmq.io/). This enables concurrent processing of various tasks involved in the data collection lifecycle. State management and data persistence are handled by a combination of Redis for caching and real-time data, and PostgreSQL as the primary database.


@ -0,0 +1,106 @@
openapi: 3.0.0
info:
  title: CVSA API
  version: v1
servers:
  - url: https://api.projectcvsa.com
paths:
  /video/{id}/snapshots:
    get:
      summary: Get the snapshot list of a video
      description: Get a video's snapshots by its ID. The ID can be a number prefixed with "av", a 12-character alphanumeric string starting with "BV", or a positive integer.
      parameters:
        - in: path
          name: id
          required: true
          schema:
            type: string
          description: "Video ID (e.g. av78977256, BV1KJ411C7CW, 78977256)"
        - in: query
          name: ps
          schema:
            type: integer
            minimum: 1
          description: Number of snapshots per page (pageSize); defaults to 1000.
        - in: query
          name: pn
          schema:
            type: integer
            minimum: 1
          description: Page number (pageNumber) for paginated queries. Only one of offset and pn may be given.
        - in: query
          name: offset
          schema:
            type: integer
            minimum: 1
          description: Offset for offset-based queries. Only one of offset and pn may be given.
        - in: query
          name: reverse
          schema:
            type: boolean
          description: Whether to sort in reverse order (oldest first); defaults to false.
      responses:
        '200':
          description: Snapshot list fetched successfully
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object
                  properties:
                    id:
                      type: integer
                      description: Snapshot ID
                    aid:
                      type: integer
                      description: The video's av number
                    views:
                      type: integer
                      description: View count
                    coins:
                      type: integer
                      description: Coin count
                    likes:
                      type: integer
                      description: Like count
                    favorites:
                      type: integer
                      description: Favorite count
                    shares:
                      type: integer
                      description: Share count
                    danmakus:
                      type: integer
                      description: Danmaku count
                    replies:
                      type: integer
                      description: Reply count
        '400':
          description: Invalid query parameters
          content:
            application/json:
              schema:
                type: object
                properties:
                  message:
                    type: string
                    description: Error message
                  errors:
                    type: object
                    description: Detailed error information
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
                properties:
                  message:
                    type: string
                    description: Error message
                  error:
                    type: object
                    description: Detailed error information
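A minimal client call against the endpoint documented above might look like the following TypeScript sketch; the video ID is the example from the spec, and the response typing is abridged to a few fields:

```typescript
// Fetch up to 10 snapshots of av78977256 from the public API.
const res = await fetch("https://api.projectcvsa.com/video/av78977256/snapshots?ps=10");
if (!res.ok) throw new Error(`Request failed with status ${res.status}`);
const snapshots: { id: number; aid: number; views: number }[] = await res.json();
console.log(snapshots.map((s) => s.views));
```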


@ -1,11 +1,11 @@
# Table of contents
- [Welcome](README.md)
* [Welcome](README.md)
## About <a href="#about" id="about"></a>
- [About This Project](about/this-project.md)
- [Scope of Inclusion](about/scope-of-inclusion.md)
* [About This Project](about/this-project.md)
* [Scope of Inclusion](about/scope-of-inclusion.md)
## Technical Architecture <a href="#architecture" id="architecture"></a>
@ -14,9 +14,9 @@
- [Type of Song](architecture/database-structure/type-of-song.md)
- [Artificial Intelligence](architecture/artificial-intelligence.md)
- [Message Queue](architecture/message-queue/README.md)
- [VideoTagsQueue](architecture/message-queue/video-tags-queue.md)
- [LatestVideosQueue](architecture/message-queue/latestvideosqueue-dui-lie.md)
## API Documentation <a href="#api-doc" id="api-doc"></a>
- [Catalog](api-doc/catalog.md)
- [Songs](api-doc/songs.md)
* [Catalog](api-doc/catalog.md)
* [Video Snapshots](api-doc/video-snapshot.md)


@ -1,3 +1,4 @@
# Catalog
- [Songs](songs.md)
* [Video Snapshots](video-snapshot.md)


@ -1,3 +0,0 @@
# Songs
Not yet implemented.


@ -0,0 +1,6 @@
# Video Snapshots
{% openapi src="../.gitbook/assets/1.yaml" path="/video/{id}/snapshots" method="get" %}
[1.yaml](../.gitbook/assets/1.yaml)
{% endopenapi %}


@ -2,9 +2,14 @@
CVSA uses [PostgreSQL](https://www.postgresql.org/) as its database.
CVSA designed two
All public data of CVSA (excluding users' personal data) is stored in a database named `cvsa_main`, which contains the following tables:
- songs: stores the main information of songs
- bili\_user: stores snapshots of Bilibili user information
- all\_data: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu)
- bilibili\_user: stores snapshots of Bilibili user information
- bilibili\_metadata: metadata of all videos in [category 30](../../about/scope-of-inclusion.md#vocaloiduatu-fen-qu)
- labelling\_result: contains the labels of the videos in `all_data` tagged by our AI system.
- latest\_video\_snapshot: stores the latest snapshot of each video
- video\_snapshot: stores snapshots of videos (the statistics of a video at a specific time: views, likes, etc.)
- snapshot\_schedule: scheduling information for video snapshots; an auxiliary table


@ -0,0 +1 @@
# LatestVideosQueue


@ -1,15 +0,0 @@
---
description: Information about the VideoTagsQueue.
---
# VideoTagsQueue
### Jobs
The VideoTagsQueue contains two jobs: `getVideoTags` and `getVideosTags`. The former fetches the tags of a video, and the latter is responsible for scheduling the former.
### Return values
The return values of the two jobs follow the table below:
<table><thead><tr><th width="168">Return Value</th><th>Description</th></tr></thead><tbody><tr><td>0</td><td>In <code>getVideoTags</code>: tags fetched successfully<br>In <code>getVideosTags</code>: all videos without tags had a corresponding job queued successfully.</td></tr><tr><td>1</td><td>In <code>getVideoTags</code>: a <code>fetch</code> error occurred during the job</td></tr><tr><td>2</td><td>In <code>getVideoTags</code>: the rate limit set by NetScheduler was reached</td></tr><tr><td>3</td><td>In <code>getVideoTags</code>: no aid was provided in the job data</td></tr><tr><td>4</td><td>In <code>getVideosTags</code>: no video has `tags` set to NULL</td></tr><tr><td>1xx</td><td>In <code>getVideosTags</code>: the number of tasks in the queue exceeded the limit, so <code>getVideosTags</code> stopped adding tasks. <code>xx</code> is the number of jobs added to the queue during execution.</td></tr></tbody></table>


@ -1,5 +1,4 @@
---
icon: globe-pointer
layout:
title:
visible: true
@ -15,4 +14,13 @@ layout:
# Overview
Automation is the biggest highlight of CVSA's technical design. To achieve it, we use a message queue powered by BullMQ to concurrently process the various tasks in the data collection lifecycle.
The CVSA project is split into three components: **crawler**, **frontend** and **backend**.
### **crawler**
Located under `packages/crawler`, it is responsible for:
- Crawling new videos and cataloguing works
- Continuously monitoring video statistics such as view counts
The whole crawler is driven by the BullMQ message queue and uses Redis and PostgreSQL to manage state.


@ -0,0 +1,71 @@
version: '3.8'

services:
  db:
    image: postgres:17
    ports:
      - "5431:5432"
    environment:
      POSTGRES_USER: cvsa
      POSTGRES_PASSWORD: ""
      POSTGRES_DB: cvsa_main
    volumes:
      - ./data:/var/lib/postgresql/data
  redis:
    image: redis:latest
    ports:
      - "6378:6379"
    volumes:
      - ./redis/data:/data
      - ./redis/redis.conf:/usr/local/etc/redis/redis.conf
      - ./redis/logs:/logs
  frontend:
    build:
      context: .
      dockerfile: Dockerfile.frontend
    ports:
      - "4321:4321"
    environment:
      - HOST=0.0.0.0
      - PORT=4321
      - DB_HOST=db
      - DB_NAME=cvsa_main
      - DB_NAME_CRED=cvsa_cred
      - DB_USER=cvsa
      - DB_PORT=5432
      - DB_PASSWORD=""
      - LOG_VERBOSE=/app/logs/verbose.log
      - LOG_WARN=/app/logs/warn.log
      - LOG_ERR=/app/logs/error.log
    depends_on:
      - db
    volumes:
      - /path/to/your/logs:/app/logs
  backend:
    build:
      context: .
      dockerfile: Dockerfile.backend
    ports:
      - "8000:8000"
    environment:
      - HOST=0.0.0.0
      - DB_HOST=db
      - DB_NAME=cvsa_main
      - DB_NAME_CRED=cvsa_cred
      - DB_USER=cvsa
      - DB_PORT=5432
      - DB_PASSWORD=""
      - LOG_VERBOSE=/app/logs/verbose.log
      - LOG_WARN=/app/logs/warn.log
      - LOG_ERR=/app/logs/error.log
      - REDIS_HOST=redis
      - REDIS_PORT=6379
    depends_on:
      - db
    volumes:
      - /path/to/your/logs:/app/logs

volumes:
  db_data:


@ -1,6 +0,0 @@
import { defineConfig } from "$fresh/server.ts";
import tailwind from "$fresh/plugins/tailwind.ts";
export default defineConfig({
plugins: [tailwind()],
});


@ -1,27 +0,0 @@
// DO NOT EDIT. This file is generated by Fresh.
// This file SHOULD be checked into source version control.
// This file is automatically updated during development when running `dev.ts`.
import * as $_404 from "./routes/_404.tsx";
import * as $_app from "./routes/_app.tsx";
import * as $api_joke from "./routes/api/joke.ts";
import * as $greet_name_ from "./routes/greet/[name].tsx";
import * as $index from "./routes/index.tsx";
import * as $Counter from "./islands/Counter.tsx";
import type { Manifest } from "$fresh/server.ts";
const manifest = {
routes: {
"./routes/_404.tsx": $_404,
"./routes/_app.tsx": $_app,
"./routes/api/joke.ts": $api_joke,
"./routes/greet/[name].tsx": $greet_name_,
"./routes/index.tsx": $index,
},
islands: {
"./islands/Counter.tsx": $Counter,
},
baseUrl: import.meta.url,
} satisfies Manifest;
export default manifest;


@ -1,16 +0,0 @@
import type { Signal } from "@preact/signals";
import { Button } from "../components/Button.tsx";
interface CounterProps {
count: Signal<number>;
}
export default function Counter(props: CounterProps) {
return (
<div class="flex gap-8 py-6">
<Button onClick={() => props.count.value -= 1}>-1</Button>
<p class="text-3xl tabular-nums">{props.count}</p>
<Button onClick={() => props.count.value += 1}>+1</Button>
</div>
);
}


@ -1,87 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { AllDataType, BiliUserType } from "lib/db/schema.d.ts";
import Akari from "lib/ml/akari.ts";
export async function videoExistsInAllData(client: Client, aid: number) {
return await client.queryObject<{ exists: boolean }>(
`SELECT EXISTS(SELECT 1 FROM bilibili_metadata WHERE aid = $1)`,
[aid],
)
.then((result) => result.rows[0].exists);
}
export async function userExistsInBiliUsers(client: Client, uid: number) {
return await client.queryObject<{ exists: boolean }>(`SELECT EXISTS(SELECT 1 FROM bilibili_user WHERE uid = $1)`, [
uid,
]);
}
export async function getUnlabelledVideos(client: Client) {
const queryResult = await client.queryObject<{ aid: number }>(
`SELECT a.aid FROM bilibili_metadata a LEFT JOIN labelling_result l ON a.aid = l.aid WHERE l.aid IS NULL`,
);
return queryResult.rows.map((row) => row.aid);
}
export async function insertVideoLabel(client: Client, aid: number, label: number) {
return await client.queryObject(
`INSERT INTO labelling_result (aid, label, model_version) VALUES ($1, $2, $3) ON CONFLICT (aid, model_version) DO NOTHING`,
[aid, label, Akari.getModelVersion()],
);
}
export async function getVideoInfoFromAllData(client: Client, aid: number) {
const queryResult = await client.queryObject<AllDataType>(
`SELECT * FROM bilibili_metadata WHERE aid = $1`,
[aid],
);
const row = queryResult.rows[0];
let authorInfo = "";
if (row.uid && await userExistsInBiliUsers(client, row.uid)) {
const q = await client.queryObject<BiliUserType>(
`SELECT * FROM bilibili_user WHERE uid = $1`,
[row.uid],
);
const userRow = q.rows[0];
if (userRow) {
authorInfo = userRow.desc;
}
}
return {
title: row.title,
description: row.description,
tags: row.tags,
author_info: authorInfo,
};
}
export async function getUnArchivedBiliUsers(client: Client) {
const queryResult = await client.queryObject<{ uid: number }>(
`
SELECT ad.uid
FROM bilibili_metadata ad
LEFT JOIN bilibili_user bu ON ad.uid = bu.uid
WHERE bu.uid IS NULL;
`,
[],
);
const rows = queryResult.rows;
return rows.map((row) => row.uid);
}
export async function setBiliVideoStatus(client: Client, aid: number, status: number) {
return await client.queryObject(
`UPDATE bilibili_metadata SET status = $1 WHERE aid = $2`,
[status, aid],
);
}
export async function getBiliVideoStatus(client: Client, aid: number) {
const queryResult = await client.queryObject<{ status: number }>(
`SELECT status FROM bilibili_metadata WHERE aid = $1`,
[aid],
);
const rows = queryResult.rows;
if (rows.length === 0) return 0;
return rows[0].status;
}


@ -1,6 +0,0 @@
import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { postgresConfig } from "lib/db/pgConfig.ts";
const pool = new Pool(postgresConfig, 12);
export const db = pool;


@ -1,21 +0,0 @@
const requiredEnvVars = ["DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_PORT"];
const unsetVars = requiredEnvVars.filter((key) => Deno.env.get(key) === undefined);
if (unsetVars.length > 0) {
throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`);
}
const databaseHost = Deno.env.get("DB_HOST")!;
const databaseName = Deno.env.get("DB_NAME")!;
const databaseUser = Deno.env.get("DB_USER")!;
const databasePassword = Deno.env.get("DB_PASSWORD")!;
const databasePort = Deno.env.get("DB_PORT")!;
export const postgresConfig = {
hostname: databaseHost,
port: parseInt(databasePort),
database: databaseName,
user: databaseUser,
password: databasePassword,
};


@ -1,3 +0,0 @@
import { Redis } from "ioredis";
export const redis = new Redis({ maxRetriesPerRequest: null });


@ -1,44 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { LatestSnapshotType } from "lib/db/schema.d.ts";
export async function getVideosNearMilestone(client: Client) {
const queryResult = await client.queryObject<LatestSnapshotType>(`
SELECT ls.*
FROM latest_video_snapshot ls
INNER JOIN songs s ON ls.aid = s.aid
WHERE s.deleted = false AND (
(views >= 90000 AND views < 100000) OR
(views >= 900000 AND views < 1000000) OR
(views >= 9900000 AND views < 10000000)
)
`);
return queryResult.rows.map((row) => {
return {
...row,
aid: Number(row.aid),
};
});
}
export async function getLatestVideoSnapshot(client: Client, aid: number): Promise<null | LatestSnapshotType> {
const queryResult = await client.queryObject<LatestSnapshotType>(
`
SELECT *
FROM latest_video_snapshot
WHERE aid = $1
`,
[aid],
);
if (queryResult.rows.length === 0) {
return null;
}
return queryResult.rows.map((row) => {
return {
...row,
aid: Number(row.aid),
time: new Date(row.time).getTime(),
};
})[0];
}

@@ -1,309 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
import { SnapshotScheduleType } from "./schema.d.ts";
import logger from "lib/log/logger.ts";
import { MINUTE } from "$std/datetime/constants.ts";
import { redis } from "lib/db/redis.ts";
import { Redis } from "ioredis";
const REDIS_KEY = "cvsa:snapshot_window_counts";
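// The hash at REDIS_KEY maps a 5-minute window index (counted from today's
// midnight; values past 287 fall on later days) to the number of pending
// snapshots scheduled in that window.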
function getCurrentWindowIndex(): number {
const now = new Date();
const minutesSinceMidnight = now.getHours() * 60 + now.getMinutes();
const currentWindow = Math.floor(minutesSinceMidnight / 5);
return currentWindow;
}
export async function refreshSnapshotWindowCounts(client: Client, redisClient: Redis) {
const now = new Date();
const startTime = now.getTime();
const result = await client.queryObject<{ window_start: Date; count: number }>`
SELECT
date_trunc('hour', started_at) +
(EXTRACT(minute FROM started_at)::int / 5 * INTERVAL '5 minutes') AS window_start,
COUNT(*) AS count
FROM snapshot_schedule
WHERE started_at >= NOW() AND status = 'pending' AND started_at <= NOW() + INTERVAL '10 days'
GROUP BY 1
ORDER BY window_start
`
await redisClient.del(REDIS_KEY);
const currentWindow = getCurrentWindowIndex();
for (const row of result.rows) {
const targetOffset = Math.floor((row.window_start.getTime() - startTime) / (5 * MINUTE));
const offset = (currentWindow + targetOffset);
if (offset >= 0) {
await redisClient.hset(REDIS_KEY, offset.toString(), Number(row.count));
}
}
}
export async function initSnapshotWindowCounts(client: Client, redisClient: Redis) {
await refreshSnapshotWindowCounts(client, redisClient);
setInterval(async () => {
await refreshSnapshotWindowCounts(client, redisClient);
}, 5 * MINUTE);
}
async function getWindowCount(redisClient: Redis, offset: number): Promise<number> {
const count = await redisClient.hget(REDIS_KEY, offset.toString());
return count ? parseInt(count, 10) : 0;
}
export async function snapshotScheduleExists(client: Client, id: number) {
const res = await client.queryObject<{ id: number }>(
`SELECT id FROM snapshot_schedule WHERE id = $1`,
[id],
);
return res.rows.length > 0;
}
export async function videoHasActiveSchedule(client: Client, aid: number) {
const res = await client.queryObject<{ status: string }>(
`SELECT status FROM snapshot_schedule WHERE aid = $1 AND (status = 'pending' OR status = 'processing')`,
[aid],
);
return res.rows.length > 0;
}
export async function videoHasProcessingSchedule(client: Client, aid: number) {
const res = await client.queryObject<{ status: string }>(
`SELECT status FROM snapshot_schedule WHERE aid = $1 AND status = 'processing'`,
[aid],
);
return res.rows.length > 0;
}
export async function bulkGetVideosWithoutProcessingSchedules(client: Client, aids: number[]) {
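// Note: this returns aids from the list that have at least one schedule not in
// 'processing'; an aid that also has a concurrent 'processing' row is still included.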
const res = await client.queryObject<{ aid: number }>(
`SELECT aid FROM snapshot_schedule WHERE aid = ANY($1) AND status != 'processing' GROUP BY aid`,
[aids],
);
return res.rows.map((row) => row.aid);
}
interface Snapshot {
created_at: number;
views: number;
}
export async function findClosestSnapshot(
client: Client,
aid: number,
targetTime: Date,
): Promise<Snapshot | null> {
const query = `
SELECT created_at, views
FROM video_snapshot
WHERE aid = $1
ORDER BY ABS(EXTRACT(EPOCH FROM (created_at - $2::timestamptz)))
LIMIT 1
`;
const result = await client.queryObject<{ created_at: string; views: number }>(
query,
[aid, targetTime.toISOString()],
);
if (result.rows.length === 0) return null;
const row = result.rows[0];
return {
created_at: new Date(row.created_at).getTime(),
views: row.views,
};
}
export async function findSnapshotBefore(
client: Client,
aid: number,
targetTime: Date,
): Promise<Snapshot | null> {
const query = `
SELECT created_at, views
FROM video_snapshot
WHERE aid = $1
AND created_at <= $2::timestamptz
ORDER BY created_at DESC
LIMIT 1
`;
const result = await client.queryObject<{ created_at: string; views: number }>(
query,
[aid, targetTime.toISOString()],
);
if (result.rows.length === 0) return null;
const row = result.rows[0];
return {
created_at: new Date(row.created_at).getTime(),
views: row.views,
};
}
export async function hasAtLeast2Snapshots(client: Client, aid: number) {
const res = await client.queryObject<{ count: number }>(
`SELECT COUNT(*) FROM video_snapshot WHERE aid = $1`,
[aid],
);
return res.rows[0].count >= 2;
}
export async function getLatestSnapshot(client: Client, aid: number): Promise<Snapshot | null> {
const res = await client.queryObject<{ created_at: string; views: number }>(
`SELECT created_at, views FROM video_snapshot WHERE aid = $1 ORDER BY created_at DESC LIMIT 1`,
[aid],
);
if (res.rows.length === 0) return null;
const row = res.rows[0];
return {
created_at: new Date(row.created_at).getTime(),
views: row.views,
};
}
/*
* Returns the number of snapshot schedules within the specified range.
* @param client The database client.
* @param start The start time of the range. (Timestamp in milliseconds)
* @param end The end time of the range. (Timestamp in milliseconds)
*/
export async function getSnapshotScheduleCountWithinRange(client: Client, start: number, end: number) {
const startTimeString = formatTimestampToPsql(start);
const endTimeString = formatTimestampToPsql(end);
const query = `
SELECT COUNT(*) FROM snapshot_schedule
WHERE started_at BETWEEN $1 AND $2
AND status = 'pending'
`;
const res = await client.queryObject<{ count: number }>(query, [startTimeString, endTimeString]);
return res.rows[0].count;
}
/*
* Creates a new snapshot schedule record.
* @param client The database client.
* @param aid The aid of the video.
* @param type The type of the snapshot ("normal", "new" or "milestone").
* @param targetTime Scheduled time for snapshot. (Timestamp in milliseconds)
* @param force Whether to insert even if the video already has an active schedule.
*/
export async function scheduleSnapshot(client: Client, aid: number, type: string, targetTime: number, force: boolean = false) {
if (await videoHasActiveSchedule(client, aid) && !force) return;
let adjustedTime = new Date(targetTime);
if (type !== "milestone" && type !== "new") {
adjustedTime = await adjustSnapshotTime(new Date(targetTime), 1000, redis);
}
logger.log(`Scheduled snapshot for ${aid} at ${adjustedTime.toISOString()}`, "mq", "fn:scheduleSnapshot");
return client.queryObject(
`INSERT INTO snapshot_schedule (aid, type, started_at) VALUES ($1, $2, $3)`,
[aid, type, adjustedTime.toISOString()],
);
}
export async function bulkScheduleSnapshot(client: Client, aids: number[], type: string, targetTime: number, force: boolean = false) {
for (const aid of aids) {
await scheduleSnapshot(client, aid, type, targetTime, force);
}
}
export async function adjustSnapshotTime(
expectedStartTime: Date,
allowedCounts: number = 1000,
redisClient: Redis,
): Promise<Date> {
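// Scan forward through the 5-minute windows, starting about 30 minutes (6 windows)
// before the expected start time but never before the current window, and pick the
// first window whose pending count is below allowedCounts; a random delay inside
// the chosen window spreads the load.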
const currentWindow = getCurrentWindowIndex();
const targetOffset = Math.floor((expectedStartTime.getTime() - Date.now()) / (5 * MINUTE)) - 6;
const initialOffset = currentWindow + Math.max(targetOffset, 0);
let timePerIteration = 0;
const MAX_ITERATIONS = 2880;
let iters = 0;
const t = performance.now();
for (let i = initialOffset; i < MAX_ITERATIONS; i++) {
iters++;
const offset = i;
const count = await getWindowCount(redisClient, offset);
if (count < allowedCounts) {
await redisClient.hincrby(REDIS_KEY, offset.toString(), 1);
const startPoint = new Date();
startPoint.setHours(0, 0, 0, 0);
const startTime = startPoint.getTime();
const windowStart = startTime + offset * 5 * MINUTE;
const randomDelay = Math.floor(Math.random() * 5 * MINUTE);
const delayedDate = new Date(windowStart + randomDelay);
const now = new Date();
if (delayedDate.getTime() < now.getTime()) {
const elapsed = performance.now() - t;
timePerIteration = elapsed / (i+1);
logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime");
return now;
}
const elapsed = performance.now() - t;
timePerIteration = elapsed / (i+1);
logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime");
return delayedDate;
}
}
const elapsed = performance.now() - t;
timePerIteration = elapsed / MAX_ITERATIONS;
logger.log(`${timePerIteration.toFixed(3)}ms * ${MAX_ITERATIONS} iterations`, "perf", "fn:adjustSnapshotTime");
return expectedStartTime;
}
export async function getSnapshotsInNextSecond(client: Client) {
const query = `
SELECT *
FROM snapshot_schedule
WHERE started_at <= NOW() + INTERVAL '1 seconds' AND status = 'pending' AND type != 'normal'
ORDER BY
CASE
WHEN type = 'milestone' THEN 0
ELSE 1
END,
started_at
LIMIT 10;
`;
const res = await client.queryObject<SnapshotScheduleType>(query, []);
return res.rows;
}
export async function getBulkSnapshotsInNextSecond(client: Client) {
const query = `
SELECT *
FROM snapshot_schedule
WHERE started_at <= NOW() + INTERVAL '15 seconds' AND status = 'pending' AND type = 'normal'
ORDER BY started_at
LIMIT 1000;
`;
const res = await client.queryObject<SnapshotScheduleType>(query, []);
return res.rows;
}
export async function setSnapshotStatus(client: Client, id: number, status: string) {
return await client.queryObject(
`UPDATE snapshot_schedule SET status = $2 WHERE id = $1`,
[id, status],
);
}
export async function bulkSetSnapshotStatus(client: Client, ids: number[], status: string) {
return await client.queryObject(
`UPDATE snapshot_schedule SET status = $2 WHERE id = ANY($1)`,
[ids, status],
);
}
export async function getVideosWithoutActiveSnapshotSchedule(client: Client) {
const query: string = `
SELECT s.aid
FROM songs s
LEFT JOIN snapshot_schedule ss ON s.aid = ss.aid AND (ss.status = 'pending' OR ss.status = 'processing')
WHERE ss.aid IS NULL
`;
const res = await client.queryObject<{ aid: number }>(query, []);
return res.rows.map((r) => Number(r.aid));
}

@@ -1,45 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts";
export async function getNotCollectedSongs(client: Client) {
const queryResult = await client.queryObject<{ aid: number }>(`
SELECT lr.aid
FROM labelling_result lr
WHERE lr.label != 0
AND NOT EXISTS (
SELECT 1
FROM songs s
WHERE s.aid = lr.aid
);
`);
return queryResult.rows.map((row) => row.aid);
}
export async function aidExistsInSongs(client: Client, aid: number) {
const queryResult = await client.queryObject<{ exists: boolean }>(
`
SELECT EXISTS (
SELECT 1
FROM songs
WHERE aid = $1
);
`,
[aid],
);
return queryResult.rows[0].exists;
}
export async function getSongsPublihsedAt(client: Client, aid: number) {
const queryResult = await client.queryObject<{ published_at: string }>(
`
SELECT published_at
FROM songs
WHERE aid = $1;
`,
[aid],
);
if (queryResult.rows.length === 0) {
return null;
}
return parseTimestampFromPsql(queryResult.rows[0].published_at);
}

@@ -1,179 +0,0 @@
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
import * as ort from "onnxruntime";
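// Numerically stable softmax: subtracting the max logit before exponentiation avoids overflow.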
function softmax(logits: Float32Array): number[] {
const maxLogit = Math.max(...logits);
const exponents = logits.map((logit) => Math.exp(logit - maxLogit));
const sumOfExponents = exponents.reduce((sum, exp) => sum + exp, 0);
return Array.from(exponents.map((exp) => exp / sumOfExponents));
}
// Configuration parameters
const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
const onnxClassifierPath = "./model/video_classifier_v3_17.onnx";
const onnxEmbeddingPath = "./model/embedding_original.onnx";
const testDataPath = "./data/filter/test1.jsonl";
// Initialize inference sessions
const [sessionClassifier, sessionEmbedding] = await Promise.all([
ort.InferenceSession.create(onnxClassifierPath),
ort.InferenceSession.create(onnxEmbeddingPath),
]);
let tokenizer: PreTrainedTokenizer;
// Initialize the tokenizer
async function loadTokenizer() {
const tokenizerConfig = { local_files_only: true };
tokenizer = await AutoTokenizer.from_pretrained(sentenceTransformerModelName, tokenizerConfig);
}
// New embedding generation function (uses ONNX)
async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
const { input_ids } = await tokenizer(texts, {
add_special_tokens: false,
return_tensor: false,
});
// Construct the input parameters: flattened token ids plus per-text offsets
const cumsum = (arr: number[]): number[] =>
arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: number[]) => x.length))];
const flattened_input_ids = input_ids.flat();
// Prepare the ONNX inputs
const inputs = {
input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [
flattened_input_ids.length,
]),
offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length]),
};
// Run inference
const { embeddings } = await session.run(inputs);
return Array.from(embeddings.data as Float32Array);
}
// Classification inference function
async function runClassification(embeddings: number[]): Promise<number[]> {
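// The classifier consumes one 1024-dim embedding per input channel
// (title, description, tags), hence the [1, 3, 1024] input shape.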
const inputTensor = new ort.Tensor(
Float32Array.from(embeddings),
[1, 3, 1024],
);
const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
return softmax(logits.data as Float32Array);
}
// Metrics calculation function
function calculateMetrics(labels: number[], predictions: number[], elapsedTime: number): {
accuracy: number;
precision: number;
recall: number;
f1: number;
"Class 0 Prec": number;
speed: string;
} {
// Print the indices where the label and the prediction differ (misclassified as class 0)
const arr = [];
for (let i = 0; i < labels.length; i++) {
if (labels[i] !== predictions[i] && predictions[i] == 0) {
arr.push([i + 1, labels[i], predictions[i]]);
}
}
console.log(arr);
// Initialize the confusion matrix
const classCount = Math.max(...labels, ...predictions) + 1;
const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
// Fill the matrix
labels.forEach((trueLabel, i) => {
matrix[trueLabel][predictions[i]]++;
});
// Compute the aggregate metrics
let totalTP = 0, totalFP = 0, totalFN = 0;
for (let c = 0; c < classCount; c++) {
const TP = matrix[c][c];
const FP = matrix.flatMap((row, i) => i === c ? [] : [row[c]]).reduce((a, b) => a + b, 0);
const FN = matrix[c].filter((_, i) => i !== c).reduce((a, b) => a + b, 0);
totalTP += TP;
totalFP += FP;
totalFN += FN;
}
const precision = totalTP / (totalTP + totalFP);
const recall = totalTP / (totalTP + totalFN);
const f1 = 2 * (precision * recall) / (precision + recall) || 0;
// Compute Class 0 precision
const class0TP = matrix[0][0];
const class0FP = matrix.flatMap((row, i) => i === 0 ? [] : [row[0]]).reduce((a, b) => a + b, 0);
const class0Precision = class0TP / (class0TP + class0FP) || 0;
return {
accuracy: labels.filter((l, i) => l === predictions[i]).length / labels.length,
precision,
recall,
f1,
speed: `${(labels.length / (elapsedTime / 1000)).toFixed(1)} samples/sec`,
"Class 0 Prec": class0Precision,
};
}
// Refactored evaluation function
async function evaluateModel(session: ort.InferenceSession): Promise<{
accuracy: number;
precision: number;
recall: number;
f1: number;
"Class 0 Prec": number;
}> {
const data = await Deno.readTextFile(testDataPath);
const samples = data.split("\n")
.map((line) => {
try {
return JSON.parse(line);
} catch {
return null;
}
})
.filter(Boolean);
const allPredictions: number[] = [];
const allLabels: number[] = [];
const t = new Date().getTime();
for (const sample of samples) {
try {
const embeddings = await getONNXEmbeddings([
sample.title,
sample.description,
sample.tags.join(","),
], session);
const probabilities = await runClassification(embeddings);
allPredictions.push(probabilities.indexOf(Math.max(...probabilities)));
allLabels.push(sample.label);
} catch (error) {
console.error("Processing error:", error);
}
}
const elapsed = new Date().getTime() - t;
return calculateMetrics(allLabels, allPredictions, elapsed);
}
// Main function
async function main() {
await loadTokenizer();
const metrics = await evaluateModel(sessionEmbedding);
console.log("Model Metrics:");
console.table(metrics);
}
await main();

@@ -1,22 +0,0 @@
import { AIManager } from "lib/ml/manager.ts";
import * as ort from "onnxruntime";
import logger from "lib/log/logger.ts";
import { WorkerError } from "lib/mq/schema.ts";
const modelPath = "./model/model.onnx";
class MantisProto extends AIManager {
constructor() {
super();
this.models = {
"predictor": modelPath,
};
}
public override async init(): Promise<void> {
await super.init();
}
}
const Mantis = new MantisProto();
export default Mantis;

@@ -1,171 +0,0 @@
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
import * as ort from "onnxruntime";
function softmax(logits: Float32Array): number[] {
const maxLogit = Math.max(...logits);
const exponents = logits.map((logit) => Math.exp(logit - maxLogit));
const sumOfExponents = exponents.reduce((sum, exp) => sum + exp, 0);
return Array.from(exponents.map((exp) => exp / sumOfExponents));
}
// Configuration parameters
const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
const onnxClassifierPath = "./model/video_classifier_v3_11.onnx";
const onnxEmbeddingOriginalPath = "./model/embedding_original.onnx";
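// NOTE: the path below is identical to onnxEmbeddingOriginalPath, so both
// evaluations in main() run the same model; a dedicated quantized .onnx was
// presumably intended here.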
const onnxEmbeddingQuantizedPath = "./model/embedding_original.onnx";
// Initialize inference sessions
const [sessionClassifier, sessionEmbeddingOriginal, sessionEmbeddingQuantized] = await Promise.all([
ort.InferenceSession.create(onnxClassifierPath),
ort.InferenceSession.create(onnxEmbeddingOriginalPath),
ort.InferenceSession.create(onnxEmbeddingQuantizedPath),
]);
let tokenizer: PreTrainedTokenizer;
// Initialize the tokenizer
async function loadTokenizer() {
const tokenizerConfig = { local_files_only: true };
tokenizer = await AutoTokenizer.from_pretrained(sentenceTransformerModelName, tokenizerConfig);
}
// New embedding generation function (uses ONNX)
async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
const { input_ids } = await tokenizer(texts, {
add_special_tokens: false,
return_tensor: false,
});
// Construct the input parameters: flattened token ids plus per-text offsets
const cumsum = (arr: number[]): number[] =>
arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: number[]) => x.length))];
const flattened_input_ids = input_ids.flat();
// Prepare the ONNX inputs
const inputs = {
input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [
flattened_input_ids.length,
]),
offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length]),
};
// Run inference
const { embeddings } = await session.run(inputs);
return Array.from(embeddings.data as Float32Array);
}
// Classification inference function
async function runClassification(embeddings: number[]): Promise<number[]> {
const inputTensor = new ort.Tensor(
Float32Array.from(embeddings),
[1, 4, 1024],
);
const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
return softmax(logits.data as Float32Array);
}
// Metrics calculation function
function calculateMetrics(labels: number[], predictions: number[], elapsedTime: number): {
accuracy: number;
precision: number;
recall: number;
f1: number;
speed: string;
} {
// Initialize the confusion matrix
const classCount = Math.max(...labels, ...predictions) + 1;
const matrix = Array.from({ length: classCount }, () => Array.from({ length: classCount }, () => 0));
// Fill the matrix
labels.forEach((trueLabel, i) => {
matrix[trueLabel][predictions[i]]++;
});
// Compute the aggregate metrics
let totalTP = 0, totalFP = 0, totalFN = 0;
for (let c = 0; c < classCount; c++) {
const TP = matrix[c][c];
const FP = matrix.flatMap((row, i) => i === c ? [] : [row[c]]).reduce((a, b) => a + b, 0);
const FN = matrix[c].filter((_, i) => i !== c).reduce((a, b) => a + b, 0);
totalTP += TP;
totalFP += FP;
totalFN += FN;
}
const precision = totalTP / (totalTP + totalFP);
const recall = totalTP / (totalTP + totalFN);
const f1 = 2 * (precision * recall) / (precision + recall) || 0;
return {
accuracy: labels.filter((l, i) => l === predictions[i]).length / labels.length,
precision,
recall,
f1,
speed: `${(labels.length / (elapsedTime / 1000)).toFixed(1)} samples/sec`,
};
}
// Refactored evaluation function
async function evaluateModel(session: ort.InferenceSession): Promise<{
accuracy: number;
precision: number;
recall: number;
f1: number;
}> {
const data = await Deno.readTextFile("./data/filter/test1.jsonl");
const samples = data.split("\n")
.map((line) => {
try {
return JSON.parse(line);
} catch {
return null;
}
})
.filter(Boolean);
const allPredictions: number[] = [];
const allLabels: number[] = [];
const t = new Date().getTime();
for (const sample of samples) {
try {
const embeddings = await getONNXEmbeddings([
sample.title,
sample.description,
sample.tags.join(","),
sample.author_info,
], session);
const probabilities = await runClassification(embeddings);
allPredictions.push(probabilities.indexOf(Math.max(...probabilities)));
allLabels.push(sample.label);
} catch (error) {
console.error("Processing error:", error);
}
}
const elapsed = new Date().getTime() - t;
return calculateMetrics(allLabels, allPredictions, elapsed);
}
// Main function
async function main() {
await loadTokenizer();
// Evaluate the original model
const originalMetrics = await evaluateModel(sessionEmbeddingOriginal);
console.log("Original Model Metrics:");
console.table(originalMetrics);
// Evaluate the quantized model
const quantizedMetrics = await evaluateModel(sessionEmbeddingQuantized);
console.log("Quantized Model Metrics:");
console.table(quantizedMetrics);
}
await main();

@@ -1,37 +0,0 @@
import { Job } from "bullmq";
import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
import { db } from "lib/db/init.ts";
import { insertVideoInfo } from "lib/mq/task/getVideoDetails.ts";
import { collectSongs } from "lib/mq/task/collectSongs.ts";
export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
const client = await db.connect();
try {
await queueLatestVideos(client);
} finally {
client.release();
}
};
export const collectSongsWorker = async (_job: Job): Promise<void> => {
const client = await db.connect();
try {
await collectSongs(client);
} finally {
client.release();
}
};
export const getVideoInfoWorker = async (job: Job): Promise<number> => {
const client = await db.connect();
try {
const aid = job.data.aid;
if (!aid) {
return 3;
}
await insertVideoInfo(client, aid);
return 0;
} finally {
client.release();
}
};

@@ -1,397 +0,0 @@
import { Job } from "bullmq";
import { db } from "lib/db/init.ts";
import { getLatestVideoSnapshot, getVideosNearMilestone } from "lib/db/snapshot.ts";
import {
bulkGetVideosWithoutProcessingSchedules,
bulkScheduleSnapshot,
bulkSetSnapshotStatus,
findClosestSnapshot,
findSnapshotBefore,
getLatestSnapshot,
getSnapshotsInNextSecond,
getVideosWithoutActiveSnapshotSchedule,
hasAtLeast2Snapshots,
scheduleSnapshot,
setSnapshotStatus,
snapshotScheduleExists,
videoHasProcessingSchedule,
getBulkSnapshotsInNextSecond
} from "lib/db/snapshotSchedule.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts";
import logger from "lib/log/logger.ts";
import { SnapshotQueue } from "lib/mq/index.ts";
import { insertVideoSnapshot } from "lib/mq/task/getVideoStats.ts";
import { NetSchedulerError } from "lib/mq/scheduler.ts";
import { getBiliVideoStatus, setBiliVideoStatus } from "lib/db/allData.ts";
import { truncate } from "lib/utils/truncate.ts";
import { lockManager } from "lib/mq/lockManager.ts";
import { getSongsPublihsedAt } from "lib/db/songs.ts";
import { bulkGetVideoStats } from "lib/net/bulkGetVideoStats.ts";
const priorityMap: { [key: string]: number } = {
"milestone": 1,
"normal": 3,
};
const snapshotTypeToTaskMap: { [key: string]: string } = {
"milestone": "snapshotMilestoneVideo",
"normal": "snapshotVideo",
"new": "snapshotMilestoneVideo",
};
export const bulkSnapshotTickWorker = async (_job: Job) => {
const client = await db.connect();
try {
const schedules = await getBulkSnapshotsInNextSecond(client);
const count = schedules.length;
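// Dispatch bulk snapshots in groups of up to 30 aids per request.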
const groups = Math.ceil(count / 30);
for (let i = 0; i < groups; i++) {
const group = schedules.slice(i * 30, (i + 1) * 30);
const aids = group.map((schedule) => Number(schedule.aid));
const filteredAids = await bulkGetVideosWithoutProcessingSchedules(client, aids);
if (filteredAids.length === 0) continue;
await bulkSetSnapshotStatus(client, filteredAids, "processing");
const dataMap: { [key: number]: number } = {};
for (const schedule of group) {
const id = Number(schedule.id);
dataMap[id] = Number(schedule.aid);
}
await SnapshotQueue.add("bulkSnapshotVideo", {
map: dataMap,
}, { priority: 3 });
}
} catch (e) {
logger.error(e as Error);
} finally {
client.release();
}
};
export const snapshotTickWorker = async (_job: Job) => {
const client = await db.connect();
try {
const schedules = await getSnapshotsInNextSecond(client);
for (const schedule of schedules) {
if (await videoHasProcessingSchedule(client, Number(schedule.aid))) {
return `ALREADY_PROCESSING`;
}
let priority = 3;
if (schedule.type && priorityMap[schedule.type]) {
priority = priorityMap[schedule.type];
}
const aid = Number(schedule.aid);
await setSnapshotStatus(client, schedule.id, "processing");
await SnapshotQueue.add("snapshotVideo", {
aid: aid,
id: Number(schedule.id),
type: schedule.type ?? "normal",
}, { priority });
}
} catch (e) {
logger.error(e as Error);
} finally {
client.release();
}
};
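// The next milestone (100k, 1M, or 10M views) that the given view count is approaching.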
export const closetMilestone = (views: number) => {
if (views < 100000) return 100000;
if (views < 1000000) return 1000000;
return 10000000;
};
const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base);
/*
* Returns the minimum ETA in hours for the next snapshot
* @param client - Postgres client
* @param aid - aid of the video
* @returns ETA in hours
*/
const getAdjustedShortTermETA = async (client: Client, aid: number) => {
const latestSnapshot = await getLatestSnapshot(client, aid);
// Immediately dispatch a snapshot if there is no snapshot yet
if (!latestSnapshot) return 0;
const snapshotsEnough = await hasAtLeast2Snapshots(client, aid);
if (!snapshotsEnough) return 0;
const currentTimestamp = new Date().getTime();
const timeIntervals = [3 * MINUTE, 20 * MINUTE, 1 * HOUR, 3 * HOUR, 6 * HOUR];
const DELTA = 0.00001;
let minETAHours = Infinity;
for (const timeInterval of timeIntervals) {
const date = new Date(currentTimestamp - timeInterval);
const snapshot = await findClosestSnapshot(client, aid, date);
if (!snapshot) continue;
const hoursDiff = (latestSnapshot.created_at - snapshot.created_at) / HOUR;
const viewsDiff = latestSnapshot.views - snapshot.views;
if (viewsDiff <= 0) continue;
const speed = viewsDiff / (hoursDiff + DELTA);
const target = closetMilestone(latestSnapshot.views);
const viewsToIncrease = target - latestSnapshot.views;
const eta = viewsToIncrease / (speed + DELTA);
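// The correction factor grows as viewsToIncrease shrinks, so the adjusted ETA
// drops and snapshots fire more frequently the closer the video gets to the milestone.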
let factor = log(2.97 / log(viewsToIncrease + 1), 1.14);
factor = truncate(factor, 3, 100);
const adjustedETA = eta / factor;
if (adjustedETA < minETAHours) {
minETAHours = adjustedETA;
}
}
if (isNaN(minETAHours)) {
minETAHours = Infinity;
}
return minETAHours;
};
export const collectMilestoneSnapshotsWorker = async (_job: Job) => {
const client = await db.connect();
try {
const videos = await getVideosNearMilestone(client);
for (const video of videos) {
const aid = Number(video.aid);
const eta = await getAdjustedShortTermETA(client, aid);
if (eta > 72) continue;
const now = Date.now();
const scheduledNextSnapshotDelay = eta * HOUR;
const maxInterval = 4 * HOUR;
const minInterval = 1 * SECOND;
const delay = truncate(scheduledNextSnapshotDelay, minInterval, maxInterval);
const targetTime = now + delay;
await scheduleSnapshot(client, aid, "milestone", targetTime);
}
} catch (e) {
logger.error(e as Error, "mq", "fn:collectMilestoneSnapshotsWorker");
} finally {
client.release();
}
};
const getRegularSnapshotInterval = async (client: Client, aid: number) => {
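// Heuristic: estimate views-per-day growth from roughly the last 24 hours of
// snapshots and map it to a re-snapshot interval in hours (faster growth, shorter interval).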
const now = Date.now();
const date = new Date(now - 24 * HOUR);
let oldSnapshot = await findSnapshotBefore(client, aid, date);
if (!oldSnapshot) oldSnapshot = await findClosestSnapshot(client, aid, date);
const latestSnapshot = await getLatestSnapshot(client, aid);
if (!oldSnapshot || !latestSnapshot) return 0;
if (oldSnapshot.created_at === latestSnapshot.created_at) return 0;
const hoursDiff = (latestSnapshot.created_at - oldSnapshot.created_at) / HOUR;
if (hoursDiff < 8) return 24;
const viewsDiff = latestSnapshot.views - oldSnapshot.views;
if (viewsDiff === 0) return 72;
const speedPerDay = viewsDiff / (hoursDiff + 0.001) * 24;
if (speedPerDay < 6) return 36;
if (speedPerDay < 120) return 24;
if (speedPerDay < 320) return 12;
return 6;
};
export const regularSnapshotsWorker = async (_job: Job) => {
const client = await db.connect();
const startedAt = Date.now();
if (await lockManager.isLocked("dispatchRegularSnapshots")) {
logger.log("dispatchRegularSnapshots is already running", "mq");
client.release();
return;
}
await lockManager.acquireLock("dispatchRegularSnapshots", 30 * 60);
try {
const aids = await getVideosWithoutActiveSnapshotSchedule(client);
for (const rawAid of aids) {
const aid = Number(rawAid);
const latestSnapshot = await getLatestVideoSnapshot(client, aid);
const now = Date.now();
const lastSnapshotedAt = latestSnapshot?.time ?? now;
const interval = await getRegularSnapshotInterval(client, aid);
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
const targetTime = truncate(lastSnapshotedAt + interval * HOUR, now + 1, now + 100000 * WEEK);
await scheduleSnapshot(client, aid, "normal", targetTime);
if (now - startedAt > 25 * MINUTE) {
return;
}
}
} catch (e) {
logger.error(e as Error, "mq", "fn:regularSnapshotsWorker");
} finally {
lockManager.releaseLock("dispatchRegularSnapshots");
client.release();
}
};
export const takeBulkSnapshotForVideosWorker = async (job: Job) => {
const dataMap: { [key: number]: number } = job.data.map;
const ids = Object.keys(dataMap).map((id) => Number(id));
const aidsToFetch: number[] = [];
const client = await db.connect();
try {
for (const id of ids) {
const aid = Number(dataMap[id]);
const exists = await snapshotScheduleExists(client, id);
if (!exists) {
continue;
}
aidsToFetch.push(aid);
}
const data = await bulkGetVideoStats(aidsToFetch);
if (typeof data === "number") {
await bulkSetSnapshotStatus(client, ids, "failed");
await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 15 * SECOND);
return `GET_BILI_STATUS_${data}`;
}
for (const video of data) {
const aid = video.id;
const stat = video.cnt_info;
const views = stat.play;
const danmakus = stat.danmaku;
const replies = stat.reply;
const likes = stat.thumb_up;
const coins = stat.coin;
const shares = stat.share;
const favorites = stat.collect;
const query: string = `
INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
`;
await client.queryObject(
query,
[aid, views, danmakus, replies, likes, coins, shares, favorites],
);
logger.log(`Taken snapshot for video ${aid} in bulk.`, "net", "fn:takeBulkSnapshotForVideosWorker");
}
await bulkSetSnapshotStatus(client, ids, "completed");
for (const aid of aidsToFetch) {
const interval = await getRegularSnapshotInterval(client, aid);
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
await scheduleSnapshot(client, aid, "normal", Date.now() + interval * HOUR);
}
return `DONE`;
} catch (e) {
if (e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE") {
logger.warn(
`No available proxy for bulk request now.`,
"mq",
"fn:takeBulkSnapshotForVideosWorker",
);
await bulkSetSnapshotStatus(client, ids, "completed");
await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 2 * MINUTE);
return;
}
logger.error(e as Error, "mq", "fn:takeBulkSnapshotForVideosWorker");
await bulkSetSnapshotStatus(client, ids, "failed");
} finally {
client.release();
}
};
export const takeSnapshotForVideoWorker = async (job: Job) => {
const id = job.data.id;
const aid = Number(job.data.aid);
const type = job.data.type;
const task = snapshotTypeToTaskMap[type] ?? "snapshotVideo";
const client = await db.connect();
const retryInterval = type === "milestone" ? 5 * SECOND : 2 * MINUTE;
const exists = await snapshotScheduleExists(client, id);
if (!exists) {
client.release();
return;
}
const status = await getBiliVideoStatus(client, aid);
if (status !== 0) {
client.release();
return `REFUSE_WORKING_BILI_STATUS_${status}`;
}
try {
await setSnapshotStatus(client, id, "processing");
const stat = await insertVideoSnapshot(client, aid, task);
if (typeof stat === "number") {
await setBiliVideoStatus(client, aid, stat);
await setSnapshotStatus(client, id, "completed");
return `GET_BILI_STATUS_${stat}`;
}
await setSnapshotStatus(client, id, "completed");
if (type === "normal") {
const interval = await getRegularSnapshotInterval(client, aid);
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
await scheduleSnapshot(client, aid, type, Date.now() + interval * HOUR);
return `DONE`;
} else if (type === "new") {
const publihsedAt = await getSongsPublihsedAt(client, aid);
const timeSincePublished = stat.time - publihsedAt!;
const viewsPerHour = stat.views / timeSincePublished * HOUR;
if (timeSincePublished > 48 * HOUR) {
return `DONE`;
}
if (timeSincePublished > 2 * HOUR && viewsPerHour < 10) {
return `DONE`;
}
let intervalMins = 240;
if (viewsPerHour > 50) {
intervalMins = 120;
}
if (viewsPerHour > 100) {
intervalMins = 60;
}
if (viewsPerHour > 1000) {
intervalMins = 15;
}
await scheduleSnapshot(client, aid, type, Date.now() + intervalMins * MINUTE, true);
}
if (type !== "milestone") return `DONE`;
const eta = await getAdjustedShortTermETA(client, aid);
if (eta > 72) return "ETA_TOO_LONG";
const now = Date.now();
const targetTime = now + eta * HOUR;
await scheduleSnapshot(client, aid, type, targetTime);
return `DONE`;
} catch (e) {
if (e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE") {
logger.warn(
`No available proxy for aid ${job.data.aid}.`,
"mq",
"fn:takeSnapshotForVideoWorker",
);
await setSnapshotStatus(client, id, "completed");
await scheduleSnapshot(client, aid, type, Date.now() + retryInterval);
return;
}
logger.error(e as Error, "mq", "fn:takeSnapshotForVideoWorker");
await setSnapshotStatus(client, id, "failed");
} finally {
client.release();
}
};
export const scheduleCleanupWorker = async (_job: Job) => {
const client = await db.connect();
try {
const query = `
SELECT id, aid, type
FROM snapshot_schedule
WHERE status IN ('pending', 'processing')
AND started_at < NOW() - INTERVAL '5 minutes'
`;
const { rows } = await client.queryObject<{ id: bigint; aid: bigint; type: string }>(query);
if (rows.length === 0) return;
for (const row of rows) {
const id = Number(row.id);
const aid = Number(row.aid);
const type = row.type;
await setSnapshotStatus(client, id, "timeout");
await scheduleSnapshot(client, aid, type, Date.now() + 10 * SECOND);
logger.log(
`Schedule ${id} has no response received for 5 minutes, rescheduled.`,
"mq",
"fn:scheduleCleanupWorker",
);
}
} catch (e) {
logger.error(e as Error, "mq", "fn:scheduleCleanupWorker");
} finally {
client.release();
}
};

@@ -1 +0,0 @@
export * from "lib/mq/exec/getLatestVideos.ts";

@@ -1,7 +0,0 @@
import { Queue } from "bullmq";
export const LatestVideosQueue = new Queue("latestVideos");
export const ClassifyVideoQueue = new Queue("classifyVideo");
export const SnapshotQueue = new Queue("snapshot");

@@ -1,67 +0,0 @@
import { MINUTE, SECOND } from "$std/datetime/constants.ts";
import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "lib/mq/index.ts";
import logger from "lib/log/logger.ts";
import { initSnapshotWindowCounts } from "lib/db/snapshotSchedule.ts";
import { db } from "lib/db/init.ts";
import { redis } from "lib/db/redis.ts";
export async function initMQ() {
const client = await db.connect();
try {
await initSnapshotWindowCounts(client, redis);
await LatestVideosQueue.upsertJobScheduler("getLatestVideos", {
every: 1 * MINUTE,
immediately: true,
});
await ClassifyVideoQueue.upsertJobScheduler("classifyVideos", {
every: 5 * MINUTE,
immediately: true,
});
await LatestVideosQueue.upsertJobScheduler("collectSongs", {
every: 3 * MINUTE,
immediately: true,
});
await SnapshotQueue.upsertJobScheduler("snapshotTick", {
every: 1 * SECOND,
immediately: true,
}, {
opts: {
removeOnComplete: 1,
removeOnFail: 1,
},
});
await SnapshotQueue.upsertJobScheduler("bulkSnapshotTick", {
every: 15 * SECOND,
immediately: true,
}, {
opts: {
removeOnComplete: 1,
removeOnFail: 1,
},
});
await SnapshotQueue.upsertJobScheduler("collectMilestoneSnapshots", {
every: 5 * MINUTE,
immediately: true,
});
await SnapshotQueue.upsertJobScheduler("dispatchRegularSnapshots", {
every: 30 * MINUTE,
immediately: true,
});
await SnapshotQueue.upsertJobScheduler("scheduleCleanup", {
every: 30 * MINUTE,
immediately: true,
});
logger.log("Message queue initialized.");
} finally {
client.release();
}
}

@@ -1,56 +0,0 @@
import { SlidingWindow } from "lib/mq/slidingWindow.ts";
export interface RateLimiterConfig {
window: SlidingWindow;
max: number;
}
export class RateLimiter {
private readonly configs: RateLimiterConfig[];
private readonly configEventNames: string[];
/*
* @param name The name of the rate limiter
* @param configs The configuration of the rate limiter, containing:
* - window: The sliding window to use
* - max: The maximum number of events allowed in the window
*/
constructor(name: string, configs: RateLimiterConfig[]) {
this.configs = configs;
this.configEventNames = configs.map((_, index) => `${name}_config_${index}`);
}
/*
* Check if the event has reached the rate limit
*/
async getAvailability(): Promise<boolean> {
for (let i = 0; i < this.configs.length; i++) {
const config = this.configs[i];
const eventName = this.configEventNames[i];
const count = await config.window.count(eventName);
if (count >= config.max) {
return false;
}
}
return true;
}
/*
* Trigger an event in the rate limiter
*/
async trigger(): Promise<void> {
for (let i = 0; i < this.configs.length; i++) {
const config = this.configs[i];
const eventName = this.configEventNames[i];
await config.window.event(eventName);
}
}
async clear(): Promise<void> {
for (let i = 0; i < this.configs.length; i++) {
const config = this.configs[i];
const eventName = this.configEventNames[i];
await config.window.clear(eventName);
}
}
}
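A minimal usage sketch for the class above. The module paths and the SlidingWindow constructor signature are assumptions here, since lib/mq/slidingWindow.ts is not shown in this diff:

import { Redis } from "ioredis";
import { SECOND, MINUTE } from "$std/datetime/constants.ts";
import { SlidingWindow } from "lib/mq/slidingWindow.ts";
import { RateLimiter } from "lib/mq/rateLimiter.ts"; // assumed module path

const redis = new Redis({ maxRetriesPerRequest: null });
// Assumption: SlidingWindow is constructed from a Redis client and a window size in ms.
const limiter = new RateLimiter("biliAPI", [
	{ window: new SlidingWindow(redis, 1 * SECOND), max: 5 },
	{ window: new SlidingWindow(redis, 5 * MINUTE), max: 1000 },
]);

// Trigger an event only while every configured window still has capacity.
if (await limiter.getAvailability()) {
	await limiter.trigger();
	// ...perform the rate-limited request here...
}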

@@ -1,31 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { aidExistsInSongs, getNotCollectedSongs } from "lib/db/songs.ts";
import logger from "lib/log/logger.ts";
import { scheduleSnapshot } from "lib/db/snapshotSchedule.ts";
import { MINUTE } from "$std/datetime/constants.ts";
export async function collectSongs(client: Client) {
const aids = await getNotCollectedSongs(client);
for (const aid of aids) {
const exists = await aidExistsInSongs(client, aid);
if (exists) continue;
await insertIntoSongs(client, aid);
await scheduleSnapshot(client, aid, "new", Date.now() + 10 * MINUTE, true);
logger.log(`Video ${aid} was added into the songs table.`, "mq", "fn:collectSongs");
}
}
export async function insertIntoSongs(client: Client, aid: number) {
await client.queryObject(
`
INSERT INTO songs (aid, published_at, duration)
VALUES (
$1,
(SELECT published_at FROM bilibili_metadata WHERE aid = $1),
(SELECT duration FROM bilibili_metadata WHERE aid = $1)
)
ON CONFLICT DO NOTHING
`,
[aid],
);
}

@@ -1,46 +0,0 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { getVideoDetails } from "lib/net/getVideoDetails.ts";
import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
import logger from "lib/log/logger.ts";
import { ClassifyVideoQueue } from "lib/mq/index.ts";
import { userExistsInBiliUsers, videoExistsInAllData } from "lib/db/allData.ts";
import { HOUR, SECOND } from "$std/datetime/constants.ts";
export async function insertVideoInfo(client: Client, aid: number) {
const videoExists = await videoExistsInAllData(client, aid);
if (videoExists) {
return;
}
const data = await getVideoDetails(aid);
if (data === null) {
return null;
}
const bvid = data.View.bvid;
const desc = data.View.desc;
const uid = data.View.owner.mid;
const tags = data.Tags
.filter((tag) => !["old_channel", "topic"].includes(tag.tag_type))
.map((tag) => tag.tag_name).join(",");
const title = data.View.title;
const published_at = formatTimestampToPsql(data.View.pubdate * SECOND + 8 * HOUR);
const duration = data.View.duration;
await client.queryObject(
`INSERT INTO bilibili_metadata (aid, bvid, description, uid, tags, title, published_at, duration)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
[aid, bvid, desc, uid, tags, title, published_at, duration],
);
const userExists = await userExistsInBiliUsers(client, uid);
if (!userExists) {
await client.queryObject(
`INSERT INTO bilibili_user (uid, username, "desc", fans) VALUES ($1, $2, $3, $4)`,
[uid, data.View.owner.name, data.Card.card.sign, data.Card.follower],
);
} else {
await client.queryObject(
`UPDATE bilibili_user SET fans = $1 WHERE uid = $2`,
[data.Card.follower, uid],
);
}
logger.log(`Inserted video metadata for aid: ${aid}`, "mq");
await ClassifyVideoQueue.add("classifyVideo", { aid });
}

@@ -1,27 +0,0 @@
import netScheduler from "lib/mq/scheduler.ts";
import { VideoInfoData, VideoInfoResponse } from "lib/net/bilibili.d.ts";
import logger from "lib/log/logger.ts";
/*
* Fetch video metadata from bilibili API
* @param {number} aid - The video's aid
* @param {string} task - The task name used in scheduler. It can be one of the following:
* - snapshotVideo
* - getVideoInfo
* - snapshotMilestoneVideo
* @returns {Promise<VideoInfoData | number>} VideoInfoData or the error code returned by bilibili API
* @throws {NetSchedulerError} - The error will be thrown in following cases:
* - No proxy is available currently: with error code `NO_PROXY_AVAILABLE`
* - The native `fetch` function threw an error: with error code `FETCH_ERROR`
* - The alicloud-fc threw an error: with error code `ALICLOUD_FC_ERROR`
*/
export async function getVideoInfo(aid: number, task: string): Promise<VideoInfoData | number> {
const url = `https://api.bilibili.com/x/web-interface/view?aid=${aid}`;
const data = await netScheduler.request<VideoInfoResponse>(url, task);
const errMessage = `Error fetching metadata for ${aid}:`;
if (data.code !== 0) {
logger.error(errMessage + data.code + "-" + data.message, "net", "fn:getVideoInfo");
return data.code;
}
return data.data;
}

main.ts
@@ -1,13 +0,0 @@
/// <reference no-default-lib="true" />
/// <reference lib="dom" />
/// <reference lib="dom.iterable" />
/// <reference lib="dom.asynciterable" />
/// <reference lib="deno.ns" />
import "$std/dotenv/load.ts";
import { start } from "$fresh/server.ts";
import manifest from "./fresh.gen.ts";
import config from "./fresh.config.ts";
await start(manifest, config);

Some files were not shown because too many files have changed in this diff.