diff --git a/deno.json b/deno.json index f4ff4ee..2a573a5 100644 --- a/deno.json +++ b/deno.json @@ -1,60 +1,8 @@ { - "lock": false, + "lock": false, + "workspace": ["./packages/crawler", "./packages/frontend", "./packages/backend", "./packages/core"], + "nodeModulesDir": "auto", "tasks": { - "crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts", - "crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts", - "check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx", - "cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -", - "manifest": "deno task cli manifest $(pwd)", - "start": "deno run -A --watch=static/,routes/ dev.ts", - "build": "deno run -A dev.ts build", - "preview": "deno run -A main.ts", - "update": "deno run -A -r https://fresh.deno.dev/update .", - "worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write --allow-run ./src/worker.ts", - "worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts", - "adder": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts", - "bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts", - "all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'", - "test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run" - }, - "lint": { - "rules": { - "tags": ["fresh", "recommended"] - } - }, - "exclude": ["**/_fresh/*"], - "imports": { - "@std/assert": "jsr:@std/assert@1", - "$fresh/": "https://deno.land/x/fresh@1.7.3/", - "preact": "https://esm.sh/preact@10.22.0", - "preact/": "https://esm.sh/preact@10.22.0/", - "@preact/signals": "https://esm.sh/*@preact/signals@1.2.2", - "@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1", - "tailwindcss": "npm:tailwindcss@3.4.1", - "tailwindcss/": "npm:/tailwindcss@3.4.1/", - "tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js", - "$std/": "https://deno.land/std@0.216.0/", - "@huggingface/transformers": "npm:@huggingface/transformers@3.0.0", - "bullmq": "npm:bullmq", - "lib/": "./lib/", - "ioredis": "npm:ioredis", - "@bull-board/api": "npm:@bull-board/api", - "@bull-board/express": "npm:@bull-board/express", - "express": "npm:express", - "src/": "./src/", - "onnxruntime": "npm:onnxruntime-node@1.19.2", - "chalk": "npm:chalk" - }, - "compilerOptions": { - "jsx": "react-jsx", - "jsxImportSource": "preact" - }, - "nodeModulesDir": "auto", - "fmt": { - "useTabs": true, - "lineWidth": 120, - "indentWidth": 4, - "semiColons": true, - "proseWrap": "always" + "crawler": "deno task --filter 'crawler' all" } } diff --git a/lib/ml/mantis.ts b/lib/ml/mantis.ts deleted file mode 100644 index 6960be9..0000000 --- a/lib/ml/mantis.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { AIManager } from "lib/ml/manager.ts"; -import * as ort from "onnxruntime"; -import logger from "lib/log/logger.ts"; -import { WorkerError } from "lib/mq/schema.ts"; - -const modelPath = "./model/model.onnx"; - -class MantisProto extends AIManager { - constructor() { - super(); - this.models = { - "predictor": modelPath, - }; - } - - public override async init(): Promise { - await super.init(); - } -} - -const Mantis = new MantisProto(); -export default Mantis; diff --git a/lib/mq/executors.ts b/lib/mq/executors.ts deleted file mode 100644 index 85c2cc1..0000000 --- a/lib/mq/executors.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "lib/mq/exec/getLatestVideos.ts"; diff --git a/packages/backend/deno.json b/packages/backend/deno.json new file mode 100644 index 0000000..e69de29 diff --git a/packages/core/deno.json b/packages/core/deno.json new file mode 100644 index 0000000..e69de29 diff --git a/lib/db/allData.ts b/packages/crawler/db/allData.ts similarity index 96% rename from lib/db/allData.ts rename to packages/crawler/db/allData.ts index bf92edd..461cb69 100644 --- a/lib/db/allData.ts +++ b/packages/crawler/db/allData.ts @@ -1,6 +1,6 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { AllDataType, BiliUserType } from "lib/db/schema.d.ts"; -import Akari from "lib/ml/akari.ts"; +import { AllDataType, BiliUserType } from "db/schema.d.ts"; +import Akari from "ml/akari.ts"; export async function videoExistsInAllData(client: Client, aid: number) { return await client.queryObject<{ exists: boolean }>( diff --git a/lib/db/init.ts b/packages/crawler/db/init.ts similarity index 72% rename from lib/db/init.ts rename to packages/crawler/db/init.ts index d206872..a1835b0 100644 --- a/lib/db/init.ts +++ b/packages/crawler/db/init.ts @@ -1,5 +1,5 @@ import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { postgresConfig } from "lib/db/pgConfig.ts"; +import { postgresConfig } from "db/pgConfig.ts"; const pool = new Pool(postgresConfig, 12); diff --git a/lib/db/pgConfig.ts b/packages/crawler/db/pgConfig.ts similarity index 100% rename from lib/db/pgConfig.ts rename to packages/crawler/db/pgConfig.ts diff --git a/lib/db/redis.ts b/packages/crawler/db/redis.ts similarity index 100% rename from lib/db/redis.ts rename to packages/crawler/db/redis.ts diff --git a/lib/db/schema.d.ts b/packages/crawler/db/schema.d.ts similarity index 100% rename from lib/db/schema.d.ts rename to packages/crawler/db/schema.d.ts diff --git a/lib/db/snapshot.ts b/packages/crawler/db/snapshot.ts similarity index 95% rename from lib/db/snapshot.ts rename to packages/crawler/db/snapshot.ts index 726bfc5..ef8009d 100644 --- a/lib/db/snapshot.ts +++ b/packages/crawler/db/snapshot.ts @@ -1,5 +1,5 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { LatestSnapshotType } from "lib/db/schema.d.ts"; +import { LatestSnapshotType } from "db/schema.d.ts"; export async function getVideosNearMilestone(client: Client) { const queryResult = await client.queryObject(` diff --git a/lib/db/snapshotSchedule.ts b/packages/crawler/db/snapshotSchedule.ts similarity index 98% rename from lib/db/snapshotSchedule.ts rename to packages/crawler/db/snapshotSchedule.ts index 68228b7..b98f900 100644 --- a/lib/db/snapshotSchedule.ts +++ b/packages/crawler/db/snapshotSchedule.ts @@ -1,9 +1,9 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts"; +import { formatTimestampToPsql } from "utils/formatTimestampToPostgre.ts"; import { SnapshotScheduleType } from "./schema.d.ts"; -import logger from "lib/log/logger.ts"; +import logger from "log/logger.ts"; import { MINUTE } from "$std/datetime/constants.ts"; -import { redis } from "lib/db/redis.ts"; +import { redis } from "db/redis.ts"; import { Redis } from "ioredis"; const REDIS_KEY = "cvsa:snapshot_window_counts"; diff --git a/lib/db/songs.ts b/packages/crawler/db/songs.ts similarity index 92% rename from lib/db/songs.ts rename to packages/crawler/db/songs.ts index 15a49b3..1bfa002 100644 --- a/lib/db/songs.ts +++ b/packages/crawler/db/songs.ts @@ -1,5 +1,5 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts"; +import { parseTimestampFromPsql } from "utils/formatTimestampToPostgre.ts"; export async function getNotCollectedSongs(client: Client) { const queryResult = await client.queryObject<{ aid: number }>(` diff --git a/packages/crawler/deno.json b/packages/crawler/deno.json new file mode 100644 index 0000000..1d91eda --- /dev/null +++ b/packages/crawler/deno.json @@ -0,0 +1,49 @@ +{ + "name": "@cvsa/crawler", + "tasks": { + "crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts", + "crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts", + "check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx", + "manifest": "deno task cli manifest $(pwd)", + "start": "deno run -A --watch=static/,routes/ dev.ts", + "build": "deno run -A dev.ts build", + "preview": "deno run -A main.ts", + "worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write --allow-run ./src/worker.ts", + "worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts", + "adder": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts", + "bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts", + "all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'", + "test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run" + }, + "lint": { + "rules": { + "tags": ["recommended"] + } + }, + "imports": { + "@std/assert": "jsr:@std/assert@1", + "$std/": "https://deno.land/std@0.216.0/", + "@huggingface/transformers": "npm:@huggingface/transformers@3.0.0", + "bullmq": "npm:bullmq", + "mq/": "./mq/", + "db/": "./db/", + "log/": "./log/", + "net/": "./net/", + "ml/": "./ml/", + "utils/": "./utils/", + "ioredis": "npm:ioredis", + "@bull-board/api": "npm:@bull-board/api", + "@bull-board/express": "npm:@bull-board/express", + "express": "npm:express", + "src/": "./src/", + "onnxruntime": "npm:onnxruntime-node@1.19.2", + "chalk": "npm:chalk" + }, + "fmt": { + "useTabs": true, + "lineWidth": 120, + "indentWidth": 4, + "semiColons": true, + "proseWrap": "always" + } +} \ No newline at end of file diff --git a/lib/log/logger.ts b/packages/crawler/log/logger.ts similarity index 100% rename from lib/log/logger.ts rename to packages/crawler/log/logger.ts diff --git a/lib/log/test.ts b/packages/crawler/log/test.ts similarity index 89% rename from lib/log/test.ts rename to packages/crawler/log/test.ts index 71c719c..ee5953c 100644 --- a/lib/log/test.ts +++ b/packages/crawler/log/test.ts @@ -1,4 +1,4 @@ -import logger from "lib/log/logger.ts"; +import logger from "log/logger.ts"; logger.error(Error("test error"), "test service"); logger.debug(`some string`); diff --git a/lib/ml/akari.ts b/packages/crawler/ml/akari.ts similarity index 95% rename from lib/ml/akari.ts rename to packages/crawler/ml/akari.ts index d5ce9b2..ed1153e 100644 --- a/lib/ml/akari.ts +++ b/packages/crawler/ml/akari.ts @@ -1,7 +1,7 @@ -import { AIManager } from "lib/ml/manager.ts"; +import { AIManager } from "ml/manager.ts"; import * as ort from "onnxruntime"; -import logger from "lib/log/logger.ts"; -import { WorkerError } from "lib/mq/schema.ts"; +import logger from "log/logger.ts"; +import { WorkerError } from "mq/schema.ts"; import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers"; const tokenizerModel = "alikia2x/jina-embedding-v3-m2v-1024"; diff --git a/lib/ml/benchmark.ts b/packages/crawler/ml/benchmark.ts similarity index 100% rename from lib/ml/benchmark.ts rename to packages/crawler/ml/benchmark.ts diff --git a/lib/ml/manager.ts b/packages/crawler/ml/manager.ts similarity index 92% rename from lib/ml/manager.ts rename to packages/crawler/ml/manager.ts index 8230fcf..42f783e 100644 --- a/lib/ml/manager.ts +++ b/packages/crawler/ml/manager.ts @@ -1,6 +1,6 @@ import * as ort from "onnxruntime"; -import logger from "lib/log/logger.ts"; -import { WorkerError } from "lib/mq/schema.ts"; +import logger from "log/logger.ts"; +import { WorkerError } from "mq/schema.ts"; export class AIManager { public sessions: { [key: string]: ort.InferenceSession } = {}; diff --git a/lib/ml/quant_benchmark.ts b/packages/crawler/ml/quant_benchmark.ts similarity index 100% rename from lib/ml/quant_benchmark.ts rename to packages/crawler/ml/quant_benchmark.ts diff --git a/lib/mq/exec/classifyVideo.ts b/packages/crawler/mq/exec/classifyVideo.ts similarity index 79% rename from lib/mq/exec/classifyVideo.ts rename to packages/crawler/mq/exec/classifyVideo.ts index 20545a0..c813b7b 100644 --- a/lib/mq/exec/classifyVideo.ts +++ b/packages/crawler/mq/exec/classifyVideo.ts @@ -1,13 +1,13 @@ import { Job } from "bullmq"; -import { db } from "lib/db/init.ts"; -import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "lib/db/allData.ts"; -import Akari from "lib/ml/akari.ts"; -import { ClassifyVideoQueue } from "lib/mq/index.ts"; -import logger from "lib/log/logger.ts"; -import { lockManager } from "lib/mq/lockManager.ts"; -import { aidExistsInSongs } from "lib/db/songs.ts"; -import { insertIntoSongs } from "lib/mq/task/collectSongs.ts"; -import { scheduleSnapshot } from "lib/db/snapshotSchedule.ts"; +import { db } from "db/init.ts"; +import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "db/allData.ts"; +import Akari from "ml/akari.ts"; +import { ClassifyVideoQueue } from "mq/index.ts"; +import logger from "log/logger.ts"; +import { lockManager } from "mq/lockManager.ts"; +import { aidExistsInSongs } from "db/songs.ts"; +import { insertIntoSongs } from "mq/task/collectSongs.ts"; +import { scheduleSnapshot } from "db/snapshotSchedule.ts"; import { MINUTE } from "$std/datetime/constants.ts"; export const classifyVideoWorker = async (job: Job) => { diff --git a/lib/mq/exec/getLatestVideos.ts b/packages/crawler/mq/exec/getLatestVideos.ts similarity index 74% rename from lib/mq/exec/getLatestVideos.ts rename to packages/crawler/mq/exec/getLatestVideos.ts index 34b5d1a..7a19738 100644 --- a/lib/mq/exec/getLatestVideos.ts +++ b/packages/crawler/mq/exec/getLatestVideos.ts @@ -1,8 +1,8 @@ import { Job } from "bullmq"; -import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts"; -import { db } from "lib/db/init.ts"; -import { insertVideoInfo } from "lib/mq/task/getVideoDetails.ts"; -import { collectSongs } from "lib/mq/task/collectSongs.ts"; +import { queueLatestVideos } from "mq/task/queueLatestVideo.ts"; +import { db } from "db/init.ts"; +import { insertVideoInfo } from "mq/task/getVideoDetails.ts"; +import { collectSongs } from "mq/task/collectSongs.ts"; export const getLatestVideosWorker = async (_job: Job): Promise => { const client = await db.connect(); diff --git a/lib/mq/exec/snapshotTick.ts b/packages/crawler/mq/exec/snapshotTick.ts similarity index 95% rename from lib/mq/exec/snapshotTick.ts rename to packages/crawler/mq/exec/snapshotTick.ts index b18d845..876e05a 100644 --- a/lib/mq/exec/snapshotTick.ts +++ b/packages/crawler/mq/exec/snapshotTick.ts @@ -1,6 +1,6 @@ import { Job } from "bullmq"; -import { db } from "lib/db/init.ts"; -import { getLatestVideoSnapshot, getVideosNearMilestone } from "lib/db/snapshot.ts"; +import { db } from "db/init.ts"; +import { getLatestVideoSnapshot, getVideosNearMilestone } from "db/snapshot.ts"; import { bulkGetVideosWithoutProcessingSchedules, bulkScheduleSnapshot, @@ -16,18 +16,18 @@ import { snapshotScheduleExists, videoHasProcessingSchedule, getBulkSnapshotsInNextSecond -} from "lib/db/snapshotSchedule.ts"; +} from "db/snapshotSchedule.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts"; -import logger from "lib/log/logger.ts"; -import { SnapshotQueue } from "lib/mq/index.ts"; -import { insertVideoSnapshot } from "lib/mq/task/getVideoStats.ts"; -import { NetSchedulerError } from "lib/mq/scheduler.ts"; -import { getBiliVideoStatus, setBiliVideoStatus } from "lib/db/allData.ts"; -import { truncate } from "lib/utils/truncate.ts"; -import { lockManager } from "lib/mq/lockManager.ts"; -import { getSongsPublihsedAt } from "lib/db/songs.ts"; -import { bulkGetVideoStats } from "lib/net/bulkGetVideoStats.ts"; +import logger from "log/logger.ts"; +import { SnapshotQueue } from "mq/index.ts"; +import { insertVideoSnapshot } from "mq/task/getVideoStats.ts"; +import { NetSchedulerError } from "mq/scheduler.ts"; +import { getBiliVideoStatus, setBiliVideoStatus } from "db/allData.ts"; +import { truncate } from "utils/truncate.ts"; +import { lockManager } from "mq/lockManager.ts"; +import { getSongsPublihsedAt } from "db/songs.ts"; +import { bulkGetVideoStats } from "net/bulkGetVideoStats.ts"; const priorityMap: { [key: string]: number } = { "milestone": 1, diff --git a/packages/crawler/mq/executors.ts b/packages/crawler/mq/executors.ts new file mode 100644 index 0000000..1e486e1 --- /dev/null +++ b/packages/crawler/mq/executors.ts @@ -0,0 +1 @@ +export * from "mq/exec/getLatestVideos.ts"; diff --git a/lib/mq/index.ts b/packages/crawler/mq/index.ts similarity index 100% rename from lib/mq/index.ts rename to packages/crawler/mq/index.ts diff --git a/lib/mq/init.ts b/packages/crawler/mq/init.ts similarity index 86% rename from lib/mq/init.ts rename to packages/crawler/mq/init.ts index d408f8e..4a302d1 100644 --- a/lib/mq/init.ts +++ b/packages/crawler/mq/init.ts @@ -1,9 +1,9 @@ import { MINUTE, SECOND } from "$std/datetime/constants.ts"; -import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "lib/mq/index.ts"; -import logger from "lib/log/logger.ts"; -import { initSnapshotWindowCounts } from "lib/db/snapshotSchedule.ts"; -import { db } from "lib/db/init.ts"; -import { redis } from "lib/db/redis.ts"; +import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "mq/index.ts"; +import logger from "log/logger.ts"; +import { initSnapshotWindowCounts } from "db/snapshotSchedule.ts"; +import { db } from "db/init.ts"; +import { redis } from "db/redis.ts"; export async function initMQ() { const client = await db.connect(); diff --git a/lib/mq/lockManager.ts b/packages/crawler/mq/lockManager.ts similarity index 97% rename from lib/mq/lockManager.ts rename to packages/crawler/mq/lockManager.ts index f83b148..e0c7f8a 100644 --- a/lib/mq/lockManager.ts +++ b/packages/crawler/mq/lockManager.ts @@ -1,5 +1,5 @@ import { Redis } from "ioredis"; -import { redis } from "lib/db/redis.ts"; +import { redis } from "db/redis.ts"; class LockManager { private redis: Redis; diff --git a/lib/mq/rateLimiter.ts b/packages/crawler/mq/rateLimiter.ts similarity index 96% rename from lib/mq/rateLimiter.ts rename to packages/crawler/mq/rateLimiter.ts index 7f62547..aba7c3e 100644 --- a/lib/mq/rateLimiter.ts +++ b/packages/crawler/mq/rateLimiter.ts @@ -1,4 +1,4 @@ -import { SlidingWindow } from "lib/mq/slidingWindow.ts"; +import { SlidingWindow } from "mq/slidingWindow.ts"; export interface RateLimiterConfig { window: SlidingWindow; diff --git a/lib/mq/scheduler.ts b/packages/crawler/mq/scheduler.ts similarity index 98% rename from lib/mq/scheduler.ts rename to packages/crawler/mq/scheduler.ts index 6722519..0e8c036 100644 --- a/lib/mq/scheduler.ts +++ b/packages/crawler/mq/scheduler.ts @@ -1,7 +1,7 @@ -import logger from "lib/log/logger.ts"; -import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts"; -import { SlidingWindow } from "lib/mq/slidingWindow.ts"; -import { redis } from "lib/db/redis.ts"; +import logger from "log/logger.ts"; +import { RateLimiter, RateLimiterConfig } from "mq/rateLimiter.ts"; +import { SlidingWindow } from "mq/slidingWindow.ts"; +import { redis } from "db/redis.ts"; import Redis from "ioredis"; import { SECOND } from "$std/datetime/constants.ts"; diff --git a/lib/mq/schema.ts b/packages/crawler/mq/schema.ts similarity index 100% rename from lib/mq/schema.ts rename to packages/crawler/mq/schema.ts diff --git a/lib/mq/slidingWindow.ts b/packages/crawler/mq/slidingWindow.ts similarity index 100% rename from lib/mq/slidingWindow.ts rename to packages/crawler/mq/slidingWindow.ts diff --git a/lib/mq/task/collectSongs.ts b/packages/crawler/mq/task/collectSongs.ts similarity index 83% rename from lib/mq/task/collectSongs.ts rename to packages/crawler/mq/task/collectSongs.ts index b71aa3b..389ca06 100644 --- a/lib/mq/task/collectSongs.ts +++ b/packages/crawler/mq/task/collectSongs.ts @@ -1,7 +1,7 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { aidExistsInSongs, getNotCollectedSongs } from "lib/db/songs.ts"; -import logger from "lib/log/logger.ts"; -import { scheduleSnapshot } from "lib/db/snapshotSchedule.ts"; +import { aidExistsInSongs, getNotCollectedSongs } from "db/songs.ts"; +import logger from "log/logger.ts"; +import { scheduleSnapshot } from "db/snapshotSchedule.ts"; import { MINUTE } from "$std/datetime/constants.ts"; export async function collectSongs(client: Client) { diff --git a/lib/mq/task/getVideoDetails.ts b/packages/crawler/mq/task/getVideoDetails.ts similarity index 82% rename from lib/mq/task/getVideoDetails.ts rename to packages/crawler/mq/task/getVideoDetails.ts index ea5f903..9b675e5 100644 --- a/lib/mq/task/getVideoDetails.ts +++ b/packages/crawler/mq/task/getVideoDetails.ts @@ -1,9 +1,9 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { getVideoDetails } from "lib/net/getVideoDetails.ts"; -import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts"; -import logger from "lib/log/logger.ts"; -import { ClassifyVideoQueue } from "lib/mq/index.ts"; -import { userExistsInBiliUsers, videoExistsInAllData } from "lib/db/allData.ts"; +import { getVideoDetails } from "net/getVideoDetails.ts"; +import { formatTimestampToPsql } from "utils/formatTimestampToPostgre.ts"; +import logger from "log/logger.ts"; +import { ClassifyVideoQueue } from "mq/index.ts"; +import { userExistsInBiliUsers, videoExistsInAllData } from "db/allData.ts"; import { HOUR, SECOND } from "$std/datetime/constants.ts"; export async function insertVideoInfo(client: Client, aid: number) { diff --git a/lib/mq/task/getVideoStats.ts b/packages/crawler/mq/task/getVideoStats.ts similarity index 91% rename from lib/mq/task/getVideoStats.ts rename to packages/crawler/mq/task/getVideoStats.ts index 3be1cd7..34b6c42 100644 --- a/lib/mq/task/getVideoStats.ts +++ b/packages/crawler/mq/task/getVideoStats.ts @@ -1,7 +1,7 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { getVideoInfo } from "lib/net/getVideoInfo.ts"; -import { LatestSnapshotType } from "lib/db/schema.d.ts"; -import logger from "lib/log/logger.ts"; +import { getVideoInfo } from "net/getVideoInfo.ts"; +import { LatestSnapshotType } from "db/schema.d.ts"; +import logger from "log/logger.ts"; /* * Fetch video stats from bilibili API and insert into database diff --git a/lib/mq/task/queueLatestVideo.ts b/packages/crawler/mq/task/queueLatestVideo.ts similarity index 82% rename from lib/mq/task/queueLatestVideo.ts rename to packages/crawler/mq/task/queueLatestVideo.ts index d2e938b..d8b3993 100644 --- a/lib/mq/task/queueLatestVideo.ts +++ b/packages/crawler/mq/task/queueLatestVideo.ts @@ -1,10 +1,10 @@ import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; -import { getLatestVideoAids } from "lib/net/getLatestVideoAids.ts"; -import { videoExistsInAllData } from "lib/db/allData.ts"; -import { sleep } from "lib/utils/sleep.ts"; +import { getLatestVideoAids } from "net/getLatestVideoAids.ts"; +import { videoExistsInAllData } from "db/allData.ts"; +import { sleep } from "utils/sleep.ts"; import { SECOND } from "$std/datetime/constants.ts"; -import logger from "lib/log/logger.ts"; -import { LatestVideosQueue } from "lib/mq/index.ts"; +import logger from "log/logger.ts"; +import { LatestVideosQueue } from "mq/index.ts"; export async function queueLatestVideos( client: Client, diff --git a/lib/net/bilibili.d.ts b/packages/crawler/net/bilibili.d.ts similarity index 100% rename from lib/net/bilibili.d.ts rename to packages/crawler/net/bilibili.d.ts diff --git a/lib/net/bulkGetVideoStats.ts b/packages/crawler/net/bulkGetVideoStats.ts similarity index 86% rename from lib/net/bulkGetVideoStats.ts rename to packages/crawler/net/bulkGetVideoStats.ts index 7240bed..2b0c7f2 100644 --- a/lib/net/bulkGetVideoStats.ts +++ b/packages/crawler/net/bulkGetVideoStats.ts @@ -1,6 +1,6 @@ -import netScheduler from "lib/mq/scheduler.ts"; -import { MediaListInfoData, MediaListInfoResponse } from "lib/net/bilibili.d.ts"; -import logger from "lib/log/logger.ts"; +import netScheduler from "mq/scheduler.ts"; +import { MediaListInfoData, MediaListInfoResponse } from "net/bilibili.d.ts"; +import logger from "log/logger.ts"; /* * Bulk fetch video metadata from bilibili API diff --git a/lib/net/getLatestVideoAids.ts b/packages/crawler/net/getLatestVideoAids.ts similarity index 83% rename from lib/net/getLatestVideoAids.ts rename to packages/crawler/net/getLatestVideoAids.ts index 2fb44be..7dacd46 100644 --- a/lib/net/getLatestVideoAids.ts +++ b/packages/crawler/net/getLatestVideoAids.ts @@ -1,6 +1,6 @@ -import { VideoListResponse } from "lib/net/bilibili.d.ts"; -import logger from "lib/log/logger.ts"; -import netScheduler from "lib/mq/scheduler.ts"; +import { VideoListResponse } from "net/bilibili.d.ts"; +import logger from "log/logger.ts"; +import netScheduler from "mq/scheduler.ts"; export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise { const startFrom = 1 + pageSize * (page - 1); diff --git a/lib/net/getVideoDetails.ts b/packages/crawler/net/getVideoDetails.ts similarity index 73% rename from lib/net/getVideoDetails.ts rename to packages/crawler/net/getVideoDetails.ts index 9e421cf..d6d52c1 100644 --- a/lib/net/getVideoDetails.ts +++ b/packages/crawler/net/getVideoDetails.ts @@ -1,6 +1,6 @@ -import netScheduler from "lib/mq/scheduler.ts"; -import { VideoDetailsData, VideoDetailsResponse } from "lib/net/bilibili.d.ts"; -import logger from "lib/log/logger.ts"; +import netScheduler from "mq/scheduler.ts"; +import { VideoDetailsData, VideoDetailsResponse } from "net/bilibili.d.ts"; +import logger from "log/logger.ts"; export async function getVideoDetails(aid: number): Promise { const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`; diff --git a/lib/net/getVideoInfo.ts b/packages/crawler/net/getVideoInfo.ts similarity index 87% rename from lib/net/getVideoInfo.ts rename to packages/crawler/net/getVideoInfo.ts index 897fc62..0533c53 100644 --- a/lib/net/getVideoInfo.ts +++ b/packages/crawler/net/getVideoInfo.ts @@ -1,6 +1,6 @@ -import netScheduler from "lib/mq/scheduler.ts"; -import { VideoInfoData, VideoInfoResponse } from "lib/net/bilibili.d.ts"; -import logger from "lib/log/logger.ts"; +import netScheduler from "mq/scheduler.ts"; +import { VideoInfoData, VideoInfoResponse } from "net/bilibili.d.ts"; +import logger from "log/logger.ts"; /* * Fetch video metadata from bilibili API diff --git a/src/bullui.ts b/packages/crawler/src/bullui.ts similarity index 97% rename from src/bullui.ts rename to packages/crawler/src/bullui.ts index acf8d3f..5765540 100644 --- a/src/bullui.ts +++ b/packages/crawler/src/bullui.ts @@ -2,7 +2,7 @@ import express from "express"; import { createBullBoard } from "@bull-board/api"; import { BullMQAdapter } from "@bull-board/api/bullMQAdapter.js"; import { ExpressAdapter } from "@bull-board/express"; -import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "lib/mq/index.ts"; +import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "mq/index.ts"; const serverAdapter = new ExpressAdapter(); serverAdapter.setBasePath("/"); diff --git a/src/filterWorker.ts b/packages/crawler/src/filterWorker.ts similarity index 77% rename from src/filterWorker.ts rename to packages/crawler/src/filterWorker.ts index b14ef07..cb336c4 100644 --- a/src/filterWorker.ts +++ b/packages/crawler/src/filterWorker.ts @@ -1,10 +1,10 @@ import { ConnectionOptions, Job, Worker } from "bullmq"; -import { redis } from "lib/db/redis.ts"; -import logger from "lib/log/logger.ts"; -import { classifyVideosWorker, classifyVideoWorker } from "lib/mq/exec/classifyVideo.ts"; -import { WorkerError } from "lib/mq/schema.ts"; -import { lockManager } from "lib/mq/lockManager.ts"; -import Akari from "lib/ml/akari.ts"; +import { redis } from "db/redis.ts"; +import logger from "log/logger.ts"; +import { classifyVideosWorker, classifyVideoWorker } from "mq/exec/classifyVideo.ts"; +import { WorkerError } from "mq/schema.ts"; +import { lockManager } from "mq/lockManager.ts"; +import Akari from "ml/akari.ts"; Deno.addSignalListener("SIGINT", async () => { logger.log("SIGINT Received: Shutting down workers...", "mq"); diff --git a/packages/crawler/src/jobAdder.ts b/packages/crawler/src/jobAdder.ts new file mode 100644 index 0000000..3aefd24 --- /dev/null +++ b/packages/crawler/src/jobAdder.ts @@ -0,0 +1,3 @@ +import { initMQ } from "mq/init.ts"; + +await initMQ(); diff --git a/src/worker.ts b/packages/crawler/src/worker.ts similarity index 87% rename from src/worker.ts rename to packages/crawler/src/worker.ts index fae7b6a..e240a0b 100644 --- a/src/worker.ts +++ b/packages/crawler/src/worker.ts @@ -1,10 +1,10 @@ import { ConnectionOptions, Job, Worker } from "bullmq"; -import { collectSongsWorker, getLatestVideosWorker } from "lib/mq/executors.ts"; -import { redis } from "lib/db/redis.ts"; -import logger from "lib/log/logger.ts"; -import { lockManager } from "lib/mq/lockManager.ts"; -import { WorkerError } from "lib/mq/schema.ts"; -import { getVideoInfoWorker } from "lib/mq/exec/getLatestVideos.ts"; +import { collectSongsWorker, getLatestVideosWorker } from "mq/executors.ts"; +import { redis } from "db/redis.ts"; +import logger from "log/logger.ts"; +import { lockManager } from "mq/lockManager.ts"; +import { WorkerError } from "mq/schema.ts"; +import { getVideoInfoWorker } from "mq/exec/getLatestVideos.ts"; import { collectMilestoneSnapshotsWorker, regularSnapshotsWorker, @@ -13,7 +13,7 @@ import { scheduleCleanupWorker, takeBulkSnapshotForVideosWorker, bulkSnapshotTickWorker -} from "lib/mq/exec/snapshotTick.ts"; +} from "mq/exec/snapshotTick.ts"; Deno.addSignalListener("SIGINT", async () => { logger.log("SIGINT Received: Shutting down workers...", "mq"); diff --git a/lib/utils/formatSeconds.ts b/packages/crawler/utils/formatSeconds.ts similarity index 100% rename from lib/utils/formatSeconds.ts rename to packages/crawler/utils/formatSeconds.ts diff --git a/lib/utils/formatTimestampToPostgre.ts b/packages/crawler/utils/formatTimestampToPostgre.ts similarity index 100% rename from lib/utils/formatTimestampToPostgre.ts rename to packages/crawler/utils/formatTimestampToPostgre.ts diff --git a/lib/utils/sleep.ts b/packages/crawler/utils/sleep.ts similarity index 100% rename from lib/utils/sleep.ts rename to packages/crawler/utils/sleep.ts diff --git a/lib/utils/truncate.ts b/packages/crawler/utils/truncate.ts similarity index 100% rename from lib/utils/truncate.ts rename to packages/crawler/utils/truncate.ts diff --git a/packages/frontend/deno.json b/packages/frontend/deno.json new file mode 100644 index 0000000..e69de29 diff --git a/src/jobAdder.ts b/src/jobAdder.ts deleted file mode 100644 index cb107f4..0000000 --- a/src/jobAdder.ts +++ /dev/null @@ -1,3 +0,0 @@ -import { initMQ } from "lib/mq/init.ts"; - -await initMQ(); diff --git a/test/ml/akari.json b/test/ml/akari.json deleted file mode 100644 index 9de1219..0000000 --- a/test/ml/akari.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "test1": [ - { - "title": "【洛天依】《一花依世界》(2024重调版)|“抬头仰望,夜空多安详”【原创PV付】", - "desc": "本家:BV1Vs411H7JH\n作曲:LS\n作词:杏花包子\n调教:鬼面P\n混音:虎皮猫P\n演唱:洛天依\n曲绘:山下鸭鸭窝\n映像:阿妍\n——————————————————————\n本稿为同人二创,非本家重制", - "tags": "发现《一花依世界》, Vsinger创作激励计划, 洛天依, VOCALOID CHINA, 翻唱, 原创PV付, ACE虚拟歌姬, 中文VOCALOID, 国风电子, 一花依世界, ACE Studio, Vsinger创作激励计划2024冬季物语", - "label": 2 - }, - { - "title": "【鏡音レン】アカシア【VOCALOID Cover】", - "desc": "鏡音リン・レン 13th Anniversary\n\nMusic:BUMP OF CHICKEN https://youtu.be/BoZ0Zwab6Oc\nust:Maplestyle sm37853236\nOff Vocal: https://youtu.be/YMzrUzq1uX0\nSinger:鏡音レン\n\n氷雨ハルカ\nYoutube :https://t.co/8zuv6g7Acm\nniconico:https://t.co/C6DRfdYAp0\ntwitter :https://twitter.com/hisame_haruka\n\n転載禁止\nPlease do not reprint without my permission.", - "tags": "鏡音レン", - "label": 0 - }, - { - "title": "【洛天依原创曲】谪星【姆斯塔之谕】", - "desc": "谪星\n\n策划/世界观:听雨\n作词:听雨\n作曲/编曲:太白\n混音:虎皮猫\n人设:以木\n曲绘:Ar极光\n调校:哈士奇p\n视频:苏卿白", - "tags": "2025虚拟歌手贺岁纪, 洛天依, 原创歌曲, VOCALOID, 虚拟歌手, 原创音乐, 姆斯塔, 中文VOCALOID", - "label": 1 - } - ] -} diff --git a/test/ml/akari.test.ts b/test/ml/akari.test.ts deleted file mode 100644 index f254a01..0000000 --- a/test/ml/akari.test.ts +++ /dev/null @@ -1,46 +0,0 @@ -import Akari from "lib/ml/akari.ts"; -import { assertEquals, assertGreaterOrEqual } from "jsr:@std/assert"; -import { join } from "$std/path/join.ts"; -import { SECOND } from "$std/datetime/constants.ts"; - -Deno.test("Akari AI - normal cases accuracy test", async () => { - const path = import.meta.dirname!; - const dataPath = join(path, "akari.json"); - const rawData = await Deno.readTextFile(dataPath); - const data = JSON.parse(rawData); - await Akari.init(); - for (const testCase of data.test1) { - const result = await Akari.classifyVideo( - testCase.title, - testCase.desc, - testCase.tags, - ); - assertEquals(result, testCase.label); - } -}); - -Deno.test("Akari AI - performance test", async () => { - const path = import.meta.dirname!; - const dataPath = join(path, "akari.json"); - const rawData = await Deno.readTextFile(dataPath); - const data = JSON.parse(rawData); - await Akari.init(); - const N = 200; - const testCase = data.test1[0]; - const title = testCase.title; - const desc = testCase.desc; - const tags = testCase.tags; - const time = performance.now(); - for (let i = 0; i < N; i++) { - await Akari.classifyVideo( - title, - desc, - tags, - ); - } - const end = performance.now(); - const elapsed = (end - time) / SECOND; - const throughput = N / elapsed; - assertGreaterOrEqual(throughput, 100); - console.log(`Akari AI throughput: ${throughput.toFixed(1)} samples / sec`); -}); diff --git a/test/mq/rateLimiter.test.ts b/test/mq/rateLimiter.test.ts deleted file mode 100644 index 2f19723..0000000 --- a/test/mq/rateLimiter.test.ts +++ /dev/null @@ -1,91 +0,0 @@ -import { assertEquals } from "jsr:@std/assert"; -import { SlidingWindow } from "lib/mq/slidingWindow.ts"; -import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts"; -import { Redis } from "npm:ioredis@5.5.0"; - -Deno.test("RateLimiter works correctly", async () => { - const redis = new Redis({ maxRetriesPerRequest: null }); - const windowSize = 5; - const maxRequests = 10; - - const slidingWindow = new SlidingWindow(redis, windowSize); - const config: RateLimiterConfig = { - window: slidingWindow, - max: maxRequests, - }; - const rateLimiter = new RateLimiter("test_event", [config]); - await rateLimiter.clear(); - - // Initial availability should be true - assertEquals(await rateLimiter.getAvailability(), true); - - // Trigger events up to the limit - for (let i = 0; i < maxRequests; i++) { - await rateLimiter.trigger(); - } - - // Availability should now be false - assertEquals(await rateLimiter.getAvailability(), false); - - // Wait for the window to slide - await new Promise((resolve) => setTimeout(resolve, windowSize * 1000 + 500)); - - // Availability should be true again - assertEquals(await rateLimiter.getAvailability(), true); - - redis.quit(); -}); - -Deno.test("Multiple configs work correctly", async () => { - const redis = new Redis({ maxRetriesPerRequest: null }); - const windowSize1 = 1; - const maxRequests1 = 2; - const windowSize2 = 5; - const maxRequests2 = 6; - - const slidingWindow1 = new SlidingWindow(redis, windowSize1); - const config1: RateLimiterConfig = { - window: slidingWindow1, - max: maxRequests1, - }; - const slidingWindow2 = new SlidingWindow(redis, windowSize2); - const config2: RateLimiterConfig = { - window: slidingWindow2, - max: maxRequests2, - }; - const rateLimiter = new RateLimiter("test_event_multi", [config1, config2]); - await rateLimiter.clear(); - - // Initial availability should be true - assertEquals(await rateLimiter.getAvailability(), true); - - // Trigger events up to the limit of the first config - for (let i = 0; i < maxRequests1; i++) { - await rateLimiter.trigger(); - } - - // Availability should now be false (due to config1) - assertEquals(await rateLimiter.getAvailability(), false); - - // Wait for the first window to slide - await new Promise((resolve) => setTimeout(resolve, windowSize1 * 1000 + 500)); - - // Availability should now be true (due to config1) - assertEquals(await rateLimiter.getAvailability(), true); - - // Trigger events up to the limit of the second config - for (let i = maxRequests1; i < maxRequests2; i++) { - await rateLimiter.trigger(); - } - - // Availability should still be false (due to config2) - assertEquals(await rateLimiter.getAvailability(), false); - - // Wait for the second window to slide - await new Promise((resolve) => setTimeout(resolve, windowSize2 * 1000 + 500)); - - // Availability should be true again - assertEquals(await rateLimiter.getAvailability(), true); - - redis.quit(); -}); diff --git a/test/mq/slidingWindow.test.ts b/test/mq/slidingWindow.test.ts deleted file mode 100644 index a749edc..0000000 --- a/test/mq/slidingWindow.test.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { assertEquals } from "jsr:@std/assert"; -import { SlidingWindow } from "lib/mq/slidingWindow.ts"; -import { Redis } from "ioredis"; - -Deno.test("SlidingWindow - event and count", async () => { - const redis = new Redis({ maxRetriesPerRequest: null }); - const windowSize = 5000; // 5 seconds - const slidingWindow = new SlidingWindow(redis, windowSize); - const eventName = "test_event"; - await slidingWindow.clear(eventName); - - await slidingWindow.event(eventName); - const count = await slidingWindow.count(eventName); - - assertEquals(count, 1); - redis.quit(); -}); - -Deno.test("SlidingWindow - multiple events", async () => { - const redis = new Redis({ maxRetriesPerRequest: null }); - const windowSize = 5000; // 5 seconds - const slidingWindow = new SlidingWindow(redis, windowSize); - const eventName = "test_event"; - await slidingWindow.clear(eventName); - - await slidingWindow.event(eventName); - await slidingWindow.event(eventName); - await slidingWindow.event(eventName); - const count = await slidingWindow.count(eventName); - - assertEquals(count, 3); - redis.quit(); -}); - -Deno.test("SlidingWindow - no events", async () => { - const redis = new Redis({ maxRetriesPerRequest: null }); - const windowSize = 5000; // 5 seconds - const slidingWindow = new SlidingWindow(redis, windowSize); - const eventName = "test_event"; - await slidingWindow.clear(eventName); - - const count = await slidingWindow.count(eventName); - - assertEquals(count, 0); - redis.quit(); -}); - -Deno.test("SlidingWindow - different event names", async () => { - const redis = new Redis({ maxRetriesPerRequest: null }); - const windowSize = 5000; // 5 seconds - const slidingWindow = new SlidingWindow(redis, windowSize); - const eventName1 = "test_event_1"; - const eventName2 = "test_event_2"; - await slidingWindow.clear(eventName1); - await slidingWindow.clear(eventName2); - - await slidingWindow.event(eventName1); - await slidingWindow.event(eventName2); - - const count1 = await slidingWindow.count(eventName1); - const count2 = await slidingWindow.count(eventName2); - - assertEquals(count1, 1); - assertEquals(count2, 1); - redis.quit(); -}); - -Deno.test("SlidingWindow - large number of events", async () => { - const redis = new Redis({ maxRetriesPerRequest: null }); - const windowSize = 5000; // 5 seconds - const slidingWindow = new SlidingWindow(redis, windowSize); - const eventName = "test_event"; - await slidingWindow.clear(eventName); - const numEvents = 1000; - - for (let i = 0; i < numEvents; i++) { - await slidingWindow.event(eventName); - } - - const count = await slidingWindow.count(eventName); - - assertEquals(count, numEvents); - redis.quit(); -});