Compare commits

..

No commits in common. "e0776a452e79f2a17506180583cb75f110774931" and "d44ba8a0aee3f93802905a3bbb1dada481349fdc" have entirely different histories.

130 changed files with 715 additions and 900 deletions

12
.gitignore vendored
View File

@ -51,6 +51,7 @@ internal/
!tests/cases/projects/projectOption/**/node_modules !tests/cases/projects/projectOption/**/node_modules
!tests/cases/projects/NodeModulesSearch/**/* !tests/cases/projects/NodeModulesSearch/**/*
!tests/baselines/reference/project/nodeModules*/**/* !tests/baselines/reference/project/nodeModules*/**/*
.idea
yarn.lock yarn.lock
yarn-error.log yarn-error.log
.parallelperf.* .parallelperf.*
@ -77,11 +78,10 @@ node_modules/
# project specific # project specific
logs/ logs/
__pycache__ __pycache__
ml/filter/runs filter/runs
ml/pred/runs pred/runs
ml/pred/checkpoints pred/checkpoints
ml/pred/observed data/
ml/data/ filter/checkpoints
ml/filter/checkpoints
scripts scripts
model/ model/

9
.idea/.gitignore vendored
View File

@ -1,9 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
dataSources.xml

View File

@ -1,21 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.tmp" />
<excludeFolder url="file://$MODULE_DIR$/temp" />
<excludeFolder url="file://$MODULE_DIR$/tmp" />
<excludeFolder url="file://$MODULE_DIR$/ml/data" />
<excludeFolder url="file://$MODULE_DIR$/doc" />
<excludeFolder url="file://$MODULE_DIR$/ml/filter/checkpoints" />
<excludeFolder url="file://$MODULE_DIR$/ml/filter/runs" />
<excludeFolder url="file://$MODULE_DIR$/ml/lab/data" />
<excludeFolder url="file://$MODULE_DIR$/ml/lab/temp" />
<excludeFolder url="file://$MODULE_DIR$/logs" />
<excludeFolder url="file://$MODULE_DIR$/model" />
<excludeFolder url="file://$MODULE_DIR$/src/db" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -1,12 +0,0 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="GrazieInspection" enabled="false" level="GRAMMAR_ERROR" enabled_by_default="false" />
<inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>

View File

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/cvsa.iml" filepath="$PROJECT_DIR$/.idea/cvsa.iml" />
</modules>
</component>
</project>

View File

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SqlDialectMappings">
<file url="PROJECT" dialect="PostgreSQL" />
</component>
</project>

View File

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

12
components/Button.tsx Normal file
View File

@ -0,0 +1,12 @@
import { JSX } from "preact";
import { IS_BROWSER } from "$fresh/runtime.ts";
/**
 * Project-styled `<button>` element.
 *
 * All received props are forwarded to the underlying `<button>`. Two attributes
 * are then set after the spread and therefore take precedence over the same
 * attributes in `props`:
 * - `disabled` is forced on whenever `IS_BROWSER` is false; otherwise the
 *   caller's `disabled` prop is used.
 * - `class` is always the fixed utility-class string below.
 *
 * @param props Standard HTML button attributes (children, event handlers, ...).
 */
export function Button(props: JSX.HTMLAttributes<HTMLButtonElement>) {
	// When not in a browser the button cannot be interactive, so it is disabled regardless of props.
	const isDisabled = !IS_BROWSER || props.disabled;

	return (
		<button
			{...props}
			disabled={isDisabled}
			class="px-2 py-1 border-gray-500 border-2 rounded bg-white hover:bg-gray-200 transition-colors"
		/>
	);
}

55
data/filter/1.py Normal file
View File

@ -0,0 +1,55 @@
import json
import random
def process_data(input_file, output_file):
    """
    Append selected, relabelled samples from ``input_file`` to ``output_file``.

    Every line of ``input_file`` is parsed as a JSON object. A record is selected
    when its "model" and "human" values differ; records where they agree are
    still selected with probability 0.2 (``random.random() < 0.2``). A selected
    record has its "model" and "human" keys removed, gains a "label" key holding
    the former "human" value, and is appended to ``output_file`` as one JSON
    object per line.

    Before anything is written, the "aid" of every record already present in
    ``output_file`` is loaded. Records whose "aid" is already known (from the
    existing file or from an earlier line of this run) are skipped.

    Malformed input lines are reported on stdout and skipped; they never abort
    the run.

    Args:
        input_file (str): Path of the JSON-lines file to read.
        output_file (str): Path of the JSON-lines file to append to. It is
            created if it does not exist.
    """
    # Collect the aids already present in output_file for de-duplication.
    existing_data = set()
    try:
        with open(output_file, 'r', encoding='utf-8') as f_out:
            for line in f_out:
                try:
                    data = json.loads(line)
                    existing_data.add(data['aid'])
                except (json.JSONDecodeError, KeyError, TypeError):
                    # Best-effort load: skip lines that are not valid JSON, are
                    # not objects, or carry no usable "aid" instead of crashing.
                    pass
    except FileNotFoundError:
        pass  # No output file yet: nothing to de-duplicate against.

    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'a', encoding='utf-8') as f_out:
        for line in f_in:
            try:
                data = json.loads(line)
                # Keep every disagreement, plus a 20% random sample of agreements.
                if data['model'] != data['human'] or random.random() < 0.2:
                    if data['aid'] not in existing_data:  # skip aids already written
                        del data['model']
                        data['label'] = data['human']
                        del data['human']
                        f_out.write(json.dumps(data, ensure_ascii=False) + '\n')
                        existing_data.add(data['aid'])  # remember the aid just written
            except json.JSONDecodeError as e:
                print(f"JSON解码错误: {e}")
                print(f"错误行内容: {line.strip()}")
            except KeyError as e:
                # str(KeyError) is already quoted, so use the raw key to avoid ''key''.
                print(f"KeyError: 键 '{e.args[0]}' 不存在")
                print(f"错误行内容: {line.strip()}")
            except TypeError as e:
                # The line was valid JSON but not an object (or its aid is unhashable).
                print(f"TypeError: {e}")
                print(f"错误行内容: {line.strip()}")
# Run the filter: read samples from the input file and append the selected ones to the output file.
input_file, output_file = 'real_test.jsonl', 'labeled_data.jsonl'
process_data(input_file, output_file)
print(f"处理完成,结果已写入 {output_file}")

View File

@ -1,15 +1,55 @@
{ {
"lock": false, "lock": false,
"workspace": [
"./packages/crawler",
"./packages/frontend",
"./packages/backend",
"./packages/core"
],
"nodeModulesDir": "auto",
"tasks": { "tasks": {
"crawler": "deno task --filter 'crawler' all" "crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
"cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -",
"manifest": "deno task cli manifest $(pwd)",
"start": "deno run -A --watch=static/,routes/ dev.ts",
"build": "deno run -A dev.ts build",
"preview": "deno run -A main.ts",
"update": "deno run -A -r https://fresh.deno.dev/update .",
"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write --allow-run ./src/worker.ts",
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
"adder": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
}, },
"lint": {
"rules": {
"tags": ["fresh", "recommended"]
}
},
"exclude": ["**/_fresh/*"],
"imports": {
"@std/assert": "jsr:@std/assert@1",
"$fresh/": "https://deno.land/x/fresh@1.7.3/",
"preact": "https://esm.sh/preact@10.22.0",
"preact/": "https://esm.sh/preact@10.22.0/",
"@preact/signals": "https://esm.sh/*@preact/signals@1.2.2",
"@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1",
"tailwindcss": "npm:tailwindcss@3.4.1",
"tailwindcss/": "npm:/tailwindcss@3.4.1/",
"tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js",
"$std/": "https://deno.land/std@0.216.0/",
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
"bullmq": "npm:bullmq",
"lib/": "./lib/",
"ioredis": "npm:ioredis",
"@bull-board/api": "npm:@bull-board/api",
"@bull-board/express": "npm:@bull-board/express",
"express": "npm:express",
"src/": "./src/",
"onnxruntime": "npm:onnxruntime-node@1.19.2",
"chalk": "npm:chalk"
},
"compilerOptions": {
"jsx": "react-jsx",
"jsxImportSource": "preact"
},
"nodeModulesDir": "auto",
"fmt": { "fmt": {
"useTabs": true, "useTabs": true,
"lineWidth": 120, "lineWidth": 120,

7
dev.ts Executable file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env -S deno run -A --watch=static/,routes/
import dev from "$fresh/dev.ts";
import config from "./fresh.config.ts";
import "$std/dotenv/load.ts";
// Invoke Fresh's `dev` entry point with this module's URL, the "./main.ts" entry file, and the project config.
await dev(import.meta.url, "./main.ts", config);

6
fresh.config.ts Normal file
View File

@ -0,0 +1,6 @@
import { defineConfig } from "$fresh/server.ts";
import tailwind from "$fresh/plugins/tailwind.ts";
// Fresh project configuration: the Tailwind plugin is the only plugin registered.
export default defineConfig({
plugins: [tailwind()],
});

27
fresh.gen.ts Normal file
View File

@ -0,0 +1,27 @@
// DO NOT EDIT. This file is generated by Fresh.
// This file SHOULD be checked into source version control.
// This file is automatically updated during development when running `dev.ts`.
import * as $_404 from "./routes/_404.tsx";
import * as $_app from "./routes/_app.tsx";
import * as $api_joke from "./routes/api/joke.ts";
import * as $greet_name_ from "./routes/greet/[name].tsx";
import * as $index from "./routes/index.tsx";
import * as $Counter from "./islands/Counter.tsx";
import type { Manifest } from "$fresh/server.ts";
const manifest = {
routes: {
"./routes/_404.tsx": $_404,
"./routes/_app.tsx": $_app,
"./routes/api/joke.ts": $api_joke,
"./routes/greet/[name].tsx": $greet_name_,
"./routes/index.tsx": $index,
},
islands: {
"./islands/Counter.tsx": $Counter,
},
baseUrl: import.meta.url,
} satisfies Manifest;
export default manifest;

16
islands/Counter.tsx Normal file
View File

@ -0,0 +1,16 @@
import type { Signal } from "@preact/signals";
import { Button } from "../components/Button.tsx";
/** Props accepted by the `Counter` island. */
interface CounterProps {
	/** Signal holding the current count; the buttons below write to its `value`. */
	count: Signal<number>;
}

/**
 * Counter widget: a "-1" button, the current count, and a "+1" button.
 *
 * The count is not local state. Both buttons mutate the `value` of the signal
 * supplied by the caller, and the signal itself is rendered between them.
 */
export default function Counter({ count }: CounterProps) {
	const decrement = () => count.value -= 1;
	const increment = () => count.value += 1;

	return (
		<div class="flex gap-8 py-6">
			<Button onClick={decrement}>-1</Button>
			<p class="text-3xl tabular-nums">{count}</p>
			<Button onClick={increment}>+1</Button>
		</div>
	);
}

View File

View File

@ -1,6 +1,6 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { AllDataType, BiliUserType } from "db/schema.d.ts"; import { AllDataType, BiliUserType } from "lib/db/schema.d.ts";
import Akari from "ml/akari.ts"; import Akari from "lib/ml/akari.ts";
export async function videoExistsInAllData(client: Client, aid: number) { export async function videoExistsInAllData(client: Client, aid: number) {
return await client.queryObject<{ exists: boolean }>( return await client.queryObject<{ exists: boolean }>(

View File

@ -1,5 +1,5 @@
import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Pool } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { postgresConfig } from "db/pgConfig.ts"; import { postgresConfig } from "lib/db/pgConfig.ts";
const pool = new Pool(postgresConfig, 12); const pool = new Pool(postgresConfig, 12);

View File

@ -1,5 +1,5 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { LatestSnapshotType } from "db/schema.d.ts"; import { LatestSnapshotType } from "lib/db/schema.d.ts";
export async function getVideosNearMilestone(client: Client) { export async function getVideosNearMilestone(client: Client) {
const queryResult = await client.queryObject<LatestSnapshotType>(` const queryResult = await client.queryObject<LatestSnapshotType>(`

View File

@ -1,13 +1,16 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { formatTimestampToPsql } from "utils/formatTimestampToPostgre.ts"; import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
import { SnapshotScheduleType } from "./schema.d.ts"; import { SnapshotScheduleType } from "./schema.d.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { MINUTE } from "$std/datetime/constants.ts"; import { MINUTE } from "$std/datetime/constants.ts";
import { redis } from "db/redis.ts"; import { redis } from "lib/db/redis.ts";
import { Redis } from "ioredis"; import { Redis } from "ioredis";
const WINDOW_SIZE = 2880;
const REDIS_KEY = "cvsa:snapshot_window_counts"; const REDIS_KEY = "cvsa:snapshot_window_counts";
let lastAvailableWindow: { offset: number; count: number } | null = null;
function getCurrentWindowIndex(): number { function getCurrentWindowIndex(): number {
const now = new Date(); const now = new Date();
const minutesSinceMidnight = now.getHours() * 60 + now.getMinutes(); const minutesSinceMidnight = now.getHours() * 60 + now.getMinutes();
@ -28,7 +31,7 @@ export async function refreshSnapshotWindowCounts(client: Client, redisClient: R
WHERE started_at >= NOW() AND status = 'pending' AND started_at <= NOW() + INTERVAL '10 days' WHERE started_at >= NOW() AND status = 'pending' AND started_at <= NOW() + INTERVAL '10 days'
GROUP BY 1 GROUP BY 1
ORDER BY window_start ORDER BY window_start
`; `
await redisClient.del(REDIS_KEY); await redisClient.del(REDIS_KEY);
@ -36,11 +39,13 @@ export async function refreshSnapshotWindowCounts(client: Client, redisClient: R
for (const row of result.rows) { for (const row of result.rows) {
const targetOffset = Math.floor((row.window_start.getTime() - startTime) / (5 * MINUTE)); const targetOffset = Math.floor((row.window_start.getTime() - startTime) / (5 * MINUTE));
const offset = currentWindow + targetOffset; const offset = (currentWindow + targetOffset);
if (offset >= 0) { if (offset >= 0 && offset < WINDOW_SIZE) {
await redisClient.hset(REDIS_KEY, offset.toString(), Number(row.count)); await redisClient.hset(REDIS_KEY, offset.toString(), Number(row.count));
} }
} }
lastAvailableWindow = null;
} }
export async function initSnapshotWindowCounts(client: Client, redisClient: Redis) { export async function initSnapshotWindowCounts(client: Client, redisClient: Redis) {
@ -79,14 +84,6 @@ export async function videoHasProcessingSchedule(client: Client, aid: number) {
return res.rows.length > 0; return res.rows.length > 0;
} }
export async function bulkGetVideosWithoutProcessingSchedules(client: Client, aids: number[]) {
const res = await client.queryObject<{ aid: number }>(
`SELECT aid FROM snapshot_schedule WHERE aid = ANY($1) AND status != 'processing' GROUP BY aid`,
[aids],
);
return res.rows.map((row) => row.aid);
}
interface Snapshot { interface Snapshot {
created_at: number; created_at: number;
views: number; views: number;
@ -186,13 +183,7 @@ export async function getSnapshotScheduleCountWithinRange(client: Client, start:
* @param aid The aid of the video. * @param aid The aid of the video.
* @param targetTime Scheduled time for snapshot. (Timestamp in milliseconds) * @param targetTime Scheduled time for snapshot. (Timestamp in milliseconds)
*/ */
export async function scheduleSnapshot( export async function scheduleSnapshot(client: Client, aid: number, type: string, targetTime: number, force: boolean = false) {
client: Client,
aid: number,
type: string,
targetTime: number,
force: boolean = false,
) {
if (await videoHasActiveSchedule(client, aid) && !force) return; if (await videoHasActiveSchedule(client, aid) && !force) return;
let adjustedTime = new Date(targetTime); let adjustedTime = new Date(targetTime);
if (type !== "milestone" && type !== "new") { if (type !== "milestone" && type !== "new") {
@ -205,18 +196,6 @@ export async function scheduleSnapshot(
); );
} }
export async function bulkScheduleSnapshot(
client: Client,
aids: number[],
type: string,
targetTime: number,
force: boolean = false,
) {
for (const aid of aids) {
await scheduleSnapshot(client, aid, type, targetTime, force);
}
}
export async function adjustSnapshotTime( export async function adjustSnapshotTime(
expectedStartTime: Date, expectedStartTime: Date,
allowedCounts: number = 1000, allowedCounts: number = 1000,
@ -225,19 +204,21 @@ export async function adjustSnapshotTime(
const currentWindow = getCurrentWindowIndex(); const currentWindow = getCurrentWindowIndex();
const targetOffset = Math.floor((expectedStartTime.getTime() - Date.now()) / (5 * MINUTE)) - 6; const targetOffset = Math.floor((expectedStartTime.getTime() - Date.now()) / (5 * MINUTE)) - 6;
const initialOffset = currentWindow + Math.max(targetOffset, 0); let initialOffset = currentWindow + Math.max(targetOffset, 0);
if (lastAvailableWindow && lastAvailableWindow.count < allowedCounts) {
initialOffset = Math.max(lastAvailableWindow.offset - 2, 0);
}
let timePerIteration = 0; let timePerIteration = 0;
const MAX_ITERATIONS = 2880;
let iters = 0;
const t = performance.now(); const t = performance.now();
for (let i = initialOffset; i < MAX_ITERATIONS; i++) { for (let i = initialOffset; i < WINDOW_SIZE; i++) {
iters++;
const offset = i; const offset = i;
const count = await getWindowCount(redisClient, offset); const count = await getWindowCount(redisClient, offset);
if (count < allowedCounts) { if (count < allowedCounts) {
await redisClient.hincrby(REDIS_KEY, offset.toString(), 1); const newCount = await redisClient.hincrby(REDIS_KEY, offset.toString(), 1);
lastAvailableWindow = { offset, count: newCount };
const startPoint = new Date(); const startPoint = new Date();
startPoint.setHours(0, 0, 0, 0); startPoint.setHours(0, 0, 0, 0);
@ -250,26 +231,27 @@ export async function adjustSnapshotTime(
if (delayedDate.getTime() < now.getTime()) { if (delayedDate.getTime() < now.getTime()) {
const elapsed = performance.now() - t; const elapsed = performance.now() - t;
timePerIteration = elapsed / (i+1); timePerIteration = elapsed / (i+1);
logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime"); logger.log(`${timePerIteration.toFixed(3)}ms * ${i+1}iterations`, "perf", "fn:adjustSnapshotTime");
return now; return now;
} }
const elapsed = performance.now() - t; const elapsed = performance.now() - t;
timePerIteration = elapsed / (i+1); timePerIteration = elapsed / (i+1);
logger.log(`${timePerIteration.toFixed(3)}ms * ${iters} iterations`, "perf", "fn:adjustSnapshotTime"); logger.log(`${timePerIteration.toFixed(3)}ms * ${i+1}iterations`, "perf", "fn:adjustSnapshotTime");
return delayedDate; return delayedDate;
} }
} }
const elapsed = performance.now() - t; const elapsed = performance.now() - t;
timePerIteration = elapsed / MAX_ITERATIONS; timePerIteration = elapsed / WINDOW_SIZE;
logger.log(`${timePerIteration.toFixed(3)}ms * ${MAX_ITERATIONS} iterations`, "perf", "fn:adjustSnapshotTime"); logger.log(`${timePerIteration.toFixed(3)}ms * ${WINDOW_SIZE}iterations`, "perf", "fn:adjustSnapshotTime");
return expectedStartTime; return expectedStartTime;
} }
export async function getSnapshotsInNextSecond(client: Client) { export async function getSnapshotsInNextSecond(client: Client) {
const query = ` const query = `
SELECT * SELECT *
FROM snapshot_schedule FROM snapshot_schedule
WHERE started_at <= NOW() + INTERVAL '1 seconds' AND status = 'pending' AND type != 'normal' WHERE started_at <= NOW() + INTERVAL '1 seconds' AND status = 'pending'
ORDER BY ORDER BY
CASE CASE
WHEN type = 'milestone' THEN 0 WHEN type = 'milestone' THEN 0
@ -282,18 +264,6 @@ export async function getSnapshotsInNextSecond(client: Client) {
return res.rows; return res.rows;
} }
export async function getBulkSnapshotsInNextSecond(client: Client) {
const query = `
SELECT *
FROM snapshot_schedule
WHERE started_at <= NOW() + INTERVAL '15 seconds' AND status = 'pending' AND type = 'normal'
ORDER BY started_at
LIMIT 1000;
`;
const res = await client.queryObject<SnapshotScheduleType>(query, []);
return res.rows;
}
export async function setSnapshotStatus(client: Client, id: number, status: string) { export async function setSnapshotStatus(client: Client, id: number, status: string) {
return await client.queryObject( return await client.queryObject(
`UPDATE snapshot_schedule SET status = $2 WHERE id = $1`, `UPDATE snapshot_schedule SET status = $2 WHERE id = $1`,
@ -301,13 +271,6 @@ export async function setSnapshotStatus(client: Client, id: number, status: stri
); );
} }
export async function bulkSetSnapshotStatus(client: Client, ids: number[], status: string) {
return await client.queryObject(
`UPDATE snapshot_schedule SET status = $2 WHERE id = ANY($1)`,
[ids, status],
);
}
export async function getVideosWithoutActiveSnapshotSchedule(client: Client) { export async function getVideosWithoutActiveSnapshotSchedule(client: Client) {
const query: string = ` const query: string = `
SELECT s.aid SELECT s.aid

View File

@ -1,5 +1,5 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { parseTimestampFromPsql } from "utils/formatTimestampToPostgre.ts"; import { parseTimestampFromPsql } from "lib/utils/formatTimestampToPostgre.ts";
export async function getNotCollectedSongs(client: Client) { export async function getNotCollectedSongs(client: Client) {
const queryResult = await client.queryObject<{ aid: number }>(` const queryResult = await client.queryObject<{ aid: number }>(`

View File

@ -1,4 +1,4 @@
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
logger.error(Error("test error"), "test service"); logger.error(Error("test error"), "test service");
logger.debug(`some string`); logger.debug(`some string`);

View File

@ -1,12 +1,12 @@
import { AIManager } from "ml/manager.ts"; import { AIManager } from "lib/ml/manager.ts";
import * as ort from "onnxruntime"; import * as ort from "onnxruntime";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { WorkerError } from "mq/schema.ts"; import { WorkerError } from "lib/mq/schema.ts";
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers"; import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
const tokenizerModel = "alikia2x/jina-embedding-v3-m2v-1024"; const tokenizerModel = "alikia2x/jina-embedding-v3-m2v-1024";
const onnxClassifierPath = "../../model/akari/3.17.onnx"; const onnxClassifierPath = "./model/akari/3.17.onnx";
const onnxEmbeddingPath = "../../model/embedding/model.onnx"; const onnxEmbeddingPath = "./model/embedding/model.onnx";
class AkariProto extends AIManager { class AkariProto extends AIManager {
private tokenizer: PreTrainedTokenizer | null = null; private tokenizer: PreTrainedTokenizer | null = null;

View File

@ -1,6 +1,6 @@
import * as ort from "onnxruntime"; import * as ort from "onnxruntime";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { WorkerError } from "mq/schema.ts"; import { WorkerError } from "lib/mq/schema.ts";
export class AIManager { export class AIManager {
public sessions: { [key: string]: ort.InferenceSession } = {}; public sessions: { [key: string]: ort.InferenceSession } = {};

22
lib/ml/mantis.ts Normal file
View File

@ -0,0 +1,22 @@
import { AIManager } from "lib/ml/manager.ts";
import * as ort from "onnxruntime";
import logger from "lib/log/logger.ts";
import { WorkerError } from "lib/mq/schema.ts";
/** Location of the ONNX file registered under the "predictor" key. */
const modelPath = "./model/model.onnx";

/**
 * `AIManager` subclass that registers a single model, "predictor".
 *
 * NOTE(review): how `models` entries are loaded, and what `modelPath` is
 * resolved against, is decided by `AIManager`, which is not visible here.
 */
class MantisProto extends AIManager {
	constructor() {
		super();
		this.models = { predictor: modelPath };
	}

	/** Initialise the manager; currently this only delegates to the base class. */
	public override async init(): Promise<void> {
		await super.init();
	}
}

/** Module-wide shared instance; `init()` is not called here. */
const Mantis = new MantisProto();

export default Mantis;

View File

@ -1,13 +1,13 @@
import { Job } from "bullmq"; import { Job } from "bullmq";
import { db } from "db/init.ts"; import { db } from "lib/db/init.ts";
import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "db/allData.ts"; import { getUnlabelledVideos, getVideoInfoFromAllData, insertVideoLabel } from "lib/db/allData.ts";
import Akari from "ml/akari.ts"; import Akari from "lib/ml/akari.ts";
import { ClassifyVideoQueue } from "mq/index.ts"; import { ClassifyVideoQueue } from "lib/mq/index.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { lockManager } from "mq/lockManager.ts"; import { lockManager } from "lib/mq/lockManager.ts";
import { aidExistsInSongs } from "db/songs.ts"; import { aidExistsInSongs } from "lib/db/songs.ts";
import { insertIntoSongs } from "mq/task/collectSongs.ts"; import { insertIntoSongs } from "lib/mq/task/collectSongs.ts";
import { scheduleSnapshot } from "db/snapshotSchedule.ts"; import { scheduleSnapshot } from "lib/db/snapshotSchedule.ts";
import { MINUTE } from "$std/datetime/constants.ts"; import { MINUTE } from "$std/datetime/constants.ts";
export const classifyVideoWorker = async (job: Job) => { export const classifyVideoWorker = async (job: Job) => {

View File

@ -1,8 +1,8 @@
import { Job } from "bullmq"; import { Job } from "bullmq";
import { queueLatestVideos } from "mq/task/queueLatestVideo.ts"; import { queueLatestVideos } from "lib/mq/task/queueLatestVideo.ts";
import { db } from "db/init.ts"; import { db } from "lib/db/init.ts";
import { insertVideoInfo } from "mq/task/getVideoDetails.ts"; import { insertVideoInfo } from "lib/mq/task/getVideoDetails.ts";
import { collectSongs } from "mq/task/collectSongs.ts"; import { collectSongs } from "lib/mq/task/collectSongs.ts";
export const getLatestVideosWorker = async (_job: Job): Promise<void> => { export const getLatestVideosWorker = async (_job: Job): Promise<void> => {
const client = await db.connect(); const client = await db.connect();

View File

@ -1,13 +1,9 @@
import { Job } from "bullmq"; import { Job } from "bullmq";
import { db } from "db/init.ts"; import { db } from "lib/db/init.ts";
import { getLatestVideoSnapshot, getVideosNearMilestone } from "db/snapshot.ts"; import { getLatestVideoSnapshot, getVideosNearMilestone } from "lib/db/snapshot.ts";
import { import {
bulkGetVideosWithoutProcessingSchedules,
bulkScheduleSnapshot,
bulkSetSnapshotStatus,
findClosestSnapshot, findClosestSnapshot,
findSnapshotBefore, findSnapshotBefore,
getBulkSnapshotsInNextSecond,
getLatestSnapshot, getLatestSnapshot,
getSnapshotsInNextSecond, getSnapshotsInNextSecond,
getVideosWithoutActiveSnapshotSchedule, getVideosWithoutActiveSnapshotSchedule,
@ -16,18 +12,17 @@ import {
setSnapshotStatus, setSnapshotStatus,
snapshotScheduleExists, snapshotScheduleExists,
videoHasProcessingSchedule, videoHasProcessingSchedule,
} from "db/snapshotSchedule.ts"; } from "lib/db/snapshotSchedule.ts";
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts"; import { HOUR, MINUTE, SECOND, WEEK } from "$std/datetime/constants.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { SnapshotQueue } from "mq/index.ts"; import { SnapshotQueue } from "lib/mq/index.ts";
import { insertVideoSnapshot } from "mq/task/getVideoStats.ts"; import { insertVideoSnapshot } from "lib/mq/task/getVideoStats.ts";
import { NetSchedulerError } from "mq/scheduler.ts"; import { NetSchedulerError } from "lib/mq/scheduler.ts";
import { getBiliVideoStatus, setBiliVideoStatus } from "db/allData.ts"; import { getBiliVideoStatus, setBiliVideoStatus } from "lib/db/allData.ts";
import { truncate } from "utils/truncate.ts"; import { truncate } from "lib/utils/truncate.ts";
import { lockManager } from "mq/lockManager.ts"; import { lockManager } from "lib/mq/lockManager.ts";
import { getSongsPublihsedAt } from "db/songs.ts"; import { getSongsPublihsedAt } from "lib/db/songs.ts";
import { bulkGetVideoStats } from "net/bulkGetVideoStats.ts";
const priorityMap: { [key: string]: number } = { const priorityMap: { [key: string]: number } = {
"milestone": 1, "milestone": 1,
@ -40,40 +35,12 @@ const snapshotTypeToTaskMap: { [key: string]: string } = {
"new": "snapshotMilestoneVideo", "new": "snapshotMilestoneVideo",
}; };
export const bulkSnapshotTickWorker = async (_job: Job) => {
const client = await db.connect();
try {
const schedules = await getBulkSnapshotsInNextSecond(client);
const count = schedules.length;
const groups = Math.ceil(count / 30);
for (let i = 0; i < groups; i++) {
const group = schedules.slice(i * 30, (i + 1) * 30);
const aids = group.map((schedule) => Number(schedule.aid));
const filteredAids = await bulkGetVideosWithoutProcessingSchedules(client, aids);
if (filteredAids.length === 0) continue;
await bulkSetSnapshotStatus(client, filteredAids, "processing");
const dataMap: { [key: number]: number } = {};
for (const schedule of group) {
const id = Number(schedule.id);
dataMap[id] = Number(schedule.aid);
}
await SnapshotQueue.add("bulkSnapshotVideo", {
map: dataMap,
}, { priority: 3 });
}
} catch (e) {
logger.error(e as Error);
} finally {
client.release();
}
};
export const snapshotTickWorker = async (_job: Job) => { export const snapshotTickWorker = async (_job: Job) => {
const client = await db.connect(); const client = await db.connect();
try { try {
const schedules = await getSnapshotsInNextSecond(client); const schedules = await getSnapshotsInNextSecond(client);
for (const schedule of schedules) { for (const schedule of schedules) {
if (await videoHasProcessingSchedule(client, Number(schedule.aid))) { if (await videoHasProcessingSchedule(client, schedule.aid)) {
return `ALREADY_PROCESSING`; return `ALREADY_PROCESSING`;
} }
let priority = 3; let priority = 3;
@ -173,8 +140,7 @@ export const collectMilestoneSnapshotsWorker = async (_job: Job) => {
const getRegularSnapshotInterval = async (client: Client, aid: number) => { const getRegularSnapshotInterval = async (client: Client, aid: number) => {
const now = Date.now(); const now = Date.now();
const date = new Date(now - 24 * HOUR); const date = new Date(now - 24 * HOUR);
let oldSnapshot = await findSnapshotBefore(client, aid, date); const oldSnapshot = await findSnapshotBefore(client, aid, date);
if (!oldSnapshot) oldSnapshot = await findClosestSnapshot(client, aid, date);
const latestSnapshot = await getLatestSnapshot(client, aid); const latestSnapshot = await getLatestSnapshot(client, aid);
if (!oldSnapshot || !latestSnapshot) return 0; if (!oldSnapshot || !latestSnapshot) return 0;
if (oldSnapshot.created_at === latestSnapshot.created_at) return 0; if (oldSnapshot.created_at === latestSnapshot.created_at) return 0;
@ -182,7 +148,7 @@ const getRegularSnapshotInterval = async (client: Client, aid: number) => {
if (hoursDiff < 8) return 24; if (hoursDiff < 8) return 24;
const viewsDiff = latestSnapshot.views - oldSnapshot.views; const viewsDiff = latestSnapshot.views - oldSnapshot.views;
if (viewsDiff === 0) return 72; if (viewsDiff === 0) return 72;
const speedPerDay = viewsDiff / (hoursDiff + 0.001) * 24; const speedPerDay = viewsDiff / hoursDiff * 24;
if (speedPerDay < 6) return 36; if (speedPerDay < 6) return 36;
if (speedPerDay < 120) return 24; if (speedPerDay < 120) return 24;
if (speedPerDay < 320) return 12; if (speedPerDay < 320) return 12;
@ -206,7 +172,7 @@ export const regularSnapshotsWorker = async (_job: Job) => {
const now = Date.now(); const now = Date.now();
const lastSnapshotedAt = latestSnapshot?.time ?? now; const lastSnapshotedAt = latestSnapshot?.time ?? now;
const interval = await getRegularSnapshotInterval(client, aid); const interval = await getRegularSnapshotInterval(client, aid);
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq"); logger.log(`Schedule regular snapshot for aid ${aid} in ${interval} hours.`, "mq")
const targetTime = truncate(lastSnapshotedAt + interval * HOUR, now + 1, now + 100000 * WEEK); const targetTime = truncate(lastSnapshotedAt + interval * HOUR, now + 1, now + 100000 * WEEK);
await scheduleSnapshot(client, aid, "normal", targetTime); await scheduleSnapshot(client, aid, "normal", targetTime);
if (now - startedAt > 25 * MINUTE) { if (now - startedAt > 25 * MINUTE) {
@ -221,72 +187,6 @@ export const regularSnapshotsWorker = async (_job: Job) => {
} }
}; };
/**
 * Worker that takes snapshots for a batch of videos with one bulk request.
 *
 * `job.data.map` maps snapshot-schedule ids to the aid each schedule targets.
 * Schedules that no longer exist are skipped; the remaining aids are fetched together,
 * one `video_snapshot` row is inserted per returned video, the schedules are marked
 * `completed`, and the next regular snapshot is scheduled for every fetched aid.
 *
 * @param job - Job whose `data.map` holds the `{ scheduleId: aid }` pairs.
 * @returns `DONE` on success, `NO_VALID_SCHEDULES` when none of the schedules exist any more,
 * `GET_BILI_STATUS_<code>` when the API answers with an error code, or `undefined` when no
 * proxy is available or an unexpected error was logged.
 */
export const takeBulkSnapshotForVideosWorker = async (job: Job) => {
	const dataMap: { [key: number]: number } = job.data.map;
	const ids = Object.keys(dataMap).map((id) => Number(id));
	const aidsToFetch: number[] = [];
	// Loop-invariant, so it is built once instead of once per video.
	const insertSnapshotQuery = `
		INSERT INTO video_snapshot (aid, views, danmakus, replies, likes, coins, shares, favorites)
		VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
	`;
	const client = await db.connect();
	try {
		// Only fetch videos whose schedule still exists.
		for (const id of ids) {
			const exists = await snapshotScheduleExists(client, id);
			if (!exists) {
				continue;
			}
			aidsToFetch.push(Number(dataMap[id]));
		}
		// Nothing left to snapshot: do not spend a rate-limited request on an empty resource list.
		if (aidsToFetch.length === 0) {
			return `NO_VALID_SCHEDULES`;
		}
		const data = await bulkGetVideoStats(aidsToFetch);
		if (typeof data === "number") {
			// The API answered with an error code: fail these schedules and retry shortly.
			await bulkSetSnapshotStatus(client, ids, "failed");
			await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 15 * SECOND);
			return `GET_BILI_STATUS_${data}`;
		}
		for (const video of data) {
			const aid = video.id;
			const stat = video.cnt_info;
			// Parameter order follows the column list of the INSERT statement.
			await client.queryObject(
				insertSnapshotQuery,
				[aid, stat.play, stat.danmaku, stat.reply, stat.thumb_up, stat.coin, stat.share, stat.collect],
			);
			logger.log(`Taken snapshot for video ${aid} in bulk.`, "net", "fn:takeBulkSnapshotForVideosWorker");
		}
		await bulkSetSnapshotStatus(client, ids, "completed");
		for (const aid of aidsToFetch) {
			const interval = await getRegularSnapshotInterval(client, aid);
			logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq");
			await scheduleSnapshot(client, aid, "normal", Date.now() + interval * HOUR);
		}
		return `DONE`;
	} catch (e) {
		if (e instanceof NetSchedulerError && e.code === "NO_PROXY_AVAILABLE") {
			logger.warn(
				`No available proxy for bulk request now.`,
				"mq",
				"fn:takeBulkSnapshotForVideosWorker",
			);
			// The current schedules are closed as "completed" and fresh ones are queued 2 minutes later.
			await bulkSetSnapshotStatus(client, ids, "completed");
			await bulkScheduleSnapshot(client, aidsToFetch, "normal", Date.now() + 2 * MINUTE);
			return;
		}
		logger.error(e as Error, "mq", "fn:takeBulkSnapshotForVideosWorker");
		await bulkSetSnapshotStatus(client, ids, "failed");
	} finally {
		// Always hand the connection back, whichever path was taken.
		client.release();
	}
};
export const takeSnapshotForVideoWorker = async (job: Job) => { export const takeSnapshotForVideoWorker = async (job: Job) => {
const id = job.data.id; const id = job.data.id;
const aid = Number(job.data.aid); const aid = Number(job.data.aid);
@ -315,7 +215,7 @@ export const takeSnapshotForVideoWorker = async (job: Job) => {
await setSnapshotStatus(client, id, "completed"); await setSnapshotStatus(client, id, "completed");
if (type === "normal") { if (type === "normal") {
const interval = await getRegularSnapshotInterval(client, aid); const interval = await getRegularSnapshotInterval(client, aid);
logger.log(`Scheduled regular snapshot for aid ${aid} in ${interval} hours.`, "mq"); logger.log(`Schedule regular snapshot for aid ${aid} in ${interval} hours.`, "mq")
await scheduleSnapshot(client, aid, type, Date.now() + interval * HOUR); await scheduleSnapshot(client, aid, type, Date.now() + interval * HOUR);
return `DONE`; return `DONE`;
} else if (type === "new") { } else if (type === "new") {
@ -382,11 +282,7 @@ export const scheduleCleanupWorker = async (_job: Job) => {
const type = row.type; const type = row.type;
await setSnapshotStatus(client, id, "timeout"); await setSnapshotStatus(client, id, "timeout");
await scheduleSnapshot(client, aid, type, Date.now() + 10 * SECOND); await scheduleSnapshot(client, aid, type, Date.now() + 10 * SECOND);
logger.log( logger.log(`Schedule ${id} has no response received for 5 minutes, rescheduled.`, "mq", "fn:scheduleCleanupWorker")
`Schedule ${id} has no response received for 5 minutes, rescheduled.`,
"mq",
"fn:scheduleCleanupWorker",
);
} }
} catch (e) { } catch (e) {
logger.error(e as Error, "mq", "fn:scheduleCleanupWorker"); logger.error(e as Error, "mq", "fn:scheduleCleanupWorker");

1
lib/mq/executors.ts Normal file
View File

@ -0,0 +1 @@
export * from "lib/mq/exec/getLatestVideos.ts";

View File

@ -1,9 +1,9 @@
import { MINUTE, SECOND } from "$std/datetime/constants.ts"; import { MINUTE, SECOND } from "$std/datetime/constants.ts";
import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "mq/index.ts"; import { ClassifyVideoQueue, LatestVideosQueue, SnapshotQueue } from "lib/mq/index.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { initSnapshotWindowCounts } from "db/snapshotSchedule.ts"; import { initSnapshotWindowCounts } from "lib/db/snapshotSchedule.ts";
import { db } from "db/init.ts"; import { db } from "lib/db/init.ts";
import { redis } from "db/redis.ts"; import { redis } from "lib/db/redis.ts";
export async function initMQ() { export async function initMQ() {
const client = await db.connect(); const client = await db.connect();
@ -35,16 +35,6 @@ export async function initMQ() {
}, },
}); });
await SnapshotQueue.upsertJobScheduler("bulkSnapshotTick", {
every: 15 * SECOND,
immediately: true,
}, {
opts: {
removeOnComplete: 1,
removeOnFail: 1,
},
});
await SnapshotQueue.upsertJobScheduler("collectMilestoneSnapshots", { await SnapshotQueue.upsertJobScheduler("collectMilestoneSnapshots", {
every: 5 * MINUTE, every: 5 * MINUTE,
immediately: true, immediately: true,

View File

@ -1,5 +1,5 @@
import { Redis } from "ioredis"; import { Redis } from "ioredis";
import { redis } from "db/redis.ts"; import { redis } from "lib/db/redis.ts";
class LockManager { class LockManager {
private redis: Redis; private redis: Redis;

View File

@ -1,4 +1,4 @@
import { SlidingWindow } from "mq/slidingWindow.ts"; import { SlidingWindow } from "lib/mq/slidingWindow.ts";
export interface RateLimiterConfig { export interface RateLimiterConfig {
window: SlidingWindow; window: SlidingWindow;

View File

@ -1,7 +1,7 @@
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { RateLimiter, RateLimiterConfig } from "mq/rateLimiter.ts"; import { RateLimiter, RateLimiterConfig } from "lib/mq/rateLimiter.ts";
import { SlidingWindow } from "mq/slidingWindow.ts"; import { SlidingWindow } from "lib/mq/slidingWindow.ts";
import { redis } from "db/redis.ts"; import { redis } from "lib/db/redis.ts";
import Redis from "ioredis"; import Redis from "ioredis";
import { SECOND } from "$std/datetime/constants.ts"; import { SECOND } from "$std/datetime/constants.ts";
@ -333,18 +333,12 @@ const biliLimiterConfig: RateLimiterConfig[] = [
}, },
]; ];
const bili_test = [...biliLimiterConfig]; const bili_test = biliLimiterConfig;
bili_test[0].max = 10; bili_test[0].max = 10;
bili_test[1].max = 36; bili_test[1].max = 36;
bili_test[2].max = 150; bili_test[2].max = 150;
bili_test[3].max = 1000; bili_test[3].max = 1000;
// Stricter limits used by the "bili_strict" provider.
// Every entry is copied: spreading the array alone (`[...biliLimiterConfig]`) would keep
// pointing at the same config objects, so the `max` assignments below would also overwrite
// the limits of `biliLimiterConfig` and of every other list sharing those objects.
const bili_strict = biliLimiterConfig.map((config) => ({ ...config }));
bili_strict[0].max = 1;
bili_strict[1].max = 4;
bili_strict[2].max = 12;
bili_strict[3].max = 100;
/* /*
Execution order for setup: Execution order for setup:
@ -384,21 +378,11 @@ netScheduler.addTask("snapshotVideo", "bili_test", [
"alicloud-shenzhen", "alicloud-shenzhen",
"alicloud-hohhot", "alicloud-hohhot",
]); ]);
netScheduler.addTask("bulkSnapshot", "bili_strict", [
"alicloud-qingdao",
"alicloud-shanghai",
"alicloud-zhangjiakou",
"alicloud-chengdu",
"alicloud-shenzhen",
"alicloud-hohhot",
]);
netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig); netScheduler.setTaskLimiter("getVideoInfo", videoInfoRateLimiterConfig);
netScheduler.setTaskLimiter("getLatestVideos", null); netScheduler.setTaskLimiter("getLatestVideos", null);
netScheduler.setTaskLimiter("snapshotMilestoneVideo", null); netScheduler.setTaskLimiter("snapshotMilestoneVideo", null);
netScheduler.setTaskLimiter("snapshotVideo", null); netScheduler.setTaskLimiter("snapshotVideo", null);
netScheduler.setTaskLimiter("bulkSnapshot", null);
netScheduler.setProviderLimiter("bilibili", biliLimiterConfig); netScheduler.setProviderLimiter("bilibili", biliLimiterConfig);
netScheduler.setProviderLimiter("bili_test", bili_test); netScheduler.setProviderLimiter("bili_test", bili_test);
netScheduler.setProviderLimiter("bili_strict", bili_strict);
export default netScheduler; export default netScheduler;

View File

@ -1,7 +1,7 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { aidExistsInSongs, getNotCollectedSongs } from "db/songs.ts"; import { aidExistsInSongs, getNotCollectedSongs } from "lib/db/songs.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { scheduleSnapshot } from "db/snapshotSchedule.ts"; import { scheduleSnapshot } from "lib/db/snapshotSchedule.ts";
import { MINUTE } from "$std/datetime/constants.ts"; import { MINUTE } from "$std/datetime/constants.ts";
export async function collectSongs(client: Client) { export async function collectSongs(client: Client) {

View File

@ -1,9 +1,9 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { getVideoDetails } from "net/getVideoDetails.ts"; import { getVideoDetails } from "lib/net/getVideoDetails.ts";
import { formatTimestampToPsql } from "utils/formatTimestampToPostgre.ts"; import { formatTimestampToPsql } from "lib/utils/formatTimestampToPostgre.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { ClassifyVideoQueue } from "mq/index.ts"; import { ClassifyVideoQueue } from "lib/mq/index.ts";
import { userExistsInBiliUsers, videoExistsInAllData } from "db/allData.ts"; import { userExistsInBiliUsers, videoExistsInAllData } from "lib/db/allData.ts";
import { HOUR, SECOND } from "$std/datetime/constants.ts"; import { HOUR, SECOND } from "$std/datetime/constants.ts";
export async function insertVideoInfo(client: Client, aid: number) { export async function insertVideoInfo(client: Client, aid: number) {

View File

@ -1,7 +1,7 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { getVideoInfo } from "net/getVideoInfo.ts"; import { getVideoInfo } from "lib/net/getVideoInfo.ts";
import { LatestSnapshotType } from "db/schema.d.ts"; import { LatestSnapshotType } from "lib/db/schema.d.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
/* /*
* Fetch video stats from bilibili API and insert into database * Fetch video stats from bilibili API and insert into database

View File

@ -1,10 +1,10 @@
import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts"; import { Client } from "https://deno.land/x/postgres@v0.19.3/mod.ts";
import { getLatestVideoAids } from "net/getLatestVideoAids.ts"; import { getLatestVideoAids } from "lib/net/getLatestVideoAids.ts";
import { videoExistsInAllData } from "db/allData.ts"; import { videoExistsInAllData } from "lib/db/allData.ts";
import { sleep } from "utils/sleep.ts"; import { sleep } from "lib/utils/sleep.ts";
import { SECOND } from "$std/datetime/constants.ts"; import { SECOND } from "$std/datetime/constants.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import { LatestVideosQueue } from "mq/index.ts"; import { LatestVideosQueue } from "lib/mq/index.ts";
export async function queueLatestVideos( export async function queueLatestVideos(
client: Client, client: Client,

View File

@ -11,10 +11,10 @@ export type VideoTagsResponse = BaseResponse<VideoTagsData>;
export type VideoInfoResponse = BaseResponse<VideoInfoData>; export type VideoInfoResponse = BaseResponse<VideoInfoData>;
export type MediaListInfoResponse = BaseResponse<MediaListInfoData>; export type MediaListInfoResponse = BaseResponse<MediaListInfoData>;
export type MediaListInfoData = MediaListInfoItem[]; type MediaListInfoData = MediaListInfoItem[];
export interface MediaListInfoItem {
attr: number; interface MediaListInfoItem {
bvid: string; bvid: string;
id: number; id: number;
cnt_info: { cnt_info: {
@ -25,7 +25,7 @@ export interface MediaListInfoItem {
reply: number; reply: number;
share: number; share: number;
thumb_up: number; thumb_up: number;
}; }
} }
interface VideoInfoData { interface VideoInfoData {

View File

@ -1,6 +1,6 @@
import { VideoListResponse } from "net/bilibili.d.ts"; import { VideoListResponse } from "lib/net/bilibili.d.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
import netScheduler from "mq/scheduler.ts"; import netScheduler from "lib/mq/scheduler.ts";
export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[]> { export async function getLatestVideoAids(page: number = 1, pageSize: number = 10): Promise<number[]> {
const startFrom = 1 + pageSize * (page - 1); const startFrom = 1 + pageSize * (page - 1);

View File

@ -1,6 +1,6 @@
import netScheduler from "mq/scheduler.ts"; import netScheduler from "lib/mq/scheduler.ts";
import { VideoDetailsData, VideoDetailsResponse } from "net/bilibili.d.ts"; import { VideoDetailsData, VideoDetailsResponse } from "lib/net/bilibili.d.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
export async function getVideoDetails(aid: number): Promise<VideoDetailsData | null> { export async function getVideoDetails(aid: number): Promise<VideoDetailsData | null> {
const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`; const url = `https://api.bilibili.com/x/web-interface/view/detail?aid=${aid}`;

View File

@ -1,6 +1,6 @@
import netScheduler from "mq/scheduler.ts"; import netScheduler from "lib/mq/scheduler.ts";
import { VideoInfoData, VideoInfoResponse } from "net/bilibili.d.ts"; import { VideoInfoData, VideoInfoResponse } from "lib/net/bilibili.d.ts";
import logger from "log/logger.ts"; import logger from "lib/log/logger.ts";
/* /*
* Fetch video metadata from bilibili API * Fetch video metadata from bilibili API

13
main.ts Normal file
View File

@ -0,0 +1,13 @@
/// <reference no-default-lib="true" />
/// <reference lib="dom" />
/// <reference lib="dom.iterable" />
/// <reference lib="dom.asynciterable" />
/// <reference lib="deno.ns" />
import "$std/dotenv/load.ts";
import { start } from "$fresh/server.ts";
import manifest from "./fresh.gen.ts";
import config from "./fresh.config.ts";
// Entry point: hand the generated manifest (./fresh.gen.ts) and the project config
// (./fresh.config.ts) to `start` from $fresh/server.ts and wait for it.
await start(manifest, config);

View File

@ -1,43 +0,0 @@
{
"name": "@cvsa/crawler",
"tasks": {
"crawl-raw-bili": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/insertAidsToDB.ts",
"crawl-bili-aids": "deno --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run src/db/raw/fetchAids.ts",
"check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx",
"manifest": "deno task cli manifest $(pwd)",
"start": "deno run -A --watch=static/,routes/ dev.ts",
"build": "deno run -A dev.ts build",
"preview": "deno run -A main.ts",
"worker:main": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write --allow-run ./src/worker.ts",
"worker:filter": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net --allow-write ./src/filterWorker.ts",
"adder": "deno run --env-file=.env --allow-env --allow-read --allow-ffi --allow-net ./src/jobAdder.ts",
"bullui": "deno run --allow-read --allow-env --allow-ffi --allow-net ./src/bullui.ts",
"all": "concurrently 'deno task worker:main' 'deno task adder' 'deno task bullui' 'deno task worker:filter'",
"test": "deno test ./test/ --allow-env --allow-ffi --allow-read --allow-net --allow-write --allow-run"
},
"lint": {
"rules": {
"tags": ["recommended"]
}
},
"imports": {
"@std/assert": "jsr:@std/assert@1",
"$std/": "https://deno.land/std@0.216.0/",
"@huggingface/transformers": "npm:@huggingface/transformers@3.0.0",
"bullmq": "npm:bullmq",
"mq/": "./mq/",
"db/": "./db/",
"log/": "./log/",
"net/": "./net/",
"ml/": "./ml/",
"utils/": "./utils/",
"ioredis": "npm:ioredis",
"@bull-board/api": "npm:@bull-board/api",
"@bull-board/express": "npm:@bull-board/express",
"express": "npm:express",
"src/": "./src/",
"onnxruntime": "npm:onnxruntime-node@1.19.2",
"chalk": "npm:chalk"
},
"exports": "./main.ts"
}

View File

@ -1,7 +0,0 @@
// Deno asks for an export once `name` is specified in the package config,
// and without `name` the `--filter` flag of `deno task` does not work.
// The reason is unclear, so this is a placeholder export that exists only to satisfy Deno:
export const DENO = "FUCK YOU DENO";
// Exporting the package version as well, since that is actually useful to expose.
export const VERSION = "1.0.13";

View File

@ -1 +0,0 @@
export * from "mq/exec/getLatestVideos.ts";

View File

@ -1,27 +0,0 @@
import netScheduler from "mq/scheduler.ts";
import { MediaListInfoData, MediaListInfoResponse } from "net/bilibili.d.ts";
import logger from "log/logger.ts";
/**
 * Bulk fetch video stats from the bilibili media-list API.
 *
 * @param aids - The aid list to fetch.
 * @returns The `data` payload of the response, or the error code returned by the bilibili API
 * when the response code is non-zero. An empty `aids` list yields an empty result without
 * issuing a request.
 * @throws {NetSchedulerError} In the following cases:
 * - No proxy is available currently: error code `NO_PROXY_AVAILABLE`
 * - The native `fetch` function threw an error: error code `FETCH_ERROR`
 * - The alicloud-fc threw an error: error code `ALICLOUD_FC_ERROR`
 */
export async function bulkGetVideoStats(aids: number[]): Promise<MediaListInfoData | number> {
	// Nothing to ask for: skip the (rate-limited) request entirely.
	if (aids.length === 0) {
		return [];
	}
	const baseURL = `https://api.bilibili.com/medialist/gateway/base/resource/infos?resources=`;
	// Every resource is written as `<aid>:2` (presumably 2 is the resource type for videos — TODO confirm);
	// entries are comma-separated, without a trailing comma.
	const resources = aids.map((aid) => `${aid}:2`).join(",");
	const url = baseURL + resources;
	const data = await netScheduler.request<MediaListInfoResponse>(url, "bulkSnapshot");
	if (data.code !== 0) {
		const errMessage = `Error fetching metadata for aid list: ${aids.join(",")}:`;
		logger.error(errMessage + data.code + "-" + data.message, "net", "fn:bulkGetVideoStats");
		return data.code;
	}
	return data.data;
}

View File

@ -1,3 +0,0 @@
import { initMQ } from "mq/init.ts";
// Top-level await: run initMQ (imported from mq/init.ts) as soon as this module is executed.
await initMQ();

View File

@ -1,24 +0,0 @@
# build output
dist/
# generated types
.astro/
# dependencies
node_modules/
# logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
# environment variables
.env
.env.production
# macOS-specific files
.DS_Store
# jetbrains setting folder
.idea/

View File

@ -1,50 +0,0 @@
# Astro Starter Kit: Basics
```sh
deno create astro@latest -- --template basics
```
[![Open in StackBlitz](https://developer.stackblitz.com/img/open_in_stackblitz.svg)](https://stackblitz.com/github/withastro/astro/tree/latest/examples/basics)
[![Open with CodeSandbox](https://assets.codesandbox.io/github/button-edit-lime.svg)](https://codesandbox.io/p/sandbox/github/withastro/astro/tree/latest/examples/basics)
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/withastro/astro?devcontainer_path=.devcontainer/basics/devcontainer.json)
> 🧑‍🚀 **Seasoned astronaut?** Delete this file. Have fun!
![just-the-basics](https://github.com/withastro/astro/assets/2244813/a0a5533c-a856-4198-8470-2d67b1d7c554)
## 🚀 Project Structure
Inside of your Astro project, you'll see the following folders and files:
```text
/
├── public/
│ └── favicon.svg
├── src/
│ ├── layouts/
│ │ └── Layout.astro
│ └── pages/
│ └── index.astro
└── package.json
```
To learn more about the folder structure of an Astro project, refer to
[our guide on project structure](https://docs.astro.build/en/basics/project-structure/).
## 🧞 Commands
All commands are run from the root of the project, from a terminal:
| Command | Action |
| :--------------------- | :----------------------------------------------- |
| `deno install` | Installs dependencies |
| `deno task dev` | Starts local dev server at `localhost:4321` |
| `deno task build` | Build your production site to `./dist/` |
| `deno task preview` | Preview your build locally, before deploying |
| `deno task astro ...` | Run CLI commands like `astro add`, `astro check` |
| `deno task astro --help` | Get help using the Astro CLI |
## 👀 Want to learn more?
Feel free to check [our documentation](https://docs.astro.build) or jump into our
[Discord server](https://astro.build/chat).

View File

@ -1,13 +0,0 @@
// @ts-check
import { defineConfig } from "astro/config";
// https://astro.build/config
// Astro configuration; only the underlying Vite dev-server options are customised here.
export default defineConfig({
vite: {
server: {
fs: {
// Directories the Vite dev server is allowed to serve files from: this directory and
// the one two levels up (presumably the repository root, so files outside this
// package can be loaded — TODO confirm).
allow: [".", "../../"],
},
},
},
});

View File

@ -1,14 +0,0 @@
{
"name": "frontend",
"type": "module",
"version": "0.0.1",
"scripts": {
"dev": "astro dev",
"build": "astro build",
"preview": "astro preview",
"astro": "astro"
},
"dependencies": {
"astro": "^5.5.5"
}
}

View File

@ -1,9 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 128 128">
<path d="M50.4 78.5a75.1 75.1 0 0 0-28.5 6.9l24.2-65.7c.7-2 1.9-3.2 3.4-3.2h29c1.5 0 2.7 1.2 3.4 3.2l24.2 65.7s-11.6-7-28.5-7L67 45.5c-.4-1.7-1.6-2.8-2.9-2.8-1.3 0-2.5 1.1-2.9 2.7L50.4 78.5Zm-1.1 28.2Zm-4.2-20.2c-2 6.6-.6 15.8 4.2 20.2a17.5 17.5 0 0 1 .2-.7 5.5 5.5 0 0 1 5.7-4.5c2.8.1 4.3 1.5 4.7 4.7.2 1.1.2 2.3.2 3.5v.4c0 2.7.7 5.2 2.2 7.4a13 13 0 0 0 5.7 4.9v-.3l-.2-.3c-1.8-5.6-.5-9.5 4.4-12.8l1.5-1a73 73 0 0 0 3.2-2.2 16 16 0 0 0 6.8-11.4c.3-2 .1-4-.6-6l-.8.6-1.6 1a37 37 0 0 1-22.4 2.7c-5-.7-9.7-2-13.2-6.2Z" />
<style>
path { fill: #000; }
@media (prefers-color-scheme: dark) {
path { fill: #FFF; }
}
</style>
</svg>

Before

Width:  |  Height:  |  Size: 749 B

View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="115" height="48"><path fill="#17191E" d="M7.77 36.35C6.4 35.11 6 32.51 6.57 30.62c.99 1.2 2.35 1.57 3.75 1.78 2.18.33 4.31.2 6.33-.78.23-.12.44-.27.7-.42.18.55.23 1.1.17 1.67a4.56 4.56 0 0 1-1.94 3.23c-.43.32-.9.61-1.34.91-1.38.94-1.76 2.03-1.24 3.62l.05.17a3.63 3.63 0 0 1-1.6-1.38 3.87 3.87 0 0 1-.63-2.1c0-.37 0-.74-.05-1.1-.13-.9-.55-1.3-1.33-1.32a1.56 1.56 0 0 0-1.63 1.26c0 .06-.03.12-.05.2Z"/><path fill="url(#a)" d="M7.77 36.35C6.4 35.11 6 32.51 6.57 30.62c.99 1.2 2.35 1.57 3.75 1.78 2.18.33 4.31.2 6.33-.78.23-.12.44-.27.7-.42.18.55.23 1.1.17 1.67a4.56 4.56 0 0 1-1.94 3.23c-.43.32-.9.61-1.34.91-1.38.94-1.76 2.03-1.24 3.62l.05.17a3.63 3.63 0 0 1-1.6-1.38 3.87 3.87 0 0 1-.63-2.1c0-.37 0-.74-.05-1.1-.13-.9-.55-1.3-1.33-1.32a1.56 1.56 0 0 0-1.63 1.26c0 .06-.03.12-.05.2Z"/><path fill="#17191E" d="M.02 30.31s4.02-1.95 8.05-1.95l3.04-9.4c.11-.45.44-.76.82-.76.37 0 .7.31.82.76l3.04 9.4c4.77 0 8.05 1.95 8.05 1.95L17 11.71c-.2-.56-.53-.91-.98-.91H7.83c-.44 0-.76.35-.97.9L.02 30.31Zm42.37-5.97c0 1.64-2.05 2.62-4.88 2.62-1.85 0-2.5-.45-2.5-1.41 0-1 .8-1.49 2.65-1.49 1.67 0 3.09.03 4.73.23v.05Zm.03-2.04a21.37 21.37 0 0 0-4.37-.36c-5.32 0-7.82 1.25-7.82 4.18 0 3.04 1.71 4.2 5.68 4.2 3.35 0 5.63-.84 6.46-2.92h.14c-.03.5-.05 1-.05 1.4 0 1.07.18 1.16 1.06 1.16h4.15a16.9 16.9 0 0 1-.36-4c0-1.67.06-2.93.06-4.62 0-3.45-2.07-5.64-8.56-5.64-2.8 0-5.9.48-8.26 1.19.22.93.54 2.83.7 4.06 2.04-.96 4.95-1.37 7.2-1.37 3.11 0 3.97.71 3.97 2.15v.57Zm11.37 3c-.56.07-1.33.07-2.12.07-.83 0-1.6-.03-2.12-.1l-.02.58c0 2.85 1.87 4.52 8.45 4.52 6.2 0 8.2-1.64 8.2-4.55 0-2.74-1.33-4.09-7.2-4.39-4.58-.2-4.99-.7-4.99-1.28 0-.66.59-1 3.65-1 3.18 0 4.03.43 4.03 1.35v.2a46.13 46.13 0 0 1 4.24.03l.02-.55c0-3.36-2.8-4.46-8.2-4.46-6.08 0-8.13 1.49-8.13 4.39 0 2.6 1.64 4.23 7.48 4.48 4.3.14 4.77.62 4.77 1.28 0 .7-.7 1.03-3.71 1.03-3.47 0-4.35-.48-4.35-1.47v-.13Zm19.82-12.05a17.5 17.5 0 0 1-6.24 3.48c.03.84.03 2.4.03 3.24l1.5.02c-.02 1.63-.04 3.6-.04 
4.9 0 3.04 1.6 5.32 6.58 5.32 2.1 0 3.5-.23 5.23-.6a43.77 43.77 0 0 1-.46-4.13c-1.03.34-2.34.53-3.78.53-2 0-2.82-.55-2.82-2.13 0-1.37 0-2.65.03-3.84 2.57.02 5.13.07 6.64.11-.02-1.18.03-2.9.1-4.04-2.2.04-4.65.07-6.68.07l.07-2.93h-.16Zm13.46 6.04a767.33 767.33 0 0 1 .07-3.18H82.6c.07 1.96.07 3.98.07 6.92 0 2.95-.03 4.99-.07 6.93h5.18c-.09-1.37-.11-3.68-.11-5.65 0-3.1 1.26-4 4.12-4 1.33 0 2.28.16 3.1.46.03-1.16.26-3.43.4-4.43-.86-.25-1.81-.41-2.96-.41-2.46-.03-4.26.98-5.1 3.38l-.17-.02Zm22.55 3.65c0 2.5-1.8 3.66-4.64 3.66-2.81 0-4.61-1.1-4.61-3.66s1.82-3.52 4.61-3.52c2.82 0 4.64 1.03 4.64 3.52Zm4.71-.11c0-4.96-3.87-7.18-9.35-7.18-5.5 0-9.23 2.22-9.23 7.18 0 4.94 3.49 7.59 9.21 7.59 5.77 0 9.37-2.65 9.37-7.6Z"/><defs><linearGradient id="a" x1="6.33" x2="19.43" y1="40.8" y2="34.6" gradientUnits="userSpaceOnUse"><stop stop-color="#D83333"/><stop offset="1" stop-color="#F041FF"/></linearGradient></defs></svg>

Before

Width:  |  Height:  |  Size: 2.8 KiB

View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1440" height="1024" fill="none"><path fill="url(#a)" fill-rule="evenodd" d="M-217.58 475.75c91.82-72.02 225.52-29.38 341.2-44.74C240 415.56 372.33 315.14 466.77 384.9c102.9 76.02 44.74 246.76 90.31 366.31 29.83 78.24 90.48 136.14 129.48 210.23 57.92 109.99 169.67 208.23 155.9 331.77-13.52 121.26-103.42 264.33-224.23 281.37-141.96 20.03-232.72-220.96-374.06-196.99-151.7 25.73-172.68 330.24-325.85 315.72-128.6-12.2-110.9-230.73-128.15-358.76-12.16-90.14 65.87-176.25 44.1-264.57-26.42-107.2-167.12-163.46-176.72-273.45-10.15-116.29 33.01-248.75 124.87-320.79Z" clip-rule="evenodd" style="opacity:.154"/><path fill="url(#b)" fill-rule="evenodd" d="M1103.43 115.43c146.42-19.45 275.33-155.84 413.5-103.59 188.09 71.13 409 212.64 407.06 413.88-1.94 201.25-259.28 278.6-414.96 405.96-130 106.35-240.24 294.39-405.6 265.3-163.7-28.8-161.93-274.12-284.34-386.66-134.95-124.06-436-101.46-445.82-284.6-9.68-180.38 247.41-246.3 413.54-316.9 101.01-42.93 207.83 21.06 316.62 6.61Z" clip-rule="evenodd" style="opacity:.154"/><defs><linearGradient id="b" x1="373" x2="1995.44" y1="1100" y2="118.03" gradientUnits="userSpaceOnUse"><stop stop-color="#D83333"/><stop offset="1" stop-color="#F041FF"/></linearGradient><linearGradient id="a" x1="107.37" x2="1130.66" y1="1993.35" y2="1026.31" gradientUnits="userSpaceOnUse"><stop stop-color="#3245FF"/><stop offset="1" stop-color="#BC52EE"/></linearGradient></defs></svg>

Before

Width:  |  Height:  |  Size: 1.4 KiB

View File

@ -1,210 +0,0 @@
---
import astroLogo from '../assets/astro.svg';
import background from '../assets/background.svg';
---
<div id="container">
<img id="background" src={background.src} alt="" fetchpriority="high" />
<main>
<section id="hero">
<a href="https://astro.build"
><img src={astroLogo.src} width="115" height="48" alt="Astro Homepage" /></a
>
<h1>
To get started, open the <code><pre>src/pages</pre></code> directory in your project.
</h1>
<section id="links">
<a class="button" href="https://docs.astro.build">Read our docs</a>
<a href="https://astro.build/chat"
>Join our Discord <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 127.14 96.36"
><path
fill="currentColor"
d="M107.7 8.07A105.15 105.15 0 0 0 81.47 0a72.06 72.06 0 0 0-3.36 6.83 97.68 97.68 0 0 0-29.11 0A72.37 72.37 0 0 0 45.64 0a105.89 105.89 0 0 0-26.25 8.09C2.79 32.65-1.71 56.6.54 80.21a105.73 105.73 0 0 0 32.17 16.15 77.7 77.7 0 0 0 6.89-11.11 68.42 68.42 0 0 1-10.85-5.18c.91-.66 1.8-1.34 2.66-2a75.57 75.57 0 0 0 64.32 0c.87.71 1.76 1.39 2.66 2a68.68 68.68 0 0 1-10.87 5.19 77 77 0 0 0 6.89 11.1 105.25 105.25 0 0 0 32.19-16.14c2.64-27.38-4.51-51.11-18.9-72.15ZM42.45 65.69C36.18 65.69 31 60 31 53s5-12.74 11.43-12.74S54 46 53.89 53s-5.05 12.69-11.44 12.69Zm42.24 0C78.41 65.69 73.25 60 73.25 53s5-12.74 11.44-12.74S96.23 46 96.12 53s-5.04 12.69-11.43 12.69Z"
></path></svg
>
</a>
</section>
</section>
</main>
<a href="https://astro.build/blog/astro-5/" id="news" class="box">
<svg width="32" height="32" fill="none" xmlns="http://www.w3.org/2000/svg"
><path
d="M24.667 12c1.333 1.414 2 3.192 2 5.334 0 4.62-4.934 5.7-7.334 12C18.444 28.567 18 27.456 18 26c0-4.642 6.667-7.053 6.667-14Zm-5.334-5.333c1.6 1.65 2.4 3.43 2.4 5.333 0 6.602-8.06 7.59-6.4 17.334C13.111 27.787 12 25.564 12 22.666c0-4.434 7.333-8 7.333-16Zm-6-5.333C15.111 3.555 16 5.556 16 7.333c0 8.333-11.333 10.962-5.333 22-3.488-.774-6-4-6-8 0-8.667 8.666-10 8.666-20Z"
fill="#111827"></path></svg
>
<h2>What's New in Astro 5.0?</h2>
<p>
From content layers to server islands, click to learn more about the new features and
improvements in Astro 5.0
</p>
</a>
</div>
<style>
#background {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
z-index: -1;
filter: blur(100px);
}
#container {
font-family: Inter, Roboto, 'Helvetica Neue', 'Arial Nova', 'Nimbus Sans', Arial, sans-serif;
height: 100%;
}
main {
height: 100%;
display: flex;
justify-content: center;
}
#hero {
display: flex;
align-items: start;
flex-direction: column;
justify-content: center;
padding: 16px;
}
h1 {
font-size: 22px;
margin-top: 0.25em;
}
#links {
display: flex;
gap: 16px;
}
#links a {
display: flex;
align-items: center;
padding: 10px 12px;
color: #111827;
text-decoration: none;
transition: color 0.2s;
}
#links a:hover {
color: rgb(78, 80, 86);
}
#links a svg {
height: 1em;
margin-left: 8px;
}
#links a.button {
color: white;
background: linear-gradient(83.21deg, #3245ff 0%, #bc52ee 100%);
box-shadow:
inset 0 0 0 1px rgba(255, 255, 255, 0.12),
inset 0 -2px 0 rgba(0, 0, 0, 0.24);
border-radius: 10px;
}
#links a.button:hover {
color: rgb(230, 230, 230);
box-shadow: none;
}
pre {
font-family:
ui-monospace, 'Cascadia Code', 'Source Code Pro', Menlo, Consolas, 'DejaVu Sans Mono',
monospace;
font-weight: normal;
background: linear-gradient(14deg, #d83333 0%, #f041ff 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin: 0;
}
h2 {
margin: 0 0 1em;
font-weight: normal;
color: #111827;
font-size: 20px;
}
p {
color: #4b5563;
font-size: 16px;
line-height: 24px;
letter-spacing: -0.006em;
margin: 0;
}
code {
display: inline-block;
background:
linear-gradient(66.77deg, #f3cddd 0%, #f5cee7 100%) padding-box,
linear-gradient(155deg, #d83333 0%, #f041ff 18%, #f5cee7 45%) border-box;
border-radius: 8px;
border: 1px solid transparent;
padding: 6px 8px;
}
.box {
padding: 16px;
background: rgba(255, 255, 255, 1);
border-radius: 16px;
border: 1px solid white;
}
#news {
position: absolute;
bottom: 16px;
right: 16px;
max-width: 300px;
text-decoration: none;
transition: background 0.2s;
backdrop-filter: blur(50px);
}
#news:hover {
background: rgba(255, 255, 255, 0.55);
}
@media screen and (max-height: 368px) {
#news {
display: none;
}
}
@media screen and (max-width: 768px) {
#container {
display: flex;
flex-direction: column;
}
#hero {
display: block;
padding-top: 10%;
}
#links {
flex-wrap: wrap;
}
#links a.button {
padding: 14px 18px;
}
#news {
right: 16px;
left: 16px;
bottom: 2.5rem;
max-width: 100%;
}
h1 {
line-height: 1.5;
}
}
</style>

Some files were not shown because too many files have changed in this diff Show More