diff --git a/.idea/cvsa.iml b/.idea/cvsa.iml index 7bfcf20..916ca6a 100644 --- a/.idea/cvsa.iml +++ b/.idea/cvsa.iml @@ -28,6 +28,8 @@ + + diff --git a/deno.json b/deno.json deleted file mode 100644 index 20c75ba..0000000 --- a/deno.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "lock": false, - "workspace": ["./packages/crawler", "./packages/core"], - "nodeModulesDir": "auto", - "tasks": { - "crawler": "deno task --filter 'crawler' all", - "backend": "deno task --filter 'backend' start" - }, - "fmt": { - "useTabs": true, - "lineWidth": 120, - "indentWidth": 4, - "semiColons": true, - "proseWrap": "always" - } -} diff --git a/packages/backend/db/config.ts b/packages/backend/db/config.ts deleted file mode 100644 index 9d0e514..0000000 --- a/packages/backend/db/config.ts +++ /dev/null @@ -1,46 +0,0 @@ -const requiredEnvVars = ["DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_PORT", "DB_NAME_CRED"]; - -const unsetVars = requiredEnvVars.filter((key) => process.env[key] === undefined); - -if (unsetVars.length > 0) { - throw new Error(`Missing required environment variables: ${unsetVars.join(", ")}`); -} - -const databaseHost = process.env["DB_HOST"]!; -const databaseName = process.env["DB_NAME"]; -const databaseNameCred = process.env["DB_NAME_CRED"]!; -const databaseUser = process.env["DB_USER"]!; -const databasePassword = process.env["DB_PASSWORD"]!; -const databasePort = process.env["DB_PORT"]!; - -export const postgresConfig = { - hostname: databaseHost, - port: parseInt(databasePort), - database: databaseName, - user: databaseUser, - password: databasePassword -}; - -export const postgresConfigNpm = { - host: databaseHost, - port: parseInt(databasePort), - database: databaseName, - username: databaseUser, - password: databasePassword -}; - -export const postgresCredConfigNpm = { - host: databaseHost, - port: parseInt(databasePort), - database: databaseNameCred, - username: databaseUser, - password: databasePassword -}; - -export const postgresConfigCred = { - hostname: databaseHost, - port: parseInt(databasePort), - database: databaseNameCred, - user: databaseUser, - password: databasePassword -}; diff --git a/packages/backend/db/db.ts b/packages/backend/db/db.ts deleted file mode 100644 index 6dd488b..0000000 --- a/packages/backend/db/db.ts +++ /dev/null @@ -1,5 +0,0 @@ -import postgres from "postgres"; -import { postgresConfigNpm, postgresCredConfigNpm } from "./config"; - -export const sql = postgres(postgresConfigNpm); -export const sqlCred = postgres(postgresCredConfigNpm); diff --git a/packages/backend/db/snapshots.ts b/packages/backend/db/snapshots.ts index d5b8fa5..83ea7b5 100644 --- a/packages/backend/db/snapshots.ts +++ b/packages/backend/db/snapshots.ts @@ -1,4 +1,4 @@ -import { sql } from "./db"; +import { sql } from "@core/db/dbNew"; import type { VideoSnapshotType } from "@core/db/schema.d.ts"; export async function getVideoSnapshots( diff --git a/packages/backend/lib/auth/captchaDifficulty.ts b/packages/backend/lib/auth/captchaDifficulty.ts new file mode 100644 index 0000000..582fae7 --- /dev/null +++ b/packages/backend/lib/auth/captchaDifficulty.ts @@ -0,0 +1,57 @@ +import { Psql } from "@core/db/psql"; +import { SlidingWindow } from "@core/mq/slidingWindow.ts"; +import { redis } from "@core/db/redis.ts"; + +type seconds = number; + +export interface CaptchaDifficultyConfig { + global: boolean; + duration: seconds; + threshold: number; + difficulty: number; +} + +export const getCaptchaDifficultyConfigByRoute = async (sql: Psql, route: string): Promise => { + return sql` + SELECT duration, threshold, difficulty, global + FROM captcha_difficulty_settings + WHERE CONCAT(method, '-', path) = ${route} + ORDER BY duration + `; +}; + +export const getCaptchaConfigMaxDuration = async (sql: Psql, route: string): Promise => { + const rows = await sql<{max: number}[]>` + SELECT MAX(duration) + FROM captcha_difficulty_settings + WHERE CONCAT(method, '-', path) = ${route} + `; + if (rows.length < 1){ + return Number.MAX_SAFE_INTEGER; + } + return rows[0].max; +} + + +export const getCurrentCaptchaDifficulty = async (sql: Psql, route: string): Promise => { + const configs = await getCaptchaDifficultyConfigByRoute(sql, route); + if (configs.length < 1) { + return null + } + else if (configs.length == 1) { + return configs[0].difficulty + } + const maxDuration = configs.reduce((max, config) => + Math.max(max, config.duration), 0); + const slidingWindow = new SlidingWindow(redis, maxDuration); + for (let i = 0; i < configs.length; i++) { + const config = configs[i]; + const lastConfig = configs[i - 1]; + const count = await slidingWindow.count(`captcha-${route}`, config.duration); + if (count >= config.threshold) { + continue; + } + return lastConfig.difficulty + } + return configs[0].difficulty; +} diff --git a/packages/backend/lib/auth/getJWTsecret.ts b/packages/backend/lib/auth/getJWTsecret.ts new file mode 100644 index 0000000..9388892 --- /dev/null +++ b/packages/backend/lib/auth/getJWTsecret.ts @@ -0,0 +1,13 @@ +import { ErrorResponse } from "src/schema"; + +export const getJWTsecret = () => { + const secret = process.env["JWT_SECRET"]; + if (!secret) { + const response: ErrorResponse = { + message: "JWT_SECRET is not set", + code: "SERVER_ERROR" + }; + return [response, true]; + } + return [secret, null]; +} \ No newline at end of file diff --git a/packages/backend/middleware/captcha.ts b/packages/backend/middleware/captcha.ts new file mode 100644 index 0000000..ceef077 --- /dev/null +++ b/packages/backend/middleware/captcha.ts @@ -0,0 +1,112 @@ +import { Context, Next } from "hono"; +import { ErrorResponse } from "src/schema"; +import { SlidingWindow } from "@core/mq/slidingWindow.ts"; +import { getCaptchaConfigMaxDuration, getCurrentCaptchaDifficulty } from "@/lib/auth/captchaDifficulty.ts"; +import { sqlCred } from "@core/db/dbNew.ts"; +import { redis } from "@core/db/redis.ts"; +import { verify } from 'hono/jwt'; +import { JwtTokenInvalid, JwtTokenExpired } from "hono/utils/jwt/types"; +import { getJWTsecret } from "@/lib/auth/getJWTsecret.ts"; +import { lockManager } from "@core/mq/lockManager.ts"; +import { object, string, number, ValidationError } from "yup"; + +const tokenSchema = object({ + exp: number().integer(), + id: string().length(6), + difficulty: number().integer().moreThan(0) +}); + +export const captchaMiddleware = async (c: Context, next: Next) => { + const authHeader = c.req.header("Authorization"); + + if (!authHeader) { + const response: ErrorResponse = { + message: "'Authorization' header is missing.", + code: "UNAUTHORIZED" + }; + return c.json(response, 401); + } + + const authIsBearer = authHeader.startsWith("Bearer "); + if (!authIsBearer || authHeader.length < 8) { + const response: ErrorResponse = { + message: "'Authorization' header is invalid.", + code: "INVALID_HEADER" + }; + return c.json(response, 400); + } + + const [r, err] = getJWTsecret(); + if (err) { + return c.json(r as ErrorResponse, 500); + } + const jwtSecret = r as string; + + const token = authHeader.substring(7); + + const path = c.req.path; + const method = c.req.method; + const route = `${method}-${path}`; + + const requiredDifficulty = await getCurrentCaptchaDifficulty(sqlCred, route); + + try { + const decodedPayload = await verify(token, jwtSecret); + const payload = await tokenSchema.validate(decodedPayload); + const difficulty = payload.difficulty; + const tokenID = payload.id; + const consumed = await lockManager.isLocked(tokenID); + if (consumed) { + const response: ErrorResponse = { + message: "Token has already been used.", + code: "INVALID_CREDENTIALS" + }; + return c.json(response, 401); + } + if (difficulty < requiredDifficulty) { + const response: ErrorResponse = { + message: "Token to weak.", + code: "UNAUTHORIZED" + }; + return c.json(response, 401); + } + const EXPIRE_FIVE_MINUTES = 300; + await lockManager.acquireLock(tokenID, EXPIRE_FIVE_MINUTES); + } + catch (e) { + if (e instanceof JwtTokenInvalid) { + const response: ErrorResponse = { + message: "Failed to verify the token.", + code: "INVALID_CREDENTIALS" + }; + return c.json(response, 400); + } + else if (e instanceof JwtTokenExpired) { + const response: ErrorResponse = { + message: "Token expired.", + code: "INVALID_CREDENTIALS" + }; + return c.json(response, 400); + } + else if (e instanceof ValidationError) { + const response: ErrorResponse = { + code: "INVALID_QUERY_PARAMS", + message: "Invalid query parameters", + errors: e.errors + }; + return c.json(response, 400); + } + else { + const response: ErrorResponse = { + message: "Unknown error.", + code: "UNKNOWN_ERROR" + }; + return c.json(response, 500); + } + } + const duration = await getCaptchaConfigMaxDuration(sqlCred, route); + const window = new SlidingWindow(redis, duration); + await window.event(`captcha-${route}`); + + await next(); +}; \ No newline at end of file diff --git a/packages/backend/middleware/index.ts b/packages/backend/middleware/index.ts index 932daf0..6f8e411 100644 --- a/packages/backend/middleware/index.ts +++ b/packages/backend/middleware/index.ts @@ -7,6 +7,7 @@ import { preetifyResponse } from "./preetifyResponse.ts"; import { logger } from "./logger.ts"; import { timing } from "hono/timing"; import { contentType } from "./contentType.ts"; +import { captchaMiddleware } from "./captcha.ts"; export function configureMiddleWares(app: Hono<{ Variables: Variables }>) { app.use("*", contentType); @@ -15,5 +16,6 @@ export function configureMiddleWares(app: Hono<{ Variables: Variables }>) { app.use("*", logger({})); app.post("/user", registerRateLimiter); + app.post("/user", captchaMiddleware); app.all("/ping", bodyLimitForPing, ...pingHandler); } diff --git a/packages/backend/middleware/rateLimiters.ts b/packages/backend/middleware/rateLimiters.ts index a065f4c..e937406 100644 --- a/packages/backend/middleware/rateLimiters.ts +++ b/packages/backend/middleware/rateLimiters.ts @@ -5,27 +5,31 @@ import { getConnInfo } from "hono/bun"; import type { Context } from "hono"; import { redis } from "@core/db/redis.ts"; import { RedisStore } from "rate-limit-redis"; +import { generateRandomId } from "@core/lib/randomID.ts"; + +export const getIdentifier = (c: Context, includeIP: boolean = true) => { + let ipAddr = generateRandomId(6); + const info = getConnInfo(c); + if (info.remote && info.remote.address) { + ipAddr = info.remote.address; + } + const forwardedFor = c.req.header("X-Forwarded-For"); + if (forwardedFor) { + ipAddr = forwardedFor.split(",")[0]; + } + const path = c.req.path; + const method = c.req.method; + const ipIdentifier = includeIP ? `@${ipAddr}` : ""; + return `${method}-${path}${ipIdentifier}` +} export const registerRateLimiter = rateLimiter({ windowMs: 60 * MINUTE, limit: 10, standardHeaders: "draft-6", - keyGenerator: (c) => { - let ipAddr = crypto.randomUUID() as string; - const info = getConnInfo(c as unknown as Context); - if (info.remote && info.remote.address) { - ipAddr = info.remote.address; - } - const forwardedFor = c.req.header("X-Forwarded-For"); - if (forwardedFor) { - ipAddr = forwardedFor.split(",")[0]; - } - const path = new URL(c.req.url).pathname; - const method = c.req.method; - return `${method}-${path}@${ipAddr}`; - }, + keyGenerator: getIdentifier, store: new RedisStore({ // @ts-expect-error - Known issue: the `c`all` function is not present in @types/ioredis sendCommand: (...args: string[]) => redis.call(...args) }) as unknown as Store -}); +}); \ No newline at end of file diff --git a/packages/backend/package.json b/packages/backend/package.json index aede4e7..2cc883d 100644 --- a/packages/backend/package.json +++ b/packages/backend/package.json @@ -10,7 +10,6 @@ "hono": "^4.7.8", "hono-rate-limiter": "^0.4.2", "ioredis": "^5.6.1", - "jose": "^6.0.11", "limiter": "^3.0.0", "postgres": "^3.4.5", "rate-limit-redis": "^4.2.0", diff --git a/packages/backend/routes/captcha/[id]/result/GET.ts b/packages/backend/routes/captcha/[id]/result/GET.ts index cffab6d..25d307c 100644 --- a/packages/backend/routes/captcha/[id]/result/GET.ts +++ b/packages/backend/routes/captcha/[id]/result/GET.ts @@ -2,9 +2,9 @@ import { Context } from "hono"; import { Bindings, BlankEnv } from "hono/types"; import { ErrorResponse } from "src/schema"; import { createHandlers } from "src/utils.ts"; -import * as jose from "jose"; +import { sign } from 'hono/jwt' import { generateRandomId } from "@core/lib/randomID.ts"; -import { lockManager } from "@core/mq/lockManager.ts"; +import { getJWTsecret } from "lib/auth/getJWTsecret.ts"; interface CaptchaResponse { success: boolean; @@ -32,60 +32,55 @@ export const verifyChallengeHandler = createHandlers( const id = c.req.param("id"); const ans = c.req.query("ans"); if (!ans) { - const response: ErrorResponse = { + const response: ErrorResponse = { message: "Missing required query parameter: ans", code: "INVALID_QUERY_PARAMS" }; - return c.json>(response, 400); + return c.json(response, 400); } const res = await getChallengeVerificationResult(id, ans); const data: CaptchaResponse = await res.json(); if (data.error && res.status === 404) { - const response: ErrorResponse = { + const response: ErrorResponse = { message: data.error, code: "ENTITY_NOT_FOUND" }; - return c.json>(response, 401); + return c.json(response, 401); } else if (data.error && res.status === 400) { - const response: ErrorResponse = { + const response: ErrorResponse = { message: data.error, code: "INVALID_QUERY_PARAMS" }; - return c.json>(response, 400); + return c.json(response, 400); } else if (data.error) { - const response: ErrorResponse = { + const response: ErrorResponse = { message: data.error, code: "UNKNOWN_ERROR" }; - return c.json>(response, 500); + return c.json(response, 500); } if (!data.success) { - const response: ErrorResponse = { + const response: ErrorResponse = { message: "Incorrect answer", code: "INVALID_CREDENTIALS" }; - return c.json>(response, 401); + return c.json(response, 401); } - const secret = process.env["JWT_SECRET"]; - if (!secret) { - const response: ErrorResponse = { - message: "JWT_SECRET is not set", - code: "SERVER_ERROR" - }; - return c.json>(response, 500); + const [r, err] = getJWTsecret(); + if (err) { + return c.json(r as ErrorResponse, 500); } - const jwtSecret = new TextEncoder().encode(secret); - const alg = "HS256"; + const jwtSecret = r as string; - - const tokenID = generateRandomId(10); - const EXPIRE_FIVE_MINUTES = 300; - await lockManager.acquireLock(tokenID, EXPIRE_FIVE_MINUTES); - const jwt = await new jose.SignJWT({ difficulty: data.difficulty!, id: tokenID }) - .setProtectedHeader({ alg }) - .setIssuedAt() - .sign(jwtSecret); + const tokenID = generateRandomId(6); + const NOW = Math.floor(Date.now() / 1000) + const FIVE_MINUTES_LATER = NOW + 60 * 5; + const jwt = await sign({ + difficulty: data.difficulty!, + id: tokenID, + exp: FIVE_MINUTES_LATER + }, jwtSecret); return c.json({ token: jwt }); diff --git a/packages/backend/routes/captcha/difficulty/GET.ts b/packages/backend/routes/captcha/difficulty/GET.ts new file mode 100644 index 0000000..0bed7cc --- /dev/null +++ b/packages/backend/routes/captcha/difficulty/GET.ts @@ -0,0 +1,43 @@ +import { createHandlers } from "src/utils.ts"; +import { object, string, ValidationError } from "yup"; +import { ErrorResponse } from "src/schema"; +import { getCurrentCaptchaDifficulty } from "@/lib/auth/captchaDifficulty.ts"; +import { sqlCred } from "@core/db/dbNew.ts"; + +const queryParamsSchema = object({ + route: string().matches(/(?:GET|POST|PUT|PATCH|DELETE)-\/.*/g) +}); + +export const getCaptchaDifficultyHandler = createHandlers(async (c) => { + try { + const queryParams = await queryParamsSchema.validate(c.req.query()); + const { route } = queryParams; + const difficulty = await getCurrentCaptchaDifficulty(sqlCred, route); + if (!difficulty) { + const response: ErrorResponse = { + code: "ENTITY_NOT_FOUND", + message: "No difficulty configs found for this route." + }; + return c.json>(response, 404); + } + return c.json({ + "difficulty": difficulty + }); + } catch (e: unknown) { + if (e instanceof ValidationError) { + const response: ErrorResponse = { + code: "INVALID_QUERY_PARAMS", + message: "Invalid query parameters", + errors: e.errors + }; + return c.json(response, 400); + } else { + const response: ErrorResponse = { + code: "UNKNOWN_ERROR", + message: "Unknown error", + errors: [e] + }; + return c.json>(response, 500); + } + } +}); diff --git a/packages/backend/routes/captcha/session/POST.ts b/packages/backend/routes/captcha/session/POST.ts index 63b9331..7d2c3d1 100644 --- a/packages/backend/routes/captcha/session/POST.ts +++ b/packages/backend/routes/captcha/session/POST.ts @@ -6,7 +6,7 @@ const createNewChallenge = async (difficulty: number) => { const baseURL = process.env["UCAPTCHA_URL"]; const url = new URL(baseURL); url.pathname = "/challenge"; - const res = await fetch(url.toString(), { + return await fetch(url.toString(), { method: "POST", headers: { "Content-Type": "application/json", @@ -15,7 +15,6 @@ const createNewChallenge = async (difficulty: number) => { difficulty: difficulty, }) }); - return res; } export const createCaptchaSessionHandler = createHandlers(async (_c) => { diff --git a/packages/backend/routes/user/register.ts b/packages/backend/routes/user/register.ts index ac8200d..d774b56 100644 --- a/packages/backend/routes/user/register.ts +++ b/packages/backend/routes/user/register.ts @@ -3,7 +3,7 @@ import Argon2id from "@rabbit-company/argon2id"; import { object, string, ValidationError } from "yup"; import type { Context } from "hono"; import type { Bindings, BlankEnv, BlankInput } from "hono/types"; -import { sqlCred } from "db/db.ts"; +import { sqlCred } from "@core/db/dbNew.ts"; import { ErrorResponse, StatusResponse } from "src/schema"; const RegistrationBodySchema = object({ @@ -64,7 +64,7 @@ export const registerHandler = createHandlers(async (c: ContextType) => { return c.json>(response, 400); } else { const response: ErrorResponse = { - message: "Invalid JSON payload.", + message: "Unknown error.", errors: [(e as Error).message], code: "UNKNOWN_ERROR" }; diff --git a/packages/backend/routes/video/[id]/info.ts b/packages/backend/routes/video/[id]/info.ts index ad64045..3e05493 100644 --- a/packages/backend/routes/video/[id]/info.ts +++ b/packages/backend/routes/video/[id]/info.ts @@ -1,8 +1,8 @@ import logger from "@core/log/logger.ts"; import { redis } from "@core/db/redis.ts"; -import { sql } from "../../../db/db.ts"; +import { sql } from "@core/db/dbNew.ts"; import { number, ValidationError } from "yup"; -import { createHandlers } from "../../../src/utils.ts"; +import { createHandlers } from "@/src/utils.ts"; import { getVideoInfo, getVideoInfoByBV } from "@core/net/getVideoInfo.ts"; import { idSchema } from "./snapshots.ts"; import { NetSchedulerError } from "@core/net/delegate.ts"; diff --git a/packages/backend/src/routing.ts b/packages/backend/src/routing.ts index a56ed73..d5a7aa2 100644 --- a/packages/backend/src/routing.ts +++ b/packages/backend/src/routing.ts @@ -5,6 +5,7 @@ import { videoInfoHandler, getSnapshotsHanlder } from "routes/video"; import { Hono } from "hono"; import { Variables } from "hono/types"; import { createCaptchaSessionHandler, verifyChallengeHandler } from "routes/captcha"; +import { getCaptchaDifficultyHandler } from "../routes/captcha/difficulty/GET.ts"; export function configureRoutes(app: Hono<{ Variables: Variables }>) { app.get("/", ...rootHandler); @@ -17,4 +18,6 @@ export function configureRoutes(app: Hono<{ Variables: Variables }>) { app.post("/captcha/session", ...createCaptchaSessionHandler); app.get("/captcha/:id/result", ...verifyChallengeHandler); + + app.get("/captcha/difficulty", ...getCaptchaDifficultyHandler) } diff --git a/packages/backend/src/schema.d.ts b/packages/backend/src/schema.d.ts index 51f54fa..4a50d7b 100644 --- a/packages/backend/src/schema.d.ts +++ b/packages/backend/src/schema.d.ts @@ -3,13 +3,14 @@ type ErrorCode = | "UNKNOWN_ERROR" | "INVALID_PAYLOAD" | "INVALID_FORMAT" + | "INVALID_HEADER" | "BODY_TOO_LARGE" | "UNAUTHORIZED" | "INVALID_CREDENTIALS" | "ENTITY_NOT_FOUND" | "SERVER_ERROR"; -export interface ErrorResponse { +export interface ErrorResponse { code: ErrorCode; message: string; errors?: E[]; diff --git a/packages/backend/tsconfig.json b/packages/backend/tsconfig.json index 6cbd96f..6b2128e 100644 --- a/packages/backend/tsconfig.json +++ b/packages/backend/tsconfig.json @@ -10,6 +10,7 @@ "skipLibCheck": true, "paths": { "@core/*": ["../core/*"], + "@/*": ["./*"], "@crawler/*": ["../crawler/*"] }, "allowSyntheticDefaultImports": true, diff --git a/packages/core/db/dbNew.ts b/packages/core/db/dbNew.ts index 11088e5..2f925ec 100644 --- a/packages/core/db/dbNew.ts +++ b/packages/core/db/dbNew.ts @@ -1,6 +1,8 @@ import postgres from "postgres"; -import { postgresConfigNpm } from "./pgConfigNew"; +import { postgresConfigCred, postgresConfig } from "./pgConfigNew"; -export const sql = postgres(postgresConfigNpm); +export const sql = postgres(postgresConfig); -export const sqlTest = postgres(postgresConfigNpm); \ No newline at end of file +export const sqlCred = postgres(postgresConfigCred); + +export const sqlTest = postgres(postgresConfig); \ No newline at end of file diff --git a/packages/core/db/pgConfigNew.ts b/packages/core/db/pgConfigNew.ts index d6cb437..e41c02f 100644 --- a/packages/core/db/pgConfigNew.ts +++ b/packages/core/db/pgConfigNew.ts @@ -14,14 +14,6 @@ const databasePassword = process.env["DB_PASSWORD"]!; const databasePort = process.env["DB_PORT"]!; export const postgresConfig = { - hostname: databaseHost, - port: parseInt(databasePort), - database: databaseName, - user: databaseUser, - password: databasePassword -}; - -export const postgresConfigNpm = { host: databaseHost, port: parseInt(databasePort), database: databaseName, diff --git a/packages/crawler/global.d.ts b/packages/core/db/psql.d.ts similarity index 51% rename from packages/crawler/global.d.ts rename to packages/core/db/psql.d.ts index 37fc0e2..f629cd5 100644 --- a/packages/crawler/global.d.ts +++ b/packages/core/db/psql.d.ts @@ -1,3 +1,3 @@ import type postgres from "postgres"; -export type Psql = postgres.Sql<{}>; +export type Psql = postgres.Sql; diff --git a/packages/core/lib/randomID.ts b/packages/core/lib/randomID.ts index 477fe4a..7448438 100644 --- a/packages/core/lib/randomID.ts +++ b/packages/core/lib/randomID.ts @@ -1,142 +1,15 @@ -const getSecureRandomInt = (max: number): number => { - const array = new Uint32Array(1); - crypto.getRandomValues(array); - // Using modulo bias is technically present, but negligible here because the space (56) is tiny compared to 2^32. - return array[0] % max; -} - - -/** - * Generates a random ID with characteristics similar to UUIDv7, - * incorporating a timestamp prefix for sortability and a random suffix, - * using a customizable length and a specific character set. - * - * This function aims for sortability by placing a time-based component at the beginning, - * similar to UUIDv7, while allowing a variable total length and using a character set - * designed to avoid visually similar characters. - * - * The character set includes uppercase and lowercase letters and numbers, - * excluding visually similar characters (0, O, I, l, 1). - * - * **Length Reference**: - * - * With a collision probability of **0.1%**, - * the maximum ID generation rate per millisecond for the following lengths is: - * - **10**: 1.8 IDs / ms or 1,844 QPS - * - **12**: 27 IDs / ms or 26,998 QPS - * - **16**: 5784 IDs / ms or 5,784,295 QPS - * - * With a collision probability of **0.001%**, - * the maximum ID generation rate per millisecond for the following lengths is: - * - **11**: 1.5 IDs / ms or 1,520 QPS - * - **14**: 85 IDs / ms or 85,124 QPS - * - **16**: 1246 IDs / ms or 1,245,983 QPS - * - * With a collision probability of **0.00001%**, - * the maximum ID generation rate per millisecond for the following lengths is: - * - **14**: 18 IDs / ms or 18,339 QPS - * - **15**: 70 IDs / ms or 70,164 QPS - * - **16**: 1246 IDs / ms or 268,438 QPS - * - * The formula: max_qps = 1000 * (2 * (56**(length - 8)) * -log(1 - prob))**(1/3) - * - * @param length The desired total length of the ID. Must be at least 8. - * @returns A sortable random ID string of the specified length. - * @throws Error if the requested length is less than the minimum required for the timestamp prefix (8). - */ export function generateRandomId(length: number): string { - // Character set excluding 0, O, I, l, 1 - const allowedChars = "abcdefghijkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789"; // 56 characters - const base = allowedChars.length; // 56 - const TIMESTAMP_PREFIX_LENGTH = 8; // Fixed length for the timestamp part to ensure sortability + const characters = 'abcdefghijkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'; + const charactersLength = characters.length; + const randomBytes = new Uint8Array(length); - if (length < TIMESTAMP_PREFIX_LENGTH) { - throw new Error(`Length must be at least ${TIMESTAMP_PREFIX_LENGTH} to include the timestamp prefix.`); + crypto.getRandomValues(randomBytes); + + let result = ''; + for (let i = 0; i < length; i++) { + const randomIndex = randomBytes[i] % charactersLength; + result += characters.charAt(randomIndex); } - // --- Generate Timestamp Prefix --- - const timestamp = Date.now(); // Milliseconds since epoch (Unix time) - let timestampBaseString = ""; - let tempTimestamp = timestamp; - const firstChar = allowedChars[0]; // Character for padding ('a') - - // Convert timestamp to a base-56 string - // We process from the least significant "digit" to the most significant - while (tempTimestamp > 0) { - timestampBaseString = allowedChars[tempTimestamp % base] + timestampBaseString; - tempTimestamp = Math.floor(tempTimestamp / base); - } - - // Pad the timestamp string at the beginning to ensure a fixed length. - // This is crucial for chronological sortability of the generated IDs. - while (timestampBaseString.length < TIMESTAMP_PREFIX_LENGTH) { - timestampBaseString = firstChar + timestampBaseString; - } - - // Although highly unlikely with an 8-character prefix using base 56 for current timestamps, - // this would truncate if the timestamp string somehow exceeded the prefix length. - if (timestampBaseString.length > TIMESTAMP_PREFIX_LENGTH) { - timestampBaseString = timestampBaseString.substring(timestampBaseString.length - TIMESTAMP_PREFIX_LENGTH); - } - - // --- Generate Random Suffix --- - const randomLength = length - TIMESTAMP_PREFIX_LENGTH; - let randomSuffix = ""; - const allowedCharsLength = allowedChars.length; - - for (let i = 0; i < randomLength; i++) { - const randomIndex = getSecureRandomInt(allowedCharsLength); - randomSuffix += allowedChars[randomIndex]; - } - - // --- Concatenate and Return --- - return timestampBaseString + randomSuffix; -} - -/** - * Decodes the timestamp (in milliseconds since epoch) from an ID - * generated by a function that uses a fixed-length timestamp prefix - * encoded in a specific base and character set (like the previous example). - * - * It extracts the timestamp prefix and converts it back from the - * custom base-56 encoding to a number. - * - * @param id The ID string containing the timestamp prefix. - * @returns The timestamp in milliseconds since epoch. - * @throws Error if the ID is too short or contains invalid characters in the timestamp prefix. - */ -export function decodeTimestampFromId(id: string): number { - // Character set must match the encoding function used to generate the ID - const allowedChars = "abcdefghijkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789"; // 56 characters - const base = allowedChars.length; // 56 - const TIMESTAMP_PREFIX_LENGTH = 8; // Fixed length for the timestamp part - - if (id.length < TIMESTAMP_PREFIX_LENGTH) { - throw new Error( - `ID must be at least ${TIMESTAMP_PREFIX_LENGTH} characters long to contain a timestamp prefix.` - ); - } - - // Extract the timestamp prefix from the beginning of the ID string - const timestampPrefix = id.substring(0, TIMESTAMP_PREFIX_LENGTH); - let timestamp = 0; - - // Convert the base-56 timestamp string back to a number - // Iterate through the prefix characters from left to right (most significant 'digit') - for (let i = 0; i < timestampPrefix.length; i++) { - const char = timestampPrefix[i]; - // Find the index (value) of the character in the allowed character set - const charIndex = allowedChars.indexOf(char); - - if (charIndex === -1) { - // If a character is not in the allowed set, the ID is likely invalid - throw new Error(`Invalid character "${char}" found in timestamp prefix.`); - } - - // Standard base conversion: accumulate the value - // For each digit, multiply the current total by the base and add the digit's value - timestamp = timestamp * base + charIndex; - } - - return timestamp; -} + return result; +} \ No newline at end of file diff --git a/packages/core/mq/rateLimiter.ts b/packages/core/mq/rateLimiter.ts index 96f208c..d15ddd5 100644 --- a/packages/core/mq/rateLimiter.ts +++ b/packages/core/mq/rateLimiter.ts @@ -8,7 +8,7 @@ export interface RateLimiterConfig { export class RateLimiter { private configs: RateLimiterConfig[] = []; private buckets: TokenBucket[] = []; - private identifierFn: (configIndex: number) => string; + private readonly identifierFn: (configIndex: number) => string; /* * @param name The name of the rate limiter @@ -26,8 +26,8 @@ export class RateLimiter { for (let i = 0; i < configs.length; i++) { const config = configs[i]; const bucket = new TokenBucket({ - capacity: config.max, - rate: config.max / config.duration, + max: config.max, + duration: config.duration, identifier: this.identifierFn(i), }) this.buckets.push(bucket); diff --git a/packages/core/mq/slidingWindow.ts b/packages/core/mq/slidingWindow.ts new file mode 100644 index 0000000..092190c --- /dev/null +++ b/packages/core/mq/slidingWindow.ts @@ -0,0 +1,57 @@ +import type { Redis } from "ioredis"; + +export class SlidingWindow { + private redis: Redis; + private readonly windowSize: number; + + /* + * Create a new sliding window + * @param redisClient The Redis client used to store the data + * @param windowSize The size of the window in seconds + */ + constructor(redisClient: Redis, windowSize: number) { + this.redis = redisClient; + this.windowSize = windowSize * 1000; + } + + /* + * Trigger an event in the sliding window + * @param eventName The name of the event + */ + async event(eventName: string): Promise { + const now = Date.now(); + const key = `cvsa:sliding_window:${eventName}`; + + const uniqueMember = `${now}-${Math.random()}`; + // Add current timestamp to an ordered set + await this.redis.zadd(key, now, uniqueMember); + + // Remove timestamps outside the window + await this.redis.zremrangebyscore(key, 0, now - this.windowSize); + } + + /* + * Count the number of events in the sliding window + * @param {string} eventName The name of the event + * @param {number} [duration] The duration of the window in seconds + */ + async count(eventName: string, duration?: number): Promise { + const key = `cvsa:sliding_window:${eventName}`; + const now = Date.now(); + + // Remove timestamps outside the window + await this.redis.zremrangebyscore(key, 0, now - this.windowSize); + + if (duration) { + return this.redis.zcount(key, now - duration * 1000, now); + } + + // Get the number of timestamps in the window + return this.redis.zcard(key); + } + + clear(eventName: string): Promise { + const key = `cvsa:sliding_window:${eventName}`; + return this.redis.del(key); + } +} \ No newline at end of file diff --git a/packages/core/mq/tokenBucket.ts b/packages/core/mq/tokenBucket.ts index 8e94db9..e56d368 100644 --- a/packages/core/mq/tokenBucket.ts +++ b/packages/core/mq/tokenBucket.ts @@ -1,28 +1,56 @@ import { redis } from "@core/db/redis"; import { SECOND } from "@core/const/time"; -interface TokenBucketOptions { +export interface TokenBucketRateOptions { capacity: number; rate: number; identifier: string; - keyPrefix?: string; + keyPrefix?: string; } +export interface TokenBucketDurationOptions { + duration: number; + max: number; + identifier: string; + keyPrefix?: string; +} + +export type TokenBucketConstructorOptions = TokenBucketRateOptions | TokenBucketDurationOptions; + export class TokenBucket { private readonly capacity: number; private readonly rate: number; private readonly keyPrefix: string; private readonly identifier: string; - constructor(options: TokenBucketOptions) { - if (options.capacity <= 0 || options.rate <= 0) { - throw new Error("Capacity and rate must be greater than zero."); + constructor(options: TokenBucketConstructorOptions) { + if (!options.identifier) { + throw new Error("Identifier is required."); } - - this.capacity = options.capacity; - this.rate = options.rate; this.identifier = options.identifier; this.keyPrefix = options.keyPrefix || "cvsa:token_bucket:"; + + const isRateOptions = 'capacity' in options && 'rate' in options; + const isDurationOptions = 'duration' in options && 'max' in options; + + if (isRateOptions && isDurationOptions) { + throw new Error("Provide either 'capacity'/'rate' or 'duration'/'max', not both."); + } else if (isRateOptions) { + if (options.capacity <= 0 || options.rate <= 0) { + throw new Error("'capacity' and 'rate' must be greater than zero."); + } + this.capacity = options.capacity; + this.rate = options.rate; + } else if (isDurationOptions) { + if (options.duration <= 0 || options.max <= 0) { + throw new Error("'duration' and 'max' must be greater than zero."); + } + this.capacity = options.max; + this.rate = options.max / options.duration; + + } else { + throw new Error("Provide either 'capacity'/'rate' or 'duration'/'max'."); + } } getKey(): string { diff --git a/packages/core/test/lib/randomID.test.ts b/packages/core/test/lib/randomID.test.ts index 71b859c..8f69dcd 100644 --- a/packages/core/test/lib/randomID.test.ts +++ b/packages/core/test/lib/randomID.test.ts @@ -1,24 +1,13 @@ import { describe, expect, it } from "vitest"; -import { generateRandomId, decodeTimestampFromId } from "@core/lib/randomID.ts"; +import { generateRandomId } from "@core/lib/randomID.ts"; describe("generateRandomId", () => { - it("should throw an error if the requested length is less than 8", () => { - expect(() => generateRandomId(7)).toThrowError("Length must be at least 8 to include the timestamp prefix."); - }); - it("should generate an ID of the specified length", () => { const length = 15; const id = generateRandomId(length); expect(id).toHaveLength(length); }); - it("should generate an ID with a timestamp prefix of length 8", () => { - const id = generateRandomId(12); - expect(id).toHaveProperty("substring"); - expect(id).toHaveProperty("length"); - expect(id.length).toBeGreaterThanOrEqual(8); - }); - it("should generate an ID containing only allowed characters", () => { const allowedChars = "abcdefghijkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789"; const id = generateRandomId(20); @@ -26,55 +15,4 @@ describe("generateRandomId", () => { expect(allowedChars).toContain(char); } }); - - it("should generate IDs that are sortable by creation time", () => { - const id1 = generateRandomId(10); - // Simulate a slight delay to ensure different timestamps - return new Promise((resolve) => { - setTimeout(() => { - const id2 = generateRandomId(10); - expect(id2 >= id1).toBe(true); - resolve(null); - }, 10); - }); - }); -}); - -describe("decodeTimestampFromId", () => { - it("should throw an error if the ID length is less than 8", () => { - expect(() => decodeTimestampFromId("abcdefg")).toThrowError( - "ID must be at least 8 characters long to contain a timestamp prefix." - ); - }); - - it("should throw an error if the timestamp prefix contains invalid characters", () => { - const invalidId = "0bcdefghijk"; - expect(() => decodeTimestampFromId(invalidId)).toThrowError('Invalid character "0" found in timestamp prefix.'); - }); - - it("should correctly decode the timestamp from a generated ID", () => { - const now = Date.now(); - // Mock Date.now to control the timestamp for testing - const originalDateNow = Date.now; - global.Date.now = () => now; - const id = generateRandomId(16); - global.Date.now = originalDateNow; // Restore original Date.now - - const decodedTimestamp = decodeTimestampFromId(id); - // Allow a small margin for potential timing differences in test execution - expect(decodedTimestamp).toBeGreaterThanOrEqual(now - 1); - expect(decodedTimestamp).toBeLessThanOrEqual(now + 1); - }); - - it("should correctly decode the timestamp even with a longer ID", () => { - const now = Date.now(); - const originalDateNow = Date.now; - global.Date.now = () => now; - const id = generateRandomId(20); - global.Date.now = originalDateNow; - - const decodedTimestamp = decodeTimestampFromId(id); - expect(decodedTimestamp).toBeGreaterThanOrEqual(now - 1); - expect(decodedTimestamp).toBeLessThanOrEqual(now + 1); - }); }); diff --git a/packages/crawler/db/bilibili_metadata.ts b/packages/crawler/db/bilibili_metadata.ts index acc136c..90f7513 100644 --- a/packages/crawler/db/bilibili_metadata.ts +++ b/packages/crawler/db/bilibili_metadata.ts @@ -1,4 +1,4 @@ -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; import { AllDataType, BiliUserType } from "@core/db/schema"; import { AkariModelVersion } from "ml/const"; diff --git a/packages/crawler/db/snapshot.ts b/packages/crawler/db/snapshot.ts index b4635ee..e070283 100644 --- a/packages/crawler/db/snapshot.ts +++ b/packages/crawler/db/snapshot.ts @@ -1,6 +1,6 @@ import { LatestSnapshotType } from "@core/db/schema"; import { SnapshotNumber } from "mq/task/getVideoStats.ts"; -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; export async function getVideosNearMilestone(sql: Psql) { const queryResult = await sql` diff --git a/packages/crawler/db/snapshotSchedule.ts b/packages/crawler/db/snapshotSchedule.ts index 9937a2d..a0cb7ec 100644 --- a/packages/crawler/db/snapshotSchedule.ts +++ b/packages/crawler/db/snapshotSchedule.ts @@ -4,7 +4,7 @@ import { MINUTE } from "@core/const/time.ts"; import { redis } from "@core/db/redis.ts"; import { Redis } from "ioredis"; import { parseTimestampFromPsql } from "../utils/formatTimestampToPostgre.ts"; -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; const REDIS_KEY = "cvsa:snapshot_window_counts"; diff --git a/packages/crawler/db/songs.ts b/packages/crawler/db/songs.ts index 5f5070f..3fbf487 100644 --- a/packages/crawler/db/songs.ts +++ b/packages/crawler/db/songs.ts @@ -1,4 +1,4 @@ -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; import { parseTimestampFromPsql } from "utils/formatTimestampToPostgre.ts"; export async function getNotCollectedSongs(sql: Psql) { diff --git a/packages/crawler/mq/scheduling.ts b/packages/crawler/mq/scheduling.ts index b84d73f..cf7427f 100644 --- a/packages/crawler/mq/scheduling.ts +++ b/packages/crawler/mq/scheduling.ts @@ -2,7 +2,7 @@ import { findClosestSnapshot, getLatestSnapshot, hasAtLeast2Snapshots } from "db import { truncate } from "utils/truncate.ts"; import { closetMilestone } from "./exec/snapshotTick.ts"; import { HOUR, MINUTE } from "@core/const/time.ts"; -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; const log = (value: number, base: number = 10) => Math.log(value) / Math.log(base); diff --git a/packages/crawler/mq/task/collectSongs.ts b/packages/crawler/mq/task/collectSongs.ts index 1d7634d..a2fef58 100644 --- a/packages/crawler/mq/task/collectSongs.ts +++ b/packages/crawler/mq/task/collectSongs.ts @@ -3,7 +3,7 @@ import { aidExistsInSongs, getNotCollectedSongs } from "db/songs.ts"; import logger from "@core/log/logger.ts"; import { scheduleSnapshot } from "db/snapshotSchedule.ts"; import { MINUTE } from "@core/const/time.ts"; -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; export async function collectSongs() { const aids = await getNotCollectedSongs(sql); diff --git a/packages/crawler/mq/task/getVideoDetails.ts b/packages/crawler/mq/task/getVideoDetails.ts index 80a5867..0d76ebb 100644 --- a/packages/crawler/mq/task/getVideoDetails.ts +++ b/packages/crawler/mq/task/getVideoDetails.ts @@ -4,7 +4,7 @@ import logger from "@core/log/logger.ts"; import { ClassifyVideoQueue } from "mq/index.ts"; import { userExistsInBiliUsers, videoExistsInAllData } from "../../db/bilibili_metadata.ts"; import { HOUR, SECOND } from "@core/const/time.ts"; -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; export async function insertVideoInfo(sql: Psql, aid: number) { const videoExists = await videoExistsInAllData(sql, aid); diff --git a/packages/crawler/mq/task/getVideoStats.ts b/packages/crawler/mq/task/getVideoStats.ts index 2a7d7ff..49c67a8 100644 --- a/packages/crawler/mq/task/getVideoStats.ts +++ b/packages/crawler/mq/task/getVideoStats.ts @@ -1,6 +1,6 @@ import { getVideoInfo } from "@core/net/getVideoInfo.ts"; import logger from "@core/log/logger.ts"; -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; export interface SnapshotNumber { time: number; diff --git a/packages/crawler/mq/task/queueLatestVideo.ts b/packages/crawler/mq/task/queueLatestVideo.ts index af824f7..49362a1 100644 --- a/packages/crawler/mq/task/queueLatestVideo.ts +++ b/packages/crawler/mq/task/queueLatestVideo.ts @@ -4,7 +4,7 @@ import { sleep } from "utils/sleep.ts"; import { SECOND } from "@core/const/time.ts"; import logger from "@core/log/logger.ts"; import { LatestVideosQueue } from "mq/index.ts"; -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; export async function queueLatestVideos( sql: Psql, diff --git a/packages/crawler/mq/task/regularSnapshotInterval.ts b/packages/crawler/mq/task/regularSnapshotInterval.ts index 11871d8..852d401 100644 --- a/packages/crawler/mq/task/regularSnapshotInterval.ts +++ b/packages/crawler/mq/task/regularSnapshotInterval.ts @@ -1,6 +1,6 @@ import { findClosestSnapshot, findSnapshotBefore, getLatestSnapshot } from "db/snapshotSchedule.ts"; import { HOUR } from "@core/const/time.ts"; -import type { Psql } from "global.d.ts"; +import type { Psql } from "@core/db/global.d.ts"; export const getRegularSnapshotInterval = async (sql: Psql, aid: number) => { const now = Date.now();