diff --git a/.gitignore b/.gitignore index 31d6ddf..58df6d2 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,6 @@ internal/ !tests/cases/projects/projectOption/**/node_modules !tests/cases/projects/NodeModulesSearch/**/* !tests/baselines/reference/project/nodeModules*/**/* -.idea yarn.lock yarn-error.log .parallelperf.* @@ -78,10 +77,10 @@ node_modules/ # project specific logs/ __pycache__ -filter/runs -pred/runs -pred/checkpoints -data/ -filter/checkpoints +ml/filter/runs +ml/pred/runs +ml/pred/checkpoints +ml/data/ +ml/filter/checkpoints scripts model/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..518076d --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,9 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +dataSources.xml \ No newline at end of file diff --git a/.idea/cvsa.iml b/.idea/cvsa.iml new file mode 100644 index 0000000..c155925 --- /dev/null +++ b/.idea/cvsa.iml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..5535e8f --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,12 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..4552e71 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml new file mode 100644 index 0000000..6df4889 --- /dev/null +++ b/.idea/sqldialects.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README-refactor.md b/README-refactor.md new file mode 100644 index 0000000..75ffdb9 --- /dev/null +++ b/README-refactor.md @@ -0,0 +1,65 @@ +# 项目重构方案 + +## 目标架构 +采用monorepo结构管理三个独立部分: +1. `packages/crawler` - 现有爬虫功能 +2. `packages/frontend` - 基于Astro的前端 +3. `packages/backend` - 基于Hono的API后端 + +## 目录结构调整方案 + +### 新结构 +``` +. +├── packages/ +│ ├── crawler/ # 爬虫组件 +│ ├── frontend/ # Astro前端 +│ ├── backend/ # Hono后端API +│ └── core/ # 共享代码(未来提取) +├── docs/ # 文档 +├── scripts/ # 项目脚本 +└── package.json # 根项目配置 +``` + +### 具体迁移方案 + +#### 1. 爬虫部分(crawler) +保留以下目录/文件: +- `lib/` (除前端相关) +- `src/db/raw/` +- `src/filterWorker.ts` +- `src/worker.ts` +- `test/` +- `deno.json` +- `.gitignore` + +需要移除: +- Fresh框架相关文件 +- 前端组件(`components/`) +- 静态资源(`static/`) + +#### 2. 前端部分(frontend) +全新创建Astro项目,不保留任何现有前端代码 + +#### 3. 后端部分(backend) +全新创建Hono项目 + +#### 4. 共享代码(core) +未来可从爬虫中提取以下内容到core package: +- 数据库相关:`lib/db/` +- 消息队列:`lib/mq/` +- 网络请求:`lib/net/` +- 工具函数:`lib/utils/` + +## 重构步骤建议 + +1. 初始化monorepo结构 +2. 迁移爬虫代码到`packages/crawler` +3. 创建新的Astro项目在`packages/frontend` +4. 创建新的Hono项目在`packages/backend` +5. 逐步提取共享代码到`packages/core` + +## 注意事项 +- 机器学习相关代码(`pred/`, `filter/`, `lab/`)保持现状 +- 文档(`doc/`)可以迁移到`docs/`目录 +- 需要更新CI/CD流程支持monorepo \ No newline at end of file diff --git a/components/Button.tsx b/components/Button.tsx deleted file mode 100644 index 6e868c5..0000000 --- a/components/Button.tsx +++ /dev/null @@ -1,12 +0,0 @@ -import { JSX } from "preact"; -import { IS_BROWSER } from "$fresh/runtime.ts"; - -export function Button(props: JSX.HTMLAttributes) { - return ( - -

{props.count}

- - - ); -} diff --git a/main.ts b/main.ts deleted file mode 100644 index 675f529..0000000 --- a/main.ts +++ /dev/null @@ -1,13 +0,0 @@ -/// -/// -/// -/// -/// - -import "$std/dotenv/load.ts"; - -import { start } from "$fresh/server.ts"; -import manifest from "./fresh.gen.ts"; -import config from "./fresh.config.ts"; - -await start(manifest, config); diff --git a/filter/RunningLogs.txt b/ml/filter/RunningLogs.txt similarity index 100% rename from filter/RunningLogs.txt rename to ml/filter/RunningLogs.txt diff --git a/filter/checkpoint_conversion.py b/ml/filter/checkpoint_conversion.py similarity index 100% rename from filter/checkpoint_conversion.py rename to ml/filter/checkpoint_conversion.py diff --git a/filter/clean_dataset.py b/ml/filter/clean_dataset.py similarity index 100% rename from filter/clean_dataset.py rename to ml/filter/clean_dataset.py diff --git a/filter/dataset.py b/ml/filter/dataset.py similarity index 100% rename from filter/dataset.py rename to ml/filter/dataset.py diff --git a/filter/db_utils.py b/ml/filter/db_utils.py similarity index 100% rename from filter/db_utils.py rename to ml/filter/db_utils.py diff --git a/filter/embedding.py b/ml/filter/embedding.py similarity index 100% rename from filter/embedding.py rename to ml/filter/embedding.py diff --git a/filter/embedding_range.py b/ml/filter/embedding_range.py similarity index 100% rename from filter/embedding_range.py rename to ml/filter/embedding_range.py diff --git a/filter/embedding_visualization.py b/ml/filter/embedding_visualization.py similarity index 100% rename from filter/embedding_visualization.py rename to ml/filter/embedding_visualization.py diff --git a/filter/labeling_system.py b/ml/filter/labeling_system.py similarity index 100% rename from filter/labeling_system.py rename to ml/filter/labeling_system.py diff --git a/filter/model.py b/ml/filter/model.py similarity index 100% rename from filter/model.py rename to ml/filter/model.py diff --git a/filter/modelV3_10.py b/ml/filter/modelV3_10.py similarity index 100% rename from filter/modelV3_10.py rename to ml/filter/modelV3_10.py diff --git a/filter/modelV3_12.py b/ml/filter/modelV3_12.py similarity index 100% rename from filter/modelV3_12.py rename to ml/filter/modelV3_12.py diff --git a/filter/modelV3_15.py b/ml/filter/modelV3_15.py similarity index 100% rename from filter/modelV3_15.py rename to ml/filter/modelV3_15.py diff --git a/filter/modelV6_0.py b/ml/filter/modelV6_0.py similarity index 100% rename from filter/modelV6_0.py rename to ml/filter/modelV6_0.py diff --git a/filter/onnx_export.py b/ml/filter/onnx_export.py similarity index 100% rename from filter/onnx_export.py rename to ml/filter/onnx_export.py diff --git a/filter/predict.py b/ml/filter/predict.py similarity index 100% rename from filter/predict.py rename to ml/filter/predict.py diff --git a/filter/quantize.py b/ml/filter/quantize.py similarity index 100% rename from filter/quantize.py rename to ml/filter/quantize.py diff --git a/filter/tag.py b/ml/filter/tag.py similarity index 100% rename from filter/tag.py rename to ml/filter/tag.py diff --git a/filter/test.py b/ml/filter/test.py similarity index 100% rename from filter/test.py rename to ml/filter/test.py diff --git a/filter/train.py b/ml/filter/train.py similarity index 100% rename from filter/train.py rename to ml/filter/train.py diff --git a/lab/.gitignore b/ml/lab/.gitignore similarity index 100% rename from lab/.gitignore rename to ml/lab/.gitignore diff --git a/lab/align-pipeline.md b/ml/lab/align-pipeline.md similarity index 100% rename from lab/align-pipeline.md rename to ml/lab/align-pipeline.md diff --git a/lab/mmsAlignment/align2LRC.py b/ml/lab/mmsAlignment/align2LRC.py similarity index 100% rename from lab/mmsAlignment/align2LRC.py rename to ml/lab/mmsAlignment/align2LRC.py diff --git a/lab/mmsAlignment/alignWithMMS.py b/ml/lab/mmsAlignment/alignWithMMS.py similarity index 100% rename from lab/mmsAlignment/alignWithMMS.py rename to ml/lab/mmsAlignment/alignWithMMS.py diff --git a/lab/mmsAlignment/splitSong.py b/ml/lab/mmsAlignment/splitSong.py similarity index 100% rename from lab/mmsAlignment/splitSong.py rename to ml/lab/mmsAlignment/splitSong.py diff --git a/lab/utils/audio.py b/ml/lab/utils/audio.py similarity index 100% rename from lab/utils/audio.py rename to ml/lab/utils/audio.py diff --git a/lab/utils/cleanTempDir.py b/ml/lab/utils/cleanTempDir.py similarity index 100% rename from lab/utils/cleanTempDir.py rename to ml/lab/utils/cleanTempDir.py diff --git a/lab/utils/ttml.py b/ml/lab/utils/ttml.py similarity index 100% rename from lab/utils/ttml.py rename to ml/lab/utils/ttml.py diff --git a/lab/whisperAlignment/align2srt.py b/ml/lab/whisperAlignment/align2srt.py similarity index 100% rename from lab/whisperAlignment/align2srt.py rename to ml/lab/whisperAlignment/align2srt.py diff --git a/lab/whisperAlignment/alignWithGroup.py b/ml/lab/whisperAlignment/alignWithGroup.py similarity index 100% rename from lab/whisperAlignment/alignWithGroup.py rename to ml/lab/whisperAlignment/alignWithGroup.py diff --git a/lab/whisperAlignment/splitGroups.py b/ml/lab/whisperAlignment/splitGroups.py similarity index 100% rename from lab/whisperAlignment/splitGroups.py rename to ml/lab/whisperAlignment/splitGroups.py diff --git a/lab/whisperAlignment/srt2lrc.py b/ml/lab/whisperAlignment/srt2lrc.py similarity index 100% rename from lab/whisperAlignment/srt2lrc.py rename to ml/lab/whisperAlignment/srt2lrc.py diff --git a/pred/count.py b/ml/pred/count.py similarity index 100% rename from pred/count.py rename to ml/pred/count.py diff --git a/pred/crawler.py b/ml/pred/crawler.py similarity index 100% rename from pred/crawler.py rename to ml/pred/crawler.py diff --git a/pred/dataset.py b/ml/pred/dataset.py similarity index 100% rename from pred/dataset.py rename to ml/pred/dataset.py diff --git a/pred/export_onnx.py b/ml/pred/export_onnx.py similarity index 100% rename from pred/export_onnx.py rename to ml/pred/export_onnx.py diff --git a/pred/inference.py b/ml/pred/inference.py similarity index 100% rename from pred/inference.py rename to ml/pred/inference.py diff --git a/pred/model.py b/ml/pred/model.py similarity index 100% rename from pred/model.py rename to ml/pred/model.py diff --git a/pred/train.py b/ml/pred/train.py similarity index 100% rename from pred/train.py rename to ml/pred/train.py diff --git a/routes/_404.tsx b/routes/_404.tsx deleted file mode 100644 index 4628eeb..0000000 --- a/routes/_404.tsx +++ /dev/null @@ -1,27 +0,0 @@ -import { Head } from "$fresh/runtime.ts"; - -export default function Error404() { - return ( - <> - - 404 - Page not found - -
-
- the Fresh logo: a sliced lemon dripping with juice -

404 - Page not found

-

- The page you were looking for doesn't exist. -

- Go back home -
-
- - ); -} diff --git a/routes/_app.tsx b/routes/_app.tsx deleted file mode 100644 index a44414e..0000000 --- a/routes/_app.tsx +++ /dev/null @@ -1,16 +0,0 @@ -import { type PageProps } from "$fresh/server.ts"; -export default function App({ Component }: PageProps) { - return ( - - - - - cvsa - - - - - - - ); -} diff --git a/routes/api/joke.ts b/routes/api/joke.ts deleted file mode 100644 index 68b0ebe..0000000 --- a/routes/api/joke.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { FreshContext } from "$fresh/server.ts"; - -// Jokes courtesy of https://punsandoneliners.com/randomness/programmer-jokes/ -const JOKES = [ - "Why do Java developers often wear glasses? They can't C#.", - "A SQL query walks into a bar, goes up to two tables and says “can I join you?”", - "Wasn't hard to crack Forrest Gump's password. 1forrest1.", - "I love pressing the F5 key. It's refreshing.", - "Called IT support and a chap from Australia came to fix my network connection. I asked “Do you come from a LAN down under?”", - "There are 10 types of people in the world. Those who understand binary and those who don't.", - "Why are assembly programmers often wet? They work below C level.", - "My favourite computer based band is the Black IPs.", - "What programme do you use to predict the music tastes of former US presidential candidates? An Al Gore Rhythm.", - "An SEO expert walked into a bar, pub, inn, tavern, hostelry, public house.", -]; - -export const handler = (_req: Request, _ctx: FreshContext): Response => { - const randomIndex = Math.floor(Math.random() * JOKES.length); - const body = JOKES[randomIndex]; - return new Response(body); -}; diff --git a/routes/greet/[name].tsx b/routes/greet/[name].tsx deleted file mode 100644 index a7a5fe1..0000000 --- a/routes/greet/[name].tsx +++ /dev/null @@ -1,5 +0,0 @@ -import { PageProps } from "$fresh/server.ts"; - -export default function Greet(props: PageProps) { - return
Hello {props.params.name}
; -} diff --git a/routes/index.tsx b/routes/index.tsx deleted file mode 100644 index 67a22a7..0000000 --- a/routes/index.tsx +++ /dev/null @@ -1,25 +0,0 @@ -import { useSignal } from "@preact/signals"; -import Counter from "../islands/Counter.tsx"; - -export default function Home() { - const count = useSignal(3); - return ( -
-
- the Fresh logo: a sliced lemon dripping with juice -

Welcome to Fresh

-

- Try updating this message in the - ./routes/index.tsx file, and refresh. -

- -
-
- ); -} diff --git a/static/favicon.ico b/static/favicon.ico deleted file mode 100644 index 1cfaaa2..0000000 Binary files a/static/favicon.ico and /dev/null differ diff --git a/static/logo.svg b/static/logo.svg deleted file mode 100644 index ef2fbe4..0000000 --- a/static/logo.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/static/styles.css b/static/styles.css deleted file mode 100644 index b5c61c9..0000000 --- a/static/styles.css +++ /dev/null @@ -1,3 +0,0 @@ -@tailwind base; -@tailwind components; -@tailwind utilities; diff --git a/tailwind.config.ts b/tailwind.config.ts deleted file mode 100644 index 0c790d0..0000000 --- a/tailwind.config.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { type Config } from "tailwindcss"; - -export default { - content: [ - "{routes,islands,components}/**/*.{ts,tsx,js,jsx}", - ], -} satisfies Config;