ref: move ML stuff
add: .idea to VCS, the refactor guide
This commit is contained in:
parent
7337538f0b
commit
636c5e25cb
11
.gitignore
vendored
11
.gitignore
vendored
@ -51,7 +51,6 @@ internal/
|
|||||||
!tests/cases/projects/projectOption/**/node_modules
|
!tests/cases/projects/projectOption/**/node_modules
|
||||||
!tests/cases/projects/NodeModulesSearch/**/*
|
!tests/cases/projects/NodeModulesSearch/**/*
|
||||||
!tests/baselines/reference/project/nodeModules*/**/*
|
!tests/baselines/reference/project/nodeModules*/**/*
|
||||||
.idea
|
|
||||||
yarn.lock
|
yarn.lock
|
||||||
yarn-error.log
|
yarn-error.log
|
||||||
.parallelperf.*
|
.parallelperf.*
|
||||||
@ -78,10 +77,10 @@ node_modules/
|
|||||||
# project specific
|
# project specific
|
||||||
logs/
|
logs/
|
||||||
__pycache__
|
__pycache__
|
||||||
filter/runs
|
ml/filter/runs
|
||||||
pred/runs
|
ml/pred/runs
|
||||||
pred/checkpoints
|
ml/pred/checkpoints
|
||||||
data/
|
ml/data/
|
||||||
filter/checkpoints
|
ml/filter/checkpoints
|
||||||
scripts
|
scripts
|
||||||
model/
|
model/
|
||||||
|
9
.idea/.gitignore
vendored
Normal file
9
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
|
dataSources.xml
|
21
.idea/cvsa.iml
Normal file
21
.idea/cvsa.iml
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="WEB_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.tmp" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/temp" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/tmp" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/data" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/doc" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/filter/checkpoints" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/filter/runs" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/lab/data" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/ml/lab/temp" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/logs" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/model" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/src/db" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
12
.idea/inspectionProfiles/Project_Default.xml
Normal file
12
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
<inspection_tool class="GrazieInspection" enabled="false" level="GRAMMAR_ERROR" enabled_by_default="false" />
|
||||||
|
<inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||||
|
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
||||||
|
<option name="processCode" value="true" />
|
||||||
|
<option name="processLiterals" value="true" />
|
||||||
|
<option name="processComments" value="true" />
|
||||||
|
</inspection_tool>
|
||||||
|
</profile>
|
||||||
|
</component>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/cvsa.iml" filepath="$PROJECT_DIR$/.idea/cvsa.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/sqldialects.xml
Normal file
6
.idea/sqldialects.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="SqlDialectMappings">
|
||||||
|
<file url="PROJECT" dialect="PostgreSQL" />
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
65
README-refactor.md
Normal file
65
README-refactor.md
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
# 项目重构方案
|
||||||
|
|
||||||
|
## 目标架构
|
||||||
|
采用monorepo结构管理三个独立部分:
|
||||||
|
1. `packages/crawler` - 现有爬虫功能
|
||||||
|
2. `packages/frontend` - 基于Astro的前端
|
||||||
|
3. `packages/backend` - 基于Hono的API后端
|
||||||
|
|
||||||
|
## 目录结构调整方案
|
||||||
|
|
||||||
|
### 新结构
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── packages/
|
||||||
|
│ ├── crawler/ # 爬虫组件
|
||||||
|
│ ├── frontend/ # Astro前端
|
||||||
|
│ ├── backend/ # Hono后端API
|
||||||
|
│ └── core/ # 共享代码(未来提取)
|
||||||
|
├── docs/ # 文档
|
||||||
|
├── scripts/ # 项目脚本
|
||||||
|
└── package.json # 根项目配置
|
||||||
|
```
|
||||||
|
|
||||||
|
### 具体迁移方案
|
||||||
|
|
||||||
|
#### 1. 爬虫部分(crawler)
|
||||||
|
保留以下目录/文件:
|
||||||
|
- `lib/` (除前端相关)
|
||||||
|
- `src/db/raw/`
|
||||||
|
- `src/filterWorker.ts`
|
||||||
|
- `src/worker.ts`
|
||||||
|
- `test/`
|
||||||
|
- `deno.json`
|
||||||
|
- `.gitignore`
|
||||||
|
|
||||||
|
需要移除:
|
||||||
|
- Fresh框架相关文件
|
||||||
|
- 前端组件(`components/`)
|
||||||
|
- 静态资源(`static/`)
|
||||||
|
|
||||||
|
#### 2. 前端部分(frontend)
|
||||||
|
全新创建Astro项目,不保留任何现有前端代码
|
||||||
|
|
||||||
|
#### 3. 后端部分(backend)
|
||||||
|
全新创建Hono项目
|
||||||
|
|
||||||
|
#### 4. 共享代码(core)
|
||||||
|
未来可从爬虫中提取以下内容到core package:
|
||||||
|
- 数据库相关:`lib/db/`
|
||||||
|
- 消息队列:`lib/mq/`
|
||||||
|
- 网络请求:`lib/net/`
|
||||||
|
- 工具函数:`lib/utils/`
|
||||||
|
|
||||||
|
## 重构步骤建议
|
||||||
|
|
||||||
|
1. 初始化monorepo结构
|
||||||
|
2. 迁移爬虫代码到`packages/crawler`
|
||||||
|
3. 创建新的Astro项目在`packages/frontend`
|
||||||
|
4. 创建新的Hono项目在`packages/backend`
|
||||||
|
5. 逐步提取共享代码到`packages/core`
|
||||||
|
|
||||||
|
## 注意事项
|
||||||
|
- 机器学习相关代码(`pred/`, `filter/`, `lab/`)保持现状
|
||||||
|
- 文档(`doc/`)可以迁移到`docs/`目录
|
||||||
|
- 需要更新CI/CD流程支持monorepo
|
@ -1,12 +0,0 @@
|
|||||||
import { JSX } from "preact";
|
|
||||||
import { IS_BROWSER } from "$fresh/runtime.ts";
|
|
||||||
|
|
||||||
export function Button(props: JSX.HTMLAttributes<HTMLButtonElement>) {
|
|
||||||
return (
|
|
||||||
<button
|
|
||||||
{...props}
|
|
||||||
disabled={!IS_BROWSER || props.disabled}
|
|
||||||
class="px-2 py-1 border-gray-500 border-2 rounded bg-white hover:bg-gray-200 transition-colors"
|
|
||||||
/>
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,55 +0,0 @@
|
|||||||
import json
|
|
||||||
import random
|
|
||||||
|
|
||||||
def process_data(input_file, output_file):
|
|
||||||
"""
|
|
||||||
从输入文件中读取数据,找出model和human不一致的行,
|
|
||||||
删除"model"键,将"human"键重命名为"label",
|
|
||||||
然后将处理后的数据添加到输出文件中。
|
|
||||||
在写入之前,它会加载output_file中的所有样本,
|
|
||||||
并使用aid键进行去重过滤。
|
|
||||||
|
|
||||||
Args:
|
|
||||||
input_file (str): 输入文件的路径。
|
|
||||||
output_file (str): 输出文件的路径。
|
|
||||||
"""
|
|
||||||
|
|
||||||
# 加载output_file中已有的数据,用于去重
|
|
||||||
existing_data = set()
|
|
||||||
try:
|
|
||||||
with open(output_file, 'r', encoding='utf-8') as f_out:
|
|
||||||
for line in f_out:
|
|
||||||
try:
|
|
||||||
data = json.loads(line)
|
|
||||||
existing_data.add(data['aid'])
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass # 忽略JSON解码错误,继续读取下一行
|
|
||||||
except FileNotFoundError:
|
|
||||||
pass # 如果文件不存在,则忽略
|
|
||||||
|
|
||||||
with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'a', encoding='utf-8') as f_out:
|
|
||||||
for line in f_in:
|
|
||||||
try:
|
|
||||||
data = json.loads(line)
|
|
||||||
|
|
||||||
if data['model'] != data['human'] or random.random() < 0.2:
|
|
||||||
if data['aid'] not in existing_data: # 检查aid是否已存在
|
|
||||||
del data['model']
|
|
||||||
data['label'] = data['human']
|
|
||||||
del data['human']
|
|
||||||
f_out.write(json.dumps(data, ensure_ascii=False) + '\n')
|
|
||||||
existing_data.add(data['aid']) # 将新的aid添加到集合中
|
|
||||||
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
print(f"JSON解码错误: {e}")
|
|
||||||
print(f"错误行内容: {line.strip()}")
|
|
||||||
except KeyError as e:
|
|
||||||
print(f"KeyError: 键 '{e}' 不存在")
|
|
||||||
print(f"错误行内容: {line.strip()}")
|
|
||||||
|
|
||||||
# 调用函数处理数据
|
|
||||||
input_file = 'real_test.jsonl'
|
|
||||||
output_file = 'labeled_data.jsonl'
|
|
||||||
process_data(input_file, output_file)
|
|
||||||
print(f"处理完成,结果已写入 {output_file}")
|
|
||||||
|
|
7
dev.ts
7
dev.ts
@ -1,7 +0,0 @@
|
|||||||
#!/usr/bin/env -S deno run -A --watch=static/,routes/
|
|
||||||
|
|
||||||
import dev from "$fresh/dev.ts";
|
|
||||||
import config from "./fresh.config.ts";
|
|
||||||
|
|
||||||
import "$std/dotenv/load.ts";
|
|
||||||
await dev(import.meta.url, "./main.ts", config);
|
|
@ -1,6 +0,0 @@
|
|||||||
import { defineConfig } from "$fresh/server.ts";
|
|
||||||
import tailwind from "$fresh/plugins/tailwind.ts";
|
|
||||||
|
|
||||||
export default defineConfig({
|
|
||||||
plugins: [tailwind()],
|
|
||||||
});
|
|
27
fresh.gen.ts
27
fresh.gen.ts
@ -1,27 +0,0 @@
|
|||||||
// DO NOT EDIT. This file is generated by Fresh.
|
|
||||||
// This file SHOULD be checked into source version control.
|
|
||||||
// This file is automatically updated during development when running `dev.ts`.
|
|
||||||
|
|
||||||
import * as $_404 from "./routes/_404.tsx";
|
|
||||||
import * as $_app from "./routes/_app.tsx";
|
|
||||||
import * as $api_joke from "./routes/api/joke.ts";
|
|
||||||
import * as $greet_name_ from "./routes/greet/[name].tsx";
|
|
||||||
import * as $index from "./routes/index.tsx";
|
|
||||||
import * as $Counter from "./islands/Counter.tsx";
|
|
||||||
import type { Manifest } from "$fresh/server.ts";
|
|
||||||
|
|
||||||
const manifest = {
|
|
||||||
routes: {
|
|
||||||
"./routes/_404.tsx": $_404,
|
|
||||||
"./routes/_app.tsx": $_app,
|
|
||||||
"./routes/api/joke.ts": $api_joke,
|
|
||||||
"./routes/greet/[name].tsx": $greet_name_,
|
|
||||||
"./routes/index.tsx": $index,
|
|
||||||
},
|
|
||||||
islands: {
|
|
||||||
"./islands/Counter.tsx": $Counter,
|
|
||||||
},
|
|
||||||
baseUrl: import.meta.url,
|
|
||||||
} satisfies Manifest;
|
|
||||||
|
|
||||||
export default manifest;
|
|
@ -1,16 +0,0 @@
|
|||||||
import type { Signal } from "@preact/signals";
|
|
||||||
import { Button } from "../components/Button.tsx";
|
|
||||||
|
|
||||||
interface CounterProps {
|
|
||||||
count: Signal<number>;
|
|
||||||
}
|
|
||||||
|
|
||||||
export default function Counter(props: CounterProps) {
|
|
||||||
return (
|
|
||||||
<div class="flex gap-8 py-6">
|
|
||||||
<Button onClick={() => props.count.value -= 1}>-1</Button>
|
|
||||||
<p class="text-3xl tabular-nums">{props.count}</p>
|
|
||||||
<Button onClick={() => props.count.value += 1}>+1</Button>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
13
main.ts
13
main.ts
@ -1,13 +0,0 @@
|
|||||||
/// <reference no-default-lib="true" />
|
|
||||||
/// <reference lib="dom" />
|
|
||||||
/// <reference lib="dom.iterable" />
|
|
||||||
/// <reference lib="dom.asynciterable" />
|
|
||||||
/// <reference lib="deno.ns" />
|
|
||||||
|
|
||||||
import "$std/dotenv/load.ts";
|
|
||||||
|
|
||||||
import { start } from "$fresh/server.ts";
|
|
||||||
import manifest from "./fresh.gen.ts";
|
|
||||||
import config from "./fresh.config.ts";
|
|
||||||
|
|
||||||
await start(manifest, config);
|
|
0
lab/.gitignore → ml/lab/.gitignore
vendored
0
lab/.gitignore → ml/lab/.gitignore
vendored
@ -1,27 +0,0 @@
|
|||||||
import { Head } from "$fresh/runtime.ts";
|
|
||||||
|
|
||||||
export default function Error404() {
|
|
||||||
return (
|
|
||||||
<>
|
|
||||||
<Head>
|
|
||||||
<title>404 - Page not found</title>
|
|
||||||
</Head>
|
|
||||||
<div class="px-4 py-8 mx-auto bg-[#86efac]">
|
|
||||||
<div class="max-w-screen-md mx-auto flex flex-col items-center justify-center">
|
|
||||||
<img
|
|
||||||
class="my-6"
|
|
||||||
src="/logo.svg"
|
|
||||||
width="128"
|
|
||||||
height="128"
|
|
||||||
alt="the Fresh logo: a sliced lemon dripping with juice"
|
|
||||||
/>
|
|
||||||
<h1 class="text-4xl font-bold">404 - Page not found</h1>
|
|
||||||
<p class="my-4">
|
|
||||||
The page you were looking for doesn't exist.
|
|
||||||
</p>
|
|
||||||
<a href="/" class="underline">Go back home</a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</>
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,16 +0,0 @@
|
|||||||
import { type PageProps } from "$fresh/server.ts";
|
|
||||||
export default function App({ Component }: PageProps) {
|
|
||||||
return (
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>cvsa</title>
|
|
||||||
<link rel="stylesheet" href="/styles.css" />
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<Component />
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
);
|
|
||||||
}
|
|
@ -1,21 +0,0 @@
|
|||||||
import { FreshContext } from "$fresh/server.ts";
|
|
||||||
|
|
||||||
// Jokes courtesy of https://punsandoneliners.com/randomness/programmer-jokes/
|
|
||||||
const JOKES = [
|
|
||||||
"Why do Java developers often wear glasses? They can't C#.",
|
|
||||||
"A SQL query walks into a bar, goes up to two tables and says “can I join you?”",
|
|
||||||
"Wasn't hard to crack Forrest Gump's password. 1forrest1.",
|
|
||||||
"I love pressing the F5 key. It's refreshing.",
|
|
||||||
"Called IT support and a chap from Australia came to fix my network connection. I asked “Do you come from a LAN down under?”",
|
|
||||||
"There are 10 types of people in the world. Those who understand binary and those who don't.",
|
|
||||||
"Why are assembly programmers often wet? They work below C level.",
|
|
||||||
"My favourite computer based band is the Black IPs.",
|
|
||||||
"What programme do you use to predict the music tastes of former US presidential candidates? An Al Gore Rhythm.",
|
|
||||||
"An SEO expert walked into a bar, pub, inn, tavern, hostelry, public house.",
|
|
||||||
];
|
|
||||||
|
|
||||||
export const handler = (_req: Request, _ctx: FreshContext): Response => {
|
|
||||||
const randomIndex = Math.floor(Math.random() * JOKES.length);
|
|
||||||
const body = JOKES[randomIndex];
|
|
||||||
return new Response(body);
|
|
||||||
};
|
|
@ -1,5 +0,0 @@
|
|||||||
import { PageProps } from "$fresh/server.ts";
|
|
||||||
|
|
||||||
export default function Greet(props: PageProps) {
|
|
||||||
return <div>Hello {props.params.name}</div>;
|
|
||||||
}
|
|
@ -1,25 +0,0 @@
|
|||||||
import { useSignal } from "@preact/signals";
|
|
||||||
import Counter from "../islands/Counter.tsx";
|
|
||||||
|
|
||||||
export default function Home() {
|
|
||||||
const count = useSignal(3);
|
|
||||||
return (
|
|
||||||
<div class="px-4 py-8 mx-auto bg-[#86efac]">
|
|
||||||
<div class="max-w-screen-md mx-auto flex flex-col items-center justify-center">
|
|
||||||
<img
|
|
||||||
class="my-6"
|
|
||||||
src="/logo.svg"
|
|
||||||
width="128"
|
|
||||||
height="128"
|
|
||||||
alt="the Fresh logo: a sliced lemon dripping with juice"
|
|
||||||
/>
|
|
||||||
<h1 class="text-4xl font-bold">Welcome to Fresh</h1>
|
|
||||||
<p class="my-4">
|
|
||||||
Try updating this message in the
|
|
||||||
<code class="mx-2">./routes/index.tsx</code> file, and refresh.
|
|
||||||
</p>
|
|
||||||
<Counter count={count} />
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
Binary file not shown.
Before Width: | Height: | Size: 22 KiB |
@ -1,6 +0,0 @@
|
|||||||
<svg width="40" height="40" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
||||||
<path d="M34.092 8.845C38.929 20.652 34.092 27 30 30.5c1 3.5-2.986 4.222-4.5 2.5-4.457 1.537-13.512 1.487-20-5C2 24.5 4.73 16.714 14 11.5c8-4.5 16-7 20.092-2.655Z" fill="#FFDB1E"/>
|
|
||||||
<path d="M14 11.5c6.848-4.497 15.025-6.38 18.368-3.47C37.5 12.5 21.5 22.612 15.5 25c-6.5 2.587-3 8.5-6.5 8.5-3 0-2.5-4-5.183-7.75C2.232 23.535 6.16 16.648 14 11.5Z" fill="#fff" stroke="#FFDB1E"/>
|
|
||||||
<path d="M28.535 8.772c4.645 1.25-.365 5.695-4.303 8.536-3.732 2.692-6.606 4.21-7.923 4.83-.366.173-1.617-2.252-1.617-1 0 .417-.7 2.238-.934 2.326-1.365.512-4.223 1.29-5.835 1.29-3.491 0-1.923-4.754 3.014-9.122.892-.789 1.478-.645 2.283-.645-.537-.773-.534-.917.403-1.546C17.79 10.64 23 8.77 25.212 8.42c.366.014.82.35.82.629.41-.14 2.095-.388 2.503-.278Z" fill="#FFE600"/>
|
|
||||||
<path d="M14.297 16.49c.985-.747 1.644-1.01 2.099-2.526.566.121.841-.08 1.29-.701.324.466 1.657.608 2.453.701-.715.451-1.057.852-1.452 2.106-1.464-.611-3.167-.302-4.39.42Z" fill="#fff"/>
|
|
||||||
</svg>
|
|
Before Width: | Height: | Size: 1.0 KiB |
@ -1,3 +0,0 @@
|
|||||||
@tailwind base;
|
|
||||||
@tailwind components;
|
|
||||||
@tailwind utilities;
|
|
@ -1,7 +0,0 @@
|
|||||||
import { type Config } from "tailwindcss";
|
|
||||||
|
|
||||||
export default {
|
|
||||||
content: [
|
|
||||||
"{routes,islands,components}/**/*.{ts,tsx,js,jsx}",
|
|
||||||
],
|
|
||||||
} satisfies Config;
|
|
Loading…
Reference in New Issue
Block a user