ref: move ML stuff
add: .idea to VCS, the refactor guide
This commit is contained in:
parent
7337538f0b
commit
636c5e25cb
11
.gitignore
vendored
11
.gitignore
vendored
@ -51,7 +51,6 @@ internal/
|
||||
!tests/cases/projects/projectOption/**/node_modules
|
||||
!tests/cases/projects/NodeModulesSearch/**/*
|
||||
!tests/baselines/reference/project/nodeModules*/**/*
|
||||
.idea
|
||||
yarn.lock
|
||||
yarn-error.log
|
||||
.parallelperf.*
|
||||
@ -78,10 +77,10 @@ node_modules/
|
||||
# project specific
|
||||
logs/
|
||||
__pycache__
|
||||
filter/runs
|
||||
pred/runs
|
||||
pred/checkpoints
|
||||
data/
|
||||
filter/checkpoints
|
||||
ml/filter/runs
|
||||
ml/pred/runs
|
||||
ml/pred/checkpoints
|
||||
ml/data/
|
||||
ml/filter/checkpoints
|
||||
scripts
|
||||
model/
|
||||
|
9
.idea/.gitignore
vendored
Normal file
9
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
dataSources.xml
|
21
.idea/cvsa.iml
Normal file
21
.idea/cvsa.iml
Normal file
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="WEB_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.tmp" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/temp" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/tmp" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/ml/data" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/doc" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/ml/filter/checkpoints" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/ml/filter/runs" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/ml/lab/data" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/ml/lab/temp" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/logs" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/model" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/src/db" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
12
.idea/inspectionProfiles/Project_Default.xml
Normal file
12
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="GrazieInspection" enabled="false" level="GRAMMAR_ERROR" enabled_by_default="false" />
|
||||
<inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
||||
<option name="processCode" value="true" />
|
||||
<option name="processLiterals" value="true" />
|
||||
<option name="processComments" value="true" />
|
||||
</inspection_tool>
|
||||
</profile>
|
||||
</component>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/cvsa.iml" filepath="$PROJECT_DIR$/.idea/cvsa.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
6
.idea/sqldialects.xml
Normal file
6
.idea/sqldialects.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="SqlDialectMappings">
|
||||
<file url="PROJECT" dialect="PostgreSQL" />
|
||||
</component>
|
||||
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
65
README-refactor.md
Normal file
65
README-refactor.md
Normal file
@ -0,0 +1,65 @@
|
||||
# 项目重构方案
|
||||
|
||||
## 目标架构
|
||||
采用monorepo结构管理三个独立部分:
|
||||
1. `packages/crawler` - 现有爬虫功能
|
||||
2. `packages/frontend` - 基于Astro的前端
|
||||
3. `packages/backend` - 基于Hono的API后端
|
||||
|
||||
## 目录结构调整方案
|
||||
|
||||
### 新结构
|
||||
```
|
||||
.
|
||||
├── packages/
|
||||
│ ├── crawler/ # 爬虫组件
|
||||
│ ├── frontend/ # Astro前端
|
||||
│ ├── backend/ # Hono后端API
|
||||
│ └── core/ # 共享代码(未来提取)
|
||||
├── docs/ # 文档
|
||||
├── scripts/ # 项目脚本
|
||||
└── package.json # 根项目配置
|
||||
```
|
||||
|
||||
### 具体迁移方案
|
||||
|
||||
#### 1. 爬虫部分(crawler)
|
||||
保留以下目录/文件:
|
||||
- `lib/`(前端相关代码除外)
|
||||
- `src/db/raw/`
|
||||
- `src/filterWorker.ts`
|
||||
- `src/worker.ts`
|
||||
- `test/`
|
||||
- `deno.json`
|
||||
- `.gitignore`
|
||||
|
||||
需要移除:
|
||||
- Fresh框架相关文件
|
||||
- 前端组件(`components/`)
|
||||
- 静态资源(`static/`)
|
||||
|
||||
#### 2. 前端部分(frontend)
|
||||
全新创建Astro项目,不保留任何现有前端代码
|
||||
|
||||
#### 3. 后端部分(backend)
|
||||
全新创建Hono项目
|
||||
|
||||
#### 4. 共享代码(core)
|
||||
未来可从爬虫中提取以下内容到core package:
|
||||
- 数据库相关:`lib/db/`
|
||||
- 消息队列:`lib/mq/`
|
||||
- 网络请求:`lib/net/`
|
||||
- 工具函数:`lib/utils/`
|
||||
|
||||
## 重构步骤建议
|
||||
|
||||
1. 初始化monorepo结构
|
||||
2. 迁移爬虫代码到`packages/crawler`
|
||||
3. 创建新的Astro项目在`packages/frontend`
|
||||
4. 创建新的Hono项目在`packages/backend`
|
||||
5. 逐步提取共享代码到`packages/core`
|
||||
|
||||
## 注意事项
|
||||
- 机器学习相关代码已统一移至`ml/`目录(`ml/pred/`, `ml/filter/`, `ml/lab/`),不纳入本次monorepo拆分,保持现状
|
||||
- 文档(`doc/`)可以迁移到`docs/`目录
|
||||
- 需要更新CI/CD流程支持monorepo
|
@ -1,12 +0,0 @@
|
||||
import { JSX } from "preact";
|
||||
import { IS_BROWSER } from "$fresh/runtime.ts";
|
||||
|
||||
/**
 * Styled `<button>` wrapper.
 *
 * All incoming props are forwarded to the underlying element. `disabled` and
 * `class` are written after the spread, so they take precedence over any
 * same-named props supplied by the caller.
 */
export function Button(props: JSX.HTMLAttributes<HTMLButtonElement>) {
  // Always disabled while IS_BROWSER is false; otherwise the caller's
  // `disabled` prop decides.
  const isDisabled = !IS_BROWSER || props.disabled;
  const className =
    "px-2 py-1 border-gray-500 border-2 rounded bg-white hover:bg-gray-200 transition-colors";

  return <button {...props} disabled={isDisabled} class={className} />;
}
|
@ -1,55 +0,0 @@
|
||||
import json
|
||||
import random
|
||||
|
||||
def process_data(input_file, output_file, sample_rate=0.2):
    """
    Append reviewed samples from ``input_file`` to ``output_file`` (both JSONL).

    A row from ``input_file`` is selected when its ``model`` and ``human``
    values differ. Rows where they agree are additionally selected at random
    with probability ``sample_rate``. For each selected row the ``model`` key
    is dropped and ``human`` is renamed to ``label`` before the row is appended
    to ``output_file``.

    Rows are de-duplicated on ``aid``: every ``aid`` already present in
    ``output_file`` is loaded first, and each ``aid`` is written at most once
    per call.

    Malformed rows (invalid JSON, missing keys, non-object values) are
    reported on stdout and skipped; they never abort the run.

    Args:
        input_file (str): Path of the JSONL file to read.
        output_file (str): Path of the JSONL file to append to. It is created
            if it does not exist.
        sample_rate (float): Probability of keeping a row whose ``model`` and
            ``human`` values agree. Defaults to 0.2.
    """

    # Collect the aids that are already in output_file so they are not
    # written a second time.
    seen_aids = set()
    try:
        with open(output_file, 'r', encoding='utf-8') as f_out:
            for line in f_out:
                try:
                    seen_aids.add(json.loads(line)['aid'])
                except (json.JSONDecodeError, KeyError, TypeError):
                    # Best effort: a line that is not valid JSON, is not an
                    # object, has no 'aid', or has an unhashable 'aid' simply
                    # contributes nothing to the dedup set.
                    continue
    except FileNotFoundError:
        pass  # No previous output yet; nothing to de-duplicate against.

    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'a', encoding='utf-8') as f_out:
        for line in f_in:
            if not line.strip():
                continue  # Blank lines carry no record.
            try:
                data = json.loads(line)

                # random.random() is only consulted when model and human agree.
                selected = data['model'] != data['human'] or random.random() < sample_rate
                if not selected or data['aid'] in seen_aids:
                    continue

                # Build the output record without mutating the parsed row:
                # drop 'model', rename 'human' to 'label'.
                record = {k: v for k, v in data.items() if k not in ('model', 'human')}
                record['label'] = data['human']

                f_out.write(json.dumps(record, ensure_ascii=False) + '\n')
                seen_aids.add(data['aid'])

            except json.JSONDecodeError as e:
                print(f"JSON解码错误: {e}")
                print(f"错误行内容: {line.strip()}")
            except KeyError as e:
                # str(KeyError) already wraps the key in quotes, so use the
                # raw key to avoid printing it double-quoted.
                print(f"KeyError: 键 '{e.args[0]}' 不存在")
                print(f"错误行内容: {line.strip()}")
            except TypeError as e:
                # Row is valid JSON but not an object (or its aid is unhashable).
                print(f"TypeError: {e}")
                print(f"错误行内容: {line.strip()}")
|
||||
# Run the merge with the default file names (resolved against the current
# working directory) and report where the result was written.
input_file, output_file = 'real_test.jsonl', 'labeled_data.jsonl'
process_data(input_file, output_file)
print(f"处理完成,结果已写入 {output_file}")
|
||||
|
7
dev.ts
7
dev.ts
@ -1,7 +0,0 @@
|
||||
#!/usr/bin/env -S deno run -A --watch=static/,routes/
|
||||
|
||||
import dev from "$fresh/dev.ts";
|
||||
import config from "./fresh.config.ts";
|
||||
|
||||
import "$std/dotenv/load.ts";
|
||||
// Hand off to Fresh's `dev` helper (imported from "$fresh/dev.ts") with this
// module's URL, the "./main.ts" entry path and the project config.
await dev(import.meta.url, "./main.ts", config);
|
@ -1,6 +0,0 @@
|
||||
import { defineConfig } from "$fresh/server.ts";
|
||||
import tailwind from "$fresh/plugins/tailwind.ts";
|
||||
|
||||
export default defineConfig({
|
||||
plugins: [tailwind()],
|
||||
});
|
27
fresh.gen.ts
27
fresh.gen.ts
@ -1,27 +0,0 @@
|
||||
// DO NOT EDIT. This file is generated by Fresh.
|
||||
// This file SHOULD be checked into source version control.
|
||||
// This file is automatically updated during development when running `dev.ts`.
|
||||
|
||||
import * as $_404 from "./routes/_404.tsx";
|
||||
import * as $_app from "./routes/_app.tsx";
|
||||
import * as $api_joke from "./routes/api/joke.ts";
|
||||
import * as $greet_name_ from "./routes/greet/[name].tsx";
|
||||
import * as $index from "./routes/index.tsx";
|
||||
import * as $Counter from "./islands/Counter.tsx";
|
||||
import type { Manifest } from "$fresh/server.ts";
|
||||
|
||||
const manifest = {
|
||||
routes: {
|
||||
"./routes/_404.tsx": $_404,
|
||||
"./routes/_app.tsx": $_app,
|
||||
"./routes/api/joke.ts": $api_joke,
|
||||
"./routes/greet/[name].tsx": $greet_name_,
|
||||
"./routes/index.tsx": $index,
|
||||
},
|
||||
islands: {
|
||||
"./islands/Counter.tsx": $Counter,
|
||||
},
|
||||
baseUrl: import.meta.url,
|
||||
} satisfies Manifest;
|
||||
|
||||
export default manifest;
|
@ -1,16 +0,0 @@
|
||||
import type { Signal } from "@preact/signals";
|
||||
import { Button } from "../components/Button.tsx";
|
||||
|
||||
interface CounterProps {
  /** Signal holding the current count; the buttons write to its `value`. */
  count: Signal<number>;
}

/**
 * Counter widget: shows the current value of `props.count` between a "-1"
 * and a "+1" button. Each click adjusts the signal's value by one.
 */
export default function Counter(props: CounterProps) {
  const { count } = props;
  // Builds a click handler that shifts the signal by `delta`.
  const bump = (delta: number) => () => (count.value += delta);

  return (
    <div class="flex gap-8 py-6">
      <Button onClick={bump(-1)}>-1</Button>
      <p class="text-3xl tabular-nums">{count}</p>
      <Button onClick={bump(1)}>+1</Button>
    </div>
  );
}
|
13
main.ts
13
main.ts
@ -1,13 +0,0 @@
|
||||
/// <reference no-default-lib="true" />
|
||||
/// <reference lib="dom" />
|
||||
/// <reference lib="dom.iterable" />
|
||||
/// <reference lib="dom.asynciterable" />
|
||||
/// <reference lib="deno.ns" />
|
||||
|
||||
import "$std/dotenv/load.ts";
|
||||
|
||||
import { start } from "$fresh/server.ts";
|
||||
import manifest from "./fresh.gen.ts";
|
||||
import config from "./fresh.config.ts";
|
||||
|
||||
// Call Fresh's `start` (imported from "$fresh/server.ts") with the generated
// manifest and the project config.
await start(manifest, config);
|
0
lab/.gitignore → ml/lab/.gitignore
vendored
0
lab/.gitignore → ml/lab/.gitignore
vendored
@ -1,27 +0,0 @@
|
||||
import { Head } from "$fresh/runtime.ts";
|
||||
|
||||
/**
 * "Page not found" screen: sets the document title through `<Head>` and
 * renders the logo, a short explanation and a link back to `/`.
 */
export default function Error404() {
  // The same text is used for the document title and the visible heading.
  const heading = "404 - Page not found";
  const logo = {
    class: "my-6",
    src: "/logo.svg",
    width: "128",
    height: "128",
    alt: "the Fresh logo: a sliced lemon dripping with juice",
  };

  return (
    <>
      <Head>
        <title>{heading}</title>
      </Head>
      <div class="px-4 py-8 mx-auto bg-[#86efac]">
        <div class="max-w-screen-md mx-auto flex flex-col items-center justify-center">
          <img {...logo} />
          <h1 class="text-4xl font-bold">{heading}</h1>
          <p class="my-4">
            The page you were looking for doesn't exist.
          </p>
          <a href="/" class="underline">Go back home</a>
        </div>
      </div>
    </>
  );
}
|
@ -1,16 +0,0 @@
|
||||
import { type PageProps } from "$fresh/server.ts";
|
||||
/**
 * Document shell: emits the `<html>`, `<head>` (charset, viewport, title and
 * stylesheet link) and `<body>` elements, and renders the supplied
 * `Component` inside the body.
 */
export default function App({ Component: Page }: PageProps) {
  return (
    <html>
      <head>
        <meta charset="utf-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1.0" />
        <title>cvsa</title>
        <link rel="stylesheet" href="/styles.css" />
      </head>
      <body>
        <Page />
      </body>
    </html>
  );
}
|
@ -1,21 +0,0 @@
|
||||
import { FreshContext } from "$fresh/server.ts";
|
||||
|
||||
// Jokes courtesy of https://punsandoneliners.com/randomness/programmer-jokes/
|
||||
const JOKES = [
|
||||
"Why do Java developers often wear glasses? They can't C#.",
|
||||
"A SQL query walks into a bar, goes up to two tables and says “can I join you?”",
|
||||
"Wasn't hard to crack Forrest Gump's password. 1forrest1.",
|
||||
"I love pressing the F5 key. It's refreshing.",
|
||||
"Called IT support and a chap from Australia came to fix my network connection. I asked “Do you come from a LAN down under?”",
|
||||
"There are 10 types of people in the world. Those who understand binary and those who don't.",
|
||||
"Why are assembly programmers often wet? They work below C level.",
|
||||
"My favourite computer based band is the Black IPs.",
|
||||
"What programme do you use to predict the music tastes of former US presidential candidates? An Al Gore Rhythm.",
|
||||
"An SEO expert walked into a bar, pub, inn, tavern, hostelry, public house.",
|
||||
];
|
||||
|
||||
/**
 * Request handler that ignores both arguments and answers with one entry of
 * `JOKES`, chosen at random, as the response body.
 */
export const handler = (_req: Request, _ctx: FreshContext): Response => {
  const pick = JOKES[Math.floor(Math.random() * JOKES.length)];
  return new Response(pick);
};
|
@ -1,5 +0,0 @@
|
||||
import { PageProps } from "$fresh/server.ts";
|
||||
|
||||
/** Renders "Hello <name>", where `name` is read from `props.params`. */
export default function Greet(props: PageProps) {
  const { name } = props.params;
  return <div>Hello {name}</div>;
}
|
@ -1,25 +0,0 @@
|
||||
import { useSignal } from "@preact/signals";
|
||||
import Counter from "../islands/Counter.tsx";
|
||||
|
||||
/**
 * Landing page: shows the logo, a welcome heading, a hint about editing
 * `./routes/index.tsx`, and a `Counter` backed by a signal that starts at 3.
 */
export default function Home() {
  const count = useSignal(3);
  const logo = {
    class: "my-6",
    src: "/logo.svg",
    width: "128",
    height: "128",
    alt: "the Fresh logo: a sliced lemon dripping with juice",
  };

  return (
    <div class="px-4 py-8 mx-auto bg-[#86efac]">
      <div class="max-w-screen-md mx-auto flex flex-col items-center justify-center">
        <img {...logo} />
        <h1 class="text-4xl font-bold">Welcome to Fresh</h1>
        <p class="my-4">
          Try updating this message in the
          <code class="mx-2">./routes/index.tsx</code> file, and refresh.
        </p>
        <Counter count={count} />
      </div>
    </div>
  );
}
|
Binary file not shown.
Before Width: | Height: | Size: 22 KiB |
@ -1,6 +0,0 @@
|
||||
<svg width="40" height="40" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M34.092 8.845C38.929 20.652 34.092 27 30 30.5c1 3.5-2.986 4.222-4.5 2.5-4.457 1.537-13.512 1.487-20-5C2 24.5 4.73 16.714 14 11.5c8-4.5 16-7 20.092-2.655Z" fill="#FFDB1E"/>
|
||||
<path d="M14 11.5c6.848-4.497 15.025-6.38 18.368-3.47C37.5 12.5 21.5 22.612 15.5 25c-6.5 2.587-3 8.5-6.5 8.5-3 0-2.5-4-5.183-7.75C2.232 23.535 6.16 16.648 14 11.5Z" fill="#fff" stroke="#FFDB1E"/>
|
||||
<path d="M28.535 8.772c4.645 1.25-.365 5.695-4.303 8.536-3.732 2.692-6.606 4.21-7.923 4.83-.366.173-1.617-2.252-1.617-1 0 .417-.7 2.238-.934 2.326-1.365.512-4.223 1.29-5.835 1.29-3.491 0-1.923-4.754 3.014-9.122.892-.789 1.478-.645 2.283-.645-.537-.773-.534-.917.403-1.546C17.79 10.64 23 8.77 25.212 8.42c.366.014.82.35.82.629.41-.14 2.095-.388 2.503-.278Z" fill="#FFE600"/>
|
||||
<path d="M14.297 16.49c.985-.747 1.644-1.01 2.099-2.526.566.121.841-.08 1.29-.701.324.466 1.657.608 2.453.701-.715.451-1.057.852-1.452 2.106-1.464-.611-3.167-.302-4.39.42Z" fill="#fff"/>
|
||||
</svg>
|
Before Width: | Height: | Size: 1.0 KiB |
@ -1,3 +0,0 @@
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
@ -1,7 +0,0 @@
|
||||
import { type Config } from "tailwindcss";
|
||||
|
||||
export default {
|
||||
content: [
|
||||
"{routes,islands,components}/**/*.{ts,tsx,js,jsx}",
|
||||
],
|
||||
} satisfies Config;
|
Loading…
Reference in New Issue
Block a user