add: quantized filter model inference

2025-02-13 07:42:16 +08:00 · 2025-02-13 07:42:16 +08:00 · fd090a25c2
commit fd090a25c2
parent 7e6854db00
11 changed files with 410 additions and 295 deletions
--- a/deno.json
+++ b/deno.json
@ -40,7 +40,9 @@
        "@bull-board/api": "npm:@bull-board/api",
        "@bull-board/express": "npm:@bull-board/express",
        "express": "npm:express",
-        "src/": "./src/"
+        "src/": "./src/",
+        "onnxruntime": "npm:onnxruntime-node",
+        "chalk": "npm:chalk"
    },
    "compilerOptions": {
        "jsx": "react-jsx",
--- a/filter/embedding_range.py
+++ b/filter/embedding_range.py
@ -0,0 +1,54 @@
+import json
+import torch
+import random
+from embedding import prepare_batch
+from tqdm import tqdm
+import numpy as np
+import matplotlib.pyplot as plt
+
+file_path = './data/filter/model_predicted.jsonl'
+
+class Dataset:
+    def __init__(self, file_path):
+        all_examples = self.load_data(file_path)
+        self.examples = all_examples
+
+    def load_data(self, file_path):
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return [json.loads(line) for line in f]
+
+    def __getitem__(self, idx):
+        end_idx = min((idx + 1) * self.batch_size, len(self.examples))
+        texts = {
+            'title': [ex['title'] for ex in self.examples[idx * self.batch_size:end_idx]],
+            'description': [ex['description'] for ex in self.examples[idx * self.batch_size:end_idx]],
+            'tags': [",".join(ex['tags']) for ex in self.examples[idx * self.batch_size:end_idx]],
+            'author_info': [ex['author_info'] for ex in self.examples[idx * self.batch_size:end_idx]]
+        }
+        return texts
+
+    def __len__(self):
+        return len(self.examples)
+
+    def get_batch(self, idx, batch_size):
+        self.batch_size = batch_size
+        return self.__getitem__(idx)
+    
+total = 600000
+batch_size = 512
+batch_num = total // batch_size
+dataset = Dataset(file_path)
+arr_len = batch_size * 4 * 1024
+sample_rate = 0.1
+sample_num = int(arr_len * sample_rate)
+
+data = np.array([])
+for i in tqdm(range(batch_num)):
+    batch = dataset.get_batch(i, batch_size)
+    batch = prepare_batch(batch, device="cpu")
+    arr = batch.flatten().numpy()
+    sampled = np.random.choice(arr.shape[0], size=sample_num, replace=False)
+    data = np.concatenate((data, arr[sampled]), axis=0) if data.size else arr[sampled]
+    if i % 10 == 0:
+        np.save('embedding_range.npy', data)
+np.save('embedding_range.npy', data)
--- a/filter/embedding_visualization.py
+++ b/filter/embedding_visualization.py
@ -0,0 +1,43 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# 加载数据
+data = np.load("1.npy")
+
+# 绘制直方图，获取频数
+n, bins, patches = plt.hist(data, bins=32, density=False, alpha=0.7, color='skyblue')
+
+# 计算数据总数
+total_data = len(data)
+
+# 将频数转换为频率
+frequencies = n / total_data
+
+# 计算统计信息
+max_val = np.max(data)
+min_val = np.min(data)
+std_dev = np.std(data)
+
+# 设置图形属性
+plt.title('Frequency Distribution Histogram')
+plt.xlabel('Value')
+plt.ylabel('Frequency')
+
+# 重新绘制直方图，使用频率作为高度
+plt.cla()  # 清除当前坐标轴上的内容
+plt.bar([(bins[i] + bins[i+1])/2 for i in range(len(bins)-1)], frequencies, width=[bins[i+1]-bins[i] for i in range(len(bins)-1)], alpha=0.7, color='skyblue')
+
+# 在柱子上注明频率值
+for i in range(len(patches)):
+    plt.text(bins[i]+(bins[i+1]-bins[i])/2, frequencies[i], f'{frequencies[i]:.2e}', ha='center', va='bottom', fontsize=6)
+
+# 在图表一角显示统计信息
+stats_text = f"Max: {max_val:.6f}\nMin: {min_val:.6f}\nStd: {std_dev:.4e}"
+plt.text(0.95, 0.95, stats_text, transform=plt.gca().transAxes,
+         ha='right', va='top', bbox=dict(facecolor='white', edgecolor='black', alpha=0.8))
+
+# 设置 x 轴刻度对齐柱子边界
+plt.xticks(bins, fontsize = 6)
+
+# 显示图形
+plt.show()
--- a/filter/modelV3_4.py
+++ b/filter/modelV3_4.py
@ -1,111 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-class VideoClassifierV3_4(nn.Module):
-    def __init__(self, embedding_dim=1024, hidden_dim=512, output_dim=3):
-        super().__init__()
-        self.num_channels = 4
-        self.channel_names = ['title', 'description', 'tags', 'author_info']
-        
-        # 可学习温度系数
-        self.temperature = nn.Parameter(torch.tensor(1.7))
-        
-        # 带约束的通道权重（使用Sigmoid替代Softmax）
-        self.channel_weights = nn.Parameter(torch.ones(self.num_channels))
-        
-        # 增强的非线性层
-        self.fc = nn.Sequential(
-            nn.Linear(embedding_dim * self.num_channels, hidden_dim*2),
-            nn.BatchNorm1d(hidden_dim*2),
-            nn.Dropout(0.3),
-            nn.GELU(),
-            nn.Linear(hidden_dim*2, hidden_dim),
-            nn.BatchNorm1d(hidden_dim),
-            nn.Dropout(0.2),
-            nn.GELU(),
-            nn.Linear(hidden_dim, output_dim)
-        )
-        
-        # 权重初始化
-        self._init_weights()
-    
-    def _init_weights(self):
-        for layer in self.fc:
-            if isinstance(layer, nn.Linear):
-                # 使用ReLU的初始化参数（GELU的近似）
-                nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')  # 修改这里
-                
-                # 或者使用Xavier初始化（更适合通用场景）
-                # nn.init.xavier_normal_(layer.weight, gain=nn.init.calculate_gain('relu'))
-                
-                nn.init.zeros_(layer.bias)
-
-
-    def forward(self, input_texts, sentence_transformer):
-        # 合并文本进行批量编码
-        all_texts = [text for channel in self.channel_names for text in input_texts[channel]]
-        
-        # 冻结的文本编码
-        with torch.no_grad():
-            embeddings = torch.tensor(
-                sentence_transformer.encode(all_texts),
-                device=next(self.parameters()).device
-            )
-        
-        # 分割并加权通道特征
-        split_sizes = [len(input_texts[name]) for name in self.channel_names]
-        channel_features = torch.split(embeddings, split_sizes, dim=0)
-        channel_features = torch.stack(channel_features, dim=1)
-        
-        # 自适应通道权重（Sigmoid约束）
-        weights = torch.sigmoid(self.channel_weights)  # [0,1]范围
-        weighted_features = channel_features * weights.unsqueeze(0).unsqueeze(-1)
-        
-        # 特征拼接
-        combined = weighted_features.view(weighted_features.size(0), -1)
-        
-        return self.fc(combined)
-
-    def get_channel_weights(self):
-        """获取各通道权重（带温度调节）"""
-        return torch.softmax(self.channel_weights / self.temperature, dim=0).detach().cpu().numpy()
-    
-
-class AdaptiveRecallLoss(nn.Module):
-    def __init__(self, class_weights, alpha=0.8, gamma=2.0, fp_penalty=0.5):
-        """
-        Args:
-            class_weights (torch.Tensor): 类别权重
-            alpha (float): 召回率调节因子（0-1）
-            gamma (float): Focal Loss参数
-            fp_penalty (float): 类别0假阳性惩罚强度
-        """
-        super().__init__()
-        self.class_weights = class_weights
-        self.alpha = alpha
-        self.gamma = gamma
-        self.fp_penalty = fp_penalty
-
-    def forward(self, logits, targets):
-        # 基础交叉熵损失
-        ce_loss = F.cross_entropy(logits, targets, weight=self.class_weights, reduction='none')
-        
-        # Focal Loss组件
-        pt = torch.exp(-ce_loss)
-        focal_loss = ((1 - pt) ** self.gamma) * ce_loss
-        
-        # 召回率增强（对困难样本加权）
-        class_mask = F.one_hot(targets, num_classes=len(self.class_weights))
-        class_weights = (self.alpha + (1 - self.alpha) * pt.unsqueeze(-1)) * class_mask
-        recall_loss = (class_weights * focal_loss.unsqueeze(-1)).sum(dim=1)
-        
-        # 类别0假阳性惩罚
-        probs = F.softmax(logits, dim=1)
-        fp_mask = (targets != 0) & (torch.argmax(logits, dim=1) == 0)
-        fp_loss = self.fp_penalty * probs[:, 0][fp_mask].pow(2).sum()
-        
-        # 总损失
-        total_loss = recall_loss.mean() + fp_loss / len(targets)
-        
-        return total_loss
--- a/filter/old.py
+++ b/filter/old.py
@ -1,148 +0,0 @@
-import os
-os.environ["PYTORCH_ENABLE_MPS_FALLBACK"]="1"
-
-import torch
-import torch.nn as nn
-from sentence_transformers import SentenceTransformer
-from transformers import AutoTokenizer
-import json
-from torch.utils.data import Dataset, DataLoader
-import numpy as np
-
-class VideoDataset(Dataset):
-    def __init__(self, data_path, sentence_transformer):
-        self.data = []
-        self.sentence_transformer = sentence_transformer
-        with open(data_path, "r", encoding="utf-8") as f:
-            for line in f:
-                self.data.append(json.loads(line))
-
-    def __len__(self):
-        return len(self.data)
-
-    def __getitem__(self, idx):
-        item = self.data[idx]
-        title = item["title"]
-        description = item["description"]
-        tags = item["tags"]
-        label = item["label"]
-
-        # 获取每个特征的嵌入
-        title_embedding = self.get_embedding(title)
-        description_embedding = self.get_embedding(description)
-        tags_embedding = self.get_embedding(" ".join(tags))
-
-        # 将嵌入连接起来
-        combined_embedding = torch.cat([title_embedding, description_embedding, tags_embedding], dim=0)
-
-        return combined_embedding, label
-
-    def get_embedding(self, text):
-        # 使用SentenceTransformer生成嵌入
-        embedding = self.sentence_transformer.encode(text)
-        return torch.tensor(embedding)
-
-class VideoClassifier(nn.Module):
-    def __init__(self, embedding_dim=768, hidden_dim=256, output_dim=3):
-        super(VideoClassifier, self).__init__()
-        # 每个特征的嵌入维度是embedding_dim，总共有3个特征
-        total_embedding_dim = embedding_dim * 3
-
-        # 全连接层
-        self.fc1 = nn.Linear(total_embedding_dim, hidden_dim)
-        self.fc2 = nn.Linear(hidden_dim, output_dim)
-        self.log_softmax = nn.LogSoftmax(dim=1)
-
-    def forward(self, embedding_features):
-        # 全连接层
-        x = torch.relu(self.fc1(embedding_features))
-        output = self.fc2(x)
-        output = self.log_softmax(output)
-        return output
-
-def train(model, dataloader, criterion, optimizer, device):
-    model.train()
-    total_loss = 0
-    correct = 0
-    total = 0
-    for embedding_features, labels in dataloader:
-        embedding_features = embedding_features.to(device)
-        labels = labels.to(device)
-        optimizer.zero_grad()
-        outputs = model(embedding_features)
-        loss = criterion(outputs, labels)
-        loss.backward()
-        optimizer.step()
-        total_loss += loss.item()
-        _, predicted = torch.max(outputs, 1)
-        correct += (predicted == labels).sum().item()
-        total += labels.size(0)
-    avg_loss = total_loss / len(dataloader)
-    accuracy = correct / total
-    return avg_loss, accuracy
-
-def validate(model, dataloader, criterion, device):
-    model.eval()
-    total_loss = 0
-    correct = 0
-    total = 0
-    with torch.no_grad():
-        for embedding_features, labels in dataloader:
-            embedding_features = embedding_features.to(device)
-            labels = labels.to(device)
-            outputs = model(embedding_features)
-            loss = criterion(outputs, labels)
-            total_loss += loss.item()
-            _, predicted = torch.max(outputs, 1)
-            correct += (predicted == labels).sum().item()
-            total += labels.size(0)
-    avg_loss = total_loss / len(dataloader)
-    accuracy = correct / total
-    return avg_loss, accuracy
-
-# 超参数
-hidden_dim = 256
-output_dim = 3
-batch_size = 32
-num_epochs = 10
-learning_rate = 0.001
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
-# 加载数据集
-tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v3")
-sentence_transformer = SentenceTransformer("Thaweewat/jina-embedding-v3-m2v-1024")
-dataset = VideoDataset("labeled_data.jsonl", sentence_transformer=sentence_transformer)
-dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
-# 初始化模型
-model = VideoClassifier(embedding_dim=768, hidden_dim=256, output_dim=3).to(device)
-
-# 损失函数和优化器
-criterion = nn.CrossEntropyLoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
-num_epochs = 5
-# 训练和验证
-for epoch in range(num_epochs):
-    train_loss, train_acc = train(model, dataloader, criterion, optimizer, device)
-    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
-
-# 保存模型
-torch.save(model.state_dict(), "video_classifier.pth")
-model.eval()  # 设置为评估模式
-
-# 2. 定义推理函数
-def predict(model, sentence_transformer, title, description, tags, device):
-    # 将输入数据转换为嵌入
-    title_embedding = torch.tensor(sentence_transformer.encode(title)).to(device)
-    description_embedding = torch.tensor(sentence_transformer.encode(description)).to(device)
-    tags_embedding = torch.tensor(sentence_transformer.encode(" ".join(tags))).to(device)
-
-    # 将嵌入连接起来
-    combined_embedding = torch.cat([title_embedding, description_embedding, tags_embedding], dim=0).unsqueeze(0)
-
-    # 推理
-    with torch.no_grad():
-        output = model(combined_embedding)
-        _, predicted = torch.max(output, 1)
-    
-    return predicted.item()
--- a/filter/onnx_export.py
+++ b/filter/onnx_export.py
@ -2,8 +2,8 @@ import torch
 from modelV3_10 import VideoClassifierV3_10


-def export_onnx(model_path="./filter/checkpoints/best_model_V3.10.pt", 
-               onnx_path="./model/video_classifier_v3_10.onnx"):
+def export_onnx(model_path="./filter/checkpoints/best_model_V3.11.pt", 
+               onnx_path="./model/video_classifier_v3_11.onnx"):
    # 初始化模型
    model = VideoClassifierV3_10()
    model.load_state_dict(torch.load(model_path))
--- a/filter/quantize.py
+++ b/filter/quantize.py
@ -0,0 +1,36 @@
+from safetensors import safe_open
+from safetensors.torch import save_file
+import torch
+
+# 配置路径
+model_path = "./model/embedding/model.safetensors"
+save_path = "./model/embedding/int8_model.safetensors"
+
+# 加载原始嵌入层
+with safe_open(model_path, framework="pt") as f:
+    embeddings_tensor = f.get_tensor("embeddings")
+
+# 计算极值
+min_val = torch.min(embeddings_tensor)
+max_val = torch.max(embeddings_tensor)
+
+# 计算量化参数
+scale = (max_val - min_val) / 255  # int8 的范围是 256 个值（-128 到 127）
+
+# 将浮点数映射到 int8 范围
+int8_tensor = torch.round((embeddings_tensor - min_val) / scale).to(torch.int8) - 128
+
+# 确保与原张量形状一致
+assert int8_tensor.shape == embeddings_tensor.shape
+
+# 保存映射后的 int8 张量
+save_file({"embeddings": int8_tensor}, save_path)
+
+# 输出反映射公式
+print("int8 反映射公式：")
+m = min_val.item()
+am = abs(min_val.item())
+sign = "-" if m < 0 else "+"
+print(f"int8_tensor = (int8_value + 128) × {scale.item()} {sign} {am}")
+
+print("int8 映射完成！")
--- a/lib/log/logger.ts
+++ b/lib/log/logger.ts
@ -1,6 +1,6 @@
 import winston, { format, transports } from "npm:winston";
 import { TransformableInfo } from "npm:logform";
-import chalk from "npm:chalk";
+import chalk from "chalk";

 const customFormat = format.printf((info: TransformableInfo) => {
 	const { timestamp, level, message, service, codePath, error } = info;
--- a/lib/ml/classifyVideo.ts
+++ b/lib/ml/classifyVideo.ts
@ -1,32 +0,0 @@
-import { AutoModel, AutoTokenizer, Tensor } from '@huggingface/transformers';
-
-const modelName = "alikia2x/jina-embedding-v3-m2v-1024";
-
-const modelConfig = {
-    config: { model_type: 'model2vec' },
-    dtype: 'fp32',
-    revision: 'refs/pr/1',
-	cache_dir: undefined,
-	local_files_only: true,
-};
-const tokenizerConfig = {
-    revision: 'refs/pr/2'
-};
-
-const model = await AutoModel.from_pretrained(modelName, modelConfig);
-const tokenizer = await AutoTokenizer.from_pretrained(modelName, tokenizerConfig);
-
-const texts = ['hello', 'hello world'];
-const { input_ids } = await tokenizer(texts, { add_special_tokens: false, return_tensor: false });
-
-const cumsum = arr => arr.reduce((acc, num, i) => [...acc, num + (acc[i - 1] || 0)], []);
-const offsets = [0, ...cumsum(input_ids.slice(0, -1).map(x => x.length))];
-
-const flattened_input_ids = input_ids.flat();
-const modelInputs = {
-    input_ids: new Tensor('int64', flattened_input_ids, [flattened_input_ids.length]),
-    offsets: new Tensor('int64', offsets, [offsets.length])
-};
-
-const { embeddings } = await model(modelInputs);
-console.log(embeddings.tolist()); // output matches python version
--- a/lib/ml/filter_inference.ts
+++ b/lib/ml/filter_inference.ts
@ -0,0 +1,103 @@
+import { AutoTokenizer } from "@huggingface/transformers";
+import * as ort from "onnxruntime";
+
+// 配置参数
+const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
+const onnxClassifierPath = "./model/video_classifier_v3_11.onnx";
+const onnxEmbeddingOriginalPath = "./model/model.onnx";
+
+// 初始化会话
+const [sessionClassifier, sessionEmbedding] = await Promise.all([
+    ort.InferenceSession.create(onnxClassifierPath),
+    ort.InferenceSession.create(onnxEmbeddingOriginalPath),
+]);
+
+
+let tokenizer: any;
+
+// 初始化分词器
+async function loadTokenizer() {
+    const tokenizerConfig = { local_files_only: true };
+    tokenizer = await AutoTokenizer.from_pretrained(sentenceTransformerModelName, tokenizerConfig);
+}
+
+function softmax(logits: Float32Array): number[] {
+	const maxLogit = Math.max(...logits);
+	const exponents = logits.map((logit) => Math.exp(logit - maxLogit));
+	const sumOfExponents = exponents.reduce((sum, exp) => sum + exp, 0);
+	return Array.from(exponents.map((exp) => exp / sumOfExponents));
+}
+
+async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
+    const { input_ids } = await tokenizer(texts, { 
+        add_special_tokens: false, 
+        return_tensor: false 
+    });
+
+    // 构造输入参数
+    const cumsum = (arr: number[]): number[] =>
+        arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
+    
+    const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: string) => x.length))];
+    const flattened_input_ids = input_ids.flat();
+
+    // 准备ONNX输入
+    const inputs = {
+        input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [flattened_input_ids.length]),
+        offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length])
+    };
+
+    // 执行推理
+    const { embeddings } = await session.run(inputs);
+    return Array.from(embeddings.data as Float32Array);
+}
+
+// 分类推理函数
+async function runClassification(embeddings: number[]): Promise<number[]> {
+    const inputTensor = new ort.Tensor(
+        Float32Array.from(embeddings), 
+        [1, 4, 1024]
+    );
+    
+    const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
+    return softmax(logits.data as Float32Array);
+}
+
+async function processInputTexts(
+	title: string,
+	description: string,
+	tags: string,
+	author_info: string,
+): Promise<number[]> {
+	const embeddings = await getONNXEmbeddings([
+		title,
+		description,
+		tags,
+		author_info
+	], sessionEmbedding);
+
+	const probabilities = await runClassification(embeddings);
+	return probabilities;
+}
+
+async function main() {
+	await loadTokenizer();
+	const titleText = `【洛天依&乐正绫&心华原创】归一【时之歌Project】`
+    const descriptionText = " 《归一》Vocaloid ver\r\n出品：泛音堂 / 作词：冥凰 / 作曲：汤汤 / 编曲&amp;混音：iAn / 调教：花之祭P\r\n后期：向南 / 人设：Pora / 场景：A舍长 / PV：Sung Hsu（麻薯映画） / 海报：易玄玑 \r\n唱：乐正绫 &amp; 洛天依 &amp; 心华\r\n时之歌Project东国世界观歌曲《归一》双本家VC版\r\nMP3：http://5sing.kugou.com/yc/3006072.html \r\n伴奏：http://5sing.kugou.com/bz/2";
+    const tagsText = '乐正绫,洛天依,心华,VOCALOID中文曲,时之歌,花之祭P';
+    const authorInfoText = "时之歌Project: 欢迎光临时之歌~\r\n官博：http://weibo.com/songoftime\r\n官网：http://www.songoftime.com/";
+
+	try {
+		const probabilities = await processInputTexts(titleText, descriptionText, tagsText, authorInfoText);
+		console.log("Class Probabilities:", probabilities);
+		console.log(`Class 0 Probability: ${probabilities[0]}`);
+		console.log(`Class 1 Probability: ${probabilities[1]}`);
+		console.log(`Class 2 Probability: ${probabilities[2]}`);
+        // Hold the session for 10s
+		await new Promise((resolve) => setTimeout(resolve, 10000));
+	} catch (error) {
+		console.error("Error processing texts:", error);
+	}
+}
+
+await main();
--- a/lib/ml/quant_benchmark.ts
+++ b/lib/ml/quant_benchmark.ts
@ -0,0 +1,168 @@
+import { AutoTokenizer } from "@huggingface/transformers";
+import * as ort from "onnxruntime";
+
+// 配置参数
+const sentenceTransformerModelName = "alikia2x/jina-embedding-v3-m2v-1024";
+const onnxClassifierPath = "./model/video_classifier_v3_11.onnx";
+const onnxEmbeddingOriginalPath = "./model/embedding_original.onnx";
+const onnxEmbeddingQuantizedPath = "./model/model.onnx";
+
+// 初始化会话
+const [sessionClassifier, sessionEmbeddingOriginal, sessionEmbeddingQuantized] = await Promise.all([
+    ort.InferenceSession.create(onnxClassifierPath),
+    ort.InferenceSession.create(onnxEmbeddingOriginalPath),
+    ort.InferenceSession.create(onnxEmbeddingQuantizedPath)
+]);
+
+let tokenizer: any;
+
+// 初始化分词器
+async function loadTokenizer() {
+    const tokenizerConfig = { local_files_only: true };
+    tokenizer = await AutoTokenizer.from_pretrained(sentenceTransformerModelName, tokenizerConfig);
+}
+
+// 新的嵌入生成函数（使用ONNX）
+async function getONNXEmbeddings(texts: string[], session: ort.InferenceSession): Promise<number[]> {
+    const { input_ids } = await tokenizer(texts, { 
+        add_special_tokens: false, 
+        return_tensor: false 
+    });
+
+    // 构造输入参数
+    const cumsum = (arr: number[]): number[] =>
+        arr.reduce((acc: number[], num: number, i: number) => [...acc, num + (acc[i - 1] || 0)], []);
+    
+    const offsets: number[] = [0, ...cumsum(input_ids.slice(0, -1).map((x: string) => x.length))];
+    const flattened_input_ids = input_ids.flat();
+
+    // 准备ONNX输入
+    const inputs = {
+        input_ids: new ort.Tensor("int64", new BigInt64Array(flattened_input_ids.map(BigInt)), [flattened_input_ids.length]),
+        offsets: new ort.Tensor("int64", new BigInt64Array(offsets.map(BigInt)), [offsets.length])
+    };
+
+    // 执行推理
+    const { embeddings } = await session.run(inputs);
+    return Array.from(embeddings.data as Float32Array);
+}
+
+function softmax(logits: Float32Array): number[] {
+    const maxLogit = Math.max(...logits);
+    const exponents = logits.map((logit) => Math.exp(logit - maxLogit));
+    const sumOfExponents = exponents.reduce((sum, exp) => sum + exp, 0);
+    return Array.from(exponents.map((exp) => exp / sumOfExponents));
+}
+
+// 分类推理函数
+async function runClassification(embeddings: number[]): Promise<number[]> {
+    const inputTensor = new ort.Tensor(
+        Float32Array.from(embeddings), 
+        [1, 4, 1024]
+    );
+    
+    const { logits } = await sessionClassifier.run({ channel_features: inputTensor });
+    return softmax(logits.data as Float32Array);
+}
+
+// 指标计算函数
+function calculateMetrics(labels: number[], predictions: number[]): {
+    accuracy: number,
+    precision: number,
+    recall: number,
+    f1: number
+} {
+    // 初始化混淆矩阵
+    const classCount = Math.max(...labels, ...predictions) + 1;
+    const matrix = Array.from({ length: classCount }, () => 
+        Array.from({ length: classCount }, () => 0)
+    );
+
+    // 填充矩阵
+    labels.forEach((trueLabel, i) => {
+        matrix[trueLabel][predictions[i]]++;
+    });
+
+    // 计算各指标
+    let totalTP = 0, totalFP = 0, totalFN = 0;
+    
+    for (let c = 0; c < classCount; c++) {
+        const TP = matrix[c][c];
+        const FP = matrix.flatMap((row, i) => i === c ? [] : [row[c]]).reduce((a, b) => a + b, 0);
+        const FN = matrix[c].filter((_, i) => i !== c).reduce((a, b) => a + b, 0);
+
+        totalTP += TP;
+        totalFP += FP;
+        totalFN += FN;
+    }
+
+    const precision = totalTP / (totalTP + totalFP);
+    const recall = totalTP / (totalTP + totalFN);
+    const f1 = 2 * (precision * recall) / (precision + recall) || 0;
+
+    return {
+        accuracy: labels.filter((l, i) => l === predictions[i]).length / labels.length,
+        precision,
+        recall,
+        f1
+    };
+}
+
+// 改造后的评估函数
+async function evaluateModel(session: ort.InferenceSession): Promise<{
+    accuracy: number;
+    precision: number;
+    recall: number;
+    f1: number;
+}> {
+    const data = await Deno.readTextFile("./data/filter/output.jsonl");
+    const samples = data.split("\n")
+        .map(line => {
+            try { return JSON.parse(line); } 
+            catch { return null; }
+        })
+        .filter(Boolean);
+
+    const allPredictions: number[] = [];
+    const allLabels: number[] = [];
+
+    for (const sample of samples) {
+        try {
+            const embeddings = await getONNXEmbeddings([
+                sample.title,
+                sample.description,
+                sample.tags.join(","),
+                sample.author_info
+            ], session);
+
+            const probabilities = await runClassification(embeddings);
+            allPredictions.push(probabilities.indexOf(Math.max(...probabilities)));
+            allLabels.push(sample.label);
+        } catch (error) {
+            console.error("Processing error:", error);
+        }
+    }
+
+    return calculateMetrics(allLabels, allPredictions);
+}
+
+// 主函数
+async function main() {
+    await loadTokenizer();
+
+    // 评估原始模型
+    const t = new Date().getTime();
+    const originalMetrics = await evaluateModel(sessionEmbeddingOriginal);
+    console.log("Original Model Metrics:");
+    console.table(originalMetrics);
+    console.log(`Original Model Metrics: ${new Date().getTime() - t}ms`);
+
+    // 评估量化模型
+    const t2 = new Date().getTime();
+    const quantizedMetrics = await evaluateModel(sessionEmbeddingQuantized);
+    console.log("Quantized Model Metrics:");
+    console.table(quantizedMetrics);
+    console.log(`Quantized Model Metrics: ${new Date().getTime() - t2}ms`);
+}
+
+await main();