diff --git a/filter/RunningLogs.txt b/filter/RunningLogs.txt index e6e5092..80640f4 100644 --- a/filter/RunningLogs.txt +++ b/filter/RunningLogs.txt @@ -24,3 +24,6 @@ Note 2243: V3.11 # 256维嵌入 2253: V3.11 # 1024维度嵌入(对比) 2337: V3.12 # 级联分类 +2350: V3.13 # V3.12, 换用普通交叉熵损失 +0012: V3.11 # 换用普通交叉熵损失 +0039: V3.11 # 级联分类,但使用两个独立模型 \ No newline at end of file diff --git a/filter/labeling_system.py b/filter/labeling_system.py index 504e19c..caba2d9 100644 --- a/filter/labeling_system.py +++ b/filter/labeling_system.py @@ -10,7 +10,7 @@ import tty import termios from sentence_transformers import SentenceTransformer from db_utils import fetch_entry_data, parse_entry_data -from modelV3_9 import VideoClassifierV3_9 +from modelV3_10 import VideoClassifierV3_10 class LabelingSystem: def __init__(self, mode='model_testing', database_path="./data/main.db", @@ -27,7 +27,7 @@ class LabelingSystem: self.model = None self.sentence_transformer = None if self.mode == 'model_testing': - self.model = VideoClassifierV3_9() + self.model = VideoClassifierV3_10() self.model.load_state_dict(torch.load(model_path)) self.model.eval() self.sentence_transformer = SentenceTransformer("Thaweewat/jina-embedding-v3-m2v-1024") diff --git a/filter/modelV3_10.py b/filter/modelV3_10.py index 909590b..9efd0e9 100644 --- a/filter/modelV3_10.py +++ b/filter/modelV3_10.py @@ -3,13 +3,13 @@ import torch.nn as nn import torch.nn.functional as F class VideoClassifierV3_10(nn.Module): - def __init__(self, embedding_dim=1024, hidden_dim=648, output_dim=3): + def __init__(self, embedding_dim=1024, hidden_dim=648, output_dim=3, temperature=1.7): super().__init__() self.num_channels = 4 self.channel_names = ['title', 'description', 'tags', 'author_info'] # 可学习温度系数 - self.temperature = nn.Parameter(torch.tensor(1.7)) + self.temperature = nn.Parameter(torch.tensor(temperature)) # 带约束的通道权重(使用Sigmoid替代Softmax) self.channel_weights = nn.Parameter(torch.ones(self.num_channels)) diff --git a/filter/test.py b/filter/test.py index 1554c98..4a29421 100644 --- a/filter/test.py +++ b/filter/test.py @@ -1,7 +1,7 @@ from labeling_system import LabelingSystem DATABASE_PATH = "./data/main.db" -MODEL_PATH = "./filter/checkpoints/best_model_V3.9.pt" +MODEL_PATH = "./filter/checkpoints/best_model_V3.11.pt" OUTPUT_FILE = "./data/filter/real_test.jsonl" BATCH_SIZE = 50 diff --git a/filter/train.py b/filter/train.py index 065d38e..8f68d62 100644 --- a/filter/train.py +++ b/filter/train.py @@ -4,7 +4,7 @@ import numpy as np from torch.utils.data import DataLoader import torch.optim as optim from dataset import MultiChannelDataset -from filter.modelV3_12 import VideoClassifierV3_12 +from filter.modelV3_10 import VideoClassifierV3_10 from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report import os import torch @@ -51,30 +51,39 @@ class_weights = torch.tensor( ) # 初始化模型和SentenceTransformer -model = VideoClassifierV3_12() -checkpoint_name = './filter/checkpoints/best_model_V3.12.pt' +model1 = VideoClassifierV3_10(output_dim=2, hidden_dim=384) +model2 = VideoClassifierV3_10(output_dim=2, hidden_dim=384) +checkpoint1_name = './filter/checkpoints/best_model_V3.14-part1.pt' +checkpoint2_name = './filter/checkpoints/best_model_V3.14-part2.pt' # 模型保存路径 os.makedirs('./filter/checkpoints', exist_ok=True) # 优化器 -optimizer = optim.AdamW(model.parameters(), lr=4e-4) +optimizer1 = optim.AdamW(model1.parameters(), lr=4e-4) +optimizer2 = optim.AdamW(model2.parameters(), lr=4e-4) # Cross entropy loss -criterion = nn.CrossEntropyLoss() +criterion1 = nn.CrossEntropyLoss() +criterion2 = nn.CrossEntropyLoss() def count_trainable_parameters(model): return sum(p.numel() for p in model.parameters() if p.requires_grad) -def evaluate(model, dataloader): - model.eval() +def evaluate(model1, model2, dataloader): + model1.eval() + model2.eval() all_preds = [] all_labels = [] with torch.no_grad(): for batch in dataloader: batch_tensor = prepare_batch(batch['texts']) - logits = model(batch_tensor) - preds = torch.argmax(logits, dim=1) + logits1 = model1(batch_tensor) + logits2 = model2(batch_tensor) + preds1 = torch.argmax(logits1, dim=1) + preds2 = torch.argmax(logits2, dim=1) + # 如果preds1输出为0,那么预测结果为0,否则使用preds2的结果加上1 + preds = torch.where(preds1 == 0, preds1, preds2 + 1) all_preds.extend(preds.cpu().numpy()) all_labels.extend(batch['label'].cpu().numpy()) @@ -86,10 +95,10 @@ def evaluate(model, dataloader): # 获取每个类别的详细指标 class_report = classification_report(all_labels, all_preds, output_dict=True) - - return f1, recall, precision, accuracy, class_report -print(f"Trainable parameters: {count_trainable_parameters(model)}") + return f1, recall, precision, accuracy, class_report + +print(f"Trainable parameters: {count_trainable_parameters(model1) + count_trainable_parameters(model2)}") # 训练循环 best_f1 = 0 @@ -98,29 +107,44 @@ eval_interval = 20 num_epochs = 8 for epoch in range(num_epochs): - model.train() - epoch_loss = 0 + model1.train() + model2.train() + epoch_loss_1 = 0 + epoch_loss_2 = 0 # 训练阶段 for batch_idx, batch in enumerate(train_loader): - optimizer.zero_grad() + optimizer1.zero_grad() + optimizer2.zero_grad() batch_tensor = prepare_batch(batch['texts']) + batch_tensor_1 = batch_tensor + mask = batch['label'] != 0 + batch_tensor_2 = batch_tensor_1[mask] - logits = model(batch_tensor) + logits1 = model1(batch_tensor_1) + logits2 = model2(batch_tensor_2) - loss = criterion(logits, batch['label']) - loss.backward() - optimizer.step() - epoch_loss += loss.item() + label1 = torch.where(batch['label'] == 0, 0, 1) + label2 = torch.where(batch['label'][mask] == 1, 0, 1) + loss1 = criterion1(logits1, label1) + loss1.backward() + loss2 = criterion2(logits2, label2) + loss2.backward() + optimizer1.step() + optimizer2.step() + epoch_loss_1 += loss1.item() + epoch_loss_2 += loss2.item() # 记录训练损失 - writer.add_scalar('Train/Loss', loss.item(), step) + writer.add_scalar('Train/Loss-1', loss1.item(), step) + writer.add_scalar('Train/Loss-2', loss2.item(), step) step += 1 # 每隔 eval_interval 步执行验证 if step % eval_interval == 0: - eval_f1, eval_recall, eval_precision, eval_accuracy, eval_class_report = evaluate(model, eval_loader) + eval_f1, eval_recall, eval_precision, eval_accuracy, eval_class_report = evaluate(model1, model2, eval_loader) + writer.add_scalar('Eval/F1', eval_f1, step) writer.add_scalar('Eval/Recall', eval_recall, step) writer.add_scalar('Eval/Precision', eval_precision, step) @@ -136,17 +160,19 @@ for epoch in range(num_epochs): # 保存最佳模型 if eval_f1 > best_f1: best_f1 = eval_f1 - torch.save(model.state_dict(), checkpoint_name) + torch.save(model1.state_dict(), checkpoint1_name) + torch.save(model2.state_dict(), checkpoint2_name) print(" Saved best model") - print("Channel weights: ", model.get_channel_weights()) + print("Channel weights: ", model1.get_channel_weights()) + print("Channel weights: ", model2.get_channel_weights()) # 记录每个 epoch 的平均训练损失 - avg_epoch_loss = epoch_loss / len(train_loader) + avg_epoch_loss = (epoch_loss_1 + epoch_loss_2) / 2 / len(train_loader) writer.add_scalar('Train/Epoch_Loss', avg_epoch_loss, epoch) # 每个 epoch 结束后执行一次完整验证 - train_f1, train_recall, train_precision, train_accuracy, train_class_report = evaluate(model, train_loader) - eval_f1, eval_recall, eval_precision, eval_accuracy, eval_class_report = evaluate(model, eval_loader) + train_f1, train_recall, train_precision, train_accuracy, train_class_report = evaluate(model1, model2, train_loader) + eval_f1, eval_recall, eval_precision, eval_accuracy, eval_class_report = evaluate(model1, model2, eval_loader) writer.add_scalar('Train/Epoch_F1', train_f1, epoch) writer.add_scalar('Train/Epoch_Recall', train_recall, epoch) @@ -173,8 +199,9 @@ for epoch in range(num_epochs): # 测试阶段 print("\nTesting...") -model.load_state_dict(torch.load(checkpoint_name)) -test_f1, test_recall, test_precision, test_accuracy, test_class_report = evaluate(model, test_loader) +model1.load_state_dict(torch.load(checkpoint1_name)) +model2.load_state_dict(torch.load(checkpoint2_name)) +test_f1, test_recall, test_precision, test_accuracy, test_class_report = evaluate(model1, model2, test_loader) writer.add_scalar('Test/F1', test_f1, step) writer.add_scalar('Test/Recall', test_recall, step) writer.add_scalar('Test/Precision', test_precision, step)