update: separate two classifiers

This commit is contained in:
alikia2x (寒寒) 2025-03-03 00:56:02 +08:00
parent f488c3ceda
commit f08a863ac6
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
5 changed files with 64 additions and 34 deletions

View File

@ -24,3 +24,6 @@ Note
2243: V3.11 # 256维嵌入 2243: V3.11 # 256维嵌入
2253: V3.11 # 1024维度嵌入对比 2253: V3.11 # 1024维度嵌入对比
2337: V3.12 # 级联分类 2337: V3.12 # 级联分类
2350: V3.13 # V3.12, 换用普通交叉熵损失
0012: V3.11 # 换用普通交叉熵损失
0039: V3.11 # 级联分类,但使用两个独立模型

View File

@ -10,7 +10,7 @@ import tty
import termios import termios
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
from db_utils import fetch_entry_data, parse_entry_data from db_utils import fetch_entry_data, parse_entry_data
from modelV3_9 import VideoClassifierV3_9 from modelV3_10 import VideoClassifierV3_10
class LabelingSystem: class LabelingSystem:
def __init__(self, mode='model_testing', database_path="./data/main.db", def __init__(self, mode='model_testing', database_path="./data/main.db",
@ -27,7 +27,7 @@ class LabelingSystem:
self.model = None self.model = None
self.sentence_transformer = None self.sentence_transformer = None
if self.mode == 'model_testing': if self.mode == 'model_testing':
self.model = VideoClassifierV3_9() self.model = VideoClassifierV3_10()
self.model.load_state_dict(torch.load(model_path)) self.model.load_state_dict(torch.load(model_path))
self.model.eval() self.model.eval()
self.sentence_transformer = SentenceTransformer("Thaweewat/jina-embedding-v3-m2v-1024") self.sentence_transformer = SentenceTransformer("Thaweewat/jina-embedding-v3-m2v-1024")

View File

@ -3,13 +3,13 @@ import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
class VideoClassifierV3_10(nn.Module): class VideoClassifierV3_10(nn.Module):
def __init__(self, embedding_dim=1024, hidden_dim=648, output_dim=3): def __init__(self, embedding_dim=1024, hidden_dim=648, output_dim=3, temperature=1.7):
super().__init__() super().__init__()
self.num_channels = 4 self.num_channels = 4
self.channel_names = ['title', 'description', 'tags', 'author_info'] self.channel_names = ['title', 'description', 'tags', 'author_info']
# 可学习温度系数 # 可学习温度系数
self.temperature = nn.Parameter(torch.tensor(1.7)) self.temperature = nn.Parameter(torch.tensor(temperature))
# 带约束的通道权重使用Sigmoid替代Softmax # 带约束的通道权重使用Sigmoid替代Softmax
self.channel_weights = nn.Parameter(torch.ones(self.num_channels)) self.channel_weights = nn.Parameter(torch.ones(self.num_channels))

View File

@ -1,7 +1,7 @@
from labeling_system import LabelingSystem from labeling_system import LabelingSystem
DATABASE_PATH = "./data/main.db" DATABASE_PATH = "./data/main.db"
MODEL_PATH = "./filter/checkpoints/best_model_V3.9.pt" MODEL_PATH = "./filter/checkpoints/best_model_V3.11.pt"
OUTPUT_FILE = "./data/filter/real_test.jsonl" OUTPUT_FILE = "./data/filter/real_test.jsonl"
BATCH_SIZE = 50 BATCH_SIZE = 50

View File

@ -4,7 +4,7 @@ import numpy as np
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
import torch.optim as optim import torch.optim as optim
from dataset import MultiChannelDataset from dataset import MultiChannelDataset
from filter.modelV3_12 import VideoClassifierV3_12 from filter.modelV3_10 import VideoClassifierV3_10
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report
import os import os
import torch import torch
@ -51,30 +51,39 @@ class_weights = torch.tensor(
) )
# 初始化模型和SentenceTransformer # 初始化模型和SentenceTransformer
model = VideoClassifierV3_12() model1 = VideoClassifierV3_10(output_dim=2, hidden_dim=384)
checkpoint_name = './filter/checkpoints/best_model_V3.12.pt' model2 = VideoClassifierV3_10(output_dim=2, hidden_dim=384)
checkpoint1_name = './filter/checkpoints/best_model_V3.14-part1.pt'
checkpoint2_name = './filter/checkpoints/best_model_V3.14-part2.pt'
# 模型保存路径 # 模型保存路径
os.makedirs('./filter/checkpoints', exist_ok=True) os.makedirs('./filter/checkpoints', exist_ok=True)
# 优化器 # 优化器
optimizer = optim.AdamW(model.parameters(), lr=4e-4) optimizer1 = optim.AdamW(model1.parameters(), lr=4e-4)
optimizer2 = optim.AdamW(model2.parameters(), lr=4e-4)
# Cross entropy loss # Cross entropy loss
criterion = nn.CrossEntropyLoss() criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.CrossEntropyLoss()
def count_trainable_parameters(model): def count_trainable_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad) return sum(p.numel() for p in model.parameters() if p.requires_grad)
def evaluate(model, dataloader): def evaluate(model1, model2, dataloader):
model.eval() model1.eval()
model2.eval()
all_preds = [] all_preds = []
all_labels = [] all_labels = []
with torch.no_grad(): with torch.no_grad():
for batch in dataloader: for batch in dataloader:
batch_tensor = prepare_batch(batch['texts']) batch_tensor = prepare_batch(batch['texts'])
logits = model(batch_tensor) logits1 = model1(batch_tensor)
preds = torch.argmax(logits, dim=1) logits2 = model2(batch_tensor)
preds1 = torch.argmax(logits1, dim=1)
preds2 = torch.argmax(logits2, dim=1)
# 如果preds1输出为0那么预测结果为0否则使用preds2的结果加上1
preds = torch.where(preds1 == 0, preds1, preds2 + 1)
all_preds.extend(preds.cpu().numpy()) all_preds.extend(preds.cpu().numpy())
all_labels.extend(batch['label'].cpu().numpy()) all_labels.extend(batch['label'].cpu().numpy())
@ -86,10 +95,10 @@ def evaluate(model, dataloader):
# 获取每个类别的详细指标 # 获取每个类别的详细指标
class_report = classification_report(all_labels, all_preds, output_dict=True) class_report = classification_report(all_labels, all_preds, output_dict=True)
return f1, recall, precision, accuracy, class_report
print(f"Trainable parameters: {count_trainable_parameters(model)}") return f1, recall, precision, accuracy, class_report
print(f"Trainable parameters: {count_trainable_parameters(model1) + count_trainable_parameters(model2)}")
# 训练循环 # 训练循环
best_f1 = 0 best_f1 = 0
@ -98,29 +107,44 @@ eval_interval = 20
num_epochs = 8 num_epochs = 8
for epoch in range(num_epochs): for epoch in range(num_epochs):
model.train() model1.train()
epoch_loss = 0 model2.train()
epoch_loss_1 = 0
epoch_loss_2 = 0
# 训练阶段 # 训练阶段
for batch_idx, batch in enumerate(train_loader): for batch_idx, batch in enumerate(train_loader):
optimizer.zero_grad() optimizer1.zero_grad()
optimizer2.zero_grad()
batch_tensor = prepare_batch(batch['texts']) batch_tensor = prepare_batch(batch['texts'])
batch_tensor_1 = batch_tensor
mask = batch['label'] != 0
batch_tensor_2 = batch_tensor_1[mask]
logits = model(batch_tensor) logits1 = model1(batch_tensor_1)
logits2 = model2(batch_tensor_2)
loss = criterion(logits, batch['label']) label1 = torch.where(batch['label'] == 0, 0, 1)
loss.backward() label2 = torch.where(batch['label'][mask] == 1, 0, 1)
optimizer.step() loss1 = criterion1(logits1, label1)
epoch_loss += loss.item() loss1.backward()
loss2 = criterion2(logits2, label2)
loss2.backward()
optimizer1.step()
optimizer2.step()
epoch_loss_1 += loss1.item()
epoch_loss_2 += loss2.item()
# 记录训练损失 # 记录训练损失
writer.add_scalar('Train/Loss', loss.item(), step) writer.add_scalar('Train/Loss-1', loss1.item(), step)
writer.add_scalar('Train/Loss-2', loss2.item(), step)
step += 1 step += 1
# 每隔 eval_interval 步执行验证 # 每隔 eval_interval 步执行验证
if step % eval_interval == 0: if step % eval_interval == 0:
eval_f1, eval_recall, eval_precision, eval_accuracy, eval_class_report = evaluate(model, eval_loader) eval_f1, eval_recall, eval_precision, eval_accuracy, eval_class_report = evaluate(model1, model2, eval_loader)
writer.add_scalar('Eval/F1', eval_f1, step) writer.add_scalar('Eval/F1', eval_f1, step)
writer.add_scalar('Eval/Recall', eval_recall, step) writer.add_scalar('Eval/Recall', eval_recall, step)
writer.add_scalar('Eval/Precision', eval_precision, step) writer.add_scalar('Eval/Precision', eval_precision, step)
@ -136,17 +160,19 @@ for epoch in range(num_epochs):
# 保存最佳模型 # 保存最佳模型
if eval_f1 > best_f1: if eval_f1 > best_f1:
best_f1 = eval_f1 best_f1 = eval_f1
torch.save(model.state_dict(), checkpoint_name) torch.save(model1.state_dict(), checkpoint1_name)
torch.save(model2.state_dict(), checkpoint2_name)
print(" Saved best model") print(" Saved best model")
print("Channel weights: ", model.get_channel_weights()) print("Channel weights: ", model1.get_channel_weights())
print("Channel weights: ", model2.get_channel_weights())
# 记录每个 epoch 的平均训练损失 # 记录每个 epoch 的平均训练损失
avg_epoch_loss = epoch_loss / len(train_loader) avg_epoch_loss = (epoch_loss_1 + epoch_loss_2) / 2 / len(train_loader)
writer.add_scalar('Train/Epoch_Loss', avg_epoch_loss, epoch) writer.add_scalar('Train/Epoch_Loss', avg_epoch_loss, epoch)
# 每个 epoch 结束后执行一次完整验证 # 每个 epoch 结束后执行一次完整验证
train_f1, train_recall, train_precision, train_accuracy, train_class_report = evaluate(model, train_loader) train_f1, train_recall, train_precision, train_accuracy, train_class_report = evaluate(model1, model2, train_loader)
eval_f1, eval_recall, eval_precision, eval_accuracy, eval_class_report = evaluate(model, eval_loader) eval_f1, eval_recall, eval_precision, eval_accuracy, eval_class_report = evaluate(model1, model2, eval_loader)
writer.add_scalar('Train/Epoch_F1', train_f1, epoch) writer.add_scalar('Train/Epoch_F1', train_f1, epoch)
writer.add_scalar('Train/Epoch_Recall', train_recall, epoch) writer.add_scalar('Train/Epoch_Recall', train_recall, epoch)
@ -173,8 +199,9 @@ for epoch in range(num_epochs):
# 测试阶段 # 测试阶段
print("\nTesting...") print("\nTesting...")
model.load_state_dict(torch.load(checkpoint_name)) model1.load_state_dict(torch.load(checkpoint1_name))
test_f1, test_recall, test_precision, test_accuracy, test_class_report = evaluate(model, test_loader) model2.load_state_dict(torch.load(checkpoint2_name))
test_f1, test_recall, test_precision, test_accuracy, test_class_report = evaluate(model1, model2, test_loader)
writer.add_scalar('Test/F1', test_f1, step) writer.add_scalar('Test/F1', test_f1, step)
writer.add_scalar('Test/Recall', test_recall, step) writer.add_scalar('Test/Recall', test_recall, step)
writer.add_scalar('Test/Precision', test_precision, step) writer.add_scalar('Test/Precision', test_precision, step)