From 5a83120ad6e552aece533fb6f03be539c9fac44c Mon Sep 17 00:00:00 2001 From: alikia2x Date: Sat, 25 Jan 2025 02:57:55 +0800 Subject: [PATCH] update: filter model V3.3 --- filter/RunningLogs.txt | 3 +- filter/modelV1_5.py | 47 ----------------------------- filter/{modelV5.py => modelV3_3.py} | 22 +++++--------- filter/train.py | 13 ++------ 4 files changed, 13 insertions(+), 72 deletions(-) delete mode 100644 filter/modelV1_5.py rename filter/{modelV5.py => modelV3_3.py} (75%) diff --git a/filter/RunningLogs.txt b/filter/RunningLogs.txt index c2bf03f..7dd0260 100644 --- a/filter/RunningLogs.txt +++ b/filter/RunningLogs.txt @@ -12,4 +12,5 @@ Note 0125: V4.1-test3 0133: V4.2-test3 0138: V4.3-test3 -0155: V5-test3 # V4 的效果也不是特别好 \ No newline at end of file +0155: V5-test3 # V4 的效果也不是特别好 +0229: V3.3-test3 # 重新回到V3迭代 \ No newline at end of file diff --git a/filter/modelV1_5.py b/filter/modelV1_5.py deleted file mode 100644 index fe69025..0000000 --- a/filter/modelV1_5.py +++ /dev/null @@ -1,47 +0,0 @@ -import torch -import torch.nn as nn - -class VideoClassifierV1_5(nn.Module): - def __init__(self, embedding_dim=1024, hidden_dim=256, output_dim=3): - super().__init__() - self.num_channels = 4 - self.channel_names = ['title', 'description', 'tags', 'author_info'] - - # 通道权重参数(可学习) - self.channel_weights = nn.Parameter(torch.ones(self.num_channels)) - - # 全连接层 - self.fc1 = nn.Linear(embedding_dim * self.num_channels, hidden_dim) - self.fc2 = nn.Linear(hidden_dim, output_dim) - self.log_softmax = nn.LogSoftmax(dim=1) - - def forward(self, input_texts, sentence_transformer): - # 各通道特征提取 - channel_features = [] - for _, name in enumerate(self.channel_names): - # 获取当前通道的批量文本 - batch_texts = input_texts[name] - - # 使用SentenceTransformer生成嵌入 - embeddings = torch.tensor( - sentence_transformer.encode(batch_texts, task="classification") - ) - channel_features.append(embeddings) - - # 将通道特征堆叠并加权 - channel_features = torch.stack(channel_features, dim=1) # [batch_size, num_channels, embedding_dim] - channel_weights = torch.softmax(self.channel_weights, dim=0) - weighted_features = channel_features * channel_weights.unsqueeze(0).unsqueeze(-1) - - # 拼接所有通道特征 - combined_features = weighted_features.view(weighted_features.size(0), -1) # [batch_size, num_channels * embedding_dim] - - # 全连接层 - x = torch.relu(self.fc1(combined_features)) - output = self.fc2(x) - output = self.log_softmax(output) - return output - - def get_channel_weights(self): - """获取各通道的权重(用于解释性分析)""" - return torch.softmax(self.channel_weights, dim=0).detach().cpu().numpy() diff --git a/filter/modelV5.py b/filter/modelV3_3.py similarity index 75% rename from filter/modelV5.py rename to filter/modelV3_3.py index 5cc597d..5ab4f57 100644 --- a/filter/modelV5.py +++ b/filter/modelV3_3.py @@ -1,28 +1,26 @@ import torch import torch.nn as nn -class VideoClassifierV5(nn.Module): - def __init__(self, embedding_dim=1024, hidden_dim=640, output_dim=3): +class VideoClassifierV3_3(nn.Module): + def __init__(self, embedding_dim=1024, hidden_dim=512, output_dim=3): super().__init__() self.num_channels = 4 self.channel_names = ['title', 'description', 'tags', 'author_info'] - # 改进1:带温度系数的通道权重(比原始固定权重更灵活) + # 带温度系数的通道权重(比原始固定权重更灵活) self.channel_weights = nn.Parameter(torch.ones(self.num_channels)) - self.temperature = 1.4 # 可调节的平滑系数 + self.temperature = 1.7 # 可调节的平滑系数 - # 改进2:更稳健的全连接结构 + # 改进后的非线性层 self.fc = nn.Sequential( nn.Linear(embedding_dim * self.num_channels, hidden_dim*2), nn.BatchNorm1d(hidden_dim*2), nn.Dropout(0.1), nn.ReLU(), - nn.Linear(hidden_dim*2, hidden_dim), - nn.LayerNorm(hidden_dim), - nn.Linear(hidden_dim, output_dim) + nn.Linear(hidden_dim*2, output_dim) ) - # 改进3:输出层初始化 + # 输出层初始化 nn.init.xavier_uniform_(self.fc[-1].weight) nn.init.zeros_(self.fc[-1].bias) @@ -55,8 +53,4 @@ class VideoClassifierV5(nn.Module): def get_channel_weights(self): """获取各通道权重(带温度调节)""" - return torch.softmax(self.channel_weights / self.temperature, dim=0).detach().cpu().numpy() - - def set_temperature(self, temperature): - """设置温度值""" - self.temperature = temperature \ No newline at end of file + return torch.softmax(self.channel_weights / self.temperature, dim=0).detach().cpu().numpy() \ No newline at end of file diff --git a/filter/train.py b/filter/train.py index 5f644f4..f4b7ee1 100644 --- a/filter/train.py +++ b/filter/train.py @@ -3,7 +3,7 @@ os.environ["PYTORCH_ENABLE_MPS_FALLBACK"]="1" from torch.utils.data import DataLoader import torch.optim as optim from dataset import MultiChannelDataset -from modelV5 import VideoClassifierV5 +from modelV3_3 import VideoClassifierV3_3 from sentence_transformers import SentenceTransformer import torch.nn as nn from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report @@ -39,8 +39,8 @@ test_loader = DataLoader(test_dataset, batch_size=24, shuffle=False) # 初始化模型和SentenceTransformer sentence_transformer = SentenceTransformer("Thaweewat/jina-embedding-v3-m2v-1024") -model = VideoClassifierV5() -checkpoint_name = './filter/checkpoints/best_model_V5.pt' +model = VideoClassifierV3_3() +checkpoint_name = './filter/checkpoints/best_model_V3.3.pt' # 模型保存路径 os.makedirs('./filter/checkpoints', exist_ok=True) @@ -84,19 +84,12 @@ step = 0 eval_interval = 50 num_epochs = 8 -total_steps = num_epochs * len(train_loader) # 总训练步数 -T_max = 1.4 # 初始温度 -T_min = 0.15 # 最终温度 - for epoch in range(num_epochs): model.train() epoch_loss = 0 # 训练阶段 for batch_idx, batch in enumerate(train_loader): - temperature = T_max - (T_max - T_min) * (step / total_steps) - model.set_temperature(temperature) - optimizer.zero_grad() # 传入文本字典和sentence_transformer