improve: 0259-learning rate

alikia2x (寒寒) 2025-03-07 03:04:35 +08:00
parent a6319f4303
commit e2d8394bf0
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
2 changed files with 11 additions and 4 deletions


@@ -28,3 +28,9 @@ Note
 0012: V3.11 # switch to plain cross-entropy loss
 0039: V3.11 # cascaded classification, but with two independent models
 0122: V3.15 # remove the author_info channel
+0138: V3.15 # best model uses a custom loss
+0038: V6.0 # CNN model
+0128: V6.1 # Transformer model, cross-entropy loss
+0219: V6.1 # MPS training
+0242: V6.1 # custom loss
+0259: V6.1 # adjust the learning rate


@@ -5,6 +5,7 @@ from torch.utils.data import DataLoader
 import torch.optim as optim
 from dataset import MultiChannelDataset
 from filter.modelV6_1 import VideoClassifierV6_1
+from filter.modelV3_15 import AdaptiveRecallLoss
 from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report
 import os
 import torch
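The new import pulls AdaptiveRecallLoss from the V3.15 model file; its implementation is not part of this diff. For orientation only, here is a hypothetical sketch of what a recall-adaptive loss can look like. The class name RecallWeightedLoss, the running-recall buffer, and the weighting rule are all assumptions, not the actual code in filter/modelV3_15:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class RecallWeightedLoss(nn.Module):
    """Hypothetical sketch, NOT the real AdaptiveRecallLoss: cross-entropy
    whose per-class weights grow for classes with low recent recall."""
    def __init__(self, num_classes: int, momentum: float = 0.9):
        super().__init__()
        self.momentum = momentum
        # Running per-class recall estimate, initialised optimistically at 1.0.
        self.register_buffer("recall", torch.ones(num_classes))

    def forward(self, logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        with torch.no_grad():
            preds = logits.argmax(dim=1)
            for c in range(self.recall.numel()):
                mask = targets == c
                if mask.any():  # update only classes present in this batch
                    batch_recall = (preds[mask] == c).float().mean()
                    self.recall[c] = (self.momentum * self.recall[c]
                                      + (1 - self.momentum) * batch_recall)
        # Lower recall -> larger weight, clamped so no class is ignored entirely.
        weights = (1.0 - self.recall).clamp(min=0.1)
        return F.cross_entropy(logits, targets, weight=weights)
```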
@@ -58,7 +59,7 @@ class_weights = torch.tensor(
 )
 model = VideoClassifierV6_1().to(device)
-checkpoint_name = './filter/checkpoints/best_model_V6.2-mps.pt'
+checkpoint_name = './filter/checkpoints/best_model_V6.2-mps-adloss.pt'
 # initialize the tokenizer and embedding model
 tokenizer = AutoTokenizer.from_pretrained("alikia2x/jina-embedding-v3-m2v-1024")
@@ -72,9 +73,9 @@ eval_interval = 20
 num_epochs = 20
 total_steps = samples_count * num_epochs / train_loader.batch_size
 warmup_rate = 0.1
-optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
+optimizer = optim.AdamW(model.parameters(), lr=6e-5, weight_decay=1e-5)
 cosine_annealing_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps - int(total_steps * warmup_rate))
-warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
+warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.14, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
 scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup_scheduler, cosine_annealing_scheduler], milestones=[int(total_steps * warmup_rate)])
 criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)
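The substantive change here is the optimizer/scheduler tuning: the peak learning rate drops from 1e-4 to 6e-5, and the warmup start_factor rises from 0.1 to 0.14, so training now begins near 0.14 * 6e-5 ≈ 8.4e-6, ramps linearly over the first 10% of steps, then cosine-anneals toward zero. Below is a minimal, self-contained sketch of the same schedule; samples_count and batch size are illustrative stand-ins, and the sketch uses floor division for total_steps so step counts are integers, where the script above uses true division:

```python
import torch
import torch.optim as optim

model = torch.nn.Linear(1024, 2)  # stand-in for VideoClassifierV6_1
samples_count, batch_size, num_epochs = 10_000, 32, 20  # illustrative values
total_steps = samples_count * num_epochs // batch_size
warmup_rate = 0.1
warmup_steps = int(total_steps * warmup_rate)

optimizer = optim.AdamW(model.parameters(), lr=6e-5, weight_decay=1e-5)
# 10% linear warmup from 0.14 * 6e-5 up to the peak rate ...
warmup = optim.lr_scheduler.LinearLR(
    optimizer, start_factor=0.14, end_factor=1.0, total_iters=warmup_steps)
# ... then cosine annealing over the remaining 90% of steps.
cosine = optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=total_steps - warmup_steps)
scheduler = optim.lr_scheduler.SequentialLR(
    optimizer, schedulers=[warmup, cosine], milestones=[warmup_steps])

for step in range(total_steps):
    optimizer.step()   # real training step elided
    scheduler.step()
    if step in (0, warmup_steps, total_steps - 1):
        # prints lr near the warmup start, at the peak, and near zero
        print(step, scheduler.get_last_lr()[0])
```

SequentialLR hands the optimizer from the warmup scheduler to the cosine scheduler at the milestone step, so a single scheduler.step() per batch drives the whole curve.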