diff --git a/filter/RunningLogs.txt b/filter/RunningLogs.txt
index 65b0d04..8d76c33 100644
--- a/filter/RunningLogs.txt
+++ b/filter/RunningLogs.txt
@@ -27,4 +27,10 @@ Note
 2350: V3.13 # V3.12, switched to plain cross-entropy loss
 0012: V3.11 # switched to plain cross-entropy loss
 0039: V3.11 # cascaded classification, but with two independent models
-0122: V3.15 # removed the author_info channel
\ No newline at end of file
+0122: V3.15 # removed the author_info channel
+0138: V3.15 # best model so far, uses the custom loss
+0038: V6.0 # CNN model
+0128: V6.1 # Transformer model (cross-entropy loss)
+0219: V6.1 # MPS training
+0242: V6.1 # custom loss
+0259: V6.1 # adjusted learning rate
\ No newline at end of file
diff --git a/filter/train.py b/filter/train.py
index 3952965..e206455 100644
--- a/filter/train.py
+++ b/filter/train.py
@@ -5,6 +5,7 @@ from torch.utils.data import DataLoader
 import torch.optim as optim
 from dataset import MultiChannelDataset
 from filter.modelV6_1 import VideoClassifierV6_1
+from filter.modelV3_15 import AdaptiveRecallLoss
 from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report
 import os
 import torch
@@ -58,7 +59,7 @@ class_weights = torch.tensor(
 )
 
 model = VideoClassifierV6_1().to(device)
-checkpoint_name = './filter/checkpoints/best_model_V6.2-mps.pt'
+checkpoint_name = './filter/checkpoints/best_model_V6.2-mps-adloss.pt'
 
 # Initialize the tokenizer and embedding model
 tokenizer = AutoTokenizer.from_pretrained("alikia2x/jina-embedding-v3-m2v-1024")
@@ -72,9 +73,9 @@ eval_interval = 20
 num_epochs = 20
 total_steps = samples_count * num_epochs / train_loader.batch_size
 warmup_rate = 0.1
-optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
+optimizer = optim.AdamW(model.parameters(), lr=6e-5, weight_decay=1e-5)
 cosine_annealing_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps - int(total_steps * warmup_rate))
-warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
+warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.14, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
 scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup_scheduler, cosine_annealing_scheduler], milestones=[int(total_steps * warmup_rate)])
 criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)
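
Note on the new import: AdaptiveRecallLoss is defined in filter/modelV3_15.py, which is not part of this diff, so its actual implementation is unknown here. Purely as a hypothetical sketch of what a recall-oriented loss of this shape could look like, assuming it reweights cross-entropy per class by how badly that class is recalled in the current batch, one might write:

import torch
import torch.nn as nn
import torch.nn.functional as F

class AdaptiveRecallLossSketch(nn.Module):
    """Hypothetical stand-in for filter.modelV3_15.AdaptiveRecallLoss; not the real code."""

    def __init__(self, num_classes: int):
        super().__init__()
        self.num_classes = num_classes

    def forward(self, logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        preds = logits.argmax(dim=1)
        # Start from uniform class weights, then boost classes the model
        # currently under-recalls in this batch.
        weights = torch.ones(self.num_classes, device=logits.device)
        for c in range(self.num_classes):
            mask = targets == c
            if mask.any():
                recall = (preds[mask] == c).float().mean()
                weights[c] = 1.0 + (1.0 - recall)
        return F.cross_entropy(logits, targets, weight=weights)

In the hunk above, criterion is still nn.CrossEntropyLoss(weight=class_weights), so the imported loss is presumably swapped in elsewhere in train.py; the new "-adloss" checkpoint name and the "0242: V6.1 # custom loss" log entry point the same way.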
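The last hunk lowers the base learning rate from 1e-4 to 6e-5 and raises the warmup start_factor from 0.1 to 0.14, keeping the same shape of schedule: a linear warmup over the first 10% of steps chained into cosine annealing via SequentialLR. A self-contained way to sanity-check the resulting learning-rate curve (placeholder model and a small made-up total_steps; the optimizer and scheduler arguments mirror the diff):

import torch
import torch.optim as optim

model = torch.nn.Linear(4, 2)          # placeholder parameters
total_steps = 100                      # made-up small number for readability
warmup_rate = 0.1
warmup_steps = int(total_steps * warmup_rate)

optimizer = optim.AdamW(model.parameters(), lr=6e-5, weight_decay=1e-5)
warmup = optim.lr_scheduler.LinearLR(
    optimizer, start_factor=0.14, end_factor=1.0, total_iters=warmup_steps)
cosine = optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=total_steps - warmup_steps)
scheduler = optim.lr_scheduler.SequentialLR(
    optimizer, schedulers=[warmup, cosine], milestones=[warmup_steps])

for step in range(total_steps):
    optimizer.step()
    scheduler.step()
    if step % 20 == 0:
        print(f"step {step:3d}  lr {scheduler.get_last_lr()[0]:.2e}")

# The LR ramps from 0.14 * 6e-5 = 8.4e-6 up to 6e-5 over the warmup window,
# then decays along a cosine curve toward zero for the remaining steps.

One small difference: in train.py, total_steps comes from true division (samples_count * num_epochs / train_loader.batch_size) and is therefore a float; the sketch uses an int for simplicity, which is also what the diff's int(total_steps * warmup_rate) casts push toward.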