improve: 0259-learning rate

parent a6319f4303
commit e2d8394bf0
@@ -28,3 +28,9 @@ Note
 0012: V3.11 # switch to plain cross-entropy loss
 0039: V3.11 # cascaded classification, but with two independent models
 0122: V3.15 # remove the author_info channel
+0138: V3.15 # best model so far, uses a custom loss
+0038: V6.0  # CNN model
+0128: V6.1  # Transformer model (cross-entropy loss)
+0219: V6.1  # MPS training
+0242: V6.1  # custom loss
+0259: V6.1  # adjust the learning rate
@@ -5,6 +5,7 @@ from torch.utils.data import DataLoader
 import torch.optim as optim
 from dataset import MultiChannelDataset
 from filter.modelV6_1 import VideoClassifierV6_1
+from filter.modelV3_15 import AdaptiveRecallLoss
 from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report
 import os
 import torch
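This hunk only adds the import; AdaptiveRecallLoss itself is defined in filter/modelV3_15.py and is not shown in this diff. As a rough, hypothetical sketch of what a recall-oriented loss of this kind can look like (the class name RecallWeightedCE and the running-recall reweighting are assumptions, not the repository's actual implementation):

import torch
import torch.nn as nn
import torch.nn.functional as F

class RecallWeightedCE(nn.Module):
    """Sketch only: cross-entropy whose per-class weights grow for
    classes with low running recall. Not the real AdaptiveRecallLoss."""

    def __init__(self, num_classes: int, momentum: float = 0.9):
        super().__init__()
        self.momentum = momentum
        # Running per-class recall estimate, initialised optimistically to 1.
        self.register_buffer("recall", torch.ones(num_classes))

    def forward(self, logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        preds = logits.argmax(dim=1)
        for c in range(self.recall.numel()):
            mask = targets == c
            if mask.any():
                batch_recall = (preds[mask] == c).float().mean()
                self.recall[c] = self.momentum * self.recall[c] + (1 - self.momentum) * batch_recall
        # Under-recalled classes receive proportionally larger weight.
        weights = (1.0 - self.recall).clamp(min=0.1)
        return F.cross_entropy(logits, targets, weight=weights)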
@@ -58,7 +59,7 @@ class_weights = torch.tensor(
 )
 
 model = VideoClassifierV6_1().to(device)
-checkpoint_name = './filter/checkpoints/best_model_V6.2-mps.pt'
+checkpoint_name = './filter/checkpoints/best_model_V6.2-mps-adloss.pt'
 
 # initialize the tokenizer and embedding model
 tokenizer = AutoTokenizer.from_pretrained("alikia2x/jina-embedding-v3-m2v-1024")
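checkpoint_name is only renamed here, marking this run as the adaptive-loss variant; the save/load code sits outside the diff. A minimal sketch of the usual state_dict pattern the name implies:

import torch

# Persist the best-scoring weights under the run-specific name.
torch.save(model.state_dict(), checkpoint_name)

# Restore them later for evaluation; map_location covers machines
# without the original MPS/CUDA device.
state = torch.load(checkpoint_name, map_location=device)
model.load_state_dict(state)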
@@ -72,9 +73,9 @@ eval_interval = 20
 num_epochs = 20
 total_steps = samples_count * num_epochs / train_loader.batch_size
 warmup_rate = 0.1
-optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
+optimizer = optim.AdamW(model.parameters(), lr=6e-5, weight_decay=1e-5)
 cosine_annealing_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps - int(total_steps * warmup_rate))
-warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
+warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.14, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
 scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup_scheduler, cosine_annealing_scheduler], milestones=[int(total_steps * warmup_rate)])
 criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)
 
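The net effect of this hunk: the peak learning rate drops from 1e-4 to 6e-5, while raising start_factor from 0.1 to 0.14 keeps the initial warmup rate nearly unchanged (1e-4 * 0.1 = 1e-5 before, 6e-5 * 0.14 = 8.4e-6 after), so only the post-warmup ceiling and the cosine decay path really change. A self-contained sketch of the resulting warmup-then-cosine schedule, assuming illustrative values samples_count=8000 and batch_size=32 (the real ones come from the dataset and DataLoader outside this diff); note the sketch casts total_steps to int, whereas the script leaves it as a float division:

import torch
import torch.optim as optim

samples_count, num_epochs, batch_size = 8000, 20, 32   # assumed values
total_steps = int(samples_count * num_epochs / batch_size)  # 5000
warmup_rate = 0.1
warmup_iters = int(total_steps * warmup_rate)               # 500

model = torch.nn.Linear(4, 2)  # stand-in for VideoClassifierV6_1
optimizer = optim.AdamW(model.parameters(), lr=6e-5, weight_decay=1e-5)
warmup = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.14, end_factor=1.0, total_iters=warmup_iters)
cosine = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps - warmup_iters)
scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup, cosine], milestones=[warmup_iters])

for step in range(total_steps):
    optimizer.step()
    scheduler.step()
    if step in (0, warmup_iters - 1, total_steps - 1):
        # LR ramps from 8.4e-6 up to 6e-5 during warmup, then decays to ~0.
        print(step, scheduler.get_last_lr()[0])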