From 2c2e780700faf78488f2c6ec30c31397edd1ab88 Mon Sep 17 00:00:00 2001
From: alikia2x
Date: Fri, 7 Mar 2025 22:25:37 +0800
Subject: [PATCH] update: idfk

---
 filter/RunningLogs.txt          |  3 +--
 filter/checkpoint_conversion.py | 26 --------------------------
 filter/train.py                 |  4 ++--
 3 files changed, 3 insertions(+), 30 deletions(-)
 delete mode 100644 filter/checkpoint_conversion.py

diff --git a/filter/RunningLogs.txt b/filter/RunningLogs.txt
index ba7b286..7b8cc7f 100644
--- a/filter/RunningLogs.txt
+++ b/filter/RunningLogs.txt
@@ -34,5 +34,4 @@ Note
 0219: V6.1 # MPS训练
 0242: V6.1 # 自定义loss
 0259: V6.1 # 调整学习率 (自定义loss)
-0314: V6.1 # 调整学习率(交叉熵损失)
-0349: V6.3 # 增加层数至2
\ No newline at end of file
+0314: V6.1 # 调整学习率(交叉熵损失)
\ No newline at end of file
diff --git a/filter/checkpoint_conversion.py b/filter/checkpoint_conversion.py
deleted file mode 100644
index 61a72ed..0000000
--- a/filter/checkpoint_conversion.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-
-from modelV3_10 import VideoClassifierV3_10
-from modelV3_9 import VideoClassifierV3_9
-
-
-def convert_checkpoint(original_model, new_model):
-    """转换原始checkpoint到新结构"""
-    state_dict = original_model.state_dict()
-
-    # 直接复制所有参数(因为结构保持兼容)
-    new_model.load_state_dict(state_dict)
-    return new_model
-
-# 使用示例
-original_model = VideoClassifierV3_9()
-new_model = VideoClassifierV3_10()
-
-# 加载原始checkpoint
-original_model.load_state_dict(torch.load('./filter/checkpoints/best_model_V3.9.pt'))
-
-# 转换参数
-converted_model = convert_checkpoint(original_model, new_model)
-
-# 保存转换后的模型
-torch.save(converted_model.state_dict(), './filter/checkpoints/best_model_V3.10.pt')
\ No newline at end of file
diff --git a/filter/train.py b/filter/train.py
index f35e0d7..3a65346 100644
--- a/filter/train.py
+++ b/filter/train.py
@@ -76,9 +76,9 @@
 eval_interval = 20
 num_epochs = 20
 total_steps = samples_count * num_epochs / batch_size
 warmup_rate = 0.1
-optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-5)
+optimizer = optim.AdamW(model.parameters(), lr=1e-5, weight_decay=1e-3)
 cosine_annealing_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps - int(total_steps * warmup_rate))
-warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
+warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.4, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
 scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup_scheduler, cosine_annealing_scheduler], milestones=[int(total_steps * warmup_rate)])
 criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)