From c2c2b4c29d528718ed667e8e3b66075d67880d2d Mon Sep 17 00:00:00 2001 From: alikia2x Date: Fri, 7 Mar 2025 20:14:01 +0800 Subject: [PATCH] update: V6.3 filter model --- filter/modelV6_3.py | 2 +- filter/train.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/filter/modelV6_3.py b/filter/modelV6_3.py index 1d7e93b..a317642 100644 --- a/filter/modelV6_3.py +++ b/filter/modelV6_3.py @@ -2,7 +2,7 @@ import torch import torch.nn as nn class VideoClassifierV6_3(nn.Module): - def __init__(self, embedding_dim=72, hidden_dim=256, output_dim=3, num_heads=4, num_layers=2): + def __init__(self, embedding_dim=256, hidden_dim=256, output_dim=3, num_heads=4, num_layers=2): super().__init__() self.num_channels = 3 self.channel_names = ['title', 'description', 'tags'] diff --git a/filter/train.py b/filter/train.py index 41eee4e..f35e0d7 100644 --- a/filter/train.py +++ b/filter/train.py @@ -4,7 +4,7 @@ import numpy as np from torch.utils.data import DataLoader import torch.optim as optim from dataset import MultiChannelDataset -from filter.modelV6_1 import VideoClassifierV6_1 +from filter.modelV6_3 import VideoClassifierV6_3 from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report import os import torch @@ -61,12 +61,12 @@ class_weights = torch.tensor( device=device ) -model = VideoClassifierV6_1().to(device) -checkpoint_name = './filter/checkpoints/best_model_V6.3.pt' +model = VideoClassifierV6_3().to(device) +checkpoint_name = './filter/checkpoints/best_model_V6.3-II.pt' # 初始化tokenizer和embedding模型 tokenizer = AutoTokenizer.from_pretrained("alikia2x/jina-embedding-v3-m2v-1024") -session = ort.InferenceSession("./model/embedding_72/onnx/model.onnx") +session = ort.InferenceSession("./model/embedding_256/onnx/model.onnx") # 模型保存路径 os.makedirs('./filter/checkpoints', exist_ok=True) @@ -74,11 +74,11 @@ os.makedirs('./filter/checkpoints', exist_ok=True) # 优化器 eval_interval = 20 num_epochs = 20 -total_steps = samples_count * num_epochs / train_loader.batch_size +total_steps = samples_count * num_epochs / batch_size warmup_rate = 0.1 optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-5) cosine_annealing_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps - int(total_steps * warmup_rate)) -warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.4, end_factor=1.0, total_iters=int(total_steps * warmup_rate)) +warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, end_factor=1.0, total_iters=int(total_steps * warmup_rate)) scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup_scheduler, cosine_annealing_scheduler], milestones=[int(total_steps * warmup_rate)]) criterion = nn.CrossEntropyLoss(weight=class_weights).to(device) @@ -92,7 +92,7 @@ def evaluate(model, dataloader): with torch.no_grad(): for batch in dataloader: - batch_tensor = prepare_batch_per_token(session, tokenizer, batch['texts'], embedding_dim=72).to(device) + batch_tensor = prepare_batch_per_token(session, tokenizer, batch['texts'], embedding_dim=256).to(device) logits = model(batch_tensor) preds = torch.argmax(logits, dim=1) all_preds.extend(preds.cpu().numpy()) @@ -124,7 +124,7 @@ for epoch in range(num_epochs): optimizer.zero_grad() - batch_tensor = prepare_batch_per_token(session, tokenizer, batch['texts'], embedding_dim=72).to(device) + batch_tensor = prepare_batch_per_token(session, tokenizer, batch['texts'], embedding_dim=256).to(device) logits = model(batch_tensor)