update: V6.3 filter model
parent 11ec3e8295
commit c2c2b4c29d
@@ -2,7 +2,7 @@ import torch
 import torch.nn as nn
 
 class VideoClassifierV6_3(nn.Module):
-    def __init__(self, embedding_dim=72, hidden_dim=256, output_dim=3, num_heads=4, num_layers=2):
+    def __init__(self, embedding_dim=256, hidden_dim=256, output_dim=3, num_heads=4, num_layers=2):
         super().__init__()
         self.num_channels = 3
         self.channel_names = ['title', 'description', 'tags']
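Note: the only change in the model file is the constructor default, embedding_dim 72 -> 256, which must agree with the 256-wide ONNX embedding model swapped in later in this commit. A minimal consistency-check sketch, assuming the exported graph's first output carries the per-token embeddings with a static last dimension:

import onnxruntime as ort

session = ort.InferenceSession("./model/embedding_256/onnx/model.onnx")
width = session.get_outputs()[0].shape[-1]  # may be symbolic if the dim is dynamic
assert width == 256, f"classifier expects embedding_dim=256, model emits {width}"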
@@ -4,7 +4,7 @@ import numpy as np
 from torch.utils.data import DataLoader
 import torch.optim as optim
 from dataset import MultiChannelDataset
-from filter.modelV6_1 import VideoClassifierV6_1
+from filter.modelV6_3 import VideoClassifierV6_3
 from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, classification_report
 import os
 import torch
@@ -61,12 +61,12 @@ class_weights = torch.tensor(
     device=device
 )
 
-model = VideoClassifierV6_1().to(device)
-checkpoint_name = './filter/checkpoints/best_model_V6.3.pt'
+model = VideoClassifierV6_3().to(device)
+checkpoint_name = './filter/checkpoints/best_model_V6.3-II.pt'
 
 # Initialize the tokenizer and the embedding model
 tokenizer = AutoTokenizer.from_pretrained("alikia2x/jina-embedding-v3-m2v-1024")
-session = ort.InferenceSession("./model/embedding_72/onnx/model.onnx")
+session = ort.InferenceSession("./model/embedding_256/onnx/model.onnx")
 
 # Model save path
 os.makedirs('./filter/checkpoints', exist_ok=True)
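Note: the tokenizer and ONNX session created above feed prepare_batch_per_token in the loops below. The helper's body is not part of this diff; the following is a hypothetical sketch of what it would have to produce, under assumed details (a single model output, the graph's first input name, 'title'/'description'/'tags' keys in batch['texts'], and a fixed padding length):

import torch

def prepare_batch_per_token(session, tokenizer, texts, embedding_dim=256, max_len=128):
    # Hypothetical sketch, not this commit's code: embed each channel's texts with
    # the ONNX model and stack them into one (batch, channel, token, dim) tensor.
    channels = []
    for name in ['title', 'description', 'tags']:
        enc = tokenizer(texts[name], padding='max_length', truncation=True,
                        max_length=max_len, return_tensors='np')
        input_name = session.get_inputs()[0].name
        (emb,) = session.run(None, {input_name: enc['input_ids']})  # assumes one output: (B, T, D)
        channels.append(torch.from_numpy(emb).float())
    return torch.stack(channels, dim=1)  # (B, 3, T, D)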
@@ -74,11 +74,11 @@ os.makedirs('./filter/checkpoints', exist_ok=True)
 # Optimizer
 eval_interval = 20
 num_epochs = 20
-total_steps = samples_count * num_epochs / train_loader.batch_size
+total_steps = samples_count * num_epochs / batch_size
 warmup_rate = 0.1
 optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-5)
 cosine_annealing_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps - int(total_steps * warmup_rate))
-warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.4, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
+warmup_scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, end_factor=1.0, total_iters=int(total_steps * warmup_rate))
 scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup_scheduler, cosine_annealing_scheduler], milestones=[int(total_steps * warmup_rate)])
 criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)
 
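Note: the warmup floor drops from 0.4 to 0.1, so training now starts at 10% of the peak learning rate (5e-6 instead of 2e-5) before ramping up. A standalone sketch of the composed schedule; samples_count and batch_size here are placeholder values, not taken from this commit:

import torch
import torch.optim as optim

samples_count, num_epochs, batch_size, warmup_rate = 10_000, 20, 32, 0.1  # placeholders
total_steps = samples_count * num_epochs / batch_size
warmup_steps = int(total_steps * warmup_rate)

model = torch.nn.Linear(256, 3)  # stand-in model
optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-5)
warmup = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.1, end_factor=1.0, total_iters=warmup_steps)
cosine = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps - warmup_steps)
scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup, cosine], milestones=[warmup_steps])

for step in range(int(total_steps)):
    optimizer.step()
    scheduler.step()  # LR: 5e-6 -> 5e-5 over warmup, then cosine decay toward 0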
@@ -92,7 +92,7 @@ def evaluate(model, dataloader):
 
     with torch.no_grad():
         for batch in dataloader:
-            batch_tensor = prepare_batch_per_token(session, tokenizer, batch['texts'], embedding_dim=72).to(device)
+            batch_tensor = prepare_batch_per_token(session, tokenizer, batch['texts'], embedding_dim=256).to(device)
             logits = model(batch_tensor)
             preds = torch.argmax(logits, dim=1)
             all_preds.extend(preds.cpu().numpy())
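Note: evaluate() only collects predictions in this hunk; given the sklearn imports above, scoring plausibly looks like the sketch below. all_labels is assumed to be gathered alongside all_preds; the dummy values just make the sketch runnable on its own:

from sklearn.metrics import accuracy_score, f1_score, classification_report

all_labels = [0, 1, 2, 1]  # placeholder ground truth
all_preds = [0, 1, 2, 2]   # placeholder predictions
print("accuracy:", accuracy_score(all_labels, all_preds))
print("macro F1:", f1_score(all_labels, all_preds, average='macro'))
print(classification_report(all_labels, all_preds))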
@@ -124,7 +124,7 @@ for epoch in range(num_epochs):
         optimizer.zero_grad()
 
 
-        batch_tensor = prepare_batch_per_token(session, tokenizer, batch['texts'], embedding_dim=72).to(device)
+        batch_tensor = prepare_batch_per_token(session, tokenizer, batch['texts'], embedding_dim=256).to(device)
 
         logits = model(batch_tensor)
 