diff --git a/pred/dataset.py b/pred/dataset.py index dfa7614..7b649f1 100644 --- a/pred/dataset.py +++ b/pred/dataset.py @@ -14,7 +14,7 @@ class VideoPlayDataset(Dataset): self.max_future_seconds = max_future_days * 86400 self.series_dict = self._load_and_process_data(publish_time_path) self.valid_series = [s for s in self.series_dict.values() if len(s['abs_time']) > 1] - self.feature_windows = [3600, 6*3600, 24*3600, 3*24*3600, 7*24*3600] # 1h,6h,24h,3d,7d + self.feature_windows = [3600, 3*3600, 6*3600, 24*3600, 3*24*3600, 7*24*3600, 60*24*3600] def _extract_features(self, series, current_idx, target_idx): """提取增量特征""" @@ -23,7 +23,7 @@ class VideoPlayDataset(Dataset): dt = datetime.datetime.fromtimestamp(current_time) # 时间特征 time_features = [ - dt.hour / 24, (dt.weekday() + 1) / 7, + (dt.hour * 3600 + dt.minute * 60 + dt.second) / 86400, (dt.weekday() * 24 + dt.hour) / 168, np.log2(max(current_time - series['create_time'],1)) ] @@ -76,18 +76,18 @@ class VideoPlayDataset(Dataset): def _get_nearest_value(self, series, target_time, current_idx): """获取指定时间前最近的数据点""" min_diff = float('inf') - for i in range(current_idx + 1, len(series['abs_time']), 1): + for i in range(current_idx + 1, len(series['abs_time'])): diff = abs(series['abs_time'][i] - target_time) if diff < min_diff: min_diff = diff else: return i - 1 - return None + return len(series['abs_time']) - 1 def __getitem__(self, idx): series = random.choice(self.valid_series) current_idx = random.randint(0, len(series['abs_time'])-2) - target_idx = random.randint(max(0, current_idx-50), current_idx) + target_idx = random.randint(max(0, current_idx-10), current_idx) # 提取特征 features = self._extract_features(series, current_idx, target_idx) diff --git a/pred/inference.py b/pred/inference.py index 3efb34a..4a9e055 100644 --- a/pred/inference.py +++ b/pred/inference.py @@ -4,14 +4,25 @@ import torch def main(): model = CompactPredictor(10).to('cpu', dtype=torch.float32) - model.load_state_dict(torch.load('play_predictor.pth')) + model.load_state_dict(torch.load('./pred/checkpoints/play_predictor.pth')) model.eval() # inference - data = [3,3.9315974229,5.4263146604,9.4958550269,10.9203528554,11.5835529305,13.0426853722,0.7916666667,0.2857142857,24.7794093257] - np_arr = np.array([data]) - tensor = torch.from_numpy(np_arr).to('cpu', dtype=torch.float32) - output = model(tensor) - print(output) + last = 999469 + for i in range(1, 48): + hour = i / 2 + sec = hour * 3600 + time_d = np.log2(sec) + data = [time_d, 19.9295936113, # time_delta, current_views + 6.1575520046,8.980,10.6183855023,12.0313328273,13.2537252486, # growth_feat + 0.625,0.2857142857,24.7794093257 # time_feat + ] + np_arr = np.array([data]) + tensor = torch.from_numpy(np_arr).to('cpu', dtype=torch.float32) + output = model(tensor) + num = output.detach().numpy()[0][0] + views_pred = int(np.exp2(num)) + 999469 + print(f"{int(15+hour)%24:02d}:{int((15+hour)*60)%60:02d}", views_pred, views_pred - last) + last = views_pred if __name__ == '__main__': main() \ No newline at end of file diff --git a/pred/train.py b/pred/train.py index 603eb17..cae5a47 100644 --- a/pred/train.py +++ b/pred/train.py @@ -26,7 +26,7 @@ def train(model, dataloader, device, epochs=100): outputs = model(features) loss = criterion(outputs, targets) loss.backward() - #torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) + torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) optimizer.step() scheduler.step() @@ -49,16 +49,21 @@ def train(model, dataloader, device, epochs=100): writer.add_scalar('Params/Mean', sum(param_means)/len(param_means), global_step) samples_count = len(targets) - r = random.randint(0, samples_count-1) - t = float(torch.exp2(targets[r])) - 1 - o = float(torch.exp2(outputs[r])) - 1 - d = features[r].cpu().numpy()[0] - speed = np.exp2(features[r].cpu().numpy()[2]) - time_diff = np.exp2(d) / 3600 - inc = speed * time_diff - model_error = abs(t - o) - reg_error = abs(inc - t) - print(f"{t:07.1f} | {o:07.1f} | {d:07.1f} | {inc:07.1f} | {model_error < reg_error}") + good = 0 + for r in range(samples_count): + r = random.randint(0, samples_count-1) + t = float(torch.exp2(targets[r])) - 1 + o = float(torch.exp2(outputs[r])) - 1 + d = features[r].cpu().numpy()[0] + speed = np.exp2(features[r].cpu().numpy()[5]) / 24 + time_diff = np.exp2(d) / 3600 + inc = speed * time_diff + model_error = abs(t - o) + reg_error = abs(inc - t) + if model_error < reg_error: + good += 1 + #print(f"{t:07.1f} | {o:07.1f} | {d:07.1f} | {inc:07.1f} | {good/samples_count*100:.1f}%") + writer.add_scalar('Train/WinRate', good/samples_count, global_step) print(f"Epoch {epoch+1} | Avg Loss: {total_loss/len(dataloader):.4f}")