add: more features

alikia2x (寒寒) 2025-03-13 23:32:13 +08:00
parent 1de8d85d2b
commit 23e5d6a8c9
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
3 changed files with 38 additions and 22 deletions


@@ -14,7 +14,7 @@ class VideoPlayDataset(Dataset):
         self.max_future_seconds = max_future_days * 86400
         self.series_dict = self._load_and_process_data(publish_time_path)
         self.valid_series = [s for s in self.series_dict.values() if len(s['abs_time']) > 1]
-        self.feature_windows = [3600, 6*3600, 24*3600, 3*24*3600, 7*24*3600] # 1h,6h,24h,3d,7d
+        self.feature_windows = [3600, 3*3600, 6*3600, 24*3600, 3*24*3600, 7*24*3600, 60*24*3600] # 1h,3h,6h,24h,3d,7d,60d

     def _extract_features(self, series, current_idx, target_idx):
         """Extract incremental features"""
@@ -23,7 +23,7 @@ class VideoPlayDataset(Dataset):
         dt = datetime.datetime.fromtimestamp(current_time)
         # time features
         time_features = [
-            dt.hour / 24, (dt.weekday() + 1) / 7,
+            (dt.hour * 3600 + dt.minute * 60 + dt.second) / 86400, (dt.weekday() * 24 + dt.hour) / 168,
             np.log2(max(current_time - series['create_time'],1))
         ]
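For reference, a standalone sketch of the reworked time features in this hunk: seconds since midnight as a fraction of the day, hours since Monday as a fraction of the week, and the log2 age of the video. The function name is illustrative; the expressions mirror the added line above.

import datetime
import numpy as np

def time_features(current_time: float, create_time: float):
    """Sketch of the new time features (finer-grained than the old hour/24 and weekday/7)."""
    dt = datetime.datetime.fromtimestamp(current_time)
    return [
        (dt.hour * 3600 + dt.minute * 60 + dt.second) / 86400,  # fraction of the day elapsed
        (dt.weekday() * 24 + dt.hour) / 168,                    # fraction of the week elapsed
        np.log2(max(current_time - create_time, 1)),            # log2 of video age in seconds, floored at 1
    ]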
@@ -76,18 +76,18 @@ class VideoPlayDataset(Dataset):
     def _get_nearest_value(self, series, target_time, current_idx):
         """Get the data point closest to the given time"""
         min_diff = float('inf')
-        for i in range(current_idx + 1, len(series['abs_time']), 1):
+        for i in range(current_idx + 1, len(series['abs_time'])):
             diff = abs(series['abs_time'][i] - target_time)
             if diff < min_diff:
                 min_diff = diff
             else:
                 return i - 1
-        return None
+        return len(series['abs_time']) - 1

     def __getitem__(self, idx):
         series = random.choice(self.valid_series)
         current_idx = random.randint(0, len(series['abs_time'])-2)
-        target_idx = random.randint(max(0, current_idx-50), current_idx)
+        target_idx = random.randint(max(0, current_idx-10), current_idx)

         # Extract features
         features = self._extract_features(series, current_idx, target_idx)
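A standalone sketch of the fixed lookup, assuming series['abs_time'] is a sorted list of timestamps (the helper name is illustrative). The behavioural change is at the end of the scan: the old version returned None when it ran off the end of the series, while the new version falls back to the last index, so callers always receive a usable data point.

def nearest_index(abs_time, target_time, current_idx):
    """Scan forward from current_idx for the timestamp closest to target_time;
    once the absolute difference starts growing, the previous index was closest.
    If the end is reached without the difference growing, return the last index
    (the old code returned None here)."""
    min_diff = float('inf')
    for i in range(current_idx + 1, len(abs_time)):
        diff = abs(abs_time[i] - target_time)
        if diff < min_diff:
            min_diff = diff
        else:
            return i - 1
    return len(abs_time) - 1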


@@ -4,14 +4,25 @@ import torch

 def main():
     model = CompactPredictor(10).to('cpu', dtype=torch.float32)
-    model.load_state_dict(torch.load('play_predictor.pth'))
+    model.load_state_dict(torch.load('./pred/checkpoints/play_predictor.pth'))
     model.eval()
     # inference
-    data = [3,3.9315974229,5.4263146604,9.4958550269,10.9203528554,11.5835529305,13.0426853722,0.7916666667,0.2857142857,24.7794093257]
-    np_arr = np.array([data])
-    tensor = torch.from_numpy(np_arr).to('cpu', dtype=torch.float32)
-    output = model(tensor)
-    print(output)
+    last = 999469
+    for i in range(1, 48):
+        hour = i / 2
+        sec = hour * 3600
+        time_d = np.log2(sec)
+        data = [time_d, 19.9295936113, # time_delta, current_views
+                6.1575520046,8.980,10.6183855023,12.0313328273,13.2537252486, # growth_feat
+                0.625,0.2857142857,24.7794093257 # time_feat
+        ]
+        np_arr = np.array([data])
+        tensor = torch.from_numpy(np_arr).to('cpu', dtype=torch.float32)
+        output = model(tensor)
+        num = output.detach().numpy()[0][0]
+        views_pred = int(np.exp2(num)) + 999469
+        print(f"{int(15+hour)%24:02d}:{int((15+hour)*60)%60:02d}", views_pred, views_pred - last)
+        last = views_pred

 if __name__ == '__main__':
     main()
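A minimal sketch of the post-processing each loop iteration applies to the model output: the prediction is read as log2 growth, de-logged with exp2, and added to the 999469 baseline, and the printed label offsets the half-hour step from 15:00. The constant and function names are illustrative; the numbers are taken from the loop above.

import numpy as np

BASELINE_VIEWS = 999469   # current view count the predicted growth is added to
START_HOUR = 15           # wall-clock hour the half-hour steps are offset from

def decode_prediction(log2_growth, hour_offset, last_views):
    """Turn a log2-growth output into an absolute view count, an HH:MM label,
    and the increment over the previous half-hour step."""
    views_pred = int(np.exp2(log2_growth)) + BASELINE_VIEWS
    label = f"{int(START_HOUR + hour_offset) % 24:02d}:{int((START_HOUR + hour_offset) * 60) % 60:02d}"
    return label, views_pred, views_pred - last_views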


@@ -26,7 +26,7 @@ def train(model, dataloader, device, epochs=100):
             outputs = model(features)
             loss = criterion(outputs, targets)
             loss.backward()
-            #torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
+            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
             optimizer.step()
             scheduler.step()
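For context, a minimal sketch of where the newly enabled torch.nn.utils.clip_grad_norm_ call sits in a training step: after loss.backward(), so gradients exist, and before optimizer.step(). The helper name and the zero_grad() placement are assumptions; only the clip/step ordering is taken from the hunk above.

import torch

def training_step(model, features, targets, criterion, optimizer, scheduler, max_norm=1.0):
    """One optimizer step with gradient clipping: rescale the global gradient
    norm to at most max_norm before applying the update."""
    optimizer.zero_grad()
    loss = criterion(model(features), targets)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)  # clip after backward, before step
    optimizer.step()
    scheduler.step()  # the hunk above also steps the LR scheduler every batch
    return loss.item()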
@@ -49,16 +49,21 @@ def train(model, dataloader, device, epochs=100):
             writer.add_scalar('Params/Mean', sum(param_means)/len(param_means), global_step)

             samples_count = len(targets)
-            r = random.randint(0, samples_count-1)
-            t = float(torch.exp2(targets[r])) - 1
-            o = float(torch.exp2(outputs[r])) - 1
-            d = features[r].cpu().numpy()[0]
-            speed = np.exp2(features[r].cpu().numpy()[2])
-            time_diff = np.exp2(d) / 3600
-            inc = speed * time_diff
-            model_error = abs(t - o)
-            reg_error = abs(inc - t)
-            print(f"{t:07.1f} | {o:07.1f} | {d:07.1f} | {inc:07.1f} | {model_error < reg_error}")
+            good = 0
+            for r in range(samples_count):
+                r = random.randint(0, samples_count-1)
+                t = float(torch.exp2(targets[r])) - 1
+                o = float(torch.exp2(outputs[r])) - 1
+                d = features[r].cpu().numpy()[0]
+                speed = np.exp2(features[r].cpu().numpy()[5]) / 24
+                time_diff = np.exp2(d) / 3600
+                inc = speed * time_diff
+                model_error = abs(t - o)
+                reg_error = abs(inc - t)
+                if model_error < reg_error:
+                    good += 1
+                #print(f"{t:07.1f} | {o:07.1f} | {d:07.1f} | {inc:07.1f} | {good/samples_count*100:.1f}%")
+            writer.add_scalar('Train/WinRate', good/samples_count, global_step)

         print(f"Epoch {epoch+1} | Avg Loss: {total_loss/len(dataloader):.4f}")