add: more features
This commit is contained in:
parent
1de8d85d2b
commit
23e5d6a8c9
@ -14,7 +14,7 @@ class VideoPlayDataset(Dataset):
|
||||
self.max_future_seconds = max_future_days * 86400
|
||||
self.series_dict = self._load_and_process_data(publish_time_path)
|
||||
self.valid_series = [s for s in self.series_dict.values() if len(s['abs_time']) > 1]
|
||||
self.feature_windows = [3600, 6*3600, 24*3600, 3*24*3600, 7*24*3600] # 1h,6h,24h,3d,7d
|
||||
self.feature_windows = [3600, 3*3600, 6*3600, 24*3600, 3*24*3600, 7*24*3600, 60*24*3600]
|
||||
|
||||
def _extract_features(self, series, current_idx, target_idx):
|
||||
"""提取增量特征"""
|
||||
@ -23,7 +23,7 @@ class VideoPlayDataset(Dataset):
|
||||
dt = datetime.datetime.fromtimestamp(current_time)
|
||||
# 时间特征
|
||||
time_features = [
|
||||
dt.hour / 24, (dt.weekday() + 1) / 7,
|
||||
(dt.hour * 3600 + dt.minute * 60 + dt.second) / 86400, (dt.weekday() * 24 + dt.hour) / 168,
|
||||
np.log2(max(current_time - series['create_time'],1))
|
||||
]
|
||||
|
||||
@ -76,18 +76,18 @@ class VideoPlayDataset(Dataset):
|
||||
def _get_nearest_value(self, series, target_time, current_idx):
|
||||
"""获取指定时间前最近的数据点"""
|
||||
min_diff = float('inf')
|
||||
for i in range(current_idx + 1, len(series['abs_time']), 1):
|
||||
for i in range(current_idx + 1, len(series['abs_time'])):
|
||||
diff = abs(series['abs_time'][i] - target_time)
|
||||
if diff < min_diff:
|
||||
min_diff = diff
|
||||
else:
|
||||
return i - 1
|
||||
return None
|
||||
return len(series['abs_time']) - 1
|
||||
|
||||
def __getitem__(self, idx):
|
||||
series = random.choice(self.valid_series)
|
||||
current_idx = random.randint(0, len(series['abs_time'])-2)
|
||||
target_idx = random.randint(max(0, current_idx-50), current_idx)
|
||||
target_idx = random.randint(max(0, current_idx-10), current_idx)
|
||||
|
||||
# 提取特征
|
||||
features = self._extract_features(series, current_idx, target_idx)
|
||||
|
@ -4,14 +4,25 @@ import torch
|
||||
|
||||
def main():
    """Load the trained predictor and print a half-hourly view forecast.

    Builds one 10-value feature row per half-hour step (0.5 h to 23.5 h),
    feeds it to the model, converts the output back with ``exp2`` and adds
    it to a fixed baseline view count.  Each printed line holds a clock
    label, the predicted total and the change from the previous step.
    NOTE(review): the clock label offsets by 15, presumably meaning the
    forecast starts at 15:00 -- confirm.
    """
    base_views = 999469  # baseline the predicted increment is added to

    model = CompactPredictor(10).to('cpu', dtype=torch.float32)
    # map_location keeps a checkpoint saved on another device loadable on CPU.
    state_dict = torch.load('./pred/checkpoints/play_predictor.pth',
                            map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()

    # inference
    last = base_views
    with torch.no_grad():  # no gradients are needed for prediction
        for i in range(1, 48):
            hour = i / 2
            sec = hour * 3600
            time_d = np.log2(sec)
            data = [time_d, 19.9295936113,  # time_delta, current_views
                    6.1575520046, 8.980, 10.6183855023, 12.0313328273, 13.2537252486,  # growth_feat
                    0.625, 0.2857142857, 24.7794093257  # time_feat
                    ]
            np_arr = np.array([data])
            tensor = torch.from_numpy(np_arr).to('cpu', dtype=torch.float32)
            output = model(tensor)
            num = output.detach().numpy()[0][0]
            # The output is inverted with exp2 before being added to the baseline.
            views_pred = int(np.exp2(num)) + base_views
            print(f"{int(15+hour)%24:02d}:{int((15+hour)*60)%60:02d}", views_pred, views_pred - last)
            last = views_pred


if __name__ == '__main__':
    main()
|
@ -26,7 +26,7 @@ def train(model, dataloader, device, epochs=100):
|
||||
outputs = model(features)
|
||||
loss = criterion(outputs, targets)
|
||||
loss.backward()
|
||||
#torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
||||
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
||||
optimizer.step()
|
||||
scheduler.step()
|
||||
|
||||
@ -49,16 +49,21 @@ def train(model, dataloader, device, epochs=100):
|
||||
writer.add_scalar('Params/Mean', sum(param_means)/len(param_means), global_step)
|
||||
|
||||
samples_count = len(targets)
|
||||
r = random.randint(0, samples_count-1)
|
||||
t = float(torch.exp2(targets[r])) - 1
|
||||
o = float(torch.exp2(outputs[r])) - 1
|
||||
d = features[r].cpu().numpy()[0]
|
||||
speed = np.exp2(features[r].cpu().numpy()[2])
|
||||
time_diff = np.exp2(d) / 3600
|
||||
inc = speed * time_diff
|
||||
model_error = abs(t - o)
|
||||
reg_error = abs(inc - t)
|
||||
print(f"{t:07.1f} | {o:07.1f} | {d:07.1f} | {inc:07.1f} | {model_error < reg_error}")
|
||||
good = 0
|
||||
for r in range(samples_count):
|
||||
r = random.randint(0, samples_count-1)
|
||||
t = float(torch.exp2(targets[r])) - 1
|
||||
o = float(torch.exp2(outputs[r])) - 1
|
||||
d = features[r].cpu().numpy()[0]
|
||||
speed = np.exp2(features[r].cpu().numpy()[5]) / 24
|
||||
time_diff = np.exp2(d) / 3600
|
||||
inc = speed * time_diff
|
||||
model_error = abs(t - o)
|
||||
reg_error = abs(inc - t)
|
||||
if model_error < reg_error:
|
||||
good += 1
|
||||
#print(f"{t:07.1f} | {o:07.1f} | {d:07.1f} | {inc:07.1f} | {good/samples_count*100:.1f}%")
|
||||
writer.add_scalar('Train/WinRate', good/samples_count, global_step)
|
||||
|
||||
print(f"Epoch {epoch+1} | Avg Loss: {total_loss/len(dataloader):.4f}")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user