add: more features
parent 1de8d85d2b
commit 23e5d6a8c9
@@ -14,7 +14,7 @@ class VideoPlayDataset(Dataset):
         self.max_future_seconds = max_future_days * 86400
         self.series_dict = self._load_and_process_data(publish_time_path)
         self.valid_series = [s for s in self.series_dict.values() if len(s['abs_time']) > 1]
-        self.feature_windows = [3600, 6*3600, 24*3600, 3*24*3600, 7*24*3600] # 1h,6h,24h,3d,7d
+        self.feature_windows = [3600, 3*3600, 6*3600, 24*3600, 3*24*3600, 7*24*3600, 60*24*3600] # 1h,3h,6h,24h,3d,7d,60d
 
     def _extract_features(self, series, current_idx, target_idx):
         """Extract incremental features."""
@@ -23,7 +23,7 @@ class VideoPlayDataset(Dataset):
         dt = datetime.datetime.fromtimestamp(current_time)
         # Time features
         time_features = [
-            dt.hour / 24, (dt.weekday() + 1) / 7,
+            (dt.hour * 3600 + dt.minute * 60 + dt.second) / 86400, (dt.weekday() * 24 + dt.hour) / 168,
             np.log2(max(current_time - series['create_time'],1))
         ]
 
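Note on the changed time features: the new encoding replaces hour-of-day/24 and weekday/7 with second-of-day/86400 and hour-of-week/168, both falling in [0, 1). A minimal worked example of the new arithmetic (the timestamp below is hypothetical, not taken from the dataset):

import datetime

dt = datetime.datetime(2025, 3, 5, 15, 30, 0)                             # a Wednesday, 15:30
second_of_day = (dt.hour * 3600 + dt.minute * 60 + dt.second) / 86400     # 55800 / 86400 ≈ 0.6458
hour_of_week = (dt.weekday() * 24 + dt.hour) / 168                        # (2*24 + 15) / 168 = 0.375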
@@ -76,18 +76,18 @@ class VideoPlayDataset(Dataset):
     def _get_nearest_value(self, series, target_time, current_idx):
         """Get the nearest data point before the given time."""
         min_diff = float('inf')
-        for i in range(current_idx + 1, len(series['abs_time']), 1):
+        for i in range(current_idx + 1, len(series['abs_time'])):
             diff = abs(series['abs_time'][i] - target_time)
             if diff < min_diff:
                 min_diff = diff
             else:
                 return i - 1
-        return None
+        return len(series['abs_time']) - 1
 
     def __getitem__(self, idx):
         series = random.choice(self.valid_series)
         current_idx = random.randint(0, len(series['abs_time'])-2)
-        target_idx = random.randint(max(0, current_idx-50), current_idx)
+        target_idx = random.randint(max(0, current_idx-10), current_idx)
 
         # Extract features
         features = self._extract_features(series, current_idx, target_idx)
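Note on _get_nearest_value and the sampling window: when every later point keeps getting closer to target_time, the old loop fell through and returned None; the new version returns the last index instead, so callers no longer need a None guard. __getitem__ also narrows the sampled history from the last 50 points to the last 10. A toy illustration of the fallback, assuming ds is an already constructed VideoPlayDataset (the series values are made up):

series = {'abs_time': [0, 100, 200], 'create_time': 0}   # hypothetical toy series
idx = ds._get_nearest_value(series, target_time=500, current_idx=0)
# the differences shrink all the way to the end of the list:
# old behaviour -> None, new behaviour -> 2 (the last index)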
@@ -4,14 +4,25 @@ import torch
 
 def main():
     model = CompactPredictor(10).to('cpu', dtype=torch.float32)
-    model.load_state_dict(torch.load('play_predictor.pth'))
+    model.load_state_dict(torch.load('./pred/checkpoints/play_predictor.pth'))
     model.eval()
     # inference
-    data = [3,3.9315974229,5.4263146604,9.4958550269,10.9203528554,11.5835529305,13.0426853722,0.7916666667,0.2857142857,24.7794093257]
-    np_arr = np.array([data])
-    tensor = torch.from_numpy(np_arr).to('cpu', dtype=torch.float32)
-    output = model(tensor)
-    print(output)
+    last = 999469
+    for i in range(1, 48):
+        hour = i / 2
+        sec = hour * 3600
+        time_d = np.log2(sec)
+        data = [time_d, 19.9295936113, # time_delta, current_views
+                6.1575520046,8.980,10.6183855023,12.0313328273,13.2537252486, # growth_feat
+                0.625,0.2857142857,24.7794093257 # time_feat
+                ]
+        np_arr = np.array([data])
+        tensor = torch.from_numpy(np_arr).to('cpu', dtype=torch.float32)
+        output = model(tensor)
+        num = output.detach().numpy()[0][0]
+        views_pred = int(np.exp2(num)) + 999469
+        print(f"{int(15+hour)%24:02d}:{int((15+hour)*60)%60:02d}", views_pred, views_pred - last)
+        last = views_pred
 
 if __name__ == '__main__':
     main()
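Note on the rewritten inference loop: it sweeps 30-minute horizons (hour = i/2), feeds log2(seconds) as the time-delta feature, and appears to treat the model output as log2 of the future view increment, which is inverted with np.exp2 and added to the 999469 baseline hard-coded above; the 15:00 reference time in the printed clock comes from the same code. A minimal sketch of just the inverse transform (the output value 10.0 is hypothetical):

import numpy as np

num = 10.0                                  # pretend the model predicted log2(view increment) = 10
views_pred = int(np.exp2(num)) + 999469     # 2**10 + 999469 = 1000493 predicted total views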
@@ -26,7 +26,7 @@ def train(model, dataloader, device, epochs=100):
             outputs = model(features)
             loss = criterion(outputs, targets)
             loss.backward()
-            #torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
+            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
             optimizer.step()
             scheduler.step()
 
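Note on the training change: the previously commented-out clipping call is now active, capping the global L2 norm of the gradients at 1.0 between the backward pass and the optimizer step. The same pattern in isolation, assuming a generic model, loss and optimizer are already set up:

loss.backward()                                                     # compute gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)    # rescale if total grad norm > 1.0
optimizer.step()                                                    # apply the (possibly clipped) update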
@@ -49,16 +49,21 @@ def train(model, dataloader, device, epochs=100):
                 writer.add_scalar('Params/Mean', sum(param_means)/len(param_means), global_step)
 
                 samples_count = len(targets)
-                r = random.randint(0, samples_count-1)
-                t = float(torch.exp2(targets[r])) - 1
-                o = float(torch.exp2(outputs[r])) - 1
-                d = features[r].cpu().numpy()[0]
-                speed = np.exp2(features[r].cpu().numpy()[2])
-                time_diff = np.exp2(d) / 3600
-                inc = speed * time_diff
-                model_error = abs(t - o)
-                reg_error = abs(inc - t)
-                print(f"{t:07.1f} | {o:07.1f} | {d:07.1f} | {inc:07.1f} | {model_error < reg_error}")
+                good = 0
+                for r in range(samples_count):
+                    r = random.randint(0, samples_count-1)
+                    t = float(torch.exp2(targets[r])) - 1
+                    o = float(torch.exp2(outputs[r])) - 1
+                    d = features[r].cpu().numpy()[0]
+                    speed = np.exp2(features[r].cpu().numpy()[5]) / 24
+                    time_diff = np.exp2(d) / 3600
+                    inc = speed * time_diff
+                    model_error = abs(t - o)
+                    reg_error = abs(inc - t)
+                    if model_error < reg_error:
+                        good += 1
+                    #print(f"{t:07.1f} | {o:07.1f} | {d:07.1f} | {inc:07.1f} | {good/samples_count*100:.1f}%")
+                writer.add_scalar('Train/WinRate', good/samples_count, global_step)
 
             print(f"Epoch {epoch+1} | Avg Loss: {total_loss/len(dataloader):.4f}")
 
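Note on the new Train/WinRate metric: inc is a linear-extrapolation baseline built from the sampled features, and the metric is the fraction of samples where the model's absolute error beats that baseline. A worked instance with made-up numbers, assuming feature index 0 stores log2(seconds ahead) and index 5 stores log2(views gained per day), as the divisions by 3600 and 24 above suggest:

import numpy as np

d = np.log2(6 * 3600)                  # assumed feature[0]: a 6-hour horizon stored as log2(seconds)
time_diff = np.exp2(d) / 3600          # 6.0 hours
speed = np.exp2(np.log2(2400)) / 24    # assumed feature[5]: 2400 views/day -> 100 views/hour
inc = speed * time_diff                # baseline: 600 views expected in 6 hours
t, o = 800.0, 750.0                    # hypothetical true / predicted increments
good = abs(t - o) < abs(inc - t)       # True: model error 50 beats baseline error 200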
|