来自 Web conference 2021
模型框架图
STAN模型:
1.运用多模嵌入,包含两个模块,轨迹嵌入层和时空嵌入层
2.自我注意层
3.注意匹配层
4.平衡采样器
数据集
实验结果
消融分析
结论
load.py
import numpy as np
import torch
from math import radians, cos, sin, asin, sqrt
import joblib
from torch.nn.utils.rnn import pad_sequence
max_len = 100  # M: maximum trajectory length kept per user; longer histories are cut to the most recent check-ins
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in kilometres between two points.

    All four arguments are longitudes / latitudes in decimal degrees. The
    haversine formula is evaluated with an Earth radius of 6371 km.
    """
    lon1, lat1, lon2, lat2 = map(radians, (lon1, lat1, lon2, lat2))
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise a math
    # domain error; clamp it to the valid range first.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    r = 6371  # Earth radius in km
    return c * r
def euclidean(point, each):
    """Return the planar (Euclidean) distance between two POI records.

    Index 2 of each record is read as the longitude and index 1 as the
    latitude; the result is expressed in the same units as the coordinates.
    """
    d_lon = point[2] - each[2]
    d_lat = point[1] - each[1]
    return np.sqrt(d_lon ** 2 + d_lat ** 2)
def rst_mat1(traj, poi):
    """Build the pairwise spatio-temporal interval tensor of one trajectory.

    traj is a sequence of check-ins [u, l, t] of length *M; poi is a table of
    rows [l, lat, lon] in which the record of location id ``l`` sits at row
    ``l - 1``. The result has shape (*M, *M, 2): channel 0 is the haversine
    distance between the two visited locations and channel 1 the absolute
    difference of the two check-in times.
    """
    n = len(traj)
    intervals = np.zeros((n, n, 2))
    for row, src in enumerate(traj):
        src_poi = poi[src[1] - 1]  # look the POI up by its 1-based location id
        for col, dst in enumerate(traj):
            dst_poi = poi[dst[1] - 1]
            intervals[row, col, 0] = haversine(lon1=src_poi[2], lat1=src_poi[1],
                                               lon2=dst_poi[2], lat2=dst_poi[1])
            intervals[row, col, 1] = abs(src[2] - dst[2])
    return intervals  # (*M, *M, [dis, tim])
def rs_mat2s(poi, l_max):
    """Return the (L, L) matrix of haversine distances between all locations.

    poi is a table of rows [l, lat, lon] where location id ``l`` is stored at
    row ``l - 1``; l_max is the number of locations L. Entry (i, j) is the
    distance between location ids i + 1 and j + 1. The row index is printed
    every 100 rows as a progress indicator.
    """
    dist = np.zeros((l_max, l_max))  # (L, L)
    for row in range(l_max):
        if row % 100 == 0:
            print(row)
        src = poi[row]  # record of location id row + 1
        for col in range(l_max):
            dst = poi[col]
            dist[row, col] = haversine(lon1=src[2], lat1=src[1], lon2=dst[2], lat2=dst[1])
    return dist  # (L, L)
def rt_mat2t(traj_time):  # traj_time (*M+1) -> lower-triangular (*M, *M)
    """Return the causal matrix of absolute time gaps of one trajectory.

    For a sequence of *M+1 timestamps the result has shape (*M, *M). Row
    ``i - 1`` belongs to the label check-in ``i`` and column ``j`` to an
    earlier check-in ``j < i``; the entry is ``|t_i - t_j|``. Entries above
    the diagonal stay 0. Sequences with fewer than two timestamps yield an
    empty (0, 0) matrix instead of raising.
    """
    times = np.asarray(traj_time, dtype=np.float64)
    if times.size < 2:
        return np.zeros((0, 0))
    # gaps[r, c] = |t[r + 1] - t[c]|; tril keeps only c <= r, i.e. the
    # check-ins that precede the label check-in r + 1.
    gaps = np.abs(times[1:, None] - times[None, :-1])
    return np.tril(gaps)  # (*M, *M)
def process_traj(dname):  # ids start from 1
    """Pre-process a check-in dataset and dump the model inputs to disk.

    Reads ``./data/<dname>.npy`` (rows of [u, l, t]) and
    ``./data/<dname>_POI.npy`` (rows of [l, lat, lon]). For every user the
    most recent ``max_len + 1`` check-ins are kept and turned into a
    trajectory, its next-location labels and the zero-padded interval
    matrices. The list
    ``[trajs, mat1, mat2s, mat2t, labels, lens, u_max, l_max]`` is written to
    ``./data/<dname>_data.pkl`` with joblib. Nothing is returned.
    """
    # data (?, [u, l, t]), poi (L, [l, lat, lon])
    data = np.load('./data/' + dname + '.npy')

    # Divide the time column by 60 and truncate to integers (upstream note:
    # use this when the raw time is expressed in minutes rather than hours).
    # `np.int` was removed in NumPy 1.24; the builtin `int` is its replacement.
    data[:, -1] = np.array(data[:, -1] / 60, dtype=int)

    poi = np.load('./data/' + dname + '_POI.npy')
    num_user = data[-1, 0]  # user id of the last row, used as the max user id (NUM)
    data_user = data[:, 0]  # user id of every check-in
    trajs, labels, mat1, mat2t, lens = [], [], [], [], []
    u_max, l_max = np.max(data[:, 0]), np.max(data[:, 1])

    for u_id in range(1, num_user + 1):  # user id 0 is skipped
        init_mat1 = np.zeros((max_len, max_len, 2))  # first mat (M, M, 2)
        init_mat2t = np.zeros((max_len, max_len))  # second mat of time (M, M)
        user_traj = data[np.where(data_user == u_id)]  # all check-ins of u_id
        user_traj = user_traj[np.argsort(user_traj[:, 2])].copy()  # sort by time

        if u_id % 100 == 0:
            print(u_id, len(user_traj))

        if len(user_traj) == 0:
            # A user id without any check-in has no trajectory to build, and
            # the interval builders cannot handle an empty time sequence.
            continue

        if len(user_traj) > max_len + 1:  # keep only the M+1 most recent check-ins
            # 0:-3 are training data, 1:-2 is training label;
            # 1:-2 are validation data, 2:-1 is validation label;
            # 2:-1 are test data, 3: is the label for test.
            # *M would be the real length if <= max_len + 1
            user_traj = user_traj[-max_len-1:]  # (*M+1, [u, l, t])

        # spatial and temporal intervals
        user_len = len(user_traj[:-1])  # the len of data, i.e. *M
        user_mat1 = rst_mat1(user_traj[:-1], poi)  # (*M, *M, [dis, tim])
        user_mat2t = rt_mat2t(user_traj[:, 2])  # (*M, *M)
        # the unused right/bottom part of the (M, M) matrices stays zero
        init_mat1[0:user_len, 0:user_len] = user_mat1
        init_mat2t[0:user_len, 0:user_len] = user_mat2t

        trajs.append(torch.LongTensor(user_traj)[:-1])  # (NUM, *M, [u, l, t])
        mat1.append(init_mat1)  # (NUM, M, M, 2)
        mat2t.append(init_mat2t)  # (NUM, M, M)
        labels.append(torch.LongTensor(user_traj[1:, 1]))  # (NUM, *M)
        lens.append(user_len-2)  # (NUM), the real *M for every user

    mat2s = rs_mat2s(poi, l_max)  # distances between all locations, (L, L)

    # sort users by trajectory length (longest first), then zero-pad on the right
    zipped = zip(*sorted(zip(trajs, mat1, mat2t, labels, lens), key=lambda x: len(x[0]), reverse=True))
    trajs, mat1, mat2t, labels, lens = zipped
    trajs, mat1, mat2t, labels, lens = list(trajs), list(mat1), list(mat2t), list(labels), list(lens)
    trajs = pad_sequence(trajs, batch_first=True, padding_value=0)  # (NUM, M, 3)
    labels = pad_sequence(labels, batch_first=True, padding_value=0)  # (NUM, M)

    data = [trajs, np.array(mat1), mat2s, np.array(mat2t), labels, np.array(lens), u_max, l_max]
    data_pkl = './data/' + dname + '_data.pkl'
    # Opening with 'wb' already creates the file, so no separate (and never
    # closed) open(..., 'a') call is needed.
    with open(data_pkl, 'wb') as pkl:
        joblib.dump(data, pkl)
if __name__ == '__main__':
    # Running this module as a script pre-processes the 'NYC' dataset.
    name = 'NYC'
    process_traj(dname=name)
layers.py
from load import *
import torch
from torch import nn
from torch.nn import functional as F
seed = 0  # seed of torch's global random number generator
global_seed = 0  # counter that train.py's sampling_prob uses to reseed `random`
hours = 24*7  # number of hourly time slots in one week
torch.manual_seed(seed)
# Use the GPU when one is available and fall back to the CPU otherwise, so
# that importing and running the model does not fail on CUDA-less machines.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def to_npy(x):
    """Return tensor ``x`` as a NumPy array.

    The tensor is detached from the autograd graph and copied to host memory
    when it lives on another device, so the conversion depends on where ``x``
    actually is rather than on the module-level ``device`` string.
    """
    return x.detach().cpu().numpy()
class Attn(nn.Module):
    """Attention matching layer.

    Scores every candidate location (ids 1..loc_max) against the encoded
    trajectory and returns one unnormalised score per candidate.
    """

    def __init__(self, emb_loc, loc_max, dropout=0.1):
        # `dropout` is accepted for interface compatibility but is not used.
        super(Attn, self).__init__()
        # Collapses the max_len per-step scores of a candidate into one value.
        self.value = nn.Linear(max_len, 1, bias=False)
        self.emb_loc = emb_loc  # location embedding shared with the encoder
        self.loc_max = loc_max  # number of candidate locations L

    def forward(self, self_attn, self_delta, traj_len):
        # self_attn (N, M, emb), self_delta (N, M, L, emb), traj_len [N] (unused here)
        self_delta = torch.sum(self_delta, -1).transpose(-1, -2)  # sum out emb -> (N, L, M)
        N, L, M = self_delta.shape
        # Build the candidate ids as integers directly (no float linspace
        # round trip) on the device that holds this layer's parameters.
        candidates = torch.arange(1, int(self.loc_max) + 1, dtype=torch.long,
                                  device=self.value.weight.device)  # (L)
        candidates = candidates.unsqueeze(0).expand(N, -1)  # (N, L)
        emb_candidates = self.emb_loc(candidates)  # (N, L, emb)
        # candidate/step similarity, weighted by the interval embedding sum
        attn = torch.mul(torch.bmm(emb_candidates, self_attn.transpose(-1, -2)), self_delta)  # (N, L, M)
        attn_out = self.value(attn).view(N, L)  # (N, L)
        # no log_softmax here: the caller applies cross-entropy to the raw scores
        return attn_out  # (N, L)
class SelfAttn(nn.Module):
    """Self-attention layer over the check-ins of a trajectory.

    The attention logits are the query/key dot products plus the interval
    embedding summed over its embedding axis; positions beyond each
    trajectory's length are zeroed after the softmax.
    """

    def __init__(self, emb_size, output_size, dropout=0.1):
        # `dropout` is part of the signature but is not used by this layer.
        super().__init__()
        self.query = nn.Linear(emb_size, output_size, bias=False)
        self.key = nn.Linear(emb_size, output_size, bias=False)
        self.value = nn.Linear(emb_size, output_size, bias=False)

    def forward(self, joint, delta, traj_len):
        # joint (N, M, emb), delta (N, M, M, emb), traj_len [N]
        interval_bias = delta.sum(dim=-1)  # (N, M, M)

        # 1 inside the top-left (len, len) square of every sample, 0 elsewhere
        valid = torch.zeros_like(interval_bias, dtype=torch.float32)
        for row in range(valid.shape[0]):
            length = traj_len[row]
            valid[row, 0:length, 0:length] = 1

        queries = self.query(joint)
        keys = self.key(joint)
        scores = torch.add(torch.bmm(queries, keys.transpose(-1, -2)), interval_bias)  # (N, M, M)
        weights = F.softmax(scores, dim=-1) * valid  # (N, M, M)
        return torch.bmm(weights, self.value(joint))  # (N, M, emb)
class Embed(nn.Module):
    """Interval embedding between visited check-ins and all candidate locations.

    Every spatial / temporal interval is embedded by linear interpolation
    between a lower-bound and an upper-bound embedding vector.
    """

    def __init__(self, ex, emb_size, loc_max, embed_layers):
        super().__init__()
        # only the four interval embeddings (the last four layers) are used here
        self.emb_su, self.emb_sl, self.emb_tu, self.emb_tl = embed_layers[3:]
        # upper / lower bounds of the spatial (s) and temporal (t) intervals
        self.su, self.sl, self.tu, self.tl = ex
        self.emb_size = emb_size
        self.loc_max = loc_max

    def forward(self, traj_loc, mat2, vec, traj_len):
        # traj_loc (N, M), mat2 (L, L), vec (N, M), traj_len [N]
        delta_t = vec.unsqueeze(-1).expand(-1, -1, self.loc_max)  # (N, M, L)
        delta_s = torch.zeros_like(delta_t, dtype=torch.float32)
        mask = torch.zeros_like(delta_t, dtype=torch.long)
        for row in range(mask.shape[0]):  # N
            length = traj_len[row]
            mask[row, 0:length] = 1
            # distance rows of the visited locations (ids are 1-based)
            visited = (traj_loc[row] - 1)[:length]
            delta_s[row, :length] = torch.index_select(mat2, 0, visited)

        def widen(interval):
            # (N, M, L) -> (N, M, L, emb)
            return interval.unsqueeze(-1).expand(-1, -1, -1, self.emb_size)

        # index 1 selects the learned vector, index 0 the zero padding vector
        esl, esu = self.emb_sl(mask), self.emb_su(mask)
        etl, etu = self.emb_tl(mask), self.emb_tu(mask)
        vsl, vsu = widen(delta_s - self.sl), widen(self.su - delta_s)
        vtl, vtu = widen(delta_t - self.tl), widen(self.tu - delta_t)

        space_interval = (esl * vsu + esu * vsl) / (self.su - self.sl)
        time_interval = (etl * vtu + etu * vtl) / (self.tu - self.tl)
        return space_interval + time_interval  # (N, M, L, emb)
class MultiEmbed(nn.Module):
    """Multi-modal embedding layer.

    Produces (a) the sum of the time, location and user embeddings of every
    check-in and (b) the embedding of the pairwise spatial / temporal
    intervals inside the trajectory, obtained by linear interpolation between
    a lower-bound and an upper-bound embedding vector.
    """

    def __init__(self, ex, emb_size, embed_layers):
        super(MultiEmbed, self).__init__()
        self.emb_t, self.emb_l, self.emb_u, \
            self.emb_su, self.emb_sl, self.emb_tu, self.emb_tl = embed_layers
        # upper / lower bounds of the spatial (s) and temporal (t) intervals
        self.su, self.sl, self.tu, self.tl = ex
        self.emb_size = emb_size

    def forward(self, traj, mat, traj_len):
        # traj (N, M, [u, l, t]), mat (N, M, M, 2), traj_len [N]
        # Fold the timestamps into the slots 1..hours (24 hours * 7 days).
        # The result is kept in a local tensor so the caller's `traj` is not
        # modified as a side effect.
        # NOTE(review): a padded time of 0 folds to slot `hours`, not to the
        # padding index 0 - kept as in the original model.
        time_slot = (traj[:, :, 2] - 1) % hours + 1
        time = self.emb_t(time_slot)  # (N, M) --> (N, M, embed)
        loc = self.emb_l(traj[:, :, 1])  # (N, M) --> (N, M, embed)
        user = self.emb_u(traj[:, :, 0])  # (N, M) --> (N, M, embed)
        joint = time + loc + user  # (N, M, embed)

        delta_s, delta_t = mat[:, :, :, 0], mat[:, :, :, 1]  # (N, M, M)
        # 1 inside the top-left (len, len) square of every sample, 0 elsewhere
        mask = torch.zeros_like(delta_s, dtype=torch.long)
        for i in range(mask.shape[0]):
            mask[i, 0:traj_len[i], 0:traj_len[i]] = 1

        # index 1 selects the learned vector, index 0 the zero padding vector
        esl, esu, etl, etu = self.emb_sl(mask), self.emb_su(mask), self.emb_tl(mask), self.emb_tu(mask)
        vsl, vsu, vtl, vtu = (delta_s - self.sl).unsqueeze(-1).expand(-1, -1, -1, self.emb_size), \
            (self.su - delta_s).unsqueeze(-1).expand(-1, -1, -1, self.emb_size), \
            (delta_t - self.tl).unsqueeze(-1).expand(-1, -1, -1, self.emb_size), \
            (self.tu - delta_t).unsqueeze(-1).expand(-1, -1, -1, self.emb_size)

        space_interval = (esl*vsu+esu*vsl) / (self.su-self.sl)
        time_interval = (etl*vtu+etu*vtl) / (self.tu-self.tl)
        delta = space_interval + time_interval  # (N, M, M, emb)

        return joint, delta
models.py
from layers import *
class Model(nn.Module):
    """STAN network: multi-modal embedding, self-attention and attention matching.

    The seven embedding tables are created once and shared between the
    sub-layers; the location table is also used to embed the candidates.
    """

    def __init__(self, t_dim, l_dim, u_dim, embed_dim, ex, dropout=0.1):
        # `dropout` is part of the signature but is not used.
        super().__init__()
        # creation order: time, location, user, then the interval bounds
        # (spatial upper, spatial lower, temporal upper, temporal lower)
        table_sizes = (t_dim, l_dim, u_dim, 2, 2, 2, 2)
        embed_layers = tuple(
            nn.Embedding(size, embed_dim, padding_idx=0) for size in table_sizes
        )
        location_table = embed_layers[1]
        num_candidates = l_dim - 1  # index 0 of the location table is padding

        self.MultiEmbed = MultiEmbed(ex, embed_dim, embed_layers)
        self.SelfAttn = SelfAttn(embed_dim, embed_dim)
        self.Embed = Embed(ex, embed_dim, num_candidates, embed_layers)
        self.Attn = Attn(location_table, num_candidates)

    def forward(self, traj, mat1, mat2, vec, traj_len):
        # long(N, M, [u, l, t]), float(N, M, M, 2), float(L, L), float(N, M), long(N)
        joint, delta = self.MultiEmbed(traj, mat1, traj_len)  # (N, M, emb), (N, M, M, emb)
        encoded = self.SelfAttn(joint, delta, traj_len)  # (N, M, emb)
        visited_locations = traj[:, :, 1]
        candidate_delta = self.Embed(visited_locations, mat2, vec, traj_len)  # (N, M, L, emb)
        return self.Attn(encoded, candidate_delta, traj_len)  # (N, L)
train.py
from load import *
import time
import random
from torch import optim
import torch.utils.data as data
from tqdm import tqdm
from models import *
def calculate_acc(prob, label):
    """Count the top-k hits of a batch for k in (1, 5, 10, 20).

    prob holds the (N, L) scores and label the (N) target indices. The result
    is a NumPy array of four integers: for each k, the number of samples
    whose label is among the k highest-scoring indices.
    """
    # One vectorised comparison per k replaces converting the label to NumPy
    # again for every sample and every k.
    targets = label.reshape(-1, 1)  # (N, 1), broadcast against (N, k)
    hits = []
    for k in (1, 5, 10, 20):
        _, topk_predict_batch = torch.topk(prob, k=k)  # (N, k)
        hits.append(int((topk_predict_batch == targets).any(dim=-1).sum()))
    return np.array(hits)
def sampling_prob(prob, label, num_neg):
    """Reduce the scores to the positive labels plus random negative samples.

    prob is (N, L) and label holds the N target indices. ``num_neg`` column
    indices are drawn from 1..L-1 with ``random.sample`` and redrawn until
    none of them equals a label; afterwards ``random`` is reseeded with the
    module-level ``global_seed`` counter, which is then incremented.

    Returns ``(scores, targets)``: scores is (N, N + num_neg) with the label
    columns first and the negative columns last, targets is ``[0, ..., N-1]``
    so that row k's own label is its target. Both are returned on ``prob``'s
    device and scores keeps its autograd history.
    """
    global global_seed

    num_label, l_m = prob.shape[0], prob.shape[1]-1  # prob (N, L)
    label = label.reshape(-1)  # label (N)
    positives = set(label.tolist())

    random_ig = random.sample(range(1, l_m+1), num_neg)  # (num_neg) from (1 -- l_max)
    while not positives.isdisjoint(random_ig):  # no intersection
        random_ig = random.sample(range(1, l_m+1), num_neg)

    random.seed(global_seed)
    global_seed += 1

    # Gather all columns in one indexing operation (positive labels ahead,
    # negative samples at the end) instead of copying scalars one by one.
    negatives = torch.tensor(random_ig, dtype=torch.long, device=prob.device)
    columns = torch.cat([label.long().to(prob.device), negatives])
    sampled = prob[:, columns].float()  # (N, num_label+num_neg)
    targets = torch.arange(num_label, dtype=torch.long, device=prob.device)  # (N)
    return sampled, targets
class DataSet(data.Dataset):
    """Per-user dataset that moves each requested sample to `device`."""

    def __init__(self, traj, m1, v, label, length):
        # (NUM, M, 3), (NUM, M, M, 2), (NUM, M, M), (NUM, M), (NUM)
        self.traj = traj
        self.mat1 = m1
        self.vec = v
        self.label = label
        self.length = length

    def __getitem__(self, index):
        # returned order: trajectory, interval matrix, time vector, label, length
        fields = (self.traj, self.mat1, self.vec, self.label, self.length)
        return tuple(field[index].to(device) for field in fields)

    def __len__(self):
        # number of users, taken from the trajectory tensor
        return len(self.traj)
class Trainer:
    """Training / evaluation driver of the STAN model.

    Besides its arguments the class reads several module-level names set up
    by the script section of this file: ``load``, ``part``, ``start``,
    ``dname`` and the data tensors ``trajs``, ``mat1``, ``mat2s``, ``mat2t``,
    ``labels`` and ``lens``.
    """

    def __init__(self, model, record):
        # load other parameters
        self.model = model.to(device)
        self.records = record
        self.start_epoch = record['epoch'][-1] if load else 1
        self.num_neg = 10
        self.interval = 1000
        self.batch_size = 1  # N = 1
        self.learning_rate = 3e-3
        self.num_epoch = 100
        self.threshold = np.mean(record['acc_valid'][-1]) if load else 0  # 0 if not update

        # (NUM, M, 3), (NUM, M, M, 2), (L, L), (NUM, M, M), (NUM, M), (NUM) i.e. [*M]
        self.traj, self.mat1, self.mat2s, self.mat2t, self.label, self.len = \
            trajs, mat1, mat2s, mat2t, labels, lens
        # nn.cross_entropy_loss counts target from 0 to C - 1, so we minus 1 here.
        self.dataset = DataSet(self.traj, self.mat1, self.mat2t, self.label-1, self.len)
        self.data_loader = data.DataLoader(dataset=self.dataset, batch_size=self.batch_size, shuffle=False)

    def _forward_prefix(self, item, input_mask, m1_mask, mask_len, with_grad):
        """Score all locations from the first ``mask_len`` check-ins of a batch.

        The two mask tensors are updated in place so that their first
        ``mask_len`` positions are 1. Returns ``(prob, label)`` where prob is
        (N, L) and label the (N) target of this prefix. Autograd is recorded
        only when ``with_grad`` is true.
        """
        person_input, person_m1, person_m2t, person_label, _ = item
        input_mask[:, :mask_len] = 1.
        m1_mask[:, :mask_len, :mask_len] = 1.

        train_input = person_input * input_mask
        train_m1 = person_m1 * m1_mask
        train_m2t = person_m2t[:, mask_len - 1]
        train_label = person_label[:, mask_len - 1]  # (N)
        train_len = torch.zeros(size=(self.batch_size,), dtype=torch.long).to(device) + mask_len

        with torch.set_grad_enabled(with_grad):
            prob = self.model(train_input, train_m1, self.mat2s, train_m2t, train_len)  # (N, L)
        return prob, train_label

    def train(self):
        """Train for ``num_epoch`` epochs, evaluating after every epoch.

        For each user the prefixes of length 1..len-2 are used for training,
        the prefix of length len-1 for validation and the full prefix for
        testing. The model is saved whenever the mean validation accuracy
        exceeds the best value seen so far.
        """
        # set optimizer
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=0)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=1)

        for t in range(self.num_epoch):
            # counters for validation and test; NumPy arrays so that the
            # element-wise accumulation below is explicit
            valid_size, test_size = 0, 0
            acc_valid = np.zeros(4, dtype=np.int64)
            acc_test = np.zeros(4, dtype=np.int64)

            bar = tqdm(total=part)
            for step, item in enumerate(self.data_loader):
                # batch data, (N, M, 3), (N, M, M, 2), (N, M, M), (N, M), (N)
                person_input, person_traj_len = item[0], item[-1]
                seq_len = int(person_traj_len[0])

                # first, try batch_size = 1 and mini_batch = 1
                input_mask = torch.zeros((self.batch_size, max_len, 3), dtype=torch.long).to(device)
                m1_mask = torch.zeros((self.batch_size, max_len, max_len, 2), dtype=torch.float32).to(device)
                for mask_len in range(1, seq_len + 1):  # from 1 -> len
                    is_train_step = mask_len <= seq_len - 2
                    # validation / test prefixes need no autograd graph
                    prob, train_label = self._forward_prefix(
                        item, input_mask, m1_mask, mask_len, with_grad=is_train_step)

                    if is_train_step:  # only training
                        prob_sample, label_sample = sampling_prob(prob, train_label, self.num_neg)
                        loss_train = F.cross_entropy(prob_sample, label_sample)
                        loss_train.backward()
                        optimizer.step()
                        optimizer.zero_grad()
                        scheduler.step()

                    elif mask_len == seq_len - 1:  # only validation
                        valid_size += person_input.shape[0]
                        acc_valid += calculate_acc(prob, train_label)

                    elif mask_len == seq_len:  # only test
                        test_size += person_input.shape[0]
                        acc_test += calculate_acc(prob, train_label)

                bar.update(self.batch_size)
            bar.close()

            acc_valid = acc_valid / valid_size
            print('epoch:{}, time:{}, valid_acc:{}'.format(self.start_epoch + t, time.time() - start, acc_valid))
            acc_test = acc_test / test_size
            print('epoch:{}, time:{}, test_acc:{}'.format(self.start_epoch + t, time.time() - start, acc_test))

            self.records['acc_valid'].append(acc_valid)
            self.records['acc_test'].append(acc_test)
            self.records['epoch'].append(self.start_epoch + t)

            if self.threshold < np.mean(acc_valid):
                self.threshold = np.mean(acc_valid)
                # save the model
                torch.save({'state_dict': self.model.state_dict(),
                            'records': self.records,
                            'time': time.time() - start},
                           'best_stan_win_1000_' + dname + '.pth')

    def inference(self):
        """Evaluate every user and collect the steps with no hit at all.

        The data loader is traversed ``num_epoch`` times, as in the original
        code. After each user the step index and the latest validation / test
        hit counts are printed. Returns the list of step indices for which
        both counts were all zero (one entry per traversal).
        """
        user_ids = []
        for t in range(self.num_epoch):
            # latest and cumulative hit counts for validation and test
            acc_valid = np.zeros(4, dtype=np.int64)
            acc_test = np.zeros(4, dtype=np.int64)
            cum_valid = np.zeros(4, dtype=np.int64)
            cum_test = np.zeros(4, dtype=np.int64)

            for step, item in enumerate(self.data_loader):
                # batch data, (N, M, 3), (N, M, M, 2), (N, M, M), (N, M), (N)
                seq_len = int(item[-1][0])

                # first, try batch_size = 1 and mini_batch = 1
                input_mask = torch.zeros((self.batch_size, max_len, 3), dtype=torch.long).to(device)
                m1_mask = torch.zeros((self.batch_size, max_len, max_len, 2), dtype=torch.float32).to(device)
                for mask_len in range(1, seq_len + 1):  # from 1 -> len
                    if mask_len <= seq_len - 2:  # training prefixes are not evaluated
                        continue

                    prob, train_label = self._forward_prefix(
                        item, input_mask, m1_mask, mask_len, with_grad=False)
                    hits = calculate_acc(prob, train_label)

                    if mask_len == seq_len - 1:  # only validation
                        acc_valid = hits
                        cum_valid += hits
                    elif mask_len == seq_len:  # only test
                        acc_test = hits
                        cum_test += hits

                print(step, acc_valid, acc_test)

                if acc_valid.sum() == 0 and acc_test.sum() == 0:
                    user_ids.append(step)

        return user_ids
if __name__ == '__main__':
    # load the pre-processed data
    dname = 'NYC'
    # NOTE(review): joblib.load unpickles arbitrary objects - only load data
    # files that you produced yourself.
    with open('./data/' + dname + '_data.pkl', 'rb') as file:
        file_data = joblib.load(file)
    # tensor(NUM, M, 3), np(NUM, M, M, 2), np(L, L), np(NUM, M, M), tensor(NUM, M), np(NUM)
    [trajs, mat1, mat2s, mat2t, labels, lens, u_max, l_max] = file_data
    mat1, mat2s, mat2t, lens = torch.FloatTensor(mat1), torch.FloatTensor(mat2s).to(device), \
        torch.FloatTensor(mat2t), torch.LongTensor(lens)

    # The location matrix is costly in memory, so only a partition of the
    # data (the first `part` users) is used; this is the recommended setup.
    part = 100
    trajs, mat1, mat2t, labels, lens = \
        trajs[:part], mat1[:part], mat2t[:part], labels[:part], lens[:part]

    # (max, min) of the spatial intervals followed by (max, min) of the temporal ones
    ex = mat1[:, :, :, 0].max(), mat1[:, :, :, 0].min(), mat1[:, :, :, 1].max(), mat1[:, :, :, 1].min()

    stan = Model(t_dim=hours+1, l_dim=l_max+1, u_dim=u_max+1, embed_dim=50, ex=ex, dropout=0)
    num_params = 0

    for name in stan.state_dict():
        print(name)

    for param in stan.parameters():
        num_params += param.numel()
    print('num of params', num_params)

    load = False

    if load:
        # Same file name that Trainer.train() uses when it saves the best model.
        checkpoint = torch.load('best_stan_win_1000_' + dname + '.pth')
        stan.load_state_dict(checkpoint['state_dict'])
        start = time.time() - checkpoint['time']
        records = checkpoint['records']
    else:
        records = {'epoch': [], 'acc_valid': [], 'acc_test': []}
        start = time.time()

    trainer = Trainer(stan, records)
    trainer.train()
    # trainer.inference()