1. Data preparation
The DCRNN GitHub project ships the complete dataset.
The data bundled with Graph WaveNet is missing '/data/sensor_graph/adj_mx.pkl'.
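For reference, adj_mx.pkl is a pickled 3-tuple (sensor_ids, sensor_id_to_ind, adj_mx), which is how load_adj in section 4 unpacks it. A minimal sketch for inspecting a copy taken from the DCRNN repo (the local path is an assumption):

import pickle

# Path is an assumption; point it at wherever the DCRNN copy lives.
with open('data/sensor_graph/adj_mx.pkl', 'rb') as f:
    sensor_ids, sensor_id_to_ind, adj_mx = pickle.load(f, encoding='latin1')
print(len(sensor_ids), adj_mx.shape)  # 207 sensors for METR-LA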
2. The difference between engine.train and engine.eval
The only differences are the model.train() / model.eval() mode switch, and that eval runs the forward pass under torch.no_grad() with no backward pass or optimizer step.
For the specifics of train vs. eval mode, see https://blog.csdn.net/qq_43118572/article/details/118615095
import torch
import torch.nn as nn
import torch.optim as optim
import util
from model import gwnet

class trainer():
    def __init__(self, scaler, in_dim, seq_length, num_nodes, nhid, dropout, lrate, wdecay,
                 device, supports, gcn_bool, addaptadj, aptinit):
        self.model = gwnet(device, num_nodes, dropout, supports=supports, gcn_bool=gcn_bool,
                           addaptadj=addaptadj, aptinit=aptinit, in_dim=in_dim, out_dim=seq_length,
                           residual_channels=nhid, skip_channels=nhid * 8, end_channels=nhid * 16)
        self.model.to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay)
        self.loss = util.masked_mae
        self.scaler = scaler
        self.clip = 5

    def train(self, input, real_val):
        self.model.train()
        self.optimizer.zero_grad()
        # pad one step on the time axis so the dilated convolutions line up
        input = nn.functional.pad(input, (1, 0, 0, 0))
        output = self.model(input)       # [batch, time, node, 1]
        output = output.transpose(1, 3)  # -> [batch, 1, node, time]
        real = torch.unsqueeze(real_val, dim=1)
        predict = self.scaler.inverse_transform(output)
        loss = self.loss(predict, real, 0.0)
        loss.backward()
        if self.clip is not None:
            # gradient clipping: rescales gradients so their total norm never exceeds max_norm
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        self.optimizer.step()
        mape = util.masked_mape(predict, real, 0.0).item()
        rmse = util.masked_rmse(predict, real, 0.0).item()
        return loss.item(), mape, rmse

    def eval(self, input, real_val):
        self.model.eval()
        input = nn.functional.pad(input, (1, 0, 0, 0))
        with torch.no_grad():
            output = self.model(input)
        output = output.transpose(1, 3)
        real = torch.unsqueeze(real_val, dim=1)
        predict = self.scaler.inverse_transform(output)
        loss = self.loss(predict, real, 0.0)
        mape = util.masked_mape(predict, real, 0.0).item()
        rmse = util.masked_rmse(predict, real, 0.0).item()
        return loss.item(), mape, rmse
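A usage sketch, following the tensor layout in Graph WaveNet's train.py ([batch, feature, node, time]); the dataset path, adjacency path, and hyperparameter values here are assumptions:

dataloader = util.load_dataset('data/METR-LA', batch_size=64,
                               valid_batch_size=64, test_batch_size=64)
_, _, adj_mx = util.load_adj('data/sensor_graph/adj_mx.pkl', 'doubletransition')
supports = [torch.tensor(i).to('cuda') for i in adj_mx]
engine = trainer(dataloader['scaler'], in_dim=2, seq_length=12, num_nodes=207,
                 nhid=32, dropout=0.3, lrate=0.001, wdecay=0.0001,
                 device='cuda', supports=supports, gcn_bool=True,
                 addaptadj=True, aptinit=None)
for x, y in dataloader['train_loader'].get_iterator():
    trainx = torch.Tensor(x).transpose(1, 3).to('cuda')  # -> [B, F, N, T]
    trainy = torch.Tensor(y).transpose(1, 3).to('cuda')
    loss, mape, rmse = engine.train(trainx, trainy[:, 0, :, :])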
3. Masked loss
For why the loss needs masking, see: https://blog.csdn.net/qq_43118572/article/details/118698359
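As a toy illustration (using the masked_mae defined in section 4, with 0.0 marking missing readings): a plain MAE averages over the missing entries and distorts the error, while the masked version averages only over the valid ones.

import torch

labels = torch.tensor([10.0, 0.0, 20.0, 0.0])  # 0.0 == missing sensor reading
preds = torch.tensor([12.0, 5.0, 18.0, 3.0])
plain = torch.abs(preds - labels).mean()  # 3.0, distorted by the missing entries
masked = masked_mae(preds, labels, 0.0)   # 2.0, averages only the valid entries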
4. Functions used in util:
import pickle
import numpy as np
import os
import scipy.sparse as sp
import torch
from scipy.sparse import linalg
class DataLoader(object):
    def __init__(self, xs, ys, batch_size, pad_with_last_sample=True):
        self.batch_size = batch_size
        self.current_ind = 0
        if pad_with_last_sample:
            # repeat the last sample so every batch is full
            num_padding = (batch_size - (len(xs) % batch_size)) % batch_size
            x_padding = np.repeat(xs[-1:], num_padding, axis=0)
            y_padding = np.repeat(ys[-1:], num_padding, axis=0)
            xs = np.concatenate([xs, x_padding], axis=0)
            ys = np.concatenate([ys, y_padding], axis=0)
        self.size = len(xs)
        self.num_batch = int(self.size // self.batch_size)
        self.xs = xs
        self.ys = ys

    def shuffle(self):
        permutation = np.random.permutation(self.size)
        xs, ys = self.xs[permutation], self.ys[permutation]
        self.xs = xs
        self.ys = ys

    # generator that yields one mini-batch at a time
    def get_iterator(self):
        self.current_ind = 0

        def _wrapper():
            while self.current_ind < self.num_batch:
                start_ind = self.batch_size * self.current_ind
                end_ind = min(self.size, self.batch_size * (self.current_ind + 1))
                x_i = self.xs[start_ind:end_ind, ...]
                y_i = self.ys[start_ind:end_ind, ...]
                yield (x_i, y_i)
                self.current_ind += 1

        return _wrapper()
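A quick check of the padding: with 10 samples and batch_size=4, the last sample is repeated twice so the final batch is full (the shapes below are arbitrary):

xs = np.random.randn(10, 12, 207, 2)
ys = np.random.randn(10, 12, 207, 2)
loader = DataLoader(xs, ys, batch_size=4)
loader.shuffle()
for x_i, y_i in loader.get_iterator():
    print(x_i.shape)  # (4, 12, 207, 2), three batches in total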
class StandardScaler():
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    # standardize: (x - mean) / std
    def transform(self, data):
        return (data - self.mean) / self.std

    # undo the standardization
    def inverse_transform(self, data):
        return data * self.std + self.mean
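A round-trip sanity check: inverse_transform undoes transform exactly.

data = np.array([1.0, 2.0, 3.0])
scaler = StandardScaler(mean=data.mean(), std=data.std())
z = scaler.transform(data)  # zero mean, unit variance
assert np.allclose(scaler.inverse_transform(z), data)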
# For symmetric A: [A * D^(-1/2)]^T * D^(-1/2) = D^(-1/2) * A * D^(-1/2)
def sym_adj(adj):
    """Symmetrically normalize adjacency matrix."""
    adj = sp.coo_matrix(adj)  # convert to COO sparse format
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()  # flatten the n x 1 matrix into a vector
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.  # isolated nodes have degree 0, so 0^(-1/2) = inf; zero those out
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)  # sparse diagonal matrix D^(-1/2)
    # toarray returns an ndarray; todense returns a np.matrix
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).astype(np.float32).todense()
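A toy check on a 2-node graph with self-loops: both degrees are 2, so every entry is scaled by 1/sqrt(2) twice, i.e. halved.

A = np.array([[1.0, 1.0],
              [1.0, 1.0]])
print(sym_adj(A))  # [[0.5, 0.5], [0.5, 0.5]]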
# D^(-1) * A: row-normalized (random-walk) transition matrix
def asym_adj(adj):
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv = np.power(rowsum, -1).flatten()
    d_inv[np.isinf(d_inv)] = 0.
    d_mat = sp.diags(d_inv)
    return d_mat.dot(adj).astype(np.float32).todense()
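Because each row is divided by its degree, every row of the result sums to 1:

A = np.array([[0.0, 2.0, 2.0],
              [1.0, 0.0, 0.0],
              [0.0, 3.0, 0.0]])
P = asym_adj(A)
print(np.asarray(P).sum(axis=1))  # [1. 1. 1.]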
def calculate_normalized_laplacian(adj):
    # L = D^(-1/2) (D - A) D^(-1/2) = I - D^(-1/2) A D^(-1/2)
    # D = diag(A 1)
    # Everything must stay sparse here: sym_adj above returns a dense matrix,
    # which cannot be subtracted from the sparse identity, so the
    # normalization is inlined in sparse form instead.
    adj = sp.coo_matrix(adj)
    d = np.array(adj.sum(1))
    d_inv_sqrt = np.power(d, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    normalized_laplacian = sp.eye(adj.shape[0]) - adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()
    return normalized_laplacian
def calculate_scaled_laplacian(adj_mx, lambda_max=2, undirected=True):
    if undirected:
        # symmetrize by taking the elementwise max of A and A^T
        adj_mx = np.maximum.reduce([adj_mx, adj_mx.T])
    L = calculate_normalized_laplacian(adj_mx)
    if lambda_max is None:
        # 'LM': largest (in magnitude) eigenvalue; returns one eigenvalue and its eigenvector
        lambda_max, _ = linalg.eigsh(L, 1, which='LM')
        lambda_max = lambda_max[0]
    L = sp.csc_matrix(L)  # convert to CSC sparse format
    M, _ = L.shape  # number of rows of the original matrix
    I = sp.identity(M, format='csr', dtype=L.dtype)
    L = (2 / lambda_max * L) - I
    return L.astype(np.float32).todense()
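The rescaling 2L / lambda_max - I maps the spectrum of L from [0, lambda_max] into [-1, 1], the interval Chebyshev polynomial filters expect. On a 2-node path graph, L has eigenvalues 0 and 2, so the scaled Laplacian has exactly -1 and 1:

A = np.array([[0.0, 1.0],
              [1.0, 0.0]])
L_scaled = calculate_scaled_laplacian(A, lambda_max=2)
print(np.linalg.eigvalsh(L_scaled))  # [-1.  1.]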
def load_pickle(pickle_file):
    try:
        with open(pickle_file, 'rb') as f:
            pickle_data = pickle.load(f)
    except UnicodeDecodeError as e:
        # fall back for pickles written under Python 2
        with open(pickle_file, 'rb') as f:
            pickle_data = pickle.load(f, encoding='latin1')
    except Exception as e:
        print('Unable to load data', pickle_file, ':', e)
        raise
    return pickle_data
def load_adj(pkl_filename, adjtype):
    sensor_ids, sensor_id_to_ind, adj_mx = load_pickle(pkl_filename)
    if adjtype == "scalap":
        adj = [calculate_scaled_laplacian(adj_mx)]
    elif adjtype == "normlap":
        adj = [calculate_normalized_laplacian(adj_mx).astype(np.float32).todense()]
    elif adjtype == "symnadj":
        adj = [sym_adj(adj_mx)]
    elif adjtype == "transition":
        adj = [asym_adj(adj_mx)]
    elif adjtype == "doubletransition":
        adj = [asym_adj(adj_mx), asym_adj(np.transpose(adj_mx))]
    elif adjtype == "identity":
        adj = [np.diag(np.ones(adj_mx.shape[0])).astype(np.float32)]
    else:
        raise ValueError("adj type not defined")
    return sensor_ids, sensor_id_to_ind, adj
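Graph WaveNet's default adjtype is "doubletransition", i.e. forward and backward random-walk matrices; train.py then wraps them as torch tensors and passes them to the model as supports (the path here is an assumption):

sensor_ids, sensor_id_to_ind, adj = load_adj('data/sensor_graph/adj_mx.pkl',
                                             'doubletransition')
supports = [torch.tensor(i) for i in adj]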
# Returns the train/val/test DataLoaders plus the StandardScaler (mean and std
# computed on the training inputs) needed to normalize and de-normalize the data.
def load_dataset(dataset_dir, batch_size, valid_batch_size=None, test_batch_size=None):
    data = {}
    for category in ['train', 'val', 'test']:
        cat_data = np.load(os.path.join(dataset_dir, category + '.npz'))
        data['x_' + category] = cat_data['x']
        data['y_' + category] = cat_data['y']
    scaler = StandardScaler(mean=data['x_train'][..., 0].mean(), std=data['x_train'][..., 0].std())
    # standardize only the first feature channel (the readings themselves)
    for category in ['train', 'val', 'test']:
        data['x_' + category][..., 0] = scaler.transform(data['x_' + category][..., 0])
    data['train_loader'] = DataLoader(data['x_train'], data['y_train'], batch_size)
    data['val_loader'] = DataLoader(data['x_val'], data['y_val'], valid_batch_size)
    data['test_loader'] = DataLoader(data['x_test'], data['y_test'], test_batch_size)
    data['scaler'] = scaler
    return data
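A sketch of a typical call, assuming the standard METR-LA preprocessing (12 input steps, 12 output steps, 207 sensors, 2 feature channels; the path and shapes are assumptions):

data = load_dataset('data/METR-LA', batch_size=64,
                    valid_batch_size=64, test_batch_size=64)
print(data['x_train'].shape)  # e.g. (num_samples, 12, 207, 2)
print(data['scaler'].mean, data['scaler'].std)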
def masked_mse(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    # renormalize the mask so averaging over all entries equals averaging over valid ones
    mask /= torch.mean((mask))
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = (preds - labels) ** 2
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)

def masked_rmse(preds, labels, null_val=np.nan):
    return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val))

def masked_mae(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean((mask))
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels)
    loss *= mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)

def masked_mape(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    mask /= torch.mean((mask))
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels) / labels
    loss *= mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)
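Note how masked_mape survives division by zero: where labels == 0 the ratio is inf, multiplying by the zero mask turns inf * 0 into nan, and the final torch.where zeros it out. A tiny check:

preds = torch.tensor([2.0, 3.0])
labels = torch.tensor([4.0, 0.0])
print(masked_mape(preds, labels, 0.0))  # tensor(0.5000): only the first entry counts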
def metric(pred, real):
    # torch.Tensor.item(): returns the value of this tensor as a standard Python number
    mae = masked_mae(pred, real, 0.0).item()
    mape = masked_mape(pred, real, 0.0).item()
    rmse = masked_rmse(pred, real, 0.0).item()
    return mae, mape, rmse