【stgcn】代码解读之数据加载(二)

panbaoran913

已于 2022-04-11 09:35:41 修改

阅读量2.5k

点赞数 3

分类专栏：# 代码解读
文章标签：pytorch gcn stgcn

于 2022-02-26 17:04:57 首次发布

本文链接：https://blog.csdn.net/panbaoran913/article/details/123146635

版权

代码解读专栏收录该内容

11 篇文章 14 订阅

订阅专栏

解读 utils.py

import os
import zipfile
import numpy as np
import torch

一、加载矩阵数据

def load_metr_la_data(data_dir="../PeMSD7(M)"):
    """
    Load the adjacency matrix and node values, then Z-score normalize them.

    If ``adj_mat.npy`` or ``node_values.npy`` is missing from ``data_dir``,
    ``METR-LA.zip`` (looked up in the same directory) is extracted into
    ``data_dir`` so that the loads below can find the arrays. The archive
    is presumably laid out with both ``.npy`` files at its root -- verify
    against the actual zip.

    :param data_dir: Directory that holds the ``.npy`` files and/or the zip
        archive. The default is the path that was previously hard-coded.
    :return:
        - A: adjacency matrix exactly as stored in ``adj_mat.npy``.
        - X: float32 node values with the stored axes reordered by
          ``(1, 2, 0)`` and standardized per index of axis 1 (presumably
          shape (num_vertices, num_features, num_timesteps) -- confirm
          against the stored array layout).
        - means: the per-axis-1 means that were subtracted from X.
        - stds: the per-axis-1 standard deviations X was divided by.
    :raises FileNotFoundError: if neither the arrays nor the archive exist.
    """
    adj_path = os.path.join(data_dir, "adj_mat.npy")
    values_path = os.path.join(data_dir, "node_values.npy")

    # Extract into the SAME directory we load from; extracting elsewhere
    # would leave the np.load calls below pointing at missing files.
    if not (os.path.isfile(adj_path) and os.path.isfile(values_path)):
        archive_path = os.path.join(data_dir, "METR-LA.zip")
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall(data_dir)

    A = np.load(adj_path)
    # Move the first stored axis to the end: (0, 1, 2) -> (1, 2, 0).
    X = np.load(values_path).transpose((1, 2, 0))
    X = X.astype(np.float32)

    # Z-score normalization: statistics are taken over axes 0 and 2, i.e.
    # one mean / standard deviation per index of axis 1.
    means = np.mean(X, axis=(0, 2))
    X = X - means.reshape(1, -1, 1)
    # Standard deviation (not variance) of the centered data.
    # NOTE: a constant slice along axis 1 has std 0 and yields inf/NaN here.
    stds = np.std(X, axis=(0, 2))
    X = X / stds.reshape(1, -1, 1)

    return A, X, means, stds

注释：
1. np.transpose()：重排坐标轴。transpose((1, 2, 0)) 把原数组的第 1、2、0 轴依次作为新数组的第 0、1、2 轴，即形状 (a, b, c) → (b, c, a)。

二、拉普拉斯矩阵归一化（计算带自环的对称归一化邻接矩阵 $\hat{A} = \tilde{D}^{-1/2}(A + I)\tilde{D}^{-1/2}$，其中 $\tilde{D}$ 为 $A + I$ 的度矩阵）

def get_normalized_adj(A):
    """
    Return the symmetrically degree-normalized adjacency matrix.

    Self-loops are added first, so the result is
    ``D^(-1/2) (A + I) D^(-1/2)`` where ``D`` holds the row sums of
    ``A + I``.

    :param A: Square adjacency matrix.
    :return: Normalized matrix with the same shape as ``A``.
    """
    num_nodes = A.shape[0]
    # A + I: give every node a self-loop.
    with_self_loops = A + np.eye(num_nodes, dtype=np.float32)

    # Row sums form the degree vector; clamp tiny values so the reciprocal
    # below cannot blow up to inf.
    degrees = np.asarray(np.sum(with_self_loops, axis=1)).reshape(-1)
    degrees = np.maximum(degrees, 10e-5)

    inv_sqrt_deg = np.reciprocal(np.sqrt(degrees))

    # Scale rows, then columns, by D^(-1/2) (element-wise broadcasting).
    row_scaled = np.multiply(inv_sqrt_deg.reshape((-1, 1)), with_self_loops)
    return np.multiply(row_scaled, inv_sqrt_deg.reshape((1, -1)))

注释：

np.sqrt(D)：返回数组 D 逐元素的平方根。
np.reciprocal()：返回参数逐元素的倒数。

三、生成数据集（按滑动窗口切分样本）

def generate_dataset(X, num_timesteps_input, num_timesteps_output):
    """
    Slice node features into sliding-window (input, target) samples.

    A window of size ``num_timesteps_input + num_timesteps_output`` is slid
    along the time axis (axis 2) in steps of 1. The first
    ``num_timesteps_input`` steps of each window become the input sample;
    the remaining ``num_timesteps_output`` steps of feature 0 become the
    target.

    :param X: NumPy array of node features with shape
        (num_vertices, num_features, num_timesteps).
    :param num_timesteps_input: Number of time steps in each input sample.
    :param num_timesteps_output: Number of time steps in each target.
    :return:
        - Input tensor of shape
          (num_samples, num_vertices, num_timesteps_input, num_features);
          the feature axis is moved last by the transpose below.
        - Target tensor of shape
          (num_samples, num_vertices, num_timesteps_output); only feature
          index 0 is kept.
        ``num_samples`` is ``num_timesteps - window + 1``, or 0 when the
        series is shorter than one window. In that case both tensors are
        empty but keep the shapes above and the dtype of ``X``.
    """
    window = num_timesteps_input + num_timesteps_output
    # Clamp at 0 so a too-short series yields empty, correctly shaped outputs
    # instead of rank-1 float64 tensors.
    num_samples = max(X.shape[2] - window + 1, 0)
    num_vertices, num_features = X.shape[0], X.shape[1]

    features = np.empty(
        (num_samples, num_vertices, num_timesteps_input, num_features),
        dtype=X.dtype)
    target = np.empty(
        (num_samples, num_vertices, num_timesteps_output),
        dtype=X.dtype)

    for start in range(num_samples):
        split = start + num_timesteps_input
        # (vertices, features, time) -> (vertices, time, features)
        features[start] = X[:, :, start:split].transpose((0, 2, 1))
        # Targets use feature 0 only, over the tail of the window.
        target[start] = X[:, 0, split:start + window]

    return torch.from_numpy(features), torch.from_numpy(target)