前言
初代频域GCN简单粗暴的将
d
i
a
g
(
g
^
(
λ
l
)
)
diag(\hat{g}{(\lambda_l)})
diag(g^(λl))变成了卷积核
d
i
a
g
(
θ
l
)
diag(\theta_l)
diag(θl)
由此最终形式为
y
o
u
t
p
u
t
=
σ
(
U
g
θ
(
Λ
)
U
T
x
)
y_{output}=\sigma(U{g_\theta}(\Lambda)U^Tx)
youtput=σ(Ugθ(Λ)UTx)其中
第一代的参数方法存在着一些弊端,主要在于:
- 每一次的前向传播,存在高维数矩阵的运算,计算代价相对较高
- 卷积核的定义方式意味着不具有spatial localization
- 卷积核需要n个参数导致效率降低
为解决这些问题,提出了在卷积核表达形式上进行优化的第二代频域GCN方法:利用Chebyshev多项式作为卷积核
同时此篇文章中所定义的卷积公式严格局部定位,计算复杂度低,滤波器复杂度线性,图池化
1.利用Chebyshev多项式作为卷积核
推导过程
通过上述推导过程我们可以推导出几个结论:
- 通过切比雪夫多项式的近似拟合卷积核,实现了卷积核维度的下降,降低计算复杂度
- 通过卷积核的表达形式同时结合图的邻接关系,明显可以看出卷积核的localize特性。因为它是拉普拉斯算子中的 K t h K_{th} Kth阶多项式,即它仅取决于离中央节点( K t h K_{th} Kth阶邻域)最大 K K K步的节点
- T K ( L ) x T _K(L)x TK(L)x的复杂度是 O ( ∣ E ∣ ) O(|E|) O(∣E∣),即与边数E呈线性关系,整个运算的复杂度是 O ( K ∣ E ∣ ) O(K|E|) O(K∣E∣)。当graph是稀疏图的时候,计算加速尤为明显,这个时候复杂度远低于 O ( n 2 ) O(n^2) O(n2)
2.图池化
每级粗化我们可以得到一个粗化图 ( g 0 , g 1 , g 2 , g 3 , g 4 ) (g0,g1,g2,g3,g4) (g0,g1,g2,g3,g4),将该粗化图添加上一些辅助节点(蓝色圆),使粗化图中每个顶点都有2个孩子,从而构成一个二叉树,将二叉树摊平构成一维信号,然后对该信号采样即可表示为一次图pooling。这部分作者借助的是之前的工作,这个构建二叉树于与摊平操作是依靠METIS时的粗话图之间的关系。
3.整个GCN过程
4.总结
- 此篇文章最重要的地方就是利用切比雪夫多项式近似拟合卷积核来解决初代频域GCN的一些缺点
- 同时作者原码中利用MNIST数据集作为实验,如何将图片数据转换为图非常重要
- 代码实现流程参考
- 相关数学基础:正定矩阵和半正定矩阵,矩阵特征值和特征向量的意义,实对称矩阵,拉普拉斯矩阵和拉普拉斯算子以及在图上的推广,梯度和散度,拉普拉斯矩阵的特征向量和傅里叶变换的基的关系,傅里叶变换,正交向量组等等。有些列举的非常简单,但这是了解频域GCN原理不可缺少的相关知识
5.代码
MNIST数据专用
MNIST.py
```python
# -*- coding: utf-8 -*-
import sys, os
sys.path.insert(0, '..')
from lib import models, graph, coarsening, utils
import tensorflow.compat.v1 as tf
import numpy as np
import time
# %matplotlib inline
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')
flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).') # 相似度测量
flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')
flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.')
flags.DEFINE_string('dir_data', os.path.join('..', 'data', 'mnist'), 'Directory to store data.')
"""# Feature graph 图结构描述,即准备邻接矩阵A的拉普拉斯L"""
def grid_graph(m, corners=False):
z = graph.grid(m) # 该函数说白了就产生一个28*28网格的每个点的坐标
dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric) # 顶点K邻近点计算
A = graph.adjacency(dist, idx) # 构建表示图的邻接矩阵 A
# Connections are only vertical or horizontal on the grid. 网格上的连接只有水平或者垂直
# Corner vertices are connected to 2 neightbors only. 转角处的顶点只与两个邻接点连接
if corners:
import scipy.sparse
A = A.toarray()
A[A < A.max() / 1.5] = 0
A = scipy.sparse.csr_matrix(A)
print('{} edges'.format(A.nnz))
# A.nnz返回矩阵中非零结点的个数
print("{} > {} edges".format(A.nnz // 2, FLAGS.number_edges * m ** 2 // 2))
return A
t_start = time.process_time()
A = grid_graph(28, corners=False) # "邻接矩阵"
A = graph.replace_random_edges(A, 0) # "添加噪声的邻接矩阵"
# 借助图的粗化
graphs, perm = coarsening.coarsen(A, levels=FLAGS.coarsening_levels, self_connections=False) # 粗化图
# # graphs为列表形式,图粗化分为5层对图进行简化。graphs列表的每个元素存储每层图粗化的相关矩阵信息。图结点坐标形式+距离权重
# # perm为列表形式存储根据返回值可知此处perm为第一层所有节点索引,要基于此对图像结点进行重排序以适应多层的粗化图
# print('graphs的类型',type(graphs),'\ngraphs的长度',len(graphs),'\ngrapgs的值\n',graphs,'\ngraphs[0]',graphs[0])
# print('perm的类型',type(perm),'\nperm的长度',len(perm),'\nperm的值\n',perm)
L = [graph.laplacian(A, normalized=True) for A in graphs] # 对图粗化产生的每层邻接矩阵进行拉普拉斯变换
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
# graph.plot_spectrum(L)
del A
"""# Data 准备"""
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False)
#
train_data = mnist.train.images.astype(np.float32)
# print('train_data的维度:',train_data.shape) # train_data的维度: (55000, 784)
val_data = mnist.validation.images.astype(np.float32)
# print('val_data的维度:',val_data.shape) # val_data的维度: (5000, 784)
test_data = mnist.test.images.astype(np.float32)
# print('test_data的维度:',test_data.shape) # test_data的维度: (10000, 784)
train_labels = mnist.train.labels
# print('train_labels的维度:',train_labels.shape) # train_labels的维度: (55000,)
val_labels = mnist.validation.labels
# print('val_labels的维度:',val_labels.shape) # val_labels的维度: (5000,)
test_labels = mnist.test.labels
# print('test_labels的维度:',test_labels.shape) # test_labels的维度: (10000,)
t_start = time.process_time()
train_data = coarsening.perm_data(train_data, perm) # 根据第一层粗化的结点数对图像进行重排序
val_data = coarsening.perm_data(val_data, perm)
test_data = coarsening.perm_data(test_data, perm)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
del perm
"""# Neural networks"""
common = {}
common['dir_name'] = 'mnist/'
common['num_epochs'] = 20
common['batch_size'] = 100
common['decay_steps'] = mnist.train.num_examples / common['batch_size']
common['eval_frequency'] = 30 * common['num_epochs']
common['brelu'] = 'b1relu'
common['pool'] = 'mpool1'
C = max(mnist.train.labels) + 1 # number of classes
model_perf = utils.model_perf() # 模型对比函数
# 参数 for LeNet5-like networks.
common['regularization'] = 5e-4
common['dropout'] = 0.5
common['learning_rate'] = 0.02 # 0.03 in the paper but sgconv_sgconv_fc_softmax has difficulty to converge
common['decay_rate'] = 0.95
common['momentum'] = 0.9
common['F'] = [32, 64] # 每次卷积的输出feature大小
common['K'] = [25, 25] # 每个卷积核的多项式项数
common['p'] = [4, 4] # 每次卷积后的池化大小,池化次数与卷积次数一致
common['M'] = [512, C] # 全连接层输出
# Architecture of TF MNIST conv model (LeNet-5-like).
if True:
name = 'Chebyshev_test' # 'Chebyshev'
params = common.copy()
params['dir_name'] += name
params['filter'] = 'chebyshev5' # 使用chebyshev5所建的滤波器
model_perf.test(models.cgcnn(L, **params), name, params,
train_data, train_labels, val_data, val_labels, test_data, test_labels)
model_perf.show()
print('end')
model = models.cgcnn(L, **params)
accuracy, loss, t_step = model.fit(train_data, train_labels, val_data, val_labels)
'''
#or 使用下面的代码可以在训练后不显示评价结果,而是直接训练。
model = models.cgcnn(L, **params)
accuracy, loss, t_step = model.fit(train_data, train_labels, val_data, val_labels)
'''
graph.py的部分注释
import sklearn.metrics
import sklearn.neighbors
import matplotlib.pyplot as plt
import scipy.sparse
import scipy.sparse.linalg
import scipy.spatial.distance
import numpy as np
def grid(m, dtype=np.float32):
"""Return the embedding of a grid graph."""
# print('创建网格点传入的参数m:', m) 28
M = m**2
x = np.linspace(0, 1, m, dtype=dtype)
# print('x的维度:', x.shape) 28*1 0-1/28
y = np.linspace(0, 1, m, dtype=dtype)
# print('y的维度:', y.shape) 28*1 0-1/28
xx, yy = np.meshgrid(x, y)
# print('xx的维度:', xx.shape) 其值为0-1/28重复28次创建网格共784个点
# print('yy的维度:', yy.shape) 其值为0-1/28重复28次创建网格共784个点
z = np.empty((M, 2), dtype)
z[:, 0] = xx.reshape(M)
z[:, 1] = yy.reshape(M)
# print('z的维度:', z.shape) 784*2 将初始化的网格摊平共有784个点每个点对应一对x,y坐标
return z
def distance_scipy_spatial(z, k=4, metric='euclidean'):
"""Compute exact pairwise distances."""
d = scipy.spatial.distance.pdist(z, metric)
d = scipy.spatial.distance.squareform(d)
# k-NN graph.
idx = np.argsort(d)[:, 1:k+1]
d.sort()
d = d[:, 1:k+1]
return d, idx
# 顶点K邻近点计算
def distance_sklearn_metrics(z, k=4, metric='euclidean'):
"""Compute exact pairwise distances."""
# print('z的维度:',z.shape) z的维度为784*2相当于传入784个点的坐标
d = sklearn.metrics.pairwise.pairwise_distances(
z, metric=metric, n_jobs=-2)
# print('d的维度:',d.shape) d的维度为784*784,相当于根据传入的坐标计算每个点相对于其他点的距离。对角线元素均为0且为对称矩阵
# k-NN graph.
idx = np.argsort(d)[:, 1:k+1] # 将距离从小到大排列从中选取距离最近的k个节点并提取索引到idx,从1开始是排除自身节点
# print('idx的数据类型:', type(idx), 'idx的长度:', len(idx)) # idx的数据类型: <class 'numpy.ndarray'> idx的长度: 784*8
d.sort()
d = d[:, 1:k+1]
# print('d的维度:', d.shape) # 784*8 每个结点K临近点的距离矩阵
# print('idx的长度', len(idx)) # 784*8 每个结点K临近点的索引列表
return d, idx
def distance_lshforest(z, k=4, metric='cosine'):
"""Return an approximation of the k-nearest cosine distances."""
assert metric is 'cosine'
lshf = sklearn.neighbors.LSHForest()
lshf.fit(z)
dist, idx = lshf.kneighbors(z, n_neighbors=k+1)
assert dist.min() < 1e-10
dist[dist < 0] = 0
return dist, idx
# TODO: other ANNs s.a. NMSLIB, EFANNA, FLANN, Annoy, sklearn neighbors, PANN
# 根据每个结点K临近点的距离矩阵dist,每个结点K临近点的索引列表创建邻接矩阵idx
# dist 784*8矩阵形式
# idx 784*8列表形式
def adjacency(dist, idx):
"""Return the adjacency matrix of a kNN graph."""
M, k = dist.shape
# print('M的值:',M) # 784
# print('k的值:',k) # 8
assert M, k == idx.shape
assert dist.min() >= 0
# Weights.
sigma2 = np.mean(dist[:, -1])**2
# print('sigma2的值',sigma2) 每个结点的距离的平方平均和
dist = np.exp(- dist**2 / sigma2)
# print('dist的维度:',dist.shape) # 对距离矩阵的值进行统一处理
# Weight matrix.
I = np.arange(0, M).repeat(k) # 0,M是784个点重复k次构建邻接矩阵 repeat方法在同一纬度上叠加 6272=784*8
# print('I的维度:', I.shape)
J = idx.reshape(M*k) # 6272=784*8
# print('J的维度:', J.shape)
V = dist.reshape(M*k) # 6272=784*8
# print('V的维度:', V.shape)
W = scipy.sparse.coo_matrix((V, (I, J)), shape=(M, M)) # 根据I,J提供的结点索引和权重矩阵V的值构建邻接矩阵
# print('W的维度:', W.shape)
# No self-connections.
W.setdiag(0) # 忽略自身连接,因此对角矩阵置为0
# Non-directed graph. # 无向图
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
assert W.nnz % 2 == 0
assert np.abs(W - W.T).mean() < 1e-10
assert type(W) is scipy.sparse.csr.csr_matrix
# print('W的维度:',W.shape) # W的维度: (784, 784)
return W
# "添加噪声的邻接矩阵"
def replace_random_edges(A, noise_level):
"""Replace randomly chosen edges by random edges."""
M, M = A.shape # 784,784
n = int(noise_level * A.nnz // 2)
# print(n) noise_level传入参数为0
indices = np.random.permutation(A.nnz//2)[:n] # np.random.permutation():随机排列序列。 noise_level传入参数为0 indices=[]
rows = np.random.randint(0, M, n) # noise_level传入参数为0 rows=[]
cols = np.random.randint(0, M, n) # noise_level传入参数为0 cols=[]
vals = np.random.uniform(0, 1, n) # noise_level传入参数为0 vals=[]
assert len(indices) == len(rows) == len(cols) == len(vals)
A_coo = scipy.sparse.triu(A, format='coo') # scipy.sparse.triu(A, k=0, format=None) 以稀疏格式返回矩阵的上三角部分。foo代表稀疏矩阵格式
# print('A_coo的维度',A_coo.shape) # 784*784上三角矩阵784,783...1
# print(A_coo) # 存储格式为矩阵结点索引+距离权重
assert A_coo.nnz == A.nnz // 2
assert A_coo.nnz >= n
A = A.tolil() # 对稀疏矩阵进程操作前的必要转换,提高计算效率
for idx, row, col, val in zip(indices, rows, cols, vals):
old_row = A_coo.row[idx]
old_col = A_coo.col[idx]
A[old_row, old_col] = 0
A[old_col, old_row] = 0
A[row, col] = 1
A[col, row] = 1
A.setdiag(0) # 将对角矩阵置为0,不考虑自身连接
A = A.tocsr() # 转换数据格式
A.eliminate_zeros()
return A
def laplacian(W, normalized=True):
"""Return the Laplacian of the weigth matrix."""
# Degree matrix.
d = W.sum(axis=0) # 以行为推进计算每个点的度
# print('d的维度:',d.shape) # 得出每个点及其对应的度
# Laplacian matrix.
if not normalized:
D = scipy.sparse.diags(d.A.squeeze(), 0)
L = D - W
else:
d += np.spacing(np.array(0, W.dtype))
d = 1 / np.sqrt(d)
# print(d)
D = scipy.sparse.diags(d.A.squeeze(), 0) # 根据每个点的度构造度对角矩阵
# print('D的维度:', D.shape)
# print('D的值:', D)
I = scipy.sparse.identity(d.size, dtype=W.dtype)
L = I - D * W * D
# assert np.abs(L - L.T).mean() < 1e-9
assert type(L) is scipy.sparse.csr.csr_matrix
return L
def lmax(L, normalized=True):
"""Upper-bound on the spectrum."""
if normalized:
return 2
else:
return scipy.sparse.linalg.eigsh(
L, k=1, which='LM', return_eigenvectors=False)[0]
def fourier(L, algo='eigh', k=1):
"""Return the Fourier basis, i.e. the EVD of the Laplacian."""
def sort(lamb, U):
idx = lamb.argsort()
return lamb[idx], U[:, idx]
if algo is 'eig':
lamb, U = np.linalg.eig(L.toarray())
lamb, U = sort(lamb, U)
elif algo is 'eigh':
lamb, U = np.linalg.eigh(L.toarray())
elif algo is 'eigs':
lamb, U = scipy.sparse.linalg.eigs(L, k=k, which='SM')
lamb, U = sort(lamb, U)
elif algo is 'eigsh':
lamb, U = scipy.sparse.linalg.eigsh(L, k=k, which='SM')
return lamb, U
def plot_spectrum(L, algo='eig'):
"""Plot the spectrum of a list of multi-scale Laplacians L."""
# Algo is eig to be sure to get all eigenvalues.
plt.figure(figsize=(17, 5))
for i, lap in enumerate(L):
lamb, U = fourier(lap, algo)
step = 2**i
x = range(step//2, L[0].shape[0], step)
lb = 'L_{} spectrum in [{:1.2e}, {:1.2e}]'.format(i, lamb[0], lamb[-1])
plt.plot(x, lamb, '.', label=lb)
plt.legend(loc='best')
plt.xlim(0, L[0].shape[0])
plt.ylim(ymin=0)
def lanczos(L, X, K):
"""
Given the graph Laplacian and a data matrix, return a data matrix which can
be multiplied by the filter coefficients to filter X using the Lanczos
polynomial approximation.
"""
M, N = X.shape
assert L.dtype == X.dtype
def basis(L, X, K):
"""
Lanczos algorithm which computes the orthogonal matrix V and the
tri-diagonal matrix H.
"""
a = np.empty((K, N), L.dtype)
b = np.zeros((K, N), L.dtype)
V = np.empty((K, M, N), L.dtype)
V[0, ...] = X / np.linalg.norm(X, axis=0)
for k in range(K-1):
W = L.dot(V[k, ...])
a[k, :] = np.sum(W * V[k, ...], axis=0)
W = W - a[k, :] * V[k, ...] - (
b[k, :] * V[k-1, ...] if k > 0 else 0)
b[k+1, :] = np.linalg.norm(W, axis=0)
V[k+1, ...] = W / b[k+1, :]
a[K-1, :] = np.sum(L.dot(V[K-1, ...]) * V[K-1, ...], axis=0)
return V, a, b
def diag_H(a, b, K):
"""Diagonalize the tri-diagonal H matrix."""
H = np.zeros((K*K, N), a.dtype)
H[:K**2:K+1, :] = a
H[1:(K-1)*K:K+1, :] = b[1:, :]
H.shape = (K, K, N)
Q = np.linalg.eigh(H.T, UPLO='L')[1]
Q = np.swapaxes(Q, 1, 2).T
return Q
V, a, b = basis(L, X, K)
Q = diag_H(a, b, K)
Xt = np.empty((K, M, N), L.dtype)
for n in range(N):
Xt[..., n] = Q[..., n].T.dot(V[..., n])
Xt *= Q[0, :, np.newaxis, :]
Xt *= np.linalg.norm(X, axis=0)
return Xt # Q[0, ...]
def rescale_L(L, lmax=2):
"""Rescale the Laplacian eigenvalues in [-1,1]."""
M, M = L.shape
I = scipy.sparse.identity(M, format='csr', dtype=L.dtype)
L /= lmax / 2
L -= I
return L
def chebyshev(L, X, K):
"""Return T_k X where T_k are the Chebyshev polynomials of order up to K.
Complexity is O(KMN)."""
M, N = X.shape
assert L.dtype == X.dtype
# L = rescale_L(L, lmax)
# Xt = T @ X: MxM @ MxN.
Xt = np.empty((K, M, N), L.dtype)
# Xt_0 = T_0 X = I X = X.
Xt[0, ...] = X
# Xt_1 = T_1 X = L X.
if K > 1:
Xt[1, ...] = L.dot(X)
# Xt_k = 2 L Xt_k-1 - Xt_k-2.
for k in range(2, K):
Xt[k, ...] = 2 * L.dot(Xt[k-1, ...]) - Xt[k-2, ...]
return Xt