MDS(Multidimensional Scaling,多维缩放)是一种降维方法,目标是使降维后任意两个样本在低维空间中的欧氏距离等于它们在原始空间中的距离。
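令降维后的样本为 $Z=(z_1,z_2,\dots,z_m)\in\mathbb{R}^{d'\times m}$,其内积矩阵为 $B=Z^{\mathsf T}Z$,即 $b_{ij}=z_i^{\mathsf T}z_j$,有
$$dist_{ij}^2=\lVert z_i\rVert^2+\lVert z_j\rVert^2-2z_i^{\mathsf T}z_j=b_{ii}+b_{jj}-2b_{ij}$$
不妨令降维后的样本被中心化,即 $\sum_{i=1}^m z_i=0$,此时 $B$ 的每一行、每一列之和均为零。通过上面的式子可以得出
$$\sum_{i=1}^m dist_{ij}^2=\mathrm{tr}(B)+m\,b_{jj},\qquad \sum_{j=1}^m dist_{ij}^2=\mathrm{tr}(B)+m\,b_{ii},\qquad \sum_{i=1}^m\sum_{j=1}^m dist_{ij}^2=2m\,\mathrm{tr}(B)$$
记 $dist_{i\cdot}^2=\frac{1}{m}\sum_{j=1}^m dist_{ij}^2$,$dist_{\cdot j}^2=\frac{1}{m}\sum_{i=1}^m dist_{ij}^2$,$dist_{\cdot\cdot}^2=\frac{1}{m^2}\sum_{i=1}^m\sum_{j=1}^m dist_{ij}^2$。由上面四个式子可以解得,
$$b_{ij}=-\frac{1}{2}\left(dist_{ij}^2-dist_{i\cdot}^2-dist_{\cdot j}^2+dist_{\cdot\cdot}^2\right)$$
对B做特征值分解,$B=V\Lambda V^{\mathsf T}$,其中 $\Lambda=\mathrm{diag}(\lambda_1,\lambda_2,\dots,\lambda_m)$ 且 $\lambda_1\ge\lambda_2\ge\dots\ge\lambda_m$。取最大的 $d$ 个特征值构成对角矩阵 $\Lambda_*$,$V_*$ 为相应的特征向量矩阵,得
$$Z=\Lambda_*^{1/2}V_*^{\mathsf T}\in\mathbb{R}^{d\times m}$$
(下面的代码按行存放样本,即计算 $X=V_*\Lambda_*^{1/2}$。)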
接下来上代码,采用的数据集是美国部分主要城市之间的距离。由于地球表面是球面,这些城市可以看作分布在三维空间中的点,降到二维后得到的就是类似地图上的位置。
from numpy import *
import matplotlib.pyplot as plt
# Load the data
def loadDataSet(fileName='D:/distance.txt'):
    """Read a whitespace-separated numeric matrix from a text file.

    Args:
        fileName: Path of the text file to read. Defaults to the path the
            original script used, so existing zero-argument calls still work.

    Returns:
        A list of rows, each row a list of floats. Every token on a line is
        converted (the column count is no longer fixed at ten) and blank
        lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token cannot be parsed as a float.
    """
    dataMat = []
    # The context manager closes the file even if parsing raises.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # A blank line (e.g. a trailing newline) carries no data.
                continue
            dataMat.append([float(token) for token in lineArr])
    return dataMat
# MDS dimensionality reduction
def MDS(dataMat, d):
    """Embed samples in ``d`` dimensions with classical multidimensional scaling.

    Args:
        dataMat: Square (m x m) matrix of pairwise distances between the m
            samples; anything ``asarray`` accepts (nested lists, ndarray).
        d: Number of output dimensions to keep.

    Returns:
        A real ndarray with one row per sample and ``d`` columns (fewer if
        ``d`` exceeds m). The embedding is only determined up to rotation
        and reflection, so the sign of each column is arbitrary.
    """
    dataMatrix = asarray(dataMat, dtype=float)
    dataMatSqua = dataMatrix ** 2
    # Row means, column means and overall mean of the squared distances
    # (dist_i.^2, dist_.j^2 and dist_..^2 in the derivation).
    distI = mean(dataMatSqua, axis=1)
    distJ = mean(dataMatSqua, axis=0)
    distAll = mean(dataMatSqua)
    # Inner-product matrix B, built in one broadcast expression.
    B = -0.5 * (dataMatSqua - distI[:, None] - distJ[None, :] + distAll)
    # Remove round-off asymmetry so the symmetric eigen-solver applies.
    B = (B + B.T) / 2.0
    # eigh returns real eigenvalues in ascending order; eig gives no ordering
    # guarantee, so slicing its output may not select the d largest ones.
    eigA, eigV = linalg.eigh(B)
    order = argsort(eigA)[::-1][:d]
    # Non-Euclidean distances can produce slightly negative eigenvalues;
    # clip them so the square root stays real instead of yielding NaN.
    topVals = maximum(eigA[order], 0.0)
    X = eigV[:, order] * sqrt(topVals)
    return X


if __name__ == "__main__":
    # Embed the city distance matrix in two dimensions and draw it as a map.
    X = MDS(loadDataSet(), 2)
    label = ['Atl', 'Chi', 'Den', 'Hou', 'LA', 'Mia', 'NY', 'SF', 'Sea', 'DC']
    plt.plot(X[:, 0], X[:, 1], 'o')
    for (px, py), name in zip(X, label):
        # Offset the caption so it does not sit on top of the marker.
        plt.text(px + 25, py - 15, name)
    plt.show()
运行结果