前言
等距映射(Isomap)借鉴了微分几何中的测地线思想:它希望数据映射到低维空间之后,仍然保持样本在流形上的测地线距离。
目录:
- 算法流程
- sklearn库函数实现效果
- 方法二实现效果
- 说明
一 算法流程
1: 计算样本两两之间的距离矩阵 $D$
2: 用 k 近邻算法求解出近邻距离矩阵 $D_k$(只保留每个样本到其 k 个近邻的距离,其余位置记为 -1,表示不可达)
3: 以 $D_k$ 作为输入,使用最短路径算法(Dijkstra 算法),求解出测地线距离矩阵 $D_G$
4: 以 $D_G$ 作为 MDS 算法的输入,求解出降维后的矩阵 $Z$
二 sklearn库函数实现效果
from sklearn import manifold
def API(self):
    """Plot scikit-learn's Isomap embedding of ``self.data`` for four k values.

    One 2-D embedding is computed per neighbourhood size (10, 20, 30, 40)
    and drawn into its own cell of a 2x2 subplot grid, coloured by
    ``self.color``. The figure is shown once all four panels are drawn.
    """
    subplot_codes = (221, 222, 223, 224)
    neighbour_counts = (10, 20, 30, 40)
    for code, near in zip(subplot_codes, neighbour_counts):
        model = manifold.Isomap(n_components=2, n_neighbors=near)
        plt.subplot(code)
        embedded = model.fit_transform(self.data)
        plt.title(" n_neighbors %d" % near, fontsize=14)
        xs = embedded[:, 0]
        ys = embedded[:, 1]
        plt.scatter(xs, ys, c=self.color, cmap=plt.cm.hot)
        plt.xlabel("$z_1$", fontsize=18)
        plt.ylabel("$z_2$", fontsize=18)
    plt.show()
三 方法二实现效果
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 17 16:45:21 2019
@author: chengxf2
"""
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter
from sklearn import manifold, datasets
from sklearn.datasets import load_iris
from dijk import Dijkstra
from MDS import MyMDS
class Isomap:
    """Hand-written Isomap demo: kNN graph -> shortest paths -> MDS.

    The constructor loads an S-curve data set and precomputes the pairwise
    distance matrix. ``Train`` runs the hand-written pipeline for several
    neighbourhood sizes and plots the embeddings; ``API`` plots the
    scikit-learn implementation for comparison.

    Attributes:
        m, n: number of samples / number of features of ``data``.
        n_neighbors: neighbourhood sizes tried by ``Train``.
        n_components: target dimensionality handed to the MDS step.
        data, color: samples and per-sample colour values (set by ``LoadData``).
        D: (m, m) matrix of squared Euclidean distances between samples.
    """

    def __init__(self):
        self.m = 0
        self.n = 0
        self.n_neighbors = [10, 15, 30, 40]  # neighbourhood sizes to try
        self.n_components = 2  # dimensionality of the embedding
        self.LoadData()

    def GetKNear(self, data, num):
        """Build the k-nearest-neighbour distance matrix.

        Args:
            data: 2-D array; only its row count is used. The distances
                themselves are always read from ``self.D``.
            num: number of neighbours kept per sample.

        Returns:
            (m, m) array whose row ``i`` holds ``self.D[i, j]`` for the
            ``num`` nearest neighbours ``j`` of sample ``i`` and -1 in every
            other position (including the diagonal).
        """
        m, _ = np.shape(data)
        graph = -1 * np.ones((m, m))  # -1 marks "no edge"
        for i in range(m):
            dist = self.D[i]
            order = dist.argsort().tolist()
            order.remove(i)  # a sample is not its own neighbour
            nearest = np.asarray(order[:num], dtype=int)
            graph[i, nearest] = dist[nearest]
        return graph

    def LoadData(self):
        """Generate the S-curve samples and precompute pairwise distances.

        Sets ``data``, ``color``, ``m``, ``n`` and ``D`` on the instance and
        prints the data-set shape.
        """
        n_points = 150
        # ``datasets.samples_generator`` was removed from scikit-learn; the
        # generator is exposed directly on ``sklearn.datasets``.
        self.data, self.color = datasets.make_s_curve(n_points, random_state=0)
        self.m, self.n = np.shape(self.data)
        self.D = np.zeros((self.m, self.m))
        # NOTE(review): D stores *squared* Euclidean distances (no sqrt).
        # Path sums over squared lengths are not geodesic lengths; confirm
        # whether Dijkstra / MyMDS expect squared input before changing this.
        for i in range(self.m):
            diff = self.data[i] - self.data
            self.D[i] = np.sum(np.square(diff), axis=1)  # sum over features
        print("\n 加载数据集 m: ", self.m, "\t n: ", self.n)

    def Solve(self, D, m):
        """Replace unreachable (-1) entries with the maximum of their row.

        Only the leading ``m`` x ``m`` part of ``D`` is inspected. ``D`` is
        modified in place and also returned. A row consisting solely of -1
        is left unchanged, and the matrix is not symmetrised.

        Args:
            D: distance matrix (NumPy array) in which -1 marks "unreachable".
            m: number of samples.

        Returns:
            The same array ``D`` after the replacement.
        """
        for i in range(m):
            row = D[i]
            unreachable = row[:m] == -1
            if unreachable.any():
                # The row maximum does not change while filling, so compute
                # it once instead of once per missing entry.
                D[i, :m][unreachable] = row.max()
        return D

    def Train(self):
        """Run kNN -> Dijkstra -> MDS for every k and plot the embeddings.

        One panel of a 2x2 subplot grid is drawn per entry of
        ``self.n_neighbors``; only the first four entries are used, because
        the grid has four cells. The figure is shown after all panels are
        drawn.
        """
        subplot_codes = [221, 222, 223, 224]
        for code, k in zip(subplot_codes, self.n_neighbors):
            print("\n k 近邻 \n")
            knn_graph = self.GetKNear(self.D, k)

            print("\n step2 迪杰斯特拉(Dijikstra) \n", self.m)
            dijk = Dijkstra()
            dijk.LoadData(knn_graph)
            shortest = dijk.Updata()
            # Pairs still marked -1 after the shortest-path step are
            # replaced so that MDS receives no -1 entries.
            geodesic = self.Solve(shortest, self.m)

            print("\n step3 MDS \n", self.m)
            mds = MyMDS()
            mds.SetData(geodesic)
            newX = mds.Train(self.n_components, self.color)

            plt.subplot(code)
            Tip = ": n_neighbors k: %d" % k
            plt.title(Tip, fontsize=14)
            plt.scatter(newX[:, 0], newX[:, 1], c=self.color, cmap=plt.cm.hot)
            plt.xlabel("$z_1$", fontsize=18)
            plt.ylabel("$z_2$", fontsize=18)
        plt.show()

    def API(self):
        """Plot scikit-learn's Isomap embedding of ``self.data`` for four k values.

        Uses neighbourhood sizes 10, 20, 30 and 40, one per cell of a 2x2
        subplot grid, and shows the figure once all panels are drawn.
        """
        subplot_codes = [221, 222, 223, 224]
        for code, near in zip(subplot_codes, [10, 20, 30, 40]):
            isomap = manifold.Isomap(n_components=2, n_neighbors=near)
            plt.subplot(code)
            X_reduced = isomap.fit_transform(self.data)
            plt.title(" n_neighbors %d" % near, fontsize=14)
            plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=self.color, cmap=plt.cm.hot)
            plt.xlabel("$z_1$", fontsize=18)
            plt.ylabel("$z_2$", fontsize=18)
        plt.show()
# Build the demo object (its constructor calls LoadData) and plot the
# hand-written Isomap pipeline.
iso = Isomap()
iso.Train()
# Uncomment to plot the scikit-learn Isomap result instead, for comparison.
#iso.API()
四 说明
1: 邻近个数的取值对结果影响比较大。
2: 用 Dijkstra 算法处理完距离矩阵之后,矩阵中依然可能存在 -1 的值(表示无穷大,即不可达)。
这个时候需要做特殊处理,例如用该行当前所有距离中的最大值来代替。