Python数据挖掘02:降维
#数据挖掘第二周作业:
#1.加载13维度的房地产自变量数据;
#2.将上述数据用PCA降维到2D空间;
#3.绘制降维结果,根据1维的房价数据进行点的涂色。
首先介绍一下线性降维和非线性降维,给出以下两个代码例子:
线性降维
#降维
#准备好降维的包
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
# Load the iris dataset: data matrix, integer class targets and class names.
iris = datasets.load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

################################################################
# PCA: fit on X and project it onto the first two principal components.
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)
print(len(X_r))
print(X_r)
print('components_: %s' % str(pca.components_))
# Percentage of variance explained for each components
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))

#####################################################################
# MDS: embed the same data matrix in two dimensions.
mds = MDS(n_components=2)
X_m = mds.fit_transform(X)
print(len(X_m))
print(X_m)

#####################################################################
# Scatter plot of the PCA projection, one colour per iris class.
plt.figure()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    # The boolean mask y == i selects the rows belonging to class i.
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=.8, lw=lw,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')

# Scatter plot of the MDS embedding, using the same class colours.
plt.figure()
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_m[y == i, 0], X_m[y == i, 1], alpha=.8, color=color,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('MDS of IRIS dataset')

# Display both figures.
plt.show()
运行结果:
控制台数据结果:
非线性降维代码
(暂时无法运行)
#非线性降维
#准备好包和测试数据
from collections import OrderedDict
import matplotlib.pyplot as plt
from sklearn import manifold, datasets
# Importing mplot3d registers the '3d' projection; Matplotlib releases before
# 3.2 need this explicit import for add_subplot(projection='3d') to work.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# Sample points on an S-curve; `color` holds the per-point values returned by
# make_s_curve and is used below to colour the scatter plots.
n_points = 500
X, color = datasets.make_s_curve(n_points, random_state=0)
n_neighbors = 10
n_components = 2

###########################################
# Plot the original three-column S-curve data.
# Create figure
fig = plt.figure(figsize=(16, 4))
# Use n_points rather than a hard-coded count so the title tracks the data.
fig.suptitle("Manifold Learning with %i points, %i neighbors"
             % (n_points, n_neighbors), fontsize=14)

# Add 3d scatter plot
ax = fig.add_subplot(141, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
ax.view_init(4, -72)

###########################################
# Reduce with LLE and Isomap, and with MDS for comparison.
# Arguments are passed by keyword: recent scikit-learn releases make these
# constructor parameters keyword-only, so positional calls raise TypeError.
methods = OrderedDict()
methods['LLE'] = manifold.LocallyLinearEmbedding(
    n_neighbors=n_neighbors, n_components=n_components)
methods['Isomap'] = manifold.Isomap(
    n_neighbors=n_neighbors, n_components=n_components)
methods['MDS'] = manifold.MDS(n_components=n_components)

######################
# Plot results
for i, (label, method) in enumerate(methods.items()):
    Y = method.fit_transform(X)
    # Subplot 1 is the 3-D view, so the embeddings fill slots 2 to 4.
    ax = fig.add_subplot(1, 4, 2 + i)
    ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
    ax.set_title("%s" % label)

plt.show()
###########################
作业
#作业
#1.加载13维度的房地产自变量数据;
#2.将上述数据用PCA降维到2D空间;
#3.绘制降维结果,根据1维的房价数据进行点的涂色。
代码链接:https://download.csdn.net/download/weixin_44382897/12293385
正在写