# 【python数据挖掘课程】二十五.Matplotlib绘制带主题及聚类类标的散点图

PS：最近参加CSDN2018年博客评选，希望您能投出宝贵的一票。我是59号，Eastmount，杨秀璋。投票地址：https://bss.csdn.net/m/topic/blog_star2018/index

# 一. Matplotlib绘制带主题散点图

#-*- coding:utf-8 -*-
import os
import codecs
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Draw a scatter plot and write a text label next to every point.

# Point coordinates and the label that belongs to each point (same order).
x = [2.3, 4.5, 3, 7, 6.5, 4, 5.3]
y = [5, 4, 7, 5, 5.3, 5.5, 6.2]
name = ["a", "b", "c", "d", "e", "f", "g"]

fig, ax = plt.subplots()
ax.scatter(x, y, c='r', s=100)

# Annotate each point with its label, anchored at the point's coordinates.
# zip() pairs labels with coordinates directly, so no index bookkeeping is
# needed and a length mismatch cannot raise IndexError.
for txt, px, py in zip(name, x, y):
    ax.annotate(txt, (px, py))

plt.show()

#-*- coding:utf-8 -*-
import os
import codecs
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Draw a scatter plot whose point labels are read from 'allname.txt'
# (UTF-8, one label per line), then save it to 'plot.png' and display it.

x = [2.3, 4.5, 3, 7, 6.5, 4, 5.3]
y = [5, 4, 7, 5, 5.3, 5.5, 6.2]

# Built-in labels; iterate over these instead of `corpus` below to plot
# without the external file.
name = ["a", "b", "c", "d", "e", "f", "g"]

# Keep Chinese text and the minus sign '-' from rendering as empty boxes.
# Configured before any figure text is created.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['axes.unicode_minus'] = False

fig, ax = plt.subplots()
ax.scatter(x, y, c='r', s=100)

# Read the labels, one per line, stripping surrounding whitespace.
# The `with` block closes the file even if reading fails.
corpus = []
with codecs.open('allname.txt', 'r', 'utf-8') as result:
    for u in result:
        label = u.strip()
        print(label)
        corpus.append(label)

# Annotate each point with its label. zip() stops at the shorter sequence,
# so extra lines in the file are ignored instead of raising IndexError.
for txt, px, py in zip(corpus, x, y):
    ax.annotate(txt, (px, py))

plt.savefig('plot.png', dpi=1200)
plt.show()

# 二. Matplotlib聚类类标设置散点图

# -*- coding: utf-8 -*-
# Cluster the iris data set with KMeans and draw the first two feature
# columns as a scatter plot coloured by predicted cluster.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris

# Load the data set
iris = load_iris()
print(iris.data)           # feature matrix
print(iris.target)         # ground-truth labels
print(len(iris.target))
print(iris.data.shape)     # 150 samples, 4 features each

# Cluster the samples into 3 groups with KMeans
clf = KMeans(n_clusters=3)
pre = clf.fit_predict(iris.data)
print(pre)

# Take the first two feature columns as the plot coordinates
X = iris.data
L1 = [row[0] for row in X]
print(L1)
L2 = [row[1] for row in X]
print(L2)

# Plot: one marker per sample, coloured by its predicted cluster label
plt.scatter(L1, L2, c=pre, marker='x', s=100)
plt.title("KMeans")
plt.show()

# -*- coding: utf-8 -*-
# Cluster the iris data set with KMeans, project it to two dimensions with
# PCA, and plot each predicted cluster in its own colour with a legend.
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Load the data set
iris = load_iris()
print(iris.data)           # feature matrix
print(iris.target)         # ground-truth labels
print(len(iris.target))
print(iris.data.shape)     # 150 samples, 4 features each

# Cluster the samples into 3 groups with KMeans
clf = KMeans(n_clusters=3)
y_pred = clf.fit_predict(iris.data)
print(y_pred)

# Reduce the N-dimensional features to 2 dimensions for plotting
pca = PCA(n_components=2)
newData = pca.fit_transform(iris.data)
print(newData)

# Split the projected points by predicted label 0, 1, 2 into
# (x1, y1), (x2, y2), (x3, y3) using boolean masks over the label array.
x1, y1 = newData[y_pred == 0, 0], newData[y_pred == 0, 1]
x2, y2 = newData[y_pred == 1, 0], newData[y_pred == 1, 1]
x3, y3 = newData[y_pred == 2, 0], newData[y_pred == 2, 1]

# One colour per cluster. The 'o' in each format string already selects the
# circle marker, so it is not repeated as a marker= keyword.
plot1, = plt.plot(x1, y1, 'or', markersize=10)
plot2, = plt.plot(x2, y2, 'og', markersize=10)
plot3, = plt.plot(x3, y3, 'ob', markersize=10)
plt.title("K-Means Text Clustering")  # plot title
plt.legend((plot1, plot2, plot3), ('A', 'B', 'C'))

plt.show()

## 本书主要包括上下两册：

《Python网络数据爬取及分析从入门到精通（爬取篇）》
《Python网络数据爬取及分析从入门到精通（分析篇）》

(By:Eastmount 2018-07-18 深夜12点  http://blog.csdn.net/eastmount/ )

09-27

05-18 5784
06-08 1648
12-19 2万+
10-12 3万+
09-28 7146
06-18 8482
09-15 1684