# 需要一个数据文件(datingTestSet.txt):海伦调查得到的1000份数据,共四列,前三列为特征数据,最后一列为分类标签(是否有魅力)。
from numpy import * # 导入科学计算包
from matplotlib.font_manager import FontProperties
import matplotlib.lines as mlines
import matplotlib.pyplot as plt # 导入绘图工具包
import operator # 导入运算符模块
def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and a label list.

    Each non-blank line is split on tab characters. The first three fields
    are converted to floats and stored as one matrix row; the last field is
    kept as a string label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is an
        ``(n, 3)`` float array (``n`` = number of non-blank lines) and
        ``classLabelVector`` is a list of ``n`` label strings.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the first three fields is not a valid number.
    """
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(filename) as fr:
        # Blank lines (e.g. a trailing newline at end of file) carry no record;
        # dropping them here keeps the matrix sized to the real data.
        records = [line.strip() for line in fr if line.strip()]

    returnMat = zeros((len(records), 3))  # one row per record, three features
    classLabelVector = []
    for index, record in enumerate(records):
        listFromLine = record.split('\t')  # fields are tab-separated
        # Convert explicitly rather than relying on implicit string casting.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(listFromLine[-1])  # last field is the label
    return returnMat, classLabelVector
def _drawScatterPanel(axis, xValues, yValues, colors, title, xlabel, ylabel,
                      font, legendHandles):
    """Draw one scatter panel with styled title, axis labels and a legend."""
    axis.scatter(x=xValues, y=yValues, color=colors, s=15, alpha=.5)
    # The Text property is spelled 'fontproperties'; the capitalised form only
    # worked while matplotlib still accepted case-insensitive property names.
    titleText = axis.set_title(title, fontproperties=font)
    xlabelText = axis.set_xlabel(xlabel, fontproperties=font)
    ylabelText = axis.set_ylabel(ylabel, fontproperties=font)
    # Style the title and axis-label text objects.
    plt.setp(titleText, size=10, weight='bold', color='red')
    plt.setp(xlabelText, size=8, weight='bold', color='black')
    plt.setp(ylabelText, size=8, weight='bold', color='black')
    axis.legend(handles=legendHandles)


def showData(datingDataMat, datingLabels):
    """Show pairwise scatter plots of the three features, coloured by label.

    Three panels of a 2x2 grid are used (the bottom-right panel stays empty):
    column 0 vs column 1, column 0 vs column 2 and column 1 vs column 2.

    Args:
        datingDataMat: 2-D array indexed as ``[:, 0]``, ``[:, 1]``, ``[:, 2]``.
            Per the axis captions, the columns are frequent-flyer miles per
            year, share of time spent on video games, and litres of ice
            cream per week.
        datingLabels: Iterable of label strings, one per row; each must be
            'didntLike', 'smallDoses' or 'largeDoses'.

    Raises:
        ValueError: If a label is not one of the three known values.
    """
    font = FontProperties('SimHei')  # SimHei so the Chinese captions render
    # One 13x8 figure split into a 2x2 grid; axes are not shared.
    fig, ax = plt.subplots(nrows=2, ncols=2, sharex=False, sharey=False, figsize=(13, 8))

    colorByLabel = {'didntLike': 'black', 'smallDoses': 'orange', 'largeDoses': 'red'}
    LabelsColors = []
    for label in datingLabels:
        if label not in colorByLabel:
            # Fail early with a clear message instead of passing scatter() a
            # colour list whose length no longer matches the data.
            raise ValueError('unknown label: %r' % (label,))
        LabelsColors.append(colorByLabel[label])

    # Proxy artists for the legend, shared by all three panels.
    didntLike = mlines.Line2D([], [], color='black', marker='.', markersize=6, label='didntLike')
    smallDoses = mlines.Line2D([], [], color='orange', marker='.', markersize=6, label='smallDoses')
    largeDoses = mlines.Line2D([], [], color='red', marker='.', markersize=6, label='largeDoses')
    legendHandles = [didntLike, smallDoses, largeDoses]

    # Top-left [0][0]: column 0 vs column 1.
    _drawScatterPanel(ax[0][0], datingDataMat[:, 0], datingDataMat[:, 1], LabelsColors,
                      u'每年获得的飞行常客里程数与玩视频游戏所消耗时间占比',
                      u'每年获得的飞行常客里程数', u'玩视频游戏所消耗时间占比',
                      font, legendHandles)
    # Top-right [0][1]: column 0 vs column 2.
    _drawScatterPanel(ax[0][1], datingDataMat[:, 0], datingDataMat[:, 2], LabelsColors,
                      u'每年获得的飞行常客里程数与每周消费的冰激淋公升数',
                      u'每年获得的飞行常客里程数', u'每周消费的冰激淋公升数',
                      font, legendHandles)
    # Bottom-left [1][0]: column 1 vs column 2.
    _drawScatterPanel(ax[1][0], datingDataMat[:, 1], datingDataMat[:, 2], LabelsColors,
                      u'玩视频游戏所消耗时间占比与每周消费的冰激淋公升数',
                      u'玩视频游戏所消耗时间占比', u'每周消费的冰激淋公升数',
                      font, legendHandles)

    plt.show()  # display the figure
if __name__ == '__main__':
    # Script entry point: load the dating data set, then plot it.
    features, labels = file2matrix('datingTestSet.txt')
    showData(features, labels)