在机器学习实战中,个人认为最重要的两个部分是数据和算法。对于一个数据集,我们希望在分类之前先对其进行可视化。绘制散点图
是进行可视化的常用方法,下面的链接对 scatter 函数的参数做了细致的讲解:
https://blog.csdn.net/jinruoyanxu/article/details/78845724
import matplotlib.pyplot as plt
from numpy import*
def loadDate(filename):
    """Load 2-D sample points and their labels from a tab-separated file.

    Every non-blank line must hold at least three tab-separated fields:
    the two feature values followed by the class label. Blank or
    whitespace-only lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(dataSet, labelSet)``. ``dataSet`` is a list of
        ``[x, y]`` float pairs and ``labelSet`` is the list of float
        labels, both in file order.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataSet = []
    labelSet = []
    with open(filename) as fp:
        # Iterate the file object directly; there is no need to hold every
        # line in memory at once via readlines().
        for line in fp:
            stripped = line.strip()
            # An empty line (e.g. an extra newline at the end of the file)
            # carries no sample; skip it instead of failing in float('').
            if not stripped:
                continue
            lineArr = stripped.split('\t')
            dataSet.append([float(lineArr[0]), float(lineArr[1])])
            labelSet.append(float(lineArr[2]))
    return dataSet, labelSet
# Load the training samples and draw one scatter series per class label.
filename = "traindata.txt"
dataSet, labelSet = loadDate(filename)
dataMat = array(dataSet)
labelMat = array(labelSet)

# where() with a single argument returns a tuple holding one index array per
# dimension. Take element 0 so that `index` is a plain 1-D array of row
# numbers; indexing with the tuple itself would yield a (1, k) 2-D result.
index = where(labelMat == 1)[0]
print(index)
res1 = dataMat[index, 0]
print(res1)

fig = plt.figure()
ax = fig.add_subplot(111)
# Samples labelled 1: magenta crosses.
ax.scatter(dataMat[index, 0], dataMat[index, 1], marker='x', color='m', label='1', s=30)
# Samples labelled -1: cyan plus signs.
index2 = where(labelMat == -1)[0]
ax.scatter(dataMat[index2, 0], dataMat[index2, 1], marker='+', color='c', label='2', s=50)
# The label= arguments above are only rendered once a legend is drawn.
ax.legend()
plt.show()
结果如图