``
本次整理下自己的思路,首先构建数据文件读取函数,并对数据进行处理,方便后续的分类器进行分类处理,然后是写分类函数进行数据的训练与分类,然后会出现
import operator

import matplotlib.pyplot as plt
import numpy as np
#####读取文件
def filmread(file):
    """Load a tab-separated data file into a feature matrix and label list.

    Every non-blank line must contain three numeric fields followed by one
    of the text labels 'smallDoses', 'largeDoses' or 'didntLike'.

    Args:
        file: Path of the text file to read.

    Returns:
        A ``(data, lable)`` tuple. ``data`` is a float array of shape
        ``(n, 3)`` with one row per non-blank line; ``lable`` is a list of
        ``n`` ints where 1 = 'smallDoses', 2 = 'largeDoses' and
        3 = 'didntLike'.

    Raises:
        ValueError: If a line has fewer than four fields, a feature is not
            numeric, or the label is not one of the three known values.
    """
    label_codes = {'smallDoses': 1, 'largeDoses': 2, 'didntLike': 3}
    features = []
    lable = []
    # The context manager closes the file even if parsing fails.
    with open(file) as fr:
        for lineno, lin in enumerate(fr, start=1):
            lin = lin.strip()
            if not lin:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            chuli = lin.split('\t')
            if len(chuli) < 4:
                raise ValueError(
                    f"{file}: line {lineno}: expected 4 tab-separated "
                    f"fields, got {len(chuli)}"
                )
            if chuli[3] not in label_codes:
                # Skipping the label silently would leave the feature row
                # in place and shift every later label by one.
                raise ValueError(
                    f"{file}: line {lineno}: unknown label {chuli[3]!r}"
                )
            features.append([float(value) for value in chuli[:3]])
            lable.append(label_codes[chuli[3]])
    # reshape keeps the (0, 3) shape for a file without data rows.
    data = np.array(features, dtype=float).reshape(len(features), 3)
    return data, lable
#########分类器
def classfen(test, train, lable, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distance is the squared Euclidean distance between ``test`` and each
    row of ``train``. When several labels receive the same number of
    votes, the label met first while walking the neighbours from nearest
    to farthest wins.

    Side effect: the module-level name ``datae`` is set to the list of
    ``(label, votes)`` pairs sorted by descending vote count.

    Args:
        test: Feature vector of the sample to classify.
        train: 2-D array with one training sample per row.
        lable: Sequence of labels, ``lable[i]`` belonging to ``train[i]``.
        k: Number of neighbours that vote; values larger than the number
            of training rows are clamped to that number.

    Returns:
        The label with the most votes.

    Raises:
        ValueError: If ``train`` has no rows or ``k`` is smaller than 1.
    """
    global datae
    train = np.asarray(train)
    hangshu = train.shape[0]
    if hangshu == 0:
        raise ValueError("train must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    # Asking for more neighbours than exist would index past the end.
    k = min(k, hangshu)
    # Broadcasting subtracts the sample from every training row.
    cha = np.asarray(test) - train
    di = (cha ** 2).sum(axis=1)
    # A stable sort keeps equidistant rows in their original order, so
    # the result is deterministic.
    dis = di.argsort(kind='stable')
    count = {}
    for neighbour in dis[:k]:
        vlable = lable[neighbour]
        count[vlable] = count.get(vlable, 0) + 1
    datae = sorted(count.items(), key=operator.itemgetter(1), reverse=True)
    return datae[0][0]
###测试函数
def test(test):
    """Classify one sample against the stored data set and print the label.

    The training data is loaded with ``filmread`` from the path held in
    the module-level name ``data``, plotted together with the sample via
    ``show``, rescaled with ``autunorm`` and classified by ``classfen``
    with k = 3.

    Args:
        test: Feature vector of the sample to classify, with one value per
            column of the array returned by ``filmread``.
    """
    da, lb = filmread(data)
    # Plot the sample that was passed in rather than the global `te`.
    show(test, da, lb)
    # The sample must be rescaled exactly like the training rows, otherwise
    # the distances mix raw and normalised values. autunorm() does not
    # expose the offsets and ranges it used, so the sample is appended as
    # an extra row and scaled in the same call. It therefore takes part in
    # the min/max computation.
    scaled = autunorm(np.vstack((da, test)))
    noda, sample = scaled[:-1], scaled[-1]
    print(classfen(sample, noda, lb, 3))
####数据可视化
def show(te, train, lb):
    """Scatter-plot the training samples and mark the query sample.

    The first two columns of ``train`` are used as x and y, coloured by
    ``lb``. The first two entries of ``te`` are drawn on top as a red star.
    The call ends with ``plt.show()``.

    Args:
        te: Feature vector of the query sample.
        train: 2-D array with one training sample per row.
        lb: Per-row colour values for the training samples.
    """
    train_x = train[:, 0]
    train_y = train[:, 1]
    plt.scatter(train_x, train_y, c=lb)
    # Slices (not scalar indexing) keep the query coordinates array-like.
    query_x, query_y = te[:1], te[1:2]
    plt.scatter(query_x, query_y, c='red', marker='*')
    plt.show()
###########数据归一化
def autunorm(dataset):
    """Min-max scale every column of a 2-D data set to the range [0, 1].

    Each column is shifted by its own minimum and divided by its own
    range, so that features measured on different scales contribute
    comparably to a distance computation. A column whose values are all
    equal is mapped to zeros instead of dividing by zero.

    Args:
        dataset: 2-D array-like with one sample per row.

    Returns:
        A new float array of the same shape holding the scaled values;
        the input is not modified.
    """
    dataset = np.asarray(dataset, dtype=float)
    if dataset.size == 0:
        # min()/max() are undefined on an empty array.
        return dataset.copy()
    # axis=0 gives one minimum / maximum per column rather than a single
    # value for the whole array.
    mind = dataset.min(axis=0)
    rang = dataset.max(axis=0) - mind
    # Constant columns have zero range; divide by 1 so they become 0.
    safe_rang = np.where(rang == 0, 1.0, rang)
    nodate = (dataset - mind) / safe_rang
    return nodate
if __name__ == '__main__':
    # Sample to classify, passed to test() below.
    te = np.array([1000, 10, 100])
    # test() reads this module-level name to locate the data file.
    data = 'data.txt'
    test(te)