读取seeds.tsv文件(文件已经上传到本博客下载中心,可免费下载),
最后一列是小麦品种,其他列是小麦特征
导包
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
数据获取
# Load the tab-separated seeds dataset; header=None because the file has no
# header row, so columns get integer positions.
seeds = pd.read_csv('./seeds.tsv', sep='\t', header=None)
# Every column except the last is a feature; convert to a plain ndarray.
data = np.array(seeds.iloc[:, :-1])
# The last column holds the class label. Select it with a scalar position
# (-1, not the slice -1:) so the result is a 1-D Series rather than an
# (n, 1) DataFrame; scikit-learn estimators expect 1-D labels and emit a
# DataConversionWarning when handed a column vector.
target = seeds.iloc[:, -1]
训练数据
# Split the data and train the classifier.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Hold out 10% of the samples for testing. A fixed random_state makes the
# shuffle reproducible, so the score and the cross table are identical on
# every run instead of changing with each execution.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.1, random_state=42
)

# k-nearest-neighbours classifier with scikit-learn's default settings.
knn = KNeighborsClassifier()
# np.ravel guarantees a 1-D label array, which is the shape fit() expects;
# a single-column (n, 1) y would trigger a DataConversionWarning.
knn.fit(X_train, np.ravel(y_train))
预测数据,并查看得分
# Predicted labels for the held-out samples.
y_ = knn.predict(X=X_test)
# Score of the fitted classifier on the held-out split (per the scikit-learn
# docs, a classifier's score() is mean accuracy). Left as a bare expression
# so an interactive session displays the value.
knn.score(X=X_test, y=y_test)
画交叉表
# Cross-tabulate predicted labels (rows) against true labels (columns).
# y_test is flattened to 1-D so it lines up element-wise with y_;
# margins=True appends the row/column totals.
pd.crosstab(
    index=y_,
    columns=y_test.values.ravel(),
    rownames=['预测'],
    colnames=['真实'],
    margins=True,
)