# Iris species classification with a K-nearest-neighbors classifier.
#
# Steps: load the Iris dataset, split it into train/test subsets,
# standardize the features, fit a KNN classifier, and print its accuracy
# together with a per-class classification report.
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` is provided by `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Load the dataset and print its shape and bundled description.
iris = load_iris()
print(iris.data.shape)
print(iris.DESCR)

# Randomly split the data: 25% is held out for testing, and the fixed
# random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.25, random_state=33)

# Standardize the features. The scaler is fitted on the training split
# only and then reused on the test split, so no test-set statistics
# leak into the preprocessing step.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

# Fit a K-nearest-neighbors classifier (library default settings) and
# predict the class of every test sample.
knc = KNeighborsClassifier()
knc.fit(X_train, y_train)
y_predict = knc.predict(X_test)

# Evaluate the classifier: accuracy on the test split, then a per-class
# report labelled with the dataset's target names.
print('The accuracy of K-Nearest Neighbor Classifier is', knc.score(X_test, y_test))
print(classification_report(y_test, y_predict, target_names=iris.target_names))
# 大数据入门——Iris数据集类别预测(K近邻分类器:KNN)
# 于 2017-10-11 12:48:15 首次发布