from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import neighbors, metrics

# Location of the tab-separated data set; adjust to match the local machine.
DATA_PATH = 'C:/Users/lenovo/Desktop/datingTestSet.txt'
FEATURE_COLUMNS = ['flight', 'icecream', 'gametime']
TARGET_COLUMN = 'class'
# Fixed seed so the train/test split (and therefore the printed metrics)
# is identical from run to run.
RANDOM_STATE = 42

# Load the data set. Column names are supplied explicitly, so pandas reads
# the first line of the file as data rather than as a header.
datingdata = pd.read_table(
    DATA_PATH,
    names=FEATURE_COLUMNS + [TARGET_COLUMN],
    sep='\t',
)
print(datingdata.shape)

# Separate features from labels. The target is selected as a 1-D Series
# (not a one-column DataFrame) because scikit-learn classifiers expect y of
# shape (n_samples,) and warn when handed a column vector.
X = datingdata[FEATURE_COLUMNS]
y = datingdata[TARGET_COLUMN]

# Split BEFORE scaling so that the held-out rows cannot influence the
# min/max learned by the scaler (avoids test-set leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)
print(y_train)

# Learn the scaling from the training rows only, then apply that same
# transformation to both partitions.
scaler = preprocessing.MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train the model.
knn = neighbors.KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Classify one hand-written sample. It is given in raw feature units, so it
# must go through the same scaler as the training data before it reaches
# the model; wrapping it in a DataFrame keeps the column names the scaler
# was fitted with.
sample = pd.DataFrame([[1, 7, 3]], columns=FEATURE_COLUMNS)
print(knn.predict(scaler.transform(sample)))

# Evaluate the model on the held-out rows.
print(metrics.classification_report(y_test, y_pred))
print(metrics.confusion_matrix(y_test, y_pred))