GridSearchCV 与归一化操作
# 搜索算法最合适的参数
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
# 归一化和标准化
from sklearn.preprocessing import MinMaxScaler,StandardScaler
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
# Load the tab-separated cancer dataset from the local course directory.
cancer = pd.read_csv(
    filepath_or_buffer='C:/Users/Administrator/sklearnziliao/day2knn/day02代码/knn/cancer.csv',
    sep='\t',
)
# Preview the first rows (only rendered when run interactively, e.g. in a notebook cell).
cancer.head()
# cancer.shape
# Target vector: the 'Diagnosis' column.
y = cancer['Diagnosis']
# Feature matrix: every column from positional index 2 onward.
X = cancer.iloc[:, 2:]
0 M
1 M
2 M
3 M
4 M
..
564 M
565 M
566 M
567 M
568 B
Name: Diagnosis, Length: 569, dtype: object
# Hold out 20% of the samples as a test set and train on the rest.
# No random_state is passed, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
)
利用网格搜索寻找算法的最佳参数
--------与 cross_val_score 的区别:GridSearchCV 集成度更高,内部已实现对参数组合的循环遍历;而使用 cross_val_score 时还需要自己编写 for 循环
# k-nearest-neighbours classifier created with its default constructor arguments.
knn = KNeighborsClassifier()
# The hyperparameters to tune are collected in a dict
params = {
'n_neighbors':[i for i in range