癌症诊断
导包
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
导入数据
# Load the tab-separated dataset from the working directory.
# (Original author's note: the file name must not contain Chinese characters.)
cancer = pd.read_csv('./cancer.csv', sep='\t')
cancer
# Remove the 'ID' column from the frame in place.
cancer.drop(columns=['ID'], inplace=True)
提取数据
# Build the feature matrix and the target vector.
# Features are every column except the first one; the target is 'Diagnosis'.
# NOTE(review): this assumes 'Diagnosis' is the first column remaining once
# 'ID' has been dropped -- confirm against the CSV header, otherwise the
# label leaks into X.
X = cancer.iloc[:, 1:]
X.head()
y = cancer['Diagnosis']
y.head()

# Hold out 20% of the rows for testing. No random_state is passed, so the
# split (and therefore the scores below) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Exhaustive search over the KNN hyper-parameters, scored by accuracy with
# 6-fold cross-validation on the training portion only.
knn = KNeighborsClassifier()
params = {
    'n_neighbors': list(range(1, 30)),   # k = 1 .. 29
    'weights': ['distance', 'uniform'],
    'p': [1, 2],                         # Minkowski power: 1 = Manhattan, 2 = Euclidean
}
gcv = GridSearchCV(knn, params, scoring='accuracy', cv=6)
gcv.fit(X_train, y_train)

# Inspect the winning configuration and its mean cross-validated accuracy.
# NOTE(review): X_test / y_test are never used in this script, so no
# held-out score is reported.
gcv.best_params_
gcv.best_estimator_
gcv.best_score_