# 重复抽样 — repeated sampling with replacement (bagging / bootstrap aggregating)
from sklearn import neighbors
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
# Load the iris data set; keep only the first two features so the
# decision boundary can be drawn in 2-D.
iris = datasets.load_iris()
x_data = iris.data[:,:2]
y_data = iris.target # class labels (0/1/2)
# Random train/test split (default 75%/25%; no random_state, so the
# split — and the accuracies below — vary between runs).
x_train,x_test,y_train,y_test, = train_test_split(x_data,y_data)
# Baseline model: k-nearest-neighbors classifier with default settings.
knn = neighbors.KNeighborsClassifier()
knn.fit(x_train,y_train)
def plot(model):
    """Draw the model's predicted class regions as a filled contour plot.

    Parameters
    ----------
    model : a fitted classifier exposing ``predict``; it is evaluated on a
        2-column grid, so it must have been trained on exactly 2 features.

    Side effects: draws onto the current matplotlib axes; reads the
    module-level ``x_data`` to determine the plotting range.
    """
    # Range covered by the data, padded by 1 on each side.
    x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
    y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
    # Grid of evaluation points over that range (step 0.02).
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # ravel flattens to 1-D like flatten, but returns a view when possible
    # instead of always copying; neither call modifies the original array.
    # (Original comment claimed ravel alters the source array — it does not.)
    z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    # Filled contour of the predicted class per grid point.
    cs = plt.contourf(xx, yy, z)
# Fit the KNN model on the training data and visualize its decision regions.
knn.fit(x_train, y_train)
plot(knn)
# Overlay the samples, colored by true label.
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# Accuracy on the held-out test set (printed so the script reports it;
# the bare expression was REPL residue with no effect in a script).
print(knn.score(x_test, y_test))  # e.g. Out[44]: 0.8157894736842105
# Single decision tree classifier: fit, visualize, and score.
dtree = tree.DecisionTreeClassifier()
dtree.fit(x_train, y_train)
plot(dtree)
# Overlay the samples, colored by true label.
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# Accuracy on the held-out test set.
print(dtree.score(x_test, y_test))  # e.g. Out[46]: 0.7368421052631579
# Ensemble learning: bagging with KNN as the base estimator.
# 100 bootstrap resamples (sampling with replacement) of the training set,
# one KNN fitted per resample; predictions are combined by majority vote.
bagging_knn = BaggingClassifier(knn, n_estimators=100)
bagging_knn.fit(x_train, y_train)
plot(bagging_knn)
# Overlay the samples, colored by true label.
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# Test-set accuracy of the KNN ensemble.
print(bagging_knn.score(x_test, y_test))  # e.g. Out[47]: 0.8947368421052632
# Bagging with the decision tree as the base estimator: 100 bootstrap
# resamples, one tree fitted per resample, majority-vote prediction.
bagging_dtree = BaggingClassifier(dtree, n_estimators=100)
bagging_dtree.fit(x_train, y_train)
# BUG FIX: the original called plot(bagging_knn) here — it visualized the
# KNN ensemble instead of the tree ensemble fitted just above.
plot(bagging_dtree)
# Overlay the samples, colored by true label.
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# Test-set accuracy of the tree ensemble.
print(bagging_dtree.score(x_test, y_test))  # e.g. Out[48]: 0.7631578947368421