# Bagging ensemble (base learner: logistic regression; KNN or a decision tree are possible alternatives) tuned with GridSearchCV. Task: classify glass types.


import pandas as pd
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

def get_data(filename):
    """Read the data file and split it into features and labels.

    The last column is used as the label; all preceding columns are used as
    features.

    NOTE(review): the previous body referenced ``data`` without ever reading
    ``filename``, so it failed with ``NameError``. The read below lets pandas
    detect the delimiter and treats the first row as column names (pandas'
    default) -- confirm both choices against the real file layout.

    :param filename: path of the delimited text file to load
    :return: ``(x, y)`` -- feature DataFrame and label Series
    """
    # sep=None asks the python parsing engine to sniff the delimiter, so both
    # comma- and tab-separated files load without further configuration.
    data = pd.read_csv(filename, sep=None, engine="python")
    x = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    return x, y

def deal_data(x, y, ratio=0.2):
    """Shuffle the samples and split them into a training and a test part.

    :param x: samples (features)
    :param y: labels belonging to ``x``
    :param ratio: value forwarded as ``test_size`` to ``train_test_split``
    :return: training features, test features, training labels, test labels
    """
    parts = train_test_split(x, y, shuffle=True, test_size=ratio)
    train_features, test_features, train_labels, test_labels = parts
    return train_features, test_features, train_labels, test_labels

def train_test(x_train, x_test, y_train):
    """Fit a bagging ensemble of logistic-regression models and predict.

    :param x_train: training features
    :param x_test: test features
    :param y_train: training labels
    :return: ``(y_pred, y_pred_train)`` -- predicted labels for the test set
        and for the training set
    """
    # Base learner: multinomial logistic regression. Other base learners
    # (e.g. DecisionTreeClassifier(criterion='entropy', max_depth=None) or
    # KNeighborsClassifier(7)) can be swapped in here.
    # NOTE(review): newer scikit-learn releases deprecate ``multi_class``;
    # confirm against the installed version.
    lr = LogisticRegression(penalty='l2', multi_class="multinomial", solver="newton-cg")

    # The base learner is passed positionally because its keyword was renamed
    # from ``base_estimator`` to ``estimator`` in newer scikit-learn releases;
    # the positional form works with either name.
    clf = BaggingClassifier(lr, n_estimators=100, max_samples=1.0, bootstrap=True)

    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    y_pred_train = clf.predict(x_train)

    return y_pred, y_pred_train

def grid_train_test(x_train, x_test, y_train):
    """Tune a bagging ensemble with a grid search, then predict.

    The grid covers ``n_estimators`` in ``[50, 100]`` and ``bootstrap`` in
    ``[True, False]`` with 5-fold cross-validation. The best parameters and
    the best cross-validation score are printed.

    :param x_train: training features
    :param x_test: test features
    :param y_train: training labels
    :return: ``(y_pred, y_pred_train)`` -- predicted labels for the test set
        and for the training set
    """
    # Base learner: multinomial logistic regression.
    # NOTE(review): newer scikit-learn releases deprecate ``multi_class``;
    # confirm against the installed version.
    lr = LogisticRegression(penalty='l2', multi_class="multinomial", solver="newton-cg")

    bagging_param_grid = {'n_estimators': [50, 100], 'bootstrap': [True, False]}
    # The base learner is passed positionally because its keyword was renamed
    # from ``base_estimator`` to ``estimator`` in newer scikit-learn releases;
    # the positional form works with either name.
    clf = GridSearchCV(BaggingClassifier(lr), param_grid=bagging_param_grid, cv=5)

    clf.fit(x_train, y_train)
    print(clf.best_params_, clf.best_score_)

    y_pred = clf.predict(x_test)
    y_pred_train = clf.predict(x_train)

    return y_pred, y_pred_train

def evaluate(y_test, y_pred, y_train, y_pred_train):
    """Print accuracy on both splits and a classification report for the test set.

    :param y_test: true labels of the test set
    :param y_pred: predicted labels of the test set
    :param y_train: true labels of the training set
    :param y_pred_train: predicted labels of the training set
    :return: None; the results are printed
    """
    # Arguments are given as (y_true, y_pred), the same order the
    # classification_report call below uses.
    test_accuracy = accuracy_score(y_test, y_pred)
    train_accuracy = accuracy_score(y_train, y_pred_train)

    result = classification_report(y_test, y_pred)

    print(f"测试集的准确率为: {test_accuracy}")
    print(f"训练集的准确率为: {train_accuracy}")
    print(result)
if __name__ == "__main__":
    import sys

    # 1. Load the data. The path may be given as the first command-line
    #    argument; otherwise the original hard-coded location is used.
    default_path = 'D:/pyCharm/例子/机器语言/glassType.txt'
    filename = sys.argv[1] if len(sys.argv) > 1 else default_path
    x, y = get_data(filename)

    # 2. Split into training and test sets (samples are shuffled).
    x_train, x_test, y_train, y_test = deal_data(x, y, 0.2)

    # 3. Train and predict with the grid-searched Bagging model.
    #    train_test(x_train, x_test, y_train) is the untuned alternative.
    y_pred, y_pred_train = grid_train_test(x_train, x_test, y_train)

    # 4. Evaluate the predictions.
    print("Bagging算法的预测结果如下: ")
    evaluate(y_test, y_pred, y_train, y_pred_train)



06-14 5803

03-30 5921

09-20 358

04-11 95

11-12 260

05-21 458

05-18 26

11-12 935

11-30 8957

08-23 4835

10-29 415

11-26 725

01-16 3271

09-28 39万+

11-01 20

04-18 159

04-14 59万+

03-13 15万+

02-19 18万+

03-04 14万+

03-06 2105

03-08 2万+

04-25 6万+

03-10 13万+

03-10 18万+

03-12 11万+

03-13 11万+

03-16 1731

03-18 1万+

03-19 8万+

03-19 3万+

03-20 9603

03-22 4662

03-23 4万+

03-24 3万+

03-25 3万+

05-08 5万+

03-25 9万+

03-27 5万+

03-29 21万+

03-29 10万+

03-30 16万+

05-31 6788

04-02 6206

04-02 4万+

05-31 218

04-06 1万+

04-06 7万+

04-06 3310

04-09 8万+

04-09 3万+

05-17 9085

04-11 4万+

04-15 6万+

04-18 4万+

04-20 4万+

04-24 3万+

05-03 1万+

05-16 5万+

05-06 1万+

05-06 2301

05-07 4037

05-08 4万+

05-10 3592

05-11 2620

05-14 6915

05-14 1223

05-16 3861

05-16 1万+

05-17 5340

05-30 1231

05-18 2481

05-18 8780

05-18 3888

05-19 1万+

05-21 8505

05-21 8580

05-27 7430

05-30 959

05-23 6830

#### 2020年，令人惊叹的Echarts！

©️2019 CSDN 皮肤主题: 游动-白 设计师: 上身试试