from numpy import *
import matplotlib.pyplot as plt
import random
from sklearn import tree
def loadDataSet(fileName):
    """Load a tab-separated numeric data set from a text file.

    Each non-blank line is one sample. The number of columns is taken from
    the first line of the file; the first ``numFeat - 1`` columns of every
    row are parsed as float features and the last column of the row is
    parsed as its float label.

    Args:
        fileName: Path of the tab-separated text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list of per-sample
        feature lists and ``labelMat`` is the list of float labels. Both are
        empty when the file contains no samples.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a row has fewer than ``numFeat - 1`` columns.
    """
    # Read the file once inside a context manager so the handle is always
    # closed (the file used to be opened twice and never closed).
    with open(fileName) as fr:
        lines = fr.readlines()

    dataMat = []
    labelMat = []
    if not lines:
        return dataMat, labelMat

    # Column count comes from the first line, exactly as before.
    numFeat = len(lines[0].split('\t'))
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Blank / trailing empty lines carry no sample; skip them
            # instead of failing on float('').
            continue
        curLine = stripped.split('\t')
        dataMat.append([float(curLine[i]) for i in range(numFeat - 1)])
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def rand_train(dataMat, labelMat):
    """Draw a bootstrap sample (sampling with replacement) of the data set.

    As many samples are drawn as there are labels; each draw picks a
    uniformly random index and takes the matching row and label.

    Args:
        dataMat: Indexable collection of samples.
        labelMat: Sequence of labels, index-aligned with ``dataMat``.

    Returns:
        A ``(train_data, train_label)`` tuple of lists with
        ``len(labelMat)`` entries each.
    """
    sample_count = len(labelMat)
    # One inclusive random index per draw; data and label are fetched
    # together so they stay aligned.
    drawn = [
        (dataMat[pick], labelMat[pick])
        for pick in (
            random.randint(0, sample_count - 1) for _ in range(sample_count)
        )
    ]
    boot_data = [row for row, _ in drawn]
    boot_labels = [label for _, label in drawn]
    return boot_data, boot_labels
def bagging_by_tree(dataMat, labelMat, t=10, testFile='horseColicTest.txt'):
    """Train ``t`` decision trees on bootstrap samples and predict a test set.

    The base learners are trained one after another; each is fitted on its
    own bootstrap sample produced by ``rand_train`` and then used to predict
    every sample of the test set loaded from ``testFile``.

    Args:
        dataMat: Training samples.
        labelMat: Training labels, index-aligned with ``dataMat``.
        t: Number of base learners to train (default 10).
        testFile: Path of the tab-separated test file read with
            ``loadDataSet``. Defaults to the previously hard-coded
            ``'horseColicTest.txt'``.

    Returns:
        A ``(predict_list, test_label)`` tuple. ``predict_list`` holds one
        entry per learner, each a one-element list wrapping that learner's
        prediction array; ``test_label`` is the list of test labels.
    """
    test_data, test_label = loadDataSet(testFile)
    predict_list = []
    for _ in range(t):
        # Fresh bootstrap sample for every base learner.
        train_data, train_label = rand_train(dataMat, labelMat)
        clf = tree.DecisionTreeClassifier()
        clf.fit(train_data, train_label)
        # Keep the one-element wrapper: calc_error unpacks a 3-D shape
        # (learners, 1, samples) from this list.
        predict_list.append([clf.predict(test_data)])
    return predict_list, test_label
def calc_error(predict_list, test_label):
    """Return the error rate of the combined vote of all base learners.

    The per-learner predictions are summed along the learner axis and the
    sign of each sum is taken as the combined prediction; a sum of zero
    (a tie) is replaced by a randomly chosen +1 or -1.
    NOTE(review): this voting scheme presumably expects labels encoded as
    +1 / -1 -- confirm against the data files.

    Args:
        predict_list: 3-D array-like of predictions, as produced by
            ``bagging_by_tree`` (learners x 1 x samples).
        test_label: True labels, one per test sample.

    Returns:
        Fraction of test samples whose combined prediction differs from
        the true label.
    """
    _, _, sample_count = shape(predict_list)
    # `sum` and `sign` resolve to NumPy's versions through the file's star
    # import, which is why `axis` is accepted here.
    votes = sign(sum(predict_list, axis=0))
    ballot = votes[0]
    for pos in range(len(ballot)):
        if ballot[pos] == 0:
            # Tie between learners: pick one of the two labels at random.
            ballot[pos] = 1 if random.randint(0, 1) == 0 else -1
    mistakes = len(
        [pos for pos in range(sample_count) if ballot[pos] != test_label[pos]]
    )
    return mistakes / sample_count
if __name__ == "__main__":
    # Load the training set, build the bagged decision-tree ensemble and
    # print its error rate on the test set.
    fileName = 'horseColicTraining.txt'
    dataMat, labelMat = loadDataSet(fileName)
    # bagging_by_tree draws its own bootstrap samples, so no sample is
    # drawn here (the previous extra rand_train call was never used).
    predict_list, test_label = bagging_by_tree(dataMat, labelMat)
    print("Bagging错误率:", calc_error(predict_list, test_label))
04-13
2761
12-10
2452
04-05
2101
05-12
405
“相关推荐”对你有帮助么?
-
非常没帮助
-
没帮助
-
一般
-
有帮助
-
非常有帮助
提交