Theory references:
【机器学习】AdaBoost算法Python实现
从零学习Adaboost算法的python实现
Code: https://github.com/taifyang/machine-learning
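For quick reference, these are the standard AdaBoost update rules that the implementations below follow (a summary written to match the code, with $h_t$ the stump trained in round $t$ and $D_t$ the sample weights):

$$\varepsilon_t = \sum_{i=1}^{m} D_t(i)\,\mathbb{1}\left[h_t(x_i) \neq y_i\right], \qquad \alpha_t = \frac{1}{2}\ln\frac{1-\varepsilon_t}{\varepsilon_t}, \qquad D_{t+1}(i) = \frac{D_t(i)\,e^{-\alpha_t y_i h_t(x_i)}}{Z_t}$$

where $Z_t$ normalizes $D_{t+1}$ to sum to 1, and the final classifier is $H(x) = \operatorname{sign}\left(\sum_t \alpha_t h_t(x)\right)$.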
Python implementation:
import numpy as np


class AdaBoost:
    # Decision stump classifier: samples on one side of the threshold are assigned
    # class -1 and samples on the other side class +1.
    def stumpClassify(self, dataMatrix, dimen, threshVal, threshIneq):  # data, feature index, threshold, comparison mode: 'lt' or 'gt'
        retArray = np.ones((np.shape(dataMatrix)[0], 1))  # initialize every prediction to +1
        # 'lt': samples <= threshold get -1; 'gt': samples > threshold get -1
        if threshIneq == 'lt':
            retArray[dataMatrix[:, dimen] <= threshVal] = -1.0
        else:
            retArray[dataMatrix[:, dimen] > threshVal] = -1.0
        return retArray  # predicted labels
    # Try every possible input to stumpClassify() and return the best decision stump for the dataset.
    def buildStump(self, dataArr, classLabels, D):  # data, labels, weight vector
        dataMatrix = np.mat(dataArr)  # convert the input data to matrix form
        labelMat = np.mat(classLabels).T  # labels, transposed into a column vector
        m, n = np.shape(dataMatrix)  # m samples, n features per sample
        numSteps = 10.0  # number of steps used to walk over each feature's value range
        bestStump = {}  # stores the best stump found for the given weight vector D
        bestClasEst = np.mat(np.zeros((m, 1)))  # class estimates of the best stump so far
        minError = np.inf  # start at +inf; tracks the minimum weighted error found
        for i in range(n):  # iterate over all features
            rangeMin = dataMatrix[:, i].min()
            rangeMax = dataMatrix[:, i].max()
            stepSize = (rangeMax - rangeMin) / numSteps  # step size = value range divided by the number of steps
            for j in range(-1, int(numSteps) + 1):  # iterate over each step
                for inequal in ['lt', 'gt']:  # try both inequality directions
                    threshVal = rangeMin + j * stepSize  # candidate threshold
                    predictedVals = self.stumpClassify(dataMatrix, i, threshVal, inequal)  # classify with this stump
                    errArr = np.mat(np.ones((m, 1)))  # error vector: 1 where prediction differs from the true label
                    errArr[predictedVals == labelMat] = 0  # 0 where prediction is correct
                    weightedError = D.T * errArr  # weighted error: row vector times column vector sums the weighted mistakes
                    print("split: dim %d, thresh %.2f, thresh inequal: %s, the weighted error is %.3f" % (i, threshVal, inequal, weightedError))
                    # If this stump beats minError, record it as the best stump so far.
                    if weightedError < minError:
                        minError = weightedError
                        bestClasEst = predictedVals.copy()
                        bestStump['dim'] = i
                        bestStump['thresh'] = threshVal
                        bestStump['ineq'] = inequal
        return bestStump, minError, bestClasEst  # best stump, its weighted error, and its class estimates
    # Full AdaBoost training loop.
    def fit(self, x, y, iters=10):  # data, labels, maximum number of boosting rounds
        self.weakClassArr = []  # list that collects the trained decision stumps
        m = x.shape[0]  # number of samples
        # Initialize every entry of the weight vector D to 1/m. D holds one weight per sample;
        # later rounds increase the weights of misclassified samples and decrease the rest.
        D = np.mat(np.ones((m, 1)) / m)
        aggClassEst = np.mat(np.zeros((m, 1)))  # running aggregated class estimate per sample
        for i in range(iters):  # iterate until iters rounds are done or the training error reaches 0
            bestStump, error, classEst = self.buildStump(x, y, D)  # train one decision stump
            # Stump weight alpha; max(error, 1e-16) guards against division by zero when the stump is perfect.
            alpha = float(0.5 * np.log((1.0 - error) / max(error, 1e-16)))
            bestStump['alpha'] = alpha  # store alpha alongside the stump
            self.weakClassArr.append(bestStump)  # add the stump to the ensemble
            # Compute the weight vector D for the next round.
            expon = np.multiply(-alpha * np.mat(y).T, classEst)
            D = np.multiply(D, np.exp(expon))
            D = D / D.sum()
            aggClassEst += alpha * classEst  # accumulate the weighted (float-valued) class estimates
            # sign() turns the aggregate into a binary prediction; compare with the true labels to count mistakes.
            aggErrors = np.multiply(np.sign(aggClassEst) != np.mat(y).T, np.ones((m, 1)))
            errorRate = aggErrors.sum() / m  # training error rate
            print("total error: ", errorRate)  # error rate after this round
            if errorRate == 0.0:  # stop early; no need to run all iters rounds
                break
        print(self.weakClassArr)
        return self.weakClassArr
    def predict(self, x):  # samples to classify
        dataMatrix = np.mat(x)  # convert the samples to matrix form
        m = dataMatrix.shape[0]  # number of test samples
        aggClassEst = np.mat(np.zeros((m, 1)))  # aggregated class estimates, as in fit()
        for i in range(len(self.weakClassArr)):  # iterate over all weak classifiers
            # Class estimate from one stump via stumpClassify().
            classEst = self.stumpClassify(dataMatrix, self.weakClassArr[i]['dim'], self.weakClassArr[i]['thresh'], self.weakClassArr[i]['ineq'])
            aggClassEst += self.weakClassArr[i]['alpha'] * classEst  # weight the stump's vote by its alpha and accumulate
            print('aggClassEst', aggClassEst)
        return np.sign(aggClassEst)  # +1 where aggClassEst > 0, -1 where it is < 0
if __name__ == '__main__':
    x = np.array([[1., 2.1], [2., 1.1], [1.3, 1.], [1., 1.], [2., 1.]])
    y = np.array([1.0, 1.0, -1.0, -1.0, 1.0])
    ada = AdaBoost()
    D = np.mat(np.ones((5, 1)) / 5)  # initial weight vector
    print('Best decision stump:', ada.buildStump(x, y, D))
    ada.fit(x, y, iters=9)
    print('Prediction:', ada.predict([[0, 0]]))
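As an optional sanity check (an addition, not part of the original demo): once fit() reports a total error of 0.0, the ensemble reproduces the training labels, so predicting on the training set should return the signs of y:

print('Training-set predictions:', ada.predict(x).T)  # should equal np.sign(y)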
Python with scikit-learn:
import numpy as np
from sklearn import tree
from sklearn.ensemble import AdaBoostRegressor

x = np.array([[1., 2.1], [2., 1.1], [1.3, 1.], [1., 1.], [2., 1.]])
y = np.array([1.0, 1.0, -1.0, -1.0, 1.0])
reg = AdaBoostRegressor(tree.DecisionTreeRegressor(max_depth=4), n_estimators=300, random_state=np.random.RandomState(1))
reg.fit(x, y)
print('Prediction:', reg.predict([[0, 0]]))
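Since the task above is binary classification with ±1 labels, sklearn's AdaBoostClassifier with depth-1 trees (decision stumps, matching the from-scratch implementation) is arguably the more natural fit than a regressor; a minimal sketch:

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

x = np.array([[1., 2.1], [2., 1.1], [1.3, 1.], [1., 1.], [2., 1.]])
y = np.array([1.0, 1.0, -1.0, -1.0, 1.0])

# max_depth=1 makes each base learner a decision stump, as in the from-scratch code.
clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=10)
clf.fit(x, y)
print('Prediction:', clf.predict([[0, 0]]))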
C++ implementation:
#include <iostream>
#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <limits>
#include <Eigen/Dense>

class AdaBoost
{
public:
    // Decision stump classifier: threshIneq == 0 corresponds to 'lt' (samples <= threshold
    // get -1), threshIneq == 1 corresponds to 'gt' (samples > threshold get -1).
    Eigen::MatrixXf stumpClassify(Eigen::MatrixXf dataMatrix, int dimen, float threshVal, int threshIneq)
    {
        Eigen::MatrixXf retArray = Eigen::MatrixXf::Ones(dataMatrix.rows(), 1);
        if (threshIneq == 0)
        {
            for (int i = 0; i < dataMatrix.rows(); i++)
            {
                if (dataMatrix(i, dimen) <= threshVal)
                    retArray(i, 0) = -1.0f;
            }
        }
        else
        {
            for (int i = 0; i < dataMatrix.rows(); i++)
            {
                if (dataMatrix(i, dimen) > threshVal)
                    retArray(i, 0) = -1.0f;
            }
        }
        return retArray;
    }
    // Search all thresholds over all features and keep the stump with the lowest weighted error.
    void buildStump(std::vector<std::vector<float>> dataArr, std::vector<float> classLabels, Eigen::MatrixXf D)
    {
        Eigen::MatrixXf dataMatrix(dataArr.size(), dataArr[0].size());
        for (int i = 0; i < dataMatrix.rows(); i++)
        {
            for (int j = 0; j < dataMatrix.cols(); j++)
            {
                dataMatrix(i, j) = dataArr[i][j];
            }
        }
        Eigen::MatrixXf labelMat(classLabels.size(), 1);
        for (int i = 0; i < labelMat.rows(); i++)
        {
            labelMat(i, 0) = classLabels[i];
        }
        int m = dataMatrix.rows(), n = dataMatrix.cols();
        float numSteps = 10.0f;
        m_classEst = Eigen::MatrixXf::Zero(m, 1);
        m_error = std::numeric_limits<float>::max();  // minimum weighted error found so far
        for (int i = 0; i < n; i++)
        {
            // The value range of feature i determines the candidate thresholds.
            float rangeMin = dataMatrix.col(i).minCoeff();
            float rangeMax = dataMatrix.col(i).maxCoeff();
            float stepSize = (rangeMax - rangeMin) / numSteps;
            for (int j = -1; j < int(numSteps) + 1; j++)
            {
                for (auto inequal : { 0, 1 })  // try both inequality directions
                {
                    float threshVal = rangeMin + j * stepSize;
                    Eigen::MatrixXf predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal);
                    Eigen::MatrixXf errArr = Eigen::MatrixXf::Ones(m, 1);  // 1 where misclassified, 0 where correct
                    for (int k = 0; k < m; k++)
                    {
                        if (predictedVals(k, 0) == labelMat(k, 0))
                            errArr(k, 0) = 0;
                    }
                    float weightedError = (D.transpose() * errArr)(0, 0);
                    std::cout << "i:" << i << " threshVal:" << threshVal << " inequal:" << inequal << " weightedError:" << weightedError << std::endl;
                    if (weightedError < m_error)
                    {
                        m_error = weightedError;
                        m_classEst = predictedVals;
                        m_bestStump["dim"] = i;
                        m_bestStump["thresh"] = threshVal;
                        m_bestStump["ineq"] = inequal;
                    }
                }
            }
        }
    }
    // Full AdaBoost training loop.
    void fit(std::vector<std::vector<float>> x, std::vector<float> y, int iters = 10)
    {
        int m = x.size();
        Eigen::MatrixXf D = Eigen::MatrixXf::Ones(m, 1) / m;  // initialize every sample weight to 1/m
        Eigen::MatrixXf aggClassEst = Eigen::MatrixXf::Zero(m, 1);
        for (int iter = 0; iter < iters; iter++)
        {
            buildStump(x, y, D);  // sets m_bestStump, m_error, m_classEst
            // Stump weight alpha; std::max guards against division by zero when the stump is perfect.
            float alpha = 0.5f * log((1.0f - m_error) / std::max(m_error, 1e-16f));
            m_bestStump["alpha"] = alpha;
            m_classifierArr.push_back(m_bestStump);
            Eigen::MatrixXf labelMat(y.size(), 1);
            for (int i = 0; i < labelMat.rows(); i++)
            {
                labelMat(i, 0) = y[i];
            }
            // Update the sample weights for the next round: D *= exp(-alpha * y * h(x)), then renormalize.
            Eigen::MatrixXf expon = (-alpha * labelMat).cwiseProduct(m_classEst);
            for (int i = 0; i < expon.rows(); i++)
            {
                expon(i, 0) = exp(expon(i, 0));
            }
            D = D.cwiseProduct(expon);
            D = D / D.sum();
            aggClassEst += alpha * m_classEst;  // accumulate the weighted class estimates
            // Count the samples whose aggregated prediction disagrees in sign with the true label.
            Eigen::MatrixXf aggErrors = Eigen::MatrixXf::Zero(m, 1);
            for (int i = 0; i < aggClassEst.rows(); i++)
            {
                if (aggClassEst(i, 0) * labelMat(i, 0) < 0)
                    aggErrors(i, 0) = 1;
            }
            float errorRate = aggErrors.sum() / m;
            std::cout << "errorRate: " << errorRate << std::endl;
            if (errorRate == 0.0f)  // stop early once the training error hits 0
                break;
        }
    }
    std::vector<float> predict(std::vector<std::vector<float>> x)
    {
        Eigen::MatrixXf dataMatrix(x.size(), x[0].size());
        for (int i = 0; i < dataMatrix.rows(); i++)
        {
            for (int j = 0; j < dataMatrix.cols(); j++)
            {
                dataMatrix(i, j) = x[i][j];
            }
        }
        int m = dataMatrix.rows();
        Eigen::MatrixXf aggClassEst = Eigen::MatrixXf::Zero(m, 1);
        for (size_t i = 0; i < m_classifierArr.size(); i++)
        {
            // Each stump votes, weighted by its alpha.
            Eigen::MatrixXf classEst = stumpClassify(dataMatrix, (int)m_classifierArr[i]["dim"], m_classifierArr[i]["thresh"], (int)m_classifierArr[i]["ineq"]);
            aggClassEst += m_classifierArr[i]["alpha"] * classEst;
            std::cout << "aggClassEst: " << aggClassEst << std::endl;
        }
        // The final label is the sign of the aggregated estimate.
        std::vector<float> ret(aggClassEst.rows());
        for (size_t i = 0; i < ret.size(); i++)
        {
            ret[i] = aggClassEst(i, 0) > 0 ? 1.0f : -1.0f;
        }
        return ret;
    }
private:
    std::map<std::string, float> m_bestStump;                   // best stump found by buildStump()
    Eigen::MatrixXf m_classEst;                                 // class estimates of the best stump
    float m_error;                                              // weighted error of the best stump
    std::vector<std::map<std::string, float>> m_classifierArr;  // the trained ensemble
};

int main(int argc, char* argv[])
{
    std::vector<std::vector<float>> x = { { 1.0f, 2.1f }, { 2.0f, 1.1f }, { 1.3f, 1.0f }, { 1.0f, 1.0f }, { 2.0f, 1.0f } };
    std::vector<float> y = { 1, 1, -1, -1, 1 };
    AdaBoost ada = AdaBoost();
    Eigen::MatrixXf D = Eigen::MatrixXf::Ones(5, 1) / 5.0f;
    std::cout << "Best decision stump:" << std::endl;
    ada.buildStump(x, y, D);
    ada.fit(x, y, 9);
    std::cout << "Prediction: " << ada.predict({ { 0, 0 } })[0] << std::endl;
    system("pause");
    return EXIT_SUCCESS;
}