每个弱分类器的分类结果加权相加后,再用sign函数激活,得到最终分类结果。这里的权就是alpha
alpha根据每个弱分类器的分类错误率算出,alpha = 0.5 * ln( (1-errorRate) / errorRate )
本算法中的弱分类器为单决策树,在构建单决策树时,会根据加权错误率来衡量其性能
被分错的样本权重高,权重计算:D[i] = D[i] * e^(-1 * alpha * label[i] * predict[i]) / sum(D)
在AdaBoost训练过程中,每一次迭代都会更新D,D是AdaBoost与各弱分类器交互的地方
每一次迭代都会创建一个弱分类器,并存储下来
当分类错误率很小,或者迭代次数够了时,AdaBoost训练结束
#include <iostream>
#include <string>
#include <vector>
#include <set>
#include <algorithm>
#include <map>
#include <math.h>
using namespace std;
double e = 2.718281828459; // base of the natural logarithm (used for the base conversion in alpha)
double data[5][2] = // training data set: 5 samples, 2 features each
{
{1, 2.1},
{2, 1.1},
{1.3, 1},
{1, 1},
{2, 1}
};
double label[] = {1, 1, -1, -1, 1}; // class labels (+1 / -1), one per training sample
// Classify every sample by comparing one feature against a threshold:
// samples on one side of the threshold get -1, the rest get +1.
// threshIneq selects which side maps to -1: 0 means "<= threshold", 1 means "> threshold".
vector<double> stumpClassify(vector< vector<double> > &data, int dimen, double threshVal, int threshIneq)
{
    int sampleCount = data.size();
    vector<double> retLabel(sampleCount, 1); // default every prediction to +1
    for(int idx = 0; idx < sampleCount; idx++)
    {
        double feature = data[idx][dimen];
        bool onNegativeSide = (threshIneq == 0) ? (feature <= threshVal)
                                                : (feature > threshVal);
        if(onNegativeSide)
            retLabel[idx] = -1;
    }
    return retLabel;
}
// Decision-stump (single-level decision tree) generator.
// D: per-sample weights — misclassified samples carry larger weight.
// minError / bestClassEst are outputs: the smallest weighted error found
// and the corresponding predictions.
// Returns {feature index, threshold, inequality flag} of the best stump.
vector<double> buildStump(vector< vector<double> > &data, vector<double> &D, double &minError, vector<double> &bestClassEst)
{
    int N = data.size();
    int M = data[0].size();
    int numSteps = 10; // number of threshold candidates per feature
    vector<double> bestStump(3); // best stump: feature index, threshold, inequality flag
    minError = 10000000;
    for(int i = 0; i < M; i++) // iterate over every feature
    {
        double rangeMin = data[0][i], rangeMax = data[0][i];
        for(int j = 1; j < N; j++) // min / max of the current feature
        {
            if(data[j][i] < rangeMin)
                rangeMin = data[j][i];
            if(data[j][i] > rangeMax)
                rangeMax = data[j][i];
        }
        // BUG FIX: the step size must stay floating point (the old
        // `int stepSize = (rangeMax-rangeMin)/numSteps` truncated to 0 for
        // small ranges), and the loop must iterate over the numSteps
        // candidate thresholds, not over the step size itself.
        double stepSize = (rangeMax - rangeMin) / numSteps;
        for(int j = -1; j <= numSteps; j++) // thresholds may lie just outside the value range
        {
            double threshVal = rangeMin + j * stepSize;
            for(int inequal = 0; inequal < 2; inequal++) // try both directions of the inequality
            {
                vector<double> predictVals = stumpClassify(data, i, threshVal, inequal);
                double weightedError = 0; // weighted error of this candidate stump
                for(int k = 0; k < N; k++)
                {
                    if(predictVals[k] != label[k])
                    {
                        weightedError += D[k];
                    }
                }
                if(weightedError < minError) // keep the best stump seen so far
                {
                    minError = weightedError;
                    bestClassEst = predictVals;
                    bestStump[0] = i;
                    bestStump[1] = threshVal;
                    bestStump[2] = inequal;
                }
            }
        }
    }
    return bestStump;
}
// Return the larger of two doubles.
double max(double a, double b)
{
    if(a < b)
        return b;
    return a;
}
// Map a real number to a class label: positive -> +1, otherwise -> -1.
double sign(double a)
{
    return (a > 0) ? 1 : -1;
}
//基于单层决策树的AdaBoost训练过程
//numIt为迭代次数
vector< vector<double> > adaBoostTrainDS(vector< vector<double> > &data, int numIt)
{
int N = data.size();
int M = data[0].size();
vector< vector<double> > weakClassArr; //存储弱分类器
vector<double> D(N); //每个样例的权重,被分错的样例权重大
int i, j;
for(i=0; i<N; i++) //初始化所有样例权重相等
D[i] = 1.0 / (double)N;
vector<double> aggClassEst(N); //所有弱分类器的预测加权结果
for(i=0; i<N; i++)
aggClassEst[i] = 0;
for(i=0; i<numIt; i++)
{
double error;
vector<double> classEst;
vector<double> bestStump = buildStump(data, D, error, classEst); //预测
double alpha = 0.5 * log( (1.0 - error) / max(error, 0.000001) ) / log(e); //弱分类器的权重
bestStump.push_back(alpha);
weakClassArr.push_back(bestStump); //存储弱分类器
//为下一次迭代,计算每个样例的权重D
double Dsum = 0;
for(j=0; j<N; j++)
{
double expon = -1.0 * alpha * classEst[j] * label[j];
D[j] *= exp(expon);
Dsum += D[j];
}
double errorRate = 0;
for(j=0; j<N; j++)
{
D[j] /= Dsum;
aggClassEst[j] += alpha * classEst[j];
if(sign(aggClassEst[j]) != label[j])
{
errorRate += 1.0;
}
}
errorRate /= (double)N;
if(errorRate == 0)
break;
}
return weakClassArr;
}
// Predict labels for test samples.
// Each weak classifier votes with weight alpha; the sign of the weighted
// sum is the final +1 / -1 prediction.
// test: samples to classify; n: number of test samples;
// classifierArr: weak classifiers as produced by adaBoostTrainDS.
vector<double> adaClassify(vector< vector<double> > &test, int n, vector< vector<double> > &classifierArr)
{
    vector<double> aggClassEst(n, 0); // fill-construct instead of a zeroing loop
    // Accumulate each weak classifier's weighted vote.
    // size_t index fixes the old signed/unsigned comparison with size().
    for(size_t i = 0; i < classifierArr.size(); i++)
    {
        vector<double> classEst = stumpClassify(test, (int)classifierArr[i][0], classifierArr[i][1], (int)classifierArr[i][2]);
        for(int j = 0; j < n; j++)
            aggClassEst[j] += classifierArr[i][3] * classEst[j];
    }
    // Activate with sign() to obtain the final +1 / -1 labels.
    for(int i = 0; i < n; i++)
        aggClassEst[i] = sign(aggClassEst[i]);
    return aggClassEst;
}
// Program entry: load the hard-coded training data, train AdaBoost for up
// to 30 rounds, then classify two test samples and print the predictions.
int main(void)
{
    int n = 2;
    double test[2][2] =
    {
        {5, 5},
        {0, 0}
    };
    int N = 5, M = 2;
    cout << "数据集:" << endl;
    vector< vector<double> > d(5, vector<double>(M));
    for(int row = 0; row < N; row++)
    {
        for(int col = 0; col < M; col++)
        {
            d[row][col] = data[row][col];
            cout << d[row][col] << "\t";
        }
        cout << endl;
    }
    cout << "标签:" << endl;
    for(int row = 0; row < N; row++)
        cout << label[row] << endl;
    cout << endl << "测试数据集:" << endl;
    vector< vector<double> > t(n, vector<double>(M));
    for(int row = 0; row < n; row++)
    {
        for(int col = 0; col < M; col++)
        {
            t[row][col] = test[row][col];
            cout << t[row][col] << "\t";
        }
        cout << endl;
    }
    vector< vector<double> > classifierArr = adaBoostTrainDS(d, 30);
    vector<double> result = adaClassify(t, n, classifierArr);
    cout << "预测结果:" << endl;
    for(int row = 0; row < n; row++)
    {
        cout << result[row] << endl;
    }
    return 0;
}
运行结果: