#include <iostream>
#include <algorithm>
#include <functional>
#include<cmath>
#include <vector>
using std::vector ;
using namespace std;
#define FCOUNT 100//number of features stored in each sample
#define CCOUNT 30//number of weak classifiers (boosting rounds) to train
#define PCOUNT 200//number of positive samples to generate
#define NCOUNT 300//number of negative samples to generate
// One training example: a fixed-length feature vector together with its
// label, its current boosting weight and the most recent prediction made for it.
struct sample
{
int features[FCOUNT];//feature values
int pos_neg;//label: 0 = positive, 1 = negative
float weight;//sample weight used during boosting
int result;//label predicted by the classifier for this sample (0 or 1)
};
// A weak classifier: a threshold test on a single feature.
// Training() treats a sample as positive (0) when
// features[indexF] <= threshold and as negative (1) otherwise.
struct weakClassifier
{
int indexF;//index of the feature this classifier tests
float threshold;//decision threshold applied to that feature
};
struct MySortFunction
{
int m_n;
MySortFunction(int n):m_n(n)
{
}
bool operator()(sample&s1,sample&s2)const
{
return s1.features[m_n]<s2.features[m_n];
}
};
// Appends PCOUNT positive samples to `a`.
// Every generated sample has label 0, prediction 0, weight 1/(2*PCOUNT) and
// FCOUNT features drawn with rand() % 10.
void CreatePos(vector<sample>&a)
{
    const float startWeight = 1.0f / (2 * PCOUNT);
    for (int made = 0; made < PCOUNT; ++made)
    {
        sample s;
        // Fill the feature vector first; rand() is called once per feature, in order.
        for (int f = 0; f < FCOUNT; ++f)
        {
            s.features[f] = rand() % 10;
        }
        s.pos_neg = 0;
        s.result = 0;
        s.weight = startWeight;
        a.push_back(s);
    }
}
// Returns the smaller of the two arguments; returns `a` when they are equal.
// If the comparison a <= b is false (including when either value is NaN),
// `b` is returned.
float min(float a,float b)
{
    if (a <= b)
    {
        return a;
    }
    return b;
}
// Appends NCOUNT negative samples to `a`.
// Every generated sample has label 1, prediction 1, weight 1/(2*NCOUNT) and
// FCOUNT features drawn with rand() % 10.
void CreateNeg(vector<sample>&a)
{
    const float startWeight = 1.0f / (2 * NCOUNT);
    for (int made = 0; made < NCOUNT; ++made)
    {
        sample s;
        // Fill the feature vector first; rand() is called once per feature, in order.
        for (int f = 0; f < FCOUNT; ++f)
        {
            s.features[f] = rand() % 10;
        }
        s.pos_neg = 1;
        s.result = 1;
        s.weight = startWeight;
        a.push_back(s);
    }
}
//Training classifier
void Training(vector<sample>&a,vector<weakClassifier>&b,float*factors)
{
int i,j;
vector<sample>::size_type id=0,tcount=a.size();
for(i=0;i<CCOUNT;i++)
{
weakClassifier temp;
float totalWeights=0.0,totalPos=0.0,totalNeg=0.0,bPos=0,bNeg=0;//(当前样本之前的)正负样本权值和
float e,thr,besterr=1.0;//训练单个分类器时用到的错误率,阈值,最小错误率
float FThr[FCOUNT];//特征阈值
float minErr=1.0;//所有特征的最小错误率
float beta;//更新权值所需系数
/*权重归一化*/
for(id=0;id<tcount;id++)
{
totalWeights+=a[id].weight ;
if(a[id].pos_neg ) totalNeg+=a[id].weight ;
else
totalPos+=a[id].weight ;
}
for(id=0;id<tcount;id++)
a[id].weight /=totalWeights;
/*对每一特征训练一弱分类器*/
for(j=0;j<FCOUNT;j++)
{
//按特征j排序
sort(a.begin (),a.end (),MySortFunction(j));
besterr=1.0;
//求单个弱分类器的阈值
for(id=0;id<tcount;id++)
{
if(a[id].pos_neg ) bNeg+=a[id].weight ;
else bPos+=a[id].weight ;
e=min((bPos+totalNeg-bNeg),(bNeg+totalPos-bPos));
if(id==0)
thr=a[id].features [j]-0.5;
else
{
if(id==tcount-1)
thr=a[a.size()-1].features[j]+0.5;
else
thr=(a[id].features [j]+a[id-1].features [j])/2;
}
if(e<besterr)
{
besterr=e;
FThr[j]=thr;
//cout<<FThr[j]<<" "<<j<<endl;
}
}
}
/*选取最优分类器*/
for(j=0;j<FCOUNT;j++)
{
float serror=0.0;
for(id=0;id<tcount;id++)
{
if(a[id].features [j]<=FThr[j]) a[id].result =0;//positive sample
else
a[id].result =1;
serror+=a[id].weight *abs(a[id].result -a[id].pos_neg );
}
if(serror<minErr)
{
minErr=serror;
temp.indexF=j;
temp.threshold=FThr[j];
}
}
b.push_back (temp);//选出一个弱分类器
beta=minErr/(1-minErr);
factors[i]=log(1/beta);
/*更新权值*/
for(id=0;id<tcount;id++)
{
if(a[id].pos_neg ==a[id].result )
a[id].weight *=beta;
}
}//强分类器训练完毕
}
void main()
{
vector<sample>a;
vector<weakClassifier>b;
float factors[CCOUNT];//at
CreatePos(a);//创建正样本
CreateNeg(a);//创建负样本
Training(a,b,factors);//训练分类器
//查看训练出的分类器的参数
int i=0;
for(i=0;i<CCOUNT;i++)
{
cout<<"系数"<<factors[i]<<"特征"<<b[i].indexF <<"阈值"<<b[i].threshold<<endl;
}
}
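// adaboost 简单介绍 (a brief introduction to AdaBoost)
// 最新推荐文章于 2022-05-29 20:55:50 发布 (web-page footer: "latest recommended article published 2022-05-29 20:55:50")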