用Adaboost对自己的数据分类

想用Adaboost分类器对自己的数据分类,可是网上一查,几乎都是Adaboost+Haar人脸检测之类的内容,所以只能自己写了。参考 http://blog.csdn.net/zhaocj/article/details/50536385 ,用Adaboost算法对自己的数据集分类。特征和标签都是csv文件:特征矩阵是224×1000维的(224个样本,每个样本1000维特征),类别数是2,也就是二分类。

int main(int argc, char** argv)
{

//打乱顺序结果在kuangvec[]和stonevec[]两个数组里 图像标号
const int uselesssample = 134, usefulsample = 90, allsample = 224,featurecol=1000;
//打乱顺序 前90个为矿  后134个为废石
int kuangvec[usefulsample], stonevec[uselesssample];
for (int i = 0; i < usefulsample; i++)
kuangvec[i] = i ;
for (int i = 0; i <uselesssample; i++)
stonevec[i] = usefulsample+i;
random_shuffle(kuangvec, kuangvec+usefulsample);
random_shuffle(stonevec, stonevec + uselesssample);
///show fixed ranks
//for (int i = 0; i < usefulsample; i++)
// cout << kuangvec[i] << endl;
//cout << endl;
//for (int i = 0; i < uselesssample; i++)
// cout << stonevec[i] << endl;
/read features
CvMLData cvmlprimer;
cvmlprimer.read_csv("features.csv");
cv::Mat cvml = cv::Mat(cvmlprimer.get_values(),true);
CvMLData resprimer;
resprimer.read_csv("labels.csv");
cv::Mat res = cv::Mat(resprimer.get_values(), true);
/saperate into trainset 
const float rate =0.6;
int trainnum=int(usefulsample*rate)+int(uselesssample*rate), testnum=allsample-trainnum;
Mat traindata=Mat::zeros(trainnum, featurecol, cvml.type()), testdata=Mat::zeros(testnum, featurecol, cvml.type()), trainlabel=Mat::zeros(trainnum, 1, res.type()),      testlabel=Mat::zeros(testnum,1, res.type());
for (int i = 0; i <int(usefulsample*rate); i++) 
{
float* newrow = traindata.ptr<float>(i);
int currentrow = kuangvec[i];
float* primerow = cvml.ptr<float>(currentrow);
for (int j = 0; j < featurecol; j++)
newrow[j] = primerow[j];
float* newlabelrow = trainlabel.ptr<float>(i);
float* primerlabelrow = res.ptr<float>(currentrow);  
newlabelrow[0] = primerlabelrow[0];
}
for (int i = int(usefulsample*rate); i <trainnum; i++)  
{
float* newrow = traindata.ptr<float>(i);
int ii = i - int(usefulsample*rate);
int currentrow = stonevec[ii];
float* primerow = cvml.ptr<float>(currentrow);
for (int j = 0; j < featurecol; j++)
newrow[j] = primerow[j];
float* newlabelrow = trainlabel.ptr<float>(i);
float* primerlabelrow = res.ptr<float>(currentrow);
newlabelrow[0] = primerlabelrow[0];
}
//saperate into testset 
for (int i = 0; i <usefulsample-int(usefulsample*rate); i++)
{
float* newrow = testdata.ptr<float>(i);
int iii = i + int(usefulsample*rate);
int currentrow = kuangvec[iii];
float* primerow = cvml.ptr<float>(currentrow);
for (int j = 0; j < featurecol; j++)
newrow[j] = primerow[j];
float* newlabelrow = testlabel.ptr<float>(i);
float* primerlabelrow = res.ptr<float>(currentrow);
newlabelrow[0] = primerlabelrow[0];
}
for (int i = usefulsample - int(usefulsample*rate); i <testnum; i++)
{
int ii = int(uselesssample*rate) + i - (usefulsample - int(usefulsample*rate));
float* newrow = testdata.ptr<float>(i);
int currentrow = stonevec[ii];
float* primerow = cvml.ptr<float>(currentrow);
for (int j = 0; j < featurecol; j++)
newrow[j] = primerow[j];
float* newlabelrow = testlabel.ptr<float>(i);
float* primerlabelrow = res.ptr<float>(currentrow);
newlabelrow[0] = primerlabelrow[0];
}
//trainset and testset have done!/
//for (int i = 0; i < trainnum; i++) //check the trainlabel and testlabel
//{
// float* row = trainlabel.ptr<float>(i);
// cout <<row[0]<< endl;
//}
//cout << "trainlabel= "<<trainnum << endl;
//for (int i = 0; i < testnum; i++)
//{
// float* row = testlabel.ptr<float>(i);
// cout << row[0]<<endl;
//}
CvMat traindata2 =traindata, trainlabel2 =trainlabel;
const CvMat* traindata3 = cvCreateMat(traindata2.rows, traindata2.cols, traindata2.type);
const CvMat* trainlabel3 = cvCreateMat(trainlabel2.rows, trainlabel2.cols, trainlabel2.type);
printf("Ready for Training ... ");
float priors[1000] = { 1, 1, 1, };
CvBoostParams params(CvBoost::GENTLE, 10, 0.95, 1, false, priors);
CvBoost boost;
bool update = false;
const CvMat* varIdx = 0;
const CvMat* sampleIdx = 0;
const CvMat* varType = 0;
const CvMat* missingDataMask = 0;
boost.train(traindata3,CV_ROW_SAMPLE, trainlabel3, varIdx, sampleIdx, varType, missingDataMask, params, update);
cout << "training done!" << endl;
// 1. Declare a couple of vectors to save the predictions of each sample
//std::vector train_responses, test_responses;
// 2. Calculate the training error
//float fl1 = boost.calc_error(&cvml, CV_TRAIN_ERROR, &train_responses);
// 3. Calculate the test error
//float fl2 = boost.calc_error(&cvml, CV_TEST_ERROR, &test_responses);
//printf("Error train %f \n", fl1);
//printf("Error test %f \n", fl2);
// Save the trained classifier
//boost.save("./trained_boost.xml", "boost");
//return EXIT_SUCCESS;
return 0;
}

上面的代码只做了训练(train),可是用Adaboost训练的时候就报错,说只能用于二分类。可是我检查过我的标签,结果证明确实是二分类:训练标签只有1和2两类,两个类都拿出了60%作为训练集,剩下的40%作为测试集。可是为什么train的时候还会报这样的错呢?

搞了一上午,终于知道为什么了:从traindata转成CvMat*的时候没有复制数据,只是复制了信息头(尺寸和类型),所以传给train的矩阵里是没有任何有效数据的!!!太蠢了。

改成了这样:

#include <algorithm>
#include <iostream>
#include <vector>

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ml/ml.hpp"
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
int main(int argc, char** argv)
{
//打乱顺序结果在kuangvec[]和stonevec[]两个数组里 图像标号
const int uselesssample = 134, usefulsample = 90, allsample = 224,featurecol=1000;
//打乱顺序 前90个为矿  后134个为废石
int kuangvec[usefulsample], stonevec[uselesssample];
for (int i = 0; i < usefulsample; i++)
kuangvec[i] = i ;
for (int i = 0; i <uselesssample; i++)
stonevec[i] = usefulsample+i;
random_shuffle(kuangvec, kuangvec+usefulsample);
random_shuffle(stonevec, stonevec + uselesssample);
///show fixed ranks
//for (int i = 0; i < usefulsample; i++)
// cout << kuangvec[i] << endl;
//cout << endl;
//for (int i = 0; i < uselesssample; i++)
// cout << stonevec[i] << endl;
/read features
CvMLData cvmlprimer;
cvmlprimer.read_csv("features.csv");
cv::Mat cvml = cv::Mat(cvmlprimer.get_values(),true);
CvMLData resprimer;
resprimer.read_csv("labels.csv");
cv::Mat res = cv::Mat(resprimer.get_values(), true);
/saperate into trainset randomly
const float rate =0.6;
int trainnum=int(usefulsample*rate)+int(uselesssample*rate), testnum=allsample-trainnum;
Mat traindata=Mat::zeros(trainnum, featurecol, cvml.type()), testdata=Mat::zeros(testnum, featurecol, cvml.type()), trainlabel=Mat::zeros(trainnum, 1, res.type()), testlabel=Mat::zeros(testnum,1, res.type());
for (int i = 0; i <int(usefulsample*rate); i++) 
{
float* newrow = traindata.ptr<float>(i);
int currentrow = kuangvec[i];
float* primerow = cvml.ptr<float>(currentrow);
for (int j = 0; j < featurecol; j++)
newrow[j] = primerow[j];
float* newlabelrow = trainlabel.ptr<float>(i);
float* primerlabelrow = res.ptr<float>(currentrow);  
newlabelrow[0] = primerlabelrow[0];
}
for (int i = int(usefulsample*rate); i <trainnum; i++)  
{
float* newrow = traindata.ptr<float>(i);
int ii = i - int(usefulsample*rate);
int currentrow = stonevec[ii];
float* primerow = cvml.ptr<float>(currentrow);
for (int j = 0; j < featurecol; j++)
newrow[j] = primerow[j];
float* newlabelrow = trainlabel.ptr<float>(i);
float* primerlabelrow = res.ptr<float>(currentrow);
newlabelrow[0] = primerlabelrow[0];
}
//saperate into testset 
for (int i = 0; i <usefulsample-int(usefulsample*rate); i++)
{
float* newrow = testdata.ptr<float>(i);
int iii = i + int(usefulsample*rate);
int currentrow = kuangvec[iii];
float* primerow = cvml.ptr<float>(currentrow);
for (int j = 0; j < featurecol; j++)
newrow[j] = primerow[j];
float* newlabelrow = testlabel.ptr<float>(i);
float* primerlabelrow = res.ptr<float>(currentrow);
newlabelrow[0] = primerlabelrow[0];
}
for (int i = usefulsample - int(usefulsample*rate); i <testnum; i++)
{
int ii = int(uselesssample*rate) + i - (usefulsample - int(usefulsample*rate));
float* newrow = testdata.ptr<float>(i);
int currentrow = stonevec[ii];
float* primerow = cvml.ptr<float>(currentrow);
for (int j = 0; j < featurecol; j++)
newrow[j] = primerow[j];
float* newlabelrow = testlabel.ptr<float>(i);
float* primerlabelrow = res.ptr<float>(currentrow);
newlabelrow[0] = primerlabelrow[0];
}
//trainset and testset have done!/
//for (int i = 0; i < trainnum; i++) //check the trainlabel and testlabel
//{
// float* row = trainlabel.ptr<float>(i);
// cout <<row[0]<< endl;
//}
//cout << "trainlabel= "<<trainnum << endl;
//for (int i = 0; i < testnum; i++)
//{
// float* row = testlabel.ptr<float>(i);
// cout << row[0]<<endl;
//}
//cout << "testlabel= " << testnum << endl;
CvMat traindata2 = traindata, trainlabel2 = trainlabel;
    CvMat* traindata3 = cvCreateMat(traindata2.rows, traindata2.cols, traindata2.type);
cvCopy(&traindata2, traindata3);
CvMat* trainlabel3 = cvCreateMat(trainlabel2.rows, trainlabel2.cols, trainlabel2.type);
cvCopy(&trainlabel2, trainlabel3);
cout<<"Ready for Training ... "<<endl;
float priors[1000];
for (int i = 0; i < 1000; i++)
priors[i] = 1;
CvBoostParams params(CvBoost::GENTLE, 1000, 0.95, 1, false, priors);
CvBoost boost;
bool update = false;
const CvMat* varIdx = 0;
const CvMat* sampleIdx = 0;
const CvMat* varType = 0;
const CvMat* missingDataMask = 0;
boost.train(traindata3,CV_ROW_SAMPLE, trainlabel3, varIdx, sampleIdx, varType, missingDataMask, params, update);
cout << "training done!!!prepare for testing..." << endl<<endl;
begin test
CvMat testdata2 = testdata, testlabel2 = testlabel;
CvMat* testdata3 = cvCreateMat(testdata2.rows, testdata2.cols, testdata2.type);
cvCopy(&testdata2, testdata3);
CvMat* testlabel3 = cvCreateMat(testlabel2.rows, testlabel2.cols, testlabel2.type);
cvCopy(&testlabel2, testlabel3);
const CvMat* missing = 0;
CvMat* weak_responses = 0;
const int numfortest = testdata.rows;
float  outputs;
outputs=boost.predict(testdata3, missing, weak_responses, CV_WHOLE_SEQ, false);
// 1. Declare a couple of vectors to save the predictions of each sample
//std::vector train_responses, test_responses;
// 2. Calculate the training error
//float fl1 = boost.calc_error(&cvml, CV_TRAIN_ERROR, &train_responses);
// 3. Calculate the test error
//float fl2 = boost.calc_error(&cvml, CV_TEST_ERROR, &test_responses);
//printf("Error train %f \n", fl1);
//printf("Error test %f \n", fl2);
// Save the trained classifier
//boost.save("./trained_boost.xml", "boost");
//return EXIT_SUCCESS;
return 0;
}

我增加了把数据集和标签随机划分为训练集和测试集的代码,还改了之前的错误。结果证明训练可以了,但测试那里出错了。请教公司的大神,大神告诉我:predict()的测试样本必须是单个样本。原来它不能像MATLAB里面那样一次拿很多个样本去测试、得到一个标签向量,它只能一个一个地测试,所以我用循环逐个样本预测就行了。

这样就行了啊,哈哈!!!只差计算准确率了。我用了个动态数组outputlabels把每次输出的标签存进去——我都快忘记动态数组可以避免普通数组每次定义时必须写死大小的烦恼了,但一定要记得delete。结果这样就行了呀!用Adaboost对自己的数据进行分类,完毕!我用新数据重新测试了一下:

和我用MATLAB里的Adaboost做出来的结果几乎一致,证明没有错误了,以后可以直接用了。

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 13
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 13
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

元气少女缘结神

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值