#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <cstring>
#include <algorithm>
#include <cmath>
#include <random>
#include <Eigen/Dense>
#include <time.h>
using namespace std;
using namespace Eigen;
class svm
{
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW //use the Eigen
svm(const string &filename)
{
dataInit(filename);
}
void print()
{
smoP();
caclWs();
cout<<w<<endl;
}
void test()
{
double errCount=0.0;
cout<<w<<endl;
for (int i=0;i<trainDataNum;i++)
{
if((trainDataMat.row(i)*w+b<0&&trainLabelMat[i]>0)||
(trainDataMat.row(i)*w+b>0&&trainLabelMat[i]<0))
errCount+=1.0;
}
cout<<errCount<<endl;
cout<<"error rate is: "<<errCount/trainDataNum<<endl;
}
private:
void dataInit(const string &filename)
{
ifstream file(filename);
string line;
while(getline(file,line))
{
istringstream record(line);
vector<double> data;
double temp;
while(record>>temp)
data.push_back(temp);
trainLabelVec.push_back(temp);
data.pop_back();
trainDataVec.push_back(data);
}
trainDataNum=trainDataVec.size();
trainFeatureNum=trainDataVec[0].size();
trainDataMat.resize(trainDataNum,trainFeatureNum);
trainLabelMat.resize(trainDataNum);
alphas.resize(trainDataNum);
eCache.resize(trainDataNum,2);
w.resize(trainFeatureNum);
for(int i=0;i<trainDataNum;i++)
for(int j=0;j<trainFeatureNum;j++)
trainDataMat(i,j)=trainDataVec[i][j];
for(int i=0;i<trainDataNum;i++)
{
trainLabelMat[i]=trainLabelVec[i];
alphas[i]=0;
eCache(i,0)=0;
eCache(i,1)=0;
}
}
vector<vector<double>> trainDataVec;
vector<double> trainLabelVec;
int trainDataNum;
int trainFeatureNum;
Eigen::MatrixXd trainDataMat;
Eigen::VectorXd trainLabelMat;
double C=0.6;
double toler=0.001;
double b=0;
Eigen::VectorXd alphas;
Eigen::MatrixXd eCache;
Eigen::VectorXd w;
double calcEk(int k)
{
Eigen::VectorXd alpMulLab(trainDataNum);
for(int i=0;i<trainDataNum;i++)
alpMulLab(i)=alphas(i)*trainLabelMat(i);
double fXk=alpMulLab.transpose()*
(trainDataMat*(trainDataMat.row(k).transpose()))+b;
return fXk-trainLabelMat(k);
}
int selectJrand(int i,int m)
{
int j=i;
std::default_random_engine generator(rand()%m);
std::uniform_int_distribution<int> dis(0,m);
while(j==i)
j=dis(generator);
return j;
}
int selectJ(int i,double Ei,double &Ej)
{
int maxK=-1;
double maxDeltaE=0;
Ej=0;
eCache(i,0)=1;
eCache(i,1)=Ei;
int flag=0;
for(int k=0;k<trainDataNum;k++)
{
if(eCache(k,0)==1)
{
if(k==i)
continue;
else
{
flag=1;
double Ek=calcEk(k);
double deltaE=abs(Ei-Ej);
if(deltaE>maxDeltaE)
{
maxK=k;
maxDeltaE=deltaE;
Ej=Ek;
}
}
}
}
if(flag)
return maxK;
else
{
int j=selectJrand(i,trainDataNum);
Ej=calcEk(j);
return j;
}
}
//对alpha剪辑
double cliAlpha(double aj,double H,double L)
{
if (aj>H)
aj=H;
if (L>aj)
aj=L;
return aj;
}
int innerL(int i)
{
double Ei=calcEk(i);
if(((trainLabelMat[i]*Ei<-toler)&&(alphas[i]<C))||
((trainLabelMat[i]*Ei>toler)&&(alphas[i]>0)))
{
double Ej;
int j=selectJ(i,Ei,Ej);
double alphaIold=alphas[i];
double alphaJold=alphas[j];
double L,H;
if(trainLabelMat[i]!=trainLabelMat[j])
{
L=max(0.0,alphas[j]-alphas[i]);
H=min(C,C+alphas[j]-alphas[i]);
}
else
{
L=max(0.0,alphas[j]+alphas[i]-C);
H=min(C,alphas[j]+alphas[i]);
}
if(L==H)
return 0;
double eta=2.0*trainDataMat.row(i).dot(trainDataMat.row(j))-
trainDataMat.row(i).dot(trainDataMat.row(i))-
trainDataMat.row(j).dot(trainDataMat.row(j));
if(eta>=0)
return 0;
alphas[j]-=trainLabelMat[j]*(Ei-Ej)/eta;
alphas[j]=cliAlpha(alphas[j],H,L);
updataEk(j);
if(abs(alphas[j]-alphaJold)<0.00001)
return 0;
alphas[i]+=trainLabelMat[j]*trainLabelMat[i]*(alphaJold-alphas[j]);
updataEk(i);
double b1=b-Ei-trainLabelMat[i]*(alphas[i]-alphaIold)*
(trainDataMat.row(i).dot(trainDataMat.row(i)))-
trainLabelMat[j]*(alphas[j]-alphaJold)*
(trainDataMat.row(i).dot(trainDataMat.row(j)));
double b2=b-Ej-trainLabelMat[i]*(alphas[i]-alphaIold)*
(trainDataMat.row(i).dot(trainDataMat.row(j)))-
trainLabelMat[i]*(alphas[j]-alphaJold)*
(trainDataMat.row(j).dot(trainDataMat.row(j)));
if (0<alphas[i]&&C>alphas[i])
b=b1;
else if(0<alphas[j]&&alphas[j]<C)
b=b2;
else
b=(b1+b2)/2.0;
return 1;
}
else
return 0;
}
void smoP()
{
int iter=0;
int alphaPairsChanged=0;
int entireSet=1;
while((iter<40)&&((alphaPairsChanged>0)||(entireSet)))
{
alphaPairsChanged=0;
if(entireSet) //go over all
{
for(int i=0;i<trainDataNum;i++)
alphaPairsChanged+=innerL(i);
iter+=1;
}
else //go over non-bound (railed) alphas
{
for( int i=0;i<trainDataNum;i++)
{
if(alphas[i]>0&&alphas[i]<C)
{
alphaPairsChanged+=innerL(i);
}
}
iter+=1;
}
if(entireSet)
entireSet=0;
else
{
if(alphaPairsChanged==0)
entireSet=1;
}
}
}
void updataEk(int k)
{
double Ek=calcEk(k);
eCache(k,0)=1;
eCache(k,1)=Ek;
}
void smoSimple()
{
int iter=0;
while (iter<40)
{
int alphaPairsChanged=0;
for(int i=0;i<trainDataNum;i++)
{
double Ei=calcEk(i);
if(((trainLabelMat[i]*Ei<-toler)&&(alphas[i]<C))||
((trainLabelMat[i]*Ei>toler)&&(alphas[i]>0)))
{
int j=selectJrand(i,trainDataNum);
double Ej=calcEk(j);
double alphaIold=alphas[i];
double alphaJold=alphas[j];
double L,H;
if(trainLabelMat[i]!=trainLabelMat[j])
{
L=max(0.0,alphas[j]-alphas[i]);
H=min(C,C+alphas[j]-alphas[i]);
}
else
{
L=max(0.0,alphas[j]+alphas[i]-C);
H=min(C,alphas[j]+alphas[i]);
}
if (L==H)
{
cout<<"L==H"<<endl;
continue;
}
double eta=2.0*trainDataMat.row(i).dot(trainDataMat.row(j))-
trainDataMat.row(i).dot(trainDataMat.row(i))-
trainDataMat.row(j).dot(trainDataMat.row(j));
if (eta>=0)
{
cout<<"eta>=0";
continue;
}
alphas[j]-=trainLabelMat[j]*(Ei-Ej)/eta;
alphas[j]=cliAlpha(alphas[j],H,L);
if(abs(alphas[j]-alphaJold)<0.00001)
{
cout<<"j not moving enough"<<endl;
continue;
}
alphas[i]+=trainLabelMat[j]*trainLabelMat[i]*(alphaJold-alphas[j]);
double b1=b-Ei-trainLabelMat[i]*(alphas[i]-alphaIold)*
(trainDataMat.row(i).dot(trainDataMat.row(i)))-
trainLabelMat[j]*(alphas[j]-alphaJold)*
(trainDataMat.row(i).dot(trainDataMat.row(j)));
double b2=b-Ej-trainLabelMat[i]*(alphas[i]-alphaIold)*
(trainDataMat.row(i).dot(trainDataMat.row(j)))-
trainLabelMat[i]*(alphas[j]-alphaJold)*
(trainDataMat.row(j).dot(trainDataMat.row(j)));
if (0<alphas[i]&&C>alphas[i])
b=b1;
else if(0<alphas[j]&&alphas[j]<C)
b=b2;
else
b=(b1+b2)/2.0;
alphaPairsChanged+=1;
cout<<"iter: "<<iter<<"pairs changed: "<<alphaPairsChanged;
}
if(alphaPairsChanged==0)
iter+=1;
else
iter=0;
}
}
}
void caclWs()
{
Eigen::VectorXd alpMulLab(trainDataNum);
for(int i=0;i<trainDataNum;i++)
alpMulLab(i)=alphas(i)*trainLabelMat(i);
for(int i=0;i<trainFeatureNum;i++)
w[i]=alpMulLab.transpose()*(trainDataMat.col(i));
}
};
int main()
{
svm svm("testSet.txt");
svm.print();
svm.test();
}
// C++ implementation of the SMO (Sequential Minimal Optimization) algorithm.