感知机模型
PLA
(Perceptron Learning Algorithm)
- C++ 代码实现 PLA
基本思想: 逐点修正
实现过程:
/* PLA的实现过程为: { 1.寻找w(t)的下一个错误分类点(x,y) (即 sign(w(t)'*x) != y); 2.纠正错误: w(t+1) = w(t) + y*x; } until(每个样本都无错) */
PLA 什么时候会停下来? —— 当平面上所有点都分类正确时, 即数据 D 线性可分。
// PLA (Perceptron Learning Algorithm), cyclic variant.
// Repeatedly scan the samples; whenever sign(w*x) != y, correct the weights
// with w <- w + y*x.  Terminates only if the data is linearly separable.
#include <fstream>
#include <iostream>
#include <vector>
using namespace std;

#define DIMENSION 5                 // 1 bias component + 4 feature components

double weight[DIMENSION];           // weight vector w (globals are zero-initialized)
int step = 0;                       // number of corrections performed so far
int n = 0;                          // number of training samples
const char *file = "training_data.txt";  // training-data file name

// One training sample: input = x (input[0] is the constant bias 1), output = y (+1/-1).
struct record {
    double input[DIMENSION];
    int output;
};

// Samples are kept in a vector (not a fixed array) so records can be appended one by one.
vector<record> trainingSet;

// Read all samples from 'datafile' into trainingSet and set n.
// The stream state is tested AFTER each full record is read, so trailing
// whitespace/EOF does not append a bogus duplicate record (the classic
// while(!eof()) bug in the original).
void getData(ifstream &datafile) {
    record curRecord;
    curRecord.input[0] = 1;  // x0 = 1 implements the bias/threshold term
    while (true) {
        for (int i = 1; i < DIMENSION; i++) datafile >> curRecord.input[i];
        datafile >> curRecord.output;
        if (!datafile) break;        // incomplete record => stop reading
        trainingSet.push_back(curRecord);
    }
    datafile.close();
    n = trainingSet.size();
}

// Sign function used by the perceptron; maps 0 to -1 by this code's convention.
int sign(double x) {
    return (x <= 0) ? -1 : 1;
}

// v1 += v2 component-wise; used for w(t+1) = w(t) + y*x.
void add(double *v1, double *v2, int dimension) {
    for (int i = 0; i < dimension; i++) v1[i] += v2[i];
}

// Inner product of v1 and v2; used to evaluate w*x before taking sign().
double multiply(double *v1, double *v2, int dimension) {
    double sum = 0.0;
    for (int i = 0; i < dimension; i++) sum += v1[i] * v2[i];
    return sum;
}

// result = num * v component-wise; used to compute y*x (num is y, +1 or -1).
void multiply(double *result, double *v, int dimension, int num) {
    for (int i = 0; i < dimension; i++) result[i] = num * v[i];
}

// Cyclic PLA: visit samples in order, fix the first mistake found, and stop
// once n consecutive samples (a full clean pass) are classified correctly.
// WARNING: loops forever if the data is NOT linearly separable.
void PLA() {
    if (n == 0) return;              // empty training set: nothing to do (original crashed)
    int correctNum = 0;              // consecutive correctly-classified samples
    int index = 0;                   // index of the sample currently examined
    bool isFinished = false;         // set once a full clean pass is observed
    while (!isFinished) {
        if (trainingSet[index].output ==
            sign(multiply(weight, trainingSet[index].input, DIMENSION))) {
            correctNum++;            // current sample is fine
        } else {                     // mistake: apply the correction rule
            double temp[DIMENSION];
            multiply(temp, trainingSet[index].input, DIMENSION,
                     trainingSet[index].output);   // temp = y*x
            add(weight, temp, DIMENSION);          // w(t+1) = w(t) + y*x
            step++;                                // one more correction
            correctNum = 0;                        // restart the clean-pass count
            cout << "step" << step << ":" << endl
                 << "index=" << index << " is wrong" << endl;
        }
        index = (index == n - 1) ? 0 : index + 1;  // wrap around cyclically
        if (correctNum == n) isFinished = true;    // one clean pass => done
    }
    cout << "total step:" << step << endl;
}

int main() {                         // standard C++: main must return int
    ifstream dataFile(file);
    if (dataFile.is_open()) {
        getData(dataFile);
    } else {
        cout << "出错,文件打开失败!" << endl;
        return 1;
    }
    for (int i = 0; i < DIMENSION; i++) weight[i] = 0.0;  // start from w = 0
    PLA();
    return 0;
}
Pocket
对于数据 D: 若线性可分, 用 PLA 很快可以实现分类; 但若非线性可分, 那么就要对 PLA 进行改进, 采用贪心算法 (Pocket)。
思想是把当前最好的分类线保存在口袋中. 然后对曲线进行修正得到新的线. 如果得到新的线对训练样本的错误率更小. 那么我们把这条线保存下来. 继续运行程序,直到达到足够的迭代次数.
/* { 1.寻找分类错误点(x,y) 2.修正错误:w(t+1) = w(t) + y*x 3.如果w(t+1)对训练样本的错误率比口袋里的w更小,则用w(t+1)替代w }until(达到足够的迭代次数) */
注:
- 如果知道训练样本D是线性可分的. 则运行PLA比较好. 因为PLA速度快(不用判断对所有样本的错误率)
- 如果训练样本不是线性可分的(绝大多数情况),则运行pocket,但是pocket的运行速度慢
-
#include<fstream> #include<iostream> #include<vector> #include<algorithm> using namespace std; #define DEMENSION 5 //数据维度 //样本结构体 struct record{ double x[DEMENSION]; int y; }; //读取文件数据 void getData(fstream &datafile,vector<record> &dataset){ while(!datafile.eof()){ record curRecord; curRecord.x[0] = 1; int i; for(i=1 ; i<DEMENSION ; i++)datafile>>curRecord.x[i]; datafile>>curRecord.y; dataset.push_back(curRecord); } datafile.close(); } //计算sign值 int sign(double x){ if(x <= 0)return -1; else return 1; } //计算两个向量内积,判断是否需要修正 double multiply(double *v1, double *v2){ int i; double temp = 0.0; for(i = 0; i < DEMENSION; i++)temp += v1[i] * v2[i]; return temp; } //函数重载,计算向量v与整数num的积,用于计算y*x(y为+1或-1,x为向量) void multiply(double *result,double *v,int num){ int i; for(i = 0; i < DEMENSION; i++)result[i] = num * v[i]; } //计算两向量的和放入result中,用于计算w(i+1)=w(i)+y*x void add(double *result,double *v1,double *v2){ int i; for(i = 0; i < DEMENSION; i++)result[i] = v1[i] + v2[i]; } //计算错误率 double getErrorRate(double *weight,vector<record> dataset){ int n = dataset.size(); double errorRate= 0.0; int i; for(i=0;i<n;i++) if(sign(multiply(weight,dataset[i].x)) != dataset[i].y)errorRate++; return errorRate/n; } //口袋PLA算法 void pocketPLA(double *pocketWeights,double *weights,vector<record> trainingSet,int iteration){ int index = 0; int iter= 1; int n = trainingSet.size(); while(iter < iteration){ if(sign(multiply(trainingSet[index].x,weights)) != trainingSet[index].y){ double temp[DEMENSION]; multiply(temp,trainingSet[index].x,trainingSet[index].y); int i; for(i=0;i<DEMENSION;i++)weights[i] += temp[i]; if(getErrorRate(weights,trainingSet) < getErrorRate(pocketWeights,trainingSet)){ int j; for(j = 0;j<DEMENSION;j++)pocketWeights[j] = weights[j]; } iter++; } if(index == n-1)index = 0; else index++; } } void main(){ vector<record> trainingSet; vector<record> testSet; fstream datafile1("training_data.txt"); fstream datafile2("test_data.txt"); if(datafile1.is_open()&&datafile2.is_open()){ 
getData(datafile1,trainingSet); getData(datafile2,testSet); } else{ cout<<"can not open file!"<<endl; exit(1); } double weights[DEMENSION],pocketWeights[DEMENSION]; double ave_error = 0.0 ; int j; for(j = 0; j < 2000; j++ ){ random_shuffle(trainingSet.begin(), trainingSet.end()); int i; for(i=0;i<DEMENSION;i++){ //注意,这里需要初始化!!!不初始化值会乱码,程序出错!!! weights[i]=0.0; pocketWeights[i]=0.0; } pocketPLA(pocketWeights,weights,trainingSet,50); double trainingError = getErrorRate(pocketWeights,trainingSet); double testError = getErrorRate(pocketWeights,testSet); ave_error += testError; cout<<"第"<<j<<"次实验---"<<"training error:"<<trainingError<<" test error:"<<testError<<endl; } cout<<"average error rate:"<<ave_error/2000<<endl; }