没有设置偏置,当训练的次数达到600次的时候,正确率有98%,大概在1100次的时候,出现过拟合,正确率下降。数据集也附上啦。
#include<fstream>
#include<sstream>
#include<cmath>
#include<cstdlib>
#include<vector>
#include<time.h>
#include<iostream>
using namespace std;
#define e 2.718282//truncated Euler's number, used as the pow() base in sigmoid() — NOTE: a macro named `e` will clash with any identifier called e
bool operated = true;//flag is never read anywhere in this file
#define trainDNum 75//75 training samples
#define feature 4//4 features per sample
#define testDNum 75//75 test samples
#define outResult 3//3 output classes
#define hiddenLayer 10//10 hidden-layer units
#define rate_w2 0.9//learning rate for w2
#define rate_w1 0.9//learning rate for w1
double error_accuracy = 1.0;//loss of the most recently trained sample (squared error / 2); 1.0 is just the initial placeholder
vector< vector<double> > trainData(trainDNum,vector<double>(feature));//training set 75*4
vector<int> trainL(trainDNum);//training labels (values 1..3)
vector< vector<double> > testData(testDNum, vector<double>(feature));//test set 75*4
vector<int> testL(testDNum);//test labels (values 1..3)
//one-hot encoded 2-D labels
vector< vector<double> > trainL2D(trainDNum, vector<double>(outResult));//training labels 75*3
vector< vector<double> > testL2D(testDNum, vector<double>(outResult));//test labels 75*3
//input->hidden weights W1 4*10, hidden->output weights W2 10*3
vector< vector<double> > w1(feature, vector<double>(hiddenLayer));//input->hidden W1 4*10
vector< vector<double> > w2(hiddenLayer, vector<double>(outResult));//hidden->output W2 10*3
//holds one 1*3 output row; BPtest() overwrites it for each test sample
vector< double> result(outResult);//output-layer activations of the last forward pass
void loadTrain();//load the training set from trainData.txt
void loadTest();//load the test set from testData.txt
void regulateData(vector< vector<double> >& data);//min-max normalize a data matrix in place
void oneHot(vector<int>& lable, vector< vector<double> >& data);//one-hot encode 1-based labels
void randWeight(vector< vector<double> >& weight);//fill a weight matrix with random values in [-1, 1]
void trainStart();//run one training epoch over all training samples
void BPtest(int i);//forward pass on test sample i, result stored in `result`
int main()
{
cout << "data is" << endl;
loadTrain();
loadTest();
//归一化
cout << "trainData归一化处理-----------------------------" << endl;
regulateData(trainData);
cout << "testData归一化处理-----------------------------" << endl;
regulateData(testData);
//标签变为矩阵
cout << "对标签one-hot编码-----------------------------" << endl;
oneHot(trainL, trainL2D);
oneHot(testL, testL2D);
cout << "随机生成w1-----------------------------" << endl;
//随机生成w1和w2权重矩阵
randWeight(w1);
cout << "随机生成w2-----------------------------" << endl;
randWeight(w2);
//数据已经准备好了,开始训练
cout << "训练-----------------------------" << endl;
int trainNum = 600, i = 0,j,correct=0;//训练100次
double max;
while (trainNum--)
{
trainStart();
}
cout << "测试-----------------------------" << endl;
for (i = 0; i < testDNum; i++)
{
BPtest(i);
max = result[0] > result[1] ? 1 : 2;
if (max == 1)
max = result[2] > result[0] ? 3 : 1;
else
max = result[2] > result[1] ? 3 : 2;
//cout << max << " ";
/*for (j = 0; j < outResult; j++)
{
cout << result[j] << " ";
}*/
if (max == testL[i])
correct++;
//cout << endl;
}
cout << "正确的结果有"<<correct<<"个,正确率为" << correct / (testDNum +0.0)<< endl;
return 0;
}
void loadTrain() {
cout << "-----------trainData----------- " << endl;
ifstream in("trainData.txt");
string line;
int count = 0;
int i = 0, j = 0;
int num1 = 0;
while (getline(in, line)) {//获取文件的一行字符串到line中
stringstream ss(line);//初始化
double x;
while (ss >> x) {//每一行包含不同个数的数字
count++;
if (count < 5)
{
trainData[i][j] = x;
//cout<<trainData[i][j]<<"\t";
j++;
}
else
{
trainL[i] = (int)x;
//cout<<trainL[i]<<endl;
i++;
j = 0;
count = 0; num1++;
}
}
}
//cout << "there are " << num1 << " datum" << endl;
}
void loadTest() {
cout << "-----------testData----------- " << endl;
ifstream in("testData.txt");
string line;
int count = 0;
int i = 0, j = 0;
int num1 = 0;
while (getline(in, line)) {//获取文件的一行字符串到line中
stringstream ss(line);//初始化
double x;
while (ss >> x) {//每一行包含不同个数的数字
count++;
if (count < 5)
{
testData[i][j] = x;
//cout << testData[i][j] << "\t";
j++;
}
else
{
testL[i] = (int)x;
// cout << testL[i] << endl;
i++;
j = 0;
count = 0; num1++;
}
}
}
//cout << "there are " << num1 << " datum" << endl;
}
void regulateData(std::vector< std::vector<double> >& data)
{
	// Min-max normalize every entry of `data` into [0, 1], in place.
	// NOTE: the min/max are taken over the WHOLE matrix, not per feature
	// column — this matches the original behavior.
	if (data.empty() || data[0].empty())
		return;                       // nothing to normalize
	std::size_t rows = data.size(), cols = data[0].size(), i, j;
	double hi = data[0][0], lo = data[0][0];
	for (i = 0; i < rows; i++)
	{
		for (j = 0; j < cols; j++)
		{
			if (hi < data[i][j])
				hi = data[i][j];
			if (lo > data[i][j])
				lo = data[i][j];
		}
	}
	if (hi == lo)
	{
		// All values identical: the original divided by zero here (NaN/inf);
		// map everything to 0 instead.
		for (i = 0; i < rows; i++)
			for (j = 0; j < cols; j++)
				data[i][j] = 0.0;
		return;
	}
	for (i = 0; i < rows; i++)
		for (j = 0; j < cols; j++)
			data[i][j] = (data[i][j] - lo) / (hi - lo);
}
void oneHot(std::vector<int>& lable, std::vector< std::vector<double> >& data)
{
	// One-hot encode labels into `data`: row i gets 0.99 at column
	// lable[i]-1 (labels are 1-based) and 0.01 everywhere else.
	// The original re-assigned the 0.99 cell inside the inner loop on every
	// iteration; hoisting it out gives the identical result with less work.
	std::size_t rows = data.size(), cols = data[0].size(), i, j;
	for (i = 0; i < rows; i++)
	{
		for (j = 0; j < cols; j++)
			data[i][j] = 0.01;
		data[i][lable[i] - 1] = 0.99;
	}
}
void randWeight(std::vector< std::vector<double> >& weight)
{
	// Fill `weight` with pseudo-random values uniformly spread over [-1, 1].
	// (These are uniform, NOT normally distributed as the original comment
	// claimed.)
	// 2.0 * rand(): multiply in double FIRST — the original computed
	// `2 * rand()` in int arithmetic, which overflows (undefined behavior)
	// on platforms where RAND_MAX == INT_MAX (e.g. glibc).
	std::size_t i, j, rows = weight.size(), cols = weight[0].size();
	for (i = 0; i < rows; i++)
	{
		for (j = 0; j < cols; j++)
		{
			weight[i][j] = 2.0 * std::rand() / double(RAND_MAX) - 1.0;
		}
	}
}
double sigmoid(double x)
{
	// Logistic sigmoid: 1 / (1 + e^-x).
	// Uses std::exp instead of the original pow(2.718282, -x): the exact
	// base of e (no truncation error) and much faster than a general pow.
	return 1.0 / (1.0 + std::exp(-x));
}
void trainStart() {
size_t i, j,k;
double temp;
vector<double>o1(hiddenLayer);//隐藏层的10个输出
vector<double>o2(outResult);//输出层的输出
vector<double>diff2(outResult);//真实值-预测值,输出层-隐藏层
vector<double>diff1(trainDNum);//真实值-预测值,隐藏层-输入层
for (i = 0; i < trainDNum; i++)
{
//正向传递
//输入层-隐藏层
for (k = 0; k < hiddenLayer; k++)
{
temp = 0.0;
o1[k] = 0.0;
for (j = 0; j < feature; j++)
{
temp += trainData[i][j] * w1[j][k];//没有设置偏置
}
o1[k] = sigmoid(temp);
}
//隐藏层-输出层
for (k = 0; k < outResult; k++)
{
temp = 0.0;
o2[k] = 0.0;
for (j = 0; j < hiddenLayer; j++)
{
temp += o1[j] * w2[j][k];//没有设置偏置
}
o2[k] = sigmoid(temp);
}
//反向传递
//隐藏层-输出层,更新w2
// cout << "w2更新后" << endl;
for (k = 0; k < outResult; k++)
{
diff2[k] = (trainL2D[i][k] - o2[k]) * (1 - o2[k]) * o2[k];
for (j = 0; j < hiddenLayer; j++)
{
w2[j][k] += diff2[k]*rate_w2*o1[j];//更新w2
// cout << w2[j][k] << " ";
}
// cout << endl;
}
// cout << "w1更新后" << endl;
for (k = 0; k < hiddenLayer; k++)
{
diff1[k] = 0.0;
for (j = 0; j < outResult; j++)
{
diff1[k] += diff2[j] * w2[k][j];
}
diff1[k] *= o1[k] * (1- o1[k]);
for (j = 0; j < feature; j++)
{
w1[j][k] += rate_w1 * diff1[k] * trainData[i][j];
} /**/
}
//计算损失函数
error_accuracy = 0.0;
for (j = 0; j < outResult; j++)
error_accuracy+=pow((o2[j] - trainL2D[i][j]), 2);
error_accuracy /= 2.0;
//cout << error_accuracy << endl;
}
}
void BPtest(int column) {
	// Forward pass only, on test sample `column`; the output-layer
	// activations are stored in the global `result` vector.
	// (The original also allocated unused diff1/diff2 error vectors and an
	// intermediate o2 copy — a test pass never back-propagates.)
	size_t j, k;
	double temp;
	vector<double> o1(hiddenLayer);  // hidden-layer outputs
	// input -> hidden
	for (k = 0; k < hiddenLayer; k++)
	{
		temp = 0.0;
		for (j = 0; j < feature; j++)
		{
			temp += testData[column][j] * w1[j][k];  // no bias term
		}
		o1[k] = sigmoid(temp);
	}
	// hidden -> output, written straight into `result`
	for (k = 0; k < outResult; k++)
	{
		temp = 0.0;
		for (j = 0; j < hiddenLayer; j++)
		{
			temp += o1[j] * w2[j][k];  // no bias term
		}
		result[k] = sigmoid(temp);
	}
}
输出结果
关于数据加载那一块,可以写成一个函数,改变参数就可以,我改的时候出了点问题,改半天没改出来,就写成了两个函数。
trainData.txt训练集
5.1 3.5 1.4 0.2 1
4.9 3 1.4 0.2 1
4.7 3.2 1.3 0.2 1
4.6 3.1 1.5 0.2 1
5 3.6 1.4 0.2 1
5.4 3.9 1.7 0.4 1
4.6 3.4 1.4 0.3 1
5 3.4 1.5 0.2 1
4.4 2.9 1.4 0.2 1
4.9 3.1 1.5 0.1 1
5.4 3.7 1.5 0.2 1
4.8 3.4 1.6 0.2 1
4.8 3 1.4 0.1 1
4.3 3 1.1 0.1 1
5.8 4 1.2 0.2 1
5.7 4.4 1.5 0.4 1
5.4 3.9 1.3 0.4 1
5.1 3.5 1.4 0.3 1
5.7 3.8 1.7 0.3 1
5.1 3.8 1.5 0.3 1
5.4 3.4 1.7 0.2 1
5.1 3.7 1.5 0.4 1
4.6 3.6 1 0.2 1
5.1 3.3 1.7 0.5 1
4.8 3.4 1.9 0.2 1
7 3.2 4.7 1.4 2
6.4 3.2 4.5 1.5 2
6.9 3.1 4.9 1.5 2
5.5 2.3 4 1.3 2
6.5 2.8 4.6 1.5 2
5.7 2.8 4.5 1.3 2
6.3 3.3 4.7 1.6 2
4.9 2.4 3.3 1 2
6.6 2.9 4.6 1.3 2
5.2 2.7 3.9 1.4 2
5 2 3.5 1 2
5.9 3 4.2 1.5 2
6 2.2 4 1 2
6.1 2.9 4.7 1.4 2
5.6 2.9 3.6 1.3 2
6.7 3.1 4.4 1.4 2
5.6 3 4.5 1.5 2
5.8 2.7 4.1 1 2
6.2 2.2 4.5 1.5 2
5.6 2.5 3.9 1.1 2
5.9 3.2 4.8 1.8 2
6.1 2.8 4 1.3 2
6.3 2.5 4.9 1.5 2
6.1 2.8 4.7 1.2 2
6.4 2.9 4.3 1.3 2
6.3 3.3 6 2.5 3
5.8 2.7 5.1 1.9 3
7.1 3 5.9 2.1 3
6.3 2.9 5.6 1.8 3
6.5 3 5.8 2.2 3
7.6 3 6.6 2.1 3
4.9 2.5 4.5 1.7 3
7.3 2.9 6.3 1.8 3
6.7 2.5 5.8 1.8 3
7.2 3.6 6.1 2.5 3
6.5 3.2 5.1 2 3
6.4 2.7 5.3 1.9 3
6.8 3 5.5 2.1 3
5.7 2.5 5 2 3
5.8 2.8 5.1 2.4 3
6.4 3.2 5.3 2.3 3
6.5 3 5.5 1.8 3
7.7 3.8 6.7 2.2 3
7.7 2.6 6.9 2.3 3
6 2.2 5 1.5 3
6.9 3.2 5.7 2.3 3
5.6 2.8 4.9 2 3
7.7 2.8 6.7 2 3
6.3 2.7 4.9 1.8 3
6.7 3.3 5.7 2.1 3
testData.txt测试集
5 3 1.6 0.2 1
5 3.4 1.6 0.4 1
5.2 3.5 1.5 0.2 1
5.2 3.4 1.4 0.2 1
4.7 3.2 1.6 0.2 1
4.8 3.1 1.6 0.2 1
5.4 3.4 1.5 0.4 1
5.2 4.1 1.5 0.1 1
5.5 4.2 1.4 0.2 1
4.9 3.1 1.5 0.2 1
5 3.2 1.2 0.2 1
5.5 3.5 1.3 0.2 1
4.9 3.6 1.4 0.1 1
4.4 3 1.3 0.2 1
5.1 3.4 1.5 0.2 1
5 3.5 1.3 0.3 1
4.5 2.3 1.3 0.3 1
4.4 3.2 1.3 0.2 1
5 3.5 1.6 0.6 1
5.1 3.8 1.9 0.4 1
4.8 3 1.4 0.3 1
5.1 3.8 1.6 0.2 1
4.6 3.2 1.4 0.2 1
5.3 3.7 1.5 0.2 1
5 3.3 1.4 0.2 1
6.6 3 4.4 1.4 2
6.8 2.8 4.8 1.4 2
6.7 3 5 1.7 2
6 2.9 4.5 1.5 2
5.7 2.6 3.5 1 2
5.5 2.4 3.8 1.1 2
5.5 2.4 3.7 1 2
5.8 2.7 3.9 1.2 2
6 2.7 5.1 1.6 2
5.4 3 4.5 1.5 2
6 3.4 4.5 1.6 2
6.7 3.1 4.7 1.5 2
6.3 2.3 4.4 1.3 2
5.6 3 4.1 1.3 2
5.5 2.5 4 1.3 2
5.5 2.6 4.4 1.2 2
6.1 3 4.6 1.4 2
5.8 2.6 4 1.2 2
5 2.3 3.3 1 2
5.6 2.7 4.2 1.3 2
5.7 3 4.2 1.2 2
5.7 2.9 4.2 1.3 2
6.2 2.9 4.3 1.3 2
5.1 2.5 3 1.1 2
5.7 2.8 4.1 1.3 2
7.2 3.2 6 1.8 3
6.2 2.8 4.8 1.8 3
6.1 3 4.9 1.8 3
6.4 2.8 5.6 2.1 3
7.2 3 5.8 1.6 3
7.4 2.8 6.1 1.9 3
7.9 3.8 6.4 2 3
6.4 2.8 5.6 2.2 3
6.3 2.8 5.1 1.5 3
6.1 2.6 5.6 1.4 3
7.7 3 6.1 2.3 3
6.3 3.4 5.6 2.4 3
6.4 3.1 5.5 1.8 3
6 3 4.8 1.8 3
6.9 3.1 5.4 2.1 3
6.7 3.1 5.6 2.4 3
6.9 3.1 5.1 2.3 3
5.8 2.7 5.1 1.9 3
6.8 3.2 5.9 2.3 3
6.7 3.3 5.7 2.5 3
6.7 3 5.2 2.3 3
6.3 2.5 5 1.9 3
6.5 3 5.2 2 3
6.2 3.4 5.4 2.3 3
5.9 3 5.1 1.8 3