C++实现逻辑回归
1:读取数据(采用鸢尾花数据集)
为了简单,这里只使用 Iris-setosa 和 Iris-versicolor 两个类别,把鸢尾花数据集构造成二分类问题(读取时标签分别映射为 1 和 0)
#include "tools.h"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
using namespace std;
/// Reads a whitespace-separated iris text file into a table of floats.
///
/// Every non-blank line becomes one row. Each token on the line is converted
/// with atof; the class names "Iris-setosa" and "Iris-versicolor" are mapped
/// to 1 and 0 respectively before conversion. Any other non-numeric token
/// converts to 0 (atof's result for unparsable text).
///
/// Each line is echoed to standard output as it is read.
///
/// @param iris_path Path of the text file to read.
/// @return One vector per non-blank line; empty if the file cannot be opened.
std::vector<std::vector<float>> get_iris(const std::string iris_path)
{
    std::vector<std::vector<float>> feature;

    std::ifstream fin(iris_path);
    if (!fin.is_open())
    {
        // Report the failure instead of silently returning an empty table.
        std::cerr << "get_iris: cannot open " << iris_path << std::endl;
        return feature;
    }

    std::string line;
    while (std::getline(fin, line))
    {
        std::cout << line << std::endl;

        // Stream extraction splits on any whitespace (spaces, tabs, a trailing
        // '\r' from CRLF files) and never writes into the string's buffer.
        std::istringstream tokens(line);
        std::vector<float> vec;
        std::string token;
        while (tokens >> token)
        {
            if (token == "Iris-setosa")
                token = "1";
            else if (token == "Iris-versicolor")
                token = "0";
            vec.push_back(static_cast<float>(std::atof(token.c_str())));
        }

        // Blank lines carry no sample; skipping them keeps every row indexable.
        if (!vec.empty())
            feature.push_back(vec);
    }
    return feature;
}
2:进行训练
#include "tools.h"
void train(vector<vector<float>> xtrain)
{
//不使用矩阵乘法
float weights[4] = { 0 };
float bias = 0;
float b_grad = 0;
float w_grad[4] = { 0 };
//设置学习率
float lr_alpha = 1e-1;
float lr_beta = 1e-1;
//for (auto it1 : xtrain)
//{
// float result;
// int i = 0;
// for (auto it2 : it1)
// {
// result += it2 * weights[i];
// i++;
// }
// cout << it << endl;
//}
int epoch = 0;
while (epoch < 100)
{
float result[100] = { 0 };
float loss = 0;
for (int i = 0; i < 100; i++)
{
for (int j = 0; j < 4; j++)
{
result[i] += xtrain[i][j]*weights[j]+bias;
}
result[i] = sigmoid(result[i]);
float y = xtrain[i][4];
b_grad += y * (1 - y);
for (int j = 0; j < 4; j++)
{
w_grad[j] += xtrain[i][j] * (y- result[i]);
}
loss += loss_compute(result[i], y);
}
cout << loss << endl;
b_grad /= 100;
bias = bias - lr_alpha * b_grad;
for (int j = 0; j < 4; j++)
{
w_grad[j] /= 100;
weights[j] -= lr_beta * w_grad[j];
}
epoch++;
}
}
用到的小工具
#include <cmath>
/// Evaluates 1 / (1 + e^x).
///
/// NOTE: the exponent is +x, so this is the standard logistic function
/// evaluated at -x: the result falls from 1 towards 0 as x grows.
///
/// @param x Input score.
/// @return A value in [0, 1]; exactly 0.5 for x == 0.
float sigmoid(float x)
{
    const auto denominator = 1 + exp(x);
    return 1 / denominator;
}
/// Per-sample log-likelihood of label y under predicted probability x:
/// y * log(x) + (1 - y) * log(1 - x).
///
/// The value is <= 0 for y in {0, 1}; it is the negative of the usual
/// cross-entropy loss. The prediction is clamped away from exactly 0 and 1
/// so that a saturated prediction cannot produce 0 * log(0) = NaN.
///
/// @param x Predicted probability.
/// @param y Target label.
/// @return The log-likelihood term for this sample.
float loss_compute(float x, float y)
{
    const float kEps = 1e-7f;
    if (x < kEps)
        x = kEps;
    else if (x > 1 - kEps)
        x = 1 - kEps;

    return y * log(x) + (1 - y) * log(1 - x);
}
细节备忘
错误记录:
1:路径中不能有中文
2:路径中的分隔符使用正斜杠 /(如果使用反斜杠,在字符串字面量中必须写成 \\)
3:C++几种读取的方式
// Read the stream one character at a time.
// get(c) stores the character and reports success through the stream state,
// so no character value is ever compared against EOF. (get() with no
// argument returns int; squeezing that into a char first makes the EOF test
// unreliable.)
char c;
while (infile.get(c))
{
cout << c;
}
这个是逐字符读取
4:根据字符切分字符串
// Split `line` on spaces with strtok_s and print each token as a float.
char *next_token = nullptr;
// strtok_s overwrites delimiters with '\0', so it needs a writable buffer.
// &line[0] points at the string's own mutable storage; the pointer returned
// by c_str() is const and must not be written through.
char *ptr_line = &line[0];
const char * split = " "; // delimiter set
// The third argument is a char**, hence the address-of on next_token.
char *p = strtok_s(ptr_line, split, &next_token);
// next_token is where strtok_s keeps its position between calls.
while (p != nullptr)
{
// Note: `cout << *p` would print only the first character of the token.
printf("%f\n", atof(p)); // atof converts the token text to a floating-point value
p = strtok_s(nullptr, split, &next_token); // a null first argument continues from next_token
}