用C语言实现简单的多元线性回归算法(一)

简单的一元线性回归算法已经在《C语言简单的一元线性回归算法》一文中给出,并且也简单阐述了梯度求解的推导过程。

今天我们再呈上多元线性回归算法梯度下降的C语言实现,代码中已经加入了相应的注释。如下:

MultipleLinearRegression.h

#ifndef MULTIPLELINEARREGRESSION_MULTIPLELINEARREGRESSION_H
#define MULTIPLELINEARREGRESSION_MULTIPLELINEARREGRESSION_H

/* Number of training samples. */
#define SAMPLE_COUNT    10

/* Number of model parameters (5 features + 1 bias column of ones). */
#define PARAMETER_COUNT  6


/* Configure the global hyper-parameters and allocate the prediction buffer.
 * Must be called before fit()/predict(). */
void init(double learning_rate, long int X_Len, long int X_arg_count, int channel);

/* Run batch gradient descent for a fixed number of iterations.
 * temp is caller-provided scratch for the scaled gradient; theta is updated
 * in place with the learned parameters. */
void fit(double train_x[SAMPLE_COUNT][PARAMETER_COUNT], double train_y[SAMPLE_COUNT],
         double temp[PARAMETER_COUNT], double theta[PARAMETER_COUNT]);

/* Hypothesis function: dot product theta . train_x for one sample. */
double _f(const double train_x[PARAMETER_COUNT], double theta[PARAMETER_COUNT]);

/* Predict every sample; returns the module-owned output buffer. */
double* predict(double train_x[SAMPLE_COUNT][PARAMETER_COUNT], double theta[PARAMETER_COUNT]);

/* Mean squared error over all samples; also stored into loss_val[0].
 * (Parameter renamed from `temp` to `theta` to match the definition.) */
double loss(double train_x[SAMPLE_COUNT][PARAMETER_COUNT], double train_y[SAMPLE_COUNT],
            double theta[PARAMETER_COUNT], double *loss_val);

/* Compute the batch gradient into temp, then apply it via train_step(). */
void calc_gradient(double train_x[SAMPLE_COUNT][PARAMETER_COUNT], double train_y[SAMPLE_COUNT],
                   double *temp, double *theta);

/* One descent update: theta[i] -= temp[i]. */
void train_step(double temp[PARAMETER_COUNT], double theta[PARAMETER_COUNT]);


#endif //MULTIPLELINEARREGRESSION_MULTIPLELINEARREGRESSION_H

MultipleLinearRegression.c


#include "MultipleLinearRegression.h"

#include <stdio.h>
#include <stdlib.h>  /* malloc — standard header; <malloc.h> kept for legacy builds */
#include <malloc.h>


/* Hyper-parameters; the initializers below are only fallback defaults —
 * init() overwrites all of them before training. */
int g_X_Len = 10;               /* number of training samples */
double g_learning_rate = 0.01;  /* gradient-descent step size */
int g_X_arg_count = 5;          /* parameter count — NOTE(review): main() passes 6; this default looks stale */
int g_channel = 1;              /* channel count (declared but not used yet) */


/* Scratch accumulator holding the most recent prediction computed by _f(). */
double g_out_Y_pt = 0;
/* Prediction output buffer, allocated by init() (one double per sample). */
double *y_pred_pt = 0;


/* Most recent loss value. */
double loss_val[1] = {1.0};



/*
 * Configure the global hyper-parameters and allocate the prediction buffer.
 *
 * learning_rate  gradient-descent step size
 * X_Len          number of training samples
 * X_arg_count    number of parameters (features + bias)
 * channel        channel count <not used yet>
 *
 * NOTE(review): the allocation result is not checked and never freed; the
 * buffer lives for the whole process — TODO confirm that is acceptable.
 */
void init(double learning_rate, long int X_Len,long int X_arg_count,int channel){
    g_learning_rate = learning_rate;
    g_X_Len = X_Len;
    g_X_arg_count = X_arg_count;
    g_channel = channel;
    /* BUG FIX: the original allocated only X_Len BYTES, but predict() writes
     * X_Len doubles into this buffer — a heap overflow. Allocate X_Len
     * elements of the pointed-to type instead. */
    y_pred_pt = malloc((size_t) X_Len * sizeof *y_pred_pt);
}

/* Train by running a fixed number of gradient-descent iterations; each step
 * updates theta in place and reports the current loss. */
void fit(double train_x[SAMPLE_COUNT][PARAMETER_COUNT],double train_y[PARAMETER_COUNT],
        double temp[PARAMETER_COUNT],double theta[PARAMETER_COUNT]){
    for (int step = 0; step < 10000; ++step) {
        printf("step %d: \n", step);
        /* Compute the gradient and apply the update ... */
        calc_gradient(train_x, train_y, temp, theta);
        /* ... then record and print the resulting loss. */
        loss_val[0] = loss(train_x, train_y, theta, loss_val);
    }
}

/*
 * Hypothesis function: the dot product theta . train_x for one sample.
 * Returns the prediction (also left in the global g_out_Y_pt).
 */
double _f(const double train_x[PARAMETER_COUNT],double theta[PARAMETER_COUNT]){

    /* BUG FIX: the accumulator must start at 0, not -1. The stray -1 shifted
     * every prediction down by one, which is why the article's final loss
     * plateaued near 0.9 instead of approaching 0. */
    g_out_Y_pt = 0;
    for (int i = 0; i < PARAMETER_COUNT; ++i) {
        g_out_Y_pt += theta[i]*train_x[i];

    }
    return g_out_Y_pt;
}

//Fill y_pred_pt with the model's prediction for every sample and return it.
//NOTE(review): assumes init() has been called so y_pred_pt is allocated —
//there is no NULL check here; verify call order at the call sites.
double* predict(double train_x[SAMPLE_COUNT][PARAMETER_COUNT],double theta[PARAMETER_COUNT]){
    for (int i = 0; i < SAMPLE_COUNT; ++i) {
        y_pred_pt[i] = _f(train_x[i],theta);
    }
    return y_pred_pt;
}

/*
 * Mean squared error over all samples: (1/N) * sum((y - y_pred)^2).
 * Refreshes y_pred_pt via predict(), stores the result into loss_val[0],
 * prints it, and returns it.
 */
double loss(double train_x[SAMPLE_COUNT][PARAMETER_COUNT],double train_y[PARAMETER_COUNT],
            double theta[PARAMETER_COUNT],double *loss_val){
    predict(train_x,theta);
    /* BUG FIX: the sum of squared errors must start at 0, not -1; the old
     * value under-reported the loss by 0.1 after the division below. */
    loss_val[0] = 0;
    for (int i = 0; i < SAMPLE_COUNT; i++) {
        double err = train_y[i] - y_pred_pt[i];
        loss_val[0] += err * err;
    }
    loss_val[0] = loss_val[0] / SAMPLE_COUNT;

    printf(" loss_val = %f\n", loss_val[0]);
    return loss_val[0];
}

//求梯度
void calc_gradient(double train_x[SAMPLE_COUNT][PARAMETER_COUNT],double train_y[PARAMETER_COUNT],
        double temp[PARAMETER_COUNT], double theta[PARAMETER_COUNT]) {
    for (int i = 0; i < g_X_arg_count -1; i++) {
        double sum = 0;
        for (int j = 0; j < g_X_Len; j++) {
            double hx = 0;
            for (int k = 0; k < g_X_arg_count; k++) {
                hx += theta[k] * train_x[j][k];
            }
            sum += (hx - train_y[j]) * train_x[j][i];
        }
        temp[i] = sum / g_X_Len * 0.01;
    }
    printf("\n--------------------\n");
    train_step(temp, theta);
}

//更新参数值
void train_step(double temp[PARAMETER_COUNT],double theta[PARAMETER_COUNT]) {
    for (int i = 0; i < g_X_arg_count - 1; i++) {
        theta[i] = theta[i] - temp[i];
        printf(" theta[%d] = %f\n",i, theta[i]);
    }
}

main.c

//#include "src/LinerRegression.h"
#include "utils/util.h"

#include "src/MultipleLinearRegression.h"

int main() {

    /* Training set: 10 samples x 6 columns. The first column is fixed at 1
     * so theta[0] learns the constant term b.
     * Data generated (noise-free) from
     *   Y = 4*X1 + 9*X2 + 10*X3 + 2*X4 + 1*X5 + 6 */
    double X_pt[10][6] = {
            {1, 7.41, 3.98, 8.34, 8.0,  0.95},
            {1, 6.26, 5.12, 9.57, 0.3,  7.79},
            {1, 1.52, 1.95, 4.01, 7.96, 2.19},
            {1, 1.91, 8.58, 6.64, 2.99, 2.18},
            {1, 2.2,  6.88, 0.88, 0.5,  9.74},
            {1, 5.17, 0.14, 4.09, 9.0,  2.63},
            {1, 9.13, 5.54, 6.36, 9.98, 5.27},
            {1, 1.17, 4.67, 9.02, 5.14, 3.46},
            {1, 3.97, 6.72, 6.12, 9.42, 1.43},
            {1, 0.27, 3.16, 7.07, 0.28, 1.77},
    };
    double Y_pt[10] = {171.81, 181.21, 87.84, 165.42, 96.26, 89.47, 181.21, 156.65, 163.83, 108.55};

    /* temp is scratch space for the scaled gradient; theta is the model. */
    double temp[6]  = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
    double theta[6] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0};

    /* learning rate 0.01, 10 samples, 5 features + 1 bias, 1 channel */
    init(0.01, 10, 5 + 1, 1);
    fit(X_pt, Y_pt, temp, theta);

    return 0;
}

可以看到在训练10000次的时候损失已经比较小了,各个参数也已经接近我们预设的参数了

step 9999:

--------------------
 theta[0] = 5.994281
 theta[1] = 3.999957
 theta[2] = 9.000404
 theta[3] = 10.000375
 theta[4] = 2.000242
 loss_val = 0.900745

Process finished with exit code 0

 

这只是一个C语言的简单实现,学习率也设定的是固定值,训练次数也设定为固定值。如果各位大侠有其他比较好的实现方式欢迎留言推荐。另外由于很久很久不用C语言开发了,肯定会有语法的不完美。如有更好的建议或者其他疑问欢迎交流,小弟恭候。

 

 

 

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值