Simple Linear Regression in C

Simple linear regression is probably the simplest machine learning algorithm. This post focuses on implementing the algorithm's main functions in C; the underlying theory is only touched on briefly, and you can look it up elsewhere if you want to study it in depth.

Algorithm Overview

The model can be written as:

$$
y = b_0 + b_1 \times x
$$

Training is based on the following formulas:

$$
B_1 = \frac{\sum_{i=1}^{n}{(x_i - mean(x)) \times (y_i - mean(y))}}{\sum_{i=1}^{n}{(x_i - mean(x))^2}}
$$

$$
B_0 = mean(y) - B_1 \times mean(x)
$$
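As a quick worked example, using the same five sample points that appear in the EXAMPLE snippets below (x = {1, 2, 4, 3, 5}, y = {1, 3, 3, 2, 5}), we have mean(x) = 3 and mean(y) = 2.8, so:

$$
B_1 = \frac{(-2)(-1.8) + (-1)(0.2) + (1)(0.2) + (0)(-0.8) + (2)(2.2)}{(-2)^2 + (-1)^2 + 1^2 + 0^2 + 2^2} = \frac{8}{10} = 0.8
$$

$$
B_0 = 2.8 - 0.8 \times 3 = 0.4
$$

These are the values the coefficients function further below computes for this data.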

Functions

Reading a CSV file

  • The following three functions get the number of rows, the number of columns, and the file contents, respectively.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

double **dataset;
int row,col;

int get_row(char *filename)//count the number of rows
{
	char line[1024];
	int i = 0;
	FILE* stream = fopen(filename, "r");
	while(fgets(line, 1024, stream)){
		i++;
	}
	fclose(stream);
	return i;
}

int get_col(char *filename)//count the number of columns
{
	char line[1024];
	int i = 0;
	FILE* stream = fopen(filename, "r");
	fgets(line, 1024, stream);
	char* token = strtok(line, ",");
	while(token){
		token = strtok(NULL, ",");
		i++;
	}
	fclose(stream);
	return i;
}

void get_two_dimension(char* line, double** data, char *filename)
{
	FILE* stream = fopen(filename, "r");
	int i = 0;
	while (fgets(line, 1024, stream))//read the file line by line
    {
    	int j = 0;
    	char *tok;
        char* tmp = strdup(line);
        for (tok = strtok(line, ","); tok && *tok; j++, tok = strtok(NULL, ",\n")){
        	data[i][j] = atof(tok);//convert the token to a double
		}//split the line on commas
        i++;
        free(tmp);
    }
    fclose(stream);//close the file when done
}

EXAMPLE

int main()
{
	char filename[] = "data.csv";
    char line[1024];
    double **data;
    int row, col;
    row = get_row(filename);
    col = get_col(filename);
    data = (double **)malloc(row * sizeof(double *));
	for (int i = 0; i < row; ++i){
		data[i] = (double *)malloc(col * sizeof(double));
	}//allocate the 2D array dynamically
	get_two_dimension(line, data, filename);
	printf("row = %d\n", row);
	printf("col = %d\n", col);

	int i, j;
	for(i=0; i<row; i++){
		for(j=0; j<col; j++){
			printf("%f\t", data[i][j]);
		}
		printf("\n");
    }
}

Computing the mean

$$
mean(x) = \frac{\sum_{i=1}^{n}{x_i}}{count(x)}
$$

float mean(float* values, int length) {//mean of a 1-D array
	int i;
	float sum = 0.0;
	for (i = 0; i < length; i++) {
		sum += values[i];
	}
	float mean = (float)(sum / length);
	return mean;
}

Computing the variance

$$
variance = \sum_{i=1}^{n}{(x_i - mean(x))^2}
$$

float variance(float* values, float mean, int length) {//this computes the sum of squared deviations, without dividing by n
	float sum = 0.0;
	int i;
	for (i = 0; i < length; i++) {
		sum += (values[i] - mean)*(values[i] - mean);
	}
	return sum;
}

EXAMPLE

float x[5]={1,2,4,3,5};
printf("%f\n",mean(x, 5));
printf("%f",variance(x,mean(x,5),5));

Computing the covariance

$$
covariance = \sum_{i=1}^{n}{(x_i - mean(x)) \times (y_i - mean(y))}
$$

float covariance(float* x, float mean_x, float* y, float mean_y, int length) {
	float cov = 0.0;
	int i = 0;
	for (i = 0; i < length; i++) {
		cov += (x[i] - mean_x)*(y[i] - mean_y);
	}
	return cov;
} 

EXAMPLE

float x[5]={1,2,4,3,5};
float y[5]={1,3,3,2,5};
printf("%f",covariance(x,mean(x,5),y,mean(y,5),5));

Estimating the regression coefficients

$$
B_1 = \frac{covariance(x,y)}{variance(x)}
$$

//estimate the regression coefficients from the mean, variance and covariance
// arguments: the data, an array to receive the coefficients, the number of samples
void coefficients(float** data, float* coef, int length) {
	float* x = (float*)malloc(length * sizeof(float));
	float* y = (float*)malloc(length * sizeof(float));
	int i;
	for (i = 0; i < length; i++) {
		x[i] = data[i][0];
		y[i] = data[i][1];
		//printf("x[%d]=%f,y[%d]=%f\n",i, x[i],i,y[i]);
	}
	float x_mean = mean(x, length);
	float y_mean = mean(y, length);
	//printf("x_mean=%f,y_mean=%f\n", x_mean, y_mean);
	coef[1] = covariance(x, x_mean, y, y_mean, length) / variance(x, x_mean, length);
	coef[0] = y_mean - coef[1] * x_mean;
	for (i = 0; i < 2; i++) {
		printf("coef[%d]=%f\n", i, coef[i]);
	}
	free(x);//release the temporary buffers
	free(y);
}
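
Following the style of the earlier EXAMPLE blocks, here is a minimal usage sketch for coefficients. It packs the same five sample points into a float** view (the float *data[5] wrapper is just one convenient way to build that argument, not something from the original code) and makes a single prediction with y = b0 + b1 × x; for this data the expected coefficients are coef[0] = 0.4 and coef[1] = 0.8.

EXAMPLE

float x[5] = {1, 2, 4, 3, 5};
float y[5] = {1, 3, 3, 2, 5};
float rows[5][2];
float *data[5];
float coef[2];
int i;
for (i = 0; i < 5; i++) {//pack (x, y) pairs into the float** layout that coefficients expects
	rows[i][0] = x[i];
	rows[i][1] = y[i];
	data[i] = rows[i];
}
coefficients(data, coef, 5);//prints coef[0]=0.400000 and coef[1]=0.800000 for this data
printf("prediction at x=6: %f\n", coef[0] + coef[1] * 6);//y = b0 + b1*x = 5.2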
Solving with least squares

In C, the linear regression equation can also be solved directly with the least-squares method. Least squares determines the best-fitting line by minimizing the differences between the predicted values and the observed values. The steps are:

1. Write a function that computes the slope w and intercept b of the least-squares fit.
2. Inside that function, first compute the relevant statistics, such as the means and variances of x and y.
3. Then compute the estimates of the regression coefficients. The slope w is estimated as: w = sum((xi - x_mean) * (yi - y_mean)) / sum((xi - x_mean) ^ 2), where sum denotes summation, xi and yi are the x and y values of the i-th observation, and x_mean and y_mean are the means of x and y.
4. Finally, compute the intercept: b = y_mean - w * x_mean.
5. The fitted line is y = wx + b.

Here is an example:

#include <stdio.h>
#include <math.h>

void leastSquare(double x[], double y[], int num, double *w, double *b)
{
	double sum_x = 0.0, sum_y = 0.0, sum_xy = 0.0, sum_xx = 0.0;
	double x_mean, y_mean;
	// compute the means of x and y
	for (int i = 0; i < num; i++) {
		sum_x += x[i];
		sum_y += y[i];
	}
	x_mean = sum_x / num;
	y_mean = sum_y / num;
	// compute the estimates of the coefficients w and b
	for (int i = 0; i < num; i++) {
		sum_xy += (x[i] - x_mean) * (y[i] - y_mean);
		sum_xx += pow((x[i] - x_mean), 2);
	}
	*w = sum_xy / sum_xx;
	*b = y_mean - (*w) * x_mean;
}

int main()
{
	double x[] = {1, 2, 3, 4, 5};
	double y[] = {2, 4, 6, 8, 10};
	double w, b;
	int num = sizeof(x) / sizeof(x[0]);

	leastSquare(x, y, num, &w, &b);
	printf("regression equation: y = %.2lf * x + %.2lf\n", w, b);

	return 0;
}

In the code above, a set of observations x and y is given, and the `leastSquare` function is called to compute the slope w and intercept b of the least-squares fit; the resulting regression equation is then printed.