Machine Learning cs299 problem 3 Regulation C++ code

看了两天Regulation,MAP,结合实验,总算搞懂了,代码如下,学到这里的同学,可以参考。简单来说,就是通过将theta看做是一个随机变量,但这个变量不应该很大,否则容易过拟合,需要在一定程度上减弱,所以通过这种方式来选择参数(feature selection)。暂且写这么多,lamda设置为0.1时,和书中答案接近,可以看出lamda越小,过拟合越严重,lamda越大,越粗放,过拟合越不严重。代码采用的是coordinate descent algorithm。


#include <stdio.h>
#include <stdlib.h>
#include <math.h>

double norm(double theta[100],double oldtheta[100])
{
        double sum = 0.0;
        for(int i=0;i<100;++i)
        {
                sum += fabs(theta[i]-oldtheta[i]);
        }
        return sum;
}
int main(void)
{


        //      read y.dat into y[20]
        double y[20];
        {
                FILE * fp;
                char * line = NULL;
                size_t len = 0;
                ssize_t read;
                fp = fopen("y.dat", "r");
                if (fp == NULL)
                        exit(EXIT_FAILURE);
                int i=0;
                while ((read = getline(&line, &len, fp)) != -1) {
                        y[i++] = atof(line);
                }
                if (line)
                        free(line);
        }

        //      read x.dat into x[20][100] 20 sample, 100 item in every sample
        double x[20][100];
       
        {
                FILE * fp;
                char * line = NULL;
                size_t len = 0;
                ssize_t read;
                fp = fopen("x.dat", "r");
                if (fp == NULL)
                        exit(EXIT_FAILURE);
                int k=0;
                while ((read = getline(&line, &len, fp)) != -1) {
                        for(int i=0;i<100;++i)
                        {
                                char* item = line+16*i;
                                line[16*(i+1)]='\0';
                                x[k][i] = atof(item);
                                line[16*(i+1)]=' ';
                        }
                        k++;
                }
                if (line)
                        free(line);
        }

        double theta[100];
        double oldtheta[100];
        for(int i=0;i<100;++i)
        {
                theta[i]=0;
        }
        for(int i=0;i<100;++i)
        {
                oldtheta[i]=1;
        }
        while ( norm(theta,oldtheta) > 0.00001)
        {
                for(int i=0;i<100;++i) oldtheta[i] = theta[i];

                for(int ti=0;ti<100;++ti)
                {
                        theta[ti]=0;
                        double theta_i1 = 0.0;
                        double theta_i2 = 0.0;
                        double r[20];
                        for(int i=0;i<20;++i)
                        {
                                double sum=0.0;
                                for(int j=0;j<100;++j)
                                {
                                        sum+=x[i][j]*theta[j];
                                }
                                r[i] = sum-y[i];
                        }
                        double sum = 0.0;
                        for(int i=0;i<20;++i)
                        {
                                sum += x[i][ti]*r[i];
                        }
                        double xx = 0.0;
                        for(int i=0;i<20;++i)
                        {
                                xx += x[i][ti]*x[i][ti];
                        }

                        double lamda = 1;
                        theta_i1 = (sum*(-1)-lamda)/xx;
                        if(theta_i1<0) theta_i1 = 0;

                        theta_i2 = (sum*(-1)+lamda)/xx;
                        if(theta_i2>0) theta_i2 = 0;
                        // try theta_i1
                        theta[ti] = theta_i1;
                        double obj_theta_1 = 0.0;
                        for(int i=0;i<20;++i)
                        {
                                double sum=0.0;
                                for(int j=0;j<100;++j)
                                {
                                        sum+=x[i][j]*theta[j];
                                }
                                obj_theta_1 += fabs(sum-y[i]);
                        }
                        obj_theta_1 = obj_theta_1*obj_theta_1*(0.5);
                        for(int i=0;i<100;++i)
                        obj_theta_1 += lamda*fabs(theta[i]);

                        // try theta_i2 
                        theta[ti] = theta_i2;
                        double obj_theta_2 = 0.0;
                        for(int i=0;i<20;++i)
                        {
                                double sum=0.0;
                                for(int j=0;j<100;++j)
                                {
                                        sum+=x[i][j]*theta[j];
                                }
                                obj_theta_2 += fabs(sum-y[i]);
                        }
                        obj_theta_2 = obj_theta_2*obj_theta_2*(0.5);
                        for(int i=0;i<100;++i)
                        obj_theta_2 += lamda*fabs(theta[i]);

                        // chose theta from obj_theta
                        if(obj_theta_1<obj_theta_2)
                        {
                                theta[ti] = theta_i1;
                        }
                        else
                        {
                                theta[ti] = theta_i2;
                        }
                        for(int i=0;i<100;++i)
                        {
                                printf("%f\t",theta[i]);
                        }
                        printf("\n");
                }
        }
        return EXIT_SUCCESS;
}

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值