网上又一位牛人的Machine Learning实验笔记

最新推荐文章于 2019-07-07 13:32:37 发布

莲花法相

最新推荐文章于 2019-07-07 13:32:37 发布

阅读量974

点赞数

分类专栏： ML 文章标签：转载 ML

ML 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

目录(?)[-]
Machine learning 实验4linear programming
Machine learningSVM实验续
SVM实验再续SMO
Machine Learning实验6 理解核函数
machine learning实验7 矩阵求逆
machine learning实验7 矩阵求逆

【Machine Learning实验1】batch gradient descent（批量梯度下降）和 stochastic gradient descent（随机梯度下降）

批量梯度下降是一种对参数的update进行累积，然后批量更新的一种方式。用于在已知整个训练集时的一种训练方式，但对于大规模数据并不合适。

随机梯度下降是一种随着样本训练、对参数一个一个地及时update的方式。常用于大规模训练集，但往往容易收敛到局部最优解。

详细参见：Andrew Ng 的Machine Learning的课件（见参考1）

可能存在的改进

1）样本可靠度，特征完备性的验证

例如可能存在一些outlier，这种outlier可能是测量误差，也有可能是未考虑到的样本特征。例如有一件衣服色彩评分1分，料子1分，却可以卖到10000万元，原来是上面有一个姚明的签名，这个特征没有考虑，所以出现了训练的误差。因此需要识别样本中outlier产生的原因。

2）批量梯度下降方法的改进

并行执行批量梯度下降

3）随机梯度下降方法的改进

找到一个合适的训练路径（学习顺序），去最大可能的找到全局最优解

4）假设合理性的检验

H（X)是否合理的检验

5）维度放大

维度放大和过拟合问题：维度过大对训练集的拟合会改善，但对测试集的适用性会变差，如何找到合理的方法？

下面是我做的一个实验

假定有这样一个对衣服估价的训练样本，代码中用matrix表示：第一列表示色彩的评分，第二列表示对料子质地的评分，例如第一个样本1,4表示这件衣服色彩打1分，料子打4分。我们需要训练的是theta，它表示在衣服的估价中色彩和料子的权重；这个权重是未知量，需要通过训练得到。训练的依据是这四个样本的真实价格已知，分别为19元、26元、19元、20元。

通过批量梯度下降和随机梯度下降的方法均可得到theta_C={3,4}^T

/*
Matrix_A
1 4
2 5
5 1
4 2
theta_C

?
Matrix_A*theta_C
19
26
19
20
*/

批量梯度下降法：

[cpp]view plaincopy 
    
 
    
 #include "stdio.h"  
   
 /*
  * Fits the two weights of the linear model h(x) = theta[0]*x[0] + theta[1]*x[1]
  * to four fixed training samples with batch gradient descent.
  *
  * Every pass accumulates the gradient contribution of all samples using the
  * same theta and only afterwards applies one combined update. The loop stops
  * after MAX_ITERATIONS passes or once the squared-error loss is no longer
  * above the tolerance. theta and the loss are printed after every pass.
  *
  * Returns 0.
  */
 int main(void)
 {
     enum { SAMPLES = 4, FEATURES = 2, MAX_ITERATIONS = 100 };

     const float matrix[SAMPLES][FEATURES] = {{1, 4}, {2, 5}, {5, 1}, {4, 2}};
     const float result[SAMPLES] = {19, 26, 19, 20};
     float theta[FEATURES] = {2, 5};   /* initial guess; {3,4} reproduces result exactly */
     const float learning_rate = 0.01f;
     const double tolerance = 0.0001;
     float loss = 1000.0f;             /* any value above the tolerance, so the first pass runs */

     for (int i = 0; i < MAX_ITERATIONS && loss > tolerance; ++i) {
         /* Sum of (target - prediction) * feature over the whole training set. */
         float gradient[FEATURES] = {0.0f, 0.0f};

         for (int j = 0; j < SAMPLES; ++j) {
             float h = 0.0f;
             for (int k = 0; k < FEATURES; ++k) {
                 h += matrix[j][k] * theta[k];
             }
             const float error = result[j] - h;
             for (int k = 0; k < FEATURES; ++k) {
                 gradient[k] += error * matrix[j][k];
             }
         }

         /* Apply the accumulated update once; theta was not touched while summing. */
         for (int k = 0; k < FEATURES; ++k) {
             theta[k] += learning_rate * gradient[k];
         }

         printf("*************************************\n");
         printf("theta now: %f,%f\n", theta[0], theta[1]);

         /* Squared-error loss of the updated theta over all samples. */
         loss = 0.0f;
         for (int j = 0; j < SAMPLES; ++j) {
             float prediction = 0.0f;
             for (int k = 0; k < FEATURES; ++k) {
                 prediction += matrix[j][k] * theta[k];
             }
             loss += (prediction - result[j]) * (prediction - result[j]);
         }
         printf("loss  now: %f\n", loss);
     }
     return 0;
 }

随机梯度下降法

[cpp]view plaincopy 
    
 
    
 #include <stdio.h>

 /*
  * Fits the two weights of the linear model h(x) = theta[0]*x[0] + theta[1]*x[1]
  * to four fixed training samples with stochastic gradient descent.
  *
  * Each iteration takes one sample (round-robin), updates theta from that
  * sample's error alone, then prints theta and the squared-error loss over all
  * samples. The loop stops after MAX_ITERATIONS iterations or once the loss is
  * no longer above the tolerance.
  *
  * Returns 0.
  */
 int main(void)
 {
     enum { SAMPLES = 4, FEATURES = 2, MAX_ITERATIONS = 100 };

     const float matrix[SAMPLES][FEATURES] = {{1, 4}, {2, 5}, {5, 1}, {4, 2}};
     const float result[SAMPLES] = {19, 26, 19, 20};
     float theta[FEATURES] = {2, 5};
     const double learning_rate = 0.01;
     const double tolerance = 0.001;
     float loss = 10.0f;               /* any value above the tolerance, so the first iteration runs */

     for (int i = 0; i < MAX_ITERATIONS && loss > tolerance; ++i) {
         const int j = i % SAMPLES;    /* sample used for this update */

         float h = 0.0f;
         for (int k = 0; k < FEATURES; ++k) {
             h += matrix[j][k] * theta[k];
         }
         const float error = result[j] - h;
         for (int k = 0; k < FEATURES; ++k) {
             theta[k] = (float)(theta[k] + learning_rate * error * matrix[j][k]);
         }
         printf("%f,%f\n", theta[0], theta[1]);

         /*
          * Assign to the loss declared outside the loop. Redeclaring it here
          * would hide the result from the loop condition, so the early stop
          * could never trigger.
          */
         loss = 0.0f;
         for (int s = 0; s < SAMPLES; ++s) {
             float prediction = 0.0f;
             for (int k = 0; k < FEATURES; ++k) {
                 prediction += matrix[s][k] * theta[k];
             }
             loss += (prediction - result[s]) * (prediction - result[s]);
         }
         printf("%f\n", loss);
     }
     return 0;
 }

参考：

【1】http://www.stanford.edu/class/cs229/notes/cs229-notes1.pdf

【2】http://www.cnblogs.com/rocketfan/archive/2011/02/27/1966325.html

【3】http://www.dsplog.com/2011/10/29/batch-gradient-descent/

【4】http://ygc.name/2011/03/22/machine-learning-ex2-linear-regression/

【Machine Learning实验2】 Logistic Regression求解classification问题

classification问题和regression问题类似，区别在于y值是一个离散值，例如binary classification，y值只取0或1。

方法来自Andrew Ng的Machine Learning课件的note1的Part II，Classification and logistic regression。

实验表明，通过多次迭代，能够最大化Likelihood，使得分类有效，实验数据为人工构建，没有实际物理意义，matrix的第一列为x0，取常数1，第二列为区分列，第三列，第四列为非区分列，最后对预测起到主导地位的参数是theta[0]和theta[1]。

[cpp]view plaincopy 
    
 
    
 #include "stdio.h"  
 #include "math.h"  
   
 /* Training inputs, one row per sample; column 0 is the constant x0 = 1. */
 double matrix[6][4] = {
     {1, 47, 76, 24},
     {1, 46, 77, 23},
     {1, 48, 74, 22},
     {1, 34, 76, 21},
     {1, 35, 75, 24},
     {1, 34, 77, 25},
 };

 /* Binary label (1 or 0) of each sample, in the same order as the rows of matrix. */
 double result[6] = {1, 1, 1, 0, 0, 0};

 /* Model weights, all starting at 1; theta[0] is theta0, the weight of x0. */
 double theta[4] = {1, 1, 1, 1};
   
 double function_g(double x)  
 {  
         double ex = pow(2.718281828,x);  
         return ex/(1+ex);  
 }  
 int main(void)  
 {  
         double likelyhood = 0.0;  
         float sum=0.0;  
         for(int j = 0;j<6;++j)  
         {  
                 double xi = 0.0;  
                 for(int k=0;k<4;++k)  
                 {  
                         xi += matrix[j][k]*theta[k];  
                 }  
                 printf("sample %d,%f\n",j,function_g(xi));  
                 sum += result[j]*log(function_g(xi)) + (1-result[j])*log(1-function_g(xi)) ;  
         }  
         printf("%f\n",sum);  
   
         for(int i =0 ;i<1000;++i)  
         {  
                 double error_sum=0.0;  
                 int j=i%6;  
                 {  
                         double h = 0.0;  
                         for(int k=0;k<4;++k)  
                         {  
                                 h += matrix[j][k]*theta[k];  
   
                         }  
                         error_sum = result[j]-function_g(h);  
                         for(int k=0;k<4;++k)  
                         {  
                                 theta[k] = theta[k]+0.001*(error_sum)*matrix[j][k];  
                         }  
                 }  
                 printf("theta now:%f,%f,%f,%f\n",theta[0],theta[1],theta[2],theta[3]);  
                 float sum=0.0;  
                 for(int j = 0;j<6;++j)  
                 {  
                         double xi = 0.0;  
                         for(int k=0;k<4;++k)  
                         {  
                                 xi += matrix[j][k]*theta[k];  
   
                         }  
                         printf("sample output now: %d,%f\n",j,function_g(xi));  
                         sum += result[j]*log(function_g(xi)) + (1-result[j])*log(1-function_g(xi)) ;  
                 }  
                 printf("maximize the log likelihood now:%f\n",sum);  
                 printf("************************************\n");  
         }  
         return 0;  
 }  
                           

【Machine Learning实验3】SoftMax regression

神奇的SoftMax regression，搞了一晚上搞不定，凌晨3点起来继续搞，刚刚终于调通。我算是彻底理解了，哈哈。代码试验了Andrew Ng的第四课上提到的SoftMax regression算法，并参考了http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression

最终收敛到这个结果，巨爽。

smaple 0: 0.983690,0.004888,0.011422,likelyhood:-0.016445
smaple 1: 0.940236,0.047957,0.011807,likelyhood:-0.061625
smaple 2: 0.818187,0.001651,0.180162,likelyhood:-0.200665
smaple 3: 0.000187,0.999813,0.000000,likelyhood:-0.000187
smaple 4: 0.007913,0.992087,0.000000,likelyhood:-0.007945
smaple 5: 0.001585,0.998415,0.000000,likelyhood:-0.001587
smaple 6: 0.020159,0.000001,0.979840,likelyhood:-0.020366
smaple 7: 0.018230,0.000000,0.981770,likelyhood:-0.018398
smaple 8: 0.025072,0.000000,0.974928,likelyhood:-0.025392

[cpp]view plaincopy 
    
 
    
 #include "stdio.h"  
 #include "math.h"  
   
 /* Training inputs, one row per sample; column 0 is the constant x0 = 1. */
 double matrix[9][4] = {
     {1, 47, 76, 24},
     {1, 46, 77, 23},
     {1, 48, 74, 22},
     {1, 34, 76, 21},
     {1, 35, 75, 24},
     {1, 34, 77, 25},
     {1, 55, 76, 21},
     {1, 56, 74, 22},
     {1, 55, 72, 22},
 };

 /* Class label (1, 2 or 3) of each sample, in the same order as the rows of matrix. */
 double result[9] = {
     1, 1, 1,
     2, 2, 2,
     3, 3, 3,
 };

 /* Two rows of starting weights, four per row; element 0 of each row is theta0. */
 double theta[2][4] = {
     {0.3, 0.3, 0.01, 0.01},
     {0.5, 0.5, 0.01, 0.01},
 };
   
 double function_g(double x)  
 {  
         double ex = pow(2.718281828,x);  
         return ex/(1+ex);  
 }  
   
 double function_e(double x)  
 {  
         return pow(2.718281828,x);  
 }  
   
 int main(void)  
 {  
         double likelyhood = 0.0;  
         for(int j = 0;j<9;++j)  
         {  
                 double sum = 1.0; // this is very important, because exp(thetak x)=1  
                 for(int l = 0;l<2;++l)  
                 {  
                         double xi = 0.0;  
                         for(int k=0;k<4;++k)  
                         {  
                                 xi += matrix[j][k]*theta[l][k];  
   
                         }  
                         sum += function_e(xi);  
                 }  
                 double xi = 0.0;  
                 for(int k=0;k<4;++k)  
                 {  
                         xi += matrix[j][k]*theta[0][k];  
   
                 }  
                 double p1 = function_e(xi)/sum;  
                 xi = 0.0;  
                 for(int k=0;k<4;++k)  
                 {  
                         xi += matrix[j][k]*theta[1][k];  
   
                 }  
                 double p2 = function_e(xi)/sum;  
                 double p3 = 1-p1-p2;  
   
   
                double ltheta = 0.0;  
                if(result[j]==1)  
                         ltheta = log(p1);  
                else if(result[j]==2)  
                         ltheta = log(p2);  
                else if(result[j]==3)  
                         ltheta = log(p3);  
                else  
                {}  
                 printf("smaple %d: %f,%f,%f,likelyhood:%f\n",j,p1,p2,p3,ltheta);  
   
         }  
   
         for(int i =0 ;i<1000;++i)  
         {  
                 for(int j=0;j<9;++j)  
                 {  
                         double sum = 1.0; // this is very important, because exp(thetak x)=1  
                         for(int l = 0;l<2;++l)  
                         {  
                                 double xi = 0.0;  
                                 for(int k=0;k<4;++k)  
                                 {  
                                         xi += matrix[j][k]*theta[l][k];  
   
                                 }  
                                 sum += function_e(xi);  
                         }  
                         double xi = 0.0;  
                         for(int k=0;k<4;++k)  
                         {  
                                 xi += matrix[j][k]*theta[0][k];  
   
                         }  
                         double p1 = function_e(xi)/sum;  
                         xi = 0.0;  
                         for(int k=0;k<4;++k)  
                         {  
                                 xi += matrix[j][k]*theta[1][k];  
   
                         }  
                         double p2 = function_e(xi)/sum;  
                         double p3 = 1-p1-p2;  
                         for(int m = 0; m<4; ++m)  
                         {  
                                 if(result[j]==1)  
                                 {  
                                         theta[0][m] = theta[0][m] + 0.001*(1-p1)*matrix[j][m];  
                                 }  
                                 else  
                                 {  
                                         theta[0][m] = theta[0][m] + 0.001*(-p1)*matrix[j][m];  
                                 }  
                                 if(result[j]==2)  
                                 {  
                                         theta[1][m] = theta[1][m] + 0.001*(1-p2)*matrix[j][m];  
                                 }  
                                 else  
                                 {  
                                         theta[1][m] = theta[1][m] + 0.001*(-p2)*matrix[j][m];  
                                 }  
                         }  
                 }  
                 double likelyhood = 0.0;  
                 for(int j = 0;j<9;++j)  
                 {  
                         double sum = 1.0; // this is very important, because exp(thetak x)=1  
                         for(int l = 0;l<2;++l)  
                         {  
                                 double xi = 0.0;  
                                 for(int k=0;k<4;++k)  
                                 {  
                                         xi += matrix[j][k]*theta[l][k];  
   
                                 }  
                                 sum += function_e(xi);  
                         }  
                         double xi = 0.0;  
                         for(int k=0;k<4;++k)  
                         {  
                                 xi += matrix[j][k]*theta[0][k];  
   
                         }  
                         double p1 = function_e(xi)/sum;  
                         xi = 0.0;  
                         for(int k=0;k<4;++k)  
                         {  
                                 xi += matrix[j][k]*theta[1][k];  
   
                         }  
                         double p2 = function_e(xi)/sum;  
                         double p3 = 1-p1-p2;  
   
   
                         double ltheta = 0.0;  
                         if(result[j]==1)  
                                 ltheta = log(p1);  
                         else if(result[j]==2)  
                                 ltheta = log(p2);  
                         else if(result[j]==3)  
                                 ltheta = log(p3);  
                         else  
                         {}  
                         printf("smaple %d: %f,%f,%f,likelyhood:%f\n",j,p1,p2,p3,ltheta);  
                 }  
         }  
         return 0;  
 }