Python linear regression


Training a deep learning model is, in essence, parameter estimation: the parameters being estimated are the weight parameters of the network.

The objective function of the linear regression model is a quadratic function of the parameter theta, so it is convex; with an L2 penalty (lambda > 0) it is strictly convex and therefore has a unique global minimum. Hence, regardless of the training settings (learning rate, batch size), optimization will converge to that same unique global minimum.
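
To make this concrete, with design matrix $X$, targets $y$, and ridge weight $\lambda$, the objective minimized below is

$$
J(\theta) = \tfrac{1}{2}\lVert X\theta - y\rVert_2^2 + \tfrac{\lambda}{2}\lVert\theta\rVert_2^2,
\qquad
\nabla J(\theta) = X^\top(X\theta - y) + \lambda\theta,
\qquad
\nabla^2 J(\theta) = X^\top X + \lambda I.
$$

Since $X^\top X \succeq 0$, the Hessian satisfies $\nabla^2 J(\theta) \succeq \lambda I \succ 0$ whenever $\lambda > 0$, so $J$ is strictly convex and its unique minimizer is $\theta^* = (X^\top X + \lambda I)^{-1} X^\top y$; this is exactly the closed form that the min_square method below computes.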

Gradient descent methods

1. Stochastic gradient descent (SGD)

For each individual sample in the training set (of size m), the parameters are updated once; this corresponds to batch size = 1.

2. Batch gradient descent

The gradient averaged over the entire training set is used for each parameter update; batch size = m.

3. Mini-batch gradient descent

A batch size is set, and the gradient averaged over that many samples is used to update the parameters; a minimal sketch follows below.
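
As a self-contained reference (the class below keeps its own mini-batch trainer commented out in favor of the closed-form solution), here is a minimal sketch of the mini-batch update for the ridge objective; the function name and hyperparameter defaults are illustrative. Setting batch_size=1 recovers SGD, and batch_size=m recovers batch gradient descent.

import numpy as np

def minibatch_gd(X, y, lam=0.1, lr=1e-3, batch_size=32, epochs=100):
    # Mini-batch gradient descent on
    # J(theta) = 0.5*||X @ theta - y||^2 + 0.5*lam*||theta||^2
    m, d = X.shape
    theta = np.zeros((d, 1))
    for _ in range(epochs):
        perm = np.random.permutation(m)        # reshuffle every epoch
        for start in range(0, m, batch_size):
            idx = perm[start:start + batch_size]
            Xb, yb = X[idx], y[idx]
            # Data term averaged over the batch, plus the ridge gradient
            grad = Xb.T @ (Xb @ theta - yb) / len(idx) + lam * theta
            theta -= lr * grad
    return theta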

import os
import numpy as np

class train_linear_reg():
    def __init__(self,x_train,y_train,lam):
        self.x_train=x_train
        self.y_train=y_train
        self.lam=lam
        # self.lr=lr
        # self.batch_size=batch_size

        self.num_train=self.x_train.shape[0]
        self.num_feat=self.x_train.shape[1]
        self.theta=np.random.randn(self.num_feat,1)

    # def train_an_epoch(self):
    #     # Reshuffle the training set at the start of every epoch
    #     num_step=(self.num_train+self.batch_size-1)//self.batch_size  # ceil division
    #
    #     # print(self.x_train.shape,self.y_train.shape,np.concatenate((self.x_train,self.y_train),axis=0).shape)
    #
    #     temp_train_x_y=np.concatenate((self.x_train,self.y_train),axis=1)
    #     np.random.shuffle(temp_train_x_y)
    #     epoch_loss=0.0
    #     for i in range(num_step):
    #         batch_x=temp_train_x_y[i*self.batch_size:(i+1)*self.batch_size,:-1]
    #         batch_y=np.expand_dims(temp_train_x_y[i*self.batch_size:(i+1)*self.batch_size,-1],axis=1)
    #
    #         epoch_loss+=self.cal_Loss(batch_x,batch_y)
    #
    #         delta_theta=np.dot(batch_x.T,np.dot(batch_x,self.theta))-np.dot(batch_x.T,batch_y)
    #         delta_theta+=self.lam*self.theta
    #         self.theta-=self.lr*delta_theta
    #
    #     epoch_loss/=num_step
    #     return epoch_loss,self.theta

    def min_square(self):
        '''
        Closed-form (ridge) least-squares solution of X * theta = Y:
        theta = (X^T X + lam * I)^{-1} X^T Y
        '''
        self.theta=np.dot(self.x_train.T,self.x_train)
        self.theta+=self.lam*np.eye(self.num_feat)
        self.theta=np.linalg.inv(self.theta)

        mid=np.dot(self.x_train.T,self.y_train)
        self.theta=np.dot(self.theta,mid)

        return self.theta

    def cal_Loss(self,x,y):
        '''
        Prediction error: 0.5 * (||x @ theta - y||^2 + ||theta||^2).
        Note: the penalty term here is not scaled by self.lam.
        :param x: feature matrix
        :param y: target vector
        :return: scalar loss (1x1 array)
        '''
        pred=np.dot(x,self.theta)
        error_vector=pred-y
        loss=np.dot(error_vector.T,error_vector)
        loss+=np.dot(self.theta.T,self.theta)  # L2 penalty (unweighted)
        loss/=2
        return loss

    def pred(self,test_x,test_y):
        return self.cal_Loss(test_x,test_y)
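
A quick standalone sanity check of the closed-form solver on synthetic data (illustrative only and not part of the script below; every name ending in _demo is made up):

rng = np.random.RandomState(0)
x_demo = rng.randn(200, 5)
theta_demo = np.arange(1., 6.).reshape(5, 1)
y_demo = x_demo @ theta_demo + 0.01 * rng.randn(200, 1)
solver_demo = train_linear_reg(x_demo, y_demo, 0.01)
print(solver_demo.min_square().ravel())  # should be close to [1, 2, 3, 4, 5]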

if __name__=='__main__':
    data_path='F:\\machine_learning\\yaling\\hw_2'

    with open(os.path.join(data_path,'train-matrix.txt'),'r') as train_file:
        all_lines=train_file.readlines()

        # print(type(all_lines[0]))  str

        num_train=int(all_lines[0].strip())
        num_feat=int(all_lines[1].strip())

        # print(num_train,num_feat)

        train_matrix=np.zeros((num_train,num_feat))
        train_label=np.zeros((num_train,1))
        for i in range(num_train):
            train_matrix[i,:]=np.array(list(map(float,all_lines[i+2].split())))
            train_label[i,0]=float(all_lines[2+num_train+i])
        # print(train_matrix[0][0],train_label[0,0])

    # 10-fold cross-validation; the data used for cross-validation does not include the test set.
    # Split the training set into 10 equal folds; in each round one fold is held out as the
    # validation set and the remaining 9 folds are used for training.
    # Each candidate hyperparameter is thus trained 10 times, yielding 10 validation scores;
    # their average is the validation score for that hyperparameter.
    # The hyperparameter with the best validation score is selected as the optimal one.
    # The evaluation on the held-out test set is then reported as the model's
    # generalization performance (independent of any particular hyperparameter setting).
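    #
    # The manual slicing further below implements these folds; an equivalent, arguably
    # clearer construction using numpy index splits would be (illustrative sketch,
    # not used by this script):
    #
    #     folds = np.array_split(np.arange(num_train), k)
    #     for f in range(k):
    #         valid_idx = folds[f]
    #         train_idx = np.concatenate([folds[j] for j in range(k) if j != f])
    #         x_tr, y_tr = train_matrix[train_idx], train_label[train_idx]
    #         x_va, y_va = train_matrix[valid_idx], train_label[valid_idx]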

    lam_list=[0.0125, 0.025, 0.05, 0.1, 0.2]
    # lam_list = [0.1]

    # epoch_num=1000

    k=10

    examples_per_fold=num_train//k

    best_solver=None
    pred_error=float('inf')

    # lr=1e-8

    # batch_size=100

    for lam in lam_list:
        temp_lam_error=0.0
        for fold in range(k):
            # Training folds: everything except the current validation fold
            if fold==0:
                temp_train_x = train_matrix[(fold + 1) * examples_per_fold:, :]
                temp_train_y = train_label[(fold + 1) * examples_per_fold:, :]
            elif fold==k-1:
                temp_train_x = train_matrix[:fold * examples_per_fold, :]
                temp_train_y = train_label[:fold * examples_per_fold, :]
            else:
                temp_train_x = np.concatenate(
                    (train_matrix[:fold * examples_per_fold, :], train_matrix[(fold + 1) * examples_per_fold:, :]),
                    axis=0)
                temp_train_y = np.concatenate(
                    (train_label[:fold * examples_per_fold, :], train_label[(fold + 1) * examples_per_fold:, :]), axis=0)
            # Held-out validation fold
            temp_valid_x=train_matrix[fold*examples_per_fold:(fold+1)*examples_per_fold,:]
            temp_valid_y=train_label[fold*examples_per_fold:(fold+1)*examples_per_fold,:]

            # print('here',temp_train_y.shape,temp_train_x.shape)

            # print(temp_train_y)

            temp_solver=train_linear_reg(temp_train_x,temp_train_y,lam)

            # for epoch in range(epoch_num):
            #     epoch_loss,_=temp_solver.train_an_epoch()
            #
            #     print('epoch:%d,train epoch_loss:%.8f'%(epoch,epoch_loss))

            temp_solver.min_square()

            temp_lam_error+=temp_solver.pred(temp_valid_x,temp_valid_y)

        temp_pred_error=temp_lam_error/k
        print('lam:%.6f,valid loss:%.8f' % (lam, temp_pred_error))

        if temp_pred_error<pred_error:
            pred_error=temp_pred_error
            best_solver=temp_solver
    # lam:0.012500,valid loss:2159.87260888
    # lam:0.025000,valid loss:2159.87141162
    # lam:0.050000,valid loss:2159.86902188
    # lam:0.100000,valid loss:2159.86426159
    # lam:0.200000,valid loss:2159.85481769

    print(best_solver.theta,best_solver.lam)

    # [[13.19911254]
    #  [-9.31965636]
    #  [ 9.04841504]
    #  [12.40358965]
    #  [ 4.98294389]
    #  [-3.25304597]
    #  [-4.01073318]
    #  [12.87214272]
    #  [-4.9180319 ]
    #  [-7.32203696]] 0.2

    new_solver=train_linear_reg(train_matrix,train_label,best_solver.lam)
    new_solver.min_square()  # refit on the full training set with the selected lam

    # Load the test data and its labels

    with open(os.path.join(data_path,'test-matrix.txt'),'r') as test_file:
        all_lines=test_file.readlines()

        # print(type(all_lines[0]))  str

        num_test=int(all_lines[0].strip())
        num_feat=int(all_lines[1].strip())

        # print(num_train,num_feat)

        test_matrix=np.zeros((num_test,num_feat))
        test_label=np.zeros((num_test,1))
        for i in range(num_test):
            test_matrix[i,:]=np.array(list(map(float,all_lines[i+2].split())))
            test_label[i,0]=float(all_lines[2+num_test+i])

    test_err=new_solver.pred(test_matrix,test_label)
    print('test_err:',test_err)

    with open(os.path.join(data_path,'true-beta.txt'),'r') as t:
        all_lines=t.readlines()
        num_feat=int(all_lines[0])

        true_beta=np.zeros((num_feat,1))

        for j in range(num_feat):
            true_beta[j,:]=float(all_lines[j+1])

    distance=true_beta-new_solver.theta
    distance=np.dot(distance.T,distance)

    print('distance',distance)

    # test_err: [[2628296.29671165]]
    # distance [[816.7090605]]

    # Greedy estimate of beta (forward selection, in the style of orthogonal matching
    # pursuit: repeatedly pick the feature most correlated with the current residual,
    # then refit least squares on the selected columns)

    A=[]
    beta=np.zeros((num_feat,1))

    for temp_k in range(6):
        # Residual of the current fit
        error=np.dot(train_matrix,beta)-train_label

        # |correlation| of every feature with the residual
        all_samples=np.abs(np.dot(train_matrix.T,error))

        # Select the feature most correlated with the residual
        temp_i=int(np.argmax(all_samples))

        if temp_i not in A:
            A.append(temp_i)

        # Least-squares refit restricted to the selected columns
        used_x=train_matrix[:,A]

        print('used_x',used_x.shape)

        used_beta=np.linalg.inv(np.dot(used_x.T,used_x))
        mid=np.dot(used_x.T,train_label)
        used_beta=np.dot(used_beta,mid)

        # Scatter the fitted coefficients back into the full beta vector,
        # keeping used_beta aligned with the selection order in A
        beta[:]=0
        beta[A,:]=used_beta
    print(beta)

    here_solver=train_linear_reg(train_matrix,train_label,0)
    here_solver.theta=beta
    print(here_solver.pred(test_matrix,test_label))








 
