手写线性回归模型

关于手写线性回归模型的一些知识。理论推导部分相信大家都比较熟悉了,主要关注numpy实现部分。

在这里插入图片描述在这里插入图片描述下面是代码:

# -*- coding: utf-8 -*-
"""
Created on Sat Jun 27 11:54:18 2020

@author: Lenovo
"""

import numpy as np

def linear_loss(X, y, w, b):
    num_train = X.shape[0]
   
    # 模型公式
    y_hat = np.dot(X, w) + b    
    # 损失函数
    loss = np.sum((y_hat-y)**2)/num_train    
    # 参数的偏导
    dw = np.dot(X.T, (y_hat-y)) /num_train
    db = np.sum((y_hat-y)) /num_train    
    return y_hat, loss, dw, db

def initialize_params(dims):
    w = np.zeros((dims, 1))
    b = 0
    return w, b


def linar_train(X, y, learning_rate, epochs):
    w, b = initialize_params(X.shape[1])  
    loss_list = []  
    for i in range(1, epochs):        
    # 计算当前预测值、损失和参数偏导
        y_hat, loss, dw, db = linear_loss(X, y, w, b)  
        loss_list.append(loss)      
        # 基于梯度下降的参数更新过程
        w += -learning_rate * dw
        b += -learning_rate * db        
        # 打印迭代次数和损失

        if i % 10000 == 0:
            print('epoch %d loss %f' % (i, loss)) 
               
        # 保存参数
        params = {            
            'w': w,            
            'b': b
        }        
        
        # 保存梯度
        grads = {            
            'dw': dw,            
            'db': db
        }    
            
    return loss_list, loss, params, grads




#下面是给出一些数据进行测试



from sklearn.datasets import load_diabetes
from sklearn.utils import shuffle

diabetes = load_diabetes()
data = diabetes.data
target = diabetes.target 

# 打乱数据
X, y = shuffle(data, target, random_state=13)
X = X.astype(np.float32)

# 训练集与测试集的简单划分
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=1234)
y_train=y_train.reshape(-1,1)
y_test=y_test.reshape(-1,1)
print('X_train=', X_train.shape)
print('X_test=', X_test.shape)
print('y_train=', y_train.shape)
print('y_test=', y_test.shape)



loss_list, loss, params, grads = linar_train(X_train, y_train, 0.001, 100000)


print(params)



def predict(X, params):
    w = params['w']
    b = params['b']

    y_pred = np.dot(X, w) + b    
    return y_pred



y_pred = predict(X_test, params)
print(y_pred[:5])





import matplotlib.pyplot as plt
f = X_test.dot(params['w']) + params['b']

plt.scatter(range(X_test.shape[0]), y_test)
plt.plot(f, color = 'darkorange')
plt.xlabel('X')
plt.ylabel('y')
plt.show()


plt.plot(loss_list, color = 'blue')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.show()




#下面是对线性模型的一个封装

class lr_model():    
    def __init__(self):        
        pass

    def prepare_data(self):
        data = load_diabetes().data
        target = load_diabetes().target
        X, y = shuffle(data, target, random_state=42)
        X = X.astype(np.float32)
        y = y.reshape((-1, 1))
        data = np.concatenate((X, y), axis=1)        
        return data   
         
    def initialize_params(self, dims):
        w = np.zeros((dims, 1))
        b = 0
        return w, b    
    
    def linear_loss(self, X, y, w, b):
        num_train = X.shape[0]
       

        y_hat = np.dot(X, w) + b
        loss = np.sum((y_hat-y)**2) / num_train
        dw = np.dot(X.T, (y_hat - y)) / num_train
        db = np.sum((y_hat - y)) / num_train        
        return y_hat, loss, dw, db    
    
    def linear_train(self, X, y, learning_rate, epochs):
        w, b = self.initialize_params(X.shape[1])        
        for i in range(1, epochs):
            y_hat, loss, dw, db = self.linear_loss(X, y, w, b)
            w += -learning_rate * dw
            b += -learning_rate * db            
            if i % 10000 == 0:
                print('epoch %d loss %f' % (i, loss))
            
            params = {                
                'w': w,                
                'b': b
            }
            
            grads = {                
                'dw': dw,                
                'db': db
            }    
            
        return loss, params, grads    
     
    
    def predict(self, X, params):
        w = params['w']
        b = params['b']
        y_pred = np.dot(X, w) + b        
        return y_pred    
        
    def linear_cross_validation(self, data, k, randomize=True):        
        if randomize:
            data = list(data)
            shuffle(data)

        slices = [data[i::k] for i in range(k)]        
        for i in range(k):
            validation = slices[i]
            train = [data for s in slices if s is not validation for data in s]
            train = np.array(train)
            validation = np.array(validation)            
            yield train, validation
            
            
if __name__ == '__main__':
    lr = lr_model()
    data = lr.prepare_data()   
 
    for train, validation in lr.linear_cross_validation(data, 5):
        X_train = train[:, :10]
        y_train = train[:, -1].reshape((-1, 1))
        X_valid = validation[:, :10]
        y_valid = validation[:, -1].reshape((-1, 1))

        loss5 = []
        loss, params, grads = lr.linear_train(X_train, y_train, 0.001, 100000)
        loss5.append(loss)
        score = np.mean(loss5)
        print('five kold cross validation score is', score)
        y_pred = lr.predict(X_valid, params)
        valid_score = np.sum(((y_pred - y_valid) ** 2)) / len(X_valid)
        print('valid score is', valid_score)

参考文献与说明:

本博客转载自公众号“机器学习实验室”的文章“数学推导+纯Python实现机器学习算法1:线性回归”,文章链接:
https://mp.weixin.qq.com/s/E9lMqNM8uNc57KNvnsBZGQ

如若侵权,请联系删除。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值