Implementing Linear Regression in Python

import random

# Generate 100 synthetic (height, weight) samples and save them to hw.txt.
with open('hw.txt', 'w') as f:
    f.write('height\tweight\n')
    for i in range(100):
        height = random.randint(1600, 1900) / 10  # height in cm, 160.0 to 190.0
        weight = (height - 100) * 0.9 + random.randint(-50, 50) / 10  # linear rule plus noise, in kg
        f.write('%.1f\t%.1f\n' % (height, weight))

 

Before implementing linear regression itself, the script above creates a text file named hw.txt containing 100 height and weight samples that we will use for the regression.
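
To sanity-check the generated file, it can be read back with numpy before training (a minimal sketch; skiprows=1 skips the header line):

import numpy as np

data = np.loadtxt('hw.txt', delimiter='\t', skiprows=1)
print(data.shape)  # expected: (100, 2)
print(data[:3])    # first three height/weight rows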

 

__author__ = 'DivinerShi'
import numpy as np
import matplotlib.pyplot as plt

def compute_error(b,m,data):

    totalError = 0
    #Two ways to implement this
    #first way
    # for i in range(0,len(data)):
    #     x = data[i,0]
    #     y = data[i,1]

    #     totalError += (y-(m*x+b))**2
        # print('(%f*%f+%f)=%f,y=%f,loss=%f,totalError=%f' % (m,x,b,(m*x+b),y,(y-(m*x+b))**2,totalError))

    #second way: vectorized over all samples at once
    x = data[:, 0]
    y = data[:, 1]
    totalError = np.sum((y - (m * x + b)) ** 2)

    return totalError/float(len(data))

def optimizer(data,starting_b,starting_m,learning_rate,num_iter):
    b = starting_b
    m = starting_m

    #gradient descent
    for i in range(num_iter):
        #update b and m with the new, more accurate values from
        #this gradient step
        b,m =compute_gradient(b,m,data,learning_rate)
        if i%100==0:
            print('iter {0}:error={1}'.format(i,compute_error(b,m,data)))
    return [b,m]

def compute_gradient(b_current,m_current,data,learning_rate):

    b_gradient = 0
    m_gradient = 0

    N = float(len(data))
    #Two ways to implement this
    #first way
    # for i in range(0,len(data)):
    #     x = data[i,0]
    #     y = data[i,1]

    #     #computing partial derivatives of our error function
    #     #b_gradient = -(2/N)*sum(y-(m*x+b))
    #     #m_gradient = -(2/N)*sum(x*(y-(m*x+b)))
    #     b_gradient += -(2/N)*(y-((m_current*x)+b_current))
    #     m_gradient += -(2/N) * x * (y-((m_current*x)+b_current))
    #     # print('m_current=%f,b_current=%f,N=%f,x=%f,y=%f,y-((m_current*x)+b_current)=%f, b_gradient=%f, m_gradient=%f' % (m_current, b_current, N, x, y, y-((m_current*x)+b_current), b_gradient, m_gradient))

    #Vectorization implementation
    x = data[:,0]
    y = data[:,1]
    b_gradient = -(2/N)*(y-m_current*x-b_current)
    b_gradient = np.sum(b_gradient,axis=0)
    m_gradient = -(2/N)*x*(y-m_current*x-b_current)
    m_gradient = np.sum(m_gradient,axis=0)
        
    #update our b and m values using our partial derivatives
    new_b = b_current - (learning_rate * b_gradient)
    new_m = m_current - (learning_rate * m_gradient)
    return [new_b,new_m]


def Linear_regression():
    # get train data
    # data = np.loadtxt('data.csv', delimiter=',')
    data = np.loadtxt('hw.txt', delimiter='\t', skiprows=1)  # skiprows=1 skips the header line

    #define hyperparameters
    #learning_rate controls the size of each gradient update step
    #num_iter is the number of gradient descent iterations
    #the model is y = m*x + b
    learning_rate = 0.000001
    initial_b = 0.0
    initial_m = 0.0
    num_iter = 10000

    #train model
    #print b m error
    print('initial variables:\n initial_b = {0}\n initial_m = {1}\n initial error = {2} \n'\
        .format(initial_b,initial_m,compute_error(initial_b,initial_m,data)))

    #optimizing b and m
    [b ,m] = optimizer(data,initial_b,initial_m,learning_rate,num_iter)

    #print final b m error
    print('final parameters after {0} iterations:\n b = {1}\n m = {2}\n final error = {3} \n'.format(num_iter,b,m,compute_error(b,m,data)))

    #plot the training data and the gradient descent fit in red
    x = data[:,0]
    y = data[:,1]
    y_predict = m*x+b
    plt.scatter(x, y, color = 'blue')
    plt.plot(x, y_predict, color = 'red', linewidth = 4)
    # plt.show()

    # compare with sklearn's LinearRegression on the same plot
    from sklearn import linear_model
    regr = linear_model.LinearRegression()
    regr.fit(x.reshape(-1,1), y)
    print(regr.coef_, regr.intercept_)

    plt.scatter(x, y, color = 'blue')
    plt.plot(x, regr.predict(x.reshape(-1,1)), color = 'orange', linewidth = 4)
    plt.show()

def lr_by_sklearn():
    from sklearn import linear_model
    data = np.loadtxt('hw.txt', delimiter='\t', skiprows=1)
    x = data[:,0]
    y = data[:,1]
    regr = linear_model.LinearRegression()
    regr.fit(x.reshape(-1,1), y)
    print(regr.coef_, regr.intercept_)

    # import pandas as pd
    # data = pd.read_csv('hw.txt', sep='\t')
    # regr.fit(data['height'].values.reshape(-1,1), data['weight'])
    # plt.scatter(data['height'], data['weight'], color = 'blue')
    # plt.plot(data['height'], regr.predict(data['height'].values.reshape(-1,1)), color = 'orange', linewidth = 4)
    # plt.show()

    plt.scatter(x, y, color = 'blue')
    plt.plot(x, regr.predict(x.reshape(-1,1)), color = 'orange', linewidth = 4)
    plt.show()

if __name__ =='__main__':

    Linear_regression()

    # lr_by_sklearn()

Running the code above draws the height-weight linear regression line; the libraries used are numpy, matplotlib, and sklearn.
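As a further cross-check on the gradient descent and sklearn results, the same line can be computed in closed form with np.polyfit (a minimal sketch; np.polyfit returns coefficients highest degree first, so deg=1 yields [m, b]):

import numpy as np

data = np.loadtxt('hw.txt', delimiter='\t', skiprows=1)
x, y = data[:, 0], data[:, 1]
m, b = np.polyfit(x, y, deg=1)  # closed-form least-squares fit of y = m*x + b
print('closed-form: m = %.4f, b = %.4f' % (m, b))

With the very small learning rate used above, gradient descent may not fully converge to these values within 10000 iterations, so some gap between the two results is expected.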
