吴恩达ex1——多元线性回归实现

本文通过实例展示了如何使用Python进行数据预处理、梯度下降求解线性回归模型参数,并与Scikit-learn库的LinearRegression进行比较。内容包括数据标准化、成本函数计算、梯度下降算法实现和不同学习率对收敛速度的影响。
摘要由CSDN通过智能技术生成
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the housing data: living area, number of bedrooms, sale price.
# (Original label typo 'siez of housing ' fixed; all downstream access is
# positional via iloc, so renaming the column is safe.)
df = pd.read_csv('ex1data2.txt', header=None,
                 names=['size of housing', 'numbers of bedroom', 'house price'])
df.head()
   siez of housing   numbers of bedroom   house price
0             2104                    3        399900
1             1600                    3        329900
2             2400                    3        369000
3             1416                    2        232000
4             3000                    4        539900
# Z-score standardization: every column (features AND target) is centred on
# its mean and scaled to unit variance, so gradient descent converges fast.
df=(df-df.mean())/df.std()
df.head()
   siez of housing   numbers of bedroom   house price
0         0.130010            -0.223675      0.475747
1        -0.504190            -0.223675     -0.084074
2         0.502476            -0.223675      0.228626
3        -0.735723            -1.537767     -0.867025
4         1.257476             1.090417      1.595389
# Design matrix: the two feature columns plus a leading column of ones so the
# intercept is learned as theta[0].
X = df.iloc[:, 0:2].values          # (m, 2)
X = np.insert(X, 0, 1, axis=1)      # prepend bias column -> (m, 3)
X = np.matrix(X)

# Target as an (m, 1) column vector; -1 infers m instead of hard-coding 47.
y = np.matrix(df.iloc[:, -1].values.reshape(-1, 1))

# Parameters initialised to zero, one per design-matrix column.
theta = np.matrix(np.zeros((3, 1)))
def cost(X, y, theta):
    """Squared-error cost J(theta) = (X@theta - y)^T (X@theta - y) / (2m).

    X, y, theta are np.matrix objects of shapes (m, n), (m, 1), (n, 1);
    the result is a 1x1 matrix (as the original returned).
    """
    residual = X * theta - y               # prediction errors, (m, 1)
    return residual.T * residual / (2 * len(y))
# Cost at the all-zero starting point (~0.4894 on the standardized data).
# The stray repr line from the notebook output is removed so the file runs.
J_0 = cost(X, y, theta)
J_0
def GredientDescent(X, y, theta, iters, a):
    """Batch gradient descent for linear regression.

    (Name kept for caller compatibility; 'GradientDescent' is the usual
    spelling.)

    Parameters
    ----------
    X : np.matrix, shape (m, n) -- design matrix, first column all ones.
    y : np.matrix, shape (m, 1) -- target column vector.
    theta : np.matrix, shape (n, 1) -- starting parameters (NOT mutated;
        the original updated the caller's matrix in place).
    iters : int -- number of update steps.
    a : float -- learning rate (alpha).

    Returns
    -------
    (theta, loss) : final parameters and a float array of iters+1 costs,
        loss[0] being the cost of the INITIAL theta. The original read the
        global J_0 for loss[0], which was only correct for a zero start.
    """
    m = len(y)

    def _cost(t):
        # J(t) = (Xt - y)^T (Xt - y) / (2m), as a plain float.
        r = X * t - y
        return float(r.T * r) / (2 * m)

    loss = np.zeros(iters + 1)
    loss[0] = _cost(theta)
    for i in range(iters):
        # Simultaneous update of ALL parameters from the full gradient.
        # The original updated theta[j] one component at a time, so later
        # components saw already-updated earlier ones -- not true batch GD.
        theta = theta - (a / m) * (X.T * (X * theta - y))
        loss[i + 1] = _cost(theta)
    return theta, loss
# Fit: 1000 iterations at learning rate 0.01.
# theta ends up (3, 1); loss is (1001,) -- initial cost plus one per step.
theta, loss = GredientDescent(X, y, theta, iters=1000, a=0.01)
# Plot training loss against iteration number (loss[0], the pre-training
# cost, is skipped so the x-axis matches the 1000 update steps).
plt.plot(range(1000),loss[1:],color='r')
plt.xlabel('iters')
plt.ylabel('loss')
plt.show()

在这里插入图片描述

# Compare convergence speed for several learning rates on the same axes.
learning_rates = [0.01, 0.03, 0.09, 0.18, 0.54]  # hoisted: no need to rebuild per pass
for i, rate in enumerate(learning_rates):
    # Re-initialise theta so each run starts from zero, independent of the last.
    theta = np.matrix(np.zeros((3, 1)))
    theta, loss = GredientDescent(X, y, theta, iters=100, a=rate)
    plt.plot(range(101), loss, label='{}{}{}{}'.format('α', i, '=', rate))

plt.legend(loc='best')
plt.show()

在这里插入图片描述

sklearn实现

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# --- Same regression with scikit-learn, for comparison ---
df1 = pd.read_csv('ex1data2.txt', header=None,
                  names=['size of housing', 'numbers of bedroom', 'house price'])

X1 = df1.iloc[:, [0, 1]].values             # (m, 2) feature matrix
y1 = df1.iloc[:, -1].values.reshape(-1, 1)  # (m, 1) target; -1 infers m

# One scaler PER array: the original reused a single StandardScaler for both
# fit_transform calls, overwriting its fitted mean/std with y's statistics,
# which would break any later inverse_transform on the features.
x_scaler = StandardScaler()
y_scaler = StandardScaler()
X1_std = x_scaler.fit_transform(X1)
y1_std = y_scaler.fit_transform(y1)

# Dataset is tiny, so it is deliberately NOT split into train/test sets.
lr = LinearRegression()
lr.fit(X1_std, y1_std)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值