吴恩达机器学习课程 ex1 的 Python 实现（Andrew Ng, Machine Learning, Exercise 1）

import numpy as np
from sympy import *
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Sanity check: build and display a 5x5 identity matrix.
A = np.eye(5)
A

单变量线性回归

# Load the single-variable dataset (population, profit); the file has no header row.
path = "ex1data1.txt"
data = pd.read_csv(path,header=None,names=["population","profit"])
data.head()
populationprofit
06.110117.5920
15.52779.1302
28.518613.6620
37.003211.8540
45.85986.8233
# Visualise the raw training data as a scatter plot.
#plt.scatter(data['population'],data['Profit'])
data.plot(kind="scatter",x='population',y='profit',figsize=(12,8))
<matplotlib.axes._subplots.AxesSubplot at 0x11b133828>

在这里插入图片描述

# Cost function J(theta) for linear regression.
def computecost(X,y,theta):
    """Return the halved mean squared error J = sum((X*theta^T - y)^2) / (2m).

    X is the (m, n) design matrix, y the (m, 1) targets, and theta the
    (1, n) parameter row vector (all np.matrix).
    """
    residual = X @ theta.T - y
    squared = np.power(residual, 2)
    return np.sum(squared) / (2 * len(X))
# Prepend a column of ones representing the intercept feature x0.
data.insert(0,'Ones',1)
# Split into inputs X and target y.
cols = data.shape[1]  # number of columns in data
X = data.iloc[:,:-1]
y = data.iloc[:,cols-1:cols] # slice range -> pandas.core.frame.DataFrame
y2 = data.iloc[:,-1] # single position -> pandas.core.series.Series
# NOTE: iloc returns different types depending on how you select (see y vs y2).
#type(y)
X=np.matrix(X.values)
y=np.matrix(y.values)
theta = np.matrix(np.array([0,0])) # initialise theta to zeros
X.shape,y.shape,theta.shape
((97, 2), (97, 1), (1, 2))
computecost(X,y,theta)
32.072733877455676
def gradientDescent(X,y,theta,alpha,iters):
    """Batch gradient descent for linear regression.

    Parameters
    ----------
    X : np.matrix, shape (m, n) — design matrix (first column is ones).
    y : np.matrix, shape (m, 1) — target values.
    theta : np.matrix, shape (1, n) — initial parameter row vector.
    alpha : float — learning rate.
    iters : int — number of update iterations.

    Returns
    -------
    (theta, cost) — the final parameters and a length-`iters` array with
    the cost recorded after each update.
    """
    temp = np.matrix(np.zeros(theta.shape))  # scratch row for the simultaneous update
    parameters = int(theta.ravel().shape[1])  # number of parameters n
    cost = np.zeros(iters)  # cost after each of the iters updates

    for i in range(iters):
        # Residuals use the *current* theta for every j below, so all
        # parameters are updated simultaneously, as the algorithm requires.
        error = (X * theta.T) - y

        for j in range(parameters):
            term = np.multiply(error, X[:, j])
            temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))
        # Copy instead of aliasing: `theta = temp` would make theta and temp
        # the SAME object from the second iteration on — harmless with the
        # current read-before-write pattern, but fragile if the body changes.
        theta = temp.copy()
        cost[i] = computecost(X, y, theta)  # track convergence

    return theta, cost
# Hyper-parameters: learning rate and number of iterations.
alpha = 0.01
iters=1500
# Run batch gradient descent on the single-variable data.
g,cost = gradientDescent(X,y,theta,alpha,iters)
# Plot the cost history against the iteration index. Use a dedicated
# variable for the x-axis: the original code reassigned X here, clobbering
# the design matrix that normalEqn(X, y) needs further down.
iter_axis = list(range(len(cost)))
plt.plot(iter_axis,cost)
[<matplotlib.lines.Line2D at 0x11aa4e128>]

在这里插入图片描述

g
matrix([[-3.63029144,  1.16636235]])
# Predict snack-stand profit for cities of 35,000 and 70,000 people
# (population is expressed in units of 10,000, hence 3.5 and 7).
predict1 = [1,3.5]*g.T
print("predict1:",predict1)
predict2 = [1,7]*g.T
print("predict2:",predict2)
predict1: [[0.45197679]]
predict2: [[4.53424501]]
# Visualise the learned parameters: fitted line over the training data.
x = np.linspace(data.population.min(),data.population.max(),100)
f = g[0,0]+g[0,1]*x  # hypothesis h(x) = theta0 + theta1 * x

fig,ax = plt.subplots(figsize=(12,8))
ax.plot(x,f,'r',label='prediction')
ax.scatter(data.population,data.profit,label='training data')
ax.legend(loc=2)
ax.set_xlabel('population')
ax.set_ylabel('profit')
ax.set_title('predicted profit vs. population size')
plt.show()  

在这里插入图片描述

多变量线性回归

# Load the multivariate dataset: house size, number of bedrooms, price.
path2="ex1data2.txt"
data2 = pd.read_csv(path2,header=None,names=["Size","Bedrooms","Price"])
data2.head()
SizeBedroomsPrice
021043399900
116003329900
224003369000
314162232000
430004539900
# Feature normalisation: subtract each column's mean, divide by its standard deviation.
data2 = (data2 - data2.mean())/data2.std() # z-score per column
data2.head()
SizeBedroomsPrice
00.130010-0.2236750.475747
1-0.504190-0.223675-0.084074
20.502476-0.2236750.228626
3-0.735723-1.537767-0.867025
41.2574761.0904171.595389
# Gradient descent for the multivariate case.

# Prepend the intercept column x0 = 1. This line was commented out in the
# original, but theta2 below has 3 parameters and the later data2.head()
# output shows the 'Ones' column — without it X2 is (m, 2) and the
# X2 * theta2.T product in gradient descent fails on a shape mismatch.
data2.insert(0,'Ones',1)

# Split into features X2 (Ones, Size, Bedrooms) and target y2 (Price).
cols = data2.shape[1]
X2 = data2.iloc[:,0:cols-1]
y2 = data2.iloc[:,cols-1:cols]

# Convert to matrices and initialise theta to zeros.
X2 = np.matrix(X2.values)
y2 = np.matrix(y2.values)
theta2 = np.matrix(np.array([0,0,0]))

data2.head()
OnesSizeBedroomsPrice
010.130010-0.2236750.475747
11-0.504190-0.223675-0.084074
210.502476-0.2236750.228626
31-0.735723-1.537767-0.867025
411.2574761.0904171.595389
g2,cost2 = gradientDescent(X2,y2,theta2,alpha,iters)
g2
matrix([[-1.10856950e-16,  8.84042349e-01, -5.24551809e-02]])
X2 = list(range(len(cost2)))
plt.plot(X2,cost2)
[<matplotlib.lines.Line2D at 0x11ae4cf60>]

在这里插入图片描述

data2.head()
OnesSizeBedroomsPrice
010.130010-0.2236750.475747
11-0.504190-0.223675-0.084074
210.502476-0.2236750.228626
31-0.735723-1.537767-0.867025
411.2574761.0904171.595389
# Visualise the learned parameters: fitted plane over the 3D training data.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Build a grid over the (Size, Bedrooms) plane.
X = np.linspace(data2.Size.min(),data2.Size.max(),100)
Y = np.linspace(data2.Bedrooms.min(),data2.Bedrooms.max(),100)
X, Y = np.meshgrid(X, Y)    # grid covering the x-y plane
Z = g2[0,0]+g2[0,1]*X+g2[0,2]*Y  # predicted (normalised) price surface
# rstride / cstride: row / column step of the surface mesh.
# rcount / ccount: number of sampled rows / columns (default 50);
# cannot be combined with rstride / cstride.
# vmax / vmin: upper / lower colour-map limits.
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))
# zdir ('z' | 'x' | 'y'): which plane the filled contour is projected onto.
# offset: where along that axis the projection is drawn.
ax.contourf(X,Y,Z,zdir='z',offset=-2)
# Limit the z-axis display range (x and y axes are set the same way).
ax.set_zlim(-2,2)

# NOTE(review): these slices assume `cols` was computed AFTER the 'Ones'
# column was inserted (cols == 4, so 1:cols-2 selects Size) — confirm;
# with cols == 3 the first slice would be empty.
x = data2.iloc[:,1:cols-2]
y = data2.iloc[:,2:cols-1]
z = data2.iloc[:,cols-1:cols]


x = np.matrix(x.values)
y = np.matrix(y.values)
z = np.matrix(z.values)
ax.scatter(x, y, z, marker='.', s=50, label='')

plt.show()

在这里插入图片描述

# Normal equation: closed-form least-squares solution (no iteration needed).
def normalEqn(X,y):
    """Return theta = (X^T X)^{-1} X^T y for design matrix X and targets y."""
    gram = X.T @ X
    theta = np.linalg.inv(gram) @ (X.T @ y)
    return theta
final_theta2 = normalEqn(X,y)
final_theta2
matrix([[-3.89578088],
        [ 1.19303364]])
#对比:上文梯度下降(alpha=0.01, 1500 次迭代)得到的结果是 matrix([[-3.63029144,  1.16636235]]),与正规方程的精确解接近但不完全相同
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值