Machine Learning: Andrew Ng Course Exercise ex1

ex1: Single-Variable Linear Regression

Dataset used: 'ex1data1.txt', shape (97, 2)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Cost function:
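The code below implements the course's linear-regression cost in vectorized form:

$$J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2, \qquad h_\theta(X) = X\theta$$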

# Cost function
def computeCost(X, y, theta):
    inner = np.power(X @ theta - y, 2)  # squared residuals, shape (m, 1)
    return np.sum(inner) / (2 * len(X))

Gradient descent function:

Dimension notes: after the column of ones is added, X is (97, 2), theta is (2, 1), and y is (97, 1); then X @ theta - y is (97, 1) and X.T @ (X @ theta - y) is (2, 1), matching theta's shape.
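Each iteration below performs the vectorized batch gradient descent update

$$\theta := \theta - \frac{\alpha}{m} X^T (X\theta - y)$$

where m = len(X) is the number of training examples.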

# Gradient descent
def gradientDescent(X, y, theta, alpha, iters):
    costs = []
    for i in range(iters):
        # vectorized batch update: theta := theta - (alpha/m) * X^T (X@theta - y)
        theta = theta - (X.T @ (X @ theta - y)) * alpha / len(X)
        cost = computeCost(X, y, theta)
        costs.append(cost)
        if i % 100 == 0:
            print(cost)  # log the cost every 100 iterations
    return theta, costs

Read in the dataset

path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
data
# names sets the column names

Plot a scatter plot of the dataset

data.plot(kind='scatter',x='Population',y='Profit',figsize=(12,8))
plt.show()

Add a new column of ones to the dataset (the intercept term)

data.insert(0, 'Ones', 1)  # insert a column named 'Ones', filled with 1, at position 0
data.head()

Split the dataset into X and y, and initialize theta

cols = data.shape[1]  # number of columns
X = data.iloc[:, :-1].values          # every column except the last, shape (97, 2)
y = data.iloc[:, cols-1:cols].values  # the last column, kept 2-D, shape (97, 1)
theta = np.zeros((2, 1))
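A quick shape check before running gradient descent (a minimal sketch; the shapes follow from the (97, 2) dataset plus the added ones column):

# hypothetical sanity check, not part of the original exercise code
assert X.shape == (97, 2) and y.shape == (97, 1) and theta.shape == (2, 1)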

Compute the cost at the initial theta

cost_init = computeCost(X, y, theta)
print(cost_init)

# 32.072733877455676

Initialize the hyperparameters: alpha is the learning rate, iters is the number of iterations

alpha = 0.01
iters = 1500
theta, costs = gradientDescent(X, y, theta, alpha, iters)

# theta ≈ [[-3.63029144], [1.16636235]]
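With theta learned, the model can be used for prediction; the original exercise asks for profit estimates at populations of 35,000 and 70,000 (populations and profits are in units of 10,000s, hence the scaling below). A minimal sketch, with approximate outputs that depend on the converged theta:

predict1 = np.array([1, 3.5]) @ theta  # population of 35,000 -> roughly $4,520 profit after scaling
predict2 = np.array([1, 7.0]) @ theta  # population of 70,000 -> roughly $45,342 profit after scaling
print(predict1 * 10000, predict2 * 10000)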

Plot the fitted line

x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = theta[0, 0] + (theta[1, 0] * x)

fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()

Plot the cost against the number of iterations

fig, ax1 = plt.subplots(figsize=(12,8))
ax1.plot(np.arange(iters),costs)
ax1.set(xlabel='iters',
        ylabel='costs',
        title='cost vs iters')
plt.show()

Multivariate Linear Regression

Dataset used: 'ex1data2.txt', shape (47, 3)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv('ex1data2.txt',names=['size','bedrooms','price'])
data.head()


data.mean()  # inspect the column means before normalization

Feature normalization:
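Each feature is rescaled to zero mean and unit standard deviation (z-score normalization), so that gradient descent converges at a similar rate along every dimension:

$$x' = \frac{x - \mu}{\sigma}$$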


# Feature normalization: z-score each column
def normalize_feature(data):
    return (data - data.mean()) / data.std()

data = normalize_feature(data)
data.head()

Show the relationship between house size and price

data.plot.scatter('size','price',label='size')
plt.show()

Show the relationship between the number of bedrooms and price

data.plot.scatter('bedrooms', 'price', label='bedrooms')
plt.show()

# add a column of ones (intercept term)
data.insert(0,'ones',1)
data.head()

Split the dataset

cols = data.shape[1]  # number of columns
X = data.iloc[:, :-1].values          # shape (47, 3)
y = data.iloc[:, cols-1:cols].values  # shape (47, 1)
theta = np.zeros((3, 1))
cost_init = computeCost(X, y, theta)  # reuses computeCost from the single-variable part
cost_init

# 0.48936170212765967

Gradient descent function (the same as before, minus the per-100-iteration logging)

def gradientDescent(X, y, theta, alpha, iters):
    costs = []
    for i in range(iters):
        theta = theta - (X.T @ (X @ theta - y)) * alpha / len(X)
        cost = computeCost(X, y, theta)
        costs.append(cost)
    return theta, costs

Try several learning rates and compare how quickly the cost decreases

candidate_alpha = [0.0003, 0.003, 0.03, 0.0001, 0.001, 0.01]
iters = 2000
fig, ax = plt.subplots()

for alpha in candidate_alpha:
    _, costs = gradientDescent(X, y, theta, alpha, iters)
    ax.plot(np.arange(iters), costs, label=alpha)
ax.legend()
ax.set(xlabel='iters',
       ylabel='costs',
       title='cost vs iters')
plt.show()
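In the resulting figure, the larger learning rates drive the cost down in far fewer iterations; with an alpha much larger than 0.03, however, the updates would start to overshoot and the cost could diverge instead of decreasing.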


Normal Equation
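The normal equation gives the least-squares minimizer in closed form, with no learning rate and no iterations:

$$\theta = (X^T X)^{-1} X^T y$$

If $X^T X$ were singular (e.g., redundant features), np.linalg.pinv could be substituted for np.linalg.inv.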

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
data.insert(0, 'Ones', 1)  # insert a column named 'Ones', filled with 1, at position 0
cols = data.shape[1]  # number of columns
X = data.iloc[:, :-1].values
y = data.iloc[:, cols-1:cols].values
theta = np.zeros((2, 1))
y.shape

def normalEquation(X, y):
    # closed-form least squares: theta = (X^T X)^{-1} X^T y
    theta = np.linalg.inv(X.T @ X) @ X.T @ y
    return theta

theta = normalEquation(X,y)
print(theta)

'''
[[-3.89578088]
 [ 1.19303364]]
'''
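This closed-form theta differs slightly from the gradient-descent estimate ([[-3.63029144], [1.16636235]]): the normal equation returns the exact least-squares minimizer, while gradient descent had not fully converged after 1500 iterations.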

