吴恩达机器学习ex1:线性回归

最新推荐文章于 2022-10-31 18:55:38 发布

wssssang

最新推荐文章于 2022-10-31 18:55:38 发布

阅读量282

点赞数

分类专栏： python 机器学习文章标签：机器学习

本文链接：https://blog.csdn.net/weixin_47798560/article/details/115525300

版权

python 同时被 2 个专栏收录

34 篇文章 5 订阅

订阅专栏

机器学习

8 篇文章 0 订阅

订阅专栏

吴恩达机器学习练习一：线性回归

单变量线性回归

在本部分的练习中，您将实现单变量线性回归，以预测食品卡车的利润。假设您是一家连锁餐厅的首席执行官，正在考虑在不同的城市开设新的分店。该连锁店已经在多个城市拥有食品卡车，并且您拥有这些城市的利润和人口数据。
您希望利用这些数据来帮助您选择下一个要扩张的城市。
在这里插入图片描述

DataFrame转为矩阵的方法：

#coding=utf-8


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the training data.
# A raw string keeps the Windows backslashes literal: '\文' and '\e' are not
# valid escape sequences, so the non-raw form triggers an invalid-escape
# warning on current Python versions. The resulting path text is unchanged.
path = r'D:\文档\ex1data1.txt'
# header=None: the first line of the file is read as data, and the two
# columns are named explicitly.
data = pd.read_csv(path, header=None, names=['population', 'profit'])

# Optional sanity checks on the loaded frame:
# print(data.head())
# print(data.describe())

# Optional scatter plot of the raw data:
# data.plot(kind='scatter', x='population', y='profit', figsize=(8, 5))
# plt.show()


# Squared-error cost for linear regression
def costfunction(x,y,theta):
    """Return the sum of squared residuals divided by ``2 * len(x)``.

    The callers in this script pass ``np.matrix`` objects, for which ``*``
    is a matrix product.

    Args:
        x: Design matrix; ``len(x)`` is used as the number of samples.
        y: Targets, subtracted from the predictions ``x * theta.T``.
        theta: Parameter row vector (it is transposed before multiplying).

    Returns:
        The scalar cost for the given parameters.
    """
    residuals = x * theta.T - y
    squared_total = np.sum(np.power(residuals, 2))
    return squared_total / (2 * len(x))

# Prepend a constant column of ones; it is the column that multiplies
# theta[0, 0] in x * theta.T.
data.insert(0, 'ones', 1)

# Positional split with iloc (mind the slice bounds): every column except the
# last is a feature, the last column is the target.
columns = data.shape[1]
feature_frame = data.iloc[:, :columns - 1]
target_frame = data.iloc[:, columns - 1:]

# Convert to np.matrix so that `*` is a matrix product, and start theta as a
# 1 x 2 row of zeros.
x = np.matrix(feature_frame.values)
y = np.matrix(target_frame.values)
theta = np.matrix([0, 0])

# Cost at the initial (all-zero) parameters.
print(costfunction(x, y, theta))


# Batch gradient descent, one parameter at a time
def gradientdecent(x,y,theta,alpha,iters):
    """Run batch gradient descent, updating each parameter in an inner loop.

    Args:
        x: Design matrix (np.matrix in this script).
        y: Target column.
        theta: Initial parameter row vector; entries are read as theta[0, j].
        alpha: Learning rate.
        iters: Number of iterations to run.

    Returns:
        A tuple ``(theta, cost)``: the parameters after the last iteration
        and an array holding the cost recorded after every iteration. With
        ``iters == 0`` the input ``theta`` is returned as-is.
    """
    n_params = int(theta.flatten().shape[1])
    cost = np.zeros(iters)
    for step in range(iters):
        # Residuals use the parameters from the previous step, so all
        # parameters are updated simultaneously.
        residual = (x * theta.T) - y
        updated = np.matrix(np.zeros(theta.shape))
        for j in range(n_params):
            gradient_sum = np.sum(np.multiply(residual, x[:, j]))
            updated[0, j] = theta[0, j] - ((alpha / len(x)) * gradient_sum)
        theta = updated
        cost[step] = costfunction(x, y, theta)
    return theta, cost


# Second approach: vectorized batch gradient descent
def gradientdecent(x,y,theta,alpha,iters):
    """Run batch gradient descent with one matrix expression per step.

    ``theta`` is handled as a row vector (1 x n), the layout ``costfunction``
    expects because it multiplies by ``theta.T``. Multiplying ``x @ theta``
    directly only works for a column-vector ``theta`` and fails with a shape
    mismatch for the 1 x n ``theta`` this script passes in.

    Args:
        x: Design matrix with one row per sample.
        y: Target column with one row per sample.
        theta: Initial parameter row vector (1 x n).
        alpha: Learning rate.
        iters: Number of iterations to run.

    Returns:
        A tuple ``(theta, costs)``: the final 1 x n parameter row vector and
        a list with the cost recorded after every iteration.
    """
    costs = []
    for _ in range(iters):
        # Residual column for the current parameters.
        error = x @ theta.T - y
        # error.T @ x is a row with the same layout as theta.
        theta = theta - (alpha / len(x)) * (error.T @ x)
        costs.append(costfunction(x, y, theta))
    return theta, costs

# Hyperparameters: learning rate and number of gradient-descent iterations.
alpha = 0.01
iters = 1000

# Fit the model, then print the cost recorded after every iteration.
final_theta, cost = gradientdecent(x, y, theta, alpha=alpha, iters=iters)
print(cost)


# Plot the fitted line over the training data: Predicted Profit vs. Population Size
plt.figure(figsize=(20, 10), dpi=50)
plt.xlabel('population')
plt.ylabel('profit')
# np.linspace returns evenly spaced numbers over the given interval; here 100
# population values spanning the observed range.
a = np.linspace(data.population.min(), data.population.max(), 100)
# Model prediction: final_theta[0, 0] + final_theta[0, 1] * population.
b = final_theta[0, 0] + (final_theta[0, 1] * a)
plt.plot(a, b, color='red', label='prediction')
plt.scatter(data.population, data.profit, color='blue', label='Training Data')
# Keep comments off the legend call's own line: a '#' placed before the
# closing parenthesis comments it out and makes the script a SyntaxError.
plt.legend(loc='upper right')
# Title
plt.title('Predicted Profit vs. Population Size')
plt.show()

# Learning curve: cost recorded after each iteration (Error vs. Training Epoch)
plt.figure(figsize=(15, 8), dpi=50)
plt.title('Error vs. Training Epoch')
plt.xlabel('iteration')
plt.ylabel('cost')
epochs = np.arange(iters)
plt.plot(epochs, cost)
plt.show()

在这里插入图片描述

多变量线性回归

练习1还包括一个房屋价格数据集，其中有2个变量（房子的大小、卧室的数量）和目标（房子的价格）。我们将使用前面已经用过的方法来分析该数据集。
在这里插入图片描述

# 多变量线性回归
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# A raw string keeps the Windows backslashes literal: '\文' and '\e' are not
# valid escape sequences, so the non-raw form triggers an invalid-escape
# warning on current Python versions. The resulting path text is unchanged.
path = r'D:\文档\ex1data2.txt'
# Explicit column names are supplied, so the first line is read as data.
data = pd.read_csv(path, names=['size', 'quantity', 'price'])
print(data.head())

# Cost function
def costfunction(x,y,theta):
    """Compute the squared-error cost of the linear model ``x * theta.T``.

    The callers in this script pass ``np.matrix`` objects, for which ``*``
    is a matrix product.

    Args:
        x: Design matrix; its length is the number of samples.
        y: Target column.
        theta: Parameter row vector, transposed before the product.

    Returns:
        Sum of squared differences between predictions and ``y``, divided
        by twice the number of samples.
    """
    m = len(x)
    predictions = x * theta.T
    return np.sum(np.power(predictions - y, 2)) / (2 * m)

# Feature scaling: subtract each column's mean and divide by its standard
# deviation (as computed by pandas). The target column is scaled as well.
column_means = data.mean()
column_stds = data.std()
norm_data = (data - column_means) / column_stds
print(norm_data.head())

# Prepend a constant column of ones; it multiplies theta[0, 0].
norm_data.insert(0, 'ones', 1)

# Every column except the last is a feature; the last column is the target.
x = norm_data.iloc[:, :-1]
y = norm_data.iloc[:, -1:]
print(x.head())
print(y.head())

# Convert to np.matrix and start theta as a 1 x 3 row of zeros
# (one entry per column of x: ones, size, quantity).
x = np.matrix(x.values)
y = np.matrix(y.values)
theta = np.matrix([0, 0, 0])

# print(costfunction(x, y, theta))

# Batch gradient descent
def gradientdescent(x,y,theta,alpha,iters):
    """Minimise the cost with batch gradient descent.

    Args:
        x: Design matrix (np.matrix in this script).
        y: Target column.
        theta: Initial parameter row vector; entries are read as theta[0, j].
        alpha: Learning rate.
        iters: Number of iterations to run.

    Returns:
        A tuple ``(theta, cost)``: the parameters after the last iteration
        and an array with the cost recorded after every iteration. With
        ``iters == 0`` the input ``theta`` is returned as-is.
    """
    cost = np.zeros(iters)  # one cost entry per iteration
    param_count = int(theta.flatten().shape[1])
    for it in range(iters):
        # Residuals are computed once per iteration, before any parameter
        # changes, so every parameter is updated from the same error.
        error = (x * theta.T) - y
        next_theta = np.matrix(np.zeros(theta.shape))
        for j in range(param_count):
            weighted = np.multiply(error, x[:, j])
            next_theta[0, j] = theta[0, j] - ((alpha / len(x)) * np.sum(weighted))
        theta = next_theta
        cost[it] = costfunction(x, y, theta)
    return theta, cost

# Hyperparameters: learning rate and number of gradient-descent iterations.
alpha = 0.01
iters = 1000

# Fit the model, print the parameters with the per-iteration costs, then the
# cost at the fitted parameters.
final_theta, cost = gradientdescent(x, y, theta, alpha=alpha, iters=iters)
print(final_theta, cost)
print(costfunction(x, y, final_theta))

# Learning curve: cost recorded after each iteration.
plt.figure(figsize=(10, 8), dpi=50)
plt.title('Error vs. Training Epoch')
plt.xlabel('iteration')
plt.ylabel('cost')
epochs = np.arange(iters)
plt.plot(epochs, cost)
plt.show()

在这里插入图片描述

正规方程

在这里插入图片描述

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the training data.
# A raw string keeps the Windows backslashes literal: '\文' and '\e' are not
# valid escape sequences, so the non-raw form triggers an invalid-escape
# warning on current Python versions. The resulting path text is unchanged.
path = r'D:\文档\ex1data1.txt'
# header=None: the first line of the file is read as data, and the two
# columns are named explicitly.
data = pd.read_csv(path, header=None, names=['population', 'profit'])

# Prepend a constant column of ones, then split positionally: every column
# except the last is a feature, the last column is the target.
data.insert(0, 'ones', 1)
x = np.matrix(data.iloc[:, :-1].values)
y = np.matrix(data.iloc[:, -1:].values)
# Normal equation (closed-form least squares)
def normEqn(x,y):
    """Solve linear regression in closed form via the normal equation.

    Computes ``pinv(x.T @ x) @ x.T @ y``. The pseudo-inverse replaces
    ``np.linalg.inv`` so that a singular ``x.T @ x`` (for example duplicated
    or linearly dependent feature columns) yields a least-squares solution
    instead of raising ``LinAlgError``. For an invertible ``x.T @ x`` the
    result matches the plain inverse up to floating-point error.

    Args:
        x: Design matrix with one row per sample.
        y: Target column with one row per sample.

    Returns:
        The fitted parameters as a column with one row per column of ``x``.
    """
    theta = np.linalg.pinv(x.T @ x) @ x.T @ y
    return theta
# Solve for theta in closed form and print the result.
final_theta = normEqn(x,y)
print(final_theta)

wssssang

关注

0
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
吴恩达机器学习ex1:线性回归

单变量线性回归在本部分的练习中，您将使用一个变量实现线性回归，以预测食品卡车的利润。假设你是一家餐馆的首席执行官，正在考虑不同的城市开设一个新的分店。该连锁店已经在各个城市拥有卡车，而且你有来自城市的利润和人口数据。您希望使用这些数据来帮助您选择将哪个城市扩展到下一个城市。#coding=utf-8import numpy as npimport pandas as pdimport matplotlib.pyplot as plt# read datapath = 'D:\文档\ex
复制链接

扫一扫

专栏目录