Andrew Ng's Machine Learning, Assignment 1: Linear Regression in Python

Required part:

This write-up references the materials compiled by Dr. Huang Haiguang:
GitHub: https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('ex1data1.txt', names=['population', 'profit'])
data = df  # data and df refer to the same DataFrame


# def normalize_feature(df):
#     return df.apply(lambda column: (column - column.mean()) / column.std())  # feature scaling (not needed for this part)


def get_X(df):  # build the feature matrix
    ones = pd.DataFrame({'ones': np.ones(len(df))})  # ones is an m-row, 1-column DataFrame
    data = pd.concat([ones, df], axis=1)  # concatenate along columns (axis=0: rows, axis=1: columns)
    return data.iloc[:, :-1]  # [rows, columns]; :-1 drops the last column (the target), e.g. data.iloc[:-1, :] would drop the last row instead


def linear_cost(theta, X, y):
    m = X.shape[0]  # number of samples
    inner = X @ theta - y  # residuals h_theta(x) - y, a length-m vector
    square_sum = inner.T @ inner  # sum of squared residuals
    cost = square_sum / (2 * m)
    return cost


def gradient(theta, X, y):
    m = X.shape[0]
    inner = X.T @ (X @ theta - y)  # derivative of the cost with respect to each theta_j at once; the constant ones column takes care of theta_0
    return inner / m


def batch_gradient_descent(theta, X, y, epoch, alpha=0.02):
    cost_data = [linear_cost(theta, X, y)]
    for _ in range(epoch):  # _ is just a loop counter and is never used inside the loop
        theta = theta - alpha * gradient(theta, X, y)
        cost_data.append(linear_cost(theta, X, y))
    return theta, cost_data


X = get_X(df)  # helper defined above: takes the feature column(s) of df and prepends a column of ones
y = df.values[:, 1]  # type(df) is pandas.core.frame.DataFrame; type(df.values[:, 1]) is numpy.ndarray
theta = np.zeros(df.shape[1])  # one parameter per column of X (intercept + population)
epoch = 6000  # number of iterations
final_theta, cost_data = batch_gradient_descent(theta, X, y, epoch)
b = final_theta[0]
k = final_theta[1]
plt.scatter(data.population, data.profit, label="Training data")
plt.plot(data.population, data.population*k + b, label="Prediction")
plt.xlabel('population')
plt.ylabel('profit')
plt.legend(loc=2)

## Prediction: read a population value and mark the predicted profit on the plot
forecast = float(input('population: '))
predict_profit = forecast * k + b
print(predict_profit)
plt.scatter(forecast, predict_profit, marker='+', c='red')
plt.show()
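For reference, linear_cost and gradient above are the vectorized least-squares cost and its gradient:

$$J(\theta) = \frac{1}{2m}(X\theta - y)^\top (X\theta - y), \qquad \nabla_\theta J(\theta) = \frac{1}{m}X^\top (X\theta - y)$$

and each pass of batch_gradient_descent applies the update $\theta \leftarrow \theta - \alpha \, \nabla_\theta J(\theta)$.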

[Figure: training-data scatter with the fitted line; the predicted point is marked with a red "+"]
I entered 23 as the forecast value (forecast); it is marked in red.
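As a quick sanity check (my own addition, not part of the assignment), the gradient-descent result can be compared against NumPy's closed-form least-squares solver, and cost_data can be plotted to confirm the cost actually converged. This assumes the script above has already run, so X, y, final_theta and cost_data are in scope:

lstsq_theta, *_ = np.linalg.lstsq(X.values, y, rcond=None)  # closed-form least squares
print('gradient descent:', final_theta)
print('least squares:   ', lstsq_theta)  # the two should agree to several decimals

plt.plot(np.arange(len(cost_data)), cost_data)  # cost should decrease and flatten out
plt.xlabel('epoch')
plt.ylabel('cost')
plt.show()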

Optional part:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
ax = plt.axes(projection='3d')
df = pd.read_csv('ex1data2.txt', names=['square', 'bedrooms', 'price'])


def normalize_feature(df):
    return df.apply(lambda column: (column - column.mean()) / column.std())  # feature scaling: zero mean, unit standard deviation per column


def get_X(df):  # build the feature matrix
    ones = pd.DataFrame({'ones': np.ones(len(df))})  # ones is an m-row, 1-column DataFrame
    data = pd.concat([ones, df], axis=1)  # concatenate along columns (axis=0: rows, axis=1: columns)
    return data.iloc[:, :-1]  # keep everything except the last column (the target)


def lr_cost(theta, X, y):
    m = X.shape[0]  # number of samples
    inner = X @ theta - y  # residual vector in R^m; X @ theta is equivalent to X.dot(theta)
    square_sum = inner.T @ inner
    cost = square_sum / (2 * m)
    return cost


def gradient(theta, X, y):
    m = X.shape[0]  # number of samples
    inner = X.T @ (X @ theta - y)  # (m, n).T @ (m, 1) -> (n, 1); X @ theta is equivalent to X.dot(theta)
    return inner / m


def batch_gradient_descent(theta, X, y, epoch, alpha=0.01):
    cost_data = [lr_cost(theta, X, y)]
    for _ in range(epoch):
        theta = theta - alpha * gradient(theta, X, y)
        cost_data.append(lr_cost(theta, X, y))
    return theta, cost_data


def normalEqn(X, y):  # normal equation: closed-form least-squares solution
    theta = np.linalg.inv(X.T @ X) @ X.T @ y  # X.T @ X is equivalent to X.T.dot(X)
    return theta


data = normalize_feature(df)  # feature scaling
y = data.values[:, 2]
X = get_X(data)
ax.scatter(X['square'], X['bedrooms'], y, alpha=0.3)
ax.set_xlabel('square')
ax.set_ylabel('bedrooms')
ax.set_zlabel(r'$prices$')
epoch = 500
alpha = 0.01
theta = np.zeros(X.shape[1])  # X has three columns here (ones, square, bedrooms), so theta starts as three zeros
final_theta, cost_data = batch_gradient_descent(theta, X, y, epoch, alpha=alpha)
D = final_theta[0]
A = final_theta[1]
B = final_theta[2]
Z = A * X['square'] + B * X['bedrooms'] + D  # fitted plane evaluated at the training points
ax.plot_trisurf(X['square'], X['bedrooms'], Z,
                linewidth=0, antialiased=False)

predict_square = float(input('square: '))
predict_square = (predict_square - df.square.mean()) / df.square.std()  # scale the input with the same statistics used in training

predict_bedrooms = float(input('bedrooms: '))
predict_bedrooms = (predict_bedrooms - df.bedrooms.mean()) / df.bedrooms.std()

p = A * predict_square + B * predict_bedrooms + D  # prediction in normalized units
ax.scatter(predict_square, predict_bedrooms, p, marker='+', c='red')  # pass p as the z coordinate so the point sits on the fitted plane
p = p * df.price.std() + df.price.mean()  # undo the price scaling
print('I predict the price is:')
print(p)
plt.show()

[Figure: 3D scatter of the normalized training data with the fitted plane; the predicted point is marked with a red "+"]

Input:
square = 1635
bedrooms = 3
I predict the price is:
292611.913236568
In theory the predicted point should lie on the fitted plane. In my original code it did not, because the ax.scatter call omitted the z coordinate (so the point was drawn at z = 0); passing the normalized prediction p as the third argument, as done above, puts the point on the plane. The predicted price itself is unaffected either way.
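One more check (my own addition): normalEqn is defined above but never called. Running it on the same scaled data shows how close 500 iterations of gradient descent get to the closed-form solution; with a larger epoch the two should agree almost exactly:

theta_ne = normalEqn(X.values, y)  # closed-form solution on the scaled data
print('gradient descent:', final_theta)
print('normal equation: ', theta_ne)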
