Andrew Ng's Machine Learning, Assignment 1: Linear Regression in Python

Required part:

This write-up references the materials compiled by Dr. Huang Haiguang:
GitHub: https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('ex1data1.txt', names=['population', 'profit'])
data = df  # data and df refer to the same DataFrame


# def normalize_feature(df):
#     return df.apply(lambda column: (column - column.mean()) / column.std())  # feature scaling (not needed for this part)


def get_X(df):  # build the feature matrix
    ones = pd.DataFrame({'ones': np.ones(len(df))})  # ones is an m-row, 1-column DataFrame
    data = pd.concat([ones, df], axis=1)  # concatenate along columns (axis=0: rows, axis=1: columns)
    return data.iloc[:, :-1]  # [rows, columns]; :-1 drops the last column (the target), e.g. data.iloc[:-1, :] would drop the last row instead


def linear_cost(theta, X, y):
    m = X.shape[0]  # number of samples
    inner = X @ theta - y  # residuals h_theta(x) - y, a length-m vector
    square_sum = inner.T @ inner  # sum of squared residuals
    cost = square_sum / (2 * m)
    return cost


def gradient(theta, X, y):
    m = X.shape[0]
    inner = X.T @ (X @ theta - y)  # derivative of the cost with respect to each theta_j at once; the constant ones column takes care of theta_0
    return inner / m


def batch_gradient_descent(theta, X, y, epoch, alpha=0.02):
    cost_data = [linear_cost(theta, X, y)]
    for _ in range(epoch):  # _ is just a loop counter and is never used inside the loop
        theta = theta - alpha * gradient(theta, X, y)
        cost_data.append(linear_cost(theta, X, y))
    return theta, cost_data


X = get_X(df)  # helper defined above: takes the feature column(s) of df and prepends a column of ones
y = df.values[:, 1]  # type(df) is pandas.core.frame.DataFrame; type(df.values[:, 1]) is numpy.ndarray
theta = np.zeros(df.shape[1])  # one parameter per column of X (intercept + population)
epoch = 6000  # number of iterations
final_theta, cost_data = batch_gradient_descent(theta, X, y, epoch)
b = final_theta[0]
k = final_theta[1]
plt.scatter(data.population, data.profit, label="Training data")
plt.plot(data.population, data.population*k + b, label="Prediction")
plt.xlabel('population')
plt.ylabel('profit')
plt.legend(loc=2)

## Prediction: read a population value and mark the predicted profit on the plot
forecast = float(input('population: '))
predict_profit = forecast * k + b
print(predict_profit)
plt.scatter(forecast, predict_profit, marker='+', c='red')
plt.show()
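For reference, linear_cost and gradient above are the vectorized least-squares cost and its gradient:

$$J(\theta) = \frac{1}{2m}(X\theta - y)^\top (X\theta - y), \qquad \nabla_\theta J(\theta) = \frac{1}{m}X^\top (X\theta - y)$$

and each pass of batch_gradient_descent applies the update $\theta \leftarrow \theta - \alpha \, \nabla_\theta J(\theta)$.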

[Figure: training-data scatter with the fitted line; the predicted point is marked with a red "+"]
I entered 23 as the forecast value (forecast); it is marked in red.
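As a quick sanity check (my own addition, not part of the assignment), the gradient-descent result can be compared against NumPy's closed-form least-squares solver, and cost_data can be plotted to confirm the cost actually converged. This assumes the script above has already run, so X, y, final_theta and cost_data are in scope:

lstsq_theta, *_ = np.linalg.lstsq(X.values, y, rcond=None)  # closed-form least squares
print('gradient descent:', final_theta)
print('least squares:   ', lstsq_theta)  # the two should agree to several decimals

plt.plot(np.arange(len(cost_data)), cost_data)  # cost should decrease and flatten out
plt.xlabel('epoch')
plt.ylabel('cost')
plt.show()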

Optional part:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
ax = plt.axes(projection='3d')
df = pd.read_csv('ex1data2.txt', names=['square', 'bedrooms', 'price'])


def normalize_feature(df):
    return df.apply(lambda column: (column - column.mean()) / column.std())  # feature scaling: zero mean, unit standard deviation per column


def get_X(df):  # build the feature matrix
    ones = pd.DataFrame({'ones': np.ones(len(df))})  # ones is an m-row, 1-column DataFrame
    data = pd.concat([ones, df], axis=1)  # concatenate along columns (axis=0: rows, axis=1: columns)
    return data.iloc[:, :-1]  # keep everything except the last column (the target)


def lr_cost(theta, X, y):
    m = X.shape[0]  # number of samples
    inner = X @ theta - y  # residual vector in R^m; X @ theta is equivalent to X.dot(theta)
    square_sum = inner.T @ inner
    cost = square_sum / (2 * m)
    return cost


def gradient(theta, X, y):
    m = X.shape[0]  # number of samples
    inner = X.T @ (X @ theta - y)  # (m, n).T @ (m, 1) -> (n, 1); X @ theta is equivalent to X.dot(theta)
    return inner / m


def batch_gradient_descent(theta, X, y, epoch, alpha=0.01):
    cost_data = [lr_cost(theta, X, y)]
    for _ in range(epoch):
        theta = theta - alpha * gradient(theta, X, y)
        cost_data.append(lr_cost(theta, X, y))
    return theta, cost_data


def normalEqn(X, y):  # normal equation: closed-form least-squares solution
    theta = np.linalg.inv(X.T @ X) @ X.T @ y  # X.T @ X is equivalent to X.T.dot(X)
    return theta


data = normalize_feature(df)  # feature scaling
y = data.values[:, 2]
X = get_X(data)
ax.scatter(X['square'], X['bedrooms'], y, alpha=0.3)
ax.set_xlabel('square')
ax.set_ylabel('bedrooms')
ax.set_zlabel(r'$prices$')
epoch = 500
alpha = 0.01
theta = np.zeros(X.shape[1])  # X has three columns here (ones, square, bedrooms), so theta starts as three zeros
final_theta, cost_data = batch_gradient_descent(theta, X, y, epoch, alpha=alpha)
D = final_theta[0]
A = final_theta[1]
B = final_theta[2]
Z = A * X['square'] + B * X['bedrooms'] + D  # fitted plane evaluated at the training points
ax.plot_trisurf(X['square'], X['bedrooms'], Z,
                linewidth=0, antialiased=False)

predict_square = float(input('square: '))
predict_square = (predict_square - df.square.mean()) / df.square.std()  # scale the input with the same statistics used in training

predict_bedrooms = float(input('bedrooms: '))
predict_bedrooms = (predict_bedrooms - df.bedrooms.mean()) / df.bedrooms.std()

p = A * predict_square + B * predict_bedrooms + D  # prediction in normalized units
ax.scatter(predict_square, predict_bedrooms, p, marker='+', c='red')  # pass p as the z coordinate so the point sits on the fitted plane
p = p * df.price.std() + df.price.mean()  # undo the price scaling
print('I predict the price is:')
print(p)
plt.show()

[Figure: 3D scatter of the normalized training data with the fitted plane; the predicted point is marked with a red "+"]

Input:
square = 1635
bedrooms = 3
I predict the price is:
292611.913236568
In theory the predicted point should lie on the fitted plane. In my original code it did not, because the ax.scatter call omitted the z coordinate (so the point was drawn at z = 0); passing the normalized prediction p as the third argument, as done above, puts the point on the plane. The predicted price itself is unaffected either way.
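One more check (my own addition): normalEqn is defined above but never called. Running it on the same scaled data shows how close 500 iterations of gradient descent get to the closed-form solution; with a larger epoch the two should agree almost exactly:

theta_ne = normalEqn(X.values, y)  # closed-form solution on the scaled data
print('gradient descent:', final_theta)
print('normal equation: ', theta_ne)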
