吴恩达《机器学习》第一次作业——多变量线性回归

最新推荐文章于 2023-06-21 15:18:19 发布

216549856

最新推荐文章于 2023-06-21 15:18:19 发布

阅读量795

点赞数

分类专栏： ML

本文链接：https://blog.csdn.net/Cherish_x/article/details/89603891

版权

ML 专栏收录该内容

9 篇文章 0 订阅

订阅专栏

参考代码见：https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes/tree/master/code
这是一位大神整理的吴恩达《机器学习》课程作业代码，本文的实现就是照着它照葫芦画瓢写出来的，慢慢来吧。

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
path = 'ex1data2.txt'   # relative path to the training file (size, bedrooms, price)
data = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])

# Keep the mean and standard deviation of the target so that predictions made
# in normalized units can be converted back to prices later on.
meanPrice = data['Price'].mean()
stdPrice = data['Price'].std()

# Prepare the design matrix and the target column.
data = (data - data.mean()) / data.std()    # feature scaling: z-score every column
data.insert(0, 'Ones', 1)                   # intercept column
cols = data.shape[1]
X = np.matrix(data.iloc[:, :cols - 1].values)   # features, intercept column included
y = np.matrix(data.iloc[:, cols - 1:].values)   # target (normalized price)
# One parameter per column of X, sized from the data rather than hard-coded to
# three zeros, so adding a feature column does not require editing this line.
theta = np.matrix(np.zeros((1, X.shape[1])))
#————————————————————————————————————————————————————
#cost function
def computeCost(X, y, theta):
    """Return the squared-error cost of the linear model ``X * theta.T``.

    The value is ``sum((X * theta.T - y) ** 2) / (2 * m)`` with ``m = len(X)``.
    ``*`` is applied as written, so the arguments are expected to be
    ``np.matrix`` objects (which is how this script builds them) for it to act
    as a matrix product.

    Args:
        X: design matrix, one row per example.
        y: column of targets, one row per example.
        theta: row of parameters, one entry per column of ``X``.

    Returns:
        The cost as a scalar.
    """
    residual = X * theta.T - y
    m = len(X)
    squared_error = np.multiply(residual, residual)
    return np.sum(squared_error) / (2 * m)

# Hyper-parameters passed to gradientDescent.
iters = 1000    # number of gradient-descent steps to run
alpha = 0.01    # learning rate (step size)

def gradientDescent(X, y, theta, alpha, iters):
    """Fit linear-regression parameters with batch gradient descent.

    Every iteration applies the simultaneous update
    ``theta := theta - (alpha / m) * (X * theta.T - y).T * X`` and records the
    cost returned by ``computeCost`` (defined elsewhere in this file) for the
    updated parameters. ``*`` is used as a matrix product, so ``X``, ``y`` and
    ``theta`` are expected to be ``np.matrix`` objects.

    Args:
        X: design matrix, one row per example.
        y: column of targets, one row per example.
        theta: row of initial parameters, one entry per column of ``X``.
            It is copied, never modified in place.
        alpha: learning rate.
        iters: number of iterations to run.

    Returns:
        A ``(theta, cost)`` tuple: the fitted parameter row and an array of
        length ``iters`` holding the cost after each iteration.
    """
    m = len(X)
    # Float copy: keeps the caller's matrix untouched and avoids carrying an
    # integer dtype (e.g. an all-zero int start vector) into the updates.
    theta = np.matrix(theta, dtype=float)
    cost = np.zeros(iters)

    for i in range(iters):
        residual = X * theta.T - y          # prediction error, one row per example
        gradient = (residual.T * X) / m     # one entry per parameter
        # Rebinding to a new matrix updates all parameters at once from the
        # same residual, without sharing storage with a scratch buffer.
        theta = theta - alpha * gradient
        cost[i] = computeCost(X, y, theta)
    return theta, cost

# Fit the parameters with batch gradient descent.
theta, cost = gradientDescent(X, y, theta, alpha, iters)


# Test set: the same feature columns as the training file, without the price.
testpath = 'testdata2.txt'
testdata = pd.read_csv(testpath, header=None, names=['Size', 'Bedrooms'])
# Scale with the TRAINING mean/std. theta was learned on features normalized
# by those statistics; using the test set's own statistics would feed the
# model differently scaled inputs (and yields NaN for a single-row file,
# because the sample standard deviation of one value is undefined).
trainFeatures = pd.read_csv(
    path, header=None, names=['Size', 'Bedrooms', 'Price']
)[['Size', 'Bedrooms']]
testdata = (testdata - trainFeatures.mean()) / trainFeatures.std()
testdata.insert(0, 'Ones', 1)       # intercept column, as in the training matrix
Td = np.matrix(testdata.values)
def h_theta_x(theta, Td):
    """Evaluate the linear hypothesis for every row of ``Td``.

    Args:
        theta: row of parameters (it is transposed before the product).
        Td: feature matrix, one row per example.

    Returns:
        ``Td * theta.T``; a column of predictions when both arguments are
        ``np.matrix`` objects.
    """
    weights = theta.T
    predictions = Td * weights
    return predictions
# Predict on the test matrix Td that was prepared for this purpose (predicting
# on the training matrix X would leave Td unused), then undo the target
# scaling so the numbers are prices again.
d1 = h_theta_x(theta, Td) * stdPrice + meanPrice
d1 = d1.astype(np.int32)      # whole-number prices; astype drops the fraction
print('通过梯度下降')
print(d1)

# Plot the cost recorded at every iteration to inspect convergence.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()


#—————————————————————————————————————————————
def normalEqn(X, y):
    """Solve linear regression in closed form with the normal equation.

    Computes ``pinv(X.T @ X) @ X.T @ y``. The pseudo-inverse is used instead
    of a plain inverse so that a singular ``X.T @ X`` (for example when two
    feature columns are linearly dependent) still yields a least-squares
    solution instead of raising ``LinAlgError``.

    Args:
        X: design matrix, one row per example.
        y: column of targets, one row per example.

    Returns:
        The parameters as a column, one row per column of ``X``.
    """
    theta = np.linalg.pinv(X.T @ X) @ X.T @ y
    return theta

# Closed-form solution of the same regression problem.
nEtheta = normalEqn(X, y)

# testpath, testdata and Td (the scaled test matrix with its intercept column)
# are already defined by the gradient-descent section earlier in this script,
# so the test file is not read and scaled a second time here.
def h_theta_x(theta, Td):
    """Return the linear-model output ``Td * theta.T``.

    Args:
        theta: row of parameters; transposed before it is multiplied.
        Td: feature matrix, one row per example.

    Returns:
        The product ``Td * theta.T`` (a column of predictions for
        ``np.matrix`` arguments).
    """
    column_of_weights = theta.T
    return Td * column_of_weights
# Predict with the normal-equation parameters (not the gradient-descent theta,
# which would just reproduce d1). normalEqn returns a column while h_theta_x
# expects a row, hence the transpose. Td is the test matrix; the target
# scaling is undone so the numbers are prices again.
d2 = h_theta_x(nEtheta.T, Td) * stdPrice + meanPrice
d2 = d2.astype(np.int32)      # whole-number prices; astype drops the fraction
print('通过正规方程')
print(d2)

# Per-parameter gap between the gradient-descent and closed-form estimates.
# nEtheta is transposed so both operands are rows; subtracting a column from
# a row would broadcast into a square matrix of unrelated differences.
print(theta - nEtheta.T)