一、参考链接
(1)https://blog.csdn.net/weixin_44027820/article/details/104535408
二、源码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D
# Load the training set: each row is (house size, bedroom count, sale price).
# The file has no header row, so column names are supplied explicitly.
path = 'ex1data2.txt'
data = pd.read_csv(path, names=['Size', 'Bedrooms', 'Price'])
m = len(data)  # number of training examples
size = data['Size']
bedrooms = data['Bedrooms']
price = data['Price']
# Show the raw data as a 3D scatter plot before any preprocessing.
# (Removed a dead `fig.get_size_inches()` call whose result was discarded.)
fig = plt.figure(figsize=(6, 12), dpi=80)
ax = fig.add_subplot(111, projection='3d')  # create a 3D axes
ax.set_title('data')
ax.scatter(size, bedrooms, price, c='b', marker='x')  # c: marker color
ax.set_xlabel('Size')
ax.set_ylabel('Bedrooms')
ax.set_zlabel('Price')
plt.show()
# Feature scaling: mean-normalize each column by its value range (max - min).
x1 = np.array(size).reshape(-1, 1)
x2 = np.array(bedrooms).reshape(-1, 1)
y = np.array(price).reshape(-1, 1)
# Stack the columns into one ndarray so all three are scaled in one pass.
data = np.concatenate((x1, x2, y), axis=1)
col_mean = np.mean(data, axis=0)   # per-column mean
col_range = np.ptp(data, axis=0)   # per-column max-min spread
nor_data = (data - col_mean) / col_range
# Design matrix: intercept column x0 = 1 followed by the two scaled features.
X = np.insert(nor_data[..., :2], 0, 1, axis=1)
y = nor_data[..., -1]  # scaled target vector
# 计算代价函数J(θ)
def cost_function(X, y, theta):
    """Mean-squared-error cost J(theta) = sum((X @ theta - y)^2) / (2m).

    X: (m, n) design matrix; y: (m,) targets; theta: (n,) parameters.
    Returns the scalar cost.
    """
    # Derive the sample count from X itself instead of relying on the
    # module-level global `m` (same value here, but self-contained).
    m = len(X)
    diff = X.dot(theta.T) - y
    return np.sum(np.power(diff, 2)) / (2 * m)
# 求偏导
def gradient_function(X, y, theta):
    """Gradient of J(theta): (1/m) * (X @ theta - y) @ X, shape (n,)."""
    # Use the local sample count rather than the module-level global `m`,
    # so the function is correct for any design matrix it is given.
    m = len(X)
    diff = X.dot(theta.T) - y
    return diff.dot(X) / m
# Gradient descent
def gradient_descent(X, y, alpha, epoch):
    """Run batch gradient descent for a fixed number of iterations.

    X: (m, n) design matrix (first column all ones); y: (m,) targets;
    alpha: learning rate; epoch: iteration count.
    Returns (theta, cost) where cost[i] is J(theta) after iteration i.
    """
    # BUG FIX: the original read `np.array((m, 1, 1))`, seeding theta[0]
    # with the global sample count m — a typo for a neutral start.
    # Initialize with zeros, sized from X so no global is needed.
    theta = np.zeros(X.shape[1])
    gradient = gradient_function(X, y, theta)
    cost = np.zeros(epoch)
    # A fixed iteration count (instead of a gradient-tolerance while-loop)
    # lets J(theta) be recorded and plotted per iteration.
    for i in range(epoch):
        theta = theta - alpha * gradient
        gradient = gradient_function(X, y, theta)
        cost[i] = cost_function(X, y, theta)
    return theta, cost
# Train the model, then visualize the learning curve (cost per iteration).
alpha = 0.01   # learning rate
epoch = 4000   # number of gradient-descent iterations
optimal_theta, cost = gradient_descent(X, y, alpha, epoch)
print('optimal_theta:', optimal_theta)
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(epoch), cost, 'r')  # np.arange gives the iteration axis
ax.set_title('Error vs. Training Epoch')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
plt.show()
三、结果
optimal_theta: [1.24671178e-16 7.47082078e-01 2.82288368e-01]