- 多元线性回归:含有多个特征,即多个自变量(输入)
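- 公式:$h_\theta(x) = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \cdots + \theta_n x_n$
- 参数:$\theta_0, \theta_1, \ldots, \theta_n$
- 损失函数:$J(\theta_0, \theta_1, \ldots, \theta_n) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$
- 梯度下降: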
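Repeat{
$\theta_j := \theta_j - \alpha \frac{\partial}{\partial \theta_j} J(\theta_0, \theta_1, \ldots, \theta_n)$
(j=0,1,2,3,4,...,n,所有参数同时更新)
}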
下面是未使用sklearn的代码
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D #3D绘图工具
# Load the data file (comma-separated values).
data = np.genfromtxt("Delivery.csv", delimiter=",")
# Split the columns: every column except the last one holds the inputs,
# and the last column holds the value to predict.
x_data, y_data = data[:, :-1], data[:, -1]
# Hyperparameters: learning rate and maximum number of iterations.
lr = 0.00006
epochs = 1000
# Model parameters, all starting at zero.
theta0 = theta1 = theta2 = 0
# Mean squared error of the two-feature linear model
def compute_error(theta0, theta1, theta2, x_data, y_data):
    """Return the mean squared error of the linear model on the data.

    The prediction for each row is
    ``theta0 + theta1 * x_data[i, 0] + theta2 * x_data[i, 1]``.

    Args:
        theta0: Intercept term.
        theta1: Weight applied to the first column of ``x_data``.
        theta2: Weight applied to the second column of ``x_data``.
        x_data: 2-D array-like with at least two columns, one row per sample.
        y_data: 1-D array-like of target values, one per row of ``x_data``.

    Returns:
        The average of the squared differences between targets and
        predictions.

    Raises:
        ValueError: If ``x_data`` contains no samples.
    """
    x_data = np.asarray(x_data, dtype=float)
    y_data = np.asarray(y_data, dtype=float)
    sample_count = len(x_data)
    if sample_count == 0:
        # Fail loudly instead of dividing by zero.
        raise ValueError("x_data must contain at least one sample")
    predictions = theta0 + theta1 * x_data[:, 0] + theta2 * x_data[:, 1]
    residuals = y_data - predictions
    return float(np.sum(residuals ** 2) / sample_count)
# Batch gradient descent for the two-feature linear model
def gradient_descent_runner(x_data, y_data, lr, theta0, theta1, theta2, epochs):
    """Fit ``theta0 + theta1 * x0 + theta2 * x1`` by batch gradient descent.

    Each iteration computes the gradient over all samples and then updates
    the three parameters simultaneously, each from its own previous value.

    Args:
        x_data: 2-D array-like with at least two columns, one row per sample.
        y_data: 1-D array-like of target values, one per row of ``x_data``.
        lr: Learning rate (step size).
        theta0: Initial intercept.
        theta1: Initial weight for the first column of ``x_data``.
        theta2: Initial weight for the second column of ``x_data``.
        epochs: Number of gradient-descent iterations to run.

    Returns:
        Tuple ``(theta0, theta1, theta2)`` after ``epochs`` iterations.

    Raises:
        ValueError: If ``x_data`` contains no samples.
    """
    x_data = np.asarray(x_data, dtype=float)
    y_data = np.asarray(y_data, dtype=float)
    # Number of samples
    m = len(x_data)
    if m == 0:
        # Fail loudly instead of dividing by zero.
        raise ValueError("x_data must contain at least one sample")
    x0 = x_data[:, 0]
    x1 = x_data[:, 1]
    for _ in range(epochs):
        # Prediction minus target for every sample under the current parameters.
        residuals = theta0 + theta1 * x0 + theta2 * x1 - y_data
        theta0_grad = residuals.sum() / m
        theta1_grad = (residuals * x0).sum() / m
        theta2_grad = (residuals * x1).sum() / m
        # Simultaneous update: every parameter steps from its OWN old value.
        # (Previously theta1 and theta2 were derived from the updated theta0.)
        theta0, theta1, theta2 = (
            theta0 - lr * theta0_grad,
            theta1 - lr * theta1_grad,
            theta2 - lr * theta2_grad,
        )
    return theta0, theta1, theta2
# Report the parameters and the error before training.
initial_error = compute_error(theta0, theta1, theta2, x_data, y_data)
print("Starting theta0 = {0}, theta1 = {1},theta2 = {2}, error = {3}".format(
    theta0, theta1, theta2, initial_error))
print("Running...")
theta0, theta1, theta2 = gradient_descent_runner(
    x_data, y_data, lr, theta0, theta1, theta2, epochs)
# Report the parameters and the error after training.
final_error = compute_error(theta0, theta1, theta2, x_data, y_data)
print(" After {0}, theta0 = {1}, theta1 = {2},theta2 = {3}, error = {4}".format(
    epochs, theta0, theta1, theta2, final_error))

# Scatter the samples in 3D as large red circular markers.
ax = plt.figure().add_subplot(111, projection="3d")
ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c="r", marker="o", s=100)

# Build a coordinate grid from the two input columns and evaluate the
# fitted plane on it.
x0, x1 = np.meshgrid(x_data[:, 0], x_data[:, 1])
z = theta0 + x0 * theta1 + x1 * theta2
# Draw the fitted plane.
ax.plot_surface(x0, x1, z)

# Axis labels
ax.set_xlabel("Miles")
ax.set_ylabel("Num of Deliveries")
ax.set_zlabel("Time")
plt.show()
打印数据
Starting theta0 = 0, theta1 = 0,theta2 = 0, error = 47.279999999999994
Running...
After 1000, theta0 = 0.07789165936149452, theta1 = 0.07829960706904125,theta2 = 0.07794715536865685, error = 0.7697147039108087
运行结果图
- 使用sklearn—多元线性回归
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # 3D plotting toolkit
from sklearn import linear_model

# Load the data (comma-separated values).
data = np.genfromtxt("Delivery.csv", delimiter=',')
# Split the columns: all but the last are inputs, the last is the target.
x_data = data[:, :-1]
y_data = data[:, -1]

# Fit an ordinary linear regression model.
model = linear_model.LinearRegression()
model.fit(x_data, y_data)
# Coefficients (one per input column)
print("coefficients: ", model.coef_)
# Intercept
print("intercept: ", model.intercept_)

# Predict for one test sample.
x_test = [[102, 4]]
predict = model.predict(x_test)
# The closing parenthesis was missing here, which made the script a syntax error.
print("predict: ", predict)

# Scatter the samples in 3D as large red circular markers.
ax = plt.figure().add_subplot(111, projection='3d')
ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c='r', marker='o', s=100)
x0 = x_data[:, 0]
x1 = x_data[:, 1]
# Build a coordinate grid and evaluate the fitted plane on it.
x0, x1 = np.meshgrid(x0, x1)
z = model.intercept_ + x0 * model.coef_[0] + x1 * model.coef_[1]
# Draw the fitted plane.
ax.plot_surface(x0, x1, z)
# Axis labels
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')
plt.show()
-
可以看出两个图还是有一些不一样,主要是由于sklearn库使用的是标准方程法(直接求出最优解),而我们使用的是梯度下降法(在有限的迭代次数内只能逐步逼近最优解)。