机器学习--多元线性回归07

最新推荐文章于 2023-09-16 10:15:22 发布

深海漫步鹅

最新推荐文章于 2023-09-16 10:15:22 发布

阅读量142

点赞数

分类专栏：机器学习　文章标签：机器学习、多元线性回归

本文链接：https://blog.csdn.net/JerryZhang1111/article/details/116496142

版权

机器学习专栏收录该内容

27 篇文章 0 订阅

订阅专栏

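多元线性回归的模型与多个自变量相关，其假设函数如下：

$$h_\theta(x) = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \cdots + \theta_n x_n$$

损失函数（其中 $m$ 为样本数）：

$$J(\theta_0, \theta_1, \ldots, \theta_n) = \frac{1}{2m} \sum_{i=1}^{m} \left( y^{(i)} - h_\theta(x^{(i)}) \right)^2$$

梯度下降公式（$\alpha$ 为学习率，约定 $x_0^{(i)} = 1$，所有 $\theta_j$ 同步更新）：

$$\theta_j := \theta_j - \alpha \frac{\partial}{\partial \theta_j} J(\theta) = \theta_j - \alpha \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$$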

梯度下降计算多元回归方程

导入包

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  
from mpl_toolkits.mplot3d import Axes3D

读入数据

# Load the delivery data set. header=None keeps the first CSV row as data,
# so the columns get the integer labels 0, 1, 2.
data = pd.read_csv(
    r"Delivery.csv",
    sep=',',
    header=None,
)
print(data)

0 1 2
0 100 4 9.3
1 50 3 4.8
2 100 4 8.9
3 100 2 6.5
4 50 2 4.2
5 80 2 6.2
6 75 3 7.4
7 65 4 6.0
8 90 3 7.6
9 90 2 6.1

切分数据

# Every column except the last one is a feature; the last column is the target.
x_data, y_data = data.iloc[:, :-1], data.iloc[:, -1]
for part in (x_data, y_data):
    print(part)

0 1
0 100 4
1 50 3
2 100 4
3 100 2
4 50 2
5 80 2
6 75 3
7 65 4
8 90 3
9 90 2
0 9.3
1 4.8
2 8.9
3 6.5
4 4.2
5 6.2
6 7.4
7 6.0
8 7.6
9 6.1
Name: 2, dtype: float64

# A bare expression is only echoed by an interactive notebook (and only the
# last one in a cell); print both shapes explicitly so they always appear.
print(x_data.shape)
print(y_data.shape)

(10, 2)
(10,)

定义超参数

# Learning rate: step size applied to every gradient-descent update
lr = 1e-4
# Model parameters (intercept theta0 and the two coefficients), all starting at 0
theta0 = theta1 = theta2 = 0
# Maximum number of iterations
epochs = 1000

建立损失函数，用最小二乘法

def compute_error(theta0, theta1, theta2, x_data, y_data):
    """Return the halved mean squared error of the two-feature linear model.

    The prediction for a row is ``theta0 + theta1 * x[0] + theta2 * x[1]`` and
    the result is ``sum((y - prediction) ** 2) / m / 2`` with ``m`` the number
    of rows in ``x_data``.

    Args:
        theta0: Intercept of the model.
        theta1: Coefficient applied to the first feature column.
        theta2: Coefficient applied to the second feature column.
        x_data: Table of samples with at least two columns (DataFrame,
            ndarray or nested sequence); only the first two columns are used.
        y_data: One target value per row of ``x_data`` (Series, ndarray or
            sequence). Values are matched to rows by position, not by label.

    Returns:
        The halved mean squared error as a float.

    Raises:
        ZeroDivisionError: If ``x_data`` has no rows (same error type the
            row-by-row implementation produced).
        IndexError: If ``y_data`` has fewer entries than ``x_data`` has rows.
    """
    m = len(x_data)
    if m == 0:
        raise ZeroDivisionError("compute_error needs at least one sample")

    # Convert once to plain arrays: this gives positional (not label-aligned)
    # arithmetic and lets NumPy evaluate all rows in a single expression.
    features = np.asarray(x_data, dtype=float)
    targets = np.asarray(y_data, dtype=float)
    if targets.shape[0] < m:
        raise IndexError("y_data has fewer entries than x_data has rows")

    predictions = theta1 * features[:, 0] + theta2 * features[:, 1] + theta0
    residuals = targets[:m] - predictions
    return np.sum(residuals ** 2) / float(m) / 2.0

梯度下降公式计算

def gradient_descent_runner(x_data, y_data, theta0, theta1, theta2, lr, epochs):
    """Fit the two-feature linear model with batch gradient descent.

    Each epoch computes the residuals ``prediction - y`` for every row using
    the current parameters, averages the gradient of the halved mean squared
    error over all rows, and then updates the three parameters together.

    Args:
        x_data: Table of samples with at least two columns (DataFrame,
            ndarray or nested sequence); only the first two columns are used.
        y_data: One target value per row of ``x_data`` (Series, ndarray or
            sequence). Values are matched to rows by position, not by label.
        theta0: Initial intercept.
        theta1: Initial coefficient of the first feature column.
        theta2: Initial coefficient of the second feature column.
        lr: Learning rate multiplied with each averaged gradient.
        epochs: Number of update steps to perform.

    Returns:
        Tuple ``(theta0, theta1, theta2)`` after ``epochs`` updates.

    Raises:
        IndexError: If ``y_data`` has fewer entries than ``x_data`` has rows.
    """
    # Total number of samples
    m = len(x_data)
    if m == 0:
        # With no samples every gradient is zero, so the parameters never move.
        return theta0, theta1, theta2

    # Convert once, outside the epoch loop: positional arithmetic on plain
    # arrays instead of one scalar .iloc lookup per value per epoch.
    features = np.asarray(x_data, dtype=float)
    targets = np.asarray(y_data, dtype=float)
    if targets.shape[0] < m:
        raise IndexError("y_data has fewer entries than x_data has rows")
    first = features[:, 0]
    second = features[:, 1]
    targets = targets[:m]

    for _ in range(epochs):
        # Residuals are computed once per epoch from the current parameters
        # and shared by all three gradients.
        residuals = (theta1 * first + theta2 * second + theta0) - targets
        # Gradient of the loss, averaged over all samples
        theta0_grad = np.sum(residuals) / m
        theta1_grad = np.dot(first, residuals) / m
        theta2_grad = np.dot(second, residuals) / m
        # Update theta0, theta1 and theta2 simultaneously
        theta0 = theta0 - (lr * theta0_grad)
        theta1 = theta1 - (lr * theta1_grad)
        theta2 = theta2 - (lr * theta2_grad)
    return theta0, theta1, theta2

训练参数

# Report the loss at the initial parameters, run gradient descent, then
# report the fitted parameters together with the resulting loss.
initial_error = compute_error(theta0, theta1, theta2, x_data, y_data)
print(
    "Starting theta0 = {0}, theta1 = {1}, theta2 = {2}, error = {3}".format(
        theta0, theta1, theta2, initial_error
    )
)
print("Running...")
theta0, theta1, theta2 = gradient_descent_runner(
    x_data, y_data, theta0, theta1, theta2, lr, epochs
)
final_error = compute_error(theta0, theta1, theta2, x_data, y_data)
print(
    "After {0} iterations theta0 = {1}, theta1 = {2}, theta2 = {3}, error = {4}".format(
        epochs, theta0, theta1, theta2, final_error
    )
)

Starting theta0 = 0, theta1 = 0, theta2 = 0, error = 47.279999999999994
Running...
After 1000 iterations theta0 = 0.006971416196678632, theta1 = 0.08021042690771771, theta2 = 0.07611036240566814, error = 0.7731271432218118

注：以上 error 是未乘 1/2 系数的均方误差；按上文 compute_error 的定义（结果再除以 2.0），实际输出约为上述数值的一半，即约 23.64 和 0.3866。

# Scatter the observations in 3D and overlay the fitted regression plane.
figure = plt.figure()
ax = figure.add_subplot(111, projection = '3d')
# Observations are drawn as large red circles
ax.scatter(
    x_data.iloc[:,0],
    x_data.iloc[:,1],
    y_data,
    c = 'r',
    marker = 'o',
    s = 100,
)
# Build the coordinate grid from the two feature columns
x0, x1 = np.meshgrid(x_data.iloc[:,0], x_data.iloc[:,1])
# Height of the fitted plane at every grid point
z = theta0 + x0*theta1 + x1*theta2
# Draw the 3D surface
ax.plot_surface(x0, x1, z)
# Label the axes
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')

# Show the figure
plt.show()

在这里插入图片描述

sklearn计算多元线性回归

导入包

import numpy as np
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt  
from mpl_toolkits.mplot3d import Axes3D

读入数据

# Load the delivery data set. header=None keeps the first CSV row as data,
# so the columns get the integer labels 0, 1, 2.
data = pd.read_csv(
    "Delivery.csv",
    sep=',',
    header=None,
)
print(data)

0 1 2
0 100 4 9.3
1 50 3 4.8
2 100 4 8.9
3 100 2 6.5
4 50 2 4.2
5 80 2 6.2
6 75 3 7.4
7 65 4 6.0
8 90 3 7.6
9 90 2 6.1

切分数据

# Every column except the last one is a feature; the last column is the target.
x_data, y_data = data.iloc[:, :-1], data.iloc[:, -1]
for part in (x_data, y_data):
    print(part)

0 1
0 100 4
1 50 3
2 100 4
3 100 2
4 50 2
5 80 2
6 75 3
7 65 4
8 90 3
9 90 2
0 9.3
1 4.8
2 8.9
3 6.5
4 4.2
5 6.2
6 7.4
7 6.0
8 7.6
9 6.1
Name: 2, dtype: float64

# A bare expression is only echoed by an interactive notebook (and only the
# last one in a cell); print both shapes explicitly so they always appear.
print(x_data.shape)
print(y_data.shape)

(10, 2)
(10,)

创建模型

# Create the linear-regression model and fit it in one step
# (fit() returns the estimator itself).
estimator = linear_model.LinearRegression().fit(x_data, y_data)

# Coefficients of the two features
print("coefficients:", estimator.coef_)

# Intercept
print("intercept:", estimator.intercept_)

# Predict for a single unseen sample: [first feature, second feature]
x_test = [[102, 4]]
predict = estimator.predict(x_test)
print("predict:", predict)

coefficients: [ 0.0611346 0.92342537]
intercept: -0.868701466782
predict: [ 9.06072908]

图像显示

# Scatter the observations in 3D and overlay the plane fitted by sklearn.
figure = plt.figure()
ax = figure.add_subplot(111, projection = '3d')
# Observations are drawn as large red circles
ax.scatter(
    x_data.iloc[:,0],
    x_data.iloc[:,1],
    y_data,
    c = 'r',
    marker = 'o',
    s = 100,
)
# Build the coordinate grid from the two feature columns
x0, x1 = np.meshgrid(x_data.iloc[:,0], x_data.iloc[:,1])
# Height of the fitted plane at every grid point
z = estimator.intercept_ + x0*estimator.coef_[0] + x1*estimator.coef_[1]
# Draw the 3D surface
ax.plot_surface(x0, x1, z)
# Label the axes
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')

# Show the figure
plt.show()