线性回归与非线性回归：1.2—多元线性回归及实战

最新推荐文章于 2023-01-13 14:37:09 发布

zy_小轩

最新推荐文章于 2023-01-13 14:37:09 发布

阅读量1k

点赞数

分类专栏：机器学习基础

本文链接：https://blog.csdn.net/zy_505775013/article/details/88697488

版权

机器学习基础专栏收录该内容

17 篇文章 1 订阅

订阅专栏

多元线性回归：含有多个特征，即多个自变量（输入）
公式： $h_{\theta}(x)=\theta^TX=\theta_0x_0+\theta_1x_1+\theta_2x_2+\cdots+\theta_nx_n$（其中约定 $x_0=1$，因此 $\theta_0$ 为截距项）
参数： $\theta_0,\theta_1,\theta_2,\ldots,\theta_n$
损失函数： $J(\theta_0,\theta_1,...,\theta_n)=\tfrac{1}{2m} \sum_{i=1}^{m}(h_\theta(x_i)-y_i)^2$（注意：下文代码中的 compute_error 只除以 $m$ 而没有除以 $2m$，因此打印出的 error 是该式的 2 倍；梯度的计算仍与该式一致）
梯度下降：

Repeat{

$\theta_j:=\theta_j-\alpha\frac{\partial }{\partial \theta_j}J(\theta_0,\theta_1...\theta_n)$ (j=0,1,2,3,4,...,n)
}

下面是未使用sklearn的代码

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D #3D绘图工具

# Load the data file (comma-delimited).
data = np.genfromtxt("Delivery.csv",delimiter=',')

# Split the data: the last column goes to y_data, every other column to x_data.
# (The rest of the script reads exactly two feature columns, x_data[:,0] and
# x_data[:,1] — presumably the CSV has three columns; verify against the file.)
x_data = data[:,:-1]# all rows, every column except the last one
y_data = data[:,-1]# all rows, last column only

# Learning rate
lr = 0.00006
# Initial parameter values
theta0 = 0
theta1 = 0
theta2 = 0
# Maximum number of iterations
epochs = 1000
#最小二乘法
def compute_error(theta0, theta1, theta2, x_data, y_data):
    """Return the mean squared error of the plane over the data set.

    The prediction for sample ``i`` is
    ``theta0 + theta1 * x_data[i][0] + theta2 * x_data[i][1]``.  The squared
    differences to ``y_data[i]`` are summed and divided by the number of
    samples (by ``m``, not ``2m``).
    """
    squared_sum = 0
    for idx, features in enumerate(x_data):
        predicted = theta0 + theta1 * features[0] + theta2 * features[1]
        squared_sum += (y_data[idx] - predicted) ** 2
    sample_count = float(len(x_data))
    return squared_sum / sample_count


#梯度下降法优化
def gradient_descent_runner(x_data, y_data, lr, theta0, theta1, theta2, epochs):
    """Fit ``y = theta0 + theta1 * x0 + theta2 * x1`` by batch gradient descent.

    Args:
        x_data: 2-D array-like; column 0 and column 1 are the two features.
        y_data: 1-D array-like of targets, one per row of ``x_data``.
        lr: learning rate (step size).
        theta0, theta1, theta2: starting values of the intercept and the two
            feature weights.
        epochs: number of full passes over the data.

    Returns:
        The tuple ``(theta0, theta1, theta2)`` after ``epochs`` updates.  The
        starting values are returned unchanged when ``epochs`` is 0 or the
        data set is empty.
    """
    features = np.asarray(x_data, dtype=float)
    targets = np.asarray(y_data, dtype=float)
    # Number of samples
    m = len(features)
    if m == 0:
        # Nothing to learn from; also avoids dividing by zero below.
        return theta0, theta1, theta2

    x0 = features[:, 0]
    x1 = features[:, 1]
    for _ in range(epochs):
        # Prediction error of every sample under the current parameters.
        residual = theta0 + theta1 * x0 + theta2 * x1 - targets
        # Partial derivatives of J = 1/(2m) * sum(residual ** 2).
        theta0_grad = residual.sum() / m
        theta1_grad = (residual * x0).sum() / m
        theta2_grad = (residual * x1).sum() / m
        # Simultaneous update: each parameter steps from ITS OWN previous
        # value (stepping theta1/theta2 from theta0 collapses all three).
        theta0 = theta0 - lr * theta0_grad
        theta1 = theta1 - lr * theta1_grad
        theta2 = theta2 - lr * theta2_grad
    return theta0, theta1, theta2

# Report the parameters and the error before training.
initial_error = compute_error(theta0, theta1, theta2, x_data, y_data)
print("Starting theta0 = {0}, theta1 = {1},theta2 = {2}, error = {3}".
      format(theta0, theta1, theta2, initial_error))
print("Running...")
# Run gradient descent and report the fitted parameters and final error.
theta0, theta1, theta2 = gradient_descent_runner(x_data, y_data, lr, theta0, theta1, theta2, epochs)
final_error = compute_error(theta0, theta1, theta2, x_data, y_data)
print(" After {0}, theta0 = {1}, theta1 = {2},theta2 = {3}, error = {4}".
      format(epochs, theta0, theta1, theta2, final_error))

# 3D scatter of the samples, drawn as red circles (marker 'o').
figure = plt.figure()
ax = figure.add_subplot(111, projection='3d')
ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c='r', marker='o', s=100)
# Build the coordinate grid from the two feature columns.
x0, x1 = np.meshgrid(x_data[:, 0], x_data[:, 1])
# Height of the fitted plane at every grid point.
z = theta0 + x0 * theta1 + x1 * theta2
# Draw the fitted plane.
ax.plot_surface(x0, x1, z)
# Axis labels
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')

plt.show()

打印数据

Starting theta0 = 0, theta1 = 0,theta2 = 0, error = 47.279999999999994
Running...
 After 1000, theta0 = 0.07789165936149452, theta1 = 0.07829960706904125,theta2 = 0.07794715536865685, error = 0.7697147039108087

运行结果图

使用sklearn—多元线性回归

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import linear_model

# Load the data (comma-delimited file).
data = np.genfromtxt("Delivery.csv", delimiter=',')
# Split: every column except the last is a feature, the last column is the target.
x_data = data[:, :-1]
y_data = data[:, -1]

# Fit a linear regression model on the full data set.
model = linear_model.LinearRegression()
model.fit(x_data, y_data)

# Coefficients (one per feature column)
print("coefficients: ", model.coef_)

# Intercept
print("intercept: ", model.intercept_)

# Predict the target for a single test sample.
x_test = [[102, 4]]
predict = model.predict(x_test)
# The closing parenthesis was missing here, which made the whole script a SyntaxError.
print("predict: ", predict)

# 3D scatter of the samples, drawn as red circles (marker 'o').
figure = plt.figure()
ax = figure.add_subplot(111, projection='3d')
ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c='r', marker='o', s=100)
# Build the coordinate grid from the two feature columns.
x0, x1 = np.meshgrid(x_data[:, 0], x_data[:, 1])
# Height of the fitted plane at every grid point, from the model's parameters.
slope0, slope1 = model.coef_[0], model.coef_[1]
z = model.intercept_ + x0 * slope0 + x1 * slope1
# Draw the fitted plane.
ax.plot_surface(x0, x1, z)
# Axis labels
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')

plt.show()