多元线性回归

最新推荐文章于 2022-05-21 17:09:25 发布

CID( ͡ _ ͡°)

最新推荐文章于 2022-05-21 17:09:25 发布

阅读量151

点赞数

本文链接：https://blog.csdn.net/weixin_40533189/article/details/90183603

版权

梯度下降--多元线性回归

import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Load the comma-separated dataset from disk and echo it for inspection.
data = genfromtxt(fname='data.csv', delimiter=',')
print(data)

数据的格式:

[[100.    4.    9.3]
 [ 50.    3.    4.8]
 [100.    4.    8.9]
 [100.    2.    6.5]
 [ 50.    2.    4.2]
 [ 80.    2.    6.2]
 [ 75.    3.    7.4]
 [ 65.    4.    6. ]
 [ 90.    3.    7.6]
 [ 90.    2.    6.1]]

# Split the table: every column except the last is a feature,
# the last column is the target.
x_data, y_data = data[:, :-1], data[:, -1]
print(x_data, y_data)

# Gradient-descent settings.
lr = 0.0001      # learning rate (step size of each update)
epochas = 1000   # maximum number of iterations
# Initial parameters: theta0 is the intercept, theta1/theta2 weight the
# first and second feature columns respectively.
theta0, theta1, theta2 = 0, 1, 2
def compute_error(theta0, theta1, theta2, x_data, y_data):
    """Return the mean squared error of a two-feature linear model.

    The prediction for sample ``i`` is
    ``theta1 * x_data[i][0] + theta2 * x_data[i][1] + theta0``.

    Args:
        theta0: Intercept term.
        theta1: Weight applied to feature column 0.
        theta2: Weight applied to feature column 1.
        x_data: Indexable collection of samples; only columns 0 and 1 of
            each sample are read.
        y_data: Targets, indexed in step with ``x_data``.

    Returns:
        The sum of squared residuals divided by ``len(x_data)``.
    """
    sample_count = len(x_data)
    squared_sum = 0
    for idx, row in enumerate(x_data):
        predicted = theta1 * row[0] + theta2 * row[1] + theta0
        squared_sum += (y_data[idx] - predicted) ** 2
    return squared_sum / float(sample_count)

def graient_descent_runner(x_data, y_data, theta0, theta1, theta2, lr, epochas):
    """Run batch gradient descent for a two-feature linear model.

    The model is ``theta0 + theta1 * x[0] + theta2 * x[1]``. Each iteration
    accumulates the averaged gradient over every sample using the parameter
    values from the start of that iteration, then updates all three
    parameters together.

    Args:
        x_data: Indexable collection of samples; only columns 0 and 1 of
            each sample are read.
        y_data: Targets, indexed in step with ``x_data``.
        theta0: Initial intercept.
        theta1: Initial weight for feature column 0.
        theta2: Initial weight for feature column 1.
        lr: Learning rate multiplied into each gradient.
        epochas: Number of iterations to run.

    Returns:
        Tuple ``(theta0, theta1, theta2)`` after the final iteration.
    """
    # Total number of samples, as a float so 1/m is a true division.
    m = float(len(x_data))

    for _ in range(epochas):
        grad0 = 0
        grad1 = 0
        grad2 = 0
        # Sum each sample's contribution, already scaled by -1/m.
        for idx, row in enumerate(x_data):
            residual = y_data[idx] - (theta1 * row[0] + theta2 * row[1] + theta0)
            scale = -(1 / m)
            grad0 += scale * residual
            grad1 += scale * row[0] * residual
            grad2 += scale * row[1] * residual
        # Step every parameter against its gradient.
        theta0, theta1, theta2 = (
            theta0 - (lr * grad0),
            theta1 - (lr * grad1),
            theta2 - (lr * grad2),
        )
    return theta0, theta1, theta2

# Report the loss at the initial parameters.
start_error = compute_error(theta0, theta1, theta2, x_data, y_data)
print("Starting theta0 = {0} ,theta1 = {1} ,theta2 = {2} , error = {3}".format(
    theta0, theta1, theta2, start_error))

# Run gradient descent starting from the initial parameters.
print(".......")
theta0, theta1, theta2 = graient_descent_runner(
    x_data, y_data, theta0, theta1, theta2, lr, epochas)

final_error = compute_error(theta0, theta1, theta2, x_data, y_data)
print("After {0} iterations theta0 = {1},theta1 = {2} ,theta2 = {3} , error = {4}".format(
    epochas, theta0, theta1, theta2, final_error))

# Scatter the observations in 3-D: the two features against the target.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c='r', marker='o', s=100)

# Build coordinate grids from the observed feature values and evaluate the
# fitted plane on them.
x0, x1 = np.meshgrid(x_data[:, 0], x_data[:, 1])
z = theta0 + x0 * theta1 + x1 * theta2

# Draw the fitted plane and label the axes.
ax.plot_surface(x0, x1, z)
ax.set_xlabel('Miles')
ax.set_ylabel('Num of Deliveries')
ax.set_zlabel('Time')
plt.show()

sklearn多元线性回归(注意:sklearn里面封装的算法不是梯度下降法,而是标准方程法,所以得到的结果和上面梯度下降法的结果是不一样的)

import numpy as np
from numpy import genfromtxt
from sklearn import linear_model
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Load the dataset and echo it.
data = genfromtxt(fname='data.csv', delimiter=',')
print(data)

# Printed contents of data:
#[[100.    4.    9.3]
# [ 50.    3.    4.8]
# [100.    4.    8.9]
# [100.    2.    6.5]
# [ 50.    2.    4.2]
# [ 80.    2.    6.2]
# [ 75.    3.    7.4]
# [ 65.    4.    6. ]
# [ 90.    3.    7.6]
# [ 90.    2.    6.1]]

# Split the table: all columns but the last are features, the last is the target.
x_data, y_data = data[:, :-1], data[:, -1]
print(x_data)
print(y_data)

# Create the model and fit it; one coefficient is learned per feature column
# passed in.
model = linear_model.LinearRegression()
model.fit(X=x_data, y=y_data)

# Learned coefficients, followed by the intercept.
print('coefficients:', model.coef_)
print('intercept:', model.intercept_)

# Predict the target for a single unseen sample.
x_test = [[102, 4]]
predict = model.predict(x_test)
print('predict:', predict)

sklearn多项式回归

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures #生成多项式
from sklearn.linear_model import LinearRegression

# Load the data. Row 0 is dropped by the slices below.
# NOTE(review): row 0 is presumably a text header that genfromtxt cannot
# parse as numbers — confirm against job.csv.
data = np.genfromtxt('job.csv', delimiter=',')
x_data = data[1:, 1]
y_data = data[1:, 2]
plt.scatter(x_data, y_data)
plt.show()


# Reshape both columns to 2-D (n_samples, 1); np.newaxis appends the extra axis.
x_data = data[1:, 1, np.newaxis]
y_data = data[1:, 2, np.newaxis]
print(x_data)

# Baseline: create and fit a plain linear model.
model = LinearRegression()
model.fit(x_data, y_data)

# Plot the observations (blue dots) against the straight-line fit (red).
plt.plot(x_data, y_data, 'b.')
plt.plot(x_data, model.predict(x_data), 'r')
plt.show()


# Polynomial feature expansion; `degree` controls how many powers of x are
# generated. For example, with inputs 1 and 2:
#   degree=1 -> [[1., 1.], [1., 2.]]
#   degree=2 -> [[1., 1., 1.], [1., 2., 4.]]
# and so on.
ploy_reg = PolynomialFeatures(degree=3)

# Fit the transformer once and expand the features.
x_poly = ploy_reg.fit_transform(x_data)

# Define the regression model and train it on the expanded features.
lin_reg = LinearRegression()
lin_reg.fit(x_poly, y_data)


# Plot the observations against the polynomial fit. The transformer is
# already fitted, so prediction inputs go through transform() rather than
# being re-fitted with fit_transform().
plt.plot(x_data, y_data, 'b.')
plt.plot(x_data, lin_reg.predict(ploy_reg.transform(x_data)), c='r')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

CID( ͡ _ ͡°)

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
打赏
0
评论
多元线性回归

梯度下降--多元线性回归import numpy as npfrom numpy import genfromtxtimport matplotlib.pyplot as pltfrom mpl_toolkits.mplot3d import Axes3D# 读入数据 data = genfromtxt(r'data.csv',delimiter = ',')print...
复制链接

扫一扫