多元线性回归算法python实现_机器学习(2)–单元、多元线性回归Python实现 – 算法网...

这篇博客介绍了如何使用Python实现多元线性回归,包括梯度下降法和正规方程两种方法。通过比较,发现在小规模变量情况下正规方程更为高效。文中给出了房价预测和酒质量预测两个案例,提供了数据资源和完整代码,并展示了标准化处理的过程。
摘要由CSDN通过智能技术生成

分享数据资源txt文件与源码PY:

链接:https://pan.baidu.com/s/1L5YjhEH2W4jSV62tOsua1Q

提取码:czai

把txt放在py同级目录下即可。

对numpy与Pyplot做了注解。

在这种变量个数较少的情况下,正规方程明显比梯度下降好用得多:梯度下降还需要反复尝试迭代次数和学习率,才能找到合适的取值。

单变量线性回归,利用数据prices.txt,房价预测

import numpy as np

import matplotlib.pyplot as plt

# 梯度下降求y=θ0+θ1x

def getθ(x, y, α, iterations=1000):
    """Fit the line y = θ0 + θ1 * x by batch gradient descent.

    Both parameters start at 0. On every iteration the two partial
    derivatives of the mean squared error are evaluated at the same
    (θ0, θ1) and only then applied, so the update is simultaneous.

    Args:
        x: One-dimensional sequence or array of feature values.
        y: Target values, same length as ``x``.
        α: Learning rate (step size).
        iterations: Number of gradient steps to take. Defaults to 1000.

    Returns:
        ``[θ0, θ1]`` as Python floats.

    Raises:
        ValueError: If ``x`` is empty or ``x`` and ``y`` differ in shape.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    m = len(x)
    if m == 0 or x.shape != y.shape:
        raise ValueError("x and y must be non-empty and have the same shape")

    θ0, θ1 = 0.0, 0.0
    for _ in range(iterations):
        residual = θ0 + θ1 * x - y
        # Derive both gradients from the same residual before touching
        # either parameter; updating θ0 first would skew θ1's gradient.
        grad0 = residual.sum() / m
        grad1 = (residual * x).sum() / m
        θ0 -= α * grad0
        θ1 -= α * grad1
    return [float(θ0), float(θ1)]

# 正规方程求y=θ0+θ1x

def getθ2(x, y):
    """Fit the line y = θ0 + θ1 * x by ordinary least squares.

    Builds the design matrix ``[1, x]`` and solves the least-squares
    problem with ``np.linalg.lstsq`` instead of forming the explicit
    inverse of XᵀX, which is less accurate for ill-conditioned data.

    Args:
        x: One-dimensional sequence or array of feature values.
        y: Target values, same length as ``x``.

    Returns:
        ``[θ0, θ1]`` as Python floats.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # The leading column of ones carries the intercept θ0.
    design = np.column_stack((np.ones_like(x), x))
    coefficients, *_ = np.linalg.lstsq(design, y, rcond=None)
    return coefficients.tolist()

# Load the samples from prices.txt: one "x,y" pair per line.
x, y = [], []
with open("prices.txt", "r") as data_file:  # the file is closed on exit
    for sample in data_file:
        if not sample.strip():
            continue  # tolerate blank lines such as a trailing newline
        _x, _y = sample.split(",")
        x.append(float(_x))
        y.append(float(_y))

x, y = np.array(x), np.array(y)  # convert to NumPy arrays for vector math
x = (x - x.mean()) / x.std()  # standardize: zero mean, unit standard deviation

plt.figure()
plt.xlabel("x轴")
plt.ylabel("y轴")
plt.scatter(x, y, c="g", s=6)  # s is the marker size, c the colour (green)

# Both fitted lines are drawn over the same 100 points on [-2, 4].
line_x = np.linspace(-2, 4, 100)

# Gradient-descent fit, drawn in green.
gd_theta = getθ(x, y, 0.2)
print("梯度下降实现:y=" + str(gd_theta[0]) + "+" + str(gd_theta[1]) + "*x")
plt.plot(line_x, gd_theta[0] + gd_theta[1] * line_x, "g")

# Normal-equation fit, drawn in red.
ne_theta = getθ2(x, y)
print("正规方程实现:y=" + str(ne_theta[0]) + "+" + str(ne_theta[1]) + "*x")
plt.plot(line_x, ne_theta[0] + ne_theta[1] * line_x, "r")

plt.show()

得到的结果如下:

多元线性回归:利用数据集wine.txt,根据酒的各项指标预测酒的质量好坏。由于只有11个变量,代码没有做通用化处理,而是把每个变量直接硬写了出来:

import numpy as np

# 梯度下降求向量θ

def getθ(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, y, α,
         iterations=1500):
    """Fit y = θ0 + θ1*x1 + ... + θ11*x11 by batch gradient descent.

    All twelve parameters start at 0. Each iteration computes the full
    gradient of the mean squared error from a single residual vector and
    then updates every parameter at once, so no parameter's gradient is
    contaminated by another parameter's partial sum or by a value that
    was already updated in the same step.

    Args:
        x1, ..., x11: Feature columns; one-dimensional sequences or
            arrays that all have the same length.
        y: Target values, same length as the feature columns.
        α: Learning rate (step size).
        iterations: Number of gradient steps to take. Defaults to 1500,
            the value that was previously hard-coded.

    Returns:
        ``[θ0, θ1, ..., θ11]`` as a list of Python floats.

    Raises:
        ValueError: If the inputs are empty or their lengths disagree.
    """
    feature_columns = (x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11)
    m = len(x1)
    y = np.asarray(y, dtype=float)
    if m == 0 or y.shape != (m,):
        raise ValueError("features and y must be non-empty and equally long")

    # Design matrix: a leading column of ones for the intercept θ0,
    # followed by one column per feature.
    design = np.column_stack(
        [np.ones(m)] + [np.asarray(col, dtype=float) for col in feature_columns]
    )

    θ = np.zeros(design.shape[1])
    for _ in range(iterations):
        residual = design @ θ - y
        θ -= (α / m) * (design.T @ residual)
    return θ.tolist()

# 正规方程求向量θ

def getθ2(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, y):
    """Fit y = θ0 + θ1*x1 + ... + θ11*x11 by ordinary least squares.

    Builds the design matrix ``[1, x1, ..., x11]`` and solves the
    least-squares problem with ``np.linalg.lstsq`` instead of forming the
    explicit inverse of XᵀX, which is less accurate for ill-conditioned
    data.

    Args:
        x1, ..., x11: Feature columns; one-dimensional sequences or
            arrays that all have the same length.
        y: Target values, same length as the feature columns.

    Returns:
        ``[θ0, θ1, ..., θ11]`` as a list of Python floats.
    """
    feature_columns = (x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11)
    # The leading column of ones carries the intercept θ0.
    design = np.column_stack(
        [np.ones(len(x1))]
        + [np.asarray(col, dtype=float) for col in feature_columns]
    )
    targets = np.asarray(y, dtype=float)
    coefficients, *_ = np.linalg.lstsq(design, targets, rcond=None)
    return coefficients.tolist()

# Load wine.txt: each line holds 11 feature values followed by the
# quality score, separated by ";".
rows = []
with open("wine.txt", "r") as data_file:  # the file is closed on exit
    for sample in data_file:
        if not sample.strip():
            continue  # tolerate blank lines such as a trailing newline
        fields = [float(value) for value in sample.split(";")]
        if len(fields) != 12:
            raise ValueError(
                "expected 12 ';'-separated values, got " + str(len(fields))
            )
        rows.append(fields)

# Transpose so that each entry is one column of the data file; the last
# column is the target y, the first eleven are the features.
*raw_features, y = np.array(rows).T

# Standardize every feature column: zero mean, unit standard deviation.
x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11 = [
    (column - column.mean()) / column.std() for column in raw_features
]

gd_theta = getθ(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, y, 0.2)
print("梯度下降得到:" + str(gd_theta))

ne_theta = getθ2(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, y)
print("正规方程得到:" + str(ne_theta))

在循环1500次以上时效果较好:

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值