多元回归
加载包
import openpyxl
from sympy import *#求导函数用得包
import matplotlib.pyplot as plt
读取数据
# Load the workbook that holds the training samples.  The path is written as
# a raw string so its backslashes stay literal path separators instead of
# being parsed as (invalid) escape sequences.
wd = openpyxl.load_workbook(r'E:\机器学习数据\线性回归梯度下降.xlsx')
wd.sheetnames  # worksheet names; only displayed when run interactively
ws = wd['多维']
# Collect every worksheet row (one tuple of cell values per row) into a list.
data = list(ws.values)
print(data)
定义格式函数
def x_list(m, j, rows=None):
    """Return column ``j`` of the ``m`` rows that follow the first row.

    The first row (index 0) is always skipped; rows 1..m are read.

    Args:
        m: Number of rows to read.
        j: Zero-based index of the column to extract.
        rows: Optional sequence of row sequences to read from.  When omitted,
            the module-level ``data`` list is used.

    Returns:
        A list with ``m`` values, one per row, in row order.

    Raises:
        IndexError: If fewer than ``m + 1`` rows exist or a row has no
            column ``j``.
    """
    if rows is None:
        rows = data
    return [rows[i + 1][j] for i in range(m)]
# Number of samples: every row of `data` except the first one.
m = len(data) - 1
# Columns 0, 1 and 2 become X1, X2 and X3; column 3 becomes Y.
X1, X2, X3, Y = (x_list(m, column) for column in range(4))
定义求和函数
def sumtheta(theta0, theta1, theta2, theta3, DIFF):
    """Sum the expression ``DIFF`` over all ``m`` samples.

    For every sample index ``i`` the symbols ``t0``..``t3`` are replaced by
    the given parameter values and ``x1``, ``x2``, ``x3``, ``y`` by
    ``X1[i]``, ``X2[i]``, ``X3[i]``, ``Y[i]``; the substituted results are
    added together.

    Args:
        theta0: Value substituted for ``t0``.
        theta1: Value substituted for ``t1``.
        theta2: Value substituted for ``t2``.
        theta3: Value substituted for ``t3``.
        DIFF: Object with a ``subs(mapping)`` method (a sympy expression in
            the symbols above).

    Returns:
        The accumulated sum; ``0`` when ``m`` is 0.
    """
    parameters = {t0: theta0, t1: theta1, t2: theta2, t3: theta3}
    total = 0
    for i in range(m):
        sample = {x1: X1[i], x2: X2[i], x3: X3[i], y: Y[i]}
        # Accumulate: a plain assignment here would keep only the last sample.
        total += DIFF.subs({**parameters, **sample})
    return total
# Symbols used to build the expressions that are differentiated and evaluated.
# `symbols` (plural) is required because several symbols are created at once.
t0, t1, t2, t3, x1, x2, x3, y = symbols('t0 t1 t2 t3 x1 x2 x3 y')

# Iteration threshold: iterating stops once the change in cost is not above it.
o = 0.0001

# Learning rate.
a = 0.001

# Initial parameter values.
theta0 = theta1 = theta2 = theta3 = 0

# Upper bound on the number of iterations, to prevent an endless loop.
loop_max = 100000

# Iteration counter.
lp = 1
# Squared prediction error of one sample under the linear hypothesis
# t0 + t1*x1 + t2*x2 + t3*x3.
squared_error = (t0 + t1*x1 + t2*x2 + t3*x3 - y)**2

# Symbolic partial derivatives, one per parameter.  They do not depend on the
# current parameter values, so they are built once instead of per iteration.
d_t0 = diff(squared_error, t0)
d_t1 = diff(squared_error, t1)
d_t2 = diff(squared_error, t2)
d_t3 = diff(squared_error, t3)

# Cost of the previous iteration.  It is initialised outside the loop so the
# stop test compares consecutive costs rather than always comparing against 0.
J0 = 0

while lp < loop_max:
    # Evaluate each partial derivative and the squared error on the training
    # data for the current parameter values.
    suma = sumtheta(theta0, theta1, theta2, theta3, d_t0)
    sumb = sumtheta(theta0, theta1, theta2, theta3, d_t1)
    sumc = sumtheta(theta0, theta1, theta2, theta3, d_t2)
    sumd = sumtheta(theta0, theta1, theta2, theta3, d_t3)
    sumJ = sumtheta(theta0, theta1, theta2, theta3, squared_error)

    # Cost J = 1/(2m) * sum of squared errors.
    J1 = (1/(2*m))*sumJ

    # Step against the gradient of J.  The derivatives above are of the raw
    # squared error, so they carry the same 1/(2m) factor as J itself.
    theta0 -= a*(1/(2*m))*suma
    theta1 -= a*(1/(2*m))*sumb
    theta2 -= a*(1/(2*m))*sumc
    theta3 -= a*(1/(2*m))*sumd

    print('迭代次数为%d' % lp)
    # %f formats the values as floats; %d would truncate them to integers.
    print('参数值为%f,%f,%f,%f' % (theta0, theta1, theta2, theta3))

    # Stop as soon as the cost changed by no more than the threshold.
    if abs(J1 - J0) <= o:
        break
    lp += 1
    J0 = J1
画图
# Predicted value for every sample, using the parameter values found above.
y_hat = [
    theta0 + theta1 * X1[k] + theta2 * X2[k] + theta3 * X3[k]
    for k in range(m)
]
# The sample index serves as the horizontal axis.
x = list(range(m))
# Observed values as green stars, predictions as a red line.
plt.plot(x, Y, 'g*')
plt.plot(x, y_hat, 'r')
plt.show()
拟合结果