二维情况:y = theta0 + theta1*x
结果为:2.57549789814787, 0.613000580602551(迭代得到的近似值;该数据的最小二乘精确解为 theta0 = 2.5755, theta1 = 0.613)
代码如下:
from sympy import *
import math
# Batch gradient descent for the one-feature model y = theta0 + theta1 * x.
# Training data: X holds the feature values, Y the observed targets.
X = [1.5, 2, 1.5, 2, 3, 3, 3.5, 3.5, 4, 4, 5, 5]
Y = [3, 3.2, 4, 4.5, 4, 5, 4.2, 4.5, 5, 5.5, 4.8, 6.5]

# The symbols get their own names because theta0 / theta1 are rebound to
# plain numbers further down; keeping the Symbol objects lets subs() use
# them directly instead of re-parsing string keys on every call.
sym0, sym1 = symbols('theta0, theta1')

# Loss function: J = 1/2 * sum((theta0 + theta1 * x - y) ** 2).
J = 0
for xi, yi in zip(X, Y):
    J += (sym0 + xi * sym1 - yi) ** 2
J *= 0.5

alpha = 0.01  # learning rate (step size)
# Convergence threshold: stop once the loss changes by less than this
# between two consecutive iterations.
epsilon = 0.0000000000001

dtheta0 = diff(J, sym0)  # partial derivative of J with respect to theta0
dtheta1 = diff(J, sym1)  # partial derivative of J with respect to theta1
print('dthedat0=', dtheta0)
print('dthedat1=', dtheta1)

# Start the search from the origin.
theta0 = 0
theta1 = 0

# b always holds the loss at the previous point, a the loss at the new one.
b = J.subs({sym0: theta0, sym1: theta1})
while True:
    # Simultaneous update: both partial derivatives are evaluated at the
    # SAME (previous) point before either parameter is changed. Updating
    # theta0 first and then using the new theta0 for theta1's gradient is
    # not a gradient-descent step.
    point = {sym0: theta0, sym1: theta1}
    step0 = alpha * dtheta0.subs(point)
    step1 = alpha * dtheta1.subs(point)
    theta0 -= step0
    theta1 -= step1

    # The loss is evaluated once per iteration and reused for both the
    # progress line and the stopping test.
    a = J.subs({sym0: theta0, sym1: theta1})
    print("{}, {}, {}, {}".format(theta0, theta1, a, b))
    if math.fabs(a - b) < epsilon:
        break
    b = a

# Final estimate; the closed-form least-squares solution for this data is
# theta0 = 2.5755, theta1 = 0.613.
print("{}, {}".format(theta0, theta1))
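多维情况:y[i] = theta0 + theta1*x[i][0] + theta2*x[i][1] + theta3*x[i][2](注意:样本中 x[i][0] 恒为 1,与截距项 theta0 共线,因此数据只能唯一确定 theta0 + theta1 之和,二者各自的取值取决于初值和更新顺序)
结果为:50.3097802023958, 47.7942911922764, -13.0287743334236, 1.13282147172682(其中 theta0 + theta1 ≈ 98.1041)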
代码如下:
from sympy import *
import math
# Batch gradient descent for the model
#   y[i] = theta0 + theta1 * x[i][0] + theta2 * x[i][1] + theta3 * x[i][2].
# Training data: each row of X is one sample (x[i][0], x[i][1], x[i][2]).
X = [(1, 0., 3), (1, 1., 3), (1, 2., 3), (1, 3., 2), (1, 4., 4)]
Y = [95.364, 97.217205, 75.195834, 60.105519, 49.342380]

# The symbols get their own names because theta0..theta3 are rebound to
# plain numbers further down; keeping the Symbol objects lets subs() use
# them directly instead of re-parsing string keys on every call.
sym0, sym1, sym2, sym3 = symbols('theta0, theta1, theta2, theta3')

# Loss function: J = 1/2 * sum of squared residuals over all samples.
J = 0
for (x0, x1, x2), yi in zip(X, Y):
    J += (sym0 + x0 * sym1 + x1 * sym2 + x2 * sym3 - yi) ** 2
J *= 0.5

alpha = 0.01  # learning rate (step size)
# Convergence threshold: stop once the loss changes by less than this
# between two consecutive iterations.
epsilon = 0.0000000000001

dtheta0 = diff(J, sym0)  # partial derivative of J with respect to theta0
dtheta1 = diff(J, sym1)  # partial derivative of J with respect to theta1
dtheta2 = diff(J, sym2)  # partial derivative of J with respect to theta2
dtheta3 = diff(J, sym3)  # partial derivative of J with respect to theta3
print('dthedat0=', dtheta0)
print('dthedat1=', dtheta1)
print('dthedat2=', dtheta2)
print('dthedat3=', dtheta3)

# Start the search from the origin.
theta0 = 0
theta1 = 0
theta2 = 0
theta3 = 0

# b always holds the loss at the previous point, a the loss at the new one.
b = J.subs({sym0: theta0, sym1: theta1, sym2: theta2, sym3: theta3})
while True:
    # Simultaneous update: all four partial derivatives are evaluated at
    # the SAME (previous) point before any parameter is changed. Feeding an
    # already-updated theta into the next gradient is not a gradient-descent
    # step.
    point = {sym0: theta0, sym1: theta1, sym2: theta2, sym3: theta3}
    step0 = alpha * dtheta0.subs(point)
    step1 = alpha * dtheta1.subs(point)
    step2 = alpha * dtheta2.subs(point)
    step3 = alpha * dtheta3.subs(point)
    theta0 -= step0
    theta1 -= step1
    theta2 -= step2
    theta3 -= step3

    # The loss is evaluated once per iteration and reused for both the
    # progress line and the stopping test.
    a = J.subs({sym0: theta0, sym1: theta1, sym2: theta2, sym3: theta3})
    print("{}, {}, {}, {}, {}, {}".format(theta0, theta1, theta2, theta3, a, b))
    if math.fabs(a - b) < epsilon:
        break
    b = a

# NOTE: X[i][0] is 1 for every sample, so theta0 and theta1 multiply the
# same constant column and their partial derivatives are identical. Only
# the sum theta0 + theta1 is determined by the data; with simultaneous
# updates from equal starting values the two parameters stay equal.
print("{}, {}, {}, {}".format(theta0, theta1, theta2, theta3))