$$f(x, y) = (1 - x)^2 + 100\,(y - x^2)^2$$
梯度下降法
对于此问题,先对各变量求偏导,再用梯度下降法迭代更新参数。
$$\frac{\partial f(x,y)}{\partial x} = -2(1-x) - 2 \cdot 100\,(y - x^2) \cdot 2x = -2(1-x) - 400x(y - x^2)$$
$$\frac{\partial f(x,y)}{\partial y} = 2 \cdot 100\,(y - x^2) = 200(y - x^2)$$
import numpy as np
def f(x, y):
    """Rosenbrock function: (1 - x)^2 + 100*(y - x^2)^2."""
    linear_term = (1 - x) ** 2
    banana_term = 100 * (y - x * x) ** 2
    return linear_term + banana_term
def f_x(x, y):
    """Partial derivative of the Rosenbrock function with respect to x."""
    return 2 * (x - 1) - 400 * (y - x * x) * x
def f_y(x, y):
    """Partial derivative of the Rosenbrock function with respect to y."""
    return 200 * (y - x * x)
def main(max_iters=100000, temp_size=0.001):
    """Minimize the Rosenbrock function by fixed-step gradient descent.

    Parameters
    ----------
    max_iters : int
        Upper bound on the number of descent steps.
    temp_size : float
        Fixed learning rate applied to both partial derivatives.

    Returns
    -------
    np.ndarray
        The (x, y) point reached when the loss dropped below 1e-3
        or the iteration budget ran out.
    """
    point = np.zeros((2,), dtype=np.float32)
    loss = 10.0  # any value above the 1e-3 threshold, to enter the loop
    for _ in range(max_iters):
        if loss <= 0.001:
            break
        step = np.zeros_like(point)
        step[0] = f_x(point[0], point[1]) * temp_size
        step[1] = f_y(point[0], point[1]) * temp_size
        point -= step
        loss = f(point[0], point[1])
    return point
if __name__ == '__main__':
    # Optimize the Rosenbrock function and report the final (x, y) point.
    print(main())
线性回归问题
一般的线性回归方程如下:
$$y = \theta_1 x_1 + \theta_2 x_2 + \dots + \theta_n x_n + b$$
转化为
$$y = \theta_1 x_1 + \theta_2 x_2 + \dots + \theta_n x_n + \theta_0 b$$
需要定义损失函数,用于判断最后得到的预测参数的预测效果,常用的损失函数是均方误差
其中 $h(\theta)$ 为预测结果,$y$ 为原始数据。
$$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h(\theta)^{(i)} - y^{(i)}\right)^2$$
求偏导
$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h(\theta)^{(i)} - y^{(i)}\right)x_j^{(i)}$$
更新公式为
$$\theta_j = \theta_j - \alpha\,\frac{1}{m}\sum_{i=1}^{m}\left(h(\theta)^{(i)} - y^{(i)}\right)x_j^{(i)}$$

其中 $\alpha$ 为步长(学习率)。
例子
$$y = 3x_1 + 4x_2$$
BGD(批量梯度下降法)
import numpy as np
import matplotlib.pyplot as plt
def f(simple_num=100):
    """Generate a noise-free toy regression dataset for y = 3*x1 + 4*x2.

    Returns (x, y) where x has shape (simple_num, 2) and y shape (simple_num,).
    """
    first_feature = np.linspace(0, 9, simple_num)
    second_feature = np.linspace(4, 13, simple_num)
    features = np.stack((first_feature, second_feature), axis=1)
    targets = features @ np.array([3, 4])
    return features, targets
def BGM(sample, y, step_size = 0.01, max_iters = 10000):
    """Batch gradient descent for linear regression (no bias term).

    Fits w so that sample @ w approximates y by descending the
    mean-squared-error loss J(w) = 1/(2m) * sum((X w - y)^2), using the
    full-batch gradient each iteration.

    Parameters
    ----------
    sample : np.ndarray, shape (m, dim)
        Design matrix, one row per observation.
    y : np.ndarray
        Target values; flattened to shape (m,).
    step_size : float
        Learning rate.
    max_iters : int
        Iteration budget.

    Returns
    -------
    np.ndarray, shape (dim,)
        Fitted weight vector (float32, as before).
    """
    sample_num, dim = sample.shape
    y = y.flatten()
    w = np.ones((dim,), dtype = np.float32)
    loss = 10  # anything above the stopping threshold, to enter the loop
    iters_count = 0
    while loss > 0.001 and iters_count < max_iters:
        # Full-batch gradient ascent on (y - Xw): w += a/m * X^T (y - Xw).
        # Vectorized; mathematically identical to the old per-sample loops.
        residual = y - sample @ w
        w += step_size * (sample.T @ residual) / sample_num
        # Loss per the J(theta) derivation above: 1/(2m) * sum((Xw - y)^2).
        # The old code divided by dim*sample_num, which equals 2m only
        # when dim == 2.
        diff = sample @ w - y
        loss = float(diff @ diff) / (2 * sample_num)
        iters_count += 1
    return w
if __name__ == '__main__':
    features, targets = f()
    weights = BGM(features, targets)
    print(weights)
    # Visualize the raw data as a 3-D curve.
    # NOTE(review): very old matplotlib versions need
    # `from mpl_toolkits.mplot3d import Axes3D` before projection='3d'
    # works — confirm the target environment.
    axes3d = plt.figure().add_subplot(111, projection='3d')
    axes3d.plot(features[:, 0], features[:, 1], targets, color='b')
    plt.show()
SGD(随机梯度下降法)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def f(simple_num=100):
    """Build the noise-free toy dataset for the target y = 3*x1 + 4*x2."""
    xs = np.column_stack((np.linspace(0, 9, simple_num),
                          np.linspace(4, 13, simple_num)))
    return xs, xs @ np.array([3, 4])
def SGD(simples, y, step_size = 0.01, max_iters = 1000):
    """Stochastic gradient descent for linear regression (no bias term).

    Each iteration takes a gradient step using one randomly chosen sample,
    then evaluates the loss over the whole dataset for the stopping check.
    Draws from the global numpy RNG; seed it for reproducible runs.
    """
    n_samples, dim = simples.shape
    targets = y.flatten()
    w = np.ones((dim,), dtype = np.float32)
    loss, iters = 10, 0
    while loss > 0.001 and iters < max_iters:
        # Single-sample gradient step.
        pick = np.random.randint(0, n_samples)
        residual = targets[pick] - np.dot(w.T, simples[pick])
        for j in range(dim):
            w[j] += step_size * residual * simples[pick][j]
        # Full-dataset loss: 1/(2m) * sum of squared prediction errors.
        loss = 0
        for row in range(n_samples):
            prediction = np.dot(w.T, simples[row])
            loss += 1 / (2 * n_samples) * (prediction - targets[row]) ** 2
        iters += 1
    return w
if __name__ == '__main__':
    data, targets = f()
    weights = SGD(data, targets)
    print(weights)
    figure = plt.figure()
    axes3d = figure.add_subplot(111, projection='3d')
    # Ground-truth curve in blue, fitted predictions in red.
    axes3d.plot(data[:, 0], data[:, 1], targets, color='b')
    predicted = np.dot(data, weights)
    axes3d.plot(data[:, 0], data[:, 1], predicted, color='r')
    plt.show()