[Convex Optimization] Implementing the Alternating Direction Method of Multipliers (ADMM) in PyTorch
This experiment uses Python as the programming language and PyTorch as the machine learning library to implement the alternating direction method of multipliers (ADMM).
Basic Steps
Optimization Objective
The optimization objective for this experiment is as follows:
$$
\begin{aligned}
\underset{x,\,z}{\text{minimize}}\quad & f(x)+g(z)\\
\text{subject to}\quad & Ax+Bz=c
\end{aligned}
$$

with

$$
f(x)=\frac{3}{2}x_1^2+\frac{1}{2}x_2^2-x_1x_2-2x_1,\qquad
g(z)=\frac{1}{2}z_1^2+2z_2^2+z_1z_2+z_1+z_2,\qquad
A=(1,1),\ B=(1,1),\ c=1.
$$
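In scaled form, each ADMM iteration then performs an x-update, a z-update, and a dual update; this is the scheme the code below follows, with the two argmin subproblems solved approximately by gradient descent with backtracking line search:

$$
\begin{aligned}
x^{k+1} &= \arg\min_x\; f(x) + \frac{\rho}{2}\left\|Ax + Bz^{k} - c + u^{k}\right\|_2^2 \\
z^{k+1} &= \arg\min_z\; g(z) + \frac{\rho}{2}\left\|Ax^{k+1} + Bz - c + u^{k}\right\|_2^2 \\
u^{k+1} &= u^{k} + Ax^{k+1} + Bz^{k+1} - c
\end{aligned}
$$

The iteration stops once both the primal residual $\|Ax^{k+1}+Bz^{k+1}-c\|$ and the dual residual $\rho\|A^{\mathsf T}B(z^{k+1}-z^{k})\|$ fall below the tolerance $\varepsilon$.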
Code
import torch

# Initial iterates, penalty parameter, and stopping tolerance
x = torch.tensor([[1.], [1.]], requires_grad=True)
z = torch.tensor([[1.], [1.]], requires_grad=True)
rho = 2
epsilon = 0.01
u = torch.tensor([0.])        # scaled dual variable
c = 1

# Constraint A x + B z = c with A = (1, 1), B = (1, 1)
A_ = torch.tensor([1., 1.])
B_ = torch.tensor([1., 1.])

# f(x) = 3/2 x1^2 + 1/2 x2^2 - x1 x2 - 2 x1, written as 1/2 x^T A x - b1^T x
A = torch.tensor([[3., -1.], [-1., 1.]])
b1 = torch.tensor([2., 0.])

# g(z) = 1/2 z1^2 + 2 z2^2 + z1 z2 + z1 + z2, written as 1/2 z^T B z + b2^T z
B = torch.tensor([[1., 1.], [1., 4.]])
b2 = torch.tensor([1., 1.])


def f(x, z, u):
    # x-part of the augmented Lagrangian (scaled form)
    return 1 / 2 * x.t() @ A @ x - b1 @ x + rho / 2 * torch.norm(A_ @ x + B_ @ z - c + u) ** 2


def g(x, z, u):
    # z-part of the augmented Lagrangian (scaled form)
    return 1 / 2 * z.t() @ B @ z + b2 @ z + rho / 2 * torch.norm(A_ @ x + B_ @ z - c + u) ** 2


def argmin_f(x, z, u):
    # Approximately minimize the x-subproblem by gradient descent with backtracking line search
    x = x.detach().requires_grad_(True)   # fresh leaf so the autograd graph does not grow across iterations
    z, u = z.detach(), u.detach()

    def f_grad(x, z, u):  # gradient of f with respect to x
        y = f(x, z, u)
        return torch.autograd.grad(y, x, retain_graph=True, create_graph=True)[0]

    alpha, beta, eta = 0.25, 0.5, 0.1
    while torch.norm(f_grad(x, z, u)) > eta:
        delta_x = -f_grad(x, z, u)
        t = 1.
        # Backtracking: shrink the step until the Armijo condition holds
        while f(x + t * delta_x, z, u) > f(x, z, u) + alpha * t * (f_grad(x, z, u).t() @ delta_x):
            t = beta * t
        x = x + t * delta_x
    return x


def argmin_g(x, z, u):
    # Approximately minimize the z-subproblem by gradient descent with backtracking line search
    z = z.detach().requires_grad_(True)
    x, u = x.detach(), u.detach()

    def g_grad(x, z, u):  # gradient of g with respect to z
        y = g(x, z, u)
        return torch.autograd.grad(y, z, retain_graph=True, create_graph=True)[0]

    alpha, beta, eta = 0.25, 0.5, 0.1
    while torch.norm(g_grad(x, z, u)) > eta:
        delta_z = -g_grad(x, z, u)
        t = 1.
        while g(x, z + t * delta_z, u) > g(x, z, u) + alpha * t * (g_grad(x, z, u).t() @ delta_z):
            t = beta * t
        z = z + t * delta_z
    return z


def admm(x, z, u):
    while True:
        x = argmin_f(x, z, u)              # x-update
        z_last = z
        z = argmin_g(x, z, u)              # z-update
        u = u + (A_ @ x + B_ @ z - c)      # dual update (scaled form)
        # Stop once both the primal and the dual residual are below epsilon
        primal_res = torch.norm(A_ @ x + B_ @ z - c)
        dual_res = rho * torch.norm(A_.reshape(2, 1) @ B_.reshape(1, 2) @ (z_last - z))
        if primal_res <= epsilon and dual_res <= epsilon:
            break
    opt = 1 / 2 * x.t() @ A @ x - b1 @ x + 1 / 2 * z.t() @ B @ z + b2 @ z
    print('Optimal value is: ' + str(opt.flatten().tolist()))
    print('Optimal solution is: x = ' + str(x.flatten().tolist()) + ', z = ' + str(z.flatten().tolist()))


if __name__ == '__main__':
    admm(x, z, u)
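Because both subproblems are unconstrained convex quadratics, the two argmin steps could also be solved exactly in closed form instead of by gradient descent. Setting the gradient of the x-part of the augmented Lagrangian to zero gives the linear system (A + rho * A_ A_^T) x = b1 - rho * A_ (B_^T z - c + u), which can be solved directly. A minimal sketch, reusing the tensors defined above (the helper name argmin_f_exact is hypothetical):

def argmin_f_exact(z, u):
    # Exact minimizer of 1/2 x^T A x - b1^T x + rho/2 * (A_^T x + B_^T z - c + u)^2
    a = A_.reshape(2, 1)
    lhs = A + rho * (a @ a.t())                                # A + rho * A_ A_^T
    rhs = (b1.reshape(2, 1) - rho * a * (B_ @ z - c + u)).detach()
    return torch.linalg.solve(lhs, rhs)

The z-update has the analogous closed form (B + rho * B_ B_^T) z = -b2 - rho * B_ (A_^T x - c + u); using such exact updates removes the inner tolerance eta as a source of error.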
Results
Note: this code is only an initial implementation, and its results still differ from the true optimum; if you have a better version, feel free to send it (or a PR) to my email.
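One way to quantify that gap is to compute the true optimum directly: since this is an equality-constrained quadratic program, its KKT conditions form a 5x5 linear system in (x1, x2, z1, z2, lambda). A minimal sketch, reusing A, b1, B, b2, A_, B_ and c from the code above (the function name kkt_reference is hypothetical):

def kkt_reference():
    # Stationarity:  A x + lam * A_ = b1   and   B z + lam * B_ = -b2
    # Feasibility:   A_^T x + B_^T z = c
    K = torch.zeros(5, 5)
    K[0:2, 0:2] = A
    K[0:2, 4] = A_
    K[2:4, 2:4] = B
    K[2:4, 4] = B_
    K[4, 0:2] = A_
    K[4, 2:4] = B_
    rhs = torch.cat([b1, -b2, torch.tensor([float(c)])])
    sol = torch.linalg.solve(K, rhs)
    x_star, z_star = sol[0:2], sol[2:4]
    return x_star, z_star

Comparing the x and z returned by admm (and the objective value they give) against x_star and z_star makes the gap mentioned in the note above concrete.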
If this post helped you, likes and bookmarks are appreciated.
Reposts are welcome; please credit the original source.