G-S迭代
代码最少可以这么写:
def GS(A, b, X, N):
    """Solve A X = b by Gauss-Seidel iteration, updating X in place.

    A : (n, n) coefficient matrix (should be SPD or diagonally dominant
        for guaranteed convergence).
    b : (n, 1) right-hand side.
    X : (n, 1) initial guess; overwritten with the solution and returned.
    N : maximum number of sweeps.  Stops early when the max-norm residual
        drops below 1e-5; prints the last sweep index.
    """
    X_old = X.copy()
    n = X.shape[0]
    for k in range(N):
        for i in range(n):
            # Entries below the diagonal use this sweep's fresh values,
            # entries above use the previous sweep's.  Empty slices (i = 0
            # or i = n-1) produce a (1, 1) zero product, so no special case
            # for the first row is needed.
            # .item() extracts a Python scalar: assigning a size-1 ndarray
            # to X[i, 0] is deprecated in modern NumPy.
            s_lower = (A[i:i + 1, 0:i] @ X[0:i]).item()
            s_upper = (A[i:i + 1, i + 1:n] @ X_old[i + 1:n]).item()
            X[i, 0] = (b[i, 0] - s_lower - s_upper) / A[i, i]
        # Alias: from now on the "old" vector is the in-place vector, which
        # is exactly the standard single-vector Gauss-Seidel update.
        X_old = X
        # abs(...) on the ndarray then .max() yields a plain float; the old
        # builtin max(...) over a (n, 1) array truth-tested 1-element arrays.
        if abs(A @ X - b).max() < 1e-5:
            break
    print(k)
    return X
然而上面这个代码一般求解常系数矩阵,在我们做优化的过程中,往往需要求解牛顿方程 $G_k d = -g_k$,这个时候的代码需要做修改:
def GS(HH, GG, X, eta, N):
    """Solve the Newton system  HH(X) d = -GG(X)^T  by Gauss-Seidel sweeps.

    HH  : callable returning the (n, n) Hessian at X.
    GG  : callable returning the (1, n) gradient at X.
    X   : current iterate, a (1, n) row vector (only its width is used).
    eta : relative tolerance on the mean-squared residual.
    N   : maximum number of sweeps.
    Returns the (n, 1) approximate solution.
    """
    A = HH(X)
    b = -GG(X).T
    n = X.shape[1]
    b_norm = (b ** 2).mean()
    prev = b            # initial guess: the right-hand side itself
    cur = prev.copy()
    for sweep in range(N):
        for i in range(n):
            # Rows before i were refreshed this sweep; rows after i still
            # hold the previous sweep's values.  The empty slices at i = 0
            # and i = n-1 contribute an exact 0.0, so one loop covers all rows.
            done = (A[i:i + 1, 0:i] @ cur[0:i]).item()
            todo = (A[i:i + 1, i + 1:n] @ prev[i + 1:n]).item()
            cur[i, 0] = (b[i, 0] - done - todo) / A[i, i]
        prev = cur      # alias: later sweeps read the in-place vector
        r_norm = ((A @ cur - b) ** 2).mean()
        if r_norm < eta * b_norm:
            break
    return cur
上面这个函数的参数 HH、GG 分别表示我们需要优化的那个函数的 Hessian 矩阵和梯度。
重点:G-S迭代求解一般用在对称正定矩阵,其他类型的矩阵很难保证迭代收敛性
我们拿差分法来测试一下刚才写的关于G-S迭代的正确性。
def UU(X, order, prob):  # X holds (x, t) sample points, one pair per row
    """Exact solution of test problem `prob` and its partial derivatives.

    X     : tensor of shape (m, 2); column 0 is x, column 1 is t.
    order : [i, j] -- evaluate d^{i+j} u / dx^i dt^j, with i, j in {0, 1, 2}
            and i + j <= 2.
    prob  : 1, 2 or 3 -- which analytic solution to use.

    Returns a 1-D tensor with one value per row of X.  Unsupported
    (prob, order) combinations fall through and return None.
    """
    if prob==1:
        # u = log(10(x+t)^2 + (x-t)^2 + 0.5)
        temp = 10*(X[:,0]+X[:,1])**2 + (X[:,0]-X[:,1])**2 + 0.5
        if order[0]==0 and order[1]==0:
            return torch.log(temp)
        if order[0]==1 and order[1]==0:  # partial derivative w.r.t. x
            return temp**(-1) * (20*(X[:,0]+X[:,1]) + 2*(X[:,0]-X[:,1]))
        if order[0]==0 and order[1]==1:  # partial derivative w.r.t. t
            return temp**(-1) * (20*(X[:,0]+X[:,1]) - 2*(X[:,0]-X[:,1]))
        if order[0]==2 and order[1]==0:
            return - temp**(-2) * (20*(X[:,0]+X[:,1])+2*(X[:,0]-X[:,1])) ** 2 \
                + temp**(-1) * (22)
        if order[0]==1 and order[1]==1:
            return - temp**(-2) * (20*(X[:,0]+X[:,1])+2*(X[:,0]-X[:,1])) \
                * (20*(X[:,0]+X[:,1])-2*(X[:,0]-X[:,1])) \
                + temp**(-1) * (18)
        if order[0]==0 and order[1]==2:
            return - temp**(-2) * (20*(X[:,0]+X[:,1])-2*(X[:,0]-X[:,1])) ** 2 \
                + temp**(-1) * (22)
    if prob==2:
        # u = (x^3 - x) * cosh(2t), with cosh/sinh written out as exponentials
        if order[0]==0 and order[1]==0:
            return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
                0.5*(torch.exp(2*X[:,1])+torch.exp(-2*X[:,1]))
        if order[0]==1 and order[1]==0:
            return (3*X[:,0]*X[:,0]-1) * \
                0.5*(torch.exp(2*X[:,1])+torch.exp(-2*X[:,1]))
        if order[0]==0 and order[1]==1:
            return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
                (torch.exp(2*X[:,1])-torch.exp(-2*X[:,1]))
        if order[0]==2 and order[1]==0:
            return (6*X[:,0]) * \
                0.5*(torch.exp(2*X[:,1])+torch.exp(-2*X[:,1]))
        if order[0]==1 and order[1]==1:
            return (3*X[:,0]*X[:,0]-1) * \
                (torch.exp(2*X[:,1])-torch.exp(-2*X[:,1]))
        if order[0]==0 and order[1]==2:
            return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
                2*(torch.exp(2*X[:,1])+torch.exp(-2*X[:,1]))
    if prob==3:
        # u = (x^2 - t^2) / (x^2 + t^2 + 0.1)
        temp1 = X[:,0]*X[:,0] - X[:,1]*X[:,1]
        temp2 = X[:,0]*X[:,0] + X[:,1]*X[:,1] + 0.1
        if order[0]==0 and order[1]==0:
            return temp1 * temp2**(-1)
        if order[0]==1 and order[1]==0:
            return (2*X[:,0]) * temp2**(-1) + \
                temp1 * (-1)*temp2**(-2) * (2*X[:,0])
        if order[0]==0 and order[1]==1:
            return (-2*X[:,1]) * temp2**(-1) + \
                temp1 * (-1)*temp2**(-2) * (2*X[:,1])
        if order[0]==2 and order[1]==0:
            return (2) * temp2**(-1) + \
                2 * (2*X[:,0]) * (-1)*temp2**(-2) * (2*X[:,0]) + \
                temp1 * (2)*temp2**(-3) * (2*X[:,0])**2 + \
                temp1 * (-1)*temp2**(-2) * (2)
        if order[0]==1 and order[1]==1:
            return (2*X[:,0]) * (-1)*temp2**(-2) * (2*X[:,1]) + \
                (-2*X[:,1]) * (-1)*temp2**(-2) * (2*X[:,0]) + \
                temp1 * (2)*temp2**(-3) * (2*X[:,0]) * (2*X[:,1])
        if order[0]==0 and order[1]==2:
            return (-2) * temp2**(-1) + \
                2 * (-2*X[:,1]) * (-1)*temp2**(-2) * (2*X[:,1]) + \
                temp1 * (2)*temp2**(-3) * (2*X[:,1])**2 + \
                temp1 * (-1)*temp2**(-2) * (2)
def FF(prob, X):
    """Right-hand side f = -u_xx - u_tt of the Poisson problem `prob`,
    evaluated at the rows of X via the exact solution UU."""
    return -(UU(X, [2, 0], prob) + UU(X, [0, 2], prob))
class FD():
    """Five-point finite-difference discretization of -Laplace(u) = f on a
    rectangle, with Dirichlet boundary values taken from the exact
    solution UU.

    bound : 2x2 tensor [[x0, x1], [y0, y1]] describing the domain.
    hx    : [dx, dy] mesh spacings.
    prob  : test-problem index forwarded to UU / FF.
    Grid nodes are stored row-major: node (i, j) maps to row i*nx[1] + j.
    """
    def __init__(self, bound, hx, prob):
        self.prob = prob
        self.dim = 2
        self.hx = hx
        # node counts per direction, both endpoints included
        self.nx = [int((bound[0,1] - bound[0,0])/self.hx[0]) + 1,
                   int((bound[1,1] - bound[1,0])/self.hx[1]) + 1]
        self.size = self.nx[0]*self.nx[1]
        self.X = torch.zeros(self.size, self.dim)
        m = 0
        for i in range(self.nx[0]):
            for j in range(self.nx[1]):
                self.X[m,0] = bound[0,0] + i*self.hx[0]
                self.X[m,1] = bound[1,0] + j*self.hx[1]
                m = m + 1
        # exact solution sampled on the grid, as a column vector
        self.u_acc = UU(self.X, [0,0], self.prob).view(-1,1)
    def matrix(self):
        """Assemble the dense stencil matrix: identity rows at boundary
        nodes, the scaled five-point stencil at interior nodes."""
        self.A = torch.zeros(self.size, self.size)
        dx = self.hx[0]; dy = self.hx[1]   # hoisted: constant over the grid
        for i in range(self.nx[0]):
            for j in range(self.nx[1]):
                row = i*self.nx[1] + j
                if i == 0 or i == self.nx[0] - 1 or j == 0 or j == self.nx[1] - 1:
                    self.A[row, row] = 1   # Dirichlet node
                else:
                    self.A[row, row] = 2*(dx/dy + dy/dx)
                    self.A[row, row - 1] = -dx/dy               # (i, j-1)
                    self.A[row, row + 1] = -dx/dy               # (i, j+1)
                    self.A[row, row - self.nx[1]] = -dy/dx      # (i-1, j)
                    self.A[row, row + self.nx[1]] = -dy/dx      # (i+1, j)
        return self.A
    def right(self):
        """Assemble the right-hand side: exact boundary values at boundary
        rows, dx*dy*f at interior rows.  (The original version also built
        an unused per-node tensor from i*dx, j*dy — dead code that ignored
        the domain offset; removed.)"""
        self.b = torch.zeros(self.size, 1)
        dx = self.hx[0]; dy = self.hx[1]
        for i in range(self.nx[0]):
            for j in range(self.nx[1]):
                row = i*self.nx[1] + j
                if i == 0 or i == self.nx[0] - 1 or j == 0 or j == self.nx[1] - 1:
                    self.b[row] = UU(self.X[row:row + 1, :], [0,0], self.prob)
                else:
                    self.b[row] = FF(self.prob, self.X[row:row + 1, :])*dx*dy
        return self.b
    def solve(self):
        """Solve the assembled system with the (numpy) Gauss-Seidel routine
        GS, starting from the zero vector, at most 200 sweeps."""
        A = self.matrix().numpy()
        b = self.right().numpy()
        x = np.zeros([b.shape[0], 1])
        u = GS(A, b, x, 200)
        return u
def error(u_pred, u_acc):
    """Relative L2 error ||u_pred - u_acc|| / ||u_acc||.

    u_pred is a numpy array, u_acc a torch tensor (converted once here).
    """
    acc = u_acc.numpy()
    ratio = ((u_pred - acc)**2).sum() / (acc**2).sum()
    return ratio**(0.5)
# Driver: domain [0,2] x [0,1], uniform spacing 0.1, test problem 3.
bound = torch.tensor([[0,2],[0,1]]).float()
hx = [0.1,0.1]
prob = 3
fd = FD(bound,hx,prob)
u_pred = fd.solve()      # finite-difference solution via Gauss-Seidel
u_acc = fd.u_acc         # exact solution sampled on the grid
print(error(u_pred,u_acc))   # relative L2 error of the discrete solution
线高斯迭代
针对五点差分法,我们可以利用矩阵的特殊性构造线高斯方法,具体的原理参考下面这个表达式:
$$-\frac{dy}{dx}\,u_{i-1,j} + 2\left(\frac{dx}{dy}+\frac{dy}{dx}\right)u_{i,j} - \frac{dy}{dx}\,u_{i+1,j} = dx\,dy\,f_{i,j} + \frac{dx}{dy}\left(u_{i,j-1}+u_{i,j+1}\right)$$
那么根据这个表达式,不断更新每一层的近似解,参考下面这个公式:
$$-\frac{dy}{dx}\,u_{i-1,j}^{new} + 2\left(\frac{dx}{dy}+\frac{dy}{dx}\right)u_{i,j}^{new} - \frac{dy}{dx}\,u_{i+1,j}^{new} = dx\,dy\,f_{i,j} + \frac{dx}{dy}\left(u_{i,j-1}^{new}+u_{i,j+1}^{old}\right)$$
其中在更新每一层的近似解过程中,都会涉及到一个三对角矩阵的求解,为此我们还需要写一个三对角矩阵的求解过程。
def TH(d, l, u, b):
    """Thomas algorithm: solve a tridiagonal system via LU without pivoting.

    d : (n, 1) main diagonal.
    l : (n-1, 1) lower diagonal.
    u : (n-1, 1) upper diagonal.
    b : (n, 1) right-hand side.
    Returns the (n, 1) solution.  Storing only the three diagonals keeps
    memory at O(n); the matrix must admit stable LU (e.g. diagonally
    dominant), since no pivoting is done.
    """
    n = b.shape[0]
    piv = np.zeros_like(d)     # diagonal of U in the LU factorization
    mult = np.zeros_like(u)    # scaled superdiagonal u_i / piv_i
    piv[0, 0] = d[0, 0]
    mult[0, 0] = u[0, 0] / d[0, 0]
    for i in range(1, n - 1):
        piv[i, 0] = d[i, 0] - l[i - 1, 0] * mult[i - 1, 0]
        mult[i, 0] = u[i, 0] / piv[i, 0]
    piv[n - 1, 0] = d[n - 1, 0] - l[n - 2, 0] * mult[n - 2, 0]
    # forward substitution: L z = b (l is read directly; the original's
    # gama copy was never modified)
    z = np.zeros([n, 1])
    z[0, 0] = b[0, 0] / piv[0, 0]
    for i in range(1, n):
        z[i, 0] = (b[i, 0] - l[i - 1, 0] * z[i - 1, 0]) / piv[i, 0]
    # back substitution: U x = z
    sol = np.zeros([n, 1])
    sol[n - 1, 0] = z[n - 1, 0]
    for j in range(n - 2, -1, -1):
        sol[j, 0] = z[j, 0] - mult[j, 0] * sol[j + 1, 0]
    return sol
这里的 $d, l, u, b$ 都是向量:$d$ 存储三对角矩阵的主对角线元素,$l$ 和 $u$ 分别存储下、上对角线元素,$b$ 表示方程的右端项。这样做可以节省存储量。整个求解代码过程参考以下:
import numpy as np
import time
def TH(d, l, u, b):
    """Solve a tridiagonal system with the Thomas algorithm (O(n) LU,
    no pivoting).

    d, l, u : column vectors holding the main (n), lower (n-1) and upper
              (n-1) diagonals; b : (n, 1) right-hand side.
    Returns the (n, 1) solution vector.
    """
    n = b.shape[0]
    diagU = np.zeros_like(d)    # pivots: diagonal of the U factor
    ratio = np.zeros_like(u)    # superdiagonal of U divided by the pivot
    diagU[0, 0] = d[0, 0]
    ratio[0, 0] = u[0, 0] / d[0, 0]
    for i in range(1, n - 1):
        diagU[i, 0] = d[i, 0] - l[i - 1, 0] * ratio[i - 1, 0]
        ratio[i, 0] = u[i, 0] / diagU[i, 0]
    diagU[n - 1, 0] = d[n - 1, 0] - l[n - 2, 0] * ratio[n - 2, 0]
    # forward pass (L y = b); l itself is only read, so no copy is needed
    y = np.zeros([n, 1])
    y[0, 0] = b[0, 0] / diagU[0, 0]
    for i in range(1, n):
        y[i, 0] = (b[i, 0] - l[i - 1, 0] * y[i - 1, 0]) / diagU[i, 0]
    # backward pass (U x = y)
    x = np.zeros([n, 1])
    x[n - 1, 0] = y[n - 1, 0]
    for j in range(n - 2, -1, -1):
        x[j, 0] = y[j, 0] - ratio[j, 0] * x[j + 1, 0]
    return x
def UU(X, order, prob):  # X holds (x, t) sample points, one pair per row
    """Exact solution of test problem `prob` and its partial derivatives
    (numpy version of the torch routine above).

    X     : array of shape (m, 2); column 0 is x, column 1 is t.
    order : [i, j] -- evaluate d^{i+j} u / dx^i dt^j, with i, j in {0, 1, 2}
            and i + j <= 2.
    prob  : 1, 2 or 3 -- which analytic solution to use.

    Returns a 1-D array with one value per row of X.  Unsupported
    (prob, order) combinations fall through and return None.
    """
    if prob==1:
        # u = log(10(x+t)^2 + (x-t)^2 + 0.5)
        temp = 10*(X[:,0]+X[:,1])**2 + (X[:,0]-X[:,1])**2 + 0.5
        if order[0]==0 and order[1]==0:
            return np.log(temp)
        if order[0]==1 and order[1]==0:  # partial derivative w.r.t. x
            return temp**(-1) * (20*(X[:,0]+X[:,1]) + 2*(X[:,0]-X[:,1]))
        if order[0]==0 and order[1]==1:  # partial derivative w.r.t. t
            return temp**(-1) * (20*(X[:,0]+X[:,1]) - 2*(X[:,0]-X[:,1]))
        if order[0]==2 and order[1]==0:
            return - temp**(-2) * (20*(X[:,0]+X[:,1])+2*(X[:,0]-X[:,1])) ** 2 \
                + temp**(-1) * (22)
        if order[0]==1 and order[1]==1:
            return - temp**(-2) * (20*(X[:,0]+X[:,1])+2*(X[:,0]-X[:,1])) \
                * (20*(X[:,0]+X[:,1])-2*(X[:,0]-X[:,1])) \
                + temp**(-1) * (18)
        if order[0]==0 and order[1]==2:
            return - temp**(-2) * (20*(X[:,0]+X[:,1])-2*(X[:,0]-X[:,1])) ** 2 \
                + temp**(-1) * (22)
    if prob==2:
        # u = (x^3 - x) * cosh(2t), with cosh/sinh written out as exponentials
        if order[0]==0 and order[1]==0:
            return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
                0.5*(np.exp(2*X[:,1])+np.exp(-2*X[:,1]))
        if order[0]==1 and order[1]==0:
            return (3*X[:,0]*X[:,0]-1) * \
                0.5*(np.exp(2*X[:,1])+np.exp(-2*X[:,1]))
        if order[0]==0 and order[1]==1:
            return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
                (np.exp(2*X[:,1])-np.exp(-2*X[:,1]))
        if order[0]==2 and order[1]==0:
            return (6*X[:,0]) * \
                0.5*(np.exp(2*X[:,1])+np.exp(-2*X[:,1]))
        if order[0]==1 and order[1]==1:
            return (3*X[:,0]*X[:,0]-1) * \
                (np.exp(2*X[:,1])-np.exp(-2*X[:,1]))
        if order[0]==0 and order[1]==2:
            return (X[:,0]*X[:,0]*X[:,0]-X[:,0]) * \
                2*(np.exp(2*X[:,1])+np.exp(-2*X[:,1]))
    if prob==3:
        # u = (x^2 - t^2) / (x^2 + t^2 + 0.1)
        temp1 = X[:,0]*X[:,0] - X[:,1]*X[:,1]
        temp2 = X[:,0]*X[:,0] + X[:,1]*X[:,1] + 0.1
        if order[0]==0 and order[1]==0:
            return temp1 * temp2**(-1)
        if order[0]==1 and order[1]==0:
            return (2*X[:,0]) * temp2**(-1) + \
                temp1 * (-1)*temp2**(-2) * (2*X[:,0])
        if order[0]==0 and order[1]==1:
            return (-2*X[:,1]) * temp2**(-1) + \
                temp1 * (-1)*temp2**(-2) * (2*X[:,1])
        if order[0]==2 and order[1]==0:
            return (2) * temp2**(-1) + \
                2 * (2*X[:,0]) * (-1)*temp2**(-2) * (2*X[:,0]) + \
                temp1 * (2)*temp2**(-3) * (2*X[:,0])**2 + \
                temp1 * (-1)*temp2**(-2) * (2)
        if order[0]==1 and order[1]==1:
            return (2*X[:,0]) * (-1)*temp2**(-2) * (2*X[:,1]) + \
                (-2*X[:,1]) * (-1)*temp2**(-2) * (2*X[:,0]) + \
                temp1 * (2)*temp2**(-3) * (2*X[:,0]) * (2*X[:,1])
        if order[0]==0 and order[1]==2:
            return (-2) * temp2**(-1) + \
                2 * (-2*X[:,1]) * (-1)*temp2**(-2) * (2*X[:,1]) + \
                temp1 * (2)*temp2**(-3) * (2*X[:,1])**2 + \
                temp1 * (-1)*temp2**(-2) * (2)
def FF(prob, X):
    """Right-hand side f = -u_xx - u_tt of the Poisson problem `prob`,
    evaluated at the rows of X via the exact solution UU."""
    return -(UU(X, [2, 0], prob) + UU(X, [0, 2], prob))
np.random.seed(1234)
class FD():
    """Five-point finite-difference solver driven by line (block)
    Gauss-Seidel: each interior grid row is updated at once by solving a
    tridiagonal system with TH."""
    def __init__(self, bound, hx, prob):
        # bound: 2x2 array [[x0,x1],[y0,y1]]; hx: [dx,dy] spacings;
        # prob: test-problem index forwarded to UU/FF
        self.prob = prob
        self.dim = 2
        self.hx = hx
        # node counts per direction, both endpoints included
        self.nx = [int((bound[0,1] - bound[0,0])/self.hx[0]) + 1,int((bound[1,1] - bound[1,0])/self.hx[1]) + 1]
        self.size = self.nx[0]*self.nx[1]
        # grid nodes stored row-major: node (i, j) -> row i*nx[1] + j
        self.X = np.zeros([self.size,self.dim])
        m = 0
        for i in range(self.nx[0]):
            for j in range(self.nx[1]):
                self.X[m,0] = bound[0,0] + i*self.hx[0]
                self.X[m,1] = bound[1,0] + j*self.hx[1]
                m = m + 1
    def u_init(self):
        """Initial iterate on the (nx[0], nx[1]) grid: exact boundary
        values on all four edges, zeros at interior nodes."""
        u = np.zeros([self.nx[0],self.nx[1]])
        x = self.X.reshape([self.nx[0],self.nx[1],self.dim])
        u[0,:] = UU(x[0,:,:],[0,0],self.prob)
        u[-1,:] = UU(x[-1,:,:],[0,0],self.prob)
        u[1:self.nx[0] - 1,0] = UU(x[1:self.nx[0] - 1,0,:],[0,0],self.prob)
        u[1:self.nx[0] - 1,-1] = UU(x[1:self.nx[0] - 1,-1,:],[0,0],self.prob)
        return u
    def solve(self,rig):
        """Line Gauss-Seidel iteration.

        rig : f evaluated at the (M-2)*(N-2) interior nodes.
        Sweeps over the interior grid columns j = 1..N-2, solving one
        tridiagonal system per column, until the update norm drops below
        1e-7 or 1000 sweeps are reached.  Returns the solution flattened
        to a column vector.
        """
        tic = time.time()
        u_old = self.u_init()
        u_new = u_old.copy()
        M = self.nx[0];N = self.nx[1]
        dx = self.hx[0];dy = self.hx[1]
        # interior right-hand side dx*dy*f, augmented with the known
        # Dirichlet values from the first and last grid rows
        right = (dx*dy*rig).reshape([M - 2,N - 2])
        right[0,:] += u_new[0,1:N - 1]*dy/dx
        right[-1,:] += u_new[- 1,1:N - 1]*dy/dx
        r1 = dy/dx;r2 = dx/dy
        # tridiagonal line matrix: off-diagonals -r1, diagonal 2*(r1 + r2)
        l = - np.ones([M - 3,1])*r1
        u = - np.ones([M - 3,1])*r1
        d = np.ones([M - 2,1])*2*(r1 + r2)
        for k in range(1000):
            for j in range(1,N - 1):
                # column j couples to the freshly updated column j-1 (new)
                # and the previous sweep's column j+1 (old)
                b = r2*(u_new[1:M - 1,j - 1] + u_old[1:M - 1,j + 1]) + right[:,j - 1]
                u_new[1:M - 1,j:j + 1] = TH(d,l,u,b.reshape(-1,1))
            if (np.linalg.norm(u_new - u_old) < 1e-7):
                break
            else:
                u_old = u_new.copy()
            if k%100 == 0:
                print('the iteration = %d'%(k + 1))
        ela = time.time() - tic
        print('the end iteration is %d,the time:%.2f'%(k + 1,ela))
        return u_new.reshape(-1,1)
# Driver: domain [0,2] x [0,1], uniform spacing 0.05, test problem 1.
bound = np.array([[0,2.0],[0,1.0]])
hx = [0.05,0.05]
prob = 1
fd = FD(bound,hx,prob)
M = fd.nx[0]; N = fd.nx[1]
X = fd.X
u_acc = UU(X,[0,0],prob).reshape(-1,1)               # exact solution on the grid
rig_in = (FF(prob,X).reshape(M,N))[1:M - 1,1:N - 1]  # f at interior nodes only
u_pred = fd.solve(rig_in)
# Max-norm error as a plain float.  The previous builtin max(abs(...)) over
# a 2-D (size, 1) array compared 1-element sub-arrays, which relies on
# deprecated truth-testing of single-element ndarrays.
print(np.abs(u_acc - u_pred).max())