有限差分需要循环解方程,每一列需要等待上一列的的计算结果出来才能进行下一步操作。但是cuda的syncthreads只能同步同一block中所有线程,如果问题规模比较大,跨越了block,那么彼此之间的同步就无法进行了。
Numba支持Cuda协作组 (Cooperative Groups),这样我们就可以实现所有线程的同步。
以下的例子改编自《Python金融数据分析》中的向下敲出期权的例子
使用的是CK法,本质上是求解线性方程,作者采用的是LU分解法,我们改为用矩阵的逆相乘,在核函数中的处理就简单很多了
import cupy as cp
from abc import ABC, abstractmethod
import cupyx.scipy.linalg as linalg
from concurrent.futures import ThreadPoolExecutor
from numba import cuda,float32
@cuda.jit()
def traverse(M,G):
row = cuda.grid(1)
g = cuda.cg.this_grid()
rows = M.shape[0]
if row >= rows:
return
cols = G.shape[1]-1
for col in range( cols-1,-1,-1):
sum=0
for i in range(rows):
sum+=M[row,i]*G[i+1,col+1]
G[row+1,col]=sum
g.sync()
class FDCnDo(object):
def __init__(
self, S0, K, r=0.05, T=1, sigma=0,
Sbarrier=0, Smax=1, M=1, N=1, is_put=False
):
self.S0 = S0
self.K = K
self.r = r
self.T = T
self.sigma = sigma
self.Smax = Smax
self.M, self.N = M, N
self.is_call = not is_put
self.i_values = cp.arange(self.M)
self.j_values = cp.arange(self.N)
self.grid = cp.zeros(shape=(self.M+1, self.N+1),dtype=cp.float32)
self.boundary_conds = cp.linspace(0, Smax, self.M+1)
self.barrier = Sbarrier
self.boundary_conds = \
cp.linspace(Sbarrier, Smax, M+1,dtype=cp.float32)
self.dS=(self.Smax-self.barrier)/float(self.M)
self.dt=self.T/float(self.N)
self.i_values = self.boundary_conds/self.dS
def price(self):
self.setup_boundary_conditions()
self.setup_coefficients()
self.traverse_grid()
return self.interpolate()
def setup_coefficients(self):
self.alpha = 0.25*self.dt*(
(self.sigma**2)*(self.i_values**2) - \
self.r*self.i_values)
self.beta = -self.dt*0.5*(
(self.sigma**2)*(self.i_values**2) + self.r)
self.gamma = 0.25*self.dt*(
(self.sigma**2)*(self.i_values**2) +
self.r*self.i_values)
self.M1 = -cp.diag(self.alpha[2:self.M], -1) + \
cp.diag(1-self.beta[1:self.M]) - \
cp.diag(self.gamma[1:self.M-1], 1)
self.M1inv=cp.linalg.inv(self.M1)
self.M2 = cp.diag(self.alpha[2:self.M], -1) + \
cp.diag(1+self.beta[1:self.M]) + \
cp.diag(self.gamma[1:self.M-1], 1)
self.M12=cp.dot(self.M1inv,self.M2)
def traverse_grid(self):
""" Solve using linear systems of equations """
blockdim = 32
griddim = self.M12.shape[0] // blockdim+1
traverse[griddim, blockdim](self.M12,self.grid)
return
def setup_boundary_conditions(self):
if self.is_call:
self.grid[:,-1] = cp.maximum(
0, self.boundary_conds - self.K)
self.grid[-1,:-1] = (self.Smax-self.K) * \
cp.exp(-self.r*self.dt*(self.N-self.j_values))
else:
self.grid[:,-1] = cp.maximum(
0, self.K-self.boundary_conds)
self.grid[0,:-1] = (self.K-self.Smax) * \
cp.exp(-self.r*self.dt*(self.N-self.j_values))
def interpolate(self):
"""
Use piecewise linear interpolation on the initial
grid column to get the closest price at S0.
"""
return cp.interp(
cp.array([self.S0]), self.boundary_conds, self.grid[:,0])
if __name__=='__main__':
option=FDCnDo(50, 60, r=0.1, T=5./12.,
sigma=0.4, Sbarrier=40, Smax=100, M=120, N=500)
print(option.price())
使用该方法可以大大加速有限差分法的速度,但是该方法不支持stream。