Python调用cuRandSobol生成Sobol

最新推荐文章于 2024-10-14 23:40:45 发布

嘉诩

最新推荐文章于 2024-10-14 23:40:45 发布

阅读量258

点赞数 1

分类专栏： Python和GPU加速期权估值文章标签： python Cuda

本文链接：https://blog.csdn.net/dxlbuaa/article/details/136065985

版权

Python和GPU加速期权估值专栏收录该内容

5 篇文章 0 订阅

订阅专栏

Sobol低偏差序列在期权的蒙特卡洛估值中非常有用。scipy的qmc模块提供了相关的函数，但是速度比普通的伪随机数要慢。cuRand提供了sobol生成器，速度很快，但是cupy目前还没有集成，scipy下面的qmc模块在cupy中没有对应。Python下想使用cuRandSobol，目前还没有特别好用的模块，所以我们可以用c++编写共享库，然后通过Python的Ctypes方式调用。cuRandSobol和scipy的qmc用的Sobol算法都是JoeKuoD6，支持的最大维度为21201。

首先编写一个gen_sobol.cu文件

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdexcept>
#include <vector>

#include <cuda_runtime.h>
#include <curand.h>

// CUDA API error checking
#define CUDA_CHECK(err)                                                        \
  do {                                                                         \
    cudaError_t err_ = (err);                                                  \
    if (err_ != cudaSuccess) {                                                 \
      std::printf("CUDA error %d at %s:%d\n", err_, __FILE__, __LINE__);       \
      throw std::runtime_error("CUDA error");                                  \
    }                                                                          \
  } while (0)

// curand API error checking
#define CURAND_CHECK(err)                                                      \
  do {                                                                         \
    curandStatus_t err_ = (err);                                               \
    if (err_ != CURAND_STATUS_SUCCESS) {                                       \
      std::printf("curand error %d at %s:%d\n", err_, __FILE__, __LINE__);     \
      throw std::runtime_error("curand error");                                \
    }                                                                          \
  } while (0)


using data_type = float;
extern "C"{
void  gen_sobol(const int &n, const unsigned long long &offset,
                 const unsigned int &num_dimensions,                 
                 float* h_data) {

  curandOrdering_t order= CURAND_ORDERING_QUASI_DEFAULT;
  curandRngType_t rng= CURAND_RNG_QUASI_SOBOL32;
  cudaStream_t stream=NULL;
  curandGenerator_t gen=NULL;
  CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));

  /* Create quasi-random number generator */
  CURAND_CHECK(curandCreateGeneratorHost(&gen, CURAND_RNG_QUASI_SOBOL32));

  /* Set cuRAND to stream */
  CURAND_CHECK(curandSetStream(gen, stream));

  /* Set offset */
  CURAND_CHECK(curandSetGeneratorOffset(gen, offset));

  /* Set Dimension */
  CURAND_CHECK(curandSetQuasiRandomGeneratorDimensions(gen, num_dimensions));

  /* Set ordering */
  CURAND_CHECK(curandSetGeneratorOrdering(gen, order));

  /* 可以直接生成正态分布，也可以生成均匀分布 */
 // CURAND_CHECK(curandGenerateUniform(gen, h_data, n*num_dimensions));  
  CURAND_CHECK(curandGenerateNormal(gen, h_data, n*num_dimensions,0,1));

  /* Cleanup */
  CURAND_CHECK(curandDestroyGenerator(gen));
}
}

之后，我们可以用nvcc生成so

nvcc -Xcompiler -fPIC -shared -o gen_sobol.so  gen_sobol.cu -lcurand

在python端，可以使用ctypes调用


import numpy as np
import ctypes
from ctypes import *
from scipy.stats import norm
# extract cuda_sum function pointer in the shared object cuda_sum.so
def get_gen_sobol():
    dll = ctypes.CDLL('./gen_sobol.so', mode=ctypes.RTLD_GLOBAL)
    func = dll.gen_sobol
    func.argtypes = [POINTER(c_int), POINTER(c_ulonglong), POINTER(c_uint), POINTER(c_float)]
    return func

# create __cuda_sum function with get_cuda_sum()
__cuda_gen_sobol = get_gen_sobol()

# convenient python wrapper for __cuda_sum
# it does all job with types convertation
# from python ones to C++ ones
def cuda_gen_sobol(n, offset, num_dimensions, h_data):
#    a_p = n.ctypes.data_as(POINTER(c_int))
#    b_p = offset.ctypes.data_as(POINTER(c_ulonglong))
#    c_p = num_dimensions.ctypes.data_as(POINTER(c_uint))
    h_data_p=h_data.ctypes.data_as(POINTER(c_float))

    __cuda_gen_sobol(byref(n), byref(offset), byref(num_dimensions), h_data_p)
# testing, sum of two arrays of ones and output head part of resulting array
if __name__ == '__main__':
    n=65535
    offset=c_ulonglong(n+1)
    num_dimensions=520
    size=(num_dimensions,n)
    h_data = np.zeros(size).astype('float32')

    cuda_gen_sobol(c_int(n), offset, c_uint(num_dimensions), h_data)

    print(h_data)
    #和qmc比较
    dimension = num_undl * num_tp
    soboleng = qmc.Sobol(d=dimension, scramble=False)
    soboleng.fast_forward(n+1)        
    qs =soboleng.random(num_path)
    z=norm.ppf(qs)
    print(z)