因为程序原因,需要同时使用电脑的两个 GPU 进行计算,下面是开启双 GPU 的方法。
首先我们要知道一些概念:
同一时间,一个 CPU 进程只能使用一个 device(也就是 GPU),而在 device 上进行的内存分配等操作都需要一个 context 去管理。
所以,我们的思路就是:在 CPU 端使用多线程,让每个线程开启不同的 device,并各自创建 context。下面是测试代码:
import pycuda
from pycuda import gpuarray
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import threading
import numpy
import time


class GPUThread(threading.Thread):
    """Worker thread bound to one GPU device.

    Each thread creates its own CUDA context on its device, runs the test
    kernel, and pops the context before exiting — a context is bound to a
    single device and must be managed per thread.
    """

    def __init__(self, number, some_array):
        threading.Thread.__init__(self)
        self.number = number          # GPU device index this thread attaches to
        self.some_array = some_array  # host array to copy onto this device

    def run(self):
        self.dev = cuda.Device(self.number)
        self.ctx = self.dev.make_context()
        try:
            self.array_gpu = gpuarray.to_gpu(self.some_array)
            uuout = test_kernel(self.array_gpu)
            print("device name is:", self.dev.name())
            # Thread.getName() is deprecated; the .name attribute is the
            # modern equivalent.
            print("successful exit from thread:", self.name)
            del self.array_gpu
        finally:
            # Always release the context, even if the copy or launch raises;
            # otherwise the device is left with a dangling current context.
            self.ctx.pop()
            del self.ctx


def test_kernel(input_array_gpu):
    """Run a trivial kernel (out[i] = in[i] + 6) and return the host result.

    Assumes *input_array_gpu* holds at least 64 float32 elements, matching
    the fixed block size used below.
    """
    mod = SourceModule("""
        __global__ void f(float * out, float * in)
        {
            int idx = threadIdx.x;
            out[idx] = in[idx] + 6;
        }
        """)
    func = mod.get_function("f")
    output_array_gpu = gpuarray.zeros((64,), numpy.float32)
    func(output_array_gpu, input_array_gpu,
         block=(64, 1, 1), grid=(1, 1))
    uuout = output_array_gpu.get()
    return uuout
# Initialise the CUDA driver before any Device queries.
cuda.init()

some_array = numpy.full((64,), 1).astype(numpy.float32)
num = cuda.Device.count()

gpu_thread_list = []
start_time = time.time()

# Launch one worker thread per visible GPU; each worker opens its own
# context on its device.
for i in range(num):
    gpu_thread = GPUThread(i, some_array)
    gpu_thread.start()
    gpu_thread_list.append(gpu_thread)

# Block until every worker has finished and released its context.
for worker in gpu_thread_list:
    worker.join()
在 pycuda 下有两种方式进行 device 操作:
第一种是交给 import pycuda.autoinit 自动管理;
第二种是手动开启:
import pycuda
import pycuda.driver as cuda

# Manual device/context management (the alternative to pycuda.autoinit).
cuda.init()
num = cuda.Device.count()   # number of visible GPUs
# Valid device indices are 0 .. num-1; the original used cuda.Device(num),
# which is out of range. Attach to the last device here.
dev = cuda.Device(num - 1)
ctx = dev.make_context()    # open a context on this device
# ... do work on the device ...
ctx.pop()                   # a manually created context must be popped when done