【CUDA】python利用numba库cuda编程

CUDA 编程部分与 C++ 版本基本一致
可参考c++版的:
CUDA编程基本入门学习笔记

看懂上面链接中的内容之后,就很容易理解 Numba 的 Python 代码了
下面直接放代码了:

from numba import cuda ,vectorize
import numpy as np
import math
from timeit import default_timer as timer

def func_cpu(a, b, c, th):
    """Threshold the absolute difference of two 2-D arrays on the CPU.

    For every position, ``c[y, x]`` is set to 255 when
    ``|a[y, x] - b[y, x]|`` is strictly greater than ``th`` and to 0
    otherwise.  The explicit Python loops are intentional: this function is
    the plain-CPU baseline that the GPU variants are timed against.

    Args:
        a: 2-D array exposing ``shape`` and ``[y, x]`` indexing.
        b: 2-D array indexed at the same positions as ``a``.
        c: Output array indexed at the same positions; modified in place.
        th: Threshold compared against the absolute difference.

    Returns:
        None. The result is written into ``c``.
    """
    rows, cols = a.shape[0], a.shape[1]
    for y in range(rows):
        for x in range(cols):
            # Subtract the smaller value from the larger one instead of
            # calling abs(), so unsigned dtypes never wrap around.
            if a[y, x] > b[y, x]:
                diff = a[y, x] - b[y, x]
            else:
                diff = b[y, x] - a[y, x]
            # This write was previously disabled by a stray triple-quoted
            # string, which left ``c`` untouched and ``th`` unused.
            c[y, x] = 255 if diff > th else 0


@cuda.jit
def func_gpu(a, b, c):
    """CUDA kernel: write the element-wise squared difference of a and b to c.

    Each thread begins at its global 1-D thread index and then advances by
    the total number of threads in the launch until it passes
    ``a.shape[0]``, so the whole array is covered regardless of how many
    blocks and threads were launched.

    Args:
        a: 1-D indexable input array.
        b: 1-D indexable input array, read at the same indices as ``a``.
        c: Output array, written at the same indices.
    """
    first = cuda.grid(1)        # blockDim.x * blockIdx.x + threadIdx.x
    stride = cuda.gridsize(1)   # gridDim.x * blockDim.x
    for i in range(first, a.shape[0], stride):
        delta = a[i] - b[i]
        c[i] = delta * delta

@cuda.jit
def func_gpu_2(a, b, c, d_th):
    """CUDA kernel: threshold the absolute difference of two 2-D arrays.

    For every position, ``c[y, x]`` is set to 255 when
    ``|a[y, x] - b[y, x]|`` is strictly greater than ``d_th[0]`` and to 0
    otherwise.  Each thread starts at its global (x, y) thread index and
    steps by the total launched thread count along each axis, so the full
    array is processed for any launch configuration.

    Args:
        a: 2-D input array exposing ``shape`` and ``[y, x]`` indexing.
        b: 2-D input array indexed at the same positions as ``a``.
        c: 2-D output array; modified in place.
        d_th: Array whose first element is the threshold; only ``d_th[0]``
            is read.
    """
    y_max = a.shape[0]
    x_max = a.shape[1]
    startX = cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x
    startY = cuda.blockDim.y * cuda.blockIdx.y + cuda.threadIdx.y
    gridX = cuda.gridDim.x * cuda.blockDim.x
    gridY = cuda.gridDim.y * cuda.blockDim.y
    th = d_th[0]  # loop-invariant, read once per thread
    for y in range(startY, y_max, gridY):
        for x in range(startX, x_max, gridX):
            # Subtract the smaller value from the larger one so unsigned
            # dtypes never wrap around.
            if a[y, x] > b[y, x]:
                diff = a[y, x] - b[y, x]
            else:
                diff = b[y, x] - a[y, x]
            # This write was previously disabled by a stray triple-quoted
            # string, which made the kernel a no-op on ``c``.
            if diff > th:
                c[y, x] = 255
            else:
                c[y, x] = 0




@vectorize(["float32 (float32 , float32 )"], target='cuda')
def func_gpu_3(a, b):
    """Return ``a - b`` for one pair of float32 scalars.

    The ``vectorize`` decorator turns this scalar function into a ufunc
    that is applied element-wise to its array arguments.
    """
    difference = a - b
    return difference

def main():
    """Time the CPU loop, the 2-D CUDA kernel and the vectorized CUDA ufunc.

    Each variant is run ten times on the same 128x128 float32 inputs and
    the wall-clock time of every run is printed in milliseconds.  For the
    CUDA kernel the timed region includes the host-to-device copies, the
    kernel launch and the device-to-host copy of the result.
    """
    N = 128
    repeats = 10
    th = 2

    # Build the inputs once, up front, so that the GPU and vectorize
    # sections do not depend on names leaking out of the CPU loop body.
    A = np.ones((N, N), dtype=np.float32) * 3
    B = np.ones((N, N), dtype=np.float32)
    C = np.zeros((N, N), dtype=np.float32)

    for _ in range(repeats):
        start = timer()
        func_cpu(A, B, C, th)
        elapsed = timer() - start
        print("CPU took %f ms " % (elapsed * 1000))
    print("------------------------------------")

    # Launch configuration and threshold buffer do not change between runs.
    blockdim = (32, 8)
    griddim = (32, 16)
    th_arr = np.zeros(1, dtype=np.uint8)
    th_arr[0] = th

    # NOTE(review): the first iteration is presumably slower because Numba
    # compiles a signature-less @cuda.jit kernel on its first call - confirm
    # before comparing numbers.
    for _ in range(repeats):
        start = timer()
        d_A = cuda.to_device(A)
        d_B = cuda.to_device(B)
        d_C = cuda.to_device(C)
        d_th = cuda.to_device(th_arr)
        func_gpu_2[griddim, blockdim](d_A, d_B, d_C, d_th)
        C = d_C.copy_to_host()
        elapsed = timer() - start
        print("GPU took %f ms" % (elapsed * 1000))

    print("------------------------------------")
    for _ in range(repeats):
        start = timer()
        C = func_gpu_3(A, B)
        elapsed = timer() - start
        print("vectorize GPU took %f ms" % (elapsed * 1000))


if __name__ == '__main__':
    main()

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值