pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image is invalid

qq_43072005

已于 2023-11-16 11:10:05 修改

阅读量520

点赞数

文章标签： python

于 2023-11-16 11:05:45 首次发布

本文链接：https://blog.csdn.net/qq_43072005/article/details/134437335

版权

pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image is invalid
我出现这个错误的原因是：运行 pycuda 核函数时，SourceModule 中指定的 arch 与当前显卡的算力（compute capability）不匹配。

mod = SourceModule('''
#define INTER_RESIZE_COEF_BITS 11
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)

typedef unsigned char uint8_t;
// Clamp value to the range [low, high].
// Returns low when value is below low, high when value is above high,
// and value itself otherwise. The lower bound is tested first.
static __inline__ __device__ int limit(int value, int low, int high){
    if (value < low) {
        return low;
    }
    if (value > high) {
        return high;
    }
    return value;
}

// Scale a fixed-point accumulator back down by CAST_BITS bits.
// Half of (1 << CAST_BITS) is added before the right shift, so the result is
// rounded to the nearest integer instead of being truncated.
static __inline__ __device__ int resize_cast(int value){
    const int rounding_bias = 1 << (CAST_BITS - 1);
    const int biased = value + rounding_bias;
    return biased >> CAST_BITS;
}

// Bilinear resize using fixed-point integer weights.
//
// Each thread with a flat index below edge handles one destination pixel and
// writes three consecutive bytes for it. Pixels are addressed as
// base + y * line_size + x * 3 in both the source and the destination.
//
// Parameters:
//   src, src_line_size, src_width, src_height: source buffer, the per-row
//       offset used for indexing, and the extents used to clamp sample
//       coordinates.
//   dst, dst_line_size, dst_width: destination buffer, the per-row offset used
//       for indexing, and the width used to split the flat index into
//       column and row.
//   dst_height: not read by this kernel.
//   sx, sy: factors that map destination pixel centers to source coordinates
//       (presumably src_width / dst_width and src_height / dst_height;
//       confirm against the caller).
//   edge: threads whose flat index is >= edge exit immediately (presumably
//       dst_width * dst_height; confirm against the caller).
//
// NOTE(review): despite the name, no normalization step is performed here;
// the output is plain 8-bit interpolated data.
__global__ void resize_bilinear_and_normalize_kernel(
    uint8_t* src, int src_line_size, int src_width, int src_height, 
    uint8_t* dst, int dst_line_size, int dst_width, int dst_height, 
    float sx, float sy, int edge
){
    const int out_index = blockDim.x * blockIdx.x + threadIdx.x;
    if (out_index >= edge) {
        return;
    }

    // Split the flat index into destination column and row.
    const int col = out_index % dst_width;
    const int row = out_index / dst_width;

    // Map the destination pixel center into source coordinates.
    const float fx = (col + 0.5f) * sx - 0.5f;
    const float fy = (row + 0.5f) * sy - 0.5f;

    // Pick the two neighbouring source columns and rows, clamped to the image.
    const int last_x = src_width - 1;
    const int last_y = src_height - 1;
    const int x_floor = floorf(fx);
    const int y_floor = floorf(fy);
    const int x0 = limit(x_floor, 0, last_x);
    const int y0 = limit(y_floor, 0, last_y);
    const int x1 = limit(x_floor + 1, 0, last_x);
    const int y1 = limit(y_floor + 1, 0, last_y);

    // Fixed-point interpolation weights. Each pair sums to
    // INTER_RESIZE_COEF_SCALE, so the four products below sum to its square.
    const int wx1 = rint((fx - x0) * INTER_RESIZE_COEF_SCALE);
    const int wy1 = rint((fy - y0) * INTER_RESIZE_COEF_SCALE);
    const int wx0 = INTER_RESIZE_COEF_SCALE - wx1;
    const int wy0 = INTER_RESIZE_COEF_SCALE - wy1;

    const int w_tl = wy0 * wx0;
    const int w_tr = wy0 * wx1;
    const int w_bl = wy1 * wx0;
    const int w_br = wy1 * wx1;

    // Addresses of the four source taps and of the destination pixel.
    const uint8_t* top_row    = src + y0 * src_line_size;
    const uint8_t* bottom_row = src + y1 * src_line_size;
    const uint8_t* p_tl = top_row + x0 * 3;
    const uint8_t* p_tr = top_row + x1 * 3;
    const uint8_t* p_bl = bottom_row + x0 * 3;
    const uint8_t* p_br = bottom_row + x1 * 3;
    uint8_t* out = dst + row * dst_line_size + col * 3;

    // Blend the three bytes of the pixel and scale back to 8-bit range.
    for (int c = 0; c < 3; ++c) {
        out[c] = resize_cast(w_tl * p_tl[c] + w_tr * p_tr[c] + w_bl * p_bl[c] + w_br * p_br[c]);
    }
}
''', arch="sm_80")

解决方法：将 arch 改为与你当前显卡算力相对应的值（例如算力 7.5 对应 arch="sm_75"）。

qq_43072005

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image is invalid

pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image is invalid。我出现错误的原因是运行 pycuda 的核函数时使用了 `mod = SourceModule('''...''', arch="sm_80")`，其中的 arch 与当前显卡算力不匹配；将 arch 改为你现在的显卡算力即可。
复制链接

扫一扫