pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image is invalid

pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image is invalid
我遇到这个错误的原因是:pycuda 的 SourceModule 中指定的 arch 与当前显卡的算力不匹配。

mod = SourceModule('''
// Fixed-point bilinear resize for interleaved 3-channel 8-bit images.
// Interpolation weights are integers scaled by 2^INTER_RESIZE_COEF_BITS.
#define INTER_RESIZE_COEF_BITS 11
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
// A product of two weights carries twice the fractional bits.
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)

typedef unsigned char uint8_t;

// Clamp value into the inclusive range [low, high].
static __inline__ __device__ int limit(int value, int low, int high){
    return value < low ? low : (value > high ? high : value);
}

// Convert a CAST_BITS fixed-point accumulator back to an integer,
// rounding to nearest by adding half of the scale before shifting.
static __inline__ __device__ int resize_cast(int value){
    return (value + (1 << (CAST_BITS - 1))) >> CAST_BITS;
}

// Bilinear resize, one thread per destination pixel.
//
//   src / dst             : 8-bit images with 3 interleaved channels per pixel.
//   src_line_size /
//   dst_line_size         : distance between consecutive rows, in bytes.
//   sx, sy                : factors that map destination coordinates to source
//                           coordinates (presumably src_size / dst_size;
//                           confirm against the caller).
//   edge                  : number of threads that do work; threads with a flat
//                           index >= edge return immediately (presumably
//                           dst_width * dst_height; confirm against the caller).
//
// Launch layout: 1D grid of 1D blocks covering at least `edge` threads.
// NOTE(review): despite its name, this kernel performs no normalization; it
// writes plain resized 8-bit values.
__global__ void resize_bilinear_and_normalize_kernel(
    const uint8_t* src, int src_line_size, int src_width, int src_height,
    uint8_t* dst, int dst_line_size, int dst_width, int dst_height,
    float sx, float sy, int edge
){
    int position = blockDim.x * blockIdx.x + threadIdx.x;
    if (position >= edge) return;

    // Flat index -> destination pixel coordinates.
    int dx = position % dst_width;
    int dy = position / dst_width;

    // Pixel-centre aligned mapping into the source image.
    float src_x = (dx + 0.5f) * sx - 0.5f;
    float src_y = (dy + 0.5f) * sy - 0.5f;

    // Neighbouring source rows/columns, clamped to the image.
    int y_low  = floorf(src_y);
    int x_low  = floorf(src_x);
    int y_high = limit(y_low + 1, 0, src_height - 1);
    int x_high = limit(x_low + 1, 0, src_width - 1);
    y_low = limit(y_low, 0, src_height - 1);
    x_low = limit(x_low, 0, src_width - 1);

    // Fixed-point fractional weights. They are clamped to [0, SCALE]: at the
    // image border the offset from the clamped low index can fall outside
    // [0, 1], which would give negative or oversized weights and let
    // weight * pixel overflow a signed int. Whenever the clamp is active the
    // low and high taps on that axis are the same pixel, so the interpolated
    // value is unchanged.
    int ly = limit((int)rintf((src_y - y_low) * INTER_RESIZE_COEF_SCALE), 0, INTER_RESIZE_COEF_SCALE);
    int lx = limit((int)rintf((src_x - x_low) * INTER_RESIZE_COEF_SCALE), 0, INTER_RESIZE_COEF_SCALE);
    int hy = INTER_RESIZE_COEF_SCALE - ly;
    int hx = INTER_RESIZE_COEF_SCALE - lx;

    // Weights of the four taps; they sum to 2^CAST_BITS.
    int w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

    const uint8_t* v1 = src + y_low  * src_line_size + x_low  * 3;
    const uint8_t* v2 = src + y_low  * src_line_size + x_high * 3;
    const uint8_t* v3 = src + y_high * src_line_size + x_low  * 3;
    const uint8_t* v4 = src + y_high * src_line_size + x_high * 3;
    uint8_t* output_ptr = dst + dy * dst_line_size + dx * 3;

    // Blend each of the three channels independently.
    for (int c = 0; c < 3; ++c) {
        output_ptr[c] = resize_cast(w1 * v1[c] + w2 * v2[c] + w3 * v3[c] + w4 * v4[c]);
    }
}
''', arch="sm_80")  # arch must match the GPU's compute capability; if arch is omitted, PyCUDA uses the current device's

解决方法:将 arch 改为你当前显卡对应的算力(例如算力为 8.6 的显卡应写作 arch="sm_86")。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值