写一个入门的vec add。
编译时出了这么个错误。之前是没有的,现在有了。想了想,又上网找了找,结果关于这类错误的资料好像还不多。
一定又是个低级的错误。记录下来,让那些和我一样(对,就是你,看这个帖子的你)下次写程序时小心些。
原因:核函数定义前面的 __global__ 没写。
编译器报错:error: a host function call cannot be configured
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <time.h>
# include <math.h>
# include <cuda.h>
#include <cuda_runtime.h>
// CPU reference implementation of element-wise vector addition.
//
// Writes A[i] + B[i] into C[i] for every i in [0, N), in increasing index
// order. Does nothing when N <= 0.
void sumArraysOnHost(float *A, float *B, float *C, const int N)
{
    const float *lhs = A;
    const float *rhs = B;
    float *out = C;

    // Walk the three arrays in lock-step, front to back.
    for (int left = N; left > 0; --left)
    {
        *out = *lhs + *rhs;
        ++lhs;
        ++rhs;
        ++out;
    }
}
// GPU kernel: element-wise vector addition, one element per block.
//
// Each block computes C[blockIdx.x] = A[blockIdx.x] + B[blockIdx.x];
// threadIdx is not used, so every thread of a block writes the same value to
// the same element.
//
// Launch layout: 1-D grid whose gridDim.x equals the number of elements to
// add. The kernel takes no length argument and performs no bounds check, so
// gridDim.x must not exceed the number of elements in A, B and C.
//
// The __global__ qualifier is mandatory. Without it this is an ordinary host
// function and launching it with <<<...>>> fails to compile with
// "error: a host function call cannot be configured".
__global__ void sumArrayOnDevice(float *A, float *B, float *C)
{
    const unsigned int idx = blockIdx.x;
    C[idx] = A[idx] + B[idx];
}
// Fills ip[0 .. size-1] with pseudo-random floats.
//
// Each element is the low 8 bits of one rand() result divided by 10, so every
// value lies in [0.0, 25.5]. The generator is not seeded in this function, so
// the values produced depend on whatever seed state rand() currently has.
// Does nothing when size <= 0.
void initialData(float *ip, int size)
{
    int filled = 0;
    while (filled < size)
    {
        const int lowByte = rand() & 0xFF;   // keep only bits 0..7 -> 0..255
        ip[filled] = (float)lowByte / 10.0f;
        ++filled;
    }
}
// Terminates the program with a diagnostic when a CUDA runtime call fails.
// `what` names the operation for the error message.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Adds two small random vectors on the host and on the device and prints
// both results side by side so they can be compared.
//
// Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
// CUDA call fails.
int main(int argc, char ** argv)
{
    (void)argc;
    (void)argv;

    const int nElem = 12;
    const size_t nBytes = nElem * sizeof(float);

    // Host buffers: two inputs, the CPU result, and the copy of the GPU result.
    float *h_A = (float *)malloc(nBytes);
    float *h_B = (float *)malloc(nBytes);
    float *h_C = (float *)malloc(nBytes);
    float *h_C_host = (float *)malloc(nBytes);
    if (h_A == NULL || h_B == NULL || h_C == NULL || h_C_host == NULL)
    {
        fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)nBytes);
        free(h_A);
        free(h_B);
        free(h_C);
        free(h_C_host);
        return EXIT_FAILURE;
    }

    initialData(h_A, nElem);
    initialData(h_B, nElem);
    // Output buffers start zeroed so stale data can never pass for a result.
    memset(h_C, 0, nBytes);
    memset(h_C_host, 0, nBytes);

    sumArraysOnHost(h_A, h_B, h_C, nElem);

    float *d_A = NULL;
    float *d_B = NULL;
    float *d_C = NULL;
    checkCuda(cudaMalloc((void **)&d_A, nBytes), "cudaMalloc d_A");
    checkCuda(cudaMalloc((void **)&d_B, nBytes), "cudaMalloc d_B");
    checkCuda(cudaMalloc((void **)&d_C, nBytes), "cudaMalloc d_C");

    checkCuda(cudaMemcpy(d_A, h_A, nBytes, cudaMemcpyHostToDevice), "copy A to device");
    checkCuda(cudaMemcpy(d_B, h_B, nBytes, cudaMemcpyHostToDevice), "copy B to device");

    // One block per element, one thread per block; the grid size is tied to
    // nElem so it cannot drift from the buffer size.
    sumArrayOnDevice<<<nElem, 1>>>(d_A, d_B, d_C);
    checkCuda(cudaGetLastError(), "kernel launch");          // bad launch configuration
    checkCuda(cudaDeviceSynchronize(), "kernel execution");  // faults raised while running

    checkCuda(cudaMemcpy(h_C_host, d_C, nBytes, cudaMemcpyDeviceToHost), "copy C to host");

    for (int i = 0; i < nElem; i++)
    {
        printf("A[%d] = %5.2f B[%d]= %5.2f C[%d]= %5.2f C_dev[%d]=%5.2f \n",
               i, h_A[i], i, h_B[i], i, h_C[i], i, h_C_host[i]);
    }

    free(h_A);
    free(h_B);
    free(h_C);
    free(h_C_host);
    checkCuda(cudaFree(d_A), "cudaFree d_A");
    checkCuda(cudaFree(d_B), "cudaFree d_B");
    checkCuda(cudaFree(d_C), "cudaFree d_C");
    return 0;
}