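/*
 * Source program taken from the 炼数成金 tutorial.
 *
 * Question: inside a GPU kernel, why does it look as if a scalar variable
 * can be assigned from an array variable, while the CPU compiler rejects it?
 *
 * Answer: it cannot. The kernel statement `out = d_in[idx]` copies ONE float
 * element into a float. It only looks like an array because every thread
 * runs the kernel with its own private `out` and its own `idx`. The same
 * element-wise assignment (`b = a[i]`) is equally legal on the CPU; what the
 * CPU compiler rejects is `b = a`, assigning a whole array to an int.
 */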
#include <iostream>
using namespace std;
/*
 * Copies d_in to d_out element by element: the thread with index t in its
 * block reads d_in[t] and writes d_out[t].
 *
 * Launch requirements: only threadIdx.x is used as the index and there is no
 * bounds check, so both buffers must hold at least blockDim.x floats. With
 * more than one block, every block would touch the same elements.
 *
 * d_out: device buffer that receives the copy.
 * d_in:  device buffer that is read.
 *
 * About `out`: it is an ordinary float local variable, not an array. Each
 * thread executing the kernel has its own private `out` and its own `idx`,
 * so `out = d_in[idx]` stores exactly one element per thread. No thread ever
 * overwrites another thread's `out`, which is why all elements survive.
 */
__global__ void global_scan(float* d_out,float* d_in){
    const int idx = threadIdx.x;
    // One float per thread; the former `0.00f` initialiser was a dead store.
    const float out = d_in[idx];
    // Every thread of the block has finished reading d_in before any thread
    // writes d_out. This only matters if the two buffers overlap.
    __syncthreads();
    d_out[idx] = out;
    // No barrier here: nothing follows the store, so a block-wide
    // synchronisation right before kernel exit would have no effect.
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err,const char* what){
    if(err == cudaSuccess){
        return true;
    }
    std::cerr<<what<<" failed: "<<cudaGetErrorString(err)<<std::endl;
    return false;
}

/*
 * Host driver: fills an 8-element array with 0..7, copies it to the device,
 * runs global_scan with one block of 8 threads, copies the result back and
 * prints it on one line.
 *
 * Returns 0 on success and 1 if any CUDA runtime call or the kernel launch
 * fails (a diagnostic is written to stderr and nothing is printed to stdout).
 * argc/argv are unused.
 */
int main(int argc,char** argv){
    const int ARRAY_SIZE = 8;
    const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float);

    // Generate the input array on the host.
    float h_in[ARRAY_SIZE];
    for(int i=0;i<ARRAY_SIZE;i++){
        h_in[i] = float(i);
    }
    float h_out[ARRAY_SIZE];

    // Device pointers start out null so the cleanup below is safe even when
    // an allocation fails (cudaFree on a null pointer does nothing).
    float* d_in = 0;
    float* d_out = 0;

    // Allocate device memory and upload the input; stop at the first failure.
    bool ok = cudaOk(cudaMalloc((void**) &d_in,ARRAY_BYTES),"cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**) &d_out,ARRAY_BYTES),"cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in,h_in,ARRAY_BYTES,cudaMemcpyHostToDevice),
                     "cudaMemcpy(host to device)");

    if(ok){
        // Launch the kernel: one block, one thread per element.
        global_scan<<<1,ARRAY_SIZE>>>(d_out,d_in);
        // A launch does not return a status; query it explicitly. The
        // blocking copy that follows waits for the kernel and reports any
        // error raised while it ran.
        ok = cudaOk(cudaGetLastError(),"global_scan launch")
          && cudaOk(cudaMemcpy(h_out,d_out,ARRAY_BYTES,cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device to host)");
    }

    if(ok){
        // Print the resulting array; h_out is only valid on success.
        for(int i=0;i<ARRAY_SIZE;i++){
            std::cout<<h_out[i]<<" ";
        }
        std::cout<<std::endl;
    }

    // Free device memory (best effort; runs on every path).
    cudaFree(d_in);
    cudaFree(d_out);
    return ok ? 0 : 1;
}
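/*
 * Output of the program above:
 * 0 1 2 3 4 5 6 7
 */
// The listing below is a separate, host-only program with its own main().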
#include<iostream>
using namespace std;
/*
 * Host-only counterpart of the kernel question: shows which array
 * assignments the CPU compiler accepts. Prints element 2 twice, once through
 * the array and once through a pointer to it.
 */
int main()
{
    int values[3] = {1, 2, 3};
    int scalar;              // target of the rejected assignment shown below
    int* alias = values;     // the array decays to the address of its first element

    // scalar = values;      // rejected by the compiler: an array cannot be
    //                       // assigned to an int. The kernel never does this
    //                       // either; it assigns one element (d_in[idx]) to a
    //                       // float, which corresponds to `scalar = values[i]`
    //                       // here and is perfectly legal.

    std::cout << " a[2]= " << values[2] << " c[2]= " << *(alias + 2) << std::endl;
}
/*
 * Output of the program above:
 *  a[2]= 3 c[2]= 3
 */