#include "../common/book.h"

#define N 10

/*
 * CPU reference implementation of element-wise vector addition.
 *
 * Writes c[i] = a[i] + b[i] for every i in [0, N).
 *
 *   a, b  input arrays, each with at least N readable elements
 *   c     output array with at least N writable elements
 *
 * The loop is written as "start index + stride" on purpose: with a single
 * CPU the start index is 0 and the stride is 1.
 */
void add( int *a, int *b, int *c ) {
    int tid = 0;    // this is CPU zero, so we start at zero
    while (tid < N) {
        c[tid] = a[tid] + b[tid];
        tid += 1;   // we have one CPU, so we increment by one
    }
}
/*
 * Host-only driver: fills two N-element arrays, sums them element-wise
 * with add(), and prints one "a + b = c" line per element.
 *
 * Returns 0 on completion.
 */
int main( void ) {
    int a[N], b[N], c[N];

    // fill the arrays 'a' and 'b' on the CPU
    for (int i=0; i<N; i++) {
        a[i] = -i;
        b[i] = i * i;
    }

    add( a, b, c );

    // display the results
    for (int i=0; i<N; i++) {
        printf( "%d + %d = %d\n", a[i], b[i], c[i] );
    }

    return 0;
}
add_loop_gpu.cu
#include "../common/book.h"

#define N 10

/*
 * Kernel: element-wise vector addition, c[i] = a[i] + b[i].
 *
 * Each thread takes blockIdx.x as its element index, so one block handles
 * one element. The launch configuration is not visible in this listing;
 * presumably the kernel is launched with at least N blocks -- confirm
 * against the call site.
 *
 *   a, b  input arrays, dereferenced in device code; at least N elements
 *   c     output array, written in device code; at least N elements
 */
__global__ void add( int *a, int *b, int *c ) {
    int tid = blockIdx.x;    // this thread handles the data at its thread id

    // Check the index inside the kernel so that blocks beyond the end of
    // the data do not access memory out of bounds.
    if (tid < N)
        c[tid] = a[tid] + b[tid];
}
int main(
GPU计算的应用前景在很大程度上取决于能否从很多问题中发掘出大规模并行性。add_loop_cpu.cu: #include "../common/book.h" #define N 10 void add( int *a, int *b, int *c ) { int tid = 0; // this is CPU zero, so we start at ze...