#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <stdio.h>
using namespace std;
// File-scope array of NUM_ELEM unsigned ints, declared with the __device__
// qualifier and internal linkage (static).
// No code visible in this file reads or writes this array.
// NOTE(review): NUM_ELEM is not defined in the visible part of this file;
// presumably it comes from a macro or header elsewhere - confirm.
__device__ static unsigned int d_tmp[NUM_ELEM];
// Kernel: ORs together (packed_array[i] << i) for i in [0, KERNEL_LOOP) and
// stores the resulting word in data[tid], where tid is the thread's flat
// index computed from the x dimension of the block and grid only.
//
// Parameters:
//   data          - output buffer; element tid is overwritten by thread tid.
//   num_elements  - number of elements to write; threads whose tid is not
//                   below this value do nothing.
//
// The accumulation loop does not use tid, so every element that is written
// receives the same value.
//
// NOTE(review): KERNEL_LOOP and packed_array are not defined in the visible
// part of this file; presumably they are declared elsewhere - confirm. If
// KERNEL_LOOP can reach the bit width of the shifted operand, the shift
// below is undefined - verify against their definitions.
__global__ void test_gpu_gmem(unsigned int * const data, const unsigned int num_elements)
{
    const unsigned int tid = threadIdx.x + (blockDim.x * blockIdx.x);

    // Threads that fall past the end of the buffer have nothing to write.
    if (tid >= num_elements)
    {
        return;
    }

    // Function-local accumulator; it is a separate object from the
    // file-scope d_tmp array and never touches it.
    unsigned int packed = 0;
    for (int bit = 0; bit < KERNEL_LOOP; ++bit)
    {
        packed = packed | (packed_array[bit] << bit);
    }

    data[tid] = packed;
}
// Host entry point. Performs no work (no kernel is launched from here) and
// reports success to the caller with exit status 0.
int main()
{
    return 0;
}