#include "cuda.h"
#include "common/book.h"
#include "common/cpu_bitmap.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
//#include "GL/GLU.h"
//#include "GL/glut.h"
// NOTE(review): DIM is not referenced in the visible part of this file;
// presumably the side length, in pixels, of the square output image that the
// host code allocates and launches the kernel over — confirm against the
// host-side code.
#define DIM 1024
// Pi written as a single-precision literal (note the trailing 'f'), so
// expressions that use it stay in float arithmetic. The literal carries more
// digits than a float can hold and is rounded to the nearest representable
// float. The kernel uses it to form the 2*PI/period factor passed to sinf().
#define PI 3.1415926535897932f
__global__ void kernel( unsigned char *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
__shared__ float shared[16][16];
// now calculate the value at that position
const float period = 128.0f;
shared[threadIdx.x][threadIdx.y] =
255 * (sinf(x*2.0f*PI/ period) + 1.0f) *
(sinf(y*2.0f*PI/ period) + 1.0f) / 4.0f;
__syncthreads();
ptr[offset*4 + 0] = 0;
ptr[offset*4 &
NVIDIA高性能计算——基于共享内存的线程同步
最新推荐文章于 2023-09-17 14:04:04 发布