main.cpp
#include "interactions.h"
#include <stdio.h>//加载合适的头文件
#include <stdlib.h>
#include "kernel.h"
#ifdef _WIN32
#define WINDOWS_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>
#endif // _WIN32
#ifdef __APPLE__
#include <GLUT/glut.h>
#else
#include <GL/glew.h>
#include <GL/freeglut.h>
#endif // __APPLE__
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#define ITERS_PER_RENDER 50 //雅克比迭代次数
GLuint pbo = 0;
GLuint tex = 0;
struct cudaGraphicsResource *cuda_pbo_resource;
void render()//创造属于自己的CUDA/OPENGL互操作应用程序时,唯一需要改变的就是render函数
{
uchar4 *d_out = 0;
cudaGraphicsMapResources(1, &cuda_pbo_resource, 0);
cudaGraphicsResourceGetMappedPointer((void**)&d_out, NULL, cuda_pbo_resource);
for (int i = 0; i < ITERS_PER_RENDER; i++)
{
kernelLauncher(d_out, d_temp, W, H, bc);
}
cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0);
char title[128];
sprintf(title, "Temperature Visualizer - Iterations=%4d, " "T_s=%3.0f, T_a=%3.0f, T_g=%3.0f", iterationCount, bc.t_s, bc.t_a, bc.t_g);
glutSetWindowTitle(title);
}
void drawTexture() {
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, W, H, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);//建立二维纹理图像,创建四边形图像
glEnable(GL_TEXTURE_2D);
glBegin(GL_QUADS);
glTexCoord2f(0.0f, 0.0f); glVertex2f(0, 0);
glTexCoord2f(0.0f, 1.0f); glVertex2f(0, H);
glTexCoord2f(1.0f, 1.0f); glVertex2f(W, H);
glTexCoord2f(1.0f, 0.0f); glVertex2f(W, 0);
glEnd();
glDisable(GL_TEXTURE_2D);
}
void display()//窗口中显示的内容
{
render();//计算新像素值
drawTexture();//画OPENGL纹理
glutSwapBuffers();//交换显示缓冲区。
//双缓冲是一种用来提高图形程序效率的常见技术。
//一个缓冲区提供可被读取用于显示的内存,与此同时,另一个缓冲区提供一段内存,保证下一帧的内容能够被写入。
//在图形序列的帧与帧之间,两个缓冲区交换它们的读/写角色。
}
void initGLUT(int *argc, char **argv)
{
glutInit(argc, argv);
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
glutInitWindowSize(W, H);
glutCreateWindow("Temp. Vis.");
#ifndef __APPLE__
glewInit();
#endif // !__APPLE__
}
void initPixelBuffer()//初始化像素缓冲区
{
glGenBuffers(1, &pbo);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
glBufferData(GL_PIXEL_UNPACK_BUFFER, 4 * W*H * sizeof(GLubyte), 0, GL_STREAM_DRAW);
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D, tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo, cudaGraphicsMapFlagsWriteDiscard);
//CUDA注册OPENGL缓冲区
//如果映射成功,则将缓冲区内存的控制交给CUDA来进行写输出。
//如果没有映射成功,则返回缓冲区内存的控制给OPENGL用于显示。
}
void exitfunc()
{
if (pbo)
{
cudaGraphicsUnregisterResource(cuda_pbo_resource);
glDeleteBuffers(1, &pbo);
glDeleteTextures(1, &tex);
}
cudaFree(d_temp);
}
int main(int argc, char** argv)
{
cudaMalloc(&d_temp, W*H * sizeof(float));
resetTemperature(d_temp, W, H, bc);//启动内核函数 初始化温度数组
printInstructions();//将一些用户指令打印到命令窗口
initGLUT(&argc, argv);//初始化GLUT库,并且设置图像窗口的规格,包括显示模式(RGBA),缓冲区(double型),尺寸(W*H)和标题。
gluOrtho2D(0, W, H, 0);//建立视觉变换(简单的投影)。
glutKeyboardFunc(keyboard);//键盘和鼠标的交互
glutMouseFunc(mouse);
glutIdleFunc(idle);//迭代过程在用户操作中不断更新
glutDisplayFunc(display);
initPixelBuffer();
glutMainLoop();//
atexit(exitfunc);//执行清理工作,删除OPENGL的像素缓冲和纹理。
return 0;
}
kernel.h
#pragma once
#ifndef KERNEL_H
#define KERNEL_H
struct uchar4;//防止nvcc启动C++编译器产生错误,而声明
typedef struct //包含了所有的边界条件信息(包括管道的坐标和半径)
{
int x, y;
float rad;
int chamfer;
float t_s, t_a, t_g;
}BC;
void kernelLauncher(uchar4 *d_out, float *d_temp, int w, int h, BC bc);
void resetTemperature(float *d_temp, int w, int h, BC bc);
#endif // !KERNEL_H
interactions.h
#pragma once
#ifndef INTERACTIONS_H
#define INTERACTIONS_H
#include "kernel.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <GLUT/glut.h>
#else
#include <GL/glew.h>
#include <GL/freeglut.h>
#endif // __APPLE__
#define MAX(x,y) (((x)>(y))?(x):(y))
#define W 640
#define H 640
#define DT 1.f
float *d_temp = 0;
int iterationCount = 0;
BC bc = { W / 2,H / 2,W / 10.f,150,212.f,70.f,0.f };
void keyboard(unsigned char key, int x, int y)
{
if (key=='S')
bc.t_s += DT;
if (key == 's')
bc.t_s -= DT;
if (key == 'A')
bc.t_a += DT;
if (key == 'a')
bc.t_a -= DT;
if (key == 'G')
bc.t_g += DT;
if (key == 'g')
bc.t_g -= DT;
if (key=='R')
bc.rad += DT;
if (key == 'r')
bc.rad=MAX(0.f,bc.rad-DT);
if (key == 'C')
++bc.chamfer;
if (key == 'c')
--bc.chamfer;
if (key == 'z')
resetTemperature(d_temp, W, H, bc);
if (key == 27) exit(0);
glutPostRedisplay();
}
void mouse(int button, int state, int x, int y)
{
bc.x = x, bc.y = y;
glutPostRedisplay();
}
void idle(void)
{
++iterationCount;
glutPostRedisplay();
}
void printInstructions()
{
printf("Temperature visualizer\n");
printf("Relocate source with mouse click\n");
printf("Change source temperature (-/+):s/S\n");
printf("Change air temperature (-/+):a/A\n");
printf("Change ground temperature (-/+):g/G\n");
printf("Change pipe radius (-/+):r/R\n");
printf("Change chamfer (-/+):c/C\n");
printf("Reset to air temperature :z\n");
printf("Exit :Esc\n");
}
#endif // !INTERACTIONS_H
kernel.cu
#include "kernel.h"
#define TX 32//线程数
#define TY 32
#define RAD 1
int divUp(int a, int b) //用来计算可以覆盖一个计算网格的特定大小线程块的数量
{
return (a + b - 1) / b;
}
//确保颜色值在【0,255】范围内
__device__ unsigned char clip(int n) { return n > 255 ? 255 : (n < 0 ? 0 : n); }
//避免超出范围的采样值。idxClip(i,N)返回一个在[0,N-1]区间内的int类型值(例如一个长度为N的数组的合法索引)。
__device__ int idxClip(int idx, int idxMax) { return idx > (idxMax - 1) ? (idxMax - 1) : (idx < 0 ? 0 : idx); }
//计算一个大小为width*height的二维数组中第col列,第row行元素在线性一维数组中所对应的索引
__device__ int flatten(int col, int row, int width, int height)
{
return idxClip(col, width) + idxClip(row, height)*width;
}
__global__ void resetKernel(float *d_temp, int w, int h, BC bc)
{
const int col = blockIdx.x*blockDim.x + threadIdx.x;
const int row = blockIdx.y*blockDim.y + threadIdx.y;
if ((col>=w)||(row>=h))
return;
d_temp[row*w + col] = bc.t_a;
}
//给所有点赋初始颜色黑色(完全不透明),然后将一块(包括必要的重叠单元)已经存在的温度值加载到共享内存中。
__global__ void tempKernel(uchar4 *d_out, float *d_temp, int w, int h, BC bc)
{
extern __shared__ float s_in[];
const int col = blockIdx.x*blockDim.x + threadIdx.x;
const int row = blockIdx.y*blockDim.y + threadIdx.y;
if ((col>=w)||(row>=h)) return;
const int idx = flatten(col, row, w, h);
const int s_w = blockDim.x + 2 * RAD;
const int s_h = blockDim.y + 2 * RAD;
const int s_col = threadIdx.x + RAD;
const int s_row = threadIdx.y + RAD;
const int s_idx = flatten(s_col, s_row, s_w, s_h);
d_out[idx].x = 0;//red逐渐远离平衡(即否决稳定的其他投票)
d_out[idx].y = 0;//green
d_out[idx].z = 0;//blue代表衰减到平衡(即稳定的投票)
d_out[idx].w = 255;
s_in[s_idx] = d_temp[idx];
if (threadIdx.x<RAD)
{
s_in[flatten(s_col - RAD, s_row, s_w, s_h)] = d_temp[flatten(col - RAD, row, w, h)];
s_in[flatten(s_col + blockDim.x, s_row, s_w, s_h)] = d_temp[flatten(col + blockDim.x, row, w, h)];
}
if (threadIdx.y<RAD)
{
s_in[flatten(s_col, s_row-RAD, s_w, s_h)] = d_temp[flatten(col, row - RAD, w, h)];
s_in[flatten(s_col, s_row + blockDim.y, s_w, s_h)] = d_temp[flatten(col, row + blockDim.y, w, h)];
}
float dSq = ((col - bc.x)*(col - bc.x) + (row - bc.y)*(row - bc.y));
if (dSq<bc.rad*bc.rad)
{
d_temp[idx] = bc.t_s;
return;
}
if ((col==0)||(col==w-1)||(row==0)||(col+row<bc.chamfer)||(col-row>w-bc.chamfer))
{
d_temp[idx] = bc.t_a;
return;
}
if (row == h - 1)
{
d_temp[idx] = bc.t_g;
return;
}
__syncthreads();
float temp = 0.25f*(s_in[flatten(s_col - 1, s_row, s_w, s_h)] +
s_in[flatten(s_col + 1, s_row, s_w, s_h)] +
s_in[flatten(s_col, s_row - 1, s_w, s_h)] +
s_in[flatten(s_col, s_row + 1, s_w, s_h)]);
d_temp[idx] = temp;
const unsigned char intensity = clip((int)temp);
d_out[idx].x = intensity;
d_out[idx].z = 255 - intensity;
}
void kernelLauncher(uchar4 *d_out, float *d_temp, int w, int h, BC bc)
{
const dim3 blockSize(TX, TY);
const dim3 gridSize(divUp(w, TX), divUp(h,TY));
const size_t smSz = (TX + 2 * RAD)*(TY + 2 * RAD) * sizeof(float);
tempKernel << <gridSize, blockSize,smSz >> > (d_out, d_temp, w, h, bc);
}
void resetTemperature(float *d_temp, int w, int h, BC bc)
{
const dim3 blockSize(TX, TY);
const dim3 gridSize(divUp(w, TX), divUp(h, TY));
resetKernel << <gridSize, blockSize >> > (d_temp, w, h, bc);
}
运行结果如下: