// Result image:
#include <iostream>
#include <fstream>
using namespace std;
#include <cuda_runtime.h>
// Fills an interleaved RGB image (three ints per pixel, row-major, row stride
// of nx*3 ints) with a colour gradient.
//
// Launch layout: a 1-D grid of 1-D blocks. blockIdx.x picks the first row a
// block handles and threadIdx.x the first column a thread handles; the loops
// then advance by gridDim.x rows and blockDim.x columns, so every pixel of the
// nx-by-ny image is written exactly once regardless of the launch shape.
// Launching ny blocks of nx threads writes exactly one pixel per thread.
// No shared memory is used.
//
// imagedata : buffer of at least nx*ny*3 ints, written by the kernel
// nx, ny    : image width and height in pixels
// tempj     : unused; kept so existing launches keep compiling
__global__ void
GetImage(int* imagedata, const int nx, const int ny, int tempj){
    (void)tempj;

    // Blue channel is constant over the whole image.
    const float b = 0.2f;
    const int ib = int(255.99f * b);

    for (int j = blockIdx.x; j < ny; j += gridDim.x) {
        // Green grows with the row index j.
        const float g = float(j) / float(ny);
        const int ig = int(255.99f * g);

        for (int i = threadIdx.x; i < nx; i += blockDim.x) {
            // Red grows with the column index i (left to right).
            const float r = float(i) / float(nx);
            const int ir = int(255.99f * r);

            // Row stride comes from nx rather than a fixed width; size_t keeps
            // the offset from overflowing int on large images.
            const size_t base = (size_t(j) * size_t(nx) + size_t(i)) * 3;
            imagedata[base]     = ir;
            imagedata[base + 1] = ig;
            imagedata[base + 2] = ib;
        }
    }
}
// Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Renders the nx-by-ny gradient on the GPU and copies it into h_imagedata
// (nx*ny*3 ints). Returns false if any CUDA call fails.
static bool renderOnDevice(int* h_imagedata, int nx, int ny)
{
    const size_t bytes = size_t(nx) * size_t(ny) * 3 * sizeof(int);

    int* d_imagedata = NULL;
    if (!cudaOk(cudaMalloc((void**)&d_imagedata, bytes), "cudaMalloc")) {
        return false;
    }

    // cudaMemset takes the device pointer itself, not the address of the host
    // variable that holds it.
    bool ok = cudaOk(cudaMemset(d_imagedata, 0, bytes), "cudaMemset");

    if (ok) {
        // GetImage uses blockIdx.x as the row and threadIdx.x as the column:
        // one block per row, one thread per column. nx must therefore stay
        // within the device's threads-per-block limit (1024 on current GPUs).
        dim3 grid(ny, 1, 1);
        dim3 threads(nx, 1, 1);
        GetImage<<<grid, threads>>>(d_imagedata, nx, ny, 1);
        // A launch does not return a status; bad configurations show up here.
        ok = cudaOk(cudaGetLastError(), "GetImage launch");
    }

    if (ok) {
        // Blocking copy: it waits for the kernel to finish, so errors raised
        // while the kernel runs are reported by this call.
        ok = cudaOk(cudaMemcpy(h_imagedata, d_imagedata, bytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy");
    }

    cudaFree(d_imagedata);
    return ok;
}

// Writes the interleaved RGB buffer as a plain-text PPM (P3) file.
// Returns false if the file cannot be opened or a write fails.
static bool writePpm(const char* path, const int* h_imagedata, int nx, int ny)
{
    std::ofstream outfile;
    outfile.open(path);
    if (!outfile.is_open()) {
        std::cerr << "cannot open " << path << " for writing" << std::endl;
        return false;
    }

    outfile << "P3\n" << nx << " " << ny << "\n255\n";
    // Rows are emitted from j = ny-1 down to 0, so buffer row ny-1 becomes the
    // first (top) scanline of the file.
    for (int j = ny - 1; j >= 0; j--) {
        for (int i = 0; i < nx; i++) {
            const size_t base = (size_t(j) * size_t(nx) + size_t(i)) * 3;
            outfile << h_imagedata[base] << " "
                    << h_imagedata[base + 1] << " "
                    << h_imagedata[base + 2] << "\n";
        }
    }
    outfile.close();
    return !outfile.fail();
}

// Renders an 800x400 gradient with the GetImage kernel and saves it as IMG.ppm.
// Returns 0 on success and 1 if allocation, any CUDA call, or file output fails.
int main()
{
    const int nx = 800;
    const int ny = 400;

    int* h_imagedata = (int*)malloc(size_t(nx) * size_t(ny) * 3 * sizeof(int));
    if (h_imagedata == NULL) {
        std::cerr << "host allocation failed" << std::endl;
        return 1;
    }

    const bool ok = renderOnDevice(h_imagedata, nx, ny)
                 && writePpm("IMG.ppm", h_imagedata, nx, ny);

    free(h_imagedata);
    return ok ? 0 : 1;
}