Sample: simplePrintf
Minimum spec: SM 3.0
This CUDA Runtime API sample is a very basic sample that demonstrates how to use the printf function in device code. Specifically, for devices with compute capability less than 2.0, the function cuPrintf is called; otherwise, printf can be used directly. Because the minimum spec of this sample is SM 3.0, the code below always calls printf directly.
Key concepts:
Debugging
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// System includes
#include <stdio.h>
#include <assert.h>
// CUDA runtime
#include <cuda_runtime.h>
// helper functions and utilities to work with CUDA
#include <helper_functions.h>
#include <helper_cuda.h>
// MAX(a, b): evaluates to the larger of a and b (b when they compare equal).
// Only defined when no earlier header has already provided MAX.
// Every use of an argument, and the whole expansion, is parenthesized so that
// arguments containing lower-precedence operators (e.g. `x & 1`, `p ? q : r`)
// expand correctly. NOTE: the selected argument is evaluated twice, so avoid
// passing expressions with side effects.
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
/**
 * Kernel in which every thread prints one line containing a flattened block
 * index, a flattened thread index within its block, and the supplied value.
 *
 * The block index is flattened over the x and y grid dimensions only; the
 * thread index is flattened over all three block dimensions.
 *
 * @param val  value echoed unchanged by every thread
 */
__global__ void testKernel(int val)
{
    // Position of this block in the (x, y) grid, with x varying fastest.
    const unsigned int blockId = blockIdx.x + gridDim.x * blockIdx.y;

    // Position of this thread inside its block, with x varying fastest,
    // then y, then z.
    const unsigned int threadId =
        threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);

    printf("[%d, %d]:\t\tValue is:%d\n", blockId, threadId, val);
}
/**
 * Host entry point: selects a CUDA device, prints its name and compute
 * capability, then launches testKernel on a 2x2 grid of 2x2x2 blocks so that
 * each of the 32 threads prints one line from device code.
 *
 * @param argc  command-line argument count, forwarded to findCudaDevice()
 * @param argv  command-line argument vector, forwarded to findCudaDevice()
 * @return EXIT_SUCCESS once the kernel has run to completion. Every CUDA
 *         call, including the launch and the final synchronization, is passed
 *         through checkCudaErrors() (from helper_cuda.h; presumably reports
 *         the error and terminates -- confirm against that header).
 */
int main(int argc, char **argv)
{
    int devID;
    cudaDeviceProp props;

    // This will pick the best possible CUDA capable device
    devID = findCudaDevice(argc, (const char **)argv);

    // Get GPU information for the device that is current on this thread
    checkCudaErrors(cudaGetDevice(&devID));
    checkCudaErrors(cudaGetDeviceProperties(&props, devID));
    printf("Device %d: \"%s\" with Compute %d.%d capability\n",
           devID, props.name, props.major, props.minor);

    printf("printf() is called. Output:\n\n");

    // Kernel configuration, where a two-dimensional grid and
    // three-dimensional blocks are configured.
    dim3 dimGrid(2, 2);
    dim3 dimBlock(2, 2, 2);
    testKernel<<<dimGrid, dimBlock>>>(10);

    // A kernel launch returns no status itself; launch/configuration errors
    // have to be fetched explicitly.
    checkCudaErrors(cudaGetLastError());

    // Wait for the kernel to finish. Errors raised while the kernel was
    // executing surface at this synchronizing call, so its result must not
    // be discarded.
    checkCudaErrors(cudaDeviceSynchronize());

    return EXIT_SUCCESS;
}