#include <stdio.h>
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Kernel definition
// Element-wise sum: each thread writes C[t] = A[t] + B[t], where t is the
// thread's x-index within its block.
//
// Launch layout: only threadIdx.x is used (blockIdx is ignored), so the kernel
// touches elements [0, blockDim.x). There is no bounds check; the caller must
// make sure all three arrays hold at least blockDim.x floats.
__global__ void VecAdd(float* A, float* B, float* C)
{
    const int tid = threadIdx.x;
    const float lhs = A[tid];
    const float rhs = B[tid];
    C[tid] = lhs + rhs;
}
// Element-wise difference: each thread writes C[t] = A[t] - B[t], where t is
// the thread's x-index within its block.
//
// Launch layout: only threadIdx.x is used (blockIdx is ignored), so the kernel
// touches elements [0, blockDim.x). There is no bounds check; the caller must
// make sure all three arrays hold at least blockDim.x floats.
__global__ void VecSub(float* A, float* B, float* C)
{
    const int tid = threadIdx.x;
    const float minuend = A[tid];
    const float subtrahend = B[tid];
    C[tid] = minuend - subtrahend;
}
// Element-wise product: each thread writes C[t] = A[t] * B[t], where t is the
// thread's x-index within its block.
//
// Launch layout: only threadIdx.x is used (blockIdx is ignored), so the kernel
// touches elements [0, blockDim.x). There is no bounds check; the caller must
// make sure all three arrays hold at least blockDim.x floats.
__global__ void VecMul(float* A, float* B, float* C)
{
    const int tid = threadIdx.x;
    const float lhs = A[tid];
    const float rhs = B[tid];
    C[tid] = lhs * rhs;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Multiplies two 3-element host vectors on the GPU with VecMul and prints the
// result as "c0,c1,c2" followed by "Hello CUDA".
//
// Returns 0 on success, 1 if any CUDA call or the kernel launch fails (the
// failure is reported on stderr and no result is printed).
int main()
{
    const int N = 3;                        // number of elements per vector
    const size_t bytes = N * sizeof(float); // size of each buffer in bytes

    float a[N] = {86, 1, 2};
    float b[N] = {2, 1, 7};
    float c[N] = {};

    // Device buffers start as null so the cleanup below is safe even when an
    // allocation fails part-way through.
    float *A = NULL;
    float *B = NULL;
    float *C = NULL;

    // Short-circuit evaluation stops at the first failing step.
    bool ok = cudaOk(cudaMalloc((void**)&A, bytes), "cudaMalloc(A)")
           && cudaOk(cudaMalloc((void**)&B, bytes), "cudaMalloc(B)")
           && cudaOk(cudaMalloc((void**)&C, bytes), "cudaMalloc(C)")
           && cudaOk(cudaMemcpy(A, a, bytes, cudaMemcpyHostToDevice), "copy of a to device")
           && cudaOk(cudaMemcpy(B, b, bytes, cudaMemcpyHostToDevice), "copy of b to device");

    if (ok) {
        // One block of N threads: VecMul indexes by threadIdx.x only.
        VecMul<<<1, N>>>(A, B, C);

        // A launch does not return a status itself; query it explicitly.
        // The blocking copy that follows waits for the kernel and surfaces
        // any error raised while it ran.
        ok = cudaOk(cudaGetLastError(), "VecMul launch")
          && cudaOk(cudaMemcpy(c, C, bytes, cudaMemcpyDeviceToHost), "copy of C to host");
    }

    // Release device memory on every path; cudaFree on a null pointer is a no-op.
    cudaFree(A);
    cudaFree(B);
    cudaFree(C);

    if (!ok) {
        return 1;
    }

    std::cout<<c[0]<<","<<c[1]<<","<<c[2]<<std::endl;
    printf("Hello CUDA\n");
    return 0;
}
// 08-11
// 10-17