// 1D convolution
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <iostream>
using namespace std;
/*
 * 1D convolution kernel with implicit zero padding.
 *
 * Each thread produces one output element: c[id] is the sum of
 * a[id - m/2 + i] * b[i] for i in [0, m), where taps that fall outside
 * [0, n) are skipped (which is equivalent to padding the input with zeros).
 *
 * a : input signal, n elements (device memory)
 * b : convolution mask, m elements (device memory)
 * c : output signal, n elements (device memory)
 * m : mask length
 * n : signal length
 *
 * Launch layout: 1D grid of 1D blocks providing at least n threads in
 * total; surplus threads do nothing. No shared memory is used.
 */
__global__ void juan(int *a, int *b, int *c, int m, int n) {
    int id = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the end of the signal have no output element to write.
    if (id >= n) {
        return;
    }

    // Index of the input element aligned with mask tap 0.
    int k = id - (m / 2);
    int sum = 0;
    for (int i = 0; i < m; i++) {
        int src = k + i;
        // Skip taps that would read outside the signal (zero padding).
        if (src >= 0 && src < n) {
            sum += a[src] * b[i];
        }
    }
    c[id] = sum;
}
/*
 * Host driver for the 1D convolution demo.
 *
 * Reads 7 input values followed by 5 mask values from standard input, runs
 * the `juan` kernel with a single block of 7 threads, and prints the 7
 * results separated by spaces.
 *
 * Returns 0 on success, or 1 if reading the input or any CUDA call fails
 * (a diagnostic is written to standard error).
 */
int main() {
    const int n = 7;  // signal length
    const int m = 5;  // mask length

    int ha[n], hb[m], hc[n];
    int *da = 0, *db = 0, *dc = 0;

    for (int i = 0; i < n; i++) {
        if (!(std::cin >> ha[i])) {
            std::cerr << "failed to read input element " << i << std::endl;
            return 1;
        }
    }
    for (int i = 0; i < m; i++) {
        if (!(std::cin >> hb[i])) {
            std::cerr << "failed to read mask element " << i << std::endl;
            return 1;
        }
    }

    // Each step runs only if every previous one succeeded, so the first
    // failure is the one that gets reported.
    cudaError_t err = cudaMalloc((void **)&da, sizeof(int) * n);
    if (err == cudaSuccess) {
        err = cudaMalloc((void **)&db, sizeof(int) * m);
    }
    if (err == cudaSuccess) {
        err = cudaMalloc((void **)&dc, sizeof(int) * n);
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(da, ha, sizeof(int) * n, cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        // Only the m mask values that were actually read are transferred.
        err = cudaMemcpy(db, hb, sizeof(int) * m, cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        dim3 bl(n);
        juan<<<1, bl>>>(da, db, dc, m, n);
        // A launch does not return a status; query it explicitly.
        err = cudaGetLastError();
    }
    if (err == cudaSuccess) {
        // Blocking copy: also reports errors raised while the kernel ran.
        err = cudaMemcpy(hc, dc, sizeof(int) * n, cudaMemcpyDeviceToHost);
    }

    // Pointers that were never allocated are still null, which cudaFree accepts.
    cudaFree(da);
    cudaFree(db);
    cudaFree(dc);

    if (err != cudaSuccess) {
        std::cerr << "CUDA error: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }

    for (int i = 0; i < n; i++) {
        std::cout << hc[i] << " ";
    }
    return 0;
}
// 2D convolution
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <iostream>
using namespace std;
/*
 * 2D convolution kernel with implicit zero padding.
 *
 * Each thread produces one output element of a square n x n result:
 * c[idy * n + idx] is the sum over mask rows i and columns j of
 * a[(idy - m/2 + i) * n + (idx - m/2 + j)] * b[i * m + j], where taps that
 * fall outside the n x n input are skipped (equivalent to zero padding).
 *
 * a : input matrix, n * n elements, row-major (device memory)
 * b : convolution mask, m * m elements, row-major (device memory)
 * c : output matrix, n * n elements, row-major (device memory)
 * m : mask side length
 * n : matrix side length
 *
 * Launch layout: 2D grid of 2D blocks providing at least n threads along
 * both x (columns) and y (rows); surplus threads do nothing. No shared
 * memory is used.
 */
__global__ void juan(int *a, int *b, int *c, int m, int n) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;  // output column
    int idy = blockIdx.y * blockDim.y + threadIdx.y;  // output row

    // Threads outside the matrix have no output element to write.
    if (idx >= n || idy >= n) {
        return;
    }

    // Input column / row aligned with mask tap (0, 0).
    int l = idx - (m / 2);
    int r = idy - (m / 2);

    int sum = 0;
    for (int i = 0; i < m; i++) {
        int nr = r + i;
        for (int j = 0; j < m; j++) {
            int nc = l + j;
            // Skip taps that would read outside the input (zero padding).
            if (nc >= 0 && nc < n && nr >= 0 && nr < n) {
                sum += a[nr * n + nc] * b[i * m + j];
            }
        }
    }
    c[idy * n + idx] = sum;
}
/*
 * Host driver for the 2D convolution demo.
 *
 * Reads a 7x7 input matrix followed by a 5x5 mask from standard input (both
 * row by row), runs the `juan` kernel with a single 7x7 block, and prints
 * the 7x7 result one row per line with values separated by spaces.
 *
 * Returns 0 on success, or 1 if reading the input or any CUDA call fails
 * (a diagnostic is written to standard error).
 */
int main() {
    const int n = 7;  // matrix side length
    const int m = 5;  // mask side length

    int ha[n][n], hb[m][m], hc[n * n];
    int *da = 0, *db = 0, *dc = 0;

    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            if (!(std::cin >> ha[i][j])) {
                std::cerr << "failed to read input element (" << i << ", " << j << ")" << std::endl;
                return 1;
            }
        }
    }
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < m; j++) {
            if (!(std::cin >> hb[i][j])) {
                std::cerr << "failed to read mask element (" << i << ", " << j << ")" << std::endl;
                return 1;
            }
        }
    }

    // Each step runs only if every previous one succeeded, so the first
    // failure is the one that gets reported.
    cudaError_t err = cudaMalloc((void **)&da, sizeof(ha));
    if (err == cudaSuccess) {
        err = cudaMalloc((void **)&db, sizeof(hb));
    }
    if (err == cudaSuccess) {
        err = cudaMalloc((void **)&dc, sizeof(hc));
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(da, ha, sizeof(ha), cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        // The mask holds m * m values; copying more would read past hb.
        err = cudaMemcpy(db, hb, sizeof(hb), cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        dim3 bl(n, n);
        juan<<<1, bl>>>(da, db, dc, m, n);
        // A launch does not return a status; query it explicitly.
        err = cudaGetLastError();
    }
    if (err == cudaSuccess) {
        // Blocking copy: also reports errors raised while the kernel ran.
        err = cudaMemcpy(hc, dc, sizeof(hc), cudaMemcpyDeviceToHost);
    }

    // Pointers that were never allocated are still null, which cudaFree accepts.
    cudaFree(da);
    cudaFree(db);
    cudaFree(dc);

    if (err != cudaSuccess) {
        std::cerr << "CUDA error: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }

    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            std::cout << hc[i * n + j] << " ";
        }
        std::cout << std::endl;
    }
    return 0;
}