| @@ -21,21 +21,51 @@ void CuDNNConvolutionLayer<Dtype>::Forward_gpu( |
| | |
| | // Forward through cuDNN in parallel over groups. |
| | for (int g = 0; g < this->group_; g++) { |
| | + const Dtype alpha = 1.0; |
| | + const Dtype beta = 0.0; |
| | + |
| | + cudnnConvolutionFwdAlgo_t algo; |
| | + |
| | + // get the desired convolution algorithm |
| | + CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[g], |
| | + bottom_descs_[i], |
| | + filter_desc_, |
| | + conv_descs_[i], |
| | + top_descs_[i], |
| | + CUDNN_CONVOLUTION_FWD_NO_WORKSPACE, |
| | +          0,  // memoryLimitInBytes: 0 = no limit (NO_WORKSPACE pref makes it moot) |
| | + &algo)); |
| | + |
| | + // get minimum size of the workspace needed for the desired algorithm |
| | + size_t workspaceSizeInBytes; |
| | + |
| | + CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[g], |
| | + bottom_descs_[i], |
| | + filter_desc_, |
| | + conv_descs_[i], |
| | + top_descs_[i], |
| | + algo, |
| | + &workspaceSizeInBytes)); |
| | + |
| | + void *workspace = NULL; |
| | + |
| | // Filters. |
| | - CUDNN_CHECK(cudnnConvolutionForward(handle_[g], |
| | - bottom_descs_[i], bottom_data + bottom_offset_ * g, |
| | - filter_desc_, weight + weight_offset_ * g, |
| | - conv_descs_[i], |
| | - top_descs_[i], top_data + top_offset_ * g, |
| | - CUDNN_RESULT_NO_ACCUMULATE)); |
| | + CUDNN_CHECK(cudnnConvolutionForward(handle_[g], (void *)(&alpha), |
| | + bottom_descs_[i], bottom_data + bottom_offset_ * g, |
| | + filter_desc_, weight + weight_offset_ * g, |
| | + conv_descs_[i], |
| | +          algo, workspace, workspaceSizeInBytes,  // size is 0 under CUDNN_CONVOLUTION_FWD_NO_WORKSPACE, so NULL workspace is safe |
| | + (void *)(&beta), |
| | + top_descs_[i], top_data + top_offset_ * g)); |
| | |
| | // Bias. |
| | if (this->bias_term_) { |
| | const Dtype* bias_data = this->blobs_[1]->gpu_data(); |
| | - Dtype alpha = 1.; |
| | - CUDNN_CHECK(cudnnAddTensor4d(handle_[g], CUDNN_ADD_SAME_C, &alpha, |
| | - bias_desc_, bias_data + bias_offset_ * g, |
| | - top_descs_[i], top_data + top_offset_ * g)); |
| | + Dtype alpha = 1.0; |
| | + Dtype beta = 1.0; |
| | + CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C, (void *)(&alpha), |
| | + bias_desc_, bias_data + bias_offset_ * g, (void *)(&beta), |
| | + top_descs_[i], top_data + top_offset_ * g)); |
| | } |
| | } |
| | |
| @@ -65,34 +95,39 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, |
| | const Dtype* top_diff = top[i]->gpu_diff(); |
| | // Backward through cuDNN in parallel over groups and gradients. |
| | for (int g = 0; g < this->group_; g++) { |
| | + |
| | // Gradient w.r.t. bias. |
| | if (this->bias_term_ && this->param_propagate_down_[1]) { |
| | - CUDNN_CHECK(cudnnConvolutionBackwardBias(handle_[0*this->group_ + g], |
| | - top_descs_[i], top_diff + top_offset_ * g, |
| | - bias_desc_, bias_diff + bias_offset_ * g, |
| | - CUDNN_RESULT_ACCUMULATE)); |
| | + const Dtype alpha = 1.0; |
| | + const Dtype beta = 1.0; |
| | + CUDNN_CHECK(cudnnConvolutionBackwardBias(handle_[0*this->group_ + g], (void *)(&alpha), |
| | + top_descs_[i], top_diff + top_offset_ * g, |
| | + (void *)(&beta), |
| | + bias_desc_, bias_diff + bias_offset_ * g)); |
| | } |
| | |
| | // Gradient w.r.t. weights. |
| | if (this->param_propagate_down_[0]) { |
| | + const Dtype alpha = 1.0; |
| | + const Dtype beta = 1.0; |
| | const Dtype* bottom_data = (*bottom)[i]->gpu_data(); |
| | - CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle_[1*this->group_ + g], |
| | - bottom_descs_[i], bottom_data + bottom_offset_ * g, |
| | - top_descs_[i], top_diff + top_offset_ * g, |
| | - conv_descs_[i], |
| | - filter_desc_, weight_diff + weight_offset_ * g, |
| | - CUDNN_RESULT_ACCUMULATE)); |
| | + CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle_[1*this->group_ + g], (void *)(&alpha), |
| | + bottom_descs_[i], bottom_data + bottom_offset_ * g, |
| | + top_descs_[i], top_diff + top_offset_ * g, |
| | + conv_descs_[i], (void *)(&beta), |
| | + filter_desc_, weight_diff + weight_offset_ * g)); |
| | } |
| | |
| | // Gradient w.r.t. bottom data. |
| | if (propagate_down[i]) { |
| | + const Dtype alpha = 1.0; |
| | + const Dtype beta = 0.0; |
| | Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff(); |
| | - CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g], |
| | - filter_desc_, weight + weight_offset_ * g, |
| | - top_descs_[i], top_diff + top_offset_ * g, |
| | - conv_descs_[i], |
| | - bottom_descs_[i], bottom_diff + bottom_offset_ * g, |
| | - CUDNN_RESULT_NO_ACCUMULATE)); |
| | + CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g], (void *)(&alpha), |
| | + filter_desc_, weight + weight_offset_ * g, |
| | + top_descs_[i], top_diff + top_offset_ * g, |
| | + conv_descs_[i], (void *)(&beta), |
| | + bottom_descs_[i], bottom_diff + bottom_offset_ * g)); |
| | } |
| | } |
| | |
| |