在caffe中添加样本扩增的功能

最新推荐文章于 2021-02-23 22:45:01 发布

jinggegebuaa

最新推荐文章于 2021-02-23 22:45:01 发布

阅读量1.8k

点赞数

本文链接：https://blog.csdn.net/keyanxiaocaicai/article/details/68941185

版权

在caffe-cudnn 中添加样本扩增的功能

手头有一份样本扩增的代码，可以正常运行。

考虑到我的caffe的版本太多了。所以把所有的功能都merge 到一起。

首先merge 的是样本扩增的功能。

因为只有在 ImageData 层里面用到样本扩增，而 ImageData 层内部是通过 DataTransformer（data_transformer）来处理图像的。

DataTransformer 这个类有四个函数。

我们只需要修改其中下面这一个函数：

template<typename Dtype>

void DataTransformer<Dtype>::Transform(const cv::Mat& img, Blob<Dtype>* transformed_blob)

改成下面的code。

/**
 * @brief Augments a private copy of @p img and writes it into
 *        @p transformed_blob.
 *
 * Steps, in order: random flip, optional smoothing, optional
 * contrast/brightness jitter, optional JPEG re-compression, crop, optional
 * rotation, and finally mean subtraction and scaling into the blob using the
 * index (c * height + h) * width + w.
 *
 * @param img              Source image; must have 8-bit unsigned depth and the
 *                         same channel count as the blob. It is not modified.
 * @param transformed_blob Destination blob. Its height/width must equal
 *                         crop_size when cropping is enabled, otherwise the
 *                         image size.
 *
 * param_.mirror() is not consulted here; the random flip replaces it.
 *
 * NOTE(review): the flip / filter / rotation steps run in every phase, not
 * only TRAIN (only the crop offset is phase dependent). Confirm this is
 * intended, and that the generator behind Rand() is initialised in every
 * configuration that reaches this function.
 */
template<typename Dtype>
void DataTransformer<Dtype>::Transform(const cv::Mat& img,
                                       Blob<Dtype>* transformed_blob) {
  // All in-place augmentation happens on a copy so the caller's image is
  // left untouched.
  cv::Mat cv_img;
  img.copyTo(cv_img);

  const int crop_size = param_.crop_size();
  const bool contrast_adjustment = param_.contrast_adjustment();
  const bool smooth_filtering = param_.smooth_filtering();
  const bool jpeg_compression = param_.jpeg_compression();
  // Debug windows and the parameter dump are only produced while training.
  const bool show = param_.display() && phase_ == TRAIN;

  const int img_channels = cv_img.channels();
  const int img_height = cv_img.rows;
  const int img_width = cv_img.cols;

  // Check dimensions.
  const int channels = transformed_blob->channels();
  const int height = transformed_blob->height();
  const int width = transformed_blob->width();
  const int num = transformed_blob->num();

  CHECK_EQ(channels, img_channels);
  CHECK_LE(height, img_height);
  CHECK_LE(width, img_width);
  CHECK_GE(num, 1);

  CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";

  const Dtype scale = param_.scale();
  const bool has_mean_file = param_.has_mean_file();
  const bool has_mean_values = mean_values_.size() > 0;

  CHECK_GT(img_channels, 0);
  CHECK_GE(img_height, crop_size);
  CHECK_GE(img_width, crop_size);

  // Angle step used by the rotation stage below.
  const float rotation_angle_interval = param_.rotation_angle_interval();

  if (show)
    cv::imshow("Source", cv_img);

  // Flipping and Reflection ---------------------------------------------------
  // Assumes Rand(n) returns an integer in [0, n), so the mode is one of
  // -1 (both axes), 0 (around x), 1 (around y) or 2 (meaning "no flip").
  const int flipping_mode = Rand(4) - 1;
  const bool apply_flipping = (flipping_mode != 2);
  if (apply_flipping) {
    cv::flip(cv_img, cv_img, flipping_mode);
    if (show)
      cv::imshow("Flipping and Reflection", cv_img);
  }

  // Smooth Filtering ----------------------------------------------------------
  int smooth_type = -1;
  int smooth_param1 = 3;
  const bool apply_smooth = smooth_filtering && Rand(2);
  if (apply_smooth) {
    smooth_type = Rand(4);
    // Kernel size 3 or 5. Rand(2) is required here: Rand(1) can only yield 0,
    // which would pin the kernel size to 3.
    smooth_param1 = 3 + 2 * Rand(2);
    const cv::Size ksize(smooth_param1, smooth_param1);
    switch (smooth_type) {
      case 0:
        cv::GaussianBlur(cv_img, cv_img, ksize, 0);
        break;
      case 1:
        cv::blur(cv_img, cv_img, ksize);
        break;
      case 2:
        cv::medianBlur(cv_img, cv_img, smooth_param1);
        break;
      case 3:
        cv::boxFilter(cv_img, cv_img, -1,
                      cv::Size(smooth_param1 * 2, smooth_param1 * 2));
        break;
    }
    if (show)
      cv::imshow("Smooth Filtering", cv_img);
  }

  // Contrast and Brightness Adjustment ----------------------------------------
  float alpha = 1, beta = 0;
  const bool apply_contrast = contrast_adjustment && Rand(2);
  if (apply_contrast) {
    const float min_alpha = 0.8f, max_alpha = 1.2f;
    // Draw alpha from Rand() rather than a locally constructed cv::RNG: a
    // default-constructed cv::RNG starts from the same state on every call,
    // which would give every image the same alpha.
    alpha = min_alpha + (max_alpha - min_alpha) * Rand(1000) / 1000.0f;
    beta = static_cast<float>(Rand(6));
    // Random sign for the brightness offset.
    if (Rand(2)) beta = -beta;
    cv_img.convertTo(cv_img, -1, alpha, beta);
    if (show)
      cv::imshow("Contrast Adjustment", cv_img);
  }

  // JPEG Compression ----------------------------------------------------------
  // NOTE(review): the original author warned of an undiagnosed memory leak in
  // this stage; that has not been verified either way.
  int QF = 100;
  const bool apply_JPEG = jpeg_compression && Rand(2);
  if (apply_JPEG) {
    // JPEG quality factor in [95, 100].
    QF = 95 + Rand(6);
    std::vector<int> compression_params;
    compression_params.push_back(1);  // 1 is OpenCV's JPEG-quality parameter id
    compression_params.push_back(QF);
    std::vector<unsigned char> img_jpeg;
    cv::imencode(".jpg", cv_img, img_jpeg, compression_params);
    // Decode with the channel layout of the input (0 = grayscale, 1 = colour)
    // so a single-channel image is not silently expanded to three channels.
    cv::Mat decoded = cv::imdecode(img_jpeg, img_channels == 1 ? 0 : 1);
    CHECK(decoded.data) << "JPEG round trip failed to decode";
    CHECK_EQ(decoded.channels(), img_channels)
        << "JPEG round trip changed the channel count";
    decoded.copyTo(cv_img);
    if (show)
      cv::imshow("JPEG Compression", cv_img);
  }

  // Cropping ------------------------------------------------------------------
  int h_off = 0;
  int w_off = 0;
  // cv_final is the image that is eventually written to the blob.
  cv::Mat cv_final = cv_img;
  if (crop_size) {
    CHECK_EQ(crop_size, height);
    CHECK_EQ(crop_size, width);
    // Random crop while training, centre crop otherwise.
    if (phase_ == TRAIN) {
      h_off = Rand(img_height - crop_size + 1);
      w_off = Rand(img_width - crop_size + 1);
    } else {
      h_off = (img_height - crop_size) / 2;
      w_off = (img_width - crop_size) / 2;
    }
    const cv::Rect roi(w_off, h_off, crop_size, crop_size);
    cv_final = cv_img(roi);
    if (show)
      cv::imshow("Cropping", cv_final);
  } else {
    CHECK_EQ(img_height, height);
    CHECK_EQ(img_width, width);
  }

  // Rotation ------------------------------------------------------------------
  // An interval of exactly 1 (the proto default) means "rotation disabled".
  double rotation_degree = 0;
  if (rotation_angle_interval != 1) {
    CHECK_GT(rotation_angle_interval, 0.f)
        << "rotation_angle_interval must be positive";
    const int num_steps = static_cast<int>(360 / rotation_angle_interval);
    CHECK_GE(num_steps, 1) << "rotation_angle_interval must not exceed 360";
    rotation_degree = Rand(num_steps) * rotation_angle_interval;

    // Enlarge by 1.5x, rotate about the centre, then cut the central window of
    // the pre-rotation size.
    const cv::Size dsize(static_cast<int>(cv_final.cols * 1.5),
                         static_cast<int>(cv_final.rows * 1.5));
    cv::Mat enlarged;
    cv::resize(cv_final, enlarged, dsize);

    const cv::Point2f center(enlarged.cols / 2., enlarged.rows / 2.);
    const cv::Mat rot = cv::getRotationMatrix2D(center, rotation_degree, 1.0);
    cv::Mat rotated;
    cv::warpAffine(enlarged, rotated, rot, enlarged.size());

    const cv::Rect center_roi(enlarged.cols / 6, enlarged.rows / 6,
                              cv_final.cols, cv_final.rows);
    cv_final = rotated(center_roi);
    if (show)
      cv::imshow("Rotation", cv_final);
  }

  if (show)
    cv::imshow("Final", cv_final);

  // The loops below index cv_final with the blob's extents.
  CHECK_EQ(cv_final.rows, height);
  CHECK_EQ(cv_final.cols, width);

  //--------------------!! for debug only !!-------------------
  if (show) {
    LOG(INFO) << "----------------------------------------";
    LOG(INFO) << "src width: " << img_width << ", src height: " << img_height;
    LOG(INFO) << "dest width: " << width << ", dest height: " << height;
    if (apply_flipping) {
      LOG(INFO) << "* parameter for flipping: ";
      LOG(INFO) << "  flipping_mode: " << flipping_mode;
    }
    if (apply_smooth) {
      LOG(INFO) << "* parameter for smooth filtering: ";
      LOG(INFO) << "  smooth type: " << smooth_type
                << ", smooth param1: " << smooth_param1;
    }
    if (apply_contrast) {
      LOG(INFO) << "* parameter for contrast adjustment: ";
      LOG(INFO) << "  alpha: " << alpha << ", beta: " << beta;
    }
    if (apply_JPEG) {
      LOG(INFO) << "* parameter for JPEG compression: ";
      LOG(INFO) << "  QF: " << QF;
    }
    LOG(INFO) << "* parameter for cropping: ";
    LOG(INFO) << "  w: " << w_off << ", h: " << h_off;
    LOG(INFO) << "  roi_width: " << crop_size << ", roi_height: " << crop_size;
    LOG(INFO) << "* parameter for rotation: ";
    LOG(INFO) << "  angle_interval: " << rotation_angle_interval;
    LOG(INFO) << "  angle: " << rotation_degree;
    // Give HighGUI a chance to repaint the debug windows.
    cv::waitKey(10);
  }

  Dtype* mean = NULL;
  if (has_mean_file) {
    CHECK_EQ(img_channels, data_mean_.channels());
    CHECK_EQ(img_height, data_mean_.height());
    CHECK_EQ(img_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }

  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 ||
          mean_values_.size() == static_cast<size_t>(img_channels))
        << "Specify either 1 mean_value or as many as channels: "
        << img_channels;
    if (img_channels > 1 && mean_values_.size() == 1) {
      // Replicate the single mean_value so every channel has an entry.
      for (int c = 1; c < img_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  Dtype* transformed_data = transformed_blob->mutable_cpu_data();
  for (int h = 0; h < height; ++h) {
    // ptr<uchar>(h) honours the row stride, so this is safe for an ROI view.
    const uchar* ptr = cv_final.ptr<uchar>(h);
    int img_index = 0;
    for (int w = 0; w < width; ++w) {
      for (int c = 0; c < img_channels; ++c) {
        const int top_index = (c * height + h) * width + w;
        const Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
        if (has_mean_file) {
          // The mean image has the full source size, so shift by the crop
          // offset to address the matching location.
          const int mean_index =
              (c * img_height + h_off + h) * img_width + w_off + w;
          transformed_data[top_index] = (pixel - mean[mean_index]) * scale;
        } else if (has_mean_values) {
          transformed_data[top_index] = (pixel - mean_values_[c]) * scale;
        } else {
          transformed_data[top_index] = pixel * scale;
        }
      }
    }
  }
}

同时在 caffe.proto 中把 TransformationParameter 这个 message 改成：
// Options for the data transformer: the stock scaling / mean / crop settings
// plus the extra switches read by the sample-augmentation Transform().
message TransformationParameter {
// For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried
// out before scaling.
optional float scale = 1 [default = 1];
// Specify if we want to randomly mirror data.
optional bool mirror = 2 [default = false];
// Specify if we would like to randomly crop an image.
optional uint32 crop_size = 3 [default = 0];
// mean_file and mean_value cannot be specified at the same time
optional string mean_file = 4;
// if specified can be repeated once (would subtract it from all the channels)
// or can be repeated the same number of times as channels
// (would subtract them from the corresponding channel)
repeated float mean_value = 5;
// Force the decoded image to have 3 color channels.
optional bool force_color = 6 [default = false];
// Force the decoded image to have 1 color channels.
optional bool force_gray = 7 [default = false];
// Changed by ggj, 2017-03-31. NOTE(review): the purpose of this flag is not
// visible here; confirm where it is read.
optional bool self_preprocess = 15 [default = false];
// ---- The fields below are the ones that have to be added for augmentation ----

// Specify the range of scaling factor for doing resizing.
optional float min_scaling_factor = 8 [default = 0.75];
optional float max_scaling_factor = 9 [default = 1.50];
// Specify the angle interval for doing rotation. The accompanying
// Transform() treats the value 1 as "rotation disabled".
optional float rotation_angle_interval = 10 [default = 1];
// Enable random contrast/brightness adjustment.
optional bool contrast_adjustment = 11 [default = false];
// Enable random smoothing (blur) filters.
optional bool smooth_filtering = 12 [default = false];
// Enable random JPEG re-compression.
optional bool jpeg_compression = 13 [default = false];
// Show the intermediate images and log the chosen parameters (debugging aid).
optional bool display = 14 [default = false];
}

然后执行 make clean && make all -j8。如果报错，就按照错误信息一步步往下看，找到出问题的地方，然后改掉。