图像常用的数据增强技术(based on TensorFlow)

数据作为深度学习的基础,其对模型性能的重要性不言而喻。在本文中,我们将梳理常用的图像数据处理技术;至于具体的编程工具,选择 Python + TensorFlow:

Tip:

如果你使用 tf.data 来组织图像数据输入管道,那么恭喜你,可以直接在 map 中调用本文的代码。

第一种

在这里插入图片描述

#coding:utf-8
# preprocess_lenet.py

"""
 Train
    step1: resize image with crop or pad
    step2: subtract 128.0
    step3: div 128.0
 Eval
    step1: resize image with crop or pad
    step2: subtract 128.0
    step3: div 128.0
"""

import tensorflow as tf


def preprocess_image(image, label, is_training,
                     out_height=28, out_width=28):
  """Preprocesses an image for the LeNet pipeline.

  The image is cast to float32, center-cropped or zero-padded to
  `[out_height, out_width]`, and then rescaled as `(pixel - 128) / 128`.
  The same steps are applied for training and evaluation.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    label: The label paired with `image`; it is returned unchanged.
    is_training: Not used by this function; training and evaluation apply
      identical steps.
    out_height: The height of the image after preprocessing.
    out_width: The width of the image after preprocessing.

  Returns:
    A `(image, label)` tuple, where `image` is the preprocessed float32
    image and `label` is the input label.
  """
  image = tf.cast(image, tf.float32)
  # The signature is (image, target_height, target_width). Passing the sizes
  # by keyword prevents height and width from being transposed for
  # non-square outputs.
  image = tf.image.resize_image_with_crop_or_pad(
      image, target_height=out_height, target_width=out_width)
  # For 8-bit input (0~255) the two steps below map pixels to about [-1, 1).
  image = tf.subtract(image, 128.0)
  image = tf.div(image, 128.0)
  return image, label

第二种

在这里插入图片描述

#coding:utf-8
#preprocess_cifar.py

"""
 Train
    step1: pad height and width (only if padding > 0)
    step2: random crop
    step3: random flip left right
    step4: random brightness
    step5: random contrast
    step6: per image standardization
 Eval
    step1: resize image with crop or pad
    step2: per image standardization
"""

import tensorflow as tf

# Default number of pixels added on each side of the image height and width
# before the random crop in `preprocess_for_train`.
_PADDING = 4


def preprocess_image(image, label, is_training,
                     out_height=32, out_width=32,
                     add_image_summaries=False):
  """Runs the CIFAR training or evaluation preprocessing on one image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    label: The label paired with `image`; it is returned unchanged.
    is_training: `True` selects `preprocess_for_train`, otherwise
      `preprocess_for_eval` is used.
    out_height: The height of the image after preprocessing.
    out_width: The width of the image after preprocessing.
    add_image_summaries: Enable image summaries.

  Returns:
    A `(image, label)` tuple holding the preprocessed image and the
    untouched label.
  """
  # Both helpers accept the same arguments, so pick one and call it once.
  preprocess_fn = preprocess_for_train if is_training else preprocess_for_eval
  processed = preprocess_fn(
      image, out_height, out_width,
      add_image_summaries=add_image_summaries)
  return processed, label


def preprocess_for_train(image,
                         output_height,
                         output_width,
                         padding=_PADDING,
                         add_image_summaries=True):
  """Builds the training-time augmentation for a single image.

  Steps, in order: cast to float32, optional padding of height and width,
  random crop to `[output_height, output_width, 3]`, random horizontal flip,
  random brightness, random contrast, and per-image standardization.

  Args:
    image: A `Tensor` representing an image of arbitrary size. The padding
      spec and crop shape below treat it as a rank-3 image with 3 channels.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    padding: The amount of padding added before and after the height and
      width dimensions; values <= 0 skip the padding step.
    add_image_summaries: Enable image summaries.

  Returns:
    A preprocessed image.
  """
  if add_image_summaries:
    tf.summary.image('image', tf.expand_dims(image, 0))

  # Work on float pixels (0.0~255.0 for 8-bit input).
  float_image = tf.cast(image, tf.float32)
  if padding > 0:
    # Pad height and width only; the channel dimension is left alone.
    pad_spec = [[padding, padding], [padding, padding], [0, 0]]
    float_image = tf.pad(float_image, pad_spec)

  # Take a random [height, width] window, then flip it horizontally at random.
  crop_shape = [output_height, output_width, 3]
  augmented = tf.random_crop(float_image, crop_shape)
  augmented = tf.image.random_flip_left_right(augmented)

  if add_image_summaries:
    tf.summary.image('distorted_image', tf.expand_dims(augmented, 0))

  # Brightness and contrast do not commute; the order here is fixed, though
  # randomizing it is a possible extension.
  augmented = tf.image.random_brightness(augmented, max_delta=63)
  augmented = tf.image.random_contrast(augmented, lower=0.2, upper=1.8)

  # Normalize each image independently.
  return tf.image.per_image_standardization(augmented)


def preprocess_for_eval(image, output_height, output_width,
                        add_image_summaries=True):
  """Preprocesses the given image for evaluation.

  The image is cast to float32, center-cropped or zero-padded to
  `[output_height, output_width]`, and standardized per image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    add_image_summaries: Enable image summaries.

  Returns:
    A preprocessed image.
  """
  if add_image_summaries:
    tf.summary.image('image', tf.expand_dims(image, 0))

  # tf.cast replaces the deprecated tf.to_float and matches the training path.
  image = tf.cast(image, tf.float32)

  # The signature is (image, target_height, target_width). Passing the sizes
  # by keyword prevents height and width from being transposed for
  # non-square outputs.
  resized_image = tf.image.resize_image_with_crop_or_pad(
      image, target_height=output_height, target_width=output_width)
  if add_image_summaries:
    tf.summary.image('resized_image', tf.expand_dims(resized_image, 0))

  # Subtract the per-image mean and divide by the (adjusted) standard
  # deviation of the pixels.
  return tf.image.per_image_standardization(resized_image)

第三种

在这里插入图片描述

#coding:utf-8
#preprocess_vgg.py

"""
 Train
    step1: _aspect_preserving_resize
           # 在不改变图像宽高比的基础上,
           # 将图像的窄边缩放到_RESIZE_SIDE_MIN和_RESIZE_SIDE_MAX之间
    step2: _random_crop
           # 将图像随机裁剪到 out_height, out_width
           # 如果out_height, out_width比缩放后的图像大,报错。
    step3: random flip left right
    step4: _mean_image_subtraction
           # RGB每个通道减去整个数据集的RGB通道的均值
 Eval
    step1: _aspect_preserving_resize
    step2: _central_crop
    step3: _mean_image_subtraction
"""

import tensorflow as tf

# Per-channel (R, G, B) mean values. Per the module notes above, the
# `_mean_image_subtraction` step subtracts the dataset-wide channel means.
# NOTE(review): presumably these are the ImageNet training-set means - confirm.
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

# Default lower/upper bounds for the length of the image's smaller side
# after aspect-preserving resizing (see `preprocess_image`).
_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512


def preprocess_image(image, label, is_training,
                     out_height=224, out_width=224,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX):
  """Runs the VGG training or evaluation preprocessing on one image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    label: The label paired with `image`; it is returned unchanged.
    is_training: `True` selects `preprocess_for_train`, otherwise
      `preprocess_for_eval` is used.
    out_height: The height of the image after preprocessing.
    out_width: The width of the image after preprocessing.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing. It is forwarded in both modes; for
      evaluation it is the only resize bound passed on.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing. Only forwarded when `is_training` is
      `True`; it is ignored for evaluation.

  Returns:
    A `(image, label)` tuple holding the preprocessed image and the
    untouched label.
  """
  # Evaluation needs only the lower resize bound, so handle it first.
  if not is_training:
    eval_image = preprocess_for_eval(image, out_height, out_width,
                                     resize_side_min)
    return eval_image, label

  train_image = preprocess_for_train(image, out_height, out_width,
                                     resize_side_min, resize_side_max)
  return train_image, label


def preprocess_for_train(image,
                         out_height,
                         out_width,
                         resize_side_min<
  • 3
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值