数据作为深度学习的基础,其对模型性能的重要性不言而喻。在本文中,我们将梳理一下常用的图像数据处理技术,至于具体的编程工具,选择 Python + TensorFlow:
Tip:
如果你使用 tf.data 来组织你的图像数据输入管道,那么恭喜你,可以直接在 map 中调用本文的代码。
第一种
#coding:utf-8
# preprocess_lenet.py
"""
Train
step1: resize image with crop or pad
step2: subtract 128.0
step3: div 128.0
Eval
step1: resize image with crop or pad
step2: subtract 128.0
step3: div 128.0
"""
import tensorflow as tf
def preprocess_image(image, label, is_training,
                     out_height=28, out_width=28):
    """Preprocesses the given image for a LeNet-style network.

    Training and evaluation share the same pipeline: cast to float, crop or
    pad to the target size, then shift and scale the pixel values so that
    inputs in [0, 255] end up in roughly [-1, 1).

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      label: The label that belongs to `image`; it is returned unchanged so
        the function can be used directly in `tf.data.Dataset.map`.
      is_training: `True` if we're preprocessing the image for training and
        `False` otherwise. Currently unused because both modes apply the
        same steps.
      out_height: The height of the image after preprocessing.
      out_width: The width of the image after preprocessing.

    Returns:
      A tuple `(image, label)` holding the preprocessed image and the
      untouched label.
    """
    image = tf.cast(image, tf.float32)  # (0.0~255.0)
    # The op expects height before width; passing them by keyword keeps
    # non-square targets from being transposed.
    image = tf.image.resize_image_with_crop_or_pad(
        image, target_height=out_height, target_width=out_width)
    image = tf.subtract(image, 128.0)
    image = tf.div(image, 128.0)
    return image, label
第二种
#coding:utf-8
#preprocess_cifar.py
"""
Train
step1: if pad
step2: random crop
step3: random flip left right
step4: random brightness
step5: random contrast
step6: per image standardization
Eval
step1: resize image with crop or pad
step2: per image standardization
"""
import tensorflow as tf
# Default number of pixels that preprocess_for_train pads on each side of the
# height and width dimensions before taking the random crop.
_PADDING = 4
def preprocess_image(image, label, is_training,
                     out_height=32, out_width=32,
                     add_image_summaries=False):
    """Dispatches an image to the training or evaluation pipeline.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      label: The label that belongs to `image`; it is returned unchanged.
      is_training: Truthy to use `preprocess_for_train`, falsy to use
        `preprocess_for_eval`.
      out_height: The height of the image after preprocessing.
      out_width: The width of the image after preprocessing.
      add_image_summaries: Enable image summaries.

    Returns:
      A tuple `(image, label)` holding the preprocessed image and the
      untouched label.
    """
    # Both pipelines share the same call signature, so pick one and call it.
    pipeline = preprocess_for_train if is_training else preprocess_for_eval
    processed = pipeline(
        image, out_height, out_width,
        add_image_summaries=add_image_summaries)
    return processed, label
def preprocess_for_train(image,
                         output_height,
                         output_width,
                         padding=_PADDING,
                         add_image_summaries=True):
    """Applies the training-time augmentation pipeline to one image.

    Steps, in order: optional padding, random crop, random horizontal flip,
    random brightness, random contrast, per-image standardization.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.
      padding: The amount of padding added before and after the height and
        width dimensions of the image. No padding is applied when this is
        not positive.
      add_image_summaries: Enable image summaries.

    Returns:
      A preprocessed image.
    """
    if add_image_summaries:
        tf.summary.image('image', tf.expand_dims(image, 0))

    # Work in float32 (0.0~255.0) from here on.
    float_image = tf.cast(image, tf.float32)
    if padding > 0:
        border = [padding, padding]
        # Pad height and width only; the channel axis is left alone.
        float_image = tf.pad(float_image, [border, border, [0, 0]])

    # Take a random [height, width] window; the crop assumes 3 channels.
    crop_shape = [output_height, output_width, 3]
    augmented = tf.random_crop(float_image, crop_shape)

    # Mirror the image left/right at random.
    augmented = tf.image.random_flip_left_right(augmented)

    if add_image_summaries:
        tf.summary.image('distorted_image', tf.expand_dims(augmented, 0))

    # Brightness and contrast do not commute, so a different order (or a
    # randomized one) would produce different results.
    augmented = tf.image.random_brightness(augmented, max_delta=63)
    augmented = tf.image.random_contrast(augmented, lower=0.2, upper=1.8)

    # Normalize the image with its own statistics.
    return tf.image.per_image_standardization(augmented)
def preprocess_for_eval(image, output_height, output_width,
                        add_image_summaries=True):
    """Preprocesses the given image for evaluation.

    The image is cast to float, cropped or padded to the requested size and
    then standardized with its own statistics. No random augmentation is
    applied.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.
      add_image_summaries: Enable image summaries.

    Returns:
      A preprocessed image.
    """
    if add_image_summaries:
        tf.summary.image('image', tf.expand_dims(image, 0))

    # Transform the image to floats. tf.cast is used instead of the
    # deprecated tf.to_float, matching the training pipeline.
    image = tf.cast(image, tf.float32)

    # Crop or pad to the target size. The op expects height before width;
    # passing them by keyword keeps non-square targets from being transposed.
    resized_image = tf.image.resize_image_with_crop_or_pad(
        image, target_height=output_height, target_width=output_width)
    if add_image_summaries:
        tf.summary.image('resized_image', tf.expand_dims(resized_image, 0))

    # Subtract off the mean and divide by the (adjusted) standard deviation
    # of the pixels.
    return tf.image.per_image_standardization(resized_image)
第三种
#coding:utf-8
#preprocess_vgg.py
"""
Train
step1: _aspect_preserving_resize
# 在不改变图像宽高比的基础上,
# 将图像的窄边缩放到_RESIZE_SIDE_MIN和_RESIZE_SIDE_MAX之间
step2: _random_crop
# 将图像随机裁剪到 out_height, out_width
# 如果out_height, out_width比缩放后的图像大,报错。
step3: random flip left right
step4: _mean_image_subtraction
# RGB每个通道减去整个数据集的RGB通道的均值
Eval
step1: _aspect_preserving_resize
step2: _central_crop
step3: _mean_image_subtraction
"""
import tensorflow as tf
# Per-channel means for the R, G and B channels. According to the module
# docstring, the mean-subtraction step removes the dataset-wide mean of each
# channel; which dataset these values come from is not stated here.
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
# Default bounds for the aspect-preserving resize: the shorter side of the
# image is scaled to a size between these two values during training.
_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512
def preprocess_image(image, label, is_training,
                     out_height=224, out_width=224,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX):
    """Dispatches an image to the training or evaluation pipeline.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      label: The label that belongs to `image`; it is returned unchanged.
      is_training: Truthy to use `preprocess_for_train`, falsy to use
        `preprocess_for_eval`.
      out_height: The height of the image after preprocessing.
      out_width: The width of the image after preprocessing.
      resize_side_min: The lower bound for the smallest side of the image for
        aspect-preserving resizing. It is the only resize bound handed to the
        evaluation pipeline.
      resize_side_max: The upper bound for the smallest side of the image for
        aspect-preserving resizing. It is forwarded to the training pipeline
        only and ignored for evaluation.

    Returns:
      A tuple `(image, label)` holding the preprocessed image and the
      untouched label.
    """
    if not is_training:
        # Evaluation needs just one resize bound.
        processed = preprocess_for_eval(
            image, out_height, out_width, resize_side_min)
        return processed, label

    processed = preprocess_for_train(
        image, out_height, out_width, resize_side_min, resize_side_max)
    return processed, label
def preprocess_for_train(image,
out_height,
out_width,
resize_side_min=_RESIZE_SIDE_MIN,
resize_side_max=_RESIZE_SIDE_MAX):</