本文介绍Tensorflow处理图像的函数及其使用。
0 环境
- Ubuntu18.04
- Python3.6
- Tensorflow 1.12.0
#-*-coding:utf-8-*-
import matplotlib.pyplot as plt
import tensorflow as tf
#线程
from threading import Thread
1 读取图片
1.0 两种方式
【gfile.FastGFile】
import tensorflow as tf
# Read the raw (still encoded) bytes of an image file.
def pureTF(sourceDir):
    """Print the type and the raw content of the image file at ``sourceDir``.

    Args:
        sourceDir: Path of the image file to read.
    """
    # Reading through tf.gfile is ordinary file I/O: no graph op is created,
    # so no tf.Session is needed here. The ``with`` block closes the handle
    # deterministically instead of leaking it.
    with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
        image_raw = image_file.read()
    print("Type of image: {}".format(type(image_raw)))
    print("Image: {}".format(image_raw))


sourceDir = "./images/reading/Aaron_Eckhart_0001.jpeg"
pureTF(sourceDir)
【read_file】
image_path = "a.jpg"
# Pick the decoder from the file extension.
png = image_path.lower().endswith("png")
img_bytes = tf.read_file(image_path)
if png:
    img_decode = tf.image.decode_png(img_bytes, channels=3)
else:
    img_decode = tf.image.decode_jpeg(img_bytes, channels=3)
# resize_images requires the target size as [new_height, new_width].
img_resize = tf.image.resize_images(img_decode, [300, 300])
with tf.Session() as sess:
    # Keep the evaluated arrays under their own names so the graph tensors
    # are not shadowed and the shapes printed below are the real ones.
    decoded_array, resized_array = sess.run([img_decode, img_resize])
print("image size:{}{}".format(decoded_array.shape, resized_array.shape))
【Result】
Type of image: <type 'str'>
Image:乱码了,不贴出来了
【Analysis】
(1) tensorflow读取图像文件有两种方式:gfile.FastGFile和read_file;
(2) tensorflow读取图像文件和python的open读取功能一致,获取图像的信息,转成string格式;
(3) 图像解码方式通过文件后缀判断:image_path.lower().endswith("png");
2 图片解码
【Demo】
import tensorflow as tf
import matplotlib.pyplot as plt
def pureTF(sourceDir):
    """Decode the JPEG file at ``sourceDir`` and display it.

    Prints the type of the raw data, the decode tensor, and the type,
    content and shape of the decoded array, then shows the image.

    Args:
        sourceDir: Path of the JPEG file to decode.
    """
    # Close the file handle as soon as the raw bytes have been read.
    with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
        image_raw = image_file.read()
    print("Type of image raw: {}".format(type(image_raw)))
    # decode_jpeg only adds an op to the graph; the result is still a Tensor.
    img_decode = tf.image.decode_jpeg(image_raw)
    print("Type of decode image: {}".format(img_decode))
    # Running the op in a session yields the pixel data as a numpy array.
    with tf.Session() as sess:
        img_decode_show = sess.run(img_decode)
    print("Type of decode image: {}".format(type(img_decode_show)))
    print("Decode image: {}".format(img_decode_show))
    print("Shape of decode image: {}".format(img_decode_show.shape))
    # Render the decoded pixel array.
    plt.imshow(img_decode_show)
    plt.show()


sourceDir = "./images/reading/Aaron_Eckhart_0001.jpeg"
pureTF(sourceDir)
【Result】
Type of image raw: <type 'str'>
Type of decode image: Tensor("DecodeJpeg:0", shape=(?, ?, ?), dtype=uint8)
Type of decode image: <type 'numpy.ndarray'>
Decode image: [[[ 0 0 0]
[ 0 0 0]
[ 0 0 0]
...
[ 0 0 0]
[ 0 0 0]
[ 0 0 0]]
[[ 0 0 0]
[ 0 0 0]
[ 0 0 0]
...
[ 0 0 0]
[ 0 0 0]
[ 0 0 0]]
[[ 0 0 0]
[ 0 0 0]
[ 0 0 0]
...
[ 2 2 0]
[ 2 2 0]
[ 2 2 0]]
...
[[104 120 146]
[109 125 151]
[117 133 159]
...
[119 130 150]
[132 145 164]
[138 151 170]]
[[102 118 144]
[104 120 146]
[112 128 154]
...
[126 139 158]
[139 152 171]
[145 158 177]]
[[ 99 115 141]
[101 117 143]
[109 125 151]
...
[132 145 164]
[142 155 174]
[149 162 181]]]
Shape of decode image: (250, 250, 3)
【Analysis】
(1) 读取:tensorflow读取图像数据,得到图片的原始字节串,即<type 'str'>(Python3下为<class 'bytes'>);
(2) 解码:tensorflow将图片信息进行解码,得到Tensor,即Tensor("DecodeJpeg:0", shape=(?, ?, ?), dtype=uint8);
(3) 运行:通过运行Session,得到numpy.ndarray格式的图像数据,即<type 'numpy.ndarray'>;
(4) 绘图:绘制图像使用imshow函数以numpy.array为参数绘制;
(5) 图像维度: (250, 250, 3)
3 图片编码
【Demo】
import tensorflow as tf
import matplotlib.pyplot as plt
def pureTF(sourceDir, objectDir):
    """Decode a JPEG file, re-encode it as JPEG and save the result.

    Args:
        sourceDir: Path of the JPEG file to read.
        objectDir: Path the re-encoded JPEG is written to.
    """
    with tf.gfile.FastGFile(sourceDir, 'rb') as source_file:
        image_raw = source_file.read()
    print("Type of image raw: {}".format(type(image_raw)))
    img_decode = tf.image.decode_jpeg(image_raw)
    print("Type of decode image: {}".format(img_decode))
    # encode_jpeg consumes the decoded tensor directly.
    img_encode = tf.image.encode_jpeg(img_decode)
    with tf.Session() as sess:
        # A single run evaluates decode and encode together; running the
        # decode op separately would only repeat the work.
        img_encode_show = sess.run(img_encode)
    print("Type of encode image: {}".format(type(img_encode_show)))
    with tf.gfile.GFile(objectDir, 'wb') as target_file:
        target_file.write(img_encode_show)


sourceDir = "./images/reading/Aaron_Eckhart_0001.jpeg"
objectDir = "./images/saved/test.jpeg"
pureTF(sourceDir, objectDir)
【Result】
(1) Type of encode image: <type 'str'>
(2) 保存图片至指定目录:./images/saved/,图片名为:test.jpeg
【Analysis】
(1) 编码:tf.image.encode_jpeg();
(2) 解码:tf.image.decode_jpeg()将JPEG字节串解码为Tensor,编码时以该Tensor为输入,运行Session后得到编码后的字节串;
(3) tensorflow依旧可以使用with方式进行图片读写(哈哈哈);
4 图片数据类型转化
【Demo】
# Image dtype conversion: integer (uint8) pixels to floating point.
def imageProcessPureTF(sourceDir):
    """Decode a JPEG file and convert its pixels to ``tf.float32``.

    Prints the type of the conversion tensor, then the type, content and
    shape of the evaluated array.

    Args:
        sourceDir: Path of the JPEG file to read.
    """
    with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
        # The result of decode_jpeg is the *decoded* image tensor.
        img_decode = tf.image.decode_jpeg(image_file.read())
    # convert_image_dtype also rescales the values for float output.
    img_float = tf.image.convert_image_dtype(img_decode, dtype=tf.float32)
    print("Type of convert image to float32 : {}".format(type(img_float)))
    with tf.Session() as sess:
        img_float_show = sess.run(img_float)
    print("Type of convert image to float32 in Session: {}".format(type(img_float_show)))
    print("Convert image to float32 in Session: {}".format(img_float_show))
    print("Shape of convert image in Session: {}".format(img_float_show.shape))


sourceDir = "./images/reading/Aaron_Eckhart_0001.jpeg"
imageProcessPureTF(sourceDir)
【Result】
Type of convert image to float32 : <class 'tensorflow.python.framework.ops.Tensor'>
Type of convert image to float32 in Session: <type 'numpy.ndarray'>
Convert image to float32 in Session:: [[[0. 0. 0. ]
[0. 0. 0. ]
[0. 0. 0. ]
...
[0. 0. 0. ]
[0. 0. 0. ]
[0. 0. 0. ]]
[[0. 0. 0. ]
[0. 0. 0. ]
[0. 0. 0. ]
...
[0. 0. 0. ]
[0. 0. 0. ]
[0. 0. 0. ]]
[[0. 0. 0. ]
[0. 0. 0. ]
[0. 0. 0. ]
...
[0.00784314 0.00784314 0. ]
[0.00784314 0.00784314 0. ]
[0.00784314 0.00784314 0. ]]
...
[[0.40784317 0.47058827 0.57254905]
[0.427451 0.4901961 0.5921569 ]
[0.45882356 0.52156866 0.62352943]
...
[0.4666667 0.50980395 0.5882353 ]
[0.5176471 0.5686275 0.6431373 ]
[0.5411765 0.5921569 0.6666667 ]]
[[0.40000004 0.46274513 0.5647059 ]
[0.40784317 0.47058827 0.57254905]
[0.43921572 0.5019608 0.6039216 ]
...
[0.49411768 0.54509807 0.61960787]
[0.54509807 0.59607846 0.67058825]
[0.5686275 0.61960787 0.69411767]]
[[0.38823533 0.45098042 0.5529412 ]
[0.39607847 0.45882356 0.56078434]
[0.427451 0.4901961 0.5921569 ]
...
[0.5176471 0.5686275 0.6431373 ]
[0.5568628 0.60784316 0.68235296]
[0.58431375 0.63529414 0.70980394]]]
Shape of convert image in Session: (250, 250, 3)
【Analysis】
(1) 图像解码之后,传入类型转换函数,将像素值从0~255转为0.0~1.0的浮点数,保证后续处理精度;
(2) 图像维度不变;
到此Tensorflow图像处理的根,即编码解码已经全部解析完毕,下面开始进阶操作。
5 改变图片尺寸:保留图片全部内容
【Demo】
# Resize an image with each interpolation method and plot the results.
with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
    image_raw = image_file.read()
img_decode = tf.image.decode_jpeg(image_raw)
# Convert to tf.float32 first so resizing does not lose too much information.
if img_decode.dtype != tf.float32:
    img_decode = tf.image.convert_image_dtype(img_decode, dtype=tf.float32)

# resize_images method ids, in order:
# 0 bilinear, 1 nearest neighbour, 2 bicubic, 3 area interpolation.
method_desc = ['Bilinear', 'Nearest neighbor', 'Bicubic', 'Area']
print(type(method_desc[0]))
# One resize op per interpolation method.
resizes = [
    tf.image.resize_images(img_decode, [300, 300], method=method_id)
    for method_id in range(len(method_desc))
]
print(len(resizes))

# Tensor.eval() needs a default session; the ``with`` block installs one.
with tf.Session():
    # The window and figure titles only need to be set once.
    plt.gcf().canvas.set_window_title('不同方法处理图片')
    plt.gcf().suptitle("Interpolation")
    for i, (resized, desc) in enumerate(zip(resizes, method_desc)):
        print(i)
        plt.subplot(2, 2, i + 1).set_title(desc)
        plt.imshow(resized.eval())
    # Save the figure.
    plt.savefig('./picture/resized.jpg')
    # Show on screen in interactive mode.
    plt.ion()
    plt.show()
    # Keep the window open for this many seconds.
    plt.pause(5)
    # Close the window.
    plt.close()
6 剪裁图片
【Demo】
# Crop or pad an image to a fixed size and plot both results.
with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
    image_raw = image_file.read()
img_decode = tf.image.decode_jpeg(image_raw)
# resize_image_with_crop_or_pad crops centrally when the image is larger
# than the target size and pads with zeros when it is smaller.
croped = tf.image.resize_image_with_crop_or_pad(img_decode, 1000, 1000)
print(type(croped))
padded = tf.image.resize_image_with_crop_or_pad(img_decode, 3000, 3000)
print(type(padded))

# Tensor.eval() needs a default session; the ``with`` block installs one.
with tf.Session():
    # Create each figure, add a title, draw and save it.
    plt.figure(1)
    plt.title('Crop')
    plt.imshow(croped.eval())
    plt.savefig('./picture/crop.jpg')
    plt.figure(2)
    plt.title('Padding')
    plt.imshow(padded.eval())
    plt.savefig('./picture/pad.jpg')
    plt.ion()
    plt.show()
    plt.pause(10)
    plt.close()
7 调整图片大小:比例
【Demo】
# Crop the central region of an image by a fraction and plot it.
with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
    image_raw = image_file.read()
img_decode = tf.image.decode_jpeg(image_raw)
# 0.5 is the fraction of each dimension to keep; it must lie in (0, 1].
central_cropped = tf.image.central_crop(img_decode, 0.5)

# Tensor.eval() needs a default session; the ``with`` block installs one.
with tf.Session():
    plt.figure(1)
    plt.title('Crop para')
    plt.imshow(central_cropped.eval())
    plt.savefig('./picture/cropparam.jpg')
    plt.ion()
    plt.show()
    plt.pause(10)
    plt.close()
8 翻转图片
【Demo】
# Flip and transpose an image, then plot the three results side by side.
with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
    image_raw = image_file.read()
img_decode = tf.image.decode_jpeg(image_raw)
# Flip upside down.
flip0 = tf.image.flip_up_down(img_decode)
# Flip left to right.
flip1 = tf.image.flip_left_right(img_decode)
# Transpose (swap height and width).
flip2 = tf.image.transpose_image(img_decode)

# Tensor.eval() needs a default session; the ``with`` block installs one.
with tf.Session():
    plt.subplot(1, 3, 1).set_title('up_down')
    plt.imshow(flip0.eval())
    plt.subplot(1, 3, 2).set_title('left_right')
    plt.imshow(flip1.eval())
    plt.subplot(1, 3, 3).set_title('transpose')
    plt.imshow(flip2.eval())
    plt.savefig('./picture/flip.jpg')
    plt.ion()
    plt.show()
    plt.pause(10)
    plt.close()

# Random flips: each is applied with a 50% probability. They take the decoded
# image tensor (the original referenced an undefined ``img_encode``).
flip1 = tf.image.random_flip_up_down(img_decode)
flip2 = tf.image.random_flip_left_right(img_decode)
9 调整图片色彩
【Demo】
# Build brightness, contrast, hue, saturation and standardization ops.
with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
    image_raw = image_file.read()
img_decode = tf.image.decode_jpeg(image_raw)
# Work on float32 pixels in [0, 1]: the clip below uses float bounds, which
# do not match a uint8 tensor.
img_decode = tf.image.convert_image_dtype(img_decode, dtype=tf.float32)

# Brightness: add -0.5 to every pixel (darker).
brightness0 = tf.image.adjust_brightness(img_decode, -0.5)
# Clip the adjusted values back into the valid range [0, 1].
brightness1 = tf.clip_by_value(brightness0, 0.0, 1.0)
# Brightness: add +0.5 to every pixel (brighter).
brightness2 = tf.image.adjust_brightness(img_decode, 0.5)
# Random variant: tf.image.random_brightness(img_decode, max_delta)

# Contrast (the ``brightness4``/``brightness5`` names are kept from the
# original snippet, but these are contrast adjustments).
# Contrast factor 0.8.
brightness4 = tf.image.adjust_contrast(img_decode, 0.8)
# Contrast factor 2.
brightness5 = tf.image.adjust_contrast(img_decode, 2)
# Random variant: tf.image.random_contrast(img_decode, lower, upper)

# Hue: shift by 0.2.
hue0 = tf.image.adjust_hue(img_decode, 0.2)
# Random variant: tf.image.random_hue(img_decode, max_delta)

# Saturation: factor -2.
saturation0 = tf.image.adjust_saturation(img_decode, -2)
# Saturation: factor 2.
saturation1 = tf.image.adjust_saturation(img_decode, 2)
# Random variant: tf.image.random_saturation(img_decode, lower, upper)

# Standardize the image to zero mean and unit variance.
changed = tf.image.per_image_standardization(img_decode)
10 标注框
【Demo】
# Draw bounding boxes on a resized image and display the result.
with tf.gfile.FastGFile(sourceDir, 'rb') as image_file:
    image_raw = image_file.read()
img_decode = tf.image.decode_jpeg(image_raw)
# Convert to float32 in [0, 1] before resizing; without this the resized
# image holds 0-255 floats, which imshow does not render correctly.
img_decode = tf.image.convert_image_dtype(img_decode, dtype=tf.float32)
resized_method0 = tf.image.resize_images(img_decode, [180, 267], method=0)
# The image is 3-D; draw_bounding_boxes takes a 4-D batch, so add a batch axis.
batched = tf.expand_dims(resized_method0, 0)
# Boxes are [y_min, x_min, y_max, x_max], relative to the image size.
boxes = tf.constant([[[0.05, 0.05, 0.9, 0.7], [0.35, 0.47, 0.5, 0.56]]])
# Draw the boxes.
bounding_box = tf.image.draw_bounding_boxes(batched, boxes)
print(type(bounding_box))

# Tensor.eval() needs a default session; the ``with`` block installs one.
with tf.Session():
    # Evaluate the graph once and reuse the array.
    boxed_batch = bounding_box.eval()
    print(boxed_batch)
    # imshow cannot draw the 4-D batch, so take its first (only) image.
    boxed_image = boxed_batch[0]
    print(type(boxed_image))
    print(boxed_image)
    plt.imshow(boxed_image)
    plt.ion()
    plt.show()
    plt.pause(10)
    plt.close()
【问题1】
File "imageProcess.py", line 188, in <module>
plt.imshow(bounding_box.eval())
File "/Users/xindaqi/anaconda3/lib/python3.6/site-packages/matplotlib/pyplot.py", line 3205, in imshow
**kwargs)
File "/Users/xindaqi/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py", line 1855, in inner
return func(ax, *args, **kwargs)
File "/Users/xindaqi/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py", line 5487, in imshow
im.set_data(X)
File "/Users/xindaqi/anaconda3/lib/python3.6/site-packages/matplotlib/image.py", line 653, in set_data
raise TypeError("Invalid dimensions for image data")
TypeError: Invalid dimensions for image data
【原因】
bounding_box.eval()是四维矩阵,而imshow()只能处理三维矩阵。
【解决方案】
四维矩阵替换为三维矩阵,即从四维矩阵中提取出三维。
bounding_box[0].eval()
总结
(1) Tensorflow函数只处理Tensor向量,图片的解码过程,tf先读取图片,将图片转为Tensor,然后进行解码,解码之后仍为Tensor向量;
(2) 提取Tensor向量,需要使用会话功能,即tf.Session(),使用Session即可还原数据本来的面目,如图像编码,运行之后是numpy.array类型;
(3) 数据维度:data.shape获取;Tensor获取维度可能是(?,?,?),运行会话之后,即可获取真正shape;
(4) tensorflow读取图像文件有两种方式:gfile.FastGFile和read_file;
(5) 图像解码通过后缀判断:image_name.lower().endswith("png");
(6) 图像尺寸变换是先将图像数据转为tf.float32格式,减少图像信息损失;