文章目录
本文于2022年5月15日进行更新:一是将tensorflow版本升级到tf2.8(win10系统),并按评论中的建议使用cv2.cvtColor来处理通道顺序问题;二是增加了使用tensorflow将png转换为jpg的内容。
在用深度学习解决计算机视觉问题时,python处理图像常用的库有PIL、opencv、scikit-image,主流的深度学习框架会使用这几个库来读取图像;但其中tensorflow有自己独立的图像处理API,读取图像的方式不同。同一张图,用不同的库读取,得到的数据可能会不同,在做模型推理时,可能会产生完全不一样的结果。因此,本文将会对这四种图像读取的方式及结果进行对比分析。
大多数预训练模型都是基于某一种图像库读取的图像来训练的,也就是说训练时的图像处理只用了一种编解码方式和一种插值方式,比如tf的bilinear、cv2的bicubic、pillow的bicubic等。在推理时,如果使用了与训练时不同的插值方式,精度(top-1 on imagenet-1k)会下降0.2-0.5%,强数据增强方法可以缓解这种情况。另外,同样的其它运算在不同的库中结果也会有所不同,同样会影响最终结果;本文对此不做过多讨论,只对它们的读取方式做个简单比较。
1、四种不同的库读取jpg图显示
用PIL,skimage,opencv,tf.image四种库来读取图片对比差异
from PIL import Image
from skimage import io
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
def readimg(imgpath):
    """Decode one image with PIL, scikit-image, OpenCV and TensorFlow.

    For each library the decoded type, shape and dtype are printed. The four
    results are then drawn side by side in a 1x4 figure titled 0-3, in the
    order PIL, scikit-image, OpenCV, TensorFlow.

    Args:
        imgpath: Path of the image file. The TensorFlow branch decodes the
            bytes with ``tf.image.decode_jpeg``.

    Returns:
        Tuple ``(pil, skimage, opencv, tensorflow)`` of NumPy arrays.
    """
    # --- PIL -------------------------------------------------------------
    print("==============================PIL=========================================")
    pil_image = Image.open(imgpath)
    pil_array = np.array(pil_image)
    # `size` is PIL's own attribute; the array shape is printed next to it
    # so the two conventions can be compared in the output.
    print(f"format:{pil_image.format} type:{type(pil_image)} shape:{pil_image.size} mode:{pil_image.mode} npshape:{pil_array.shape} dtype:{pil_array.dtype}")

    # --- scikit-image ----------------------------------------------------
    print("==============================skimage=========================================")
    sk_array = io.imread(imgpath)
    print(f"type:{type(sk_array)} shape:{sk_array.shape} dtype:{sk_array.dtype}")

    # --- OpenCV ----------------------------------------------------------
    print("==============================opencv=========================================")
    # Read the file unchanged, then reorder BGR -> RGB so the channel order
    # matches the other three libraries.
    cv_array = cv2.cvtColor(cv2.imread(imgpath, cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB)
    print(f"type:{type(cv_array)} shape:{cv_array.shape} dtype:{cv_array.dtype}")

    # --- TensorFlow ------------------------------------------------------
    print("==============================tensorflow=========================================")
    # JPEG-specific decoder with its default settings (no dct_method given).
    tf_array = tf.image.decode_jpeg(tf.io.read_file(imgpath)).numpy()
    print(f"type:{type(tf_array)} shape:{tf_array.shape} dtype:{tf_array.dtype}")

    # --- side-by-side preview ---------------------------------------------
    decoded = (pil_array, sk_array, cv_array, tf_array)
    plt.figure(figsize=(20, 5))
    for position, image in enumerate(decoded, start=1):
        axis = plt.subplot(1, 4, position)
        plt.imshow(image)
        plt.axis("off")
        axis.title.set_text(f"{position - 1}")
    return decoded
# Sample JPEG used for the comparison (path relative to the working directory).
imgpath='a.jpg'
# Decode it with all four libraries and keep the arrays for the numeric comparison below.
np_pil_img,sk_img,cv_img,tf_img=readimg(imgpath)
==============================PIL=========================================
format:JPEG type:<class 'PIL.JpegImagePlugin.JpegImageFile'> shape:(720, 480) mode:RGB npshape:(480, 720, 3) dtype:uint8
==============================skimage=========================================
type:<class 'numpy.ndarray'> shape:(480, 720, 3) dtype:uint8
==============================opencv=========================================
type:<class 'numpy.ndarray'> shape:(480, 720, 3) dtype:uint8
==============================tensorflow=========================================
type:<class 'numpy.ndarray'> shape:(480, 720, 3) dtype:uint8
2、评估所读图片的差异
显示效果是一样的,但我们还要看具体的数值是否一样
# Compare every unordered pair of decoded arrays and report whether they match.
imgs = [np_pil_img,sk_img,cv_img,tf_img]
idx2name = {idx:name for idx,name in enumerate(['pil','sk','cv','tf'])}
for i in range(len(imgs)):
    for j in range(i+1,len(imgs)):
        try:
            np.testing.assert_almost_equal(imgs[i],imgs[j])
        except AssertionError:
            # Only a failed comparison means "difference"; any other error
            # (undefined name, interrupt, ...) must surface instead of being
            # silently reported as a pixel mismatch.
            print(f"{idx2name[i]}/{idx2name[j]} difference")
        else:
            print(f"{idx2name[i]}/{idx2name[j]} same")
pil/sk same
pil/cv same
pil/tf difference
sk/cv same
sk/tf difference
cv/tf difference
接着查看具体的差值
# Sum of absolute per-element differences between the OpenCV and TensorFlow decodes; both uint8 arrays are cast to float32 before subtracting.
np.sum(np.abs(cv_img.astype(np.float32)-tf_img.astype(np.float32)))
1040874.0
# Same metric with the operands swapped; taking the absolute value makes the result symmetric.
np.sum(np.abs(tf_img.astype(np.float32)-cv_img.astype(np.float32)))
1040874.0
# Cross-check the vectorised result with an explicit element-by-element loop.
# The loop walks the array's real shape rather than hard-coded bounds, so it
# covers the whole image, and the accumulator no longer shadows builtin sum().
total = 0.0
for idx in np.ndindex(cv_img.shape):
    # Convert to Python floats first: subtracting uint8 values would wrap around.
    total += abs(float(cv_img[idx]) - float(tf_img[idx]))
print(total)
1040874.0
# Stack the four decodes on a new leading axis. The source arrays are uint8,
# so they are cast to float32 before any arithmetic to avoid overflow.
all_img = np.stack((np_pil_img, sk_img, cv_img, tf_img), axis=0).astype(np.float32)
# Broadcasting one copy against the other gives diff[i, j] = |image_i - image_j|.
diff = np.abs(all_img[:, np.newaxis] - all_img[np.newaxis])
for value in (diff.shape, diff.min(), diff.max()):
    print(value)
(4, 4, 480, 720, 3)
0.0
5.0
# Draw the pairwise absolute-difference images as a grid; each title carries
# the library pair and the summed difference for that pair.
idx2name = {idx:name for idx,name in enumerate(['pil','sk','cv','tf'])}
n_rows, n_cols = diff.shape[:2]
plt.figure(figsize=(20,20))
for i in range(n_rows):
    for j in range(n_cols):
        # Subplot indices are 1-based and run row by row, so the stride
        # between rows is the number of COLUMNS, not the number of rows.
        ax = plt.subplot(n_rows, n_cols, i*n_cols + j + 1)
        plt.imshow(diff[i,j].astype(np.uint8))
        dif = np.sum(diff[i,j].astype(np.float32))
        plt.axis('off')
        ax.title.set_text(f"{idx2name[i]}/{idx2name[j]} diff:{dif}")
# Adjust the layout once, after every subplot has been created.
plt.tight_layout()
3、简单说明有差异原因
从第二部分可以看出,tensorflow读图的结果与其它库不同。详细说明可以查看https://towardsdatascience.com/image-read-and-resize-with-opencv-tensorflow-and-pil-3e0f29b992be,这篇文章针对读取图片和resize两个环节,说明了tensorflow产生差异的原因。关于读图时的差异,原文的解释是:“this difference is arising from the fact that OpenCV, by default, uses integer accurate decompression of the JPEG image. In contrast, TensorFlow uses Discrete Cosine Transform as default. This type of decoding is inaccurate and so to make it the same as OpenCV, we need to decode it by using integer accurate decompression. This can be done by setting the parameter dct_method='INTEGER_ACCURATE' as shown below.”
# Decode the same JPEG again, this time asking TensorFlow for the
# integer-accurate DCT, and measure the summed absolute difference to the
# OpenCV result.
image_tf = tf.image.decode_jpeg(
    tf.io.read_file(imgpath), channels=3, dct_method='INTEGER_ACCURATE'
).numpy()
np.abs(image_tf.astype(np.float32) - cv_img.astype(np.float32)).sum()
0.0
总的来说,同样的神经网络,因为读图的不同,会导致结果完全不同,这点要注意
4、同样的流程对png图片进行处理
# PNG sample used to repeat the same comparison (path relative to the working directory).
imgpath='b.png'
def readimg_png(imgpath):
    """Decode one PNG with PIL, scikit-image, OpenCV and TensorFlow.

    PNG counterpart of the JPEG reader: the OpenCV result is reordered with
    ``COLOR_BGRA2RGBA`` and the TensorFlow branch uses ``tf.image.decode_png``.
    The type, shape and dtype from each library are printed, and the four
    results are drawn side by side in a 1x4 figure titled 0-3, in the order
    PIL, scikit-image, OpenCV, TensorFlow.

    Args:
        imgpath: Path of the PNG file.

    Returns:
        Tuple ``(pil, skimage, opencv, tensorflow)`` of NumPy arrays.
    """
    # --- PIL -------------------------------------------------------------
    print("==============================PIL=========================================")
    pil_image = Image.open(imgpath)
    pil_array = np.array(pil_image)
    print(f"format:{pil_image.format} type:{type(pil_image)} shape:{pil_image.size} mode:{pil_image.mode} npshape:{pil_array.shape} dtype:{pil_array.dtype}")

    # --- scikit-image ----------------------------------------------------
    print("==============================skimage=========================================")
    sk_array = io.imread(imgpath)
    print(f"type:{type(sk_array)} shape:{sk_array.shape} dtype:{sk_array.dtype}")

    # --- OpenCV ----------------------------------------------------------
    print("==============================opencv=========================================")
    # Read the file unchanged so the alpha channel is kept, then reorder
    # BGRA -> RGBA to match the other libraries.
    cv_array = cv2.cvtColor(cv2.imread(imgpath, cv2.IMREAD_UNCHANGED), cv2.COLOR_BGRA2RGBA)
    print(f"type:{type(cv_array)} shape:{cv_array.shape} dtype:{cv_array.dtype}")

    # --- TensorFlow ------------------------------------------------------
    print("==============================tensorflow=========================================")
    # PNG-specific decoder with its default settings.
    tf_array = tf.image.decode_png(tf.io.read_file(imgpath)).numpy()
    print(f"type:{type(tf_array)} shape:{tf_array.shape} dtype:{tf_array.dtype}")

    # --- side-by-side preview ---------------------------------------------
    decoded = (pil_array, sk_array, cv_array, tf_array)
    plt.figure(figsize=(20, 5))
    for position, image in enumerate(decoded, start=1):
        axis = plt.subplot(1, 4, position)
        plt.imshow(image)
        plt.axis("off")
        axis.title.set_text(f"{position - 1}")
    return decoded
# Use the PNG reader (decode_png + BGRA->RGBA); readimg would take the JPEG path (decode_jpeg + BGR->RGB) for this RGBA file.
np_pil_img,sk_img,cv_img,tf_img=readimg_png(imgpath)
==============================PIL=========================================
format:PNG type:<class 'PIL.PngImagePlugin.PngImageFile'> shape:(624, 480) mode:RGBA npshape:(480, 624, 4) dtype:uint8
==============================skimage=========================================
type:<class 'numpy.ndarray'> shape:(480, 624, 4) dtype:uint8
==============================opencv=========================================
type:<class 'numpy.ndarray'> shape:(480, 624, 4) dtype:uint8
==============================tensorflow=========================================
type:<class 'numpy.ndarray'> shape:(480, 624, 4) dtype:uint8
# Compare every unordered pair of decoded PNG arrays and report whether they match.
imgs = [np_pil_img,sk_img,cv_img,tf_img]
idx2name = {idx:name for idx,name in enumerate(['pil','sk','cv','tf'])}
for i in range(len(imgs)):
    for j in range(i+1,len(imgs)):
        try:
            np.testing.assert_almost_equal(imgs[i],imgs[j])
        except AssertionError:
            # Only a failed comparison means "difference"; any other error
            # (undefined name, interrupt, ...) must surface instead of being
            # silently reported as a pixel mismatch.
            print(f"{idx2name[i]}/{idx2name[j]} difference")
        else:
            print(f"{idx2name[i]}/{idx2name[j]} same")
pil/sk same
pil/cv same
pil/tf same
sk/cv same
sk/tf same
cv/tf same
# Stack the four PNG decodes on a new leading axis. The source arrays are
# uint8, so they are cast to float32 before any arithmetic to avoid overflow.
all_img = np.stack((np_pil_img, sk_img, cv_img, tf_img), axis=0).astype(np.float32)
# Broadcasting one copy against the other gives diff[i, j] = |image_i - image_j|.
diff = np.abs(all_img[:, np.newaxis] - all_img[np.newaxis])
for value in (diff.shape, diff.min(), diff.max()):
    print(value)
(4, 4, 480, 624, 4)
0.0
0.0
# Draw the pairwise absolute-difference images for the PNG decodes as a grid;
# each title carries the library pair and the summed difference for that pair.
idx2name = {idx:name for idx,name in enumerate(['pil','sk','cv','tf'])}
n_rows, n_cols = diff.shape[:2]
plt.figure(figsize=(20,20))
for i in range(n_rows):
    for j in range(n_cols):
        # Subplot indices are 1-based and run row by row, so the stride
        # between rows is the number of COLUMNS, not the number of rows.
        ax = plt.subplot(n_rows, n_cols, i*n_cols + j + 1)
        plt.imshow(diff[i,j].astype(np.uint8))
        dif = np.sum(diff[i,j].astype(np.float32))
        plt.axis('off')
        ax.title.set_text(f"{idx2name[i]}/{idx2name[j]} diff:{dif}")
# Adjust the layout once, after every subplot has been created.
plt.tight_layout()
可以看到,对于png图片,四种库读取的结果是完全相同的(png是无损压缩,解码结果不依赖于具体的解码实现)。
5、png图片转jpg
5.1 使用PIL进行转换
from PIL import Image
# Convert to RGB unless the image already is RGB (the sample PNG is RGBA),
# then save it as a JPEG at quality 95.
im = Image.open('b.png')
im = im if im.mode == 'RGB' else im.convert('RGB')
im.save('b_pil.jpg', quality=95)
5.2 使用Opencv进行转换
import cv2
# Load the PNG with cv2.imread's default flag, which yields a 3-channel BGR array.
png_img = cv2.imread('b.png')
# cv2.imwrite expects BGR channel order, which is exactly what imread returned,
# so no colour conversion is applied here. Converting to RGB(A) first would
# store the JPEG with red and blue swapped.
cv2.imwrite('b_cv.jpg', png_img, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
True
5.3 使用Tensorflow 进行转换
import tensorflow as tf
# Decode the PNG as a 3-channel image, then write the JPEG along two routes.
tf_img = tf.image.decode_png(tf.io.read_file('b.png'), channels=3)
# Route 1: encode and save entirely with TensorFlow.
t = tf.image.encode_jpeg(tf_img, quality=95)
tf.io.write_file('b_tf1.jpg', t)
# Route 2: save through OpenCV, with the channel axis reversed before writing.
cv2.imwrite('b_tf.jpg', tf_img.numpy()[..., ::-1])
5.4 使用scikit-image进行转换
事实上,scikit-image的io模块是借助其它库(以插件的形式)来完成图像读写的
import skimage.io as io
# List the I/O plugins scikit-image can find, with the functions each one provides.
# NOTE(review): newer scikit-image releases appear to deprecate the plugin API; confirm against the installed version.
io.find_available_plugins()
{‘fits’: [‘imread’, ‘imread_collection’],
‘gdal’: [‘imread’, ‘imread_collection’],
‘gtk’: [‘imshow’],
‘imageio’: [‘imread’, ‘imsave’, ‘imread_collection’],
‘imread’: [‘imread’, ‘imsave’, ‘imread_collection’],
‘matplotlib’: [‘imshow’, ‘imread’, ‘imshow_collection’, ‘imread_collection’],
‘pil’: [‘imread’, ‘imsave’, ‘imread_collection’],
‘qt’: [‘imshow’, ‘imsave’, ‘imread’, ‘imread_collection’],
‘simpleitk’: [‘imread’, ‘imsave’, ‘imread_collection’],
‘tifffile’: [‘imread’, ‘imsave’, ‘imread_collection’]}
import skimage.io as io
# Select the 'pil' plugin for skimage.io; presumably reads and writes then go through Pillow.
# NOTE(review): newer scikit-image releases appear to deprecate the plugin API; confirm against the installed version.
io.use_plugin('pil')
这样就相当于使用pil了
5.5 对比以上三种方法转换的图片是否相同
三种转换方法都在quality为95的情况下做对比,有关quality的取值,可以参看https://jdhao.github.io/2019/07/20/pil_jpeg_image_quality/
from PIL import Image
import numpy as np
# Reload the four converted JPEGs with one decoder (PIL) so that any
# difference found comes from the encoders, not from the reader.
pil_img, cv_img, tf_img, tf_img1 = (
    np.array(Image.open(jpg_name))
    for jpg_name in ('b_pil.jpg', 'b_cv.jpg', 'b_tf.jpg', 'b_tf1.jpg')
)
for decoded in (pil_img, cv_img, tf_img):
    print(decoded.shape)
# The PIL-encoded file is the reference; assert_equal raises on the first mismatch.
for candidate in (cv_img, tf_img, tf_img1):
    np.testing.assert_equal(pil_img, candidate)
(480, 624, 3)
(480, 624, 3)
(480, 624, 3)
结果是完全相同的。