caffe网络推理图像数据加载中的坑(Python切片)
1.人脸检测MobileNetSSD网络推理demo(正确的)
# A highlighted block
import numpy as np
import sys,os
import cv2
import caffe
# Network definition and trained weights of the MobileNet-SSD face detector.
net_file = 'deploy.prototxt'
caffe_model = 'test.caffemodel'
# Directory whose files are fed to the detector one by one.
test_dir = "./rgb_image"

# Stop early, with a hint and a non-zero exit status, when the weights file
# is missing.
if not os.path.exists(caffe_model):
    print("use merge_bn.py to generate it.")
    sys.exit(1)

# Load the network in inference (TEST) mode.
net = caffe.Net(net_file, caffe_model, caffe.TEST)

# Class names, indexed by the label id read from the detection output.
CLASSES = ('background', 'face')
def preprocess_mssd(src):
    """Resize an image to 256x256 and normalise it for the network.

    Args:
        src: Image array accepted by ``cv2.resize`` (in this script, the
            result of ``cv2.imread``).

    Returns:
        A tuple ``(img, image)``. ``img`` is the resized image with 127.5
        subtracted and then multiplied by 1/127.5 (for uint8 input this maps
        pixel values to roughly [-1, 1]). ``image`` is an unmodified copy of
        the resized image, intended for drawing.
    """
    resized = cv2.resize(src, (256, 256))
    image = resized.copy()
    # Diagnostic output: shape and dtype of the resized image.
    print(resized.shape)
    print(resized.dtype)
    # Subtract the mean from the whole array in a single expression so NumPy
    # allocates a new floating-point result.  Do NOT write the result back
    # channel by channel (``arr[:, :, c] = arr[:, :, c] - 127.5``): assigning
    # into a slice keeps the destination dtype, so with a uint8 image the
    # negative values wrap around and the network receives garbage.
    img = resized - 127.5
    img = img * (1.0 / 127.5)
    return img, image
def postprocess(img, out):
    """Convert the raw detection blob into pixel boxes, scores and labels.

    Args:
        img: Image array; only ``img.shape[0]`` (height) and ``img.shape[1]``
            (width) are used, to scale the box coordinates.
        out: Mapping holding the network output under ``'detection_out'``,
            indexed as ``[0, 0, row, column]``.  Column 1 is read as the
            class label, column 2 as the confidence and columns 3..6 as the
            box corners (presumably normalised x1, y1, x2, y2, since they
            are multiplied by width/height).

    Returns:
        A tuple ``(box, conf, cls)``: ``box`` is an int32 array with one row
        of four scaled coordinates per detection, ``conf`` the confidences
        and ``cls`` the class labels.
    """
    height, width = img.shape[0], img.shape[1]
    # Look the blob up once; every row is one detection.
    detections = out['detection_out'][0, 0]
    scale = np.array([width, height, width, height])
    boxes = detections[:, 3:7] * scale
    labels = detections[:, 1]
    scores = detections[:, 2]
    return (boxes.astype(np.int32), scores, labels)
def detect(imgfile):
    """Run the detector on one image file and display the annotated result.

    The image is read, preprocessed, pushed through ``net`` and every
    detection is printed and drawn on the resized copy, which is then shown
    in a window until a key is pressed.

    Args:
        imgfile: Path of the image file to process.

    Returns:
        False if ESC was pressed in the preview window, True otherwise
        (including when the file could not be read as an image and was
        skipped).
    """
    origimg = cv2.imread(imgfile)
    if origimg is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # rather than raising; skip it instead of crashing in cv2.resize.
        print("skip unreadable image: ", imgfile)
        return True

    # From here on origimg is the resized copy used for drawing.
    img, origimg = preprocess_mssd(origimg)
    # Convert to float32 and move the channel axis first before copying the
    # image into the network's input blob.
    img = img.astype(np.float32).transpose((2, 0, 1))
    net.blobs['data'].data[...] = img
    out = net.forward()
    box, conf, cls = postprocess(origimg, out)
    print("imagefile: ",imgfile)
    for coords, score, label in zip(box, conf, cls):
        label = int(label)
        if label < 0:
            # A negative label would silently index CLASSES from the end and
            # be drawn as a real class.  NOTE(review): Caffe SSD appears to
            # emit a placeholder row with label -1 when nothing is detected
            # -- confirm against the DetectionOutput layer in use.
            continue
        # Plain Python ints for the OpenCV drawing calls.
        x1, y1, x2, y2 = (int(v) for v in coords)
        p1 = (x1, y1)
        p2 = (x2, y2)
        print("(",x1,",", y1,")",",","(",x2,",", y2,")")
        cv2.rectangle(origimg, p1, p2, (0,255,0))
        # Keep the caption at least 15 px away from the top/left border.
        p3 = (max(x1, 15), max(y1, 15))
        title = "%s:%.2f" % (CLASSES[label], score)
        cv2.putText(origimg, title, p3, cv2.FONT_ITALIC, 0.6, (0, 255, 0), 1)
    cv2.imshow("ssd", origimg)
    k = cv2.waitKey(0) & 0xff
    # Exit if ESC pressed
    if k == 27:
        return False
    return True
# Run the detector on every entry of test_dir; stop as soon as detect()
# reports that the user asked to quit.
for f in os.listdir(test_dir):
    if not detect(os.path.join(test_dir, f)):
        break
2.图像预处理切片的问题
可以看到,1 中函数 preprocess_mssd 里被注释掉的部分,作用是逐通道减均值;其下方保留的是直接对整幅图像减均值。这两种写法的差别导致模型推理出错,卡了几天才找到原因:若执行逐通道减均值,网络推理会得到不正常的结果;若对整幅图像减均值,则能得到正常结果。问题的根源在于读入图像的数据类型(uint8)与 Python(NumPy)切片赋值共同作用,见下面的 demo:
import cv2
import numpy as np
# Demonstrates how writing a float result back through a slice of an integer
# image keeps the integer dtype and corrupts the values.
img = cv2.imread("0_Parade_marchingband_1_156.jpg")
image = img.copy()
# Keep only a 2x2 pixel patch so the printed arrays stay readable.
image = image[0:2,0:2,:]
# Independent copies so each experiment below starts from the same pixels.
image1 = image.copy()
image2 = image.copy()
# Baseline: dtype and values as loaded (uint8 in the recorded output).
print("origin image:",image.dtype)
print("origin image:\n",image)
# Experiment 1: per-channel subtraction written back through a slice.
# The right-hand side is a float array, but assigning into a slice of the
# existing array keeps that array's dtype, so the negative results wrap
# around (in the recorded output 67 - 127.5 ends up stored as 196).
image[:,:,0] = image[:,:,0] - 127.5
image[:,:,1] = image[:,:,1] - 127.5
image[:,:,2] = image[:,:,2] - 127.5
print("channel image: ",image.dtype)
print("channel image:\n",image)
# Experiment 2: whole-array subtraction rebinds the name to a new array,
# which is float64 in the recorded output and holds the expected values.
image1 = image1 -127.5
print("mean image: ",image1.dtype)
print("mean image:\n",image1)
# Experiment 3: same expression, two destinations.  ``x`` is a fresh array
# (float64 in the recorded output), while the slice assignment into image2
# leaves image2 as uint8 with a wrapped first channel.
x = image2[:,:,0] - 127.5
image2[:,:,0] = image2[:,:,0] - 127.5
print("x image: ",x.dtype)
print("x image:\n",x)
print("image2 image: ",image2.dtype)
print("image2 image:\n",image2)
输出结果:
origin image: uint8
origin image:
[[[67 68 78]
[75 75 81]]
[[71 72 82]
[76 77 81]]]
channel image: uint8
channel image:
[[[196 197 207]
[204 204 210]]
[[200 201 211]
[205 206 210]]]
mean image: float64
mean image:
[[[-60.5 -59.5 -49.5]
[-52.5 -52.5 -46.5]]
[[-56.5 -55.5 -45.5]
[-51.5 -50.5 -46.5]]]
x image: float64
x image:
[[-60.5 -52.5]
[-56.5 -51.5]]
image2 image: uint8
image2 image:
[[[196 68 78]
[204 75 81]]
[[200 72 82]
[205 77 81]]]
可以看到,OpenCV 读入的图像为 uint8 类型(origin image: uint8),以切片方式赋值后数组类型不变;切片减去 float64 类型的数据后再进行切片赋值,结果的类型仍然是 uint8(channel image: uint8),这就导致减均值之后数值越界、数据异常;而对整个图像减均值后整体赋值,得到的是 float64(mean image: float64),不会有这样的问题。后面的两个输出 x image: float64 和 image2 image: uint8 进一步说明:切片后减均值得到的临时变量的数据类型是 float64,但再进行切片赋值并不会改变原 NumPy 数组的数据类型,它仍为 uint8,这就会导致减均值时数据越界,得到异常结果。模型推理出问题,整整卡了 3 天时间。