网上很多示例都是用 opencv 写的,没办法用 GPU 加速。我一开始也用 opencv 写过,但是不加速的话运行很慢。没办法,只好亲自操刀,写了一个利用 caffe 的 python API 进行 caffe ssd 模型预测的代码。代码在 ubuntu16.04、caffe、python2.7、cuda10.0+cudnn7.6.5 环境下运行成功。利用这个代码可以轻松扩展为摄像头、视频文件预测。代码写得有点乱,嘿嘿,我故意的,请食用,祝您用餐愉快。
import os
import numpy as np
import cv2
import sys
sys.path.append("/home/fut/caffe-ssd/python")
import caffe
# Run all Caffe computation on GPU 0.
caffe.set_device(0)
caffe.set_mode_gpu()

model_def = '/home/fut/caffe-ssd/myproj/prototxt/deploy.prototxt'
model_weights = '/home/fut/caffe-ssd/myproj/model/model_SSD_300x300_iter_97648.caffemodel'
net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)

# input preprocessing: 'data' is the name of the input blob == net.inputs[0]
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_mean('data', np.array([104, 117, 123]))  # mean pixel
transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
transformer.set_channel_swap('data', (2, 1, 0))  # the reference model has channels in BGR order instead of RGB

# set net to batch size of 1
image_resize = 300
net.blobs['data'].reshape(1, 3, image_resize, image_resize)

image_file = '/home/fut/Desktop/share/Femal-732/JPEGImages/20201010_100742_287718.jpg'
# Alternative loader left disabled by the original author:
# image = caffe.io.load_image(image_file)

# Load with OpenCV through the single `image_file` path instead of repeating
# the literal. cv2.imread signals failure by returning None, so fail early
# with a clear message rather than with an opaque error inside cvtColor.
image = cv2.imread(image_file)
if image is None:
    raise IOError("Failed to read image: " + image_file)

# `image` keeps the pixels as loaded; `image1` is the copy fed to the network,
# converted from BGR to RGB and scaled to [0, 1], which is the input format
# the raw_scale / channel_swap settings above assume.
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image1 = image1 / 255.
transformed_image = transformer.preprocess('data', image1)
net.blobs['data'].data[...] = transformed_image

# Forward pass.
detections = net.forward()['detection_out']
print(type(detections))
print(detections.shape)
def load_labels(file_name):
    """Read class labels from a text file, one label per line.

    The position of a label in the returned list is its line number in the
    file (0-based), so interior blank lines are preserved as empty strings.
    Only blank lines at the end of the file are dropped.

    Args:
        file_name: Path of the label file to read.

    Returns:
        A list of label strings; an empty list for an empty file.
    """
    with open(file_name, "r") as f:
        # Strip only the line terminator; the original called .strip() on the
        # list returned by .split(), which always raised AttributeError.
        labels = [line.rstrip('\r\n') for line in f]
    # A final newline (or several) must not produce phantom empty labels.
    while labels and not labels[-1]:
        labels.pop()
    return labels
def _to_pixel(normalized, extent):
    """Clamp a normalized coordinate to [0, 1] and scale it to an int pixel offset."""
    return int(max(min(float(normalized), 1), 0) * extent)


def SSDPostProcess(inference_result, image_resolution, confidence_threshold, labels=None):
    """Convert a raw SSD detection blob into a list of pixel-space boxes.

    Args:
        inference_result: Array indexed as [0][0][row][field]. Within a row,
            field 1 is the class index, field 2 the confidence and fields
            3..6 the normalized xmin, ymin, xmax, ymax.
        image_resolution: (height, width) of the image the boxes are scaled to.
        confidence_threshold: Rows whose confidence is below this are skipped.
        labels: Optional sequence mapping class index to a label string. When
            the class index has no entry, the index itself (as a string) is
            used as the label.

    Returns:
        A list of [label, confidence, xmin, ymin, xmax, ymax] lists, one per
        kept detection, in the order the rows appear.
    """
    # Avoid a shared mutable default argument.
    if labels is None:
        labels = []
    height, width = image_resolution[0], image_resolution[1]

    detection_result_list = []
    lines = inference_result.shape[2]
    print("row count is:", lines)
    rows = inference_result[0][0]
    for i in range(lines):
        row = rows[i]
        if row[2] < confidence_threshold:
            continue
        class_index = int(row[1])
        confidence = float(row[2])
        # Explicit range check: a plain labels[class_index] raises IndexError
        # for unknown classes and silently wraps around for negative ids.
        if 0 <= class_index < len(labels):
            label = labels[class_index]
        else:
            label = str(class_index)
        detection_result_list.append([
            label,
            confidence,
            _to_pixel(row[3], width),
            _to_pixel(row[4], height),
            _to_pixel(row[5], width),
            _to_pixel(row[6], height),
        ])
    print(detection_result_list)
    return detection_result_list
def imshow(detection_result_list, img):
    """Draw every detection on ``img`` and show it until a key is pressed.

    Args:
        detection_result_list: Iterable of
            [label, confidence, xmin, ymin, xmax, ymax] entries; ``label``
            must be a string because it is concatenated into the caption.
        img: Image array to annotate; it is drawn on in place.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    color = (0, 0, 255)
    for result in detection_result_list:
        label, confidence, xmin, ymin, xmax, ymax = result[:6]
        caption = label + "," + str(confidence)
        img = cv2.putText(img, caption, (xmin - 5, ymin), font, 2, color, 1)
        # Draw on the image that was passed in; the original drew the box on
        # the module-level `image`, ignoring the `img` argument.
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 4)
    cv2.imshow("result", img)
    cv2.waitKey(0)  # block until any key is pressed
    cv2.destroyAllWindows()
# Turn the raw network output into pixel-space boxes for the loaded image,
# then display them.
CONFIDENCE_THRESHOLD = 0.2
CLASS_LABELS = ["1", "2", "3"]
image_height, image_width = image.shape[0], image.shape[1]
result_list = SSDPostProcess(
    detections,
    (image_height, image_width),
    CONFIDENCE_THRESHOLD,
    CLASS_LABELS,
)
imshow(result_list, image)