在PYNQ上使用CNN分类图像
运行环境
| 环境 | 平台 | 版本 |
|---|---|---|
| Opencv | PYNQ | 3.4.11 |
| Opencv-Contrib | PYNQ | 3.4.11 |
| Python | PC、PYNQ | 3.7 |
| Tensorflow | PC | 1.5.0 |
| Keras | PC | 1.2.2 |
主要使用的数据集:SAR遥感图像数据集 MSTAR,包含11类17658张军用车辆图像。
模型准备
分割训练集
按70%、15%、15%的比例分配训练、测试、验证三个数据集。按格式保存如下图:
每类图像保存在各自的文件夹中:
模型训练
训练过程主要来自于该博客:https://blog.csdn.net/a1367666195/article/details/105311259
训练后生成.h5文件。
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.preprocessing import image
from keras.models import load_model
import numpy as np
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
import pickle
train_path = r'F:\MSTARdata\train' #训练集路径
test_path = r'F:\MSTARdata\test' #测试集路径
model = Sequential()
model.add(Conv2D(96,11,11,
activation='relu',
subsample=(4, 4),
border_mode='same',
input_shape=(100, 100, 1)
#kernel_initializer='random_uniform',
#bias_initializer='zeros'
)
)
model.add(MaxPooling2D(pool_size=(2, 2), strides=(4, 4)))
model.add(Conv2D(256,5,5,
activation='relu',
border_mode='same',
#kernel_initializer='random_uniform',
#bias_initializer='zeros'
)
)
model.add(MaxPooling2D(pool_size=(3, 3), strides=(1, 1)))
model.add(Conv2D(384,3,3,
activation='relu',
border_mode='same'))
model.add(Conv2D(384,3,3,
activation='relu',
border_mode='same',
#kernel_initializer='random_uniform',
#bias_initializer='zeros'
)
)
model.add(Conv2D(256,3,3,
activation='relu',
border_mode='same',
#kernel_initializer='random_uniform',
#bias_initializer='zeros'
)
)
model.add(MaxPooling2D(pool_size=(3, 3), strides=(1, 1)))
model.add(Flatten())
model.add(Dense(1024, activation='relu',
#kernel_initializer='random_uniform',
#bias_initializer='zeros'
)
)
model.add(Dropout(0.5))
model.add(Dense(1024, activation='relu',
#kernel_initializer='random_uniform',
#bias_initializer='zeros'
)
)
model.add(Dropout(0.5))
model.add(Dense(11, activation='softmax',
#ernel_initializer='random_uniform',
#ias_initializer='zeros'
)
)
model.summary()
model.compile(optimizer=SGD(lr=1e-3, momentum=0.9),
loss='categorical_crossentropy',
metrics=['accuracy'])
img_height, img_width = 100, 100
batch_size = 64
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2, # 剪切强度(以弧度逆时针方向剪切角度)。
rotation_range=10., # 随机旋转的度数范围
zoom_range=0.2, # 随机缩放范围
horizontal_flip=True) # 随机水平翻转
test_datagen = ImageDataGenerator(rescale=1. / 255)
# 图片generator
train_generator = train_datagen.flow_from_directory(
train_path,
color_mode='grayscale',
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='categorical',
classes=['2s1', 'bmp2', 'brdm2', 'btr60', 'btr70', 'd7', 'slicey', 't62', 't72', 'zil131', 'zsu23-4'])
validation_generator = test_datagen.flow_from_directory(
test_path,
color_mode='grayscale',
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='categorical',
classes=['2s1', 'bmp2', 'brdm2', 'btr60', 'btr70', 'd7', 'slicey', 't62', 't72', 'zil131', 'zsu23-4'])
model_path = r'./save_model_old.h5'#model保存路径
checkpoint = ModelCheckpoint(model_path, monitor='val_acc', verbose=1,
save_best_only=True, mode='max')
#tbCallBack = TensorBoard(log_dir="./model")
callbacks_list = [checkpoint]
#model=load_model(model_path)
history=model.fit_generator(
train_generator,
nb_epoch=300,
#initial_epoch=0,
samples_per_epoch = 16000,
validation_data=validation_generator,
nb_val_samples=2000,
#validation_steps=10,
callbacks=callbacks_list
)
model.save(model_path)
with open('history.pickle', 'wb') as file:
pickle.dump(history.history, file)
模型格式转换
将h5格式保存的模型转换为tensorflow使用的pb文件:
from keras import backend as K
import tensorflow as tf
from keras.models import load_model
K.set_learning_phase(0)
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
"""
Freezes the state of a session into a pruned computation graph.
Creates a new computation graph where variable nodes are replaced by
constants taking their current value in the session. The new graph will be
pruned so subgraphs that are not necessary to compute the requested
outputs are removed.
@param session The TensorFlow session to be frozen.
@param keep_var_names A list of variable names that should not be frozen,
or None to freeze all the variables in the graph.
@param output_names Names of the relevant graph outputs.
@param clear_devices Remove the device directives from the graph for better portability.
@return The frozen graph definition.
"""
from tensorflow.python.framework.graph_util import convert_variables_to_constants
graph = session.graph
with graph.as_default():
freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
output_names = output_names or []
output_names += [v.op.name for v in tf.global_variables()]
# Graph -> GraphDef ProtoBuf
input_graph_def = graph.as_graph_def()
if clear_devices:
for node in input_graph_def.node:
node.device = ""
frozen_graph = convert_variables_to_constants(session, input_graph_def,
output_names, freeze_var_names)
return frozen_graph
model_path = r'./save_model_old.h5'#model保存路径
model=load_model(model_path)
frozen_graph = freeze_session(K.get_session(),
output_names=[out.op.name for out in model.outputs])
# Save to ./model/tf_model.pb
tf.train.write_graph(frozen_graph, "pb_model", "save_model_old.pb", as_text=False)
tf.train.write_graph(frozen_graph, "pb_model", "save_model_old.pbtxt", as_text=True)
模型固化
这一步的目的是使用tensorflow的API将推断时不需要的参数和结构转为常量或删除,若不进行这一步,使用Opencv推断时会报错。
固化前:
固化后:
可以看出经过固化,结构得到了极大的简化。
在该网站上查看保存模型的结构:https://lutzroeder.github.io/netron/
需要先用bazel编译安装TransformGraph
下载最新的TensorFlow库
git clone https://github.com/tensorflow/tensorflow.git
cd ./tensorflow
编译graph_transforms模块,需要等待一段时间,编译很耗费CPU资源和时间
bazel build tensorflow/tools/graph_transforms:transform_graph
import tensorflow as tf
from tensorflow.tools.graph_transforms import TransformGraph
def transform_graph(input_graph, output_graph):
""" Use to run different transform function on the input graph and generate a output graph. """
with tf.gfile.GFile(input_graph, 'rb') as fid:
od_graph_def = tf.GraphDef()
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
od_graph_def = TransformGraph(od_graph_def, ['convolution2d_input_1'], ['Softmax'],
['remove_nodes(op=Identity, op=CheckNumerics, op=Switch)',
'fold_constants(ignore_errors=true)', 'sort_by_execution_order'])
tf.train.write_graph(od_graph_def, "", output_graph, as_text=False)
if __name__ == '__main__':
frozen_graph = r'./pb_model/save_model_old.pb'
output_transform_graph = r'./pb_model/transformed_graph.pb'
transform_graph(frozen_graph, output_transform_graph)
使用PYNQ进行分类
推理时需要使用到Opencv的DNN模块(tensorflow和keras对无AVX指令集的处理器兼容性不好而且编译麻烦,相对而言Opencv编译较为简单)
将pb文件拷贝到PYNQ的Linux系统中,在python环境下运行以下代码:
import cv2
import numpy as np
import time
print(cv2.__version__)
def dataload(path):
img=cv2.imread(path,flags=cv2.IMREAD_GRAYSCALE)
img=cv2.resize(img,(100,100),interpolation=cv2.INTER_LINEAR)
img=np.array(img).reshape(100,100)/255.0
return img.astype(np.float32)
def decode_prediction(preds,labels):
index = np.argsort(-preds).reshape(-1)[0]
label = labels[index]
prob = preds.reshape(-1)[index]
return label,prob
img_path = 'test.jpg'
labels_path = 'labels.npy'
pb_file = 'transformed_graph.pb'
net = cv2.dnn.readNet(pb_file)
if net.empty():
print('No layers in net')
frame = dataload(img_path)
time_start=time.time()
net.setInput(cv2.dnn.blobFromImage(frame,swapRB=False, crop=False))
pred=net.forward()
time_end=time.time()
print(decode_prediction(pred,np.load(labels_path)))
print('time cost',time_end-time_start,'s')
结果分析
简单的速度对比:
PYNQ:0.24s
PC(GTX1070启用CUDA加速):0.019s
训练之前截取了15%的数据,进行验证,不同的类别分类准确率均大于95%。