1. onnx介绍
onnxruntime:https://github.com/Microsoft/onnxruntime
onnx: https://github.com/onnx/onnx
可以参考这篇:https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/sampleOnnxMNIST
ONNX 是微软与Facebook和AWS共同开发的深度学习和传统机器学习模型的开放格式。
ONNX Runtime 是基于 ONNX 规范实现的推理引擎。ONNX Runtime 可以自动调用各种硬件加速器,例如 NVIDIA 的 CUDA、TensorRT 和 Intel 的 MKL-DNN、nGraph。如下图所示,ONNX 格式的模型可以传入到蓝色部分的 Runtime,并自动完成计算图分割及并行化处理,最后我们只需要如橙色所示的输入数据和输出结果就行了。
ONNX 的目的是让算法开发人员可以为算法任务选择合适的机器学习框架,让推理框架作者可以集中精力推出创新、提高框架的性能。对于硬件供应商来说,也可以简化神经网络计算的复杂度,实现优化算法。
目前ONNX Runtime支持CUDA、MLAS(Microsoft Linear Algebra Subprograms)、MKL-DNN、MKL-ML和TensorRT用于计算加速。
下面是简单使用的案例:
import numpy
import onnxruntime as rt
# Load the serialized model and create an inference session for it.
sess = rt.InferenceSession("model.onnx")
# Inputs are fed by name; this example uses the first input of the model.
input_name = sess.get_inputs()[0].name
# Random float32 sample; the (3, 4, 5) shape has to match what model.onnx expects.
X = numpy.random.random((3, 4, 5)).astype(numpy.float32)
# None as the output list asks the session for all model outputs.
pred_onnx = sess.run(None, {input_name: X})
print(pred_onnx)
2. onnx graph
2.1 创建variable
import onnx_graphsurgeon as gs
import numpy as np
import onnx
# Input and output tensors of the single-node graph.
X = gs.Variable(name="X", shape=(1, 3, 5, 5), dtype=np.float32)
Y = gs.Variable(name="Y", shape=(1, 3, 1, 1), dtype=np.float32)
# One GlobalLpPool node (attribute p=2) connecting X to Y.
node = gs.Node(
    op="GlobalLpPool",
    attrs=dict(p=2),
    inputs=[X],
    outputs=[Y],
)
graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
# Export the graphsurgeon graph to an ONNX model, then write it to disk.
exported = gs.export_onnx(graph)
onnx.save(exported, "test_globallppool.onnx")
2.2 添加constant
import onnx_graphsurgeon as gs
import numpy as np
import onnx
X = gs.Variable(name="X", shape=(1, 3, 224, 224), dtype=np.float32)
# W is a gs.Constant, so it is exported as an initializer automatically.
weights = np.ones((5, 3, 3, 3), dtype=np.float32)
W = gs.Constant(name="W", values=weights)
Y = gs.Variable(name="Y", shape=(1, 5, 222, 222), dtype=np.float32)
node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])
# Initializers do not have to be graph inputs, so only X is listed as an input.
graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
exported = gs.export_onnx(graph)
onnx.save(exported, "test_conv.onnx")
2.3 保存子模型
import onnx_graphsurgeon as gs
import numpy as np
import onnx
# Import the model and index its tensors by name.
model = onnx.load("model.onnx")
graph = gs.import_onnx(model)
tensors = graph.tensors()
# Pick the tensors that bound the sub-graph and mark them as float32 variables.
sub_input = tensors["x1"].to_variable(dtype=np.float32)
sub_output = tensors["add_out"].to_variable(dtype=np.float32)
graph.inputs = [sub_input]
graph.outputs = [sub_output]
# Clean up the graph after redefining its inputs/outputs, then export it.
graph.cleanup()
exported = gs.export_onnx(graph)
onnx.save(exported, "subgraph.onnx")
原模型为:
子模型保存下来为:
2.4 修改模型
还是上面的模型,我们要改成:
代码为:
import onnx_graphsurgeon as gs
import numpy as np
import onnx
model = onnx.load("model.onnx")
graph = gs.import_onnx(model)
# 1. Take the first Add node and drop its input named `b`
add_nodes = [candidate for candidate in graph.nodes if candidate.op == "Add"]
first_add = add_nodes[0]
first_add.inputs = list(filter(lambda tensor: tensor.name != "b", first_add.inputs))
# 2. Turn that node into a LeakyRelu with alpha=0.02
first_add.op = "LeakyRelu"
first_add.attrs["alpha"] = 0.02
# 3. Create an Identity node that consumes the outputs of the former Add node
identity_out = gs.Variable("identity_out", dtype=np.float32)
identity = gs.Node(
    op="Identity",
    inputs=first_add.outputs,
    outputs=[identity_out],
)
graph.nodes.append(identity)
# 4. The Identity output becomes the only graph output
graph.outputs = [identity_out]
# 5. Remove unused nodes/tensors and topologically sort the graph.
# ONNX only accepts topologically sorted nodes. The Identity node was appended
# last, so it is already in a valid position; sorting anyway is the safe choice
# whenever nodes may have been added out of order.
graph.cleanup().toposort()
exported = gs.export_onnx(graph)
onnx.save(exported, "modified.onnx")
3. 示例
3.1 在结尾添加全连接层:
import onnx
import onnx_graphsurgeon as gs
import numpy as np
import onnxruntime as rt
# Batch size baked into the shapes of the bias and of the new output.
# It has to match the batch dimension model.onnx is run with.
bs = 1
graph = gs.import_onnx(onnx.load("model.onnx"))
# Gemm computes A x B + C; here it maps a 512-wide feature to 256 values.
pca_w = gs.Constant(name="pca_w", values=np.random.randn(512, 256).astype(np.float32))
pca_b = gs.Constant(name="pca_b", values=np.zeros(shape=(bs, 256), dtype=np.float32))
Y = gs.Variable(name="Y", dtype=np.float32, shape=(bs, 256))
# The first output of the last node is used as the Gemm input.
# NOTE(review): assumes that tensor has shape (bs, 512) - confirm for your model.
pca_node = gs.Node(op='Gemm', inputs=[graph.nodes[-1].outputs[0], pca_w, pca_b], outputs=[Y])
graph.nodes.append(pca_node)
graph.outputs = [Y]
onnx.save(gs.export_onnx(graph), "add_gemm.onnx")
# Run the patched model once to check that it loads and executes.
session = rt.InferenceSession('add_gemm.onnx')
inp_meta = session.get_inputs()[0]
inp = inp_meta.name
out = session.get_outputs()[0].name
# Build a random sample from the declared input shape; dimensions that are not
# plain integers (dynamic axes) are replaced by the batch size.
# NOTE(review): assumes the model takes a single float32 input - confirm.
input_shape = [dim if isinstance(dim, int) else bs for dim in inp_meta.shape]
input1 = np.random.rand(*input_shape).astype(np.float32)
pca_out = session.run([out], {inp: input1})
print(pca_out[0].shape)
3.2 添加nms层
import argparse
import logging
import json
import numpy as np
import onnx_graphsurgeon as gs
import onnx
# Configure the logger named '' so that INFO and above go through a StreamHandler.
logger = logging.getLogger('')
streamhandler = logging.StreamHandler()
logger.addHandler(streamhandler)
logger.setLevel(logging.INFO)
@gs.Graph.register()
def trt_batched_nms(self, boxes_input, scores_input, nms_output, config, layer_name):
    """Add a ``BatchedNMS_TRT`` node that reads the given box and score tensors.

    Args:
        boxes_input: tensor wired as the first input of the NMS node. All of
            its current consumers are detached first.
        scores_input: tensor wired as the second input of the NMS node. All of
            its current consumers are detached first.
        nms_output: list of tensors that become the outputs of the NMS node.
            Their current producers are detached first.
        config: mapping holding the plugin attributes ``shareLocation``,
            ``backgroundLabelId``, ``numClasses``, ``topK``, ``keepTopK``,
            ``scoreThreshold``, ``iouThreshold`` and ``isNormalized``.
            ``clipBoxes`` is optional and defaults to ``True``.
        layer_name: name given to the new node.

    Returns:
        The value returned by ``self.layer`` for the new node.

    Raises:
        KeyError: if one of the required attributes is missing from ``config``.
    """
    # Cut the existing connections so the NMS node is the only consumer of the
    # inputs and the only producer of the outputs.
    boxes_input.outputs.clear()
    scores_input.outputs.clear()
    for tensor in nms_output:
        tensor.inputs.clear()

    required_keys = (
        "shareLocation",
        "backgroundLabelId",
        "numClasses",
        "topK",
        "keepTopK",
        "scoreThreshold",
        "iouThreshold",
        "isNormalized",
    )
    attrs = {key: config[key] for key in required_keys}
    # Previously hard-coded to True; still the default when the key is absent.
    attrs["clipBoxes"] = config.get("clipBoxes", True)

    return self.layer(op="BatchedNMS_TRT", attrs=attrs,
                      inputs=[boxes_input, scores_input],
                      outputs=nms_output, name=layer_name)
@gs.Graph.register()
def add_op_by_type(self, input_tensor, output_tensor, config, layer_name, op_type):
    """Add an element-wise ``op_type`` node combining a tensor with a constant.

    Args:
        input_tensor: tensor used as the first operand; it is indexed up to
            dimension 3, so it must have at least four dimensions. Its current
            producers are detached first.
        output_tensor: tensor that receives the result of the new node.
        config: mapping whose ``val`` entry lists the constant values.
        layer_name: name given to the new node.
        op_type: ONNX operator type of the node, e.g. ``"Add"`` or ``"Mul"``.

    Returns:
        The value returned by ``self.layer`` for the new node.
    """
    input_tensor.inputs.clear()
    logger.info("input shape of %s: %s", layer_name, input_tensor.shape)

    # Collapse dimensions 2 and 3 to 1 so the constant broadcasts over them.
    shape = list(input_tensor.shape)
    shape[3] = 1
    shape[2] = 1
    values = np.array(config['val'], np.float32)
    try:
        val = values.reshape(shape)
    except (TypeError, ValueError):
        # The leading dimension is dynamic or does not fit the number of values:
        # use 1 there so the same constant is broadcast over that dimension too.
        val = values.reshape([1] + shape[1:])

    return self.layer(op=op_type,
                      inputs=[input_tensor, val],
                      outputs=[output_tensor], name=layer_name)
def parse_args():
    """Parse the command line of the model-patching script.

    Positional arguments are the model path and the plugin JSON path; the
    optional ``--save`` gives the output path and defaults to ``result.onnx``.

    Returns:
        argparse.Namespace with the attributes ``model``, ``config`` and ``save``.
    """
    parser = argparse.ArgumentParser(
        description='Add NMS op with box and score nodes after detection onnx.')
    parser.add_argument('model', type=str,
                        help='model to use.')
    parser.add_argument('config', type=str,
                        help='plugin json file path.')
    parser.add_argument('--save', type=str, default="result.onnx",
                        help='saving model path.')
    return parser.parse_args()
def main():
    """Insert the layers described by a JSON config into an ONNX model and save it.

    The config holds two parallel lists: ``plugins[i]`` carries the type and
    attributes of a layer, ``layers[i]`` its name and the names of the tensors
    it connects to. ``NMS`` entries add a BatchedNMS_TRT node whose four outputs
    become the graph outputs. ``add``/``mul`` entries insert an element-wise
    node directly after the named tensor. Any other type only logs a warning.
    """
    opt = parse_args()
    logger.info(opt)
    graph = gs.import_onnx(onnx.load(opt.model))
    with open(opt.config, encoding="utf-8") as f:
        config = json.load(f)

    tmap = graph.tensors()
    for i, plugin_conf in enumerate(config['plugins']):
        layer_conf = config['layers'][i]
        plugin_type = plugin_conf['type']
        logger.info("add layer:%s with inputs:%s", plugin_type, layer_conf['inputs'])

        if plugin_type == 'NMS':
            boxes_in = tmap[layer_conf['inputs'][0]]
            scores_in = tmap[layer_conf['inputs'][1]]
            batch_size = boxes_in.shape[0]
            keep_top_k = plugin_conf['keepTopK']
            out_names = layer_conf['outputs']
            num_detections = gs.Variable(name=out_names[0], dtype=np.int32, shape=(batch_size, 1))
            boxes = gs.Variable(name=out_names[1], dtype=np.float32, shape=(batch_size, keep_top_k, 4))
            scores = gs.Variable(name=out_names[2], dtype=np.float32, shape=(batch_size, keep_top_k))
            classes = gs.Variable(name=out_names[3], dtype=np.float32, shape=(batch_size, keep_top_k))
            graph.trt_batched_nms(boxes_in, scores_in,
                                  [num_detections, boxes, scores, classes], plugin_conf, layer_conf["name"])
            graph.outputs = [num_detections, boxes, scores, classes]
        elif plugin_type.lower() in ("add", "mul"):
            input_tensor = tmap[layer_conf['inputs'][0]]
            new_input = gs.Variable(name=layer_conf['name'], dtype=np.float32, shape=input_tensor.shape)
            # Re-route every consumer of the original tensor to the output of
            # the new op, whichever input slot it occupies. Iterate over a copy
            # because reassigning an input updates input_tensor.outputs.
            for consumer in list(input_tensor.outputs):
                for idx, tensor in enumerate(consumer.inputs):
                    if tensor is input_tensor:
                        consumer.inputs[idx] = new_input
            graph.add_op_by_type(input_tensor, new_input, plugin_conf, layer_conf['name'], plugin_type.capitalize())
        else:
            logger.warning("not support yet")

    # Clean up and sort after the edits, fold constants, then clean up again
    # before exporting.
    graph.cleanup().toposort()
    graph.fold_constants().cleanup()
    onnx.save(gs.export_onnx(graph), opt.save)


if __name__ == '__main__':
    main()
然后是用于配置的文件:
{
"plugins": [
{
"name": "nms",
"type": "NMS",
"shareLocation": false,
"backgroundLabelId": -1,
"numClasses": 6,
"topK": 400,
"keepTopK": 100,
"scoreThreshold": 0.5,
"iouThreshold": 0.5,
"isNormalized": false
},
{
"type": "add",
"val": [-102.9801,-115.9465,-122.7717]
}
],
"layers": [
{
"name": "nms",
"inputs": ["boxes", "scores"],
"outputs": ["num_detections", "nmsed_boxes", "nmsed_scores", "nmsed_classes"]
},
{
"name": "mean",
"inputs": ["input_image"]
}
]
}
然后执行python onnx-modifier.py model.onnx plugin.json
就可以输出带nms的模型啦~
4. 使用onnx直接构造和修改
首先是构造。尝试完全用 ONNX 的 Python API 构造一个描述线性函数 output=a*x+b 的 ONNX 模型。
4.1 创建
import onnx
from onnx import helper
from onnx import TensorProto
# All graph inputs and the output are 10x10 float tensors.
elem_type, dims = TensorProto.FLOAT, [10, 10]
a = helper.make_tensor_value_info('a', elem_type, dims)
x = helper.make_tensor_value_info('x', elem_type, dims)
b = helper.make_tensor_value_info('b', elem_type, dims)
output = helper.make_tensor_value_info('output', elem_type, dims)
# Nodes are linked by tensor name: Mul writes 'c' and Add reads 'c'.
mul = helper.make_node('Mul', inputs=['a', 'x'], outputs=['c'])
add = helper.make_node('Add', inputs=['c', 'b'], outputs=['output'])
如上面的例子所示:Mul 节点定义了输出 c,Add 节点定义了输入 c,则 Mul 节点和 Add 节点是相连的。
helper.make_graph 函数需要传入节点、图名称、输入张量信息、输出张量信息这 4 个参数。这里 make_graph 的节点参数有一个要求:计算图的节点必须以拓扑序给出。
# Assemble the graph (nodes listed as Mul, then Add) and wrap it in a model.
graph = helper.make_graph(
    nodes=[mul, add],
    name='linear_func',
    inputs=[a, x, b],
    outputs=[output],
)
model = helper.make_model(graph)
# Validate the model, print it, then save it.
onnx.checker.check_model(model)
print(model)
onnx.save(model, 'linear_func.onnx')
下面是个测试:
import onnxruntime
import numpy as np
sess = onnxruntime.InferenceSession('linear_func.onnx')
# Random float32 operands for the three inputs of linear_func.onnx.
a = np.random.rand(10, 10).astype(np.float32)
b = np.random.rand(10, 10).astype(np.float32)
x = np.random.rand(10, 10).astype(np.float32)
output = sess.run(['output'], {'a': a, 'b': b, 'x': x})[0]
# Compare against the same formula evaluated with NumPy so that this is a real
# test: the model is expected to compute output = a * x + b.
np.testing.assert_allclose(output, a * x + b, rtol=1e-5)
4.2 修改
接下来,我们来看看怎么把图 GraphProto、节点 NodeProto、张量信息 ValueInfoProto 读取出来:
# Read the graph, its nodes and its input/output tensor information.
graph = model.graph
node = graph.node
# Named `inputs`/`outputs` so that the builtin input() is not shadowed.
inputs = graph.input
outputs = graph.output
print(node)
print(inputs)
print(outputs)
使用如上这些代码,我们可以分别访问模型的图、节点、张量信息。读取 ONNX 模型的信息后,修改 ONNX 模型就是一件很轻松的事了。我们既可以按照上一小节的模型构造方法,新建节点和张量信息,与原有模型组合成一个新的模型,也可以在不违反 ONNX 规范的前提下直接修改某个数据对象的属性。
这里我们来看一个直接修改模型属性的例子:
import onnx
# Load the saved model and take a handle on the node list of its graph.
model = onnx.load('linear_func.onnx')
node = model.graph.node
# node[1] is the second node of the graph; its operator type is changed in place.
node[1].op_type = 'Sub'
# Re-check the edited model before saving it under a new file name.
onnx.checker.check_model(model)
onnx.save(model, 'linear_func_2.onnx')
在读入之前的 linear_func.onnx 模型后,我们可以直接修改第二个节点的类型 node[1].op_type,把加法变成减法。
4.3 截取
参照下面的例子进行截取
# Arguments in order: source model path, output model path, names of the
# sub-model's input tensors, names of the sub-model's output tensors.
onnx.utils.extract_model('whole_model.onnx', 'partial_model.onnx', ['22'], ['28'])
这段代码的作用是把原计算图从边 22 到边 28 的子图提取出来,并组成一个子模型。onnx.utils.extract_model 就是完成子模型提取的函数,它的参数分别是原模型路径、输出模型路径、子模型的输入边(输入张量)、子模型的输出边(输出张量)。