参考:
https://blog.csdn.net/u011622208/article/details/110240126
https://blog.csdn.net/github_28260175/article/details/103436020
torchvision 的分类模型输出的是未经归一化的 logits(数组),直接使用不太方便;加上 softmax 把它转换成概率之后会方便很多。因此,可以自己创建一个“壳”(包装模块),在其中实现自定义的后处理操作:
MyUtil.py
...
# Hand-written AdaptiveAvgPool2d; per the original author it does not comply
# with what tracing requires.
class MyAdaptiveAvgPool2d(nn.Module):
    """Average-pool the last two dimensions of the input down to ``output_size``.

    The stride and kernel size of a plain average pooling are derived on every
    call from the last two dimensions of the input and the requested output
    size.
    """

    def __init__(self, output_size):
        """Store the requested output size as a NumPy array.

        Args:
            output_size: target size; it is passed to ``np.array`` as-is.
        """
        super().__init__()
        self.outputSize = np.array(output_size)

    def forward(self, x: torch.Tensor):
        """Pool ``x`` with a kernel/stride computed from its last two dims.

        Args:
            x: input tensor; only ``x.shape[-2:]`` is used for the size math.

        Returns:
            The result of average pooling ``x`` with the derived kernel size
            and stride.
        """
        in_hw = np.array(x.shape[-2:])
        # stride = floor(input / output); the kernel covers whatever is left
        # after (output - 1) steps of that stride.
        stride_hw = np.floor(in_hw / self.outputSize).astype(np.int32)
        kernel_hw = in_hw - (self.outputSize - 1) * stride_hw
        # The built-in alternative would be nn.AdaptiveAvgPool2d(self.outputSize).
        return nn.functional.avg_pool2d(
            x,
            kernel_size=list(kernel_hw),
            stride=list(stride_hw),
        )
# One extra wrapping layer so that the network outputs labels and scores.
class MyClsNet(nn.Module):
    """Wrap a network and turn its output into per-sample label/score dicts."""

    def __init__(self, ori_net):
        """Keep a reference to the wrapped network.

        Args:
            ori_net: the network whose output is post-processed in ``forward``.
        """
        super().__init__()
        self.oriNet = ori_net
        # The original plan was to swap in the hand-written AdaptiveAvgPool2d so
        # the model could be exported smoothly; it failed because of tracing:
        # self.oriNet.avgpool = MyAdaptiveAvgPool2d(ori_net.avgpool.output_size)

    def forward(self, x: torch.Tensor):
        """Run the wrapped network and pick the best entry along dim 1.

        Args:
            x: input forwarded unchanged to the wrapped network.

        Returns:
            A list with one dict per index along dim 0 of the result; each dict
            holds ``"labels"`` (index of the maximum along dim 1) and
            ``"scores"`` (the softmax value at that index).
        """
        logits = self.oriNet(x)
        probs = nn.functional.softmax(logits, dim=1)
        # max over dim 1 returns two tensors: the maxima of each row and the
        # positions of those maxima (presumably the most likely class).
        best_scores, best_labels = probs.max(dim=1)
        return [
            {"labels": best_labels[i], "scores": best_scores[i]}
            for i in range(len(best_scores))
        ]
# Build an image-classification model.
def get_classification_model(net_type, num_classes):
    """Create a torchvision classifier whose last layer has ``num_classes`` outputs.

    Args:
        net_type: which architecture to build. ``"alexNet"`` gives
            ``torchvision.models.alexnet``, ``"vgg"`` gives
            ``torchvision.models.vgg11`` and ``"restNet"`` (legacy spelling,
            kept for existing callers) or ``"resNet"`` gives
            ``torchvision.models.resnet18``.
        num_classes: number of output features of the replacement final layer.

    Returns:
        The model with its final linear layer replaced, or ``None`` when
        ``net_type`` is not one of the recognised names.
    """
    model = None
    if net_type == "alexNet":
        model = torchvision.models.alexnet(pretrained=True)
        # Size the new head from the layer it replaces instead of hard-coding
        # the width, the same way the ResNet branch does.
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
        # Alternative kept from the original notes: append a softmax directly,
        # model.classifier.add_module('7', nn.Softmax(dim=1))
    elif net_type == "vgg":
        model = torchvision.models.vgg11(pretrained=True)
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
    elif net_type in ("restNet", "resNet"):
        model = torchvision.models.resnet18(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model
main.py
...
# Build the model and dress it in the wrapper made for classification models.
model = MyClsNet(get_classification_model('vgg', 2))
model.eval()

# Whatever size this tensor has is the only size the exported ONNX model will
# accept later on.
input = torch.rand(1, 3, 224, 224)

# # Take a look at the output first:
# output = model(input)
# print(output)

torch.onnx.export(
    model,
    input,
    "classification.onnx",
    input_names=['image'],
    output_names=['labels', 'scores'],
    # Author's note: ONNX did not support the AdaptiveAvgPool2d module inside
    # the model, so a dynamic input size could not be exported; only the size
    # of the `input` used at export time works. The attempted setting was:
    # dynamic_axes={
    #     'image': {2: 'height', 3: 'width'},
    #     "labels": [0],
    #     "scores": [0],
    # },
    opset_version=11,
)  # 11