## SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.# SPDX-License-Identifier: Apache-2.0## Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License.#import os
import sys
# This sample uses an MNIST PyTorch model to create a TensorRT Inference Engineimport model
import numpy as np
import tensorrt as trt
sys.path.insert(1, os.path.join(sys.path[0],".."))import common
# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)classModelData(object):
INPUT_NAME ="data"
INPUT_SHAPE =(1,1,28,28)
OUTPUT_NAME ="prob"
OUTPUT_SIZE =10
DTYPE = trt.float32
defpopulate_network(network, weights):# Configure the network layers based on the weights provided.
input_tensor = network.add_input(name=ModelData.INPUT_NAME, dtype=ModelData.DTYPE, shape=ModelData.INPUT_SHAPE)defadd_matmul_as_fc(net,input, outputs, w, b):assertlen(input.shape)>=3
m =1iflen(input.shape)==3elseinput.shape[0]
k =int(np.prod(input.shape)/ m)assert np.prod(input.shape)== m * k
n =int(w.size / k)assert w.size == n * k
assert b.size == n
input_reshape = net.add_shuffle(input)
input_reshape.reshape_dims = trt.Dims2(m, k)
filter_const = net.add_constant(trt.Dims2(n, k), w)
mm = net.add_matrix_multiply(
input_reshape.get_output(0),
trt.MatrixOperation.NONE,
filter_const.get_output(0),
trt.MatrixOperation.TRANSPOSE,)
bias_const = net.add_constant(trt.Dims2(1, n), b)
bias_add = net.add_elementwise(mm.get_output(0), bias_const.get_output(0), trt.ElementWiseOperation.SUM)
output_reshape = net.add_shuffle(bias_add.get_output(0))
output_reshape.reshape_dims = trt.Dims4(m, n,1,1)return output_reshape
conv1_w = weights["conv1.weight"].cpu().numpy()
conv1_b = weights["conv1.bias"].cpu().numpy()
conv1 = network.add_convolution_nd(input=input_tensor, num_output_maps=20, kernel_shape=(5,5), kernel=conv1_w, bias=conv1_b
)
conv1.stride_nd =(1,1)
pool1 = network.add_pooling_nd(input=conv1.get_output(0),type=trt.PoolingType.MAX, window_size=(2,2))
pool1.stride_nd = trt.Dims2(2,2)
conv2_w = weights["conv2.weight"].cpu().numpy()
conv2_b = weights["conv2.bias"].cpu().numpy()
conv2 = network.add_convolution_nd(pool1.get_output(0),50,(5,5), conv2_w, conv2_b)
conv2.stride_nd =(1,1)
pool2 = network.add_pooling_nd(conv2.get_output(0), trt.PoolingType.MAX,(2,2))
pool2.stride_nd = trt.Dims2(2,2)
fc1_w = weights["fc1.weight"].cpu().numpy()
fc1_b = weights["fc1.bias"].cpu().numpy()
fc1 = add_matmul_as_fc(network, pool2.get_output(0),500, fc1_w, fc1_b)
relu1 = network.add_activation(input=fc1.get_output(0),type=trt.ActivationType.RELU)
fc2_w = weights["fc2.weight"].cpu().numpy()
fc2_b = weights["fc2.bias"].cpu().numpy()
fc2 = add_matmul_as_fc(network, relu1.get_output(0), ModelData.OUTPUT_SIZE, fc2_w, fc2_b)
fc2.get_output(0).name = ModelData.OUTPUT_NAME
network.mark_output(tensor=fc2.get_output(0))defbuild_engine(weights):# For more information on TRT basics, refer to the introductory samples.
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(0)
config = builder.create_builder_config()
runtime = trt.Runtime(TRT_LOGGER)
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, common.GiB(1))# Populate the network using weights from the PyTorch model.
populate_network(network, weights)# Build and return an engine.
plan = builder.build_serialized_network(network, config)return runtime.deserialize_cuda_engine(plan)# Loads a random test case from pytorch's DataLoaderdefload_random_test_case(model, pagelocked_buffer):# Select an image at random to be the test case.
img, expected_output = model.get_random_testcase()# Copy to the pagelocked input buffer
np.copyto(pagelocked_buffer, img)return expected_output
defmain():
common.add_help(description="Runs an MNIST network using a PyTorch model")# Train the PyTorch model
mnist_model = model.MnistModel()
mnist_model.learn()
weights = mnist_model.get_weights()# Do inference with TensorRT.
engine = build_engine(weights)# Build an engine, allocate buffers and create a stream.# For more information on buffer allocation, refer to the introductory samples.
inputs, outputs, bindings, stream = common.allocate_buffers(engine)
context = engine.create_execution_context()
case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)# For more information on performing inference, refer to the introductory samples.# The common.do_inference function will return a list of outputs - we only have one in this case.[output]= common.do_inference(context, engine=engine, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
pred = np.argmax(output)
common.free_buffers(inputs, outputs, stream)print("Test Case: "+str(case_num))print("Prediction: "+str(pred))if __name__ =="__main__":
main()
## SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.# SPDX-License-Identifier: Apache-2.0## Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License.## This file contains functions for training a PyTorch MNIST Modelimport torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np
from random import randint
# NetworkclassNet(nn.Module):def__init__(self):super(Net, self).__init__()
self.conv1 = nn.Conv2d(1,20, kernel_size=5)
self.conv2 = nn.Conv2d(20,50, kernel_size=5)
self.fc1 = nn.Linear(800,500)
self.fc2 = nn.Linear(500,10)defforward(self, x):
x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)
x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)
x = x.view(-1,800)
x = F.relu(self.fc1(x))
x = self.fc2(x)return F.log_softmax(x, dim=1)classMnistModel(object):def__init__(self):
self.batch_size =64
self.test_batch_size =100
self.learning_rate =0.0025
self.sgd_momentum =0.9
self.log_interval =100# Fetch MNIST data set.
self.train_loader = torch.utils.data.DataLoader(
datasets.MNIST("/tmp/mnist/data",
train=True,
download=True,
transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))]),),
batch_size=self.batch_size,
shuffle=True,
num_workers=1,
timeout=600,)
self.test_loader = torch.utils.data.DataLoader(
datasets.MNIST("/tmp/mnist/data",
train=False,
transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,),(0.3081,))]),),
batch_size=self.test_batch_size,
shuffle=True,
num_workers=1,
timeout=600,)
self.network = Net()if torch.cuda.is_available():
self.network = self.network.to("cuda")# Train the network for one or more epochs, validating after each epoch.deflearn(self, num_epochs=2):# Train the network for a single epochdeftrain(epoch):
self.network.train()
optimizer = optim.SGD(self.network.parameters(), lr=self.learning_rate, momentum=self.sgd_momentum)for batch,(data, target)inenumerate(self.train_loader):if torch.cuda.is_available():
data = data.to("cuda")
target = target.to("cuda")
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = self.network(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()if batch % self.log_interval ==0:print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
epoch,
batch *len(data),len(self.train_loader.dataset),100.0* batch /len(self.train_loader),
loss.data.item(),))# Test the networkdeftest(epoch):
self.network.eval()
test_loss =0
correct =0for data, target in self.test_loader:with torch.no_grad():if torch.cuda.is_available():
data = data.to("cuda")
target = target.to("cuda")
data, target = Variable(data), Variable(target)
output = self.network(data)
test_loss += F.nll_loss(output, target).data.item()
pred = output.data.max(1)[1]
correct += pred.eq(target.data).cpu().sum()
test_loss /=len(self.test_loader)print("\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
test_loss, correct,len(self.test_loader.dataset),100.0* correct /len(self.test_loader.dataset)))for e inrange(num_epochs):
train(e +1)
test(e +1)defget_weights(self):return self.network.state_dict()defget_random_testcase(self):
data, target =next(iter(self.test_loader))
case_num = randint(0,len(data)-1)
test_case = data.cpu().numpy()[case_num].ravel().astype(np.float32)
test_name = target.cpu().numpy()[case_num]return test_case, test_name