使用 TensorRT 部署 EfficientNet B 系列

EfficientNet 的代码,见 Ubuntu code  

https://github.com/lukemelas/EfficientNet-PyTorch

train的代码:

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, models, transforms
import time
import os
# 如果使用上面的Git工程的话这样导入
# from efficientnet.model import EfficientNet
# 如果使用pip安装的Efficient的话这样导入
from efficientnet_pytorch import EfficientNet

# some parameters
# Select the visible GPU *before* the first CUDA query: the variable is only
# guaranteed to be honoured if it is set before CUDA is touched.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
use_gpu = torch.cuda.is_available()
data_dir = '../../../datasets/Neu'  # dataset root; split sub-folders are joined onto it
batch_size = 8
lr = 0.01  # initial SGD learning rate
momentum = 0.9
num_epochs = 20
input_size = 224  # side length used for resize / centre-crop
class_num = 1000  # output size of the replacement classification layer
net_name = 'efficientnet-b0'
is_pred = True  # when True, pretrained weights are loaded before fine-tuning


def loaddata(data_dir, batch_size, set_name, shuffle):
    """Build an ImageFolder data loader for one split of the dataset.

    Args:
        data_dir: Dataset root; images are read from ``data_dir/set_name``.
        batch_size: Number of samples per batch.
        set_name: Split to load, ``'train'`` or ``'test'``; also selects the
            transform pipeline (any other value raises ``KeyError``).
        shuffle: Whether the loader shuffles the samples.

    Returns:
        A tuple ``(loaders, size)`` where ``loaders`` is a one-entry dict
        ``{set_name: DataLoader}`` and ``size`` is the number of samples.
    """
    # Steps shared by both pipelines: resize + centre-crop up front,
    # tensor conversion + mean/std normalisation at the end.
    geometry = [
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
    ]
    to_normalised_tensor = [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    # Training additionally applies a small random shift and a random flip.
    augmentation = [
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
        transforms.RandomHorizontalFlip(),
    ]
    pipelines = {
        'train': transforms.Compose(geometry + augmentation + to_normalised_tensor),
        'test': transforms.Compose(geometry + to_normalised_tensor),
    }

    split_dir = os.path.join(data_dir, set_name)
    folder = datasets.ImageFolder(split_dir, pipelines[set_name])
    loader = torch.utils.data.DataLoader(
        folder,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=1,
    )
    return {set_name: loader}, len(folder)
    

def train_model(model_ft, criterion, optimizer, lr_scheduler, num_epochs=50):
    """Fine-tune ``model_ft`` on the 'train' split and save the best weights.

    Args:
        model_ft: Model to train (already placed on the target device).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model parameters.
        lr_scheduler: Callable ``(optimizer, epoch) -> optimizer`` invoked at
            the start of every epoch to set the learning rate.
        num_epochs: Number of epochs to run.

    Returns:
        ``(train_loss, best_model_wts)``: the sampled per-batch losses and a
        snapshot of the state dict from the epoch with the highest training
        accuracy. The model is left loaded with those weights and is also
        saved whole to ``<data_dir>/model/<net_name>.pth``.
    """
    # Function-scope import so this block needs no new file-level import.
    import copy

    train_loss = []
    since = time.time()
    # state_dict() hands back references to the live parameter tensors, so the
    # "best" weights must be deep-copied or later optimizer steps overwrite them.
    best_model_wts = copy.deepcopy(model_ft.state_dict())
    best_acc = 0.0
    model_ft.train(True)

    # The dataset does not change between epochs: build the loader once
    # (shuffle=True still reshuffles on every pass over the loader).
    dset_loaders, dset_sizes = loaddata(data_dir=data_dir, batch_size=batch_size, set_name='train', shuffle=True)

    for epoch in range(num_epochs):
        print('Data Size', dset_sizes)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        optimizer = lr_scheduler(optimizer, epoch)

        running_loss = 0.0
        running_corrects = 0
        count = 0

        for inputs, labels in dset_loaders['train']:
            # view(-1) keeps the batch dimension even for a batch of one;
            # squeeze() would collapse it to a 0-d tensor and break the loss.
            labels = labels.long().view(-1)
            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()

            outputs = model_ft(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs.detach(), 1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            count += 1
            # Log every 30th batch and any short (final) batch.
            if count % 30 == 0 or outputs.size(0) < batch_size:
                print('Epoch:{}: loss:{:.3f}'.format(epoch, batch_loss))
                train_loss.append(batch_loss)

            running_loss += batch_loss * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / dset_sizes
        epoch_acc = running_corrects / dset_sizes

        print('Loss: {:.4f} Acc: {:.4f}'.format(
            epoch_loss, epoch_acc))

        if epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model_ft.state_dict())
        # NOTE(review): an early-stop test on `epoch_acc > 1.9999` used to sit
        # here; accuracy never exceeds 1.0, so it could not trigger and training
        # always ran every epoch. Add a threshold below 1.0 if early stopping
        # is actually wanted.

    # save best model
    save_dir = data_dir + '/model'
    os.makedirs(save_dir, exist_ok=True)
    model_ft.load_state_dict(best_model_wts)
    model_out_path = save_dir + "/" + net_name + '.pth'
    # The whole module is pickled (not just the state dict).
    torch.save(model_ft, model_out_path)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return train_loss, best_model_wts


def test_model(model, criterion):
    """Evaluate ``model`` on the 'test' split and print mean loss and accuracy.

    Args:
        model: Model to evaluate (already placed on the target device).
        criterion: Loss called as ``criterion(outputs, labels)``.

    Returns:
        None. The result is printed as ``Loss: ... Acc: ...``.
    """
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    dset_loaders, dset_sizes = loaddata(data_dir=data_dir, batch_size=batch_size, set_name='test', shuffle=False)

    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for inputs, labels in dset_loaders['test']:
            # view(-1) keeps the batch dimension even for a batch of one;
            # squeeze() would collapse it to a 0-d tensor and break the loss.
            labels = labels.long().view(-1)
            # Only move to the GPU when one is available, as training does.
            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

    print('Loss: {:.4f} Acc: {:.4f}'.format(running_loss / dset_sizes,
                                            running_corrects / dset_sizes))


def exp_lr_scheduler(optimizer, epoch, init_lr=0.01, lr_decay_epoch=10, decay_rate=0.8):
    """Set a step-decayed learning rate on every parameter group.

    The rate is ``init_lr * decay_rate ** (epoch // lr_decay_epoch)``, i.e. it
    is multiplied by ``decay_rate`` once every ``lr_decay_epoch`` epochs.

    Args:
        optimizer: Object exposing ``param_groups`` (a list of dicts).
        epoch: Zero-based index of the current epoch.
        init_lr: Learning rate used before the first decay step.
        lr_decay_epoch: Number of epochs between decay steps.
        decay_rate: Multiplicative factor applied at each decay step.

    Returns:
        The same ``optimizer`` object, with ``'lr'`` updated in place.
    """
    lr = init_lr * (decay_rate ** (epoch // lr_decay_epoch))
    print('LR is set to {}'.format(lr))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer


# train
# Maps each supported backbone name to the file name of its pretrained weights.
pth_map = {
    'efficientnet-b0': 'efficientnet-b0.pth',
    'efficientnet-b1': 'efficientnet-b1.pth',
    'efficientnet-b2': 'efficientnet-b2.pth',
    'efficientnet-b3': 'efficientnet-b3.pth',
    'efficientnet-b4': 'efficientnet-b4.pth',
    'efficientnet-b5': 'efficientnet-b5.pth',
    'efficientnet-b6': 'efficientnet-b6.pth',
    'efficientnet-b7': 'efficientnet-b7.pth',
}
# Alternative: let the library download the pretrained weights automatically:
#   model = EfficientNet.from_pretrained('efficientnet-b0')
# Here the weights are loaded offline instead, so the .pth file must already
# have been downloaded into the local 'weights/' directory.
model_ft = EfficientNet.from_name(net_name)
if is_pred:
    net_weight = 'weights/' + pth_map[net_name]
    # map_location='cpu' lets a checkpoint that was saved on a GPU load on a
    # CPU-only machine; the model is moved to the GPU below when one exists.
    state_dict = torch.load(net_weight, map_location='cpu')
    model_ft.load_state_dict(state_dict)

# Replace the final fully connected layer with one that has class_num outputs.
num_ftrs = model_ft._fc.in_features
model_ft._fc = nn.Linear(num_ftrs, class_num)

criterion = nn.CrossEntropyLoss()
if use_gpu:
    model_ft = model_ft.cuda()
    criterion = criterion.cuda()

optimizer = optim.SGD((model_ft.parameters()), lr=lr,
                      momentum=momentum, weight_decay=0.0004)

train_loss, best_model_wts = train_model(model_ft, criterion, optimizer, exp_lr_scheduler, num_epochs=num_epochs)

# test
print('-' * 10)
print('Test Accuracy:')
model_ft.load_state_dict(best_model_wts)
criterion = nn.CrossEntropyLoss()
# Only move the loss to the GPU when one is available; an unconditional
# .cuda() would crash on CPU-only machines.
if use_gpu:
    criterion = criterion.cuda()
test_model(model_ft, criterion)

TensorRT 的代码:https://github.com/wang-xinyu/tensorrtx

gen_wts.py改写

import torch
import torch.nn as nn
import struct
from efficientnet_pytorch import EfficientNet
# Backbone name; also used as the stem of the output '.wts' file name below.
model_name = 'efficientnet-b0'
#model = EfficientNet.from_pretrained(model_name)

# Build the model from a local weight file instead of downloading it.
# NOTE(review): the positional arguments are presumably
# (weights_path, advprop, in_channels) - confirm against the installed
# efficientnet_pytorch version.
model = EfficientNet.from_pretrained(model_name,'weights/efficientnet-b0.pth',False,3)

# Round-trip the whole module through a pickle file.
torch.save(model,'test.pth')

model = torch.load('test.pth')

# This assignment replaces the model loaded above, so the exported weights
# come from this checkpoint.
# NOTE(review): presumably the fine-tuned model written by the training
# script, copied/renamed into 'weights/' - verify the path.
model = torch.load('weights/Neu-efficientnet-b0.pth')

# Switch to evaluation mode before exporting.
model.eval()

# Output file for the exported weights; it is opened without a context
# manager, so the code that follows is responsible for closing it.
f = open(model_name + '.wts', 'w')
......

部署代码改写

int main_(int argc, char **argv)
{
	"./efficientnet -s ../efficientnet-b3.wts efficientnet-b3.engine b3";
	"./efficientnet -d efficientnet-b3.engine b3";

	std::string wtsPath = "";			//../efficientnet-b0.wts
	std::string engine_name = "efficientnet-b0.engine";
	std::string backbone = "b0";

	/*std::string wtsPath = "";
	std::string engine_name = "";
	std::string backbone = "";*/
	/*if (!parse_args(argc, argv, wtsPath, engine_name, backbone))
	{
		std::cerr << "arguments not right!" << std::endl;
		std::cerr << "./efficientnet -s [.wts] [.engine] [b0 b1 b2 b3 ... b7]  // serialize model to engine file" << std::endl;
		std::cerr << "./efficientnet -d [.engine] [b0 b1 b2 b3 ... b7]   // deserialize engine file and run inference" << std::endl;
		return -1;
	}*/

	GlobalParams global_params = global_params_map[backbone];
	// create a model using the API directly and serialize it to a stream
	if (!wtsPath.empty())
	{
		IHostMemory *modelStream{nullptr};
		APIToModel(MAX_BATCH_SIZE, &modelStream, wtsPath, block_args_list, global_params);
		assert(modelStream != nullptr);

		std::ofstream p(engine_name, std::ios::binary);
		if (!p)
		{
			std::cerr << "could not open plan output file" << std::endl;
			return -1;
		}
		p.write(reinterpret_cast<const char *>(modelStream->data()), modelStream->size());
		modelStream->destroy();
		return 1;
	}

	char *trtModelStream{nullptr};
	size_t size{0};

	std::ifstream file(engine_name, std::ios::binary);
	if (file.good())
	{
		file.seekg(0, file.end);
		size = file.tellg();
		file.seekg(0, file.beg);
		trtModelStream = new char[size];
		assert(trtModelStream);
		file.read(trtModelStream, size);
		file.close();
	}
	else
	{
		std::cerr << "could not open plan file" << std::endl;
		return -1;
	}

	IRuntime *runtime = createInferRuntime(gLogger);
	assert(runtime != nullptr);
	ICudaEngine *engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
	assert(engine != nullptr);
	IExecutionContext *context = engine->createExecutionContext();
	assert(context != nullptr);
	delete[] trtModelStream;

	std::vector<cv::String> imgNames;
	cv::glob("../In/", imgNames);

	for (int i = 0; i < imgNames.size(); i++)
	{
		cv::Mat sad = cv::imread(imgNames[i], 1);
		cv::resize(sad, sad, cv::Size(224, 224), 0, 0, cv::INTER_CUBIC);

		// dummy input
		float *data = new float[3 * global_params.input_h * global_params.input_w];
		for (int i = 0; i < 3 * global_params.input_h * global_params.input_w; i++)
			data[i] = 0.1;

		int fcount = 1;
		int INPUT_W = global_params.input_w, INPUT_H = global_params.input_h;

		for (int b = 0; b < fcount; b++)
		{
			cv::Mat img = sad;
			if (img.empty()) continue;
			cv::Mat pr_img = preprocess_img(img, INPUT_W, INPUT_H); // letterbox BGR to RGB
			int i = 0;
			for (int row = 0; row < INPUT_H; ++row)
			{
				uchar* uc_pixel = pr_img.data + row * pr_img.step;
				for (int col = 0; col < INPUT_W; ++col)
				{
					data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
					data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
					data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
					uc_pixel += 3;
					++i;
				}
			}
		}	

		// Run inference
		float *prob = new float[global_params.num_classes];
		for (int i = 0; i < 1; i++)
		{
			auto start = std::chrono::system_clock::now();
			doInference(*context, data, prob, 1, global_params);
			auto end = std::chrono::system_clock::now();
			//std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;				//统计耗时
		}
		int at = 0;
		float score = -999999;
		std::vector<float> scores;
		for (unsigned int i = 0; i < 1000; i++)
		{
			scores.push_back(prob[i]);
			if (prob[i] > score)
			{
				at = i;
				score = prob[i];
			}
		}
		std::cout << at << std::endl;
		std::sort(scores.begin(), scores.end());
		std::reverse(scores.begin(), scores.end());

		delete data;
		delete prob;
	}

	

	// Destroy the engine
	context->destroy();
	engine->destroy();
	runtime->destroy();	
	

	return 0;
}

结果:

 

  • 1
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值