今天突然好多人关注,也不知道是系统账号还是真粉丝,所以决定大晚上的发一个博客,嘿嘿嘿
项目代码地址:这是一个demo实战的代码1(CSDN文库 · VR文档类资源)
有代码固然好,但最好还是自己手动敲一遍,增加理解
这是我的demo项目结构,nets文件夹里面是自己搭建的网络模型(用的是MobileNet网络模型)
代码里面训练图片的路径可能需要根据自己的环境改一下,这应该挺简单的:
训练代码:
import os, sys, glob, shutil, json
import numpy as np
import torch
import torchvision.transforms as transforms
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from dataloader import M_Data
from nets.mobilenet_v1 import MobilenetV1
def train(data_loader, model, optim, criterion1, use_cuda=False):
    """Run one training epoch and return the mean batch loss.

    The model is called as ``model(img)`` and must return two outputs. The
    batch loss is ``criterion1`` applied to the first output against
    ``target[:, 0]`` plus ``criterion1`` applied to the second output
    against ``target[:, 1]``.

    Args:
        data_loader: Iterable yielding ``(img, target)`` batches.
        model: Network returning a pair of outputs per forward pass.
        optim: Optimizer exposing ``zero_grad()`` and ``step()``.
        criterion1: Loss callable taking ``(output, target_column)``.
        use_cuda: When true, move the model and every batch to the GPU.

    Returns:
        The mean of the per-batch loss values, or NaN when ``data_loader``
        yields no batches.
    """
    if use_cuda:
        model = model.cuda()
    model.train()

    train_loss = []
    for img, target in data_loader:
        if use_cuda:
            img, target = img.cuda(), target.cuda()

        feat1, feat2 = model(img)
        # Each label holds two values (one per column), so every model
        # output is scored against its own label column and the two losses
        # are summed.
        loss = criterion1(feat1, target[:, 0]) + criterion1(feat2, target[:, 1])

        optim.zero_grad()  # clear gradients left over from the previous step
        loss.backward()    # back-propagate
        optim.step()       # update the parameters

        train_loss.append(loss.item())

    if not train_loss:
        # np.mean([]) would also give NaN, but with a RuntimeWarning; be
        # explicit so an empty loader is easy to recognise.
        return float("nan")
    return np.mean(train_loss)
def validate(val_loader, model, criterion2, use_cuda=False):
    """Evaluate the model on a validation loader without tracking gradients.

    The loss for each batch is computed the same way as in training:
    ``criterion2`` on the first model output against ``target[:, 0]`` plus
    ``criterion2`` on the second output against ``target[:, 1]``.

    Args:
        val_loader: Iterable yielding ``(img, target)`` batches.
        model: Network returning a pair of outputs per forward pass.
        criterion2: Loss callable taking ``(output, target_column)``.
        use_cuda: When true, move the model and every batch to the GPU.

    Returns:
        A list with one loss value (Python float) per batch; empty when the
        loader yields nothing. Note that, unlike ``train``, the values are
        not averaged.
    """
    # Move the model once up front instead of once per batch.
    if use_cuda:
        model = model.cuda()
    model.eval()

    val_loss = []
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for img, target in val_loader:
            if use_cuda:
                img, target = img.cuda(), target.cuda()

            feat1, feat2 = model(img)
            # The original line read `criterion2, target(feat1, ...)`, which
            # built a tuple and tried to call the target tensor.
            loss = criterion2(feat1, target[:, 0]) + criterion2(feat2, target[:, 1])
            val_loss.append(loss.item())
    return val_loss
if __name__ == '__main__':
    # 1. Collect the image paths and the labels.
    # NOTE(review): the images come from a folder named "test_img" while the
    # labels come from train.json -- confirm these really belong together.
    train_img_path = glob.glob(r"D:\Code\python\pytorch\street_stings_reganize\test_img\*.png")
    train_img_path.sort()
    with open(r"D:\Code\python\pytorch\street_stings_reganize\train.json") as f2:
        train_json = json.load(f2)
    # NOTE(review): labels are taken in JSON key order and paired with the
    # sorted image paths purely by position; this assumes both orders agree
    # -- verify against the dataset.
    train_label = [train_json[x]['label'] for x in train_json]

    # 2. Build the dataset / loader.
    transform = transforms.Compose([
        transforms.Resize((416, 416)),            # resize every image
        # transforms.RandomCrop((60, 120)),       # random crop (disabled)
        transforms.ColorJitter(0.3, 0.3, 0.2),    # colour jitter
        transforms.RandomRotation(10),            # random rotation up to 10 degrees
        transforms.ToTensor(),                    # image -> tensor
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # normalise
    ])
    train_loader = torch.utils.data.DataLoader(
        M_Data(train_img_path, train_label, transform),
        batch_size=8,
        shuffle=True,
        num_workers=0,
    )

    # 3. Model, loss function and optimizer.
    net = MobilenetV1(num_classes=10)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

    # Use the GPU only when one is actually available, so the script also
    # runs (slowly) on CPU-only machines instead of crashing.
    use_cuda = torch.cuda.is_available()

    # Create the checkpoint directory up front: the "last" checkpoint is
    # written regardless of whether a "best" checkpoint was ever saved.
    os.makedirs("./models", exist_ok=True)

    # 4. Training loop.
    EPOCHES = 300
    BEST_LOSS = 100
    for epoch in range(EPOCHES):
        train_loss = train(train_loader, net, optimizer, criterion, use_cuda=use_cuda)
        print('Epoch: {0}, Train loss: {1} '.format(epoch, train_loss))

        # "Best" is judged on the training loss; no validation set is used here.
        if train_loss < BEST_LOSS:
            BEST_LOSS = train_loss
            torch.save(net, "./models/best_mobilenet.pt")
            print("save best model success")

        # Refresh the "last" checkpoint every epoch so an interrupted run
        # still leaves the most recent weights on disk.
        torch.save(net, "./models/last_mobilenet.pt")
测试代码:
import os
import numpy as np
import torch
import torchvision.transforms as transforms
import cv2
def i2t(img):
    """Convert a single image into a normalised batch of size one.

    The image is turned into a tensor with ``transforms.ToTensor``,
    normalised per channel with the fixed mean/std values below, and given a
    leading batch dimension via ``unsqueeze(0)``.

    NOTE(review): ``each_predict`` passes an array read with ``cv2.imread``,
    which OpenCV returns in BGR channel order; the mean/std values look like
    the usual RGB statistics -- confirm the channel order matches training.
    """
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    tensor = normalize(to_tensor(img))
    # Add the batch axis expected by the model.
    return tensor.unsqueeze(0)
def predict(test_loader, model, tta=10, use_cuda=True):
    """Run the model over a loader ``tta`` times and sum the raw outputs.

    For every batch the two model outputs are concatenated along axis 1;
    the batches of one pass are stacked vertically, and the stacked arrays
    of all passes are added together element-wise.

    Args:
        test_loader: Iterable yielding ``(img, target)`` batches; the target
            is ignored. It must yield the samples in the same order on every
            pass for the summation to be meaningful.
        model: Network returning a pair of outputs per forward pass.
        tta: Number of passes over the loader (test-time augmentation
            rounds). The passes only differ if the loader itself applies
            random transforms.
        use_cuda: When true, move the model and every batch to the GPU.

    Returns:
        A NumPy array holding the summed outputs, one row per sample, or
        ``None`` when ``tta`` is 0.
    """
    if use_cuda:
        model = model.cuda()
    model.eval()

    test_pred_tta = None
    with torch.no_grad():
        for _ in range(tta):
            test_pred = []
            for img, _target in test_loader:
                if use_cuda:
                    img = img.cuda()
                feat1, feat2 = model(img)
                # .cpu() is a no-op for tensors already on the CPU, so one
                # code path serves both the GPU and the CPU case.
                output = np.concatenate(
                    [feat1.detach().cpu().numpy(), feat2.detach().cpu().numpy()],
                    axis=1,
                )
                test_pred.append(output)

            test_pred = np.vstack(test_pred)
            if test_pred_tta is None:
                test_pred_tta = test_pred
            else:
                test_pred_tta += test_pred
    return test_pred_tta
def each_predict(img_path, model, use_cuda=True, class_names=None):
    """Predict the two-character label of one image and save an annotated copy.

    The image is read with OpenCV, resized to 416x416, converted with
    ``i2t`` and passed through the model. The arg-max of each of the two
    model outputs is mapped to a class name; the two names are concatenated,
    printed, drawn onto the resized image, and the result is written to
    ``./output_img/<file name>``.

    Args:
        img_path: Path of the image file to classify.
        model: Network returning a pair of outputs per forward pass.
        use_cuda: When true, run the model and the input on the GPU.
        class_names: Optional sequence mapping class index to its text.
            Defaults to the digits "0".."9".

    Returns:
        The predicted label string (both class names concatenated).

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    if class_names is None:
        class_names = [str(digit) for digit in range(10)]

    # Derive the file name from the path rather than relying on a
    # script-level loop variable.
    img_name = os.path.basename(img_path)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("could not read image: %s" % img_path)
    img = cv2.resize(img, (416, 416))

    # NOTE(review): cv2.imread yields BGR data; confirm that matches the
    # channel order the model was trained with.
    batch = i2t(img)
    if use_cuda:
        model = model.cuda()
        batch = batch.cuda()
    model.eval()

    with torch.no_grad():
        out1, out2 = model(batch)

    # The batch holds a single image, so each arg-max is a one-element tensor.
    index1 = out1.argmax(dim=-1).item()
    index2 = out2.argmax(dim=-1).item()
    label = class_names[index1] + class_names[index2]
    print("%s预测结果是:" % img_name, label)

    # Draw the prediction on the resized image and store it.
    cv2.putText(img, label, (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
    os.makedirs("./output_img", exist_ok=True)
    cv2.imwrite("./output_img/%s" % img_name, img)
    return label
if __name__ == '__main__':
    # --- Alternative (disabled): batch evaluation over a DataLoader --------
    # Kept for reference; it needs glob, json and M_Data to be imported.
    #
    # train_img_path = glob.glob(r"E:\tianchi_learning\demo_project\test_img\*.png")
    # train_img_path.sort()
    # with open(r"E:\tianchi_learning\demo_project\train.json") as f2:
    #     train_json = json.load(f2)
    # train_label = [train_json[x]['label'] for x in train_json]
    #
    # # 2. Build the dataset
    # transform = transforms.Compose([
    #     transforms.Resize((64, 128)),            # resize every image
    #     transforms.RandomCrop((60, 120)),        # random crop
    #     transforms.ColorJitter(0.3, 0.3, 0.2),   # colour jitter
    #     transforms.RandomRotation(10),           # random rotation up to 10 degrees
    #     transforms.ToTensor(),                   # image -> tensor
    #     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])  # normalise
    # train_loader = torch.utils.data.DataLoader(M_Data(train_img_path, train_label, transform), batch_size=30, shuffle=True, num_workers=0)
    #
    # # 3. Load the model and run the prediction
    # net = torch.load(r"E:\tianchi_learning\demo_project\models\best.pt")
    # predict_label = predict(train_loader, net)
    #
    # predict_label = np.vstack([predict_label[:, :10].argmax(1), predict_label[:, 10:20].argmax(1)]).T
    # val_label_pred = []
    # train_label = [''.join(map(str, x)) for x in train_loader.dataset.label]
    # for x in predict_label:
    #     val_label_pred.append(''.join(map(str, x)))
    # print(val_label_pred, len(val_label_pred))
    # print(train_label)
    # val_char_acc = np.mean(np.array(val_label_pred) == np.array(train_label))
    # print("acc", val_char_acc)
    # -----------------------------------------------------------------------

    # Per-image prediction over every file in a folder.
    # Fall back to the CPU when no GPU is present.
    use_cuda = torch.cuda.is_available()

    # Index -> text mapping for the ten digit classes (read by each_predict).
    classes_name = [str(i) for i in range(10)]

    # NOTE: torch.load unpickles a complete module object here. Only load
    # checkpoint files you trust, and the model's class definition must be
    # importable. Recent PyTorch releases default to weights_only=True, in
    # which case loading a whole pickled module needs weights_only=False.
    # map_location lets a checkpoint saved on a GPU be opened on a CPU-only
    # machine.
    net = torch.load(
        r"./models/best_mobilenet.pt",
        map_location=None if use_cuda else "cpu",
    )

    train_path = r"D:\Code\python\pytorch\street_stings_reganize\test_img"
    for img_name in os.listdir(train_path):
        img_path = os.path.join(train_path, img_name)
        each_predict(img_path, net, use_cuda=use_cuda)