一:数据集的处理:
观察数据集
可知每个标注都带有类别编号 class_index,应据此将标志分类;同时应根据归一化的中心坐标 (x_center_norm, y_center_norm) 与框的宽高 (box_width_norm, box_height_norm) 裁剪出标志区域并保存
建立文件夹作为保存路径
读取,裁剪,保存过程均在以下代码中:
import json
import cv2 as cv
from tqdm import tqdm
import os
filepath = 'D:/Shetuan/traffic cal/traffic_dataset/traffic_dataset/traffic_dataset/'  # source directory (images + JSON annotations)
savepath = 'D:/Shetuan/traffic cal/traffic_dataset/traffic_dataset/traffic_sign/'  # root directory for the cropped signs

# class_index value -> sub-folder of savepath that receives the crop.
CLASS_DIRS = {'0': 'prohibitory', '1': 'danger', '2': 'mandatory', '3': 'other'}


def _crop_bounds(ann, img_w, img_h):
    """Convert one normalised annotation into pixel bounds (x0, y0, x1, y1).

    Horizontal quantities (x centre, box width) are scaled by the image
    width and vertical quantities (y centre, box height) by the image
    height.  The result is truncated to integers and clamped to the image.
    """
    cx = float(ann['x_center_norm']) * img_w
    cy = float(ann['y_center_norm']) * img_h
    half_w = float(ann['box_width_norm']) * img_w / 2
    half_h = float(ann['box_height_norm']) * img_h / 2
    x0 = min(max(int(cx - half_w), 0), img_w)
    y0 = min(max(int(cy - half_h), 0), img_h)
    x1 = min(max(int(cx + half_w), 0), img_w)
    y1 = min(max(int(cy + half_h), 0), img_h)
    return x0, y0, x1, y1


# Sorted so that "<name>.jpg" is always visited right before "<name>.json";
# os.listdir itself returns entries in arbitrary order.
# NOTE(review): assumes each JSON file annotates the image listed just before it - confirm.
filenames = sorted(os.listdir(filepath))

# cv.imwrite does not create missing directories, so make them up front.
for class_dir in CLASS_DIRS.values():
    os.makedirs(savepath + class_dir, exist_ok=True)

saved = dict.fromkeys(CLASS_DIRS, 0)  # per-class running index used as the output file name
img = None
for filename in tqdm(filenames):
    suffix = os.path.splitext(filename)[1].lower()
    if suffix == '.jpg':
        img = cv.imread(filepath + filename)  # None when the file cannot be decoded
    elif suffix == '.json':
        if img is None:
            continue  # no readable image to crop from
        with open(filepath + filename) as fh:
            annotations = json.load(fh)  # list of annotation dicts
        img_h, img_w = img.shape[:2]  # OpenCV images are indexed (rows, cols, channels)
        for ann in annotations:
            key = str(ann['class_index'])
            if key not in CLASS_DIRS:
                continue
            x0, y0, x1, y1 = _crop_bounds(ann, img_w, img_h)
            if x1 <= x0 or y1 <= y0:
                continue  # degenerate box: cv.imwrite rejects empty images
            imgCrop = img[y0:y1, x0:x1]
            cv.imwrite(savepath + '%s/%d.jpg' % (CLASS_DIRS[key], saved[key]), imgCrop)
            saved[key] += 1
二:数据增强:
观察到裁剪后的图片大多亮度偏暗,且数据量不够大,因此需要对数据进行增强
增强方式体现在下面 transform 的参数设置中(随机水平翻转与颜色抖动)
# Preprocessing / augmentation pipeline applied to each PIL image.
_steps = [
    transforms.Resize((224, 224)),  # resize so the network below can consume the image
    transforms.RandomHorizontalFlip(),  # random left-right flip
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),  # random brightness/contrast/saturation/hue
    transforms.ToTensor(),  # PIL image -> tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_steps)
三:加载数据集
读取裁剪后保存图片的文件夹,并记录好对应的标签
class traffic_datasets(Dataset):
    """In-memory dataset of cropped traffic-sign images.

    ``filepath`` must end with a path separator and contain one sub-folder
    per entry of ``class_dirs``; the label of an image is the position of
    its folder in that tuple.

    NOTE: the module-level ``transform`` is applied once, while loading, so
    any random augmentation in it is sampled a single time per image rather
    than once per epoch.
    """

    # Sub-folder names; the index of each name is the integer class label.
    class_dirs = ('prohibitory', 'danger', 'mandatory', 'other')

    def __init__(self, filepath):
        self.images = []
        labels = []
        self.transform = transform  # module-level preprocessing pipeline
        for label, class_dir in enumerate(self.class_dirs):
            for filename in tqdm(os.listdir(filepath + class_dir)):
                # Context manager closes the file; convert() guarantees the
                # three channels that the 3-value Normalize step requires.
                with Image.open(filepath + class_dir + '/' + filename) as image:
                    self.images.append(self.transform(image.convert('RGB')))
                labels.append(label)
        self.labels = torch.LongTensor(labels)  # labels as an int64 tensor

    def __getitem__(self, index):
        """Return the ``(image_tensor, label)`` pair stored at ``index``."""
        return self.images[index], self.labels[index]

    def __len__(self):
        """Return the number of loaded images."""
        # Plain list length; no need to materialise the tensors as an array.
        return len(self.images)
# Training split: batches are drawn in shuffled order.
train_data = traffic_datasets('traffic_dataset/traffic_dataset/train/')
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# Validation split: batches keep the stored order.
val_data = traffic_datasets('traffic_dataset/traffic_dataset/val/')
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False)
四:构造resnet34的网络结构
class Bottleneck(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to produce the
            shortcut branch; the raw input is used when it is ``None``.
    """

    extention = 1  # channel multiplier read by ResNet.make_layer

    def __init__(self, inplanes, planes, stride, downsample=None):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(inplanes, planes, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)
        hidden = self.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        return self.relu(hidden + shortcut)
class ResNet(nn.Module):
    """ResNet-style classifier: 7x7 stem, four residual stages, pooled linear head.

    Args:
        block: residual block class.  It must expose an ``extention`` class
            attribute (output-channel multiplier) and accept
            ``(inplanes, planes, stride, downsample=None)``.
        layers: sequence of four ints, the number of blocks in each stage.
        num_class: size of the output layer.
    """

    def __init__(self, block, layers, num_class):
        super().__init__()
        self.inplane = 64  # channel count entering the next stage; updated by make_layer
        self.block = block
        self.layers = layers
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)
        # Global average pool: equals the former AvgPool2d(7) on the 7x7 map
        # produced by 224x224 inputs, but also accepts other input sizes.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Width follows the block's channel multiplier (512 when it is 1).
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Map a batch of 3-channel images to ``(batch, num_class)`` scores."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)
        out = self.stage1(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.inplane``.

        Only the first block receives ``stride``.  It also receives a
        1x1 conv + batch-norm projection shortcut whenever the stride is not 1
        or the channel count changes.
        """
        out_planes = plane * block.extention
        downsample = None
        if stride != 1 or self.inplane != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_planes, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_planes
        for _ in range(1, block_num):
            block_list.append(block(self.inplane, plane, stride=1))
        return nn.Sequential(*block_list)
model = ResNet(Bottleneck, [3, 4, 6, 3], 4)  # four output classes
# is_available must be *called*: the bare function object is always truthy,
# which selected 'cuda' even on machines without a usable GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
五:构造损失函数和优化器
# Cross-entropy loss for the multi-class prediction.
criterion = torch.nn.CrossEntropyLoss()
# Adam over all model parameters with learning rate 0.001.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
六:构建训练模型:
def train(epoch):
    """Run one training pass over ``train_loader`` and checkpoint the model.

    Prints the mean per-batch loss and writes the model's state dict to
    ``./model.pth``.

    Args:
        epoch: zero-based epoch index; only used in the progress message.
    """
    model.train()
    print("epoch:", epoch + 1)
    running_loss = 0.0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)  # move batch to the model's device
        optimizer.zero_grad()  # clear gradients from the previous step
        outputs = model(inputs)  # forward pass
        loss = criterion(outputs, targets)
        loss.backward()  # back-propagation
        optimizer.step()  # parameter update
        running_loss += loss.item()
        num_batches += 1
    # Average over the number of batches processed.  Dividing by the last
    # batch index was off by one and failed for loaders with <= 1 batch.
    if num_batches:
        print('train loss: %.3f' % (running_loss / num_batches))
    torch.save(model.state_dict(), './model.pth')  # save the weights
七:构造验证模型:
def val():
    """Evaluate the model on ``val_loader``.

    Prints the accuracy as a whole-number percentage and returns it as a
    fraction (correct predictions divided by samples seen).
    """
    model.eval()  # evaluation mode
    hits, seen = 0, 0
    with torch.no_grad():  # gradients are not needed here
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)  # forward pass
            predicted = torch.max(outputs.data, dim=1)[1]  # index of the largest score per sample
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * hits / seen))
    return hits / seen
八:构造测试模型:
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
# Inference-time preprocessing: tensor conversion followed by per-channel normalisation.
_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_steps)
class Bottleneck(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to produce the
            shortcut branch; the raw input is used when it is ``None``.
    """

    extention = 1  # channel multiplier read by ResNet.make_layer

    def __init__(self, inplanes, planes, stride, downsample=None):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(inplanes, planes, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)
        hidden = self.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        return self.relu(hidden + shortcut)
class ResNet(nn.Module):
    """ResNet-style classifier: 7x7 stem, four residual stages, pooled linear head.

    Args:
        block: residual block class.  It must expose an ``extention`` class
            attribute (output-channel multiplier) and accept
            ``(inplanes, planes, stride, downsample=None)``.
        layers: sequence of four ints, the number of blocks in each stage.
        num_class: size of the output layer.
    """

    def __init__(self, block, layers, num_class):
        super().__init__()
        self.inplane = 64  # channel count entering the next stage; updated by make_layer
        self.block = block
        self.layers = layers
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)
        # Global average pool: equals the former AvgPool2d(7) on the 7x7 map
        # produced by 224x224 inputs, but also accepts other input sizes.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Width follows the block's channel multiplier (512 when it is 1).
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Map a batch of 3-channel images to ``(batch, num_class)`` scores."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)
        out = self.stage1(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.inplane``.

        Only the first block receives ``stride``.  It also receives a
        1x1 conv + batch-norm projection shortcut whenever the stride is not 1
        or the channel count changes.
        """
        out_planes = plane * block.extention
        downsample = None
        if stride != 1 or self.inplane != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_planes, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_planes
        for _ in range(1, block_num):
            block_list.append(block(self.inplane, plane, stride=1))
        return nn.Sequential(*block_list)
# is_available must be *called*: the bare function object is always truthy,
# which selected 'cuda' even on machines without a usable GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet(Bottleneck, [3, 4, 6, 3], 4)
# map_location lets a checkpoint saved from a GPU load on a CPU-only machine.
model.load_state_dict(torch.load("model.pth", map_location=device))
model.to(device)
def test(imgpath):
    """Classify one image file and display it with the predicted class name.

    Loads the weights from ``model.pth`` on every call, prints the input
    tensor shape and the predicted class name, then shows the original
    image with the name drawn above it.

    Args:
        imgpath: path of the image file to classify.
    """
    # Index in this tuple == class index predicted by the network.
    class_names = ('prohibitory', 'danger', 'mandatory', 'other')
    font = {'color': 'red',
            'size': 20,
            'family': 'KaiTi',
            'style': 'italic'}
    # Fall back to the CPU instead of crashing when CUDA is unavailable.
    run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    o_img = Image.open(imgpath)
    # Force three channels so the 3-value Normalize step always applies;
    # the untouched original is kept for display.
    o_img1 = o_img.convert('RGB').resize((224, 224))
    img = transform(o_img1).unsqueeze(0).to(run_device)  # add the batch dimension
    print(img.shape)

    model = ResNet(Bottleneck, [3, 4, 6, 3], 4)
    model.load_state_dict(torch.load("model.pth", map_location=run_device))
    model = model.to(run_device)
    model.eval()
    with torch.no_grad():  # inference only; no gradient bookkeeping needed
        output = model(img)
    predict = int(torch.max(output, dim=1)[1].item())

    name = class_names[predict]
    print(name)
    plt.imshow(o_img)
    plt.text(0, -6.0, name, fontdict=font)
    plt.show()
if __name__ == "__main__":
    # Classify a single sample image when the file is run as a script.
    sample_path = 'D:/Shetuan/traffic cal/traffic_dataset/traffic_dataset/test/mandatory/22.jpg'
    test(sample_path)
源代码如下:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset,DataLoader
import torchvision
from torchvision import transforms
import torch.nn.functional as F
import torch.optim as optim
import os
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
batch_size = 8  # samples per DataLoader batch

# Preprocessing / augmentation pipeline applied to each PIL image.
_steps = [
    transforms.Resize((224, 224)),  # resize so the network below can consume the image
    transforms.RandomHorizontalFlip(),  # random left-right flip
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),  # random brightness/contrast/saturation/hue
    transforms.ToTensor(),  # PIL image -> tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_steps)
class traffic_datasets(Dataset):
    """In-memory dataset of cropped traffic-sign images.

    ``filepath`` must end with a path separator and contain one sub-folder
    per entry of ``class_dirs``; the label of an image is the position of
    its folder in that tuple.

    NOTE: the module-level ``transform`` is applied once, while loading, so
    any random augmentation in it is sampled a single time per image rather
    than once per epoch.
    """

    # Sub-folder names; the index of each name is the integer class label.
    class_dirs = ('prohibitory', 'danger', 'mandatory', 'other')

    def __init__(self, filepath):
        self.images = []
        labels = []
        self.transform = transform  # module-level preprocessing pipeline
        for label, class_dir in enumerate(self.class_dirs):
            for filename in tqdm(os.listdir(filepath + class_dir)):
                # Context manager closes the file; convert() guarantees the
                # three channels that the 3-value Normalize step requires.
                with Image.open(filepath + class_dir + '/' + filename) as image:
                    self.images.append(self.transform(image.convert('RGB')))
                labels.append(label)
        self.labels = torch.LongTensor(labels)  # labels as an int64 tensor

    def __getitem__(self, index):
        """Return the ``(image_tensor, label)`` pair stored at ``index``."""
        return self.images[index], self.labels[index]

    def __len__(self):
        """Return the number of loaded images."""
        # Plain list length; no need to materialise the tensors as an array.
        return len(self.images)
# Training split: batches are drawn in shuffled order.
train_data = traffic_datasets('traffic_dataset/traffic_dataset/train/')
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# Validation split: batches keep the stored order.
val_data = traffic_datasets('traffic_dataset/traffic_dataset/val/')
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False)
class Bottleneck(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to produce the
            shortcut branch; the raw input is used when it is ``None``.
    """

    extention = 1  # channel multiplier read by ResNet.make_layer

    def __init__(self, inplanes, planes, stride, downsample=None):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(inplanes, planes, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)
        hidden = self.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        return self.relu(hidden + shortcut)
class ResNet(nn.Module):
    """ResNet-style classifier: 7x7 stem, four residual stages, pooled linear head.

    Args:
        block: residual block class.  It must expose an ``extention`` class
            attribute (output-channel multiplier) and accept
            ``(inplanes, planes, stride, downsample=None)``.
        layers: sequence of four ints, the number of blocks in each stage.
        num_class: size of the output layer.
    """

    def __init__(self, block, layers, num_class):
        super().__init__()
        self.inplane = 64  # channel count entering the next stage; updated by make_layer
        self.block = block
        self.layers = layers
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)
        # Global average pool: equals the former AvgPool2d(7) on the 7x7 map
        # produced by 224x224 inputs, but also accepts other input sizes.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Width follows the block's channel multiplier (512 when it is 1).
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Map a batch of 3-channel images to ``(batch, num_class)`` scores."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)
        out = self.stage1(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.inplane``.

        Only the first block receives ``stride``.  It also receives a
        1x1 conv + batch-norm projection shortcut whenever the stride is not 1
        or the channel count changes.
        """
        out_planes = plane * block.extention
        downsample = None
        if stride != 1 or self.inplane != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_planes, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_planes
        for _ in range(1, block_num):
            block_list.append(block(self.inplane, plane, stride=1))
        return nn.Sequential(*block_list)
model = ResNet(Bottleneck, [3, 4, 6, 3], 4)  # four output classes
# is_available must be *called*: the bare function object is always truthy,
# which selected 'cuda' even on machines without a usable GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# Cross-entropy loss for the multi-class prediction.
criterion = torch.nn.CrossEntropyLoss()
# Adam over all model parameters with learning rate 0.001.
optimizer = optim.Adam(model.parameters(), lr=0.001)
def train(epoch):
    """Run one training pass over ``train_loader`` and checkpoint the model.

    Prints the mean per-batch loss and writes the model's state dict to
    ``./model.pth``.

    Args:
        epoch: zero-based epoch index; only used in the progress message.
    """
    model.train()  # training mode
    print("epoch:", epoch + 1)
    running_loss = 0.0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)  # move batch to the model's device
        optimizer.zero_grad()  # clear gradients from the previous step
        outputs = model(inputs)  # forward pass
        loss = criterion(outputs, targets)
        loss.backward()  # back-propagation
        optimizer.step()  # parameter update
        running_loss += loss.item()
        num_batches += 1
    # Average over the number of batches processed.  Dividing by the last
    # batch index was off by one and failed for loaders with <= 1 batch.
    if num_batches:
        print('train loss: %.3f' % (running_loss / num_batches))
    torch.save(model.state_dict(), './model.pth')  # save the weights
def val():
    """Evaluate the model on ``val_loader``.

    Prints the accuracy as a whole-number percentage and returns it as a
    fraction (correct predictions divided by samples seen).
    """
    model.eval()  # evaluation mode
    hits, seen = 0, 0
    with torch.no_grad():  # gradients are not needed here
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)  # forward pass
            predicted = torch.max(outputs.data, dim=1)[1]  # index of the largest score per sample
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * hits / seen))
    return hits / seen
if __name__ == '__main__':
    # Train for 20 epochs, validating after each, then plot accuracy per epoch.
    acc_list, epoch_list = [], []
    for epoch in range(20):
        train(epoch)
        acc = val()
        epoch_list.append(epoch + 1)
        acc_list.append(acc)
    plt.plot(epoch_list, acc_list)
    plt.xlabel("Epoch")
    plt.ylabel("ACC")
    plt.show()
我的训练次数较少,但是准确率已经可以达到95%
附几张测试结果图和准确率曲线图