模型结构采用CNN+RNN(CRNN)+CTC
!pip install matplotlib
import torch.utils.data.dataloader as dataloader
import torch.utils.data.dataset as dataset
from torchvision import datasets, transforms
import torch.nn.functional as F
import torch
import glob
import PIL.Image as Image
import os
import numpy as np
import math
import torch.nn as nn
import time
import matplotlib.pyplot as plt
class CodeDataset(dataset.Dataset):
def __init__(self, basedir):
self.files = glob.glob(basedir + "*.png")
self.pre_process =transforms.Compose(
[transforms.ToTensor()
,transforms.Normalize(mean=[0.5], std=[0.5])
])
def __getitem__(self, index):
file = self.files[index]
name = os.path.basename(file).rstrip(".png")
label = name[(name.index('_') + 1):]
image = Image.open(file)
numbs = [ord(x) - ord('0') for x in label]
assert len(numbs) >= 2
# one_hots = np.zeros((len(numbs), 10))
# one_hots[np.arange(0, len(label)), numbs] = 1
# return self.pre_process(image), one_hots.reshape(40)
return self.pre_process(image), torch.LongTensor(numbs)
def __len__(self):
return len(self.files)
def collate_fn(batch):
sequence_lengths = []
max_width, max_height = 0, 0
for image, label in batch:
if image.size(1) > max_height:
max_height = image.size(1)
if image.size(2) > max_width:
max_width = image.size(2)
sequence_lengths.append(label.size(0))
seq_lengths = torch.LongTensor(sequence_lengths)
seq_tensor = torch.zeros(seq_lengths.size(0), seq_lengths.max()).long()
img_tensor = torch.zeros(seq_lengths.size(0), 3, max_height, max_width)
for idx, (image, label) in enumerate(batch):
seq_tensor[idx, :label.size(0)] = label
img_tensor[idx, :, :image.size(1), :image.size(2)] = image
return img_tensor, seq_tensor, seq_lengths
USE_GPU =torch.cuda.is_available()
print("USE GPU:",USE_GPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 100
trainset = CodeDataset('captcha_datasets_var/train-data/')
# 防止最后一个batch_size=1,如果最后一个batch_size=1就舍去
drop_last = True if len(trainset) % BATCH_SIZE == 1 else False
nw = min([os.cpu_count(), BATCH_SIZE if BATCH_SIZE > 1 else 0, 8]) # number of workers
print('Using %g dataloader workers' % nw)
trainloader = dataloader.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True,num_workers=nw,
drop_last=drop_last, collate_fn=collate_fn)
validset = CodeDataset('captcha_datasets_var/valid-data/')
validloader = dataloader.DataLoader(validset, batch_size=BATCH_SIZE, shuffle=False,num_workers=nw, collate_fn=collate_fn)
testset = CodeDataset('captcha_datasets_var/test-data/')
testloader = dataloader.DataLoader(testset, batch_size=81,shuffle=True, collate_fn=collate_fn)
USE GPU: True
Using 8 dataloader workers
def _make_convolutional(in_channels,
out_channels,
kernel_size,
stride=1,
padding=0):
return nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size,
stride, padding, bias=False),
nn.BatchNorm2d(out_channels),
nn.LeakyReLU(inplace=True)
)
class ResidualBlock(nn.Module):
def __init__(self, in_channels):
super(ResidualBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, in_channels // 2, 1,1,0,bias=False)
self.bn1=nn.BatchNorm2d(in_channels//2)
self.conv2 = nn.Conv2d(in_channels // 2, in_channels, 3, 1, 1,bias=False)
self.bn2=nn.BatchNorm2d(in_channels)
self.relu = nn.LeakyReLU(inplace=True)
def forward(self, input):
x = self.conv1(input)
x = self.bn1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.bn2(x)
x += input
x = self.relu(x)
return x
class QLNet(nn.Module):
def __init__(self):
super(QLNet, self).__init__()
self.conv1 = _make_convolutional(3, 32, 3, 1, 1)
self.conv2 = _make_convolutional(32, 64, 3, 2, 1)
self.layer1 = self._make_layer(64, 1)
self.conv3 = _make_convolutional(64, 128, 3, 2, 1)
self.layer2 = self._make_layer(128, 2)
# self.conv4 = _make_convolutional(128, 256, 3,2, 1)
# self.layer3 = self._make_layer(256, 1)
# self.conv5 = _make_convolutional(256, 512, 3, 2, 1)
# self.layer4 = self._make_layer(512, 1)
# self.conv6 = _make_convolutional(512, 1024, 3, (2,1), 1)
# self.layer5 = self._make_layer(1024, 1)
self.adapt_max_pool2d = nn.AdaptiveMaxPool2d((1, 40))
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu')
def _make_layer(self, in_channels, repeat_count):
layers = []
for _ in range(repeat_count):
layers.append(ResidualBlock(in_channels))
return nn.Sequential(*layers)
def forward(self, input):
x = self.conv1(input)
x = self.conv2(x)
x = self.layer1(x)
x = self.conv3(x)
x = self.layer2(x)
# x = self.conv4(x)
# x = self.layer3(x)
# x = self.conv5(x)
# x = self.layer4(x)
# x = self.conv6(x)
# x = self.layer5(x)
x=self.adapt_max_pool2d(x)
return x
qlnet=QLNet()
for images,labels,_ in trainloader:
print('labels.size:',labels.size())
print(images.size())
output=qlnet(images)
print(output.shape)
break
labels.size: torch.Size([100, 10])
torch.Size([100, 3, 60, 400])
torch.Size([100, 128, 1, 40])
class Model(nn.Module):
def __init__(self, output_size):
super(Model,self).__init__()
self.feature_extractor = QLNet()
self.num_layers = 2
self.n_directions = 2
self.hidden_size = 80
self.gru = nn.GRU(input_size=128, hidden_size=self.hidden_size,
num_layers=self.num_layers,
bidirectional=True, batch_first=True)
self.fc = nn.Linear(self.hidden_size * self.n_directions, output_size)
self.log_softmax = nn.LogSoftmax(2)
def forward(self,input):
x = self.feature_extractor(input)
x = x.squeeze(2)
x = x.permute(0, 2, 1)
hidden = torch.zeros((self.num_layers * self.n_directions,
x.size(0), self.hidden_size),device=device)
output, hidden = self.gru(x, hidden)
x = self.fc(output)
x = x.permute(1, 0, 2)
output = self.log_softmax(x)
output_lengths = torch.full(size=(x.size(1),), fill_value=x.size(0), dtype=torch.long,device=device)
return output, output_lengths
!pip install torchsummary
from torchsummary import summary
class_num = 11
net = Model(class_num)
net=net.to(device)
summary(net, (3, 60, 160))
def create_tensor(tensor):
tensor = tensor.to(device)
return tensor
criterion = torch.nn.CTCLoss(blank=class_num - 1)
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
def calculat_acc(output, target, target_lengths):
output = torch.argmax(output, dim=-1)
output = output.permute(1, 0)
correct_num = 0
for predict, label, label_length in zip(output, target, target_lengths):
predict = torch.unique_consecutive(predict)
predict = predict[predict != (class_num - 1)]
if (predict.size()[0] == label_length.item()
and (predict == label[:label_length.item()]).all()):
correct_num += 1
return correct_num, target.size(0)
def train():
total = 0
correct = 0
run_loss = []
net.train()
for i, (images, labels, label_lengths) in enumerate(trainloader, 0):
images, labels, label_lengths = images.to(device), \
labels.to(device), \
label_lengths.to(device)
optimizer.zero_grad()
outputs, output_lengths = net(images)
# label_lengths = torch.full(size=(labels.size(0),),
# fill_value=labels.size(1), dtype=torch.long,device=device)
loss = criterion(outputs, labels, output_lengths, label_lengths)
loss.backward()
optimizer.step()
c, t = calculat_acc(outputs.data, labels.data, label_lengths.data)
correct += c
total += t
run_loss.append(loss.item())
return 100 * correct / total,np.array(run_loss).mean()
def evaluate():
total = 0
correct = 0
run_loss = []
net.eval()
with torch.no_grad():
for data in validloader:
images, labels, label_lengths = data
images, labels, label_lengths = images.to(device), labels.to(device), \
label_lengths.to(device)
outputs, output_lengths = net(images)
# label_lengths = torch.full(size=(labels.size(0),),
# fill_value=labels.size(1), dtype=torch.long,device=device)
loss = criterion(outputs, labels,output_lengths,label_lengths)
run_loss.append(loss.item())
c, t = calculat_acc(outputs, labels,label_lengths)
correct += c
total += t
return 100 * correct / total,np.array(run_loss).mean()
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 60, 160] 864
BatchNorm2d-2 [-1, 32, 60, 160] 64
LeakyReLU-3 [-1, 32, 60, 160] 0
Conv2d-4 [-1, 64, 30, 80] 18,432
BatchNorm2d-5 [-1, 64, 30, 80] 128
LeakyReLU-6 [-1, 64, 30, 80] 0
Conv2d-7 [-1, 32, 30, 80] 2,048
BatchNorm2d-8 [-1, 32, 30, 80] 64
LeakyReLU-9 [-1, 32, 30, 80] 0
Conv2d-10 [-1, 64, 30, 80] 18,432
BatchNorm2d-11 [-1, 64, 30, 80] 128
LeakyReLU-12 [-1, 64, 30, 80] 0
ResidualBlock-13 [-1, 64, 30, 80] 0
Conv2d-14 [-1, 128, 15, 40] 73,728
BatchNorm2d-15 [-1, 128, 15, 40] 256
LeakyReLU-16 [-1, 128, 15, 40] 0
Conv2d-17 [-1, 64, 15, 40] 8,192
BatchNorm2d-18 [-1, 64, 15, 40] 128
LeakyReLU-19 [-1, 64, 15, 40] 0
Conv2d-20 [-1, 128, 15, 40] 73,728
BatchNorm2d-21 [-1, 128, 15, 40] 256
LeakyReLU-22 [-1, 128, 15, 40] 0
ResidualBlock-23 [-1, 128, 15, 40] 0
Conv2d-24 [-1, 64, 15, 40] 8,192
BatchNorm2d-25 [-1, 64, 15, 40] 128
LeakyReLU-26 [-1, 64, 15, 40] 0
Conv2d-27 [-1, 128, 15, 40] 73,728
BatchNorm2d-28 [-1, 128, 15, 40] 256
LeakyReLU-29 [-1, 128, 15, 40] 0
ResidualBlock-30 [-1, 128, 15, 40] 0
AdaptiveMaxPool2d-31 [-1, 128, 1, 40] 0
QLNet-32 [-1, 128, 1, 40] 0
GRU-33 [[-1, 40, 160], [-1, 2, 80]] 0
Linear-34 [-1, 40, 11] 1,771
LogSoftmax-35 [-1, 2, 11] 0
================================================================
Total params: 280,523
Trainable params: 280,523
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.11
Forward/backward pass size (MB): 17.46
Params size (MB): 1.07
Estimated Total Size (MB): 18.64
----------------------------------------------------------------
accuracies = []
losses = []
TOTAL_EPOCH=30
for epoch in range(TOTAL_EPOCH):
print("Epoch %d/%d"%(epoch,TOTAL_EPOCH))
start=time.time()
train_acc,train_loss=train()
val_acc,val_loss = evaluate()
if len(accuracies) == 0 or val_acc > max(accuracies):
torch.save(net.state_dict(), 'vary_base_cov_mnist.pt')
accuracies.append(val_acc)
end=time.time()
print("%ds loss: %.5f - accuracy: %.5f - val_loss: %.5f - val_accuracy: %.5f"%((end-start),train_loss,train_acc,val_loss,val_acc))
Epoch 0/30
41s loss: 2.74111 - accuracy: 0.00000 - val_loss: 2.56225 - val_accuracy: 0.00000
Epoch 1/30
42s loss: 1.45840 - accuracy: 26.52800 - val_loss: 0.12558 - val_accuracy: 88.26000
Epoch 2/30
42s loss: 0.05355 - accuracy: 94.46800 - val_loss: 0.03135 - val_accuracy: 96.22000
Epoch 3/30
42s loss: 0.02199 - accuracy: 97.18800 - val_loss: 0.03834 - val_accuracy: 94.82000
Epoch 4/30
42s loss: 0.01648 - accuracy: 97.75600 - val_loss: 0.01476 - val_accuracy: 97.98000
Epoch 5/30
42s loss: 0.01180 - accuracy: 98.32000 - val_loss: 0.01326 - val_accuracy: 98.06000
Epoch 6/30
42s loss: 0.00976 - accuracy: 98.45600 - val_loss: 0.01387 - val_accuracy: 97.70000
Epoch 7/30
43s loss: 0.00595 - accuracy: 99.04800 - val_loss: 0.00939 - val_accuracy: 98.45000
Epoch 8/30
43s loss: 0.00638 - accuracy: 99.02400 - val_loss: 0.00661 - val_accuracy: 98.97000
Epoch 9/30
42s loss: 0.00706 - accuracy: 98.91200 - val_loss: 0.00998 - val_accuracy: 98.19000
Epoch 10/30
42s loss: 0.00462 - accuracy: 99.14400 - val_loss: 0.00469 - val_accuracy: 99.06000
Epoch 11/30
42s loss: 0.00368 - accuracy: 99.34800 - val_loss: 0.02754 - val_accuracy: 95.45000
Epoch 12/30
42s loss: 0.00945 - accuracy: 98.41600 - val_loss: 0.04272 - val_accuracy: 92.97000
Epoch 13/30
42s loss: 0.00397 - accuracy: 99.23200 - val_loss: 0.00699 - val_accuracy: 98.77000
Epoch 14/30
42s loss: 0.00280 - accuracy: 99.50400 - val_loss: 0.00525 - val_accuracy: 99.13000
Epoch 15/30
42s loss: 0.00134 - accuracy: 99.78800 - val_loss: 0.00377 - val_accuracy: 99.36000
Epoch 16/30
42s loss: 0.00349 - accuracy: 99.31200 - val_loss: 0.02686 - val_accuracy: 96.14000
Epoch 17/30
42s loss: 0.00707 - accuracy: 98.65200 - val_loss: 0.00987 - val_accuracy: 98.29000
Epoch 18/30
42s loss: 0.00569 - accuracy: 98.98800 - val_loss: 0.01325 - val_accuracy: 97.69000
Epoch 19/30
42s loss: 0.00364 - accuracy: 99.27600 - val_loss: 0.00795 - val_accuracy: 98.68000
Epoch 20/30
42s loss: 0.00472 - accuracy: 99.16400 - val_loss: 0.02567 - val_accuracy: 96.12000
Epoch 21/30
42s loss: 0.00360 - accuracy: 99.29200 - val_loss: 0.67278 - val_accuracy: 63.42000
Epoch 22/30
43s loss: 0.00235 - accuracy: 99.48000 - val_loss: 0.01010 - val_accuracy: 98.39000
Epoch 23/30
42s loss: 0.00426 - accuracy: 99.22800 - val_loss: 0.01605 - val_accuracy: 97.35000
Epoch 24/30
42s loss: 0.00191 - accuracy: 99.61600 - val_loss: 0.00360 - val_accuracy: 99.46000
Epoch 25/30
42s loss: 0.00235 - accuracy: 99.58400 - val_loss: 0.01313 - val_accuracy: 97.75000
Epoch 26/30
42s loss: 0.00737 - accuracy: 98.70400 - val_loss: 0.01177 - val_accuracy: 97.39000
Epoch 27/30
42s loss: 0.00328 - accuracy: 99.30400 - val_loss: 0.08095 - val_accuracy: 79.97000
Epoch 28/30
42s loss: 0.00321 - accuracy: 99.38800 - val_loss: 0.01018 - val_accuracy: 98.28000
Epoch 29/30
42s loss: 0.00349 - accuracy: 99.30800 - val_loss: 0.00663 - val_accuracy: 98.92000
train_weights = "vary_base_cov_mnist.pt"
train_weights_dict = torch.load(train_weights)
model = Model(class_num)
model.load_state_dict(train_weights_dict, strict=True)
model.to(device)
Model(
(feature_extractor): QLNet(
(conv1): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): LeakyReLU(negative_slope=0.01, inplace=True)
)
(conv2): Sequential(
(0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): LeakyReLU(negative_slope=0.01, inplace=True)
)
(layer1): Sequential(
(0): ResidualBlock(
(conv1): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.01, inplace=True)
)
)
(conv3): Sequential(
(0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): LeakyReLU(negative_slope=0.01, inplace=True)
)
(layer2): Sequential(
(0): ResidualBlock(
(conv1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.01, inplace=True)
)
(1): ResidualBlock(
(conv1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): LeakyReLU(negative_slope=0.01, inplace=True)
)
)
(adapt_max_pool2d): AdaptiveMaxPool2d(output_size=(1, 40))
)
(gru): GRU(128, 80, num_layers=2, batch_first=True, bidirectional=True)
(fc): Linear(in_features=160, out_features=11, bias=True)
(log_softmax): LogSoftmax(dim=2)
)
%matplotlib inline
dataiter=iter(testloader)
images,labels, label_lengths=dataiter.next()
model.eval()
results=[]
start=time.time()
with torch.no_grad():
predicts = model(images.to(device))[0]
predicts = predicts.cpu()
output = torch.argmax(predicts, dim=-1)
output = output.permute(1, 0)
correct_num = 0
for predict, label, label_length in zip(output, labels, label_lengths):
predict = torch.unique_consecutive(predict)
predict = predict[predict != (class_num - 1)]
if (predict.size()[0] == label_length.item()
and (predict == label[:label_length.item()]).all()):
correct_num += 1
results.append((predict.numpy(),label[:label_length.item()].numpy()))
test_dataX=images.to('cpu').numpy()
test_dataX=np.transpose(test_dataX,(0,2,3,1))
test_dataX=(test_dataX*0.5)+0.5
end=time.time()
print(f"Test Acc : {correct_num}/{len(results)}")
plt.figure(figsize=(18,16))
for i in range(len(test_dataX)):
img=test_dataX[i]
plt.subplot(9,9,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
label=''.join(map(str,results[i][0].tolist()))
real_label=''.join(map(str,results[i][1].tolist()))
if (results[i][0].shape==results[i][1].shape) and (results[i][0]==results[i][1]).all():
color = 'blue'
plt.xlabel(label)
else:
color = 'red'
plt.xlabel('{}({})'.format(label,real_label), color=color)
plt.imshow(img)
plt.tight_layout()
plt.show()
Test Acc : 81/81
plt.plot(accuracies,label='acc')
plt.legend(loc='lower right')
plt.grid()
plt.show()