孪生网络介绍
个人理解:孪生网络就是一次学习两张图片(两张图片是一类或者不是一类),从中发现它们的相似或者不同,等网络学习完成之后,再给网络输入两张图片即可知道它们是否是一类。
实验结果预览
人脸图片:
随机选的人脸图片:
手写签名图片:
随机选的手写签名图片:
达到的效果:
给出两张图片的相似度(一个数值,越大越可能相似),并且可以根据该数值设置一个阈值来进行预测,人脸数据集的预测正确率为90%,手写签名数据集的预测正确率为84.9%。
实现步骤
1. 加载数据
1.1 定义自己的数据集
# Custom dataset for Siamese training: each sample is a pair of images plus
# a label telling whether the two images belong to the same class.
class SiameseDataset(Dataset):  # requires: from torch.utils.data import Dataset
    """Wrap an ImageFolder dataset and yield (img0, img1, label) pairs.

    label is 1.0 when the two images come from DIFFERENT classes and
    0.0 when they come from the same class.
    """

    # imageFolderDataset: a torchvision.datasets.ImageFolder instance
    # transform: a torchvision.transforms.Compose (or None)
    # should_invert: invert grayscale values (e.g. white-on-black signatures)
    def __init__(self, imageFolderDataset, transform=None, should_invert=True):
        self.imageFolderDataset = imageFolderDataset
        self.transform = transform
        self.should_invert = should_invert

    # This method is the important one: it must return the pair of images and
    # the label the Siamese network needs, keeping same/different pairs at
    # roughly a 1:1 ratio.
    def __getitem__(self, index):
        """Return a random image pair; `index` is ignored on purpose so a
        fresh pair is sampled on every access."""
        # imageFolderDataset.imgs is a list of (path, class_index) tuples, e.g.
        # [('./data/dogcat_2/cat/cat.12484.jpg', 0),
        #  ('./data/dogcat_2/dog/dog.12498.jpg', 1), ...]
        img0_tuple = random.choice(self.imageFolderDataset.imgs)
        # Flip a coin so about half the pairs are same-class pairs.
        should_get_same_class = random.randint(0, 1)
        # NOTE(review): the rejection sampling below assumes at least two
        # classes with >=2 images each; otherwise a loop never terminates.
        if should_get_same_class:
            while True:
                img1_tuple = random.choice(self.imageFolderDataset.imgs)
                if img0_tuple[1] == img1_tuple[1]:
                    break
        else:
            while True:
                img1_tuple = random.choice(self.imageFolderDataset.imgs)
                if img0_tuple[1] != img1_tuple[1]:
                    break
        # Open both images and convert to grayscale. The context managers
        # close the underlying file handles (PIL opens lazily, so the
        # original code leaked one handle per image).
        with Image.open(img0_tuple[0]) as im0:
            img0 = im0.convert('L')
        with Image.open(img1_tuple[0]) as im1:
            img1 = im1.convert('L')
        # Optionally invert black/white.
        if self.should_invert:
            img0 = PIL.ImageOps.invert(img0)
            img1 = PIL.ImageOps.invert(img1)
        # Data augmentation / conversion to tensors.
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
        # Label tensor (float32, shape (1,)): 1.0 -> different classes.
        # torch.tensor replaces the original numpy round-trip with the
        # same dtype/shape result.
        label = torch.tensor(
            [float(img0_tuple[1] != img1_tuple[1])], dtype=torch.float32)
        return img0, img1, label

    def __len__(self):
        return len(self.imageFolderDataset.imgs)
1.2 用ImageFolder返回一个数据集类别
# Build an ImageFolder dataset first, then feed it into our SiameseDataset
# wrapper to obtain pair samples (siamese_dataset).
folder_dataset = datasets.ImageFolder(root=Config.train_dir)
# Resize to 100x100 (matches the net's 8*100*100 fc input) and convert to tensor.
transform = transforms.Compose([transforms.Resize((100, 100)),
transforms.ToTensor()])
siamese_dataset = SiameseDataset(imageFolderDataset=folder_dataset, transform=transform, should_invert=False)
1.3 数据可视化
# A Dataset only abstracts single samples (__getitem__ returns one sample at
# a time); DataLoader groups them into batches.
vis_dataset = DataLoader(siamese_dataset, shuffle=True, batch_size=8, num_workers=8)
# The DataLoader is iterable.
dataiter = iter(vis_dataset)
# Equivalent to: img0, img1, label = next(dataiter)
example_batch = next(dataiter)
# dim 0 stacks vertically, dim 1 horizontally (here: img0 batch above img1 batch)
concatenate = torch.cat((example_batch[0], example_batch[1]), 0)
imshow(torchvision.utils.make_grid(concatenate))
# (0 means the pair is the same class, 1 means it is not)
print(example_batch[2].numpy())
输出:
[[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[0.]]
2. 定义网络结构
# Network definition.
# Note the usual exercise of tracking each convolution's output size:
# 3x3 kernels with padding=1 keep the 100x100 spatial size unchanged.
# Only forward() matters to callers; it maps each of the two input images
# to a 5-dimensional embedding using the SAME (shared) weights.
class SiameseNet(nn.Module):
    """Shared-weight CNN that embeds a 1x100x100 grayscale image into R^5."""

    def __init__(self):
        super(SiameseNet, self).__init__()
        # Feature extractor: three conv/BN/ReLU stages, 1 -> 4 -> 8 -> 8 channels.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 4, kernel_size=3, padding=1),
            nn.BatchNorm2d(4),
            nn.ReLU(inplace=True),
            nn.Conv2d(4, 8, kernel_size=3, padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(inplace=True),
            nn.Conv2d(8, 8, kernel_size=3, padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(inplace=True),
        )
        # MLP head: 8*100*100 -> 500 -> 500 -> 5, with dropout between layers.
        self.fc = nn.Sequential(
            nn.Linear(8 * 100 * 100, 500),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(500, 500),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(500, 5),
        )

    def forward_once(self, x):
        """Embed a single batch of images."""
        features = self.cnn(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

    def forward(self, img0, img1):
        """Run both images through the same branch and return both embeddings."""
        return self.forward_once(img0), self.forward_once(img1)
3. 损失函数
# Contrastive loss: like the network, callers only care about forward().
# Pulls same-class embeddings together and pushes different-class embeddings
# at least `margin` apart.
class ContrastiveLoss(nn.Module):
    """mean((1-y)*d^2 + y*max(margin-d, 0)^2), where y=1 for different-class pairs."""

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        # Per-pair Euclidean distance, kept as shape (N, 1) to match `label`.
        dist = F.pairwise_distance(output1, output2, keepdim=True)
        hinge = torch.clamp(self.margin - dist, min=0.0)
        similar_term = (1 - label) * dist.pow(2)
        dissimilar_term = label * hinge.pow(2)
        return torch.mean(similar_term + dissimilar_term)
4. 训练,保存和加载模型
4.1 定义训练函数
# Dataset, network, loss function, optimizer.
# Pipeline: ImageFolder gives a folder-per-class dataset; SiameseDataset wraps
# it to yield (img0, img1, label) pairs; DataLoader groups pairs into batches.
# Training setup
train_dataloader = DataLoader(siamese_dataset,
shuffle=True,
num_workers=8,
batch_size=Config.train_batch_size)
net = SiameseNet().cuda()
criterion = ContrastiveLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0005)  # from torch import optim
# Two lists drive the loss curve: losses go into loss_history and the
# matching iteration counts into counter.
def train():
    """Train for Config.train_epoch epochs over train_dataloader.

    Uses the module-level net/criterion/optimizer, plots the loss curve
    with show_loss, and returns the trained network.
    """
    counter = []
    loss_history = []
    iteration_number = 0
    for epoch in range(Config.train_epoch):
        for i, data in enumerate(train_dataloader):
            img0, img1, label = data
            img0, img1, label = img0.cuda(), img1.cuda(), label.cuda()
            # Gradients accumulate in PyTorch, so clear them every step.
            optimizer.zero_grad()
            output1, output2 = net(img0, img1)
            loss = criterion(output1, output2, label)
            # Backpropagate, then apply the parameter update.
            loss.backward()
            optimizer.step()
            # Record/print every 10th batch.
            if i % 10 == 0:
                print('Epoch number {}\n Current loss {}\n'.format(epoch, loss.item()))
                iteration_number += 10
                counter.append(iteration_number)
                loss_history.append(loss.item())
    show_loss(counter, loss_history)
    return net
4.2 训练和保存
# Train, then persist only the learned weights (state_dict), not the module.
model = train()
torch.save(model.state_dict(), './model/model.pt')
print('Model saved successfully')
训练误差曲线图:
200个epoch后的loss为:
4.3 加载保存的模型
# Pick the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SiameseNet().to(device)
# map_location remaps the checkpoint's tensors onto `device`; without it a
# checkpoint saved on GPU fails to load on a CPU-only machine, defeating
# the fallback on the line above.
model.load_state_dict(torch.load('./model/model.pt', map_location=device))
5. 测试
5.1 载入数据
# Build the test-set pair dataset exactly like the training one.
folder_dataset_test = datasets.ImageFolder(root=Config.test_dir)
siamese_dataset_test = SiameseDataset(imageFolderDataset=folder_dataset_test,
transform=transform,
should_invert=False)
# batch_size=1: evaluate one image pair at a time.
test_dataloader = DataLoader(siamese_dataset_test, num_workers=6, batch_size=1, shuffle=True)
dataiter = iter(test_dataloader)
5.2 抽样测试
# Print the sample outputs to view their dissimilarity.
# Fixes vs. original: the unused list_1 tensor is gone, the label is read
# with .item() instead of comparing against a FloatTensor, and the distance
# is floored with a tiny epsilon so a (near-)zero distance cannot divide by
# zero when converting to a similarity score.
counter = 0
for i, data in enumerate(test_dataloader):
    x0, x1, label = data
    concatenated = torch.cat((x0, x1), 0)
    output1, output2 = model(x0.to(device), x1.to(device))
    distance = F.pairwise_distance(output1, output2)
    # label 0 -> same class, 1 -> different class
    label_text = "Same" if label.item() == 0 else "Not same"
    similarity = 1 / max(distance.item(), 1e-8)
    imshow(torchvision.utils.make_grid(concatenated),
           'Similarity: {:.2f} Label: {}'.format(similarity, label_text))
    counter += 1
    if counter == 20:
        break
5.3 整体准确率测试
# Whole-test-set accuracy.
# 1. .item() returns the value of a one-element tensor as a plain Python number.
# 2. .tolist() returns the tensor as a (nested) list, addressable by position.
# Fixes vs. original: no forced .cuda() on the outputs (the model already
# runs on `device`; .cuda() crashed on CPU-only machines), the label is read
# via .item(), the similar/dissimilar decision compares the distance directly
# (1/d > 1  <=>  d < 1, avoiding a division), and the accuracy is computed
# once after the loop instead of on every iteration.
correct = 0
for i, data in enumerate(test_dataloader):
    x0, x1, label = data
    output1, output2 = model(x0.to(device), x1.to(device))
    distance = F.pairwise_distance(output1, output2)
    label = int(label.item())
    # Threshold rule: distance < 1 (similarity 1/d > 1) -> predict "same" (0).
    predicted = 0 if distance.item() < 1 else 1
    if label == predicted:
        correct = correct + 1
accuracy = (correct / len(test_dataloader)) * 100
print("Accuracy:{}%".format(accuracy))
参考链接:
[1]: https://innovationincubator.com/siamese-neural-network-with-pytorch-code-example/
[2]: https://github.com/harveyslash/Facial-Similarity-with-Siamese-Networks-in-Pytorch