实验中采用的卷积神经网络为AlexNet,可视化工具为visdom,数据集由1547张正样本和1547张负样本组成。
1.自定义数据集处理(dataload.py):
import torch
import os, glob
import random, csv
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
class Dataload(Dataset):
    """Image-classification dataset read from a ``root/<class_name>/<image>`` tree.

    On first use two helper files are written into ``root``:

    * ``label-image_relation.txt`` - the class-name -> integer-label mapping;
    * ``images.csv`` - a shuffled list of ``image_path,label`` rows.

    Later instances re-read the same CSV, so the 8:1:1 train/val/test split
    stays stable (and disjoint) across the three modes.

    Args:
        root: dataset root directory containing one sub-directory per class.
        resize: side length (pixels) of the square tensor returned per image.
        mode: ``"train"`` selects the first 80% of the CSV rows, ``"val"`` the
            next 10%; any other value selects the last 10% (test).
    """

    # Per-channel statistics passed to Normalize; denormalize() inverts them.
    _MEAN = [0.485, 0.456, 0.406]
    _STD = [0.229, 0.224, 0.225]

    def __init__(self, root, resize, mode):  # root is the dataset root directory
        super(Dataload, self).__init__()
        self.root = root
        self.resize = resize
        self.mode = mode
        self.name2label = {}
        # Built on first __getitem__ call so construction stays cheap.
        self._transform = None
        self.save_label_image_relation('label-image_relation.txt')
        self.images_path, self.labels = self.load_csv('images.csv')

        # All samples are read at once and split 8:1:1.  The boundaries are
        # computed a single time so images and labels can never be sliced
        # with different indices.
        total = len(self.images_path)
        train_end, val_end = int(0.8 * total), int(0.9 * total)
        if mode == "train":  # 0 -> 80%
            bounds = slice(None, train_end)
        elif mode == 'val':  # 80% -> 90%
            bounds = slice(train_end, val_end)
        else:  # 90% -> 100%
            bounds = slice(val_end, None)
        self.images = self.images_path[bounds]
        self.labels = self.labels[bounds]

    def save_label_image_relation(self, filename):
        """Fill ``self.name2label`` and persist it to ``root/filename``.

        Every sub-directory of ``root`` (in sorted order) is a class and gets
        the next free integer label.  The text file is only written when it
        does not exist yet; it is informational and never read back.
        """
        for name in sorted(os.listdir(self.root)):
            if not os.path.isdir(os.path.join(self.root, name)):
                continue
            # setdefault keeps existing ids stable if this method is re-run.
            self.name2label.setdefault(name, len(self.name2label))
        # print("label-image_relation:", self.name2label)
        target = os.path.join(self.root, filename)
        if not os.path.exists(target):
            with open(target, mode='w', newline='') as f:
                for name, label in self.name2label.items():
                    f.write("\nclass:" + name + ' label:' + str(label))
            print("write label image set into txt file:", filename)

    def load_csv(self, filename):
        """Return ``(image_paths, labels)`` from ``root/filename``.

        If the CSV does not exist it is created first: every file inside each
        class directory is collected, the list is shuffled, and one
        ``path,label`` row is written per image, for example
        ``pokemon/mewtwo/00000005.png,2``.
        """
        csv_path = os.path.join(self.root, filename)
        if not os.path.exists(csv_path):
            images_path = []
            for name in self.name2label:
                # '*' is a wildcard: take every entry of the class directory
                images_path += glob.glob(os.path.join(self.root, name, '*'))
            print("image numbers:", len(images_path))
            random.shuffle(images_path)
            # write into csv file
            with open(csv_path, mode='w', newline='') as f:
                csv_writer = csv.writer(f)
                for img_path in images_path:
                    # The parent directory name is the class name.
                    name = os.path.basename(os.path.dirname(img_path))
                    csv_writer.writerow([img_path, self.name2label[name]])
            print("write into csv file:", filename)

        # read from csv file
        images_path, labels = [], []
        with open(csv_path, mode='r', newline='') as f:
            for row in csv.reader(f):
                if not row:  # tolerate blank lines in a hand-edited CSV
                    continue
                img_path, label = row
                images_path.append(img_path)
                labels.append(int(label))
        print("read from csv file:", filename)
        return images_path, labels

    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.images)

    def denormalize(self, x_hat):
        """Undo the Normalize step: ``x = x_hat * std + mean``.

        Accepts a ``(3, H, W)`` image or a ``(N, 3, H, W)`` batch; the
        statistics are created on ``x_hat``'s device so the call also works
        for tensors that do not live on the CPU.
        """
        mean = torch.tensor(self._MEAN, device=x_hat.device).view(3, 1, 1)
        std = torch.tensor(self._STD, device=x_hat.device).view(3, 1, 1)
        return x_hat * std + mean

    def _build_transform(self):
        """Build the PIL-image -> normalized-tensor pipeline for this split."""
        enlarged = int(self.resize * 1.25)
        steps = [transforms.Resize((enlarged, enlarged))]
        if self.mode == "train":
            # Random augmentation is limited to training so that validation
            # and test metrics are deterministic.
            steps.append(transforms.RandomRotation(15))
        steps += [
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
            transforms.Normalize(mean=self._MEAN, std=self._STD),
        ]
        return transforms.Compose(steps)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for sample ``idx``."""
        img_path, label = self.images[idx], self.labels[idx]
        if self._transform is None:
            self._transform = self._build_transform()
        # string path -> RGB image; the file is closed once decoded
        with Image.open(img_path) as raw:
            img = self._transform(raw.convert('RGB'))
        return img, torch.tensor(label)
# Manual smoke test for the dataset class
def main():
    """Stream training batches to visdom for visual inspection.

    A visdom server must already be running.  Each batch of 32 images is
    de-normalized and shown in the 'batch' window, its labels in the 'label'
    window, and the loop pauses 10 seconds before the next batch.
    """
    import time
    import visdom

    board = visdom.Visdom()  # visdom client
    dataset = Dataload(root='Concrete_Crack', resize=227, mode='train')
    batches = DataLoader(dataset, batch_size=32, shuffle=True)
    for images, targets in batches:
        restored = dataset.denormalize(images)
        board.images(restored, nrow=8, win='batch', opts=dict(title='batch'))
        board.text(str(targets.numpy()), win='label', opts=dict(title='batch-y'))
        time.sleep(10)


if __name__ == '__main__':
    main()
- 程序运行前应先在终端中执行 `python -m visdom.server`,启动visdom服务。
- 其中,`self.save_label_image_relation('label-image_relation.txt')` 用于将类别名称(子文件夹名)与数字标签之间的对应关系保存到文本文件:
- `__getitem__(self, idx)` 方法:当对实例对象执行 `P[key]` 索引运算时,就会自动调用类中的 `__getitem__()` 方法。
- 该数据集类适用于“根目录/类别子文件夹/图像文件”这种二级子文件夹的形式,如下图:
2.数据加载
from dataload import Dataload
BatchSize = 32

# Instantiate the three splits (train, val, test - in that order) from the
# same dataset root; each is then wrapped in its own DataLoader under the
# same name.
train_set, val_set, test_set = (
    Dataload(root='Concrete_Crack', resize=256, mode=split)
    for split in ('train', 'val', 'test')
)

# Only the training loader shuffles its samples.
train_set = DataLoader(
    train_set,
    batch_size=BatchSize,
    shuffle=True,
    num_workers=4,
)
val_set = DataLoader(
    val_set,
    batch_size=BatchSize,
    num_workers=2,
)
test_set = DataLoader(
    test_set,
    batch_size=BatchSize,
    num_workers=2,
)
3.构建模型
模型构建采用pytorch提供的AlexNet模型,并修改分类为二分类
# Start from torchvision's AlexNet with its pretrained weights loaded.
alexnet = torchvision.models.alexnet(pretrained=True)

# Keep every child module except the last one (the original classifier).
backbone_layers = list(alexnet.children())[:-1]

# Replacement classifier head that ends in a 2-way output.
head_layers = [
    Flatten(),
    nn.Dropout(),
    nn.Linear(256 * 6 * 6, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Linear(4096, 2),
]

model = torch.nn.Sequential(*backbone_layers, *head_layers).to(device=device)
print(model)
执行 `print(model)` 输出的模型结构如下:
Sequential(
(0): Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(1): AdaptiveAvgPool2d(output_size=(6, 6))
(2): Flatten()
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=9216, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Dropout(p=0.5, inplace=False)
(7): Linear(in_features=4096, out_features=4096, bias=True)
(8): ReLU(inplace=True)
(9): Linear(in_features=4096, out_features=2, bias=True)
)
4.训练
# Optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=LearningRate)
criteon = torch.nn.CrossEntropyLoss()

# Training state: best validation accuracy so far and where it was reached.
best_acc = best_epoch = best_step = 0
global_step = 0

# Create the two visdom line plots with a placeholder point at x = -1.
viz.line([0], [-1], win='loss', opts=dict(title='loss'))
viz.line([0], [-1], win='val_acc', opts=dict(title='val_acc'))

for epoch in range(Epochs):
    for step, (x, y) in enumerate(train_set):
        # Re-enter training mode on every step
        # (presumably evalute() switches the model to eval mode - TODO confirm).
        model.train()
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        loss = criteon(model(x), y)
        loss.backward()
        optimizer.step()

        viz.line([loss.item()], [global_step], win='loss', update='append')
        global_step += 1

        progress = step * BatchSize * 100 / train_length
        print('Epoch:{}/{} step:{} training:{}%'.format(epoch, Epochs, step, progress))

        # Validate only on every 100th step of the epoch.
        if step % 100 != 0:
            continue

        val_acc = evalute(model, val_set)
        if val_acc > best_acc:
            # New best: remember where it happened and checkpoint the weights.
            best_epoch, best_step, best_acc = epoch, step, val_acc
            torch.save(model.state_dict(), 'best.mdl')
        viz.line([val_acc], [global_step], win='val_acc', update='append')

print('best acc:', best_acc, 'best epoch:', best_epoch, 'best step:', best_step, )
5.测试
import torch
from torchvision import transforms
from PIL import Image
import torchvision
from utils import Flatten, plot_image
from torch import nn
import os
# Use the GPU when one is available; otherwise fall back to the CPU so the
# script still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def denormalize(x_hat):
    """Invert per-channel normalization: ``x = x_hat * std + mean``.

    Args:
        x_hat: normalized image tensor of shape ``(3, H, W)`` or a batch of
            shape ``(N, 3, H, W)``.

    Returns:
        A tensor of the same shape with the normalization undone.  The
        statistics are created on ``x_hat``'s device, so inputs that are not
        on the CPU are handled without a device-mismatch error.
    """
    mean = torch.tensor([0.485, 0.456, 0.406], device=x_hat.device).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225], device=x_hat.device).view(3, 1, 1)
    return x_hat * std + mean
def main():
    """Classify every image in ``test/`` with the checkpoint ``best.mdl``.

    The network is rebuilt with the same layer layout used for training, the
    saved state dict is loaded into it, and each image (in sorted file-name
    order) is resized to 227x227, normalized and classified.  The predicted
    class index is plotted via ``plot_image`` and printed.
    """
    # Every parameter is overwritten by the checkpoint below (load_state_dict
    # is strict by default), so the pretrained download is skipped.
    alexnet = torchvision.models.alexnet(pretrained=False)
    model = torch.nn.Sequential(
        *list(alexnet.children())[:-1],
        Flatten(),
        nn.Dropout(),
        nn.Linear(256 * 6 * 6, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, 2),
    ).to(device=device)
    # map_location lets the checkpoint load onto whatever device is in use.
    model.load_state_dict(torch.load('best.mdl', map_location=device))
    model.eval()  # inference mode: disables dropout
    print('loaded from ckpt!')

    # Deterministic preprocessing: no random augmentation at inference time.
    transform = transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    with torch.no_grad():
        for img_name in sorted(os.listdir(os.path.join('test/'))):
            img_name = 'test/' + img_name
            # string path -> RGB image; the file is closed once decoded
            with Image.open(img_name) as raw:
                image_tensor = transform(raw.convert('RGB'))
            # De-normalized (H, W, C) array for plotting; computed before the
            # tensor is moved to the inference device.
            image = denormalize(image_tensor).cpu().permute(1, 2, 0).numpy()
            image_tensor = torch.unsqueeze(image_tensor, 0).to(device)
            print(image.shape)
            out = model(image_tensor)
            pred = out.argmax(dim=1)
            plot_image(image, pred.item(), img_name)
            print("img_name:{}:".format(img_name), pred.item())


if __name__ == '__main__':
    main()