论文源代码参考地址:
1.前处理程序
主要功能:将图片随机划分为训练集和验证集,训练模型,并保存离线模型
注意:建议使用英伟达显卡的CUDA功能(没有可用GPU时程序会退回到CPU运行,速度较慢)
"""
模型训练预处理
训练模型
保存离线模型
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
import torchvision
import numpy as np
import os
cwd = os.getcwd()
from PIL import Image
import time
import copy
import random
import cv2
import re
import shutil
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# %matplotlib inline
from torchsummary import summary
'''
Step1:Load the Cracks Data set and Visualize it
'''
# crack_images = os.listdir('Positive/')
# print("Number of Crack Images: ", len(crack_images))
#
# no_crack_images = os.listdir('Negative/')
# print("Number of No Crack Images: ", len(no_crack_images))
#
# ## Visualize Random images with cracks
# random_indices = np.random.randint(0, len(crack_images), size=4)
# print("*****************Random Images with Cracks**************************")
# random_images = np.array(crack_images)[random_indices.astype(int)]
#
# f, axarr = plt.subplots(2,2)
# axarr[0,0].imshow(mpimg.imread(os.path.join(cwd, 'Positive', random_images[0])))
# axarr[0,1].imshow(mpimg.imread(os.path.join(cwd, 'Positive', random_images[1])))
# axarr[1,0].imshow(mpimg.imread(os.path.join(cwd, 'Positive', random_images[2])))
# axarr[1,1].imshow(mpimg.imread(os.path.join(cwd, 'Positive', random_images[3])))
# # plt.show()
#
# ## Visualize Random images with no cracks
# random_indices = np.random.randint(0, len(no_crack_images), size=4)
# print("*****************Random Images without Cracks**************************")
# random_images = np.array(no_crack_images)[random_indices.astype(int)]
#
# f, axarr = plt.subplots(2,2)
# axarr[0,0].imshow(mpimg.imread(os.path.join(cwd, 'Negative', random_images[0])))
# axarr[0,1].imshow(mpimg.imread(os.path.join(cwd, 'Negative', random_images[1])))
# axarr[1,0].imshow(mpimg.imread(os.path.join(cwd, 'Negative', random_images[2])))
# axarr[1,1].imshow(mpimg.imread(os.path.join(cwd, 'Negative', random_images[3])))
# plt.show()
'''
Step2:Create Train and Val Data sets
'''
# Root folder of the data set: the directory the script was started from.
base_dir = cwd
# Snapshot of the entries in base_dir, taken once when the module is loaded.
files = os.listdir(base_dir)
print(base_dir)
def create_training_data(folder_name):
    """Move the class folder ``base_dir/<folder_name>`` to ``base_dir/train/<folder_name>``.

    The previous version looped over every entry of ``files`` and re-issued
    the same move for each name matching ``folder_name`` (interpreted as a
    regular expression), so a second matching entry made the move fail
    because the source was already gone.  The folder is now moved at most
    once, and only if it actually exists.

    Args:
        folder_name: Name of the class folder inside ``base_dir``
            (the file uses 'Positive' and 'Negative').

    Returns:
        None.  Nothing happens when the source folder does not exist.
    """
    source_dir = os.path.join(base_dir, folder_name)
    train_dir = os.path.join(base_dir, 'train', folder_name)

    if not os.path.isdir(source_dir):
        return

    # Make sure the parent 'train' folder exists so the move is a plain rename.
    os.makedirs(os.path.dirname(train_dir), exist_ok=True)
    shutil.move(source_dir, train_dir)
# create new folders: train/Positive and train/Negative
# create_training_data('Positive') # create train/Positive
# create_training_data('Negative') # create train/Negative
'''
Step3:Move images randomly from training to val folders
'''
# os.makedirs('val/Positive')
# os.makedirs('val/Negative')
# Source (train) and destination (val) folders for both classes.
# Every path ends with a trailing slash.
positive_train = base_dir + '/train/Positive/'
positive_val = base_dir + '/val/Positive/'
negative_train = base_dir + '/train/Negative/'
negative_val = base_dir + '/val/Negative/'
# File names currently in the training folders; listed once at module load,
# so both folders must already exist at this point.
positive_files = os.listdir(positive_train)
negative_files = os.listdir(negative_train)
print(len(positive_files), len(negative_files))
def move_data_from_train_to_val():
    """Randomly move about 15% of the training images into the val folders.

    Each file listed in ``positive_files`` / ``negative_files`` is moved with
    probability 0.15 (``random.random() > 0.85``) from its train folder to
    the matching val folder.

    The previous version returned right after the Positive loop, so the
    Negative images were never moved; both classes are handled now.  The
    destination folders are created when missing.

    Returns:
        int: 0, the value the reachable path of the previous version returned.
    """
    splits = (
        (positive_train, positive_val, positive_files),
        (negative_train, negative_val, negative_files),
    )
    for source_dir, target_dir, file_names in splits:
        os.makedirs(target_dir, exist_ok=True)
        for file_name in file_names:
            if random.random() > 0.85:
                shutil.move(os.path.join(source_dir, file_name), target_dir)
    return 0
# Report how many images currently sit in each validation folder.
print(len(os.listdir(positive_val)), len(os.listdir(negative_val)))

# ---------------------------------------------------------------------
# Step 4: PyTorch data loader and transforms
# ---------------------------------------------------------------------
# Per-channel normalisation constants.  They are hard-coded here, not
# computed from this data set (the values look like the usual ImageNet
# statistics).
mean_nums = [0.485, 0.456, 0.406]
std_nums = [0.229, 0.224, 0.225]

# Training pipeline: random crop/rotation/flips/colour jitter, then tensor
# conversion and normalisation.  Output images are 227x227.
_train_steps = [
    transforms.RandomResizedCrop(size=227),
    transforms.RandomRotation(degrees=10),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.15, contrast=0.15),
    transforms.ToTensor(),
    transforms.Normalize(mean_nums, std_nums),
]

# Validation pipeline: deterministic resize + centre crop, no augmentation.
_val_steps = [
    transforms.Resize(227),
    transforms.CenterCrop(227),
    transforms.ToTensor(),
    transforms.Normalize(mean_nums, std_nums),
]

chosen_transforms = {
    'train': transforms.Compose(_train_steps),
    'val': transforms.Compose(_val_steps),
}
print("transformer is OK!")
def load_dataset(format, batch_size):
    """Build a shuffled ``DataLoader`` for one split of the data set.

    Args:
        format: Split name; used both as the sub-folder of ``cwd`` to read
            and as the key into ``chosen_transforms`` ('train' or 'val').
        batch_size: Number of images per batch.

    Returns:
        tuple: ``(data_loader, number_of_images, class_names)``.
    """
    image_folder = datasets.ImageFolder(
        os.path.join(cwd, format),
        transform=chosen_transforms[format],
    )
    loader = DataLoader(
        image_folder,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    return loader, len(image_folder), image_folder.classes
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)
# ---------------------------------------------------------------------
# ---------------you should put next in main function------------------
# ----------------------------------------------------------------------
# train_loader, train_size, class_names = load_dataset('train', 8)
# print("Train Data Set size is: ", train_size)
# print("Class Names are: ", class_names)
# inputs, classes = next(iter(train_loader))
# print(inputs.shape, classes.shape)
# ---------------------------------------------------------------------
# --------------------⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆⬆-----------------------
# ---------------------------------------------------------------------
def imshow(inp, title=None):
    """Display a normalised image tensor with matplotlib.

    The tensor is converted to a channels-last array, the normalisation
    with ``mean_nums`` / ``std_nums`` is undone, and the result is clipped
    to [0, 1] before plotting.

    Args:
        inp: Image tensor; its axes are reordered with ``(1, 2, 0)``.
        title: Optional plot title.
    """
    channels_last = np.transpose(inp.numpy(), (1, 2, 0))
    scale = np.array([std_nums])
    offset = np.array([mean_nums])
    restored = np.clip(scale * channels_last + offset, 0, 1)

    plt.imshow(restored)
    if title is not None:
        plt.title(title)
    # Short pause so that the plot window gets refreshed.
    plt.pause(0.001)
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders`` / ``dataset_sizes`` dictionaries, with
    batches moved to the module-level ``device``.

    Changes compared with the previous version:
      * ``scheduler.step()`` is called once per epoch, after the training
        phase, instead of right after ``optimizer.step()`` (the per-batch
        position), so an epoch-based schedule such as ``StepLR`` decays at
        the intended rate.
      * Correct predictions are accumulated as a Python int, so the
        accuracy is a plain float and no tensor method is needed on it.

    Args:
        model: Network to train.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the trainable parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the weights of the epoch that
        reached the highest validation accuracy.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            model.train(is_training)  # train mode or eval mode

            current_loss = 0.0
            current_corrects = 0

            print('Iterating through data...')
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that the epoch
                # loss is a per-sample average.
                current_loss += loss.item() * inputs.size(0)
                current_corrects += torch.sum(preds == labels).item()

            if is_training:
                # One scheduler step per epoch, after the optimizer updates.
                scheduler.step()

            epoch_loss = current_loss / dataset_sizes[phase]
            epoch_acc = current_corrects / dataset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Keep a copy of the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_since = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_since // 60, time_since % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Restore the best weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model
def visualize_model(model, num_images=6):
    """Plot the model's predictions for the first validation images.

    Images come from the module-level ``dataloaders['val']``; each one is
    drawn in a two-column grid with its predicted class name as title.
    The model is switched to eval mode for the duration and put back into
    whatever mode it was in before returning.

    Args:
        model: Network used for the predictions.
        num_images: Number of images to draw before stopping.
    """
    previous_mode = model.training
    model.eval()
    shown = 0
    plt.figure()
    grid_rows = num_images // 2

    with torch.no_grad():
        for inputs, labels in dataloaders['val']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            _, preds = torch.max(model(inputs), 1)

            for position in range(inputs.size(0)):
                shown += 1
                axis = plt.subplot(grid_rows, 2, shown)
                axis.axis('off')
                axis.set_title('predicted: {}'.format(class_names[preds[position]]))
                imshow(inputs.cpu().data[position])

                # Enough images drawn: restore the mode and stop.
                if shown == num_images:
                    model.train(mode=previous_mode)
                    return
        model.train(mode=previous_mode)
# ---------------------------------------------------------------------
# Step 7: Inference
# ---------------------------------------------------------------------
def predict(model, test_image, print_class=False):
    """Classify one image and return its class name.

    The image goes through ``chosen_transforms['val']``, is reshaped to a
    batch of one 3x227x227 tensor and fed to ``model`` in eval mode.

    Changes compared with the previous version:
      * The input is moved to the device the model's parameters live on,
        instead of to CUDA whenever CUDA happens to be available, which
        mismatched a CPU-resident model on a GPU machine.
      * The class is the arg-max of the raw model output.  The previous
        ``exp()`` did not change the ranking but could overflow to ``inf``
        for large outputs and turn a clear decision into a tie.

    Args:
        model: Classifier returning one score per class.
        test_image: Image accepted by ``chosen_transforms['val']``.
        print_class: When true, also print the predicted class name.

    Returns:
        The class name looked up in the module-level ``idx_to_class``.
    """
    batch = chosen_transforms['val'](test_image).view(1, 3, 227, 227)

    first_param = next(model.parameters(), None)
    if first_param is not None:
        batch = batch.to(first_param.device)
    elif torch.cuda.is_available():
        # A model without parameters gives no device hint: keep the old rule.
        batch = batch.cuda()

    with torch.no_grad():
        model.eval()
        scores = model(batch)

    class_index = int(scores.argmax(dim=1)[0])
    class_name = idx_to_class[class_index]
    if print_class:
        print("Output class : ", class_name)
    return class_name
def predict_on_crops(input_image, height=227, width=227, save_crops=False):
    """Classify an image tile by tile and write a colour-coded overview.

    The image is read with OpenCV, cut into ``height`` x ``width`` tiles
    (tiles on the right/bottom edge may be smaller), and every tile is
    classified with the module-level ``base_model``.  Each tile gets its
    predicted label drawn on it and is tinted with the class colour
    (red for 'Positive', green otherwise, in BGR order).  The assembled
    image is saved as ``real_images/predictions/out_<name>.jpg``.

    Changes compared with the previous version:
      * Tiles are converted from OpenCV's BGR order to RGB before being
        handed to PIL / the model.
      * An unreadable image raises ``FileNotFoundError`` instead of failing
        later on ``None``.
      * The output folders are created when missing, so the final
        ``cv2.imwrite`` has somewhere to write.
      * The output name is derived once, with ``os.path`` helpers.

    Args:
        input_image: Path of the image file.
        height: Tile height in pixels.
        width: Tile width in pixels.
        save_crops: When true, each tinted tile is also written to
            ``real_images/out_<name>/img_<k>.png``.

    Returns:
        numpy.ndarray: The tinted image, same shape as the input image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``input_image``.
    """
    im = cv2.imread(input_image)
    if im is None:
        raise FileNotFoundError('Could not read image: {}'.format(input_image))
    imgheight, imgwidth = im.shape[:2]

    image_name = os.path.basename(os.path.splitext(input_image)[0])
    folder_name = 'out_' + image_name
    crops_dir = os.path.join('real_images', folder_name)
    predictions_dir = os.path.join('real_images', 'predictions')
    os.makedirs(predictions_dir, exist_ok=True)
    if save_crops:
        os.makedirs(crops_dir, exist_ok=True)

    k = 0
    output_image = np.zeros_like(im)
    for i in range(0, imgheight, height):
        for j in range(0, imgwidth, width):
            a = im[i:i + height, j:j + width]

            # OpenCV stores pixels as BGR; PIL (and the model input) is RGB.
            rgb_crop = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)
            predicted_class = predict(base_model, Image.fromarray(rgb_crop))

            # Class colour in BGR: red for cracks, green otherwise.
            if predicted_class == 'Positive':
                color = (0, 0, 255)
            else:
                color = (0, 255, 0)
            cv2.putText(a, predicted_class, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 1, cv2.LINE_AA)

            # Blend 10% of the class colour over the tile.
            b = np.zeros_like(a, dtype=np.uint8)
            b[:] = color
            add_img = cv2.addWeighted(a, 0.9, b, 0.1, 0)

            if save_crops:
                filename = os.path.join(crops_dir, 'img_{}.png'.format(k))
                cv2.imwrite(filename, add_img)

            output_image[i:i + height, j:j + width, :] = add_img
            k += 1

    cv2.imwrite(os.path.join(predictions_dir, folder_name + '.jpg'), output_image)
    return output_image
if __name__ == '__main__':
    # Sanity check: load the training split with a small batch and print
    # its size, class names and the shape of one batch.
    train_loader, train_size, class_names = load_dataset('train', 8)
    print("Train Data Set size is: ", train_size)
    print("Class Names are: ", class_names)
    inputs, classes = next(iter(train_loader))
    print(inputs.shape, classes.shape)

    # Optional preview of one augmented batch (disabled).  The figure is
    # only created when the preview is enabled, so no empty window appears.
    # out = torchvision.utils.make_grid(inputs)
    # plt.figure(figsize=(20, 10))
    # imshow(out, title=[x.data.numpy() for x in classes])
    # plt.pause(0)  # keep displaying

    # Class index -> class name, used by predict().
    idx_to_class = {0: 'Negative', 1: 'Positive'}

    # -----------------------------------------------------------------
    # Step 5: Set up the pretrained model
    # -----------------------------------------------------------------
    resnet50 = models.resnet50(pretrained=True)

    # Freeze the pretrained backbone ...
    for param in resnet50.parameters():
        param.requires_grad = False

    # ... and replace the final layer with a small trainable two-class head.
    fc_inputs = resnet50.fc.in_features
    resnet50.fc = nn.Sequential(
        nn.Linear(fc_inputs, 128),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(128, 2)
    )

    resnet50 = resnet50.to(device)
    # summary() prints the table itself; wrapping it in print() only added
    # a stray "None" line.
    summary(resnet50, (3, 227, 227))

    # Loss and optimizer.  Only the new head has trainable parameters, so
    # only those are handed to the optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(resnet50.fc.parameters())
    # optimizer = optim.SGD(resnet50.fc.parameters(), lr=0.001, momentum=0.9)

    # Multiply the learning rate by 0.01 after every scheduler step.
    # NOTE(review): the earlier comment said "0.1 every 3 epochs", which
    # does not match step_size=1 / gamma=0.01 -- confirm the intended schedule.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.01)

    # Module-level state read by train_model() and visualize_model().
    dataloaders = {}
    dataset_sizes = {}
    batch_size = 32
    dataloaders['train'], dataset_sizes['train'], class_names = load_dataset('train', batch_size)
    dataloaders['val'], dataset_sizes['val'], _ = load_dataset('val', batch_size)

    # -----------------------------------------------------------------
    # Step 6: Train
    # -----------------------------------------------------------------
    base_model = train_model(resnet50, criterion, optimizer, exp_lr_scheduler, num_epochs=6)

    # Save the whole model object.
    # NOTE(review): the live-detection script in this file loads
    # 'base_model.pth', not 'base_model_new.pth' -- confirm which name is meant.
    # torch.save(base_model.state_dict(), 'base_model_state_dict.pth')
    torch.save(base_model, 'base_model_new.pth')

    visualize_model(base_model)
    # plt.show() blocks until the windows are closed; no extra pause needed.
    plt.show()
2.实时检测
使用训练好的离线模型,对摄像头采集的画面进行实时预测;目前运行比较卡顿
"""
裂纹检测
使用离线模型对实时照片预测
从摄像头实时监测是否含有裂纹
问题:卡顿
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
import torchvision
import numpy as np
import os
import cv2
cwd = os.getcwd()
from PIL import Image
import time
import copy
import random
import cv2
import re
import shutil
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from torchsummary import summary
# %matplotlib inline
# Working folder and a snapshot of its entries.
base_dir = cwd
files = os.listdir(base_dir)

# Hard-coded per-channel normalisation constants (not computed from the
# data at run time).
mean_nums = [0.485, 0.456, 0.406]
std_nums = [0.229, 0.224, 0.225]

# Class index -> class name, used by predict().
idx_to_class = {0: 'Negative', 1: 'Positive'}

# Pre-processing pipelines keyed by split name.  'train' applies random
# augmentation; 'val' is the deterministic resize + centre crop to 227x227.
chosen_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(size=227),
        transforms.RandomRotation(degrees=10),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.15, contrast=0.15),
        transforms.ToTensor(),
        transforms.Normalize(mean_nums, std_nums),
    ]),
    val=transforms.Compose([
        transforms.Resize(227),
        transforms.CenterCrop(227),
        transforms.ToTensor(),
        transforms.Normalize(mean_nums, std_nums),
    ]),
)
def predict(model, test_image, print_class=False):
    """Classify one image and return its class name.

    The image goes through ``chosen_transforms['val']``, is reshaped to a
    batch of one 3x227x227 tensor and fed to ``model`` in eval mode.

    Changes compared with the previous version:
      * The input is moved to the device the model's parameters live on,
        instead of to CUDA whenever CUDA happens to be available, which
        mismatched a CPU-resident model on a GPU machine.
      * The class is the arg-max of the raw model output.  The previous
        ``exp()`` did not change the ranking but could overflow to ``inf``
        for large outputs and turn a clear decision into a tie.

    Args:
        model: Classifier returning one score per class.
        test_image: Image accepted by ``chosen_transforms['val']``.
        print_class: When true, also print the predicted class name.

    Returns:
        The class name looked up in the module-level ``idx_to_class``.
    """
    batch = chosen_transforms['val'](test_image).view(1, 3, 227, 227)

    first_param = next(model.parameters(), None)
    if first_param is not None:
        batch = batch.to(first_param.device)
    elif torch.cuda.is_available():
        # A model without parameters gives no device hint: keep the old rule.
        batch = batch.cuda()

    with torch.no_grad():
        model.eval()
        scores = model(batch)

    class_index = int(scores.argmax(dim=1)[0])
    class_name = idx_to_class[class_index]
    if print_class:
        print("Output class : ", class_name)
    return class_name
def predict_on_crops(input_image, height=227, width=227, save_crops=False):
    """Classify a camera frame tile by tile and return a colour-coded copy.

    The frame is cut into ``height`` x ``width`` tiles (tiles on the
    right/bottom edge may be smaller); every tile is classified with the
    module-level ``base_model`` and tinted with the class colour (red for
    'Positive', green otherwise, in BGR order).

    Changes compared with the previous version:
      * The tile grid is derived from the frame's own shape instead of the
        globals ``imgheight`` / ``imgwidth``, so frames of any size are
        covered completely and never indexed past their border.
      * Tiles are converted from OpenCV's BGR order to RGB before being
        handed to PIL / the model.
      * Unused locals were removed.

    Args:
        input_image: Frame as an OpenCV image array (height x width x 3).
        height: Tile height in pixels.
        width: Tile width in pixels.
        save_crops: Kept for signature compatibility; saving crops is
            disabled in this script, so the flag has no effect.

    Returns:
        numpy.ndarray: The tinted frame, same shape as ``input_image``.

    Raises:
        ValueError: If ``input_image`` is ``None``.
    """
    if input_image is None:
        raise ValueError('input_image is None (no frame to process)')
    im = input_image
    frame_height, frame_width = im.shape[:2]

    output_image = np.zeros_like(im)
    for i in range(0, frame_height, height):
        for j in range(0, frame_width, width):
            a = im[i:i + height, j:j + width]

            # OpenCV stores pixels as BGR; PIL (and the model input) is RGB.
            rgb_crop = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)
            predicted_class = predict(base_model, Image.fromarray(rgb_crop))

            # Class colour in BGR: red for cracks, green otherwise.
            if predicted_class == 'Positive':
                color = (0, 0, 255)
            else:
                color = (0, 255, 0)

            # Blend 10% of the class colour over the tile.
            b = np.zeros_like(a, dtype=np.uint8)
            b[:] = color
            output_image[i:i + height, j:j + width, :] = cv2.addWeighted(a, 0.9, b, 0.1, 0)
    return output_image
if __name__ == '__main__':
    print(base_dir)
    cap = cv2.VideoCapture(0)  # open the default camera

    # Load the complete pickled model onto the GPU when there is one,
    # otherwise onto the CPU.
    # NOTE: torch.load unpickles the file -- only load model files you trust.
    # NOTE(review): recent PyTorch releases default to weights_only=True,
    # which rejects whole-model pickles; confirm against the installed version.
    base_model = torch.load(
        'base_model.pth',
        map_location='cuda' if torch.cuda.is_available() else 'cpu',
    )

    # Request a capture size; the driver may ignore it, so the real frame
    # size is read from every frame below.
    a = cap.set(cv2.CAP_PROP_FRAME_WIDTH, 227 * 2)
    b = cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 227 * 4)
    print(a, b)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame delivered (camera missing or disconnected).
                print('Failed to read a frame from the camera')
                break

            # Actual frame size, kept as module-level names for code that
            # reads them.
            imgheight, imgwidth = frame.shape[:2]

            output_image = predict_on_crops(frame, 128, 128)
            cv2.imshow('img', output_image)

            # Press 'q' to quit.
            if cv2.waitKey(100) & 0xff == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even after an error.
        cap.release()
        cv2.destroyAllWindows()
# plt.figure(figsize=(10, 10))
# output_image = predict_on_crops('real_images/concrete_crack2.jpg')
# plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
#
# plt.figure(figsize=(10, 10))
# output_image = predict_on_crops('real_images/road_surface_crack1.jpg', 128, 128)
# plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
#
# plt.figure(figsize=(10, 10))
# output_image = predict_on_crops('real_images/road_surface_crack3.jpg', 128, 128)
# plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
# plt.pause(0)