from __future__ import print_function
from __future__ import division
import torch, torchvision
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, models, transforms
import time, os, copy
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train and validate `model`, returning the best weights seen on 'val'.

    Args:
        model: network to train (already moved to the global `device`).
        dataloaders: dict with 'train' and 'val' DataLoaders.
        criterion: loss function.
        optimizer: optimizer over the trainable parameters.
        num_epochs: number of epochs to run.
        is_inception: if True, add the Inception v3 auxiliary loss in training.

    Returns:
        (model, val_acc_history): model loaded with the best validation
        weights, and the per-epoch validation accuracies.
    """
    since = time.time()

    val_acc_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {} / {}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch runs a training pass followed by a validation pass.
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients.
                optimizer.zero_grad()

                # Forward pass; track gradient history only in training.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Inception has an auxiliary classifier: in training
                        # the loss is final-output loss + 0.4 * aux loss; in
                        # evaluation only the final output is used.
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # Backward + optimize only in the training phase.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Accumulate statistics (loss is a per-sample mean, so scale
                # back up by the batch size).
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep a deep copy of the best-performing validation weights.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Load the best model weights before returning.
    model.load_state_dict(best_model_wts)
    return model, val_acc_history
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of `model` when feature extracting.

    When `feature_extracting` is False this is a no-op, so later-created
    layers (the new head) keep requires_grad=True by default.
    """
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False


def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Create a torchvision model with its classifier head reshaped.

    Args:
        model_name: one of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet", "inception".
        num_classes: output size of the new classification head.
        feature_extract: if True, freeze the pretrained weights so only the
            newly created head remains trainable.
        use_pretrained: load ImageNet-pretrained weights.

    Returns:
        (model_ft, input_size): the model and the square image size it expects.

    Raises:
        ValueError: if `model_name` is not a supported name.
    """
    # These variables are set in the model-specific branches below.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        # Resnet18
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "alexnet":
        # Alexnet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "vgg":
        # VGG11_bn
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "squeezenet":
        # Squeezenet: the head is a 1x1 conv rather than a Linear layer.
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
        model_ft.num_classes = num_classes
        input_size = 224
    elif model_name == "densenet":
        # Densenet
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "inception":
        # Inception v3: expects (299, 299) images and has an auxiliary output.
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net.
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net.
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299
    else:
        # BUGFIX: was `print("Invalid model name, exiting..."); exit()`, which
        # kills the whole interpreter. Raising lets callers handle the error.
        raise ValueError("Invalid model name: {!r}".format(model_name))

    return model_ft, input_size
if __name__ == '__main__':
    data_dir = './Dataset/hymenoptera_data'
    model_name = 'squeezenet'
    num_classes = 2
    batch_size = 8
    num_epochs = 15
    # True: train only the reshaped head; False: fine-tune the whole network.
    feature_extract = True

    # Detect if we have a GPU available.
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

    # Initialize the model for this run and move it to the device.
    model_ft, input_size = initialize_model(model_name, num_classes,
                                            feature_extract, use_pretrained=True)
    model_ft = model_ft.to(device)

    # Data augmentation + normalization for training; plain resize/crop for val.
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        'val': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    }

    print("Initializing Datasets and Dataloaders...")
    # Create training and validation datasets and dataloaders.
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                              data_transforms[x])
                      for x in ['train', 'val']}
    dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                       batch_size=batch_size, shuffle=True, num_workers=4)
                        for x in ['train', 'val']}

    # Gather the parameters to be optimized/updated in this run. When feature
    # extracting, only the newly created head still has requires_grad=True.
    params_to_update = model_ft.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in model_ft.named_parameters():
            if param.requires_grad == True:
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in model_ft.named_parameters():
            if param.requires_grad == True:
                print("\t", name)

    # Observe that all gathered parameters are being optimized.
    optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # Train and evaluate the transfer-learning model.
    model_ft, hist = train_model(model_ft, dataloaders_dict, criterion,
                                 optimizer_ft, num_epochs=num_epochs, is_inception=(model_name == "inception"))

    # Train the non-pretrained version of the same architecture for comparison.
    scratch_model, _ = initialize_model(model_name, num_classes, feature_extract=False, use_pretrained=False)
    scratch_model = scratch_model.to(device)
    scratch_optimizer = optim.SGD(scratch_model.parameters(), lr=0.001, momentum=0.9)
    scratch_criterion = nn.CrossEntropyLoss()
    _, scratch_hist = train_model(scratch_model, dataloaders_dict, scratch_criterion, scratch_optimizer, num_epochs=num_epochs, is_inception=(model_name == "inception"))

    # Plot validation accuracy vs. epochs for both runs.
    ohist = [h.cpu().numpy() for h in hist]
    shist = [h.cpu().numpy() for h in scratch_hist]

    plt.title("Validation Accuracy vs. Number of Training Epochs")
    plt.xlabel("Training Epochs")
    plt.ylabel("Validation Accuracy")
    plt.plot(range(1, num_epochs + 1), ohist, label="Pretrained")
    plt.plot(range(1, num_epochs + 1), shist, label="Scratch")
    plt.ylim((0, 1.))
    plt.xticks(np.arange(1, num_epochs + 1, 1.0))
    plt.legend()
    # BUGFIX: save BEFORE plt.show(). The original called savefig after show(),
    # which (with a blocking show) writes out an empty figure once the window
    # is closed.
    plt.savefig('Compare.png')
    plt.show()
# NOTE(review): a duplicate `from __future__ import print_function` stood here.
# Future statements are only legal at the very top of a module, so mid-file it
# is a SyntaxError; the file already has the same import on its first line,
# hence it was removed with no behavior change.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.models as models
import copy
def image_loader(image_name):
    """Load an image file and return it as a batched float tensor on `device`."""
    img = Image.open(image_name)
    # Add a fake batch dimension so the tensor matches the network's
    # expected (B, C, H, W) input layout.
    batched = loader(img).unsqueeze(0)
    return batched.to(device, torch.float)
unloader = transforms.ToPILImage()  # reconvert tensors back into PIL images


def imshow(tensor, title=None):
    """Display a (1, C, H, W) image tensor with matplotlib."""
    img = tensor.cpu().clone()  # clone so the displayed copy never mutates the original
    img = img.squeeze(0)        # remove the fake batch dimension
    img = unloader(img)
    plt.imshow(img)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


class ContentLoss(nn.Module):
    """Pass-through module that records the content loss against a fixed target.

    Despite its name this is not a true PyTorch loss function: it is spliced
    into the model, forwards its input unchanged, and stores the MSE against
    the target activations in `self.loss`. (Defining it as a real loss would
    require a custom autograd function with a hand-written backward.)
    """

    def __init__(self, target,):
        super(ContentLoss, self).__init__()
        # Detach the target so it is a constant, not a node of the graph used
        # to compute gradients; otherwise forward() would raise.
        self.target = target.detach()

    def forward(self, input):
        self.loss = F.mse_loss(input, self.target)
        return input


def gram_matrix(input):
    """Return the normalized Gram matrix of a (a, b, c, d) feature tensor.

    a = batch size (1), b = number of feature maps, (c, d) = map dimensions.
    """
    a, b, c, d = input.size()
    features = input.view(a * b, c * d)  # reshape F_XL into \hat F_XL
    G = torch.mm(features, features.t())  # the gram product
    # Normalize by the element count of each feature map so layers with large
    # maps do not dominate the style loss.
    return G.div(a * b * c * d)


class StyleLoss(nn.Module):
    """Pass-through module that records the style loss against a fixed target."""

    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        self.target = gram_matrix(target_feature).detach()

    def forward(self, input):
        G = gram_matrix(input)
        self.loss = F.mse_loss(G, self.target)
        return input


class Normalization(nn.Module):
    """Normalize an input image so it can be the first step of a nn.Sequential."""

    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        # .view the mean and std as (C, 1, 1) so they broadcast directly over
        # an image tensor of shape (B, C, H, W).
        self.mean = torch.tensor(mean).view(-1, 1, 1)
        self.std = torch.tensor(std).view(-1, 1, 1)

    def forward(self, img):
        return (img - self.mean) / self.std
# Desired depth layers at which to compute the style / content losses.
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']


def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                               style_img, content_img,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    """Rebuild `cnn` as a Sequential with loss modules spliced in.

    Returns (model, style_losses, content_losses) where the loss lists hold
    the inserted modules so the optimization loop can read their `.loss`.
    """
    cnn = copy.deepcopy(cnn)

    # Normalization module goes first so raw images can be fed directly.
    normalization = Normalization(normalization_mean, normalization_std).to(device)

    # Keep iterable handles on the inserted content/style loss modules.
    content_losses = []
    style_losses = []

    # Assuming `cnn` is an nn.Sequential, rebuild it layer by layer so each
    # loss module can be placed immediately after the layer it watches.
    model = nn.Sequential(normalization)

    conv_idx = 0  # incremented every time we see a convolution
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            conv_idx += 1
            name = 'conv_{}'.format(conv_idx)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(conv_idx)
            # The in-place version doesn't play nicely with the ContentLoss
            # and StyleLoss modules inserted below, so swap in an
            # out-of-place copy.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(conv_idx)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(conv_idx)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

        model.add_module(name, layer)

        if name in content_layers:
            # Add a content loss fed by this layer's response to content_img.
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module("content_loss_{}".format(conv_idx), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            # Add a style loss fed by this layer's response to style_img.
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module("style_loss_{}".format(conv_idx), style_loss)
            style_losses.append(style_loss)

    # Trim off the layers after the last content/style loss — they contribute
    # nothing to the objective.
    for last in range(len(model) - 1, -1, -1):
        if isinstance(model[last], (ContentLoss, StyleLoss)):
            break
    model = model[:(last + 1)]

    return model, style_losses, content_losses
def get_input_optimizer(input_img):
    """Return an L-BFGS optimizer whose only parameter is the input image."""
    # Marking the image as requiring grad makes it the optimized tensor.
    input_img.requires_grad_()
    return optim.LBFGS([input_img])
def run_style_transfer(cnn, normalization_mean, normalization_std,
                       content_img, style_img, input_img, num_steps=300,
                       style_weight=1000000, content_weight=1):
    """Run the style transfer."""
    print('Building the style transfer model..')
    model, style_losses, content_losses = get_style_model_and_losses(cnn,
        normalization_mean, normalization_std, style_img, content_img)
    optimizer = get_input_optimizer(input_img)

    print('Optimizing..')
    run = [0]  # one-element list so the closure below can mutate the counter
    while run[0] <= num_steps:

        def closure():
            # L-BFGS may step outside [0, 1]; clamp back to valid pixel values.
            input_img.data.clamp_(0, 1)

            optimizer.zero_grad()
            model(input_img)

            # Collect the losses recorded by the spliced-in modules.
            style_score = style_weight * sum(sl.loss for sl in style_losses)
            content_score = content_weight * sum(cl.loss for cl in content_losses)

            loss = style_score + content_score
            loss.backward()  # backprop through the combined objective

            run[0] += 1
            if run[0] % 50 == 0:
                print("run {}:".format(run))
                print('Style Loss : {:4f} Content Loss: {:4f}'.format(
                    style_score.item(), content_score.item()))
                print()

            return style_score + content_score

        optimizer.step(closure)

    # A last correction so the returned tensor is a valid image.
    input_img.data.clamp_(0, 1)

    return input_img
if __name__ == '__main__':
    # 1. Prepare: device, output size and the image-loading pipeline.
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    # Desired size of the output image; use a small size if there is no GPU.
    imsize = 512 if torch.cuda.is_available() else 128
    loader = transforms.Compose([
        transforms.Resize(imsize),  # scale the imported image
        transforms.ToTensor()])     # transform it into a torch tensor

    style_img = image_loader("./Dataset/neural-style/picasso.jpg")
    content_img = image_loader("./Dataset/neural-style/dancing.jpg")
    assert style_img.size() == content_img.size(), \
        "we need to import style and content images of the same size"

    # 2. Model: only VGG19's feature extractor, frozen in eval mode.
    cnn = models.vgg19(pretrained=True).features.to(device).eval()
    cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
    cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)

    # 3. Input: start from a copy of the content image (white noise works too).
    input_img = content_img.clone()
    # input_img = torch.randn(content_img.data.size(), device=device)

    # 4. Train.
    output = run_style_transfer(cnn, cnn_normalization_mean, cnn_normalization_std,
                                content_img, style_img, input_img)

    plt.figure()
    imshow(output, title='Output Image')
    # BUGFIX: save BEFORE plt.show(). The original called savefig after a
    # blocking show(), which wrote an empty figure once the window closed.
    plt.savefig('transferImg.png')
    plt.ioff()
    plt.show()