目录
目标和数据集
使用Unsupervised模型做异常检测:识别给定图像是否和训练图像相似
数据集
Training data
- 100000 human faces
- data/trainingset.npy: 100000 images in a numpy array with shape (100000, 64, 64, 3)
● Testing data
- About 10000 from the same distribution with training data (label 0)
- About 10000 from another distribution (anomalies, label 1)
- data/testingset.npy: 19636 images in a numpy array with shape (19636, 64, 64, 3)
方法论
参考李宏毅机器学习笔记——Anomaly Detection(异常侦测)_iwill323的博客-CSDN博客
● Train an autoencoder with small reconstruction error.
● During inference, we can use reconstruction error as anomaly score.
○ Anomaly score can be seen as the degree of abnormality of an image.
○ An image from unseen distribution should have higher reconstruction error.
● Anomaly scores are used as our predicted values.
最后，使用ROC AUC score对模型进行评价。
导包
import random
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.models as models
from torch.optim import Adam, AdamW
import pandas as pd
import os
from d2l import torch as d2l
def same_seeds(seed):
    """Seed every relevant RNG (python, numpy, torch, CUDA) for reproducibility."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Trade cudnn autotuning speed for deterministic kernel selection.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
same_seeds(48763)
Dataset module
The transform function here normalizes each image's pixels from [0, 255] to [-1.0, 1.0].
class CustomTensorDataset(TensorDataset):
    """TensorDataset with support of transforms.

    Accepts a uint8 image tensor of shape (N, H, W, C) or (N, C, H, W);
    channel-last input is permuted to PyTorch's channel-first layout.
    Each item is returned as float32 rescaled from [0, 255] to [-1.0, 1.0].
    """
    def __init__(self, tensors):
        self.tensors = tensors
        # Convert channel-last (N, H, W, 3) layout to PyTorch's (N, 3, H, W).
        if tensors.shape[-1] == 3:
            self.tensors = tensors.permute(0, 3, 1, 2)
        # Plain callable instead of torchvision transforms.Compose(Lambda, Lambda):
        # the pipeline is simple tensor arithmetic, so torchvision is unnecessary.
        self.transform = lambda x: 2. * x.to(torch.float32) / 255. - 1.

    def __getitem__(self, index):
        x = self.tensors[index]
        if self.transform:
            # mapping images to [-1.0, 1.0]
            x = self.transform(x)
        return x

    def __len__(self):
        return len(self.tensors)
autoencoder
原代码。分别是全连接网络,卷积网络,VAE模型
class fcn_autoencoder(nn.Module):
    """Fully-connected autoencoder: 64*64*3 pixels -> 3-dim latent -> pixels.

    The decoder ends in Tanh, so outputs live in [-1, 1] like the inputs.
    """
    def __init__(self):
        super(fcn_autoencoder, self).__init__()
        # Mirror-symmetric layer widths for encoder / decoder.
        widths = [64 * 64 * 3, 128, 64, 12, 3]
        enc = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            enc += [nn.Linear(n_in, n_out), nn.ReLU()]
        # No activation on the latent code itself.
        self.encoder = nn.Sequential(*enc[:-1])
        rev = widths[::-1]
        dec = []
        for n_in, n_out in zip(rev[:-1], rev[1:]):
            dec += [nn.Linear(n_in, n_out), nn.ReLU()]
        # Replace the final ReLU with Tanh to bound outputs to [-1, 1].
        dec[-1] = nn.Tanh()
        self.decoder = nn.Sequential(*dec)

    def forward(self, x):
        return self.decoder(self.encoder(x))
class conv_autoencoder(nn.Module):
    """Convolutional autoencoder for 64x64 RGB images.

    The encoder halves the spatial size three times (64 -> 8) while widening
    channels 3 -> 12 -> 24 -> 48; the decoder mirrors it with transposed
    convolutions and ends in Tanh to match inputs scaled to [-1, 1].
    """
    def __init__(self):
        super(conv_autoencoder, self).__init__()

        def down(c_in, c_out):
            # Stride-2 conv: halves H and W.
            return [nn.Conv2d(c_in, c_out, 4, stride=2, padding=1), nn.ReLU()]

        def up(c_in, c_out, last=False):
            # Stride-2 transposed conv: doubles H and W.
            act = nn.Tanh() if last else nn.ReLU()
            return [nn.ConvTranspose2d(c_in, c_out, 4, stride=2, padding=1), act]

        self.encoder = nn.Sequential(*(down(3, 12) + down(12, 24) + down(24, 48)))
        self.decoder = nn.Sequential(*(up(48, 24) + up(24, 12) + up(12, 3, last=True)))

    def forward(self, x):
        return self.decoder(self.encoder(x))
class VAE(nn.Module):
    """Convolutional VAE for 64x64 RGB images.

    A shared encoder trunk downsamples twice (64 -> 16); two parallel conv
    heads produce mu and logvar of the latent code (48, 8, 8). The decoder
    mirrors the encoder and ends in Tanh, matching inputs scaled to [-1, 1].
    """
    def __init__(self):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(12, 24, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        # Two heads on the shared trunk: latent mean and latent log-variance.
        self.enc_out_1 = nn.Sequential(
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.enc_out_2 = nn.Sequential(
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(48, 24, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(24, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(12, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        )

    def encode(self, x):
        """Return (mu, logvar) of the approximate posterior q(z|x)."""
        h1 = self.encoder(x)
        return self.enc_out_1(h1), self.enc_out_2(h1)

    def reparametrize(self, mu, logvar):
        """Sample z = mu + sigma * eps with eps ~ N(0, I).

        Uses torch.randn_like so the noise tensor is created on the same
        device/dtype as the inputs. The previous torch.cuda.FloatTensor +
        Variable implementation raised a device-mismatch error whenever CUDA
        was available but the model lived on the CPU, and relied on APIs
        deprecated since PyTorch 0.4.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        return self.decoder(z)

    def forward(self, x):
        """Return (reconstruction, mu, logvar)."""
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar
def loss_vae(recon_x, x, mu, logvar, criterion):
    """
    VAE objective: reconstruction term plus KL divergence to the unit Gaussian.

    recon_x: generating images
    x: origin images
    mu: latent mean
    logvar: latent log variance
    criterion: reconstruction loss module, e.g. nn.MSELoss()
    """
    reconstruction = criterion(recon_x, x)
    # Closed-form KL( N(mu, sigma^2) || N(0, I) ), summed over all elements.
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return reconstruction + kld
训练
加载数据
# Load data.
# Training set: 100000 human-face images, (100000, 64, 64, 3) per the dataset notes above.
# Test set: 19636 images (~10000 normal + ~10000 anomalies), same image shape.
train = np.load('../input/ml2022spring-hw8/data/trainingset.npy', allow_pickle=True)
test = np.load('../input/ml2022spring-hw8/data/testingset.npy', allow_pickle=True)
print(train.shape)
print(test.shape)
# Build training dataloader.
batch_size = 256
num_workers = 2
x = torch.from_numpy(train)
# CustomTensorDataset permutes channel-last input to (N, 3, 64, 64) and rescales to [-1, 1].
train_dataset = CustomTensorDataset(x)
# drop_last=True keeps every batch exactly batch_size images; pin_memory speeds host->GPU copies.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size, num_workers=num_workers, pin_memory=True, drop_last=True)
# Prints total training-set length and the (fractional) number of batches; message text is Chinese.
print('训练集总长度是 {:d}, batch数量是 {:.2f}'.format(len(train_dataset), len(train_dataset)/ batch_size))
训练函数
def trainer(model, config, train_dataloader, devices):
best_loss = np.inf
num_epochs = config['num_epochs']
model_type = config['model_type']
# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
T_0=config['T_0'], T_mult=config['T_2'], eta_min=config['lr']/config['ratio_min'])
if not os.path.isdir('./' + config['model_path'].split('/')[1]):
os.mkdir('./' + config['model_path'].split('/')[1]) # Create directory of saving models.
model.train()
legend = ['train loss']
animator = d2l.Animator(xlabel='epoch', xlim=[0, num_epochs], legend=legend)
for epoch in range(num_epochs):
tot_loss = 0.0
for data in train_dataloader:
img = data.float().to(devices[0])
if model_type in ['fcn']:
img = img.view(img.shape[0], -1)
output = model(img)
if model_type in ['vae']:
loss = loss_vae(output[0], img, output[1], output[2],