Hung-Yi Lee homework[9]: Unsupervised Learning
一、作业要求
目标:分辨给定的两张images是否为风景(植物也算风景)。数据中的image都是32x32x3的图片,除了image本身之外没有任何label。
二、实验过程
2.1 训练
训练得到的模型文件放在./checkpoints/last_checkpoint.pth。和预测阶段不同的是,在训练阶段,encoder和decoder网络都派上了用场。
hw9.py
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import random
import torch
import torch.nn as nn
from torch import optim
class Image_Dataset(Dataset):
    """Map-style dataset that serves already-prepared images by index.

    Args:
        image_list: Sized, indexable collection of images. Items are
            returned exactly as stored; no transform is applied here.
    """

    def __init__(self, image_list):
        self.image_list = image_list

    def __len__(self):
        """Return the number of stored images."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Return the image stored at position ``idx``."""
        return self.image_list[idx]
class AE(nn.Module):
    """Convolutional auto-encoder for 3-channel images.

    The encoder applies three conv + ReLU + 2x2 max-pool stages, halving the
    spatial size each time while widening the channels 3 -> 64 -> 128 -> 256.
    The decoder uses three un-padded, stride-1 transposed convolutions with
    kernel sizes 5, 9 and 17; each grows the spatial size by ``kernel - 1``.
    For a 32x32 input the latent is therefore 256x4x4 and the reconstruction
    is 3x32x32 again (4 -> 8 -> 16 -> 32).
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 5, stride=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 9, stride=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 3, 17, stride=1),
            # Tanh bounds the reconstruction to [-1, 1].
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode ``x`` and reconstruct it.

        Returns:
            A ``(latent, reconstruction)`` tuple: the encoder output and the
            decoder output computed from it.
        """
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return latent, reconstruction
def preprocess(image_list):
    """Convert channel-last images into a normalized channel-first array.

    Args:
        image_list: Array-like with four axes; the last axis is moved to
            position 1 (e.g. N x H x W x C becomes N x C x H x W). Values
            are assumed to be 0-255 pixel intensities -- TODO confirm
            against the data files.

    Returns:
        A ``float32`` NumPy array holding ``value / 255 * 2 - 1``, i.e. the
        0-255 range mapped linearly onto [-1, 1].

    Raises:
        ValueError: If the input does not have exactly four axes.
    """
    images = np.array(image_list)
    if images.ndim != 4:
        # Fail with a readable message instead of the bare
        # "axes don't match array" error raised by np.transpose.
        raise ValueError(
            'preprocess expects a 4-D batch of images, got shape {}'.format(images.shape)
        )
    images = np.transpose(images, (0, 3, 1, 2))
    images = (images / 255.0) * 2 - 1
    return images.astype(np.float32)
def count_parameters(model, only_trainable=False):
    """Return the total number of scalar parameters in ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. an ``nn.Module``).
        only_trainable: When true, count only parameters whose
            ``requires_grad`` flag is set.
    """
    params = model.parameters()
    if only_trainable:
        params = (p for p in params if p.requires_grad)
    return sum(p.numel() for p in params)
def same_seeds(seed):
    """Seed every random number generator this project uses.

    Seeds PyTorch (CPU, and all CUDA devices when CUDA is available), NumPy
    and Python's ``random`` module with ``seed``, then switches cuDNN to its
    deterministic mode with autotuning (benchmark) turned off.
    """
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # covers the multi-GPU case
    np.random.seed(seed)  # NumPy module
    random.seed(seed)  # Python random module
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
if __name__ == '__main__':
    # Training entry point: fit the auto-encoder on trainX_new.npy and write
    # checkpoints under ./checkpoints.
    import os

    # Use the GPU when there is one; otherwise run on the CPU instead of
    # crashing inside .cuda().
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    trainX = np.load('trainX_new.npy')
    trainX_preprocessed = preprocess(trainX)
    img_dataset = Image_Dataset(trainX_preprocessed)

    same_seeds(0)

    # Prepare the model, loss criterion, optimizer and dataloader.
    model = AE().to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)
    model.train()
    n_epoch = 100
    img_dataloader = DataLoader(img_dataset, batch_size=64, shuffle=True)

    # torch.save does not create missing directories.
    os.makedirs('./checkpoints', exist_ok=True)

    # Main training loop.
    for epoch in range(n_epoch):
        epoch_loss = 0
        for img in img_dataloader:
            img = img.to(device)
            _, output = model(img)
            # Reconstruction loss: the decoder output is compared to the input.
            loss = criterion(output, img)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        # Checkpoint once per 10th epoch, after its last batch. Saving inside
        # the batch loop rewrote the same file on every step and left the
        # same final contents.
        if (epoch + 1) % 10 == 0:
            torch.save(model.state_dict(), './checkpoints/checkpoint_{}.pth'.format(epoch + 1))

        # The reported value is the sum of the per-batch losses of this epoch.
        print('epoch [{}/{}], loss:{:.5f}'.format(epoch + 1, n_epoch, epoch_loss))

    # Store the final model.
    torch.save(model.state_dict(), './checkpoints/last_checkpoint.pth')
2.2 预测
对trainX_new.npy中的数据进行预测,并将得到的预测结果保存到prediction.csv文件中。【注意:预测阶段只需要encoder输出的latents,对latents进行降维和聚类后,就可以进行分类了】
train.py
import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.decomposition import KernelPCA
from sklearn.manifold import TSNE
from sklearn.cluster import MiniBatchKMeans
from hw9 import *
def cal_acc(gt, pred):
    """Return the accuracy of a binary clustering, ignoring label naming.

    Cluster ids are arbitrary, so a clustering that matches the ground truth
    with the two ids swapped is just as good; the result is therefore
    ``max(acc, 1 - acc)``.

    Args:
        gt: Ground-truth labels (array-like).
        pred: Predicted cluster labels (array-like), same shape as ``gt``.

    Raises:
        ValueError: If the inputs are empty or their shapes differ (a shape
            mismatch would otherwise broadcast and give a meaningless value).
    """
    gt = np.asarray(gt)
    pred = np.asarray(pred)
    if gt.shape != pred.shape:
        raise ValueError(
            'gt and pred must have the same shape, got {} and {}'.format(gt.shape, pred.shape)
        )
    if gt.size == 0:
        raise ValueError('cannot compute accuracy of empty label arrays')
    # Count correct predictions.
    correct = np.sum(gt == pred)
    acc = correct / gt.shape[0]
    return max(acc, 1 - acc)
def plot_scatter(feat, label, savefig=None):
    """Scatter-plot the first two columns of ``feat``, coloured by ``label``.

    Args:
        feat: 2-D array; column 0 is used as x and column 1 as y.
        label: Per-point values passed to matplotlib as the colour argument.
        savefig: Optional path; when given, the figure is saved there before
            it is shown.
    """
    xs = feat[:, 0]
    ys = feat[:, 1]
    points = plt.scatter(xs, ys, c=label)
    # A bare plt.legend() finds no labelled artists here and draws nothing,
    # so build the legend entries from the colour classes of the scatter.
    handles, names = points.legend_elements()
    if handles:
        plt.legend(handles, names, loc='best')
    if savefig is not None:
        plt.savefig(savefig)
    plt.show()
def inference(X, model, batch_size=256):
    """Encode ``X`` with ``model`` and return the flattened latent vectors.

    Args:
        X: Raw images, passed through ``preprocess`` before encoding.
        model: Callable module returning ``(latent, reconstruction)``; only
            the latent is used.
        batch_size: Number of images encoded per forward pass.

    Returns:
        2-D NumPy array with one row per image, in input order; each row is
        the latent of that image flattened to a vector.
    """
    X = preprocess(X)
    dataset = Image_Dataset(X)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # Feed batches to whatever device the model lives on, rather than
    # assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    chunks = []
    # No gradients are needed for inference; skipping autograd avoids
    # building a graph for every batch.
    with torch.no_grad():
        for x in dataloader:
            vec, _ = model(x.float().to(device))
            chunks.append(vec.reshape(vec.size(0), -1).cpu().numpy())

    # Concatenate once at the end instead of re-copying the array per batch.
    if chunks:
        latents = np.concatenate(chunks, axis=0)
    else:
        latents = np.empty((0, 0), dtype=np.float32)
    print('Latents Shape:', latents.shape)
    return latents
def predict(latents, random_state=0):
    """Cluster latent vectors into two groups.

    The latents are reduced to 200 dimensions with RBF kernel PCA, then to
    2 dimensions with t-SNE, and finally split into two clusters with
    mini-batch k-means.

    Args:
        latents: 2-D array with one latent vector per row.
        random_state: Seed handed to t-SNE and k-means so that repeated runs
            on the same latents produce the same labels.

    Returns:
        ``(pred, X_embedded)``: the integer cluster label of every row and
        the 2-D t-SNE embedding the clustering was computed on.
    """
    # First dimension reduction.
    transformer = KernelPCA(n_components=200, kernel='rbf', n_jobs=-1)
    kpca = transformer.fit_transform(latents)
    print('First Reduction Shape:', kpca.shape)

    # Second dimension reduction.
    X_embedded = TSNE(n_components=2, random_state=random_state).fit_transform(kpca)
    print('Second Reduction Shape:', X_embedded.shape)

    # Clustering.
    kmeans = MiniBatchKMeans(n_clusters=2, random_state=random_state).fit(X_embedded)
    pred = np.asarray(kmeans.labels_).astype(int)
    return pred, X_embedded
def invert(pred):
    """Swap the two labels of a binary prediction (0 <-> 1).

    Computes ``|1 - pred|`` element-wise; accepts any array-like input.
    """
    return np.abs(1 - np.asarray(pred))
def save_prediction(pred, out_csv='prediction.csv'):
    """Write predictions to ``out_csv`` as ``id,label`` rows.

    The file starts with the header ``id,label``; each following row holds
    the position of a prediction in ``pred`` and its value.

    Args:
        pred: Iterable of labels.
        out_csv: Destination path; an existing file is overwritten.
    """
    rows = ['id,label\n']
    rows.extend(f'{i},{p}\n' for i, p in enumerate(pred))
    with open(out_csv, 'w', encoding='utf-8') as f:
        f.writelines(rows)
    print(f'Save prediction to {out_csv}.')
if __name__ == '__main__':
    # Prediction entry point: encode trainX_new.npy with the trained model,
    # cluster the latents and write the labels to prediction.csv.

    # Load the model. map_location lets a checkpoint written on a GPU be
    # restored on a machine without one.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = AE().to(device)
    model.load_state_dict(
        torch.load('./checkpoints/last_checkpoint.pth', map_location=device)
    )
    model.eval()

    # Prepare the data.
    trainX = np.load('trainX_new.npy')

    # Compute the predictions.
    latents = inference(X=trainX, model=model)
    pred, X_embedded = predict(latents)

    # Save the predictions.
    save_prediction(pred, 'prediction.csv')
下图是prediction.csv中的部分数据:
2.3 使用val数据集来查看正确率和聚类结果
val.py
from hw9 import *
from train import *
if __name__ == '__main__':
    # Validation entry point: cluster the validation images, report the
    # clustering accuracy against valY and plot the 2-D embedding.
    valX = np.load('valX.npy')
    valY = np.load('valY.npy')

    # map_location lets a checkpoint written on a GPU be restored on a
    # machine without one.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = AE().to(device)
    model.load_state_dict(
        torch.load('./checkpoints/last_checkpoint.pth', map_location=device)
    )
    model.eval()

    latents = inference(valX, model)
    pred_from_latent, emb_from_latent = predict(latents)
    acc_latent = cal_acc(valY, pred_from_latent)
    print('The clustering accuracy is:', acc_latent)
    print('The clustering result:')
    # Points are coloured by their ground-truth label.
    plot_scatter(emb_from_latent, valY, savefig='p1_baseline.png')
聚类结果: