链接
Easy
DatasetFolder
用于读取按照特定目录结构组织的数据集(根目录下每个子目录对应一个类别),并不限于文本,本例中读取的是图像
from torchvision.datasets import DatasetFolder
# Labeled training split: only files ending in "jpg" are picked up, each one
# is opened with PIL and then passed through the training transform.
train_set = DatasetFolder(
    root="food-11/training/labeled",
    loader=lambda x: Image.open(x),
    extensions="jpg",
    transform=train_tfm,
)
- root:数据集的根目录,包含多个子目录,每个子目录代表一个类别。
- transform:可选参数,用于对加载的图像数据进行预处理,例如缩放、裁剪、归一化等操作。
- target_transform:可选参数,用于对标签数据进行预处理。
- loader:用于加载数据的函数。DatasetFolder 要求显式传入该参数;ImageFolder 才默认使用 default_loader(可加载 JPEG、PNG 等常见图像文件格式)。如果你的数据格式不同,你可以自定义一个加载函数,并传递给 loader 参数。
- lambda x: Image.open(x):这是一个匿名函数,接受一个参数 x,表示图像文件的路径。在函数内部,它使用 Image.open(x) 来加载图像文件。
结果
测试集上的结果在 0.5 左右波动
Medium
图像变换
查阅了一些资料,多数是写在一个transform里面的,故调整如下
# Training-time augmentation pipeline; the steps run in the order listed.
train_tfm = transforms.Compose(
    [
        # Bring every image to a fixed 128 x 128 shape first.
        transforms.Resize(size=(128, 128)),
        # Pad 16 pixels on every side, then take a random 128 x 128 crop.
        transforms.RandomCrop(size=128, padding=16),
        # Mirror the image horizontally with probability 0.5.
        transforms.RandomHorizontalFlip(p=0.5),
        # ToTensor() should be the last one of the transforms.
        transforms.ToTensor(),
    ]
)
网络结构调整
主要的变化就是在全连接层中间添加了两个dropout层,这是因为观察到easy的代码训练时,训练集正确率上升但是验证集正确率不变的情况。
class Classifier(nn.Module):
    """CNN classifier mapping 3x128x128 images to 11 class logits.

    Three conv/batch-norm/ReLU/max-pool stages extract features, followed by
    a three-layer fully-connected head with dropout between the layers.
    The layer order inside both ``nn.Sequential`` containers is kept fixed so
    that previously saved ``state_dict`` checkpoints remain loadable.
    """

    def __init__(self):
        super().__init__()

        # Conv2d arguments: (in_channels, out_channels, kernel_size, stride, padding)
        # MaxPool2d arguments: (kernel_size, stride, padding)
        # Spatial size for a 128x128 input: 128 -> 64 -> 32 -> 8.
        self.cnn_layers = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(4, 4, 0),
        )

        # The flattened feature map has 256 channels * 8 * 8 positions.
        # Dropout after each hidden layer is there to reduce overfitting.
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 8 * 8, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 11),
        )

    def forward(self, x):
        """Return logits of shape [batch_size, 11] for x of shape [batch_size, 3, 128, 128]."""
        # Extract features by convolutional layers.
        x = self.cnn_layers(x)

        # The extracted feature map must be flattened before going to the
        # fully-connected layers.
        x = x.flatten(1)

        # The features are transformed by fully-connected layers to obtain
        # the final logits.
        x = self.fc_layers(x)
        return x
结果
能达到他的medium标准,接下来就看扩充数据集的
Hard
扩充数据集
把未标注的图片打上伪标签后,一起加入训练集
def get_pseudo_labels(dataset, model, threshold=0.65):
    """Build a pseudo-labeled dataset from the confident predictions of ``model``.

    Args:
        dataset: Dataset to label. It must expose ``samples``, ``root``,
            ``loader``, ``extensions`` and ``transform`` (as a torchvision
            ``DatasetFolder`` does), because those attributes are read here.
        model: Classifier producing one logit vector per image.
            NOTE(review): assumed to already live on the device selected
            below -- confirm against the caller.
        threshold: A sample is kept only when its highest softmax
            probability is strictly greater than this value.

    Returns:
        A new ``DatasetFolder`` whose ``samples`` are ``(image_path,
        pseudo_label)`` pairs for the retained images.

    The model is switched to eval mode for inference and left in train mode
    on return.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # shuffle=False keeps the loader order identical to ``dataset.samples``,
    # which is what makes the index arithmetic below valid.
    data_loader = DataLoader(dataset, batch_size=512, shuffle=False)

    # Make sure the model is in eval mode.
    model.eval()
    # Define softmax function.
    softmax = nn.Softmax(dim=-1)

    # (image_path, pseudo_label) pairs that passed the confidence threshold.
    pseudo_labeled_samples = []

    # Index in ``dataset.samples`` of the first image of the current batch.
    # The loop index inside a batch restarts at 0 for every batch, so it must
    # be shifted by this offset to address the right sample.
    offset = 0

    # Iterate over the dataset by batches.
    for img, _ in tqdm(data_loader):
        img = img.to(device)

        # Forward the data without tracking gradients.
        with torch.no_grad():
            logits = model(img)

        # Obtain the probability distributions by applying softmax on logits.
        probs = softmax(logits)

        # Most likely class and its probability for every image in the batch.
        max_probs, pseudo_labels = probs.max(dim=-1)

        # Keep only the confident predictions.
        for i, (max_prob, pseudo_label) in enumerate(
            zip(max_probs.tolist(), pseudo_labels.tolist())
        ):
            if max_prob > threshold:
                image_path = dataset.samples[offset + i][0]
                pseudo_labeled_samples.append((image_path, pseudo_label))

        offset += len(img)

    # Construct a new DatasetFolder with the same settings as ``dataset``.
    pseudo_dataset = DatasetFolder(
        root=dataset.root,
        loader=dataset.loader,
        extensions=dataset.extensions,
        transform=dataset.transform,
        target_transform=None,
        is_valid_file=None,
    )

    # Replace the scanned samples with the pseudo-labeled ones and keep the
    # ``targets`` list consistent with them.
    pseudo_dataset.samples = pseudo_labeled_samples
    pseudo_dataset.targets = [label for _, label in pseudo_labeled_samples]
    print(f'The num of select_images is {len(pseudo_dataset.samples)}')

    # Turn off the eval mode.
    model.train()
    return pseudo_dataset
以dataset相同的参数重建了一个数据集,并改变了.samples,筛选出具有高可能性的图片,添加标签
PS:.samples中存储了图片对应的路径和标签,getitem函数直接从这里获取返回值,因此只要改变这一项就能筛选图片,官方文档相关函数定义如下:
def __getitem__(self, index: int) -> Tuple[Any, Any]:
    """
    Args:
        index (int): Index

    Returns:
        tuple: (sample, target) where target is class_index of the target class.
    """
    # The (path, target) pair is read straight from ``self.samples``; this
    # line determines what is returned for ``index``.
    path, target = self.samples[index]
    sample = self.loader(path)
    if self.transform is not None:
        sample = self.transform(sample)
    if self.target_transform is not None:
        target = self.target_transform(target)
    return sample, target
def __len__(self) -> int:
    """Return the number of entries in ``self.samples``."""
    return len(self.samples)
训练函数的修改
两个修改:
- 增加了模型保存的部分,方便保存最优结果;开头先加载已保存的权重(model1.ckpt),方便在此基础上继续增加训练轮数
- 需要把do_semi 改成 True,才会运行扩充数据集的函数
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)
# Resume from the previously saved weights so more epochs can be added on top.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load("./model1.ckpt", map_location=device))
model.device = device

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

# The number of training epochs.
n_epochs = 80

# Whether to do semi-supervised learning.
do_semi = True

# Best validation accuracy seen in this run; used to decide when to save.
# NOTE: this restarts at 0 even though the weights are resumed, so the first
# epoch with a non-zero validation accuracy overwrites the checkpoint.
best_acc = 0

for epoch in range(n_epochs):
    # ---------- TODO ----------
    # In each epoch, relabel the unlabeled dataset for semi-supervised learning.
    # Then you can combine the labeled dataset and pseudo-labeled dataset for the training.
    if do_semi:
        # Obtain pseudo-labels for unlabeled data using trained model.
        pseudo_set = get_pseudo_labels(unlabeled_set, model)

        # Construct a new dataset and a data loader for training.
        # This is used in semi-supervised learning only.
        concat_dataset = ConcatDataset([train_set, pseudo_set])
        train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    # Iterate the training set by batches.
    for batch in tqdm(train_loader):
        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        # as_tensor is a no-op for tensor labels and converts plain Python
        # ints, so labels are always a tensor; move them to the device once.
        labels = torch.as_tensor(labels).to(device)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels)

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)

    # The average loss and accuracy of the training set is the average of the recorded values.
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):
        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        labels = labels.to(device)

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels)

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Save the model whenever validation accuracy improves.
    if valid_acc > best_acc:
        torch.save(model.state_dict(), "./model1.ckpt")
        best_acc = valid_acc
        print('saving model at this epoch')

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
训练效果
在固定门限值且合并数据集的情况下,正确率最高仍然只能到达0.58
进一步修改
两个方向的改变:
- 模型在验证集上的正确率越高,则表明模型越好,因此在模型较好时,选取更多的“伪标签”加入
- 把训练集和“伪标签”数据集分开,用验证集进行两次验证,并保留较好的结果进一步训练
伪标签图片的比例调整比较困难,因此采用第二种调整,为此需要在best_model之外设置变量来保存当前比较好的模型。
没有特别好的效果,等待进一步学习