Li Mu's Leaf Classification Competition (Kaggle)

import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
import pylab
import torchvision.models as models
# This is for the progress bar.
from tqdm import tqdm
import seaborn as sns
from d2l import torch as d2l
from torch.optim.lr_scheduler import CosineAnnealingLR
import timm

labels_dataframe = pd.read_csv('/mnt/datab/home/yuanwenzheng/kaggle/树叶分类/train.csv')
labels_dataframe.describe()   # count = number of samples, unique = number of classes, top = most frequent class, freq = its count

# Sort the labels
# Elements of a set are unique (no duplicates); sorted() orders the list alphabetically
leaves_labels = sorted(list(set(labels_dataframe['label'])))
n_classes = len(leaves_labels)

# Map each label to a number
# zip pairs the two iterables leaves_labels and range(n_classes) element by element
class_to_num = dict(zip(leaves_labels, range(n_classes)))
# Invert the mapping for use at prediction time; {v: k ...} builds the reversed key-value pairs
num_to_class = {v: k for k, v in class_to_num.items()}
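
To see what these two dictionaries look like, here is a minimal illustration with three made-up labels (the real competition has 176 classes):

# Toy illustration with hypothetical labels, not the real dataset
toy_labels = sorted({'maple', 'oak', 'maple', 'birch'})    # -> ['birch', 'maple', 'oak']
toy_class_to_num = dict(zip(toy_labels, range(len(toy_labels))))
toy_num_to_class = {v: k for k, v in toy_class_to_num.items()}
print(toy_class_to_num)   # {'birch': 0, 'maple': 1, 'oak': 2}
print(toy_num_to_class)   # {0: 'birch', 1: 'maple', 2: 'oak'}
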
# Build the Dataset class
class LeaveData(Dataset):
    def __init__(self, csv_path, file_path, mode='train', valid_ratio=0.2, resize_height=256, resize_width=256):
        """
        Args:
            csv_path (string): csv 文件路径
            file_path (string): 图像文件所在路径
            mode (string): 训练模式还是测试模式
            valid_ratio (float): 验证集比例
        """

        # 调整图片大小,将图片设置为相同格式
        self.resize_height = resize_height
        self.resize_width = resize_width
        self.file_path = file_path
        self.mode = mode

        # header = None 是将列名,即第一行的内容变为从0开始的整数
        self.data_info = pd.read_csv(csv_path, header=None)
        # 减一是因为第一行是代表列名的整数
        self.data_len = len(self.data_info.index) - 1   # 样本数(行数)
        self.train_len = int(self.data_len * (1 - valid_ratio))

        if mode == 'train':
            # Image file names of the training samples (image0, image1, ...);
            # np.asarray converts them to a numpy array, analogous to a torch tensor
            self.train_image = np.asarray(self.data_info.iloc[1:self.train_len, 0])
            # Corresponding labels, i.e. the tree species of each image
            self.train_label = np.asarray(self.data_info.iloc[1:self.train_len, 1])
            self.image_arr = self.train_image
            self.label_arr = self.train_label

        elif mode == 'valid':
            self.valid_image = np.asarray(self.data_info.iloc[self.train_len:, 0])
            self.valid_label = np.asarray(self.data_info.iloc[self.train_len:, 1])
            self.image_arr = self.valid_image
            self.label_arr = self.valid_label

        elif mode == 'test':
            self.test_image = np.asarray(self.data_info.iloc[1:, 0])
            self.image_arr = self.test_image

        self.real_len = len(self.image_arr)

        print('Finished reading the {} set of Leaves Dataset ({} samples found)'
              .format(mode, self.real_len))

    def __getitem__(self, index):
        # Get the image file name
        single_image_name = self.image_arr[index]
        # Open the file with Pillow's Image.open
        img_as_img = Image.open(self.file_path + single_image_name)

        # To convert an RGB three-channel image to grayscale, use the two lines below:
        #         if img_as_img.mode != 'L':
        #             img_as_img = img_as_img.convert('L')

        # Build the preprocessing pipeline; operations such as Normalize could be added here
        if self.mode == 'train':
            # torchvision transforms pipeline
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(p=0.5),   # random horizontal flip with probability 0.5
                transforms.ToTensor()
            ])
        else:
            # No data augmentation for valid and test
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor()
            ])

        # Apply the transforms
        img_as_img = transform(img_as_img)

        if self.mode == 'test':
            return img_as_img
        else:
            label = self.label_arr[index]
            number_label = class_to_num[label]

            return img_as_img, number_label  # return the image tensor and its numeric label

    def __len__(self):
        return self.real_len


train_path = '/mnt/datab/home/yuanwenzheng/kaggle/树叶分类/train.csv'
test_path = '/mnt/datab/home/yuanwenzheng/kaggle/树叶分类/test.csv'
img_path = '/mnt/datab/home/yuanwenzheng/kaggle/树叶分类/'

train_dataset = LeaveData(train_path, img_path, mode='train')
val_dataset = LeaveData(train_path, img_path, mode='valid')
test_dataset = LeaveData(test_path, img_path, mode='test')


# Define the data loaders
train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=32,
        shuffle=True,    # shuffle the training set every epoch
        num_workers=5
    )

val_loader = torch.utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=5
    )
test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=5
    )
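
Before training, it is worth a quick sanity check that a batch has the expected shapes; a minimal sketch using the loaders defined above:

# Sketch only: one batch should be a (32, 3, 224, 224) float tensor of images
# plus a (32,) tensor of integer class indices.
imgs, labels = next(iter(train_loader))
print(imgs.shape, imgs.dtype)      # torch.Size([32, 3, 224, 224]) torch.float32
print(labels.shape, labels[:5])    # torch.Size([32]) and the first five labels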

def im_convert(tensor):

    # Move the tensor to the CPU and clone/detach it so the original data is untouched
    image = tensor.to("cpu").clone().detach()
    image = image.numpy().squeeze()
    image = image.transpose(1, 2, 0)   # reorder (C, H, W) to (H, W, C) so the channel axis comes last
    image = image.clip(0, 1)           # clamp pixel values to the [0, 1] range

    return image

def example_show():
    fig=plt.figure(figsize=(20, 12))
    columns = 4
    rows = 2

    dataiter = iter(val_loader)
    inputs, classes = next(dataiter)

    for idx in range(columns * rows):
        # Create subplot ax; eight subplots in total
        ax = fig.add_subplot(rows, columns, idx+1, xticks=[], yticks=[])
        ax.set_title(num_to_class[int(classes[idx])])
        plt.imshow(im_convert(inputs[idx]))
    plt.show()

# Pick the device
def get_device():
    return 'cuda' if torch.cuda.is_available() else 'cpu'

device = get_device()

# Optionally freeze the earlier layers of the model: their gradients are not
# computed, so those parameters do not get updated
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

# resnet50 model, pretrained on ImageNet, with the head replaced for our classes
def res_model(num_classes, feature_extract=False, use_pretrained=True):

    model_ft = models.resnet50(pretrained=use_pretrained)
    set_parameter_requires_grad(model_ft, feature_extract)
    # number of input features of the final fully connected layer (fc)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes))

    return model_ft
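
To confirm that feature_extract=True really freezes the backbone, one can count the parameters that still require gradients; a minimal sketch (assumes the torchvision resnet50 weights, downloaded on first use):

# Sketch only: with a frozen backbone, just the new fc head should be trainable.
frozen = res_model(num_classes=176, feature_extract=True)
trainable = [n for n, p in frozen.named_parameters() if p.requires_grad]
print(len(trainable), trainable)   # expect 2 entries: fc.0.weight and fc.0.bias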

class LabelSmoothingLoss(nn.Module):
    def __init__(self, classes, smoothing=0.0, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing   # probability mass kept on the true class
        self.smoothing = smoothing
        self.classes = classes
        self.dim = dim

    def forward(self, pred, target):
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # Smoothed target distribution: smoothing/(C-1) on every wrong class,
            # 1 - smoothing on the true class
            true_dist = torch.zeros_like(pred)
            true_dist.fill_(self.smoothing / (self.classes - 1))
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        # Cross-entropy between the smoothed targets and the predicted log-probabilities
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))
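
Note that recent PyTorch (1.10+) ships label smoothing directly in nn.CrossEntropyLoss, albeit with a slightly different convention: it spreads smoothing/C over all classes (including the true one) rather than smoothing/(C-1) over the wrong ones, so the two losses are close but not identical. A minimal comparison sketch:

# Sketch only: the values should be close but not exactly equal because the
# smoothing conventions differ slightly.
logits = torch.randn(4, 176)
targets = torch.randint(0, 176, (4,))
custom = LabelSmoothingLoss(classes=176, smoothing=0.1)(logits, targets)
builtin = nn.CrossEntropyLoss(label_smoothing=0.1)(logits, targets)
print(custom.item(), builtin.item())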

learning_rate = 4e-5
weight_decay = 4e-5
num_epoch = 50
model_path = '/mnt/datab/home/yuanwenzheng/kaggle/树叶分类/pre_res_model.ckpt'


def __train__():
    # Initialize a model and put it on the specified device.
    # 176 output classes, one per tree species
    model = res_model(176)
    # Run on the GPU if available
    model = model.to(device)
    model.device = device

    # Plain cross-entropy measures performance on the validation set
    criterion = nn.CrossEntropyLoss()
    # Label-smoothing loss used for training (constructed once, outside the batch loop)
    smoothing_loss = LabelSmoothingLoss(classes=176, smoothing=0.1)

    # Initialize the optimizer; hyperparameters such as the learning rate can be fine-tuned.
    # AdamW optimizer (Adam with decoupled weight decay)
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    # Cosine-annealing learning-rate scheduler, stepped once per epoch
    scheduler = CosineAnnealingLR(optimizer, T_max=25)
    # The number of training epochs.
    n_epochs = num_epoch

    best_acc = 0.0
    # Create the animator once, before the epoch loop; one point is added per epoch.
    animator = d2l.Animator(xlabel='epoch', xlim=[1, n_epochs],
                            legend=['train loss', 'train acc', 'valid acc'])
    for epoch in range(n_epochs):
        # ---------- Training ----------
        # Make sure the model is in train mode before training.
        model.train()
        # These are used to record information in training.
        train_loss = []
        train_accs = []
        # Iterate the training set by batches.
        # tqdm shows a progress bar during iteration
        for i, batch in enumerate(tqdm(train_loader)):
            # A batch consists of image data and corresponding labels.
            imgs, labels = batch
            imgs = imgs.to(device)
            labels = labels.to(device)
            # Forward the data. (Make sure data and model are on the same device.)
            logits = model(imgs)

            # Compute the label-smoothing cross-entropy loss.
            # No softmax is needed beforehand: log_softmax is applied inside the loss.
            loss = smoothing_loss(logits, labels)
            # loss = criterion(logits, labels)

            # Gradients stored in the parameters in the previous step should be cleared out first.
            optimizer.zero_grad()
            # Compute the gradients for parameters.
            loss.backward()
            # Update the parameters with computed gradients.
            optimizer.step()

            # Compute the accuracy for the current batch.
            acc = (logits.argmax(dim=-1) == labels).float().mean()

            # Record the loss and accuracy as plain Python floats.
            train_loss.append(loss.item())
            train_accs.append(acc.item())

        # Step the cosine-annealing scheduler once per epoch (stepping it every
        # batch would run through the T_max=25 cycle within a single epoch).
        scheduler.step()

        # The average loss and accuracy of the training set is the average of the recorded values.
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)

        # Print the information.
        print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

        animator.add(epoch + 1, (train_loss, train_acc, None))

        # ---------- Validation ----------
        # Put the model in eval mode so that modules like dropout behave correctly for inference.
        model.eval()
        # These are used to record information in validation.
        valid_loss = []
        valid_accs = []

        # Iterate the validation set by batches.
        for batch in tqdm(val_loader):
            imgs, labels = batch
            # We don't need gradient in validation.
            # Using torch.no_grad() accelerates the forward process.
            with torch.no_grad():
                logits = model(imgs.to(device))

            # We can still compute the loss (but not the gradient).
            loss = criterion(logits, labels.to(device))

            # Compute the accuracy for the current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

            # Record the loss and accuracy as plain Python floats.
            valid_loss.append(loss.item())
            valid_accs.append(acc.item())

        # The average loss and accuracy for entire validation set is the average of the recorded values.
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)

        # Print the information.
        print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

        # if the model improves, save a checkpoint at this epoch
        if valid_acc > best_acc:
            best_acc = valid_acc
            torch.save(model.state_dict(), model_path)
            print('saving model with acc {:.3f}'.format(best_acc))

        animator.add(epoch + 1, (None, None, valid_acc))
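
As an aside, it is easy to see what CosineAnnealingLR does when stepped once per epoch; a minimal sketch with a throwaway optimizer (plotting uses matplotlib, imported above):

# Sketch only: trace the cosine schedule over num_epoch epochs with T_max=25;
# the learning rate decays from lr toward 0 over 25 epochs, then rises again.
dummy_opt = torch.optim.AdamW([torch.zeros(1, requires_grad=True)], lr=learning_rate)
dummy_sched = CosineAnnealingLR(dummy_opt, T_max=25)
lrs = []
for _ in range(num_epoch):
    lrs.append(dummy_sched.get_last_lr()[0])
    dummy_opt.step()
    dummy_sched.step()
plt.plot(lrs)
plt.xlabel('epoch')
plt.ylabel('learning rate')
plt.show()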


saveFileName = '/mnt/datab/home/yuanwenzheng/kaggle/树叶分类/submission.csv'

def __predict__():
    ## predict
    model = res_model(176)

    # create model and load weights from checkpoint
    model = model.to(device)
    model.load_state_dict(torch.load(model_path))

    # Make sure the model is in eval mode.
    # Some modules like Dropout or BatchNorm affect if the model is in training mode.
    model.eval()

    # Initialize a list to store the predictions.
    predictions = []
    # Iterate the testing set by batches.
    for batch in tqdm(test_loader):
        imgs = batch
        with torch.no_grad():
            logits = model(imgs.to(device))

        # Take the class with greatest logit as prediction and record it.
        predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

    # Map the numeric predictions back to label strings
    preds = [num_to_class[i] for i in predictions]

    test_data = pd.read_csv(test_path)
    test_data['label'] = pd.Series(preds)
    submission = pd.concat([test_data['image'], test_data['label']], axis=1)
    submission.to_csv(saveFileName, index=False)
    print("Done!")
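
A quick way to double-check the output format before uploading is to reload the file; a minimal sketch:

# Sketch only: the submission should have two columns, 'image' and 'label'.
check = pd.read_csv(saveFileName)
print(check.columns.tolist())   # ['image', 'label']
print(check.head())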

if __name__ == "__main__":
    # Flag == 0 trains the model; any other value runs prediction on the test set.
    Flag = 1
    if Flag == 0:
        __train__()
    else:
        __predict__()
    # pylab.show()
