PyTorch 分类实现（MNIST）——read data from Image

最新推荐文章于 2024-04-28 17:05:44 发布

holmes_MX

最新推荐文章于 2024-04-28 17:05:44 发布

阅读量738

点赞数

分类专栏： PyTorch学习

本文链接：https://blog.csdn.net/holmes_MX/article/details/81870472

版权

PyTorch学习专栏收录该内容

1 篇文章 0 订阅

订阅专栏

0. 写作目的

好记性不如烂笔头。

1. 直接上代码

#!/usr/bin/env python
# _*_ coding:utf-8 _*_


import torch
import torch.nn as nn  ## 各层类型的实现
import torch.nn.functional as F  ## 各层函数的实现，与层类型对应， 如：卷积函数/池化函数/归一化函数等
from torchvision import datasets, transforms
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import argparse
import os
import numpy as np
from sklearn.metrics import accuracy_score

# All dataset paths are resolved relative to the directory the script is launched from.
currentDir = os.getcwd()
dataDir = '{}/mnist_image'.format(currentDir)
# One sub-directory per split, both living directly under dataDir.
trainDir, testDir = ('{}/{}'.format(dataDir, splitName) for splitName in ('train', 'test'))


############################################## create train and test(also called val) list #########

###  expected layout: mnist_image/train/<class name>/<image files>  (and the same under mnist_image/test/)

def createList(train = True):
    """Write the image list file for the train or the test split.

    Every sub-directory of ``trainDir`` (``train=True``) or ``testDir``
    (``train=False``) is treated as one class. For each file inside it, one
    line ``<image path> <class name>`` is written to ``dataDir/train.txt`` or
    ``dataDir/test.txt`` respectively. An existing list file is overwritten.

    Args:
        train: True to build the training list, False to build the test list.
    """
    splitName = 'train' if train else 'test'
    splitDir = trainDir if train else testDir
    counter = 0
    # The context manager guarantees the list file is flushed and closed even
    # if listing one of the class directories raises.
    with open(dataDir + '/' + splitName + '.txt', 'w') as f:
        # Sorted so the generated list does not depend on filesystem order.
        for className in sorted(os.listdir(splitDir)):
            classDir = splitDir + '/' + str(className)
            # Stray files next to the class folders (e.g. .DS_Store) would make
            # os.listdir() below raise, so only directories are visited.
            if not os.path.isdir(classDir):
                continue
            for imageName in sorted(os.listdir(classDir)):
                f.write(classDir + '/' + str(imageName) + ' ' + str(className) + '\n')
                counter += 1
            print("class: {} is done".format(className))
    print("{0} list is done! and {0} data is: {1}".format(splitName, counter))

############################################## read the data ########################

def default_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB."""
    # Open the file ourselves so the handle is closed deterministically;
    # convert() reads the pixel data before the ``with`` block exits.
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


class MyDataset(Dataset):
    """Dataset backed by a text list file of ``<image path> <integer label>`` lines.

    Args:
        txt: path of the list file; one sample per line, the label being the
            last whitespace-separated token (so image paths may contain spaces).
        transform: optional callable applied to the loaded image.
        target_transform: optional callable applied to the integer label.
        loader: callable that turns an image path into an image object.

    Raises:
        ValueError: if a non-blank line has no label or the label is not an integer.
    """

    def __init__(self, txt, transform = None, target_transform = None, loader=default_loader):
        imgs = []
        with open(txt, 'r') as fh:
            for lineNo, line in enumerate(fh, 1):
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                # Split from the right: only the last token is the label.
                parts = line.rsplit(None, 1)
                if len(parts) != 2:
                    raise ValueError("{}:{}: expected '<path> <label>', got {!r}".format(txt, lineNo, line))
                imgs.append((parts[0], int(parts[1])))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return ``(image, label)`` for sample ``index`` with the transforms applied."""
        fn, label = self.imgs[index]
        img = self.loader(fn)
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

    def __len__(self):
        """Return the number of samples listed in the file."""
        return len(self.imgs)


################################### create the Net and training ######################


## torch.nn.Conv2d( input_channel, output_channel, kernel_size, stride = 1, padding= 0 )
## torch.nn.MaxPool2d( kernel_size, stride(default = kernel_size), padding = 0 )
## torch.nn.Linear( input_size, output_size )
class Net(nn.Module):
    """Three conv/ReLU/max-pool stages followed by a two-layer classifier.

    The classifier expects 64 * 3 * 3 features, i.e. a 3-channel 28 x 28 input
    whose spatial size is halved by each pooling step (28 -> 14 -> 7 -> 3).
    The output has 10 scores per sample.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 3x3 same-padding convolution + ReLU + 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def __init__(self):
        super(Net, self).__init__()
        # The convolutions keep the spatial size; only the pooling shrinks it.
        self.conv1 = self._conv_stage(3, 32)
        self.conv2 = self._conv_stage(32, 64)
        self.conv3 = self._conv_stage(64, 64)
        self.dense = nn.Sequential(
            nn.Linear(64 * 3 * 3, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Return the 10 class scores for the image batch ``x``."""
        features = x
        for stage in (self.conv1, self.conv2, self.conv3):
            features = stage(features)
        # Flatten everything except the batch dimension for the linear layers.
        flat = features.view(features.size(0), -1)
        return self.dense(flat)


def train(args, model, device, train_loader, optimizer, epoch, loss_func):
    """Run one training epoch and optionally save the model weights.

    Prints the running mean of the per-batch loss and accuracy after every
    batch. When ``args.saveModel`` is truthy, the model's ``state_dict`` is
    saved to ``<currentDir>/<args.saveModelDir>/model<epoch + 1>.pkl`` at the
    end of the epoch.

    Args:
        args: parsed command-line options; ``saveModel`` and ``saveModelDir`` are read.
        model: network to optimise (it is not moved to ``device`` here).
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, label)`` batches.
        optimizer: optimiser stepping the parameters of ``model``.
        epoch: zero-based epoch index, used for logging and the checkpoint name.
        loss_func: callable ``loss_func(output, label)`` returning a scalar tensor.
    """
    model.train()
    train_loss = 0.
    train_acc  = 0.
    for batch_idx, (data, label) in enumerate(train_loader):
        batch_x, batch_y = data.to(device), label.to(device)
        optimizer.zero_grad()  ## clear gradients accumulated by the previous step
        output = model(batch_x)
        loss = loss_func(output, batch_y)
        loss.backward()  ## back-propagation
        optimizer.step()  ## parameter update
        # loss is a 0-dim tensor; .item() is the supported way to read it
        # (indexing it with [0] raises on current PyTorch).
        train_loss += loss.item()

        pred = torch.max(output, 1)[1]
        # Computed with tensor ops so it also works when the batch lives on the GPU.
        train_acc += (pred == batch_y).float().mean().item()

        print("epoch{}: iter{}: loss {:.6f} Acc: {:.6f}".format(epoch + 1,batch_idx + 1, train_loss/(batch_idx + 1), train_acc/(batch_idx + 1)))
    if args.saveModel:
        saveDir = currentDir + '/' + args.saveModelDir
        # makedirs (not mkdir) so a nested saveModelDir such as "runs/model" works too.
        if not os.path.isdir(saveDir):
            os.makedirs(saveDir)
        tempSaveModelDir = saveDir + '/model' + str(epoch + 1) + '.pkl'
        torch.save(model.state_dict(), tempSaveModelDir)


def val(args, model, device, test_loader, optimizer, epoch, loss_func):
    """Evaluate ``model`` on ``test_loader`` without updating its weights.

    Prints the running mean of the per-batch loss and accuracy after every batch.

    Args:
        args: unused; kept so the signature matches ``train``.
        model: network to evaluate (it is not moved to ``device`` here).
        device: device each batch is moved to before the forward pass.
        test_loader: iterable yielding ``(data, label)`` batches.
        optimizer: unused; kept so the signature matches ``train``.
        epoch: zero-based epoch index, used for logging only.
        loss_func: callable ``loss_func(output, label)`` returning a scalar tensor.
    """
    model.eval()
    test_loss = 0.
    test_acc = 0.
    with torch.no_grad():  ## no gradients are needed for evaluation
        for batch_idx, (data, label) in enumerate(test_loader):
            batch_x, batch_y = data.to(device), label.to(device)
            output = model(batch_x)
            loss = loss_func(output, batch_y)
            # loss is a 0-dim tensor; .item() is the supported way to read it
            # (indexing it with [0] raises on current PyTorch).
            test_loss += loss.item()

            pred = torch.max(output, 1)[1]
            # Computed with tensor ops so it also works when the batch lives on the GPU.
            test_acc += (pred == batch_y).float().mean().item()

            print("epoch{}: iter{}: loss {:.6f} Acc: {:.6f}".format(epoch + 1, batch_idx + 1, test_loss/(batch_idx + 1),
                                                                    test_acc/(batch_idx + 1) ))



### testQuick() fills the global confusionMatrix, but it does not return per-image details;
 ##     reporting those details would need a separate test() function.
def testQuick(batch_size, model_path=None):
    """Evaluate a saved checkpoint on the test list and fill the confusion matrix.

    Loads ``dataDir/test.txt`` through ``MyDataset``, restores a ``Net`` from
    ``model_path`` and, for every sample, increments the module-level
    ``confusionMatrix[true label, predicted label]``. The running mean of the
    per-batch loss and accuracy is printed after every batch. Nothing is returned.

    Args:
        batch_size: number of samples per evaluation batch.
        model_path: path of the saved ``state_dict``; defaults to
            ``<currentDir>/model/model2.pkl``.
    """
    global confusionMatrix
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    test_data = MyDataset(txt=dataDir + '/test.txt', transform=transforms.ToTensor())
    test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

    if model_path is None:
        model_path = currentDir + '/model/model2.pkl'

    test_loss = 0.
    test_acc = 0.
    model = Net()
    loss_func = torch.nn.CrossEntropyLoss()

    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=device))
    # The batches are moved to ``device`` below, so the model has to live there too.
    model.to(device)
    model.eval()
    with torch.no_grad():  ## no gradients are needed for evaluation
        for batch_idx, (data, label) in enumerate(test_loader):
            batch_x, batch_y = data.to(device), label.to(device)
            output = model(batch_x)
            loss = loss_func(output, batch_y)
            # loss is a 0-dim tensor; .item() is the supported way to read it.
            test_loss += loss.item()

            pred = torch.max(output, 1)[1]
            test_acc += (pred == batch_y).float().mean().item()

            # Tensors must be on the CPU before they can be converted to numpy.
            labels = batch_y.cpu().numpy()
            preds = pred.cpu().numpy()
            for trueLabel, predLabel in zip(labels, preds):
                confusionMatrix[trueLabel, predLabel] += 1

            print("iter{}: loss {:.6f} Acc: {:.6f}".format(batch_idx + 1, test_loss/(batch_idx + 1),
                                                                    test_acc/(batch_idx + 1)))

def main():
    """Parse the command line, build the data lists and train the network.

    Steps: parse options, seed torch, regenerate ``train.txt`` / ``test.txt``
    with ``createList``, wrap both lists in ``MyDataset`` / ``DataLoader``,
    build ``Net`` with an SGD optimiser and cross-entropy loss, then run
    ``train`` followed by ``val`` once per epoch.
    """
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch_size', type=int, default=32, metavar='N',help="batch_size")
    parser.add_argument('--test_batch_size', type=int, default=32, metavar='N', help="batch_size for test")
    parser.add_argument('--epochs', type=int, default=10, metavar='N', help="epochs")
    parser.add_argument('--learning_rate', type=float, default=0.01, metavar='LR', help="learning rate")
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum')
    parser.add_argument('--no_cuda', action='store_true', default=False, help="disable to use CUDA")
    parser.add_argument('--seed', type=int, default=2018, metavar='S', help="seed for training")
    parser.add_argument('--log_interval', type=int, default=10,metavar='N', help='how many batches to wait before logging training status')
    parser.add_argument('--saveModel', type=int, default=1, metavar='N', help="each saveModel epoch save model")
    parser.add_argument('--saveModelDir', default="model", type=str, metavar='PATH', help='path to save model')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")


    #################### create List
    createList(True)
    createList(False)

    ##################### load train and test data
    train_data = MyDataset(txt=dataDir + '/train.txt', transform=transforms.ToTensor())
    test_data = MyDataset(txt=dataDir + '/test.txt', transform=transforms.ToTensor())

    train_loader = DataLoader(dataset=train_data, batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_data, batch_size=args.test_batch_size, shuffle=False)


    ###################### define a Network
    # train() and val() move every batch to ``device``; the model must be on the
    # same device or the forward pass fails as soon as CUDA is selected. It is
    # moved before the optimiser is built so the optimiser sees the final parameters.
    model = Net().to(device)
    print(model)

    #################### net loss and SGD
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum)
    loss_func = torch.nn.CrossEntropyLoss()

    ###################### train and test
    for epoch in range(args.epochs):
        train(args, model, device, train_loader, optimizer, epoch, loss_func)
        val(args, model, device, test_loader, optimizer, epoch, loss_func)

if __name__ == "__main__":
    # Hard-coded switch: True evaluates a saved checkpoint, False trains from scratch.
    is_test = True
    if not is_test:  ### training
        main()
    else:
        # Module-level state; testQuick() updates confusionMatrix in place.
        wrongImageName = []
        wrongLabel = []
        confusionMatrix = np.zeros((10, 10))
        testQuick(batch_size=16)
        print(confusionMatrix)

        # Correct predictions sit on the diagonal of the confusion matrix.
        last_acc = sum(confusionMatrix[k][k] for k in range(len(confusionMatrix)))
        totalCount = np.sum(confusionMatrix)
        print("test data number: {:.6f}".format(totalCount))
        print("test data acc: {:.6f}".format(last_acc / totalCount))

        # Disabled: dump the misclassified images collected in wrongImageName / wrongLabel.
        # wrongImageNameDir = currentDir + '/wrongImageName.txt'
        # f = open(wrongImageNameDir, 'w')
        # for ii in range(len(wrongImageName)):
        #     f.write(wrongImageName[ii] + str(wrongLabel[ii]) + '\n')
        # f.close()

2. 训练结果

这里只给出训练开始的结果和训练中的部分结果。(训练参数：SGD(lr=0.01, momentum=0.9))

训练开始时：

训练中间时的结果：

训练中测试集的结果：

3. 总结

从我初步体验 PyTorch 这个框架来看，PyTorch 的框架较为简单，构建网络的速度较快，而且能够自动反传，部署到 GPU 上也很便利。但它也有一个不足：函数的调用过程封装得过于严密，程序员可能不清楚框架内部具体做了哪些事情。

There may be some mistakes in this blog. So, any suggestions and comments are welcome!

[Reference]

[1] https://ptorch.com/docs/1/torch-nn

[2] https://www.jianshu.com/p/3963b2cdd771

holmes_MX

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
PyTorch 分类实现（MNIST）——read data from Image

0. 写作目的好记性不如烂笔头。1. 直接上代码#！/usr/bin/env python# _*_ coding:utf-8 _*_import torchimport torch.nn as nn ## 各层类型的实现import torch.nn.functional as F ## 各层函数的实现，与层类型对应，如：卷积函数/池化函数/归一化函数等from...
复制链接

扫一扫

专栏目录