PyTorch 读取二进制格式的数据集来训练

最新推荐文章于 2024-01-17 10:00:00 发布

nodototao

最新推荐文章于 2024-01-17 10:00:00 发布

阅读量2.2k

点赞数 3

分类专栏：阿里云PAI 文章标签：深度学习 pytorch 神经网络

本文链接：https://blog.csdn.net/nodototao/article/details/111264992

版权

阿里云PAI 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

本文介绍了如何将数据集转换为类似CIFAR-10的二进制格式，并使用PyTorch框架在猫狗大战数据集上进行训练。首先，通过`pickle`库创建二进制数据文件，然后使用PyTorch加载这些数据并训练简单的卷积神经网络模型。训练过程包括定义数据加载器、模型结构和训练循环。

摘要由CSDN通过智能技术生成

使用pickle制作类CIFAR-10二进制格式的数据集
使用PyTorch框架来训练（以猫狗大战数据集为例）
此方法是为了生成在阿里云PAI Studio上可视化训练模型时所使用的数据格式。

一、制作类cifar10二进制格式数据

import os, cv2
from pickled import *
from load_data import *

# Folder holding the image files named in `file_list`.
data_path = './data_n/test'
# List file passed to read_data (one "image_name label" entry per line).
file_list = './data_n/test.txt'
# Output directory handed to pickled().
save_path = './bin'

if __name__ == '__main__':
  # Load every listed image, requesting 128x128 pixels, as flat rows plus labels/names.
  data, label, lst = read_data(file_list, data_path, shape=128)
  # bin_num=1: write the whole set into a single batch file.
  pickled(save_path, data, label, lst, bin_num=1)

read_data模块（保存为 load_data.py，供上面的 `from load_data import *` 导入）

import cv2
import os
import numpy as np

# Length of one flattened image row: 3 channels * 128 * 128 values.
DATA_LEN = 49152
# Length of a single flattened channel plane: 128 * 128 values.
CHANNEL_LEN = 16384
# Side length in pixels that images are resized to before flattening.
SHAPE = 128


def imread(im_path, shape=None, color="RGB", mode=cv2.IMREAD_UNCHANGED):
    """Read an image from disk, optionally converting to RGB and resizing.

    Args:
        im_path (str): Path of the image file.
        shape (int, optional): When given, the image is resized to
            ``(shape, shape)``.
        color (str): ``"RGB"`` converts the decoded BGR channel order to RGB;
            any other value leaves the channel order as decoded.
        mode (int): Read flag forwarded to ``cv2.imread``.

    Returns:
        numpy.ndarray: The decoded (and possibly converted/resized) image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``im_path``.
        TypeError: If ``shape`` is given but is not an int.
    """
    # Honour the caller's flag; it used to be ignored in favour of
    # IMREAD_UNCHANGED (which is still the default).
    im = cv2.imread(im_path, mode)
    if im is None:
        # cv2.imread signals a missing/undecodable file by returning None
        # instead of raising, which otherwise surfaces later as an obscure
        # cvtColor/resize error.
        raise FileNotFoundError("Can't read image: {}".format(im_path))
    if color == "RGB":
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    if shape is not None:
        if not isinstance(shape, int):
            raise TypeError("shape must be an int, got {!r}".format(shape))
        im = cv2.resize(im, (shape, shape))
    return im


def read_data(filename, data_path, shape=None, color='RGB'):
    """Load the images named in a list file into flat, channel-planar rows.

    Every non-blank line of ``filename`` has the form ``image_name label``.
    Each image is read with ``imread``, resized to a square, and stored as one
    row: the whole plane of channel 0, then channel 1, then channel 2
    (R, G, B when ``color`` is ``'RGB'``).

    Args:
        filename (str): Path of the list file.
        data_path (str): Folder that contains the image files.
        shape (int, optional): Side length images are resized to. Defaults to
            the module constant ``SHAPE``.
        color (str): Colour mode forwarded to ``imread``.

    Returns:
        tuple: ``(data, label, lst)`` where ``data`` is a uint8 array of shape
        ``(n, 3 * side * side)``, ``label`` is a list of int labels and
        ``lst`` is the list of image names, all in list-file order.

    Raises:
        FileNotFoundError: If ``filename`` is not an existing regular file.
    """
    if not os.path.isfile(filename):
        # Fail loudly: returning None here only moved the crash to the
        # caller's tuple unpacking.
        raise FileNotFoundError("Can't find data file: {}".format(filename))

    side = SHAPE if shape is None else shape
    plane = side * side

    # `with` guarantees the list file is closed; blank lines are ignored and
    # the label is taken as the last whitespace-separated token.
    with open(filename) as f:
        entries = [ln.rsplit(None, 1) for ln in f.read().splitlines() if ln.strip()]

    lst = [name for name, _ in entries]
    label = [int(lab) for _, lab in entries]
    data = np.zeros((len(entries), 3 * plane), dtype=np.uint8)

    for idx, name in enumerate(lst):
        im = imread(os.path.join(data_path, name), shape=side, color=color)
        # HWC -> CHW and flatten: channel planes end up back to back.
        data[idx] = im[:, :, :3].transpose(2, 0, 1).reshape(-1)

    return data, label, lst

pickled模块（保存为 pickled.py，供上面的 `from pickled import *` 导入）

import os
import pickle

# Default number of batch files a dataset is split into.
BIN_COUNTS = 5


def pickled(savepath, data, label, fnames, bin_num=BIN_COUNTS, mode="train"):
    """Split a dataset into ``bin_num`` pickle files in a CIFAR-10-like layout.

    File ``<savepath>/batch_tt<i>`` (``i`` counted from 0) holds a dict with
    the keys ``data``, ``labels``, ``filenames`` and ``batch_label``.

    Args:
        savepath (str): Existing directory to write the batch files into.
        data (array): Image data, a 2-D array with one flattened image per row.
        label (list): Image labels, a list with length n.
        fnames (list of str): Image names, a list with length n.
        bin_num (int): Number of files to spread the samples over.
        mode (str): ``'train'`` labels batches as training batches; anything
            else labels them as testing batches.

    Raises:
        NotADirectoryError: If ``savepath`` is not an existing directory.
        ValueError: If ``bin_num`` is not positive or there are no samples.
    """
    if not os.path.isdir(savepath):
        raise NotADirectoryError("savepath is not a directory: {}".format(savepath))
    total_num = len(fnames)
    if bin_num <= 0:
        raise ValueError("bin_num must be positive, got {}".format(bin_num))
    if total_num <= 0:
        raise ValueError("no samples to save")

    kind = "training" if mode == "train" else "testing"
    for i in range(bin_num):
        # Integer arithmetic keeps the bounds exact; float bounds can lose the
        # final sample to rounding (49 * (1 / 49) is slightly below 1).
        start = i * total_num // bin_num
        end = (i + 1) * total_num // bin_num

        print(start)
        print(end)

        batch = {
            'data': data[start:end, :],
            'labels': label[start:end],
            'filenames': fnames[start:end],
            'batch_label': "{} batch {} of {}".format(kind, i, bin_num),
        }
        with open(os.path.join(savepath, 'batch_tt' + str(i)), 'wb') as fi:
            pickle.dump(batch, fi)


def unpickled(filename):
    """Return the object stored in the pickle file ``filename``.

    Raises:
        AssertionError: If ``filename`` is not an existing regular file.
    """
    assert os.path.isfile(filename)
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as handle:
        return pickle.load(handle)

测试生成的二进制数据


import os
import pickle
import numpy as np
import cv2

def load_batch(fpath):
    """Read one pickled batch file and return its ``(data, labels)`` pair.

    The file must contain a dict with at least the keys ``"data"`` and
    ``"labels"``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(fpath, 'rb') as handle:
        batch = pickle.load(handle)
    return batch["data"], batch["labels"]


def load_data(dirname, one_hot=False):
    """Load ``batch_tt0`` from ``dirname`` and show one image as a sanity check.

    The flat rows are reshaped to ``(n, 128, 128, 3)``, the resulting shapes
    are printed, and the image at index 3 is displayed in an OpenCV window
    until a key is pressed.

    Args:
        dirname (str): Directory that contains the ``batch_tt0`` file.
        one_hot (bool): Unused; kept so existing callers keep working.

    Raises:
        IndexError: If the batch holds fewer than four images.
    """
    X_test, _ = load_batch(os.path.join(dirname, 'batch_tt0'))

    # Each row is three 128*128 channel planes back to back; dstack makes the
    # channel the last axis, giving (n, 16384, 3) before the final reshape.
    X_test = np.dstack((X_test[:, :16384], X_test[:, 16384:32768],
                        X_test[:, 32768:]))
    X_test = np.reshape(X_test, [-1, 128, 128, 3])
    print(X_test.shape)
    # Shape the same data would have in channels-first (NCHW) layout.
    print(np.transpose(X_test, (0, 3, 1, 2)).shape)

    img = X_test[3]
    print(img.shape)
    # The stored planes are treated as RGB; imshow expects BGR.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imshow('img', img)
    cv2.waitKey(0)


if __name__ == '__main__':
    # Directory that load_data searches for the `batch_tt0` batch file.
    dirname = 'test'
    load_data(dirname)

二、使用制作好的数据训练

import torch
import os
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import pickle
import numpy as np

#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def load_batch(fpath):
    """Unpickle the batch file at ``fpath`` and return ``(data, labels)``.

    Only the ``"data"`` and ``"labels"`` entries of the stored dict are used.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(fpath, 'rb') as stream:
        content = pickle.load(stream)
    return content["data"], content["labels"]

def load_data(dirname, one_hot=False, train=False):
    """Load the train or test batch from ``dirname`` as channels-last images.

    Args:
        dirname (str): Directory that contains the batch files.
        one_hot (bool): Unused.
        train (bool): When true, read ``data_batch_0``; otherwise read
            ``test_batch_0``.

    Returns:
        tuple: ``(images, labels)`` where ``images`` has shape
        ``(n, 128, 128, 3)`` and ``labels`` is whatever the batch file stored.
    """
    def to_nhwc(flat):
        # Rows are three 128*128 channel planes back to back; dstack puts the
        # channel last, so no transpose to [n, c, h, w] is needed here.
        planes = (flat[:, :16384], flat[:, 16384:32768], flat[:, 32768:])
        return np.reshape(np.dstack(planes), [-1, 128, 128, 3])

    print(dirname)
    if not train:
        ftpath = os.path.join(dirname, 'test_batch_0')
        print(ftpath)
        X_test, Y_test = load_batch(ftpath)
        return to_nhwc(X_test), Y_test

    X_train, Y_train = [], []
    # Only one training batch file is read; further batches would be appended.
    for i in range(1):
        fpath = os.path.join(dirname, 'data_batch_' + str(i))
        print(fpath)
        data, labels = load_batch(fpath)
        if i == 0:
            X_train, Y_train = data, labels
        else:
            X_train = np.concatenate([X_train, data], axis=0)
            Y_train = np.concatenate([Y_train, labels], axis=0)
    return to_nhwc(X_train), Y_train


class MyDataset(torch.utils.data.Dataset):
    """Dataset over the image and label arrays returned by ``load_data``."""

    def __init__(self, namedir, transform=None, train=False):
        """Load the whole train or test batch found under ``namedir``."""
        super().__init__()
        self.namedir = namedir
        self.transform = transform
        self.train = train
        self.datas, self.labels = load_data(namedir, train=train)

    def __getitem__(self, index):
        """Return ``(image, label)`` with the transform applied if one is set."""
        sample = self.datas[index]
        target = int(self.labels[index])
        if self.transform is None:
            return sample, target
        return self.transform(sample), target

    def __len__(self):
        """Return the number of samples, i.e. the number of labels."""
        return len(self.labels)


class MyDataset_s(torch.utils.data.Dataset):
    """Dataset that loads images on demand from paths listed in a text file.

    Every non-blank line of the list file is ``image_path label``, separated
    by whitespace.
    """

    def __init__(self, datatxt, transform=None):
        """Parse the list file ``datatxt`` into ``(path, int label)`` pairs.

        Args:
            datatxt (str): Path of the list file.
            transform (callable, optional): Applied to each loaded image.
        """
        super().__init__()
        # `with` closes the list file (the handle used to stay open), and
        # blank lines are skipped instead of raising IndexError.
        with open(datatxt, 'r') as fh:
            rows = (line.split() for line in fh)
            self.imgs = [(words[0], int(words[1])) for words in rows if words]
        self.transform = transform

    def __getitem__(self, index):
        """Open the image at ``index`` as RGB and return ``(image, label)``."""
        fn, label = self.imgs[index]
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        """Return the number of listed images."""
        return len(self.imgs)

# Per-channel mean / standard deviation used by Normalize below.
mean = [0.5071, 0.4867, 0.4408]
stdv = [0.2675, 0.2565, 0.2761]

transform = transforms.Compose([
    # Optional steps, currently disabled:
    # transforms.Resize([224, 224]),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=stdv),
])

# os.path.join keeps the dataset paths portable; the previous
# 'data\\train\\' literals only resolved on Windows.
train_data = MyDataset(namedir=os.path.join('data', 'train'), transform=transform, train=True)
trainloader = torch.utils.data.DataLoader(dataset=train_data, batch_size=4, shuffle=True)
test_data = MyDataset(namedir=os.path.join('data', 'val'), transform=transform, train=False)
testloader = torch.utils.data.DataLoader(dataset=test_data, batch_size=4, shuffle=True)

# Class names; presumably indexed by label (0 -> cat, 1 -> dog) — confirm
# against the list files used to build the batches.
classes = ('cat', 'dog')

class Net(nn.Module):
    """Small CNN: four conv + max-pool stages, then three linear layers.

    Takes input of shape (n, 3, 128, 128) and returns scores of shape (n, 2).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        # One 2x2 / stride-2 pooling layer is shared by all four stages.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return class scores for a batch shaped (n, 3, 128, 128)."""
        # Every stage halves the spatial size: 128 -> 64 -> 32 -> 16 -> 8,
        # ending with a (n, 32, 8, 8) feature map.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.pool(F.relu(conv(x)))

        x = x.view(-1, 32 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


class VGG16(nn.Module):
    """VGG-16-style classifier with two output classes.

    Expects input of shape (n, 3, 224, 224). The first convolution of each
    stage is unpadded and every pooling layer uses padding 1, so the spatial
    sizes noted below only lead to the 512 x 7 x 7 feature map that ``fc1``
    requires for inputs of about that size.
    """

    def __init__(self):
        super().__init__()
        # Shapes on the right are channels * height * width for a 224 input.
        self.conv1_1 = nn.Conv2d(3, 64, 3)  # 64 * 222 * 222
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=(1, 1))  # 64 * 222 * 222
        self.maxpool1 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 64 * 112 * 112
        self.conv2_1 = nn.Conv2d(64, 128, 3)  # 128 * 110 * 110
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=(1, 1))  # 128 * 110 * 110
        self.maxpool2 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 128 * 56 * 56
        self.conv3_1 = nn.Conv2d(128, 256, 3)  # 256 * 54 * 54
        # 3x3 with padding 1 keeps 54 x 54; a 2x2 kernel with this padding
        # would grow the map to 55 x 55.
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=(1, 1))  # 256 * 54 * 54
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=(1, 1))  # 256 * 54 * 54
        self.maxpool3 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 256 * 28 * 28
        self.conv4_1 = nn.Conv2d(256, 512, 3)  # 512 * 26 * 26
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 26 * 26
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 26 * 26
        self.maxpool4 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 512 * 14 * 14
        self.conv5_1 = nn.Conv2d(512, 512, 3)  # 512 * 12 * 12
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 12 * 12
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 12 * 12
        self.maxpool5 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 512 * 7 * 7

        # Classifier head applied to the flattened 512 * 7 * 7 features.
        self.fc1 = nn.Linear(512 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return raw class scores of shape (n, 2); no softmax is applied."""
        # Each stage: its convolutions (each followed by ReLU), then pooling.
        stages = (
            ((self.conv1_1, self.conv1_2), self.maxpool1),                # 222 -> 112
            ((self.conv2_1, self.conv2_2), self.maxpool2),                # 110 -> 56
            ((self.conv3_1, self.conv3_2, self.conv3_3), self.maxpool3),  # 54 -> 28
            ((self.conv4_1, self.conv4_2, self.conv4_3), self.maxpool4),  # 26 -> 14
            ((self.conv5_1, self.conv5_2, self.conv5_3), self.maxpool5),  # 12 -> 7
        )
        out = x
        for convs, pool in stages:
            for conv in convs:
                out = F.relu(conv(out))
            out = pool(out)

        # Flatten; x.size(0) is the batch size.
        out = out.view(x.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        return self.fc3(out)


class VGG8(nn.Module):
    """Reduced VGG-style classifier: five conv + pool stages, three linear layers.

    Expects input of shape (n, 3, 224, 224) and returns scores of shape (n, 2).
    """

    def __init__(self):
        super().__init__()
        # Shapes on the right are channels * height * width for a 224 input.
        self.conv1_1 = nn.Conv2d(3, 64, 3)  # 64 * 222 * 222
        self.maxpool1 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 64 * 112 * 112
        self.conv2_1 = nn.Conv2d(64, 128, 3)  # 128 * 110 * 110
        self.maxpool2 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 128 * 56 * 56
        self.conv3_1 = nn.Conv2d(128, 256, 3)  # 256 * 54 * 54
        self.maxpool3 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 256 * 28 * 28
        self.conv4_1 = nn.Conv2d(256, 512, 3)  # 512 * 26 * 26
        self.maxpool4 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 512 * 14 * 14
        self.conv5_1 = nn.Conv2d(512, 512, 3)  # 512 * 12 * 12
        self.maxpool5 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 512 * 7 * 7

        # Classifier head applied to the flattened 512 * 7 * 7 features.
        self.fc1 = nn.Linear(512 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return raw class scores of shape (n, 2); no softmax is applied."""
        stages = (
            (self.conv1_1, self.maxpool1),  # 222 -> 112
            (self.conv2_1, self.maxpool2),  # 110 -> 56
            (self.conv3_1, self.maxpool3),  # 54 -> 28
            (self.conv4_1, self.maxpool4),  # 26 -> 14
            (self.conv5_1, self.maxpool5),  # 12 -> 7
        )
        out = x
        for conv, pool in stages:
            out = pool(F.relu(conv(out)))

        # Flatten; x.size(0) is the batch size.
        out = out.view(x.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        return self.fc3(out)


# Model to train: the small CNN (Net) rather than either VGG variant.
net = Net()

# Uncomment together with the `device` definition to run on a GPU.
#net.to(device)
# Cross-entropy over the two raw class scores the network outputs.
criterion = nn.CrossEntropyLoss()
# Plain SGD with momentum over all network parameters.
optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)

if __name__ == '__main__':
    # Train for 11 epochs; evaluate on the test loader after every even epoch.
    for epoch in range(11):
        net.train()
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Report the mean loss of each block of 100 mini-batches.
            if i % 100 == 99:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0
        if epoch % 2 == 0:
            net.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = net(images)
                    # Index of the highest score is the predicted class.
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            if total:
                # Report the real number of evaluated images, not a fixed 1000.
                print('Accuracy of the network on the %d test images: %d %%' % (total, 100 * correct / total))
            else:
                print('No test images to evaluate.')

print('finished !!!')