使用pickle制作类cifar10二进制格式的数据集
使用PyTorch框架来训练(以猫狗大战数据集为例)
此方法是为了实现阿里云PAI studio上可视化训练模型时使用的数据格式。
一、制作类cifar10二进制格式数据
import os, cv2
from pickled import *
from load_data import *
# Folder holding the images named in ``file_list``.
data_path = './data_n/test'
# Index file handed to read_data(): one "image_name label" entry per line.
file_list = './data_n/test.txt'
# Folder that receives the pickled batch file(s).
save_path = './bin'

if __name__ == '__main__':
    # pickled() refuses to write into a folder that does not exist yet,
    # so make sure the output folder is there before doing any work.
    os.makedirs(save_path, exist_ok=True)
    data, label, lst = read_data(file_list, data_path, shape=128)
    pickled(save_path, data, label, lst, bin_num=1)
read_data模块
import cv2
import os
import numpy as np
# Side length, in pixels, that images are resized to.
SHAPE = 128
# Number of values in one colour plane of a SHAPE x SHAPE image.
CHANNEL_LEN = SHAPE * SHAPE
# Length of one flattened sample: three colour planes stored back to back.
DATA_LEN = 3 * CHANNEL_LEN
def imread(im_path, shape=None, color="RGB", mode=cv2.IMREAD_UNCHANGED):
    """Read an image from disk, optionally converting to RGB and resizing.

    Args:
        im_path (str): path of the image file.
        shape (int | None): when given, the image is resized to
            ``shape x shape`` pixels; ``None`` keeps the decoded size.
        color (str): ``"RGB"`` converts the decoded BGR image to RGB; any
            other value leaves the channel order as decoded.
        mode (int): OpenCV ``imread`` flag used to decode the file.

    Returns:
        The decoded (and possibly converted / resized) image array.

    Raises:
        IOError: if OpenCV could not read ``im_path``.
    """
    # Honour the caller's flag; it used to be ignored in favour of a
    # hard-coded IMREAD_UNCHANGED (which is still the default value).
    im = cv2.imread(im_path, mode)
    if im is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise IOError("Can't read image file: {}".format(im_path))
    if color == "RGB":
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    if shape is not None:
        assert isinstance(shape, int)
        im = cv2.resize(im, (shape, shape))
    return im
def read_data(filename, data_path, shape=None, color='RGB'):
    """Load every image listed in an index file into a flat uint8 matrix.

    Each non-empty line of ``filename`` has the form ``image_name label``.
    Every image is resized to a square and stored as one row, laid out plane
    by plane: all channel-0 values, then channel 1, then channel 2.

    Args:
        filename (str): path of the index file.
        data_path (str): folder containing the listed images.
        shape (int | None): side length images are resized to; ``None``
            falls back to the module constant ``SHAPE``.
        color (str): forwarded to ``imread``.

    Returns:
        tuple: ``(data, label, lst)`` - a ``(n, 3 * side * side)`` uint8
        array, a list of ``n`` int labels and a list of ``n`` image names.

    Raises:
        IsADirectoryError: if ``filename`` is a directory.
    """
    if os.path.isdir(filename):
        # Fail with a clear error instead of printing and then crashing on
        # the first use of the never-assigned line list.
        raise IsADirectoryError("Can't found data file!")

    with open(filename) as f:
        # rsplit keeps image names that themselves contain spaces intact;
        # blank lines are skipped instead of breaking the unpacking below.
        entries = [ln.rsplit(None, 1) for ln in f.read().splitlines()
                   if ln.strip()]

    side = SHAPE if shape is None else shape
    plane = side * side
    data = np.zeros((len(entries), 3 * plane), dtype=np.uint8)
    lst = []
    label = []
    for idx, (fname, lab) in enumerate(entries):
        im = imread(os.path.join(data_path, fname), shape=side, color=color)
        for ch in range(3):
            data[idx, ch * plane:(ch + 1) * plane] = np.reshape(im[:, :, ch], plane)
        lst.append(fname)
        label.append(int(lab))
    return data, label, lst
pickled模块
import os
import pickle
# Default number of batch files a dataset is split into.
BIN_COUNTS = 5


def pickled(savepath, data, label, fnames, bin_num=BIN_COUNTS, mode="train"):
    """Split a dataset into ``bin_num`` consecutive batches and pickle each.

    Batch ``i`` is written to ``<savepath>/batch_tt<i>`` as a dict with the
    keys ``'data'``, ``'labels'``, ``'filenames'`` and ``'batch_label'``.

    Args:
        savepath (str): existing folder that receives the batch files.
        data (array): 2-D array with one row per sample.
        label (list): labels, one per sample.
        fnames (list of str): image names, one per sample; its length
            defines the number of samples.
        bin_num (int): number of batch files to write.
        mode (str): ``'train'`` labels the batches "training"; any other
            value labels them "testing".

    Raises:
        NotADirectoryError: if ``savepath`` is not an existing folder.
        ValueError: if ``bin_num`` is not positive or there are no samples.
    """
    if not os.path.isdir(savepath):
        raise NotADirectoryError(
            "save path is not a directory: {}".format(savepath))
    total_num = len(fnames)
    if bin_num <= 0 or total_num <= 0:
        raise ValueError("need at least one sample and one bin")

    kind = "training" if mode == "train" else "testing"
    for idx in range(bin_num):
        # Integer boundaries: the last bin always ends exactly at total_num,
        # whereas float products could round down and drop a sample.
        start = idx * total_num // bin_num
        end = (idx + 1) * total_num // bin_num
        batch = {
            'data': data[start:end, :],
            'labels': label[start:end],
            'filenames': fnames[start:end],
            'batch_label': "{} batch {} of {}".format(kind, idx, bin_num),
        }
        # NOTE: the file name does not depend on ``mode``.
        with open(os.path.join(savepath, 'batch_tt' + str(idx)), 'wb') as fi:
            pickle.dump(batch, fi)
def unpickled(filename):
    """Load and return the object stored in the pickle file ``filename``.

    ``filename`` must be an existing regular file.
    """
    assert os.path.isfile(filename)
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    with open(filename, 'rb') as stream:
        return pickle.load(stream)
测试生成的二进制数据
import os
import pickle
import numpy as np
import cv2
def load_batch(fpath):
    """Return the ``"data"`` and ``"labels"`` entries of a pickled batch file."""
    with open(fpath, 'rb') as stream:
        batch = pickle.load(stream)
    return batch["data"], batch["labels"]
def load_data(dirname, one_hot=False):
    """Load the pickled batch ``batch_tt0`` from ``dirname`` and show one image.

    The flat rows are rebuilt into ``(n, 128, 128, 3)`` images, the shapes are
    printed, and the fourth image is displayed in an OpenCV window until a
    key is pressed. ``one_hot`` is accepted but not used.
    """
    train_x, train_y = [], []
    # NOTE: range(0) is empty, so this training-batch loop never executes.
    for batch_no in range(0):
        batch_file = os.path.join(dirname, 'data_batch_' + str(batch_no))
        print(batch_file)
        data, labels = load_batch(batch_file)
        if batch_no == 0:
            train_x, train_y = data, labels
        else:
            train_x = np.concatenate([train_x, data], axis=0)
            train_y = np.concatenate([train_y, labels], axis=0)

    X_test, Y_test = load_batch(os.path.join(dirname, 'batch_tt0'))
    # Each row holds three consecutive 16384-value colour planes; stacking
    # them along a new last axis yields per-pixel channel triples.
    planes = (X_test[:, :16384], X_test[:, 16384:32768], X_test[:, 32768:])
    X_test = np.reshape(np.dstack(planes), [-1, 128, 128, 3])
    print(X_test.shape)
    # channels-first view, printed for its shape only
    print(np.transpose(X_test, (0, 3, 1, 2)).shape)

    img = X_test[2:4][1]
    print(img.shape)
    # the stored planes are converted back to BGR before display
    cv2.imshow('img', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)
if __name__ == '__main__':
    # Folder that load_data() reads the pickled 'batch_tt0' file from.
    dirname = 'test'
    load_data(dirname=dirname)
二、使用制作好的数据训练
import torch
import os
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import pickle
import numpy as np
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def load_batch(fpath):
    """Unpickle ``fpath`` and return its ``(data, labels)`` pair."""
    with open(fpath, 'rb') as handle:
        content = pickle.load(handle)
    return tuple(content[key] for key in ("data", "labels"))
def load_data(dirname, one_hot=False, train=False):
    """Load pickled batches from ``dirname`` as ``(n, 128, 128, 3)`` images.

    Args:
        dirname (str): folder containing the batch files.
        one_hot (bool): accepted but not used.
        train (bool): ``True`` reads ``data_batch_0``; ``False`` reads
            ``test_batch_0``.

    Returns:
        tuple: ``(images, labels)``; images stay channels-last.
    """
    print(dirname)

    if not train:
        test_file = os.path.join(dirname, 'test_batch_0')
        print(test_file)
        X_test, Y_test = load_batch(test_file)
        # Rows hold three consecutive 16384-value colour planes; dstack turns
        # them into per-pixel triples. No transpose: layout stays [n, h, w, c].
        X_test = np.dstack(
            (X_test[:, :16384], X_test[:, 16384:32768], X_test[:, 32768:]))
        return np.reshape(X_test, [-1, 128, 128, 3]), Y_test

    X_train, Y_train = [], []
    for batch_no in range(1):
        batch_file = os.path.join(dirname, 'data_batch_' + str(batch_no))
        print(batch_file)
        data, labels = load_batch(batch_file)
        if batch_no == 0:
            X_train, Y_train = data, labels
        else:
            X_train = np.concatenate([X_train, data], axis=0)
            Y_train = np.concatenate([Y_train, labels], axis=0)
    X_train = np.dstack(
        (X_train[:, :16384], X_train[:, 16384:32768], X_train[:, 32768:]))
    return np.reshape(X_train, [-1, 128, 128, 3]), Y_train
class MyDataset(torch.utils.data.Dataset):
    """Dataset serving samples from the pickled batches read by ``load_data``."""

    def __init__(self, namedir, transform=None, train=False):
        """Load all samples of the chosen split from ``namedir`` up front.

        ``transform``, when given, is applied to each sample on access.
        """
        super().__init__()
        self.namedir, self.transform, self.train = namedir, transform, train
        self.datas, self.labels = load_data(self.namedir, train=self.train)

    def __getitem__(self, index):
        """Return ``(sample, label)`` for ``index``; the label is cast to int."""
        sample = self.datas[index]
        target = int(self.labels[index])
        if self.transform is None:
            return sample, target
        return self.transform(sample), target

    def __len__(self):
        """Number of samples, taken from the label list."""
        return len(self.labels)
class MyDataset_s(torch.utils.data.Dataset):
    """Dataset that reads images lazily from paths listed in a text file.

    Every non-blank line of the list file is ``image_path label``.
    """

    def __init__(self, datatxt, transform=None):
        """Parse ``datatxt`` into ``(path, int label)`` pairs.

        Args:
            datatxt (str): path of the list file.
            transform (callable | None): applied to each loaded image.
        """
        super().__init__()
        # The context manager closes the list file (it used to stay open);
        # blank lines are skipped instead of raising IndexError.
        with open(datatxt, 'r') as fh:
            rows = (line.split() for line in fh)
            self.imgs = [(words[0], int(words[1])) for words in rows if words]
        self.transform = transform

    def __getitem__(self, index):
        """Open image ``index`` as RGB and return ``(image, label)``."""
        fn, label = self.imgs[index]
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        """Number of listed images."""
        return len(self.imgs)
# Per-channel statistics handed to transforms.Normalize below.
# NOTE(review): these look like CIFAR-100 statistics - confirm they suit
# the cat/dog images used here.
mean = [0.5071, 0.4867, 0.4408]
stdv = [0.2675, 0.2565, 0.2761]

# Preprocessing applied to every sample by MyDataset.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=stdv),
])

# Training split and its loader.
train_data = MyDataset(
    namedir='data\\train\\',
    transform=transform,
    train=True,
)
trainloader = torch.utils.data.DataLoader(train_data, batch_size=4, shuffle=True)

# Evaluation split and its loader.
test_data = MyDataset(
    namedir='data\\val\\',
    transform=transform,
    train=False,
)
testloader = torch.utils.data.DataLoader(test_data, batch_size=4, shuffle=True)

# Class names; the position in the tuple is the label index.
classes = ('cat', 'dog')
class Net(nn.Module):
    """Small CNN: four conv + max-pool stages followed by three linear layers.

    The classifier head expects 32 x 8 x 8 features, which a 128 x 128 input
    produces after the four 2x poolings; the output has two logits per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        # one pooling layer shared by all four stages
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Map a ``(n, 3, 128, 128)`` batch to ``(n, 2)`` logits."""
        # (n, 3, 128, 128) -> (n, 16, 64, 64) -> (n, 32, 32, 32)
        #                  -> (n, 64, 16, 16) -> (n, 32, 8, 8)
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.pool(F.relu(conv(x)))
        x = x.view(-1, 32 * 8 * 8)
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)
class VGG16(nn.Module):
    """VGG16-style network with a two-class head.

    The size comments assume a 3 x 224 x 224 input, which is what the
    ``512 * 7 * 7`` input width of ``fc1`` requires.
    """

    def __init__(self):
        super(VGG16, self).__init__()
        # 3 * 224 * 224
        self.conv1_1 = nn.Conv2d(3, 64, 3)  # 64 * 222 * 222
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=(1, 1))  # 64 * 222 * 222
        self.maxpool1 = nn.MaxPool2d((2, 2), padding=(1, 1))  # pooling 64 * 112 * 112
        self.conv2_1 = nn.Conv2d(64, 128, 3)  # 128 * 110 * 110
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=(1, 1))  # 128 * 110 * 110
        self.maxpool2 = nn.MaxPool2d((2, 2), padding=(1, 1))  # pooling 128 * 56 * 56
        self.conv3_1 = nn.Conv2d(128, 256, 3)  # 256 * 54 * 54
        # Kernel size was 2 here, which grew the map to 55 x 55 and
        # contradicted the size comment; every other conv is 3 x 3.
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=(1, 1))  # 256 * 54 * 54
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=(1, 1))  # 256 * 54 * 54
        self.maxpool3 = nn.MaxPool2d((2, 2), padding=(1, 1))  # pooling 256 * 28 * 28
        self.conv4_1 = nn.Conv2d(256, 512, 3)  # 512 * 26 * 26
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 26 * 26
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 26 * 26
        self.maxpool4 = nn.MaxPool2d((2, 2), padding=(1, 1))  # pooling 512 * 14 * 14
        self.conv5_1 = nn.Conv2d(512, 512, 3)  # 512 * 12 * 12
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 12 * 12
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=(1, 1))  # 512 * 12 * 12
        self.maxpool5 = nn.MaxPool2d((2, 2), padding=(1, 1))  # pooling 512 * 7 * 7
        # classifier head, applied to the flattened feature map
        self.fc1 = nn.Linear(512 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return ``(batch, 2)`` logits for a ``(batch, 3, 224, 224)`` input."""
        # x.size(0) is the batch size
        in_size = x.size(0)
        # each stage: its convolutions (ReLU after each), then its pooling
        stages = (
            ((self.conv1_1, self.conv1_2), self.maxpool1),                # -> 112
            ((self.conv2_1, self.conv2_2), self.maxpool2),                # -> 56
            ((self.conv3_1, self.conv3_2, self.conv3_3), self.maxpool3),  # -> 28
            ((self.conv4_1, self.conv4_2, self.conv4_3), self.maxpool4),  # -> 14
            ((self.conv5_1, self.conv5_2, self.conv5_3), self.maxpool5),  # -> 7
        )
        out = x
        for convs, pool in stages:
            for conv in convs:
                out = F.relu(conv(out))
            out = pool(out)
        # flatten
        out = out.view(in_size, -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        # raw logits; no softmax is applied here
        return self.fc3(out)
class VGG8(nn.Module):
    """Five conv + pool stages followed by three linear layers (two outputs).

    The size comments assume a 3 x 224 x 224 input, which is what the
    ``512 * 7 * 7`` input width of ``fc1`` requires.
    """

    def __init__(self):
        super().__init__()
        # input: 3 * 224 * 224
        self.conv1_1 = nn.Conv2d(3, 64, 3)  # 64 * 222 * 222
        self.maxpool1 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 64 * 112 * 112
        self.conv2_1 = nn.Conv2d(64, 128, 3)  # 128 * 110 * 110
        self.maxpool2 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 128 * 56 * 56
        self.conv3_1 = nn.Conv2d(128, 256, 3)  # 256 * 54 * 54
        self.maxpool3 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 256 * 28 * 28
        self.conv4_1 = nn.Conv2d(256, 512, 3)  # 512 * 26 * 26
        self.maxpool4 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 512 * 14 * 14
        self.conv5_1 = nn.Conv2d(512, 512, 3)  # 512 * 12 * 12
        self.maxpool5 = nn.MaxPool2d((2, 2), padding=(1, 1))  # 512 * 7 * 7
        # classifier head, applied to the flattened feature map
        self.fc1 = nn.Linear(512 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return ``(batch, 2)`` logits for a ``(batch, 3, 224, 224)`` input."""
        # x.size(0) is the batch size
        in_size = x.size(0)
        stages = (
            (self.conv1_1, self.maxpool1),  # 222 -> 112
            (self.conv2_1, self.maxpool2),  # 110 -> 56
            (self.conv3_1, self.maxpool3),  # 54 -> 28
            (self.conv4_1, self.maxpool4),  # 26 -> 14
            (self.conv5_1, self.maxpool5),  # 12 -> 7
        )
        out = x
        for conv, pool in stages:
            out = pool(F.relu(conv(out)))
        # flatten
        out = out.view(in_size, -1)
        for hidden in (self.fc1, self.fc2):
            out = F.relu(hidden(out))
        # raw logits; no softmax is applied here
        return self.fc3(out)
# Model, loss and optimiser used by the training loop below.
net = Net()
# net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)

if __name__ == '__main__':
    for epoch in range(11):
        # ---- one pass over the training data ----
        loss_sum = 0.0
        for step, (inputs, labels) in enumerate(trainloader, start=1):
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            # report the mean loss of every 100 mini-batches
            if step % 100 == 0:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, step, loss_sum / 100))
                loss_sum = 0.0

        # ---- evaluate on every second epoch (0, 2, 4, ...) ----
        if epoch % 2 != 0:
            continue
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in testloader:
                # index of the larger logit is the predicted class
                predicted = torch.max(net(images).data, 1)[1]
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print('Accuracy of the network on the 1000 test images: %d %%' % (100 * correct / total))
    print('finished !!!')