代码地址
CIFAR-10数据集
环境准备
python3.6
tensorflow1.14
keras2.2.5
其他的不用指定版本
数据集改为本地文件
环境准备好了以后,可以直接运行good-llp-gan.py,这里代码会去下载CIFAR-10数据集,会很慢,我们可以先下载下来,然后本地直接加载。
下载好cifar-10-python.tar.gz,解压,放在dataset文件夹下。
load_local_cifar10.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.keras import backend as K
import numpy as np
import os
import sys
from six.moves import cPickle
def load_batch(fpath, label_key='labels'):
    """Parse a single pickled CIFAR batch file.

    # Arguments
        fpath: path of the batch file to read.
        label_key: dictionary key under which the labels are stored.

    # Returns
        A tuple `(data, labels)`, where `data` has been reshaped to
        `(num_samples, 3, 32, 32)`.
    """
    running_py2 = sys.version_info < (3,)
    with open(fpath, 'rb') as handle:
        if running_py2:
            batch = cPickle.load(handle)
        else:
            # Under Python 3 the keys are loaded as bytes; turn them back
            # into text so they can be looked up with ordinary strings.
            raw = cPickle.load(handle, encoding='bytes')
            batch = {key.decode('utf8'): value for key, value in raw.items()}
    pixels = batch['data']
    labels = batch[label_key]
    return pixels.reshape(pixels.shape[0], 3, 32, 32), labels
def load_data(ROOT):
    """Loads the CIFAR10 dataset from a local directory.

    # Arguments
        ROOT: directory containing the `data_batch_1` .. `data_batch_5`
            and `test_batch` files.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    # The batches are read from the local directory instead of being
    # downloaded.
    batch_size = 10000
    num_train_samples = 50000

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples,), dtype='uint8')

    # Copy each of the five training batches into its slice of the
    # pre-allocated arrays.
    for batch_no in range(1, 6):
        start = (batch_no - 1) * batch_size
        stop = batch_no * batch_size
        batch_file = os.path.join(ROOT, 'data_batch_' + str(batch_no))
        images, labels = load_batch(batch_file)
        x_train[start:stop, :, :, :] = images
        y_train[start:stop] = labels

    x_test, y_test = load_batch(os.path.join(ROOT, 'test_batch'))

    # Labels are returned as column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # Stored layout is channels-first; move the channel axis last when the
    # backend is configured for it.
    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)
然后修改utils.py中的load_data()函数
def load_data():
    """Return the CIFAR-10 train/validation/test split, caching it on disk.

    The split is cached as six pickle files inside the directory
    ``hp.DATASET_DIR + hp.target_dataset``. When all six files are present
    they are loaded directly; otherwise the split is rebuilt from the local
    CIFAR-10 batches and the cache is (re)written.

    Returns:
        A tuple ``(train_X, train_y, valid_X, valid_y, test_X, test_y)``.
        Images are float32 scaled to [0, 1]; labels are one-hot encoded over
        10 classes. 5000 samples are held out for the test set and another
        5000 for the validation set.

    Raises:
        ValueError: if ``hp.target_dataset`` is not ``"CIFAR-10"``.
    """
    if hp.target_dataset != "CIFAR-10":
        # Fail with a clear message instead of reaching the return statement
        # with unassigned variables.
        raise ValueError("unsupported target dataset: %r" % (hp.target_dataset,))

    cache_dir = hp.DATASET_DIR + hp.target_dataset
    # Order matches the order of the returned tuple.
    split_names = ("train_X", "train_y", "valid_X", "valid_y", "test_X", "test_y")
    cache_files = [cache_dir + "/" + name + ".pkl" for name in split_names]

    # Require every cache file, not just the directory: a run interrupted
    # while writing would otherwise leave a partial cache that can never be
    # loaded.
    if all(os.path.isfile(path) for path in cache_files):
        print("load data from pickle")
        cached = []
        for path in cache_files:
            # NOTE: pickle must only be used on trusted files; these are the
            # files written by this function further below.
            with open(path, 'rb') as f:
                cached.append(pickle.load(f))
        return tuple(cached)

    # (cifar_X_1, cifar_y_1), (cifar_X_2, cifar_y_2) = cifar10.load_data()
    (cifar_X_1, cifar_y_1), (cifar_X_2, cifar_y_2) = load_local_cifar10.load_data(
        'dataset/cifar-10-batches-py')

    # Merge the provided train/test parts, then re-split below.
    cifar_X = np.r_[cifar_X_1, cifar_X_2]
    cifar_y = np.r_[cifar_y_1, cifar_y_2]

    cifar_X = cifar_X.astype('float32') / 255.0
    # Index rows of the identity matrix to one-hot encode the labels.
    cifar_y = np.eye(10)[cifar_y.astype('int32').flatten()]

    train_X, test_X, train_y, test_y = train_test_split(
        cifar_X, cifar_y, test_size=5000, random_state=hp.RANDOM_STATE)
    train_X, valid_X, train_y, valid_y = train_test_split(
        train_X, train_y, test_size=5000, random_state=hp.RANDOM_STATE)

    splits = (train_X, train_y, valid_X, valid_y, test_X, test_y)

    # makedirs + exist_ok: works when parent directories are missing and
    # when a partially written cache directory is already present.
    os.makedirs(cache_dir, exist_ok=True)
    for path, array in zip(cache_files, splits):
        with open(path, 'wb') as f1:
            pickle.dump(array, f1)

    return splits
修改good-llp-gan.py
# Original call (commented out):
# trainx, trainy, validx, validy, testx, testy = utils.load_data()
# Location of the extracted local CIFAR-10 batch files.
# NOTE(review): this variable is not passed to utils.load_data() below —
# confirm whether it is used elsewhere in the script.
cifar10_dir = 'dataset/cifar-10-batches-py'
# Load the train/validation/test splits through utils.load_data().
trainx, trainy, validx, validy, testx, testy = utils.load_data()
然后运行。也可以使用GPU加速,只要加入这两行(适用于TensorFlow 1.x):
# Install a TensorFlow session with an explicit ConfigProto as the Keras
# backend session.
# NOTE(review): `tf` is presumably imported earlier in the script
# (`import tensorflow as tf`) — confirm.
import keras.backend.tensorflow_backend as KTF
# NOTE(review): `device_count` maps a device type name to the maximum number
# of devices of that type to use, so a value of 0 would limit rather than
# enable that device type — confirm the key spelling and the intended value
# against the ConfigProto documentation.
KTF.set_session(tf.Session(config=tf.ConfigProto(device_count={'gpu': 0})))