一、分割数据成train和test 代码:
# data segmentation code # resize origin code to match classifier model and split the data set into training and test set
import sys
import os
import shutil
import csv
import subprocess
import random
import time
import itertools
from PIL import Image
# --- Configuration -----------------------------------------------------------
# EuroSAT paths
imagesPath = 'data/EuroSAT-databack'
converted_path = 'gen'
# NWPU-RESISC45 paths
#imagesPath = '/home/hpc-126/remote-host/NUPW-45/NWPU-RESISC45'
#converted_path = '/home/hpc-126/remote-host/NUPW-45/train224x224'
train_path = ''
test_path = ''
imageWidth = 32      # target width expected by the classifier
imageHeight = 32     # target height expected by the classifier
split_ratio = 0.80   # fraction of each class that goes into the training set
datatype = 'euro'    # 'euro' (EuroSAT) or 'NUPW' (NWPU-RESISC45)

# Map class-folder name -> integer label for the selected dataset.
if datatype == 'euro':
    labels = {
        'AnnualCrop': 0,
        'Forest': 1,
        'HerbaceousVegetation': 2,
        'Highway': 3,
        'Industrial': 4,
        'Pasture': 5,
        'PermanentCrop': 6,
        'Residential': 7,
        'River': 8,
        'SeaLake': 9,
    }
elif datatype == 'NUPW':
    labels = {
        'airplane': 0, 'airport': 1, 'baseball_diamond': 2,
        'basketball_court': 3, 'beach': 4, 'bridge': 5,
        'chaparral': 6, 'church': 7, 'circular_farmland': 8,
        'cloud': 9, 'commercial_area': 10, 'dense_residential': 11,
        'desert': 12, 'forest': 13, 'freeway': 14,
        'golf_course': 15, 'ground_track_field': 16, 'harbor': 17,
        'industrial_area': 18, 'intersection': 19, 'island': 20,
        'lake': 21, 'meadow': 22, 'medium_residential': 23,
        'mobile_home_park': 24, 'mountain': 25, 'overpass': 26,
        'palace': 27, 'parking_lot': 28, 'railway': 29,
        'railway_station': 30, 'rectangular_farmland': 31, 'river': 32,
        'roundabout': 33, 'runway': 34, 'sea_ice': 35,
        'ship': 36, 'snowberg': 37, 'sparse_residential': 38,
        'stadium': 39, 'storage_tank': 40, 'tennis_court': 41,
        'terrace': 42, 'thermal_power_station': 43, 'wetland': 44,
    }
else:
    # Fail fast: the original code only printed a message and carried on with
    # `labels` still a string, which crashed later with a confusing TypeError
    # inside convert_images().
    raise ValueError('please specify the data type : euro NUPW')
def remove_dir(path):
    """Recursively delete the directory tree at `path`.

    A missing directory (errno 2 / ENOENT) is silently ignored so the caller
    can always "start clean"; any other OSError (permissions, busy mount, ...)
    is re-raised.
    """
    try:
        shutil.rmtree(path)
    except FileNotFoundError:
        # Directory already gone -- nothing to do.  (The original used the
        # Python-2-only `except OSError, e:` syntax and checked e.errno == 2;
        # FileNotFoundError is exactly OSError with errno ENOENT.)
        pass
def convert_images(path):
    """Resize every image under `path` and split each class into train/test.

    Expects the layout path/<class_name>/<image files>.  For each class folder
    the files are shuffled, the first `split_ratio` fraction is written to
    `converted_path`/train/<label>/ and the rest to `converted_path`/test/<label>/,
    resized to imageWidth x imageHeight.  `.tif` inputs are saved as `.jpg`.

    Returns the list of output file paths (the original collected this list
    but never appended to it).
    """
    images = []
    train_dir = os.path.join(converted_path, 'train')
    test_dir = os.path.join(converted_path, 'test')
    os.mkdir(train_dir)
    os.mkdir(test_dir)
    for root, dirs, files in os.walk(path):
        if root == path:
            continue  # skip the dataset root itself; only class folders matter
        category = os.path.basename(root)
        label = labels[category]
        class_train_dir = os.path.join(train_dir, str(label))
        class_test_dir = os.path.join(test_dir, str(label))
        os.mkdir(class_train_dir)
        os.mkdir(class_test_dir)
        random.shuffle(files)  # randomize which files land in train vs. test
        n_train = int(len(files) * split_ratio)
        for count, name in enumerate(files):
            im = Image.open(os.path.join(root, name))
            if im.size != (imageWidth, imageHeight):
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                im = im.resize((imageWidth, imageHeight), Image.LANCZOS)
            # Save .tif inputs under a .jpg name; keep other names as-is.
            jpeg_name = name.replace(".tif", ".jpg") if '.tif' in name else name
            dest_dir = class_train_dir if count < n_train else class_test_dir
            out_path = os.path.join(dest_dir, jpeg_name)
            im.save(out_path)
            images.append(out_path)
    return images
def main(argv):
    """Entry point: rebuild `converted_path` from scratch, then convert."""
    if os.path.exists(converted_path):
        remove_dir(converted_path)  # wipe any previous run's output
    os.mkdir(converted_path)
    convert_images(imagesPath)


if __name__ == "__main__":
    main(sys.argv)
二、执行转换 pickle 文件操作 代码:(Python 3 下用 pickle 库,Python 2 用 cPickle;但用 Python 3 的 pickle 读取 Python 2 生成的文件时会出错)
import PIL.Image as Image
# NOTE(review): `scipy.misc.imsave` was removed in SciPy 1.2 and is not used
# anywhere in this script -- the import fails on modern SciPy; consider
# deleting it.
from scipy.misc import imsave
import numpy as np
import random
import pickle
import os
#
def initPKL(imgSet_shuffle, train_or_test):
    """Serialize an image set to trainSet.pkl / testSet.pkl.

    imgSet_shuffle: iterable of (image_array, label, label_name) triples.
    train_or_test: 'train' writes trainSet.pkl; anything else testSet.pkl.

    The pickle payload is a 3-tuple of numpy arrays:
    (images, labels, label_names).
    """
    set_name = 'trainSet.pkl' if train_or_test == 'train' else 'testSet.pkl'
    imgSet = []
    labels = []
    label_names = []
    for img, label, label_name in imgSet_shuffle:
        imgSet.append(img)
        labels.append(label)
        label_names.append(label_name)
    # The original repacked the tuple twice (arr -> data); once is enough.
    data = (np.array(imgSet), np.array(labels), np.array(label_names))
    # `with` guarantees the file is closed even if pickle.dump raises.
    with open(set_name, 'wb') as output:
        pickle.dump(data, output)
def initArr(folders_path):
    """Read a directory of per-class folders into a flat sample list.

    Returns a list of (image_array, one_hot_label, folder_name) triples,
    one per image file found under folders_path/<class_folder>/.
    """
    folders = os.listdir(folders_path)
    num_classes = len(folders)
    imgSet = []
    for i, folder in enumerate(folders):
        # One-hot label sized to the actual number of class folders.  The
        # original hard-coded 10 slots, which breaks for the 45-class
        # NWPU-RESISC45 dataset (IndexError past class 9).
        label = [0] * num_classes
        label[i] = 1
        folder_path = os.path.join(folders_path, folder)
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            # NOTE(review): this maps [0, 255] to [-1, 3], not [-1, 1]; the
            # usual normalization would be `/ 127.5 - 1.0`.  Kept as-is to
            # preserve the existing pipeline -- confirm which was intended.
            img_arr = np.array(Image.open(file_path)) / 127.5 * 2.0 - 1.0
            print(file_path)
            imgSet.append((img_arr, label, folder))
    return imgSet
# Build the train/test sample lists from the folders produced by the split
# script, shuffle them, and pickle them.
train_folders_path = 'gen/train'
test_folders_path = 'gen/test/'

train_imgSet = initArr(train_folders_path)
test_imgSet = initArr(test_folders_path)

# In-place shuffles so samples from different classes are interleaved.
random.shuffle(train_imgSet)
random.shuffle(test_imgSet)

# NOTE: the original wrapped the shuffled lists in np.array(), producing a
# ragged object array (an error on modern NumPy) -- unnecessary, because
# initPKL only iterates its argument.
initPKL(train_imgSet, 'train')
initPKL(test_imgSet, 'test')

# Sanity check: reload the train pickle and inspect one sample.
with open('./trainSet.pkl', 'rb') as f:
    x, y, z = pickle.load(f)
print(np.shape(x[3]), y[3], z[3])
三、Mgan中调用 代码:
def main(_):
    # Load the (images, labels, label_names) tuple written by the pickle step
    # above (this used to load cifar10_train.pkl).
    tmp,label,lname = pickle.load(open("trainSet.pkl", "rb"))
    #print(tmp)
    # The pickle built above stores plain arrays (no ['data'] key, unlike the
    # original CIFAR-10 pickle) with pixels already pre-scaled, so no further
    # `/ 127.5 - 1.` normalization is applied here -- that division/subtraction
    # can be seen as normalization to [-1, 1].
    x_train = tmp.astype(np.float32).reshape([-1, 32, 32, 3]) # / 127.5 - 1.
四、python3下读取python2生成的pickle文件时候出错。解决方法是 生成文件python3下生成,这样就可以 用python3下的方法读取不出错。或者 按下面的方法解决。
python的pickle模块实现了基本的数据序列和反序列化。通过pickle模块的序列化操作我们能够将程序中运行的对象信息保存到文件中去,永久存储;通过pickle模块的反序列化操作,我们能够从文件中创建上一次程序保存的对象。
python2使用的是cPickle模块,而在python3中cPickle已经被取消,取而代之的是pickle模块。
开发过程中,我曾经遇到一个奇怪的问题,在读取一个文件时候,使用python2的如下方式:
import cPickle
train, test, dicts = cPickle.load(open("./dataset/atis.pkl"))
是可以正常读取文件的。
可是当换做python3的方式读取文件时候,如下:
import pickle
train, test, dicts = pickle.load(open("./dataset/atis.pkl"))
却获得了错误提示,提示信息如下:
Traceback (most recent call last):
  File "Main.py", line 4, in <module>
    train, test, dicts = pickle.load(open("./dataset/atis.pkl"))
TypeError: 'str' does not support the buffer interface
查询错误信息后得知解决办法 链接,应该指明用二进制方式打开文件,于是代码改为:
import pickle
train, test, dicts = pickle.load(open("./dataset/atis.pkl", "rb"))
可是这时候错误变成了:
Traceback (most recent call last):
  File "Main.py", line 4, in <module>
    train, test, dicts = pickle.load(open("./dataset/atis.pkl", "rb"))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 0: ordinal not in range(128)
于是再次求助万能的google,终于找到了解决办法 链接,我们需要告诉pickle:how to convert Python bytestring data to Python 3 strings,The default is to try and decode all string data as ASCII,所以代码改为:
import pickle
train, test, dicts = pickle.load(open("./dataset/atis.pkl", "rb"), encoding='iso-8859-1')
问题终于得到了解决。
ISO8859-1,通常叫做 Latin-1。Latin-1 包括了书写所有西方欧洲语言不可缺少的附加字符。iso8859-1 和 ascii 编码相似。但为了方便表示各种各样的语言,逐渐出现了很多标准编码,重要的有如下几个。
而 gb2312 是标准中文字符集。
UTF-8 是 UNICODE 的一种变长字符编码,即 RFC 3629。简单的说——大字符集。可以解决多种语言文本显示问题,从而实现应用国际化和本地化。
https://zhidao.baidu.com/question/26613602.html
https://www.cnblogs.com/doudou-taste/p/7351278.html