MNIST数据可视化:我参考了 http://blog.csdn.net/u014046170/article/details/47445919#python 的代码,将其中两个函数的功能合并到了一起。更改后的代码记录在这里,作为备忘。
from PIL import Image
import struct,os
def _download(data_dir):
    """Make sure the four raw MNIST idx files are present in ``data_dir``.

    ``data_dir`` is created when it does not exist yet (including any
    missing parent directories).  If any of the four idx files is absent,
    ``mnist.zip`` is fetched with ``wget``, unpacked with ``unzip`` and then
    removed.  The process working directory is the same after the call as
    before it, whatever ``data_dir`` looks like (nested, absolute, ...).

    Args:
        data_dir: Directory that should hold the MNIST idx files.
    """
    required_files = (
        'train-images-idx3-ubyte',
        'train-labels-idx1-ubyte',
        't10k-images-idx3-ubyte',
        't10k-labels-idx1-ubyte',
    )

    if not os.path.isdir(data_dir):
        # os.makedirs instead of a shell "mkdir": no shell string is built
        # from the argument, and nested paths / names with spaces work.
        os.makedirs(data_dir)

    # Remember where we started: a hard-coded chdir("..") only returns to
    # the starting directory when data_dir is a single relative component.
    previous_dir = os.getcwd()
    os.chdir(data_dir)
    try:
        if not all(os.path.exists(name) for name in required_files):
            # The shell commands run relative to the current directory,
            # which is why we chdir into data_dir first.
            # NOTE(review): their exit status is ignored (best effort), so a
            # failed download only surfaces later when the files are opened.
            os.system("wget http://webdocs.cs.ualberta.ca/~bx3/data/mnist.zip")
            os.system("unzip -u mnist.zip; rm mnist.zip")
    finally:
        os.chdir(previous_dir)
def vision(train_data, train_label, save_dir):
    """Dump every image of an MNIST idx image/label file pair as a PNG.

    Image number ``i`` whose label is ``d`` is written to
    ``<save_dir>/<d>/<i>.png``.  The per-label sub-directories are created
    on demand, and one progress line is printed per image.

    Args:
        train_data: Path of the idx3-ubyte image file.
        train_label: Path of the idx1-ubyte label file.
        save_dir: Root directory for the generated PNG files.

    Raises:
        ValueError: If either file does not start with the expected idx
            magic number, or the two headers disagree on the item count.
    """
    # Read both files completely; "with" closes them even if read() fails.
    with open(train_data, 'rb') as image_file:
        image_buf = image_file.read()
    with open(train_label, 'rb') as label_file:
        label_buf = label_file.read()

    # Both headers are big-endian unsigned 32-bit integers.
    image_magic, image_images, image_rows, image_columns = \
        struct.unpack_from('>IIII', image_buf, 0)
    label_magic, label_items = struct.unpack_from('>II', label_buf, 0)

    # 2051 (0x00000803) / 2049 (0x00000801) are the MNIST idx magic numbers
    # for unsigned-byte image and label files respectively.
    if image_magic != 2051:
        raise ValueError('%s is not an idx3-ubyte image file (magic %d)'
                         % (train_data, image_magic))
    if label_magic != 2049:
        raise ValueError('%s is not an idx1-ubyte label file (magic %d)'
                         % (train_label, label_magic))
    if label_items != image_images:
        raise ValueError('image file holds %d items but label file holds %d'
                         % (image_images, label_items))

    # One unsigned byte per label, stored right after the label header.
    labels = struct.unpack_from('>%dB' % label_items, label_buf,
                                struct.calcsize('>II'))

    image_size = image_rows * image_columns
    image_index = struct.calcsize('>IIII')
    known_dirs = set()

    for i, label in enumerate(labels):
        # Pixels are stored row by row, one byte each, so a whole image can
        # be decoded from one slice instead of one putpixel call per pixel.
        pixels = image_buf[image_index:image_index + image_size]
        image_index += image_size
        image = Image.frombytes('L', (image_columns, image_rows), pixels)

        label_dir = os.path.join(save_dir, str(label))
        if label_dir not in known_dirs:
            # Create the target directory ourselves so callers do not have
            # to prepare <save_dir>/0 ... <save_dir>/9 by hand.
            if not os.path.isdir(label_dir):
                os.makedirs(label_dir)
            known_dirs.add(label_dir)

        print('save ' + str(i) + '_' + str(label) + ' image')
        image.save(os.path.join(label_dir, str(i) + '.png'))
#python ~/mxnet/tools/make_list.py train/ train --recursive=True --exts=.png
if __name__ == '__main__':
    # Fetch the raw idx files into ./MNIST if they are not there yet.
    _download('MNIST')

    # Create MNIST/{train,test}/{0..9} programmatically rather than asking
    # the user to paste a mkdir command into an interactive bash session.
    for split in ('train', 'test'):
        for digit in range(10):
            digit_dir = os.path.join('MNIST', split, str(digit))
            if not os.path.isdir(digit_dir):
                os.makedirs(digit_dir)

    # Convert both idx file pairs into per-label PNG directories.
    vision('MNIST/train-images-idx3-ubyte','MNIST/train-labels-idx1-ubyte','MNIST/train/')
    vision('MNIST/t10k-images-idx3-ubyte','MNIST/t10k-labels-idx1-ubyte','MNIST/test/')

    # Run the MXNet helper tools expected under ~/mxnet on each PNG tree;
    # os.system is kept because "~" relies on shell expansion.
    # NOTE(review): presumably make_list.py writes MNIST/<split>.lst and
    # im2rec packs it into a record file - confirm against the MXNet tools.
    # NOTE(review): MXNet record files are conventionally named *.rec;
    # confirm that the ".res" extension below is intended.
    os.system('python ~/mxnet/tools/make_list.py MNIST/train/ MNIST/train --recursive=True --exts=.png')
    os.system('~/mxnet/bin/im2rec MNIST/train.lst MNIST/train/ MNIST/train.res')
    os.system('python ~/mxnet/tools/make_list.py MNIST/test/ MNIST/test --recursive=True --exts=.png')
    os.system('~/mxnet/bin/im2rec MNIST/test.lst MNIST/test/ MNIST/test.res')
用法像这样:
user@debian: python
>>> import test #上面的文件保存到test.py文件中
>>> test.vision('train-images-idx3-ubyte','train-labels-idx1-ubyte','test/') #这样就在当前目录下的test文件夹中生成了相关的数字文件