参考博文1:http://blog.csdn.net/qq_32166627/article/details/52640730
赵老师的书第十六天可视化方法主要采用matlab接口方法实现,本篇博文采用python方法实现书中第十六天中实现的所有可视化。
一、数据可视化
1、mnist数据可视化
1)、训练样本可视化
首先要打开jupyter notebook(具体配置python接口和jupyter的方法参考我的博文,地址:http://blog.csdn.net/xunan003/article/details/73555424)
新建一个Python 2 notebook,输入如下代码:
import numpy as np
import struct
from PIL import Image
import os
def read_idx_images(path):
    """Read an IDX3 image file (e.g. train-images-idx3-ubyte) into an array.

    The file starts with four big-endian unsigned 32-bit integers (magic
    number, image count, row count, column count) followed by one unsigned
    byte per pixel. The amount of pixel data is taken from that header
    instead of a hard-coded file size, so files of any length work.

    Args:
        path: location of the IDX3 file.

    Returns:
        A uint8 array of shape (numImages, 1, numRows, numColumns).

    Raises:
        ValueError: if the file holds fewer pixel bytes than the header
            announces (raised by ``np.frombuffer``).
    """
    with open(path, 'rb') as f:
        data_buf = f.read()
    header_format = '>IIII'
    _magic, numImages, numRows, numColumns = struct.unpack_from(
        header_format, data_buf, 0)
    # frombuffer views the raw bytes directly instead of expanding them
    # into a tuple of millions of Python ints first.
    pixels = np.frombuffer(
        data_buf, dtype=np.uint8,
        count=numImages * numRows * numColumns,
        offset=struct.calcsize(header_format))
    return pixels.reshape(numImages, 1, numRows, numColumns)


def read_idx_labels(path):
    """Read an IDX1 label file (e.g. train-labels-idx1-ubyte) into an array.

    The file starts with two big-endian unsigned 32-bit integers (magic
    number, label count) followed by one unsigned byte per label.

    Args:
        path: location of the IDX1 file.

    Returns:
        A one-dimensional int64 array with one entry per label.

    Raises:
        ValueError: if the file holds fewer label bytes than the header
            announces (raised by ``np.frombuffer``).
    """
    with open(path, 'rb') as f:
        label_buf = f.read()
    header_format = '>II'
    _magic, numLabels = struct.unpack_from(header_format, label_buf, 0)
    raw_labels = np.frombuffer(
        label_buf, dtype=np.uint8, count=numLabels,
        offset=struct.calcsize(header_format))
    return raw_labels.astype(np.int64)


if __name__ == '__main__':
    # Path to adjust: location of the train-images-idx3-ubyte file.
    data_file = '/home/xn/caffe/data/mnist/train-images-idx3-ubyte'
    # Path to adjust: location of the train-labels-idx1-ubyte file
    # (an absolute path is recommended).
    label_file = '/home/xn/caffe/data/mnist/train-labels-idx1-ubyte'
    # Path to adjust: directory that receives the exported PNG images.
    datas_root = '/home/xn/caffe/examples/mnist/mnist_train'

    datas = read_idx_images(data_file)
    labels = read_idx_labels(label_file)

    # One sub-directory per digit class; makedirs also creates datas_root
    # itself (and any missing parents) on the first iteration.
    for i in range(10):
        class_dir = os.path.join(datas_root, str(i))
        if not os.path.isdir(class_dir):
            os.makedirs(class_dir)

    # Each image is written into the directory named after its label.
    for ii, label in enumerate(labels):
        img = Image.fromarray(datas[ii, 0])
        file_name = os.path.join(
            datas_root, str(int(label)), 'mnist_train_' + str(ii) + '.png')
        img.save(file_name)
运行上面程序,可得到训练用的60000个样本集图片(按标签0~9分别保存在对应的子文件夹中)。打开/home/xn/caffe/examples/mnist/mnist_train文件夹即可查看。
2)、测试样本可视化
在jupyter notebook命令窗口下输入python程序
import numpy as np
import struct
from PIL import Image
import os
def read_idx_images(path):
    """Read an IDX3 image file (e.g. t10k-images-idx3-ubyte) into an array.

    The file starts with four big-endian unsigned 32-bit integers (magic
    number, image count, row count, column count) followed by one unsigned
    byte per pixel. The amount of pixel data is taken from that header
    instead of a hard-coded file size, so files of any length work.

    Args:
        path: location of the IDX3 file.

    Returns:
        A uint8 array of shape (numImages, 1, numRows, numColumns).

    Raises:
        ValueError: if the file holds fewer pixel bytes than the header
            announces (raised by ``np.frombuffer``).
    """
    with open(path, 'rb') as f:
        data_buf = f.read()
    header_format = '>IIII'
    _magic, numImages, numRows, numColumns = struct.unpack_from(
        header_format, data_buf, 0)
    # frombuffer views the raw bytes directly instead of expanding them
    # into a tuple of millions of Python ints first.
    pixels = np.frombuffer(
        data_buf, dtype=np.uint8,
        count=numImages * numRows * numColumns,
        offset=struct.calcsize(header_format))
    return pixels.reshape(numImages, 1, numRows, numColumns)


def read_idx_labels(path):
    """Read an IDX1 label file (e.g. t10k-labels-idx1-ubyte) into an array.

    The file starts with two big-endian unsigned 32-bit integers (magic
    number, label count) followed by one unsigned byte per label.

    Args:
        path: location of the IDX1 file.

    Returns:
        A one-dimensional int64 array with one entry per label.

    Raises:
        ValueError: if the file holds fewer label bytes than the header
            announces (raised by ``np.frombuffer``).
    """
    with open(path, 'rb') as f:
        label_buf = f.read()
    header_format = '>II'
    _magic, numLabels = struct.unpack_from(header_format, label_buf, 0)
    raw_labels = np.frombuffer(
        label_buf, dtype=np.uint8, count=numLabels,
        offset=struct.calcsize(header_format))
    return raw_labels.astype(np.int64)


if __name__ == '__main__':
    # Path to adjust: location of the t10k-images-idx3-ubyte file.
    data_file = '/home/xn/caffe/data/mnist/t10k-images-idx3-ubyte'
    # Path to adjust: location of the t10k-labels-idx1-ubyte label file.
    label_file = '/home/xn/caffe/data/mnist/t10k-labels-idx1-ubyte'
    # Path to adjust: directory that receives the exported PNG images.
    datas_root = '/home/xn/caffe/examples/mnist/mnist_test'

    datas = read_idx_images(data_file)
    labels = read_idx_labels(label_file)

    # One sub-directory per digit class; makedirs also creates datas_root
    # itself (and any missing parents) on the first iteration.
    for i in range(10):
        class_dir = os.path.join(datas_root, str(i))
        if not os.path.isdir(class_dir):
            os.makedirs(class_dir)

    # Each image is written into the directory named after its label.
    for ii, label in enumerate(labels):
        img = Image.fromarray(datas[ii, 0])
        file_name = os.path.join(
            datas_root, str(int(label)), 'mnist_test_' + str(ii) + '.png')
        img.save(file_name)
运行上面程序,即可在相应的文件夹/home/xn/caffe/examples/mnist/mnist_test中查看生成的10000张测试样本图片。
2、cifar10数据可视化
首先下载python版cifar10数据。
先给出cifar数据的下载链接:http://www.cs.toronto.edu/~kriz/cifar.html 。该页面提供三个版本的数据,分别是python、matlab、binary版本,分别适用于python、matlab、C程序。下载cifar-10-python.tar.gz文件,将其复制到caffe/data/cifar10文件夹中,解压待用。
然后就是利用jupyter notebook来运行程序了。代码如下:
import pickle as p
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as plimg
from PIL import Image
def load_CIFAR_batch(filename):
    """Load a single pickled CIFAR batch file.

    Args:
        filename: path to a batch file such as ``data_batch_1``.

    Returns:
        A tuple ``(X, Y)``: ``X`` is the ``'data'`` entry reshaped to
        ``(N, 3, 32, 32)`` (N is inferred from the data rather than fixed
        at 10000), and ``Y`` is the ``'labels'`` entry as a NumPy array.
    """
    import sys

    # Python 3 needs an explicit encoding to read pickles written by
    # Python 2; Python 2's pickle.load() does not accept that keyword.
    load_kwargs = {'encoding': 'latin1'} if sys.version_info[0] >= 3 else {}
    with open(filename, 'rb') as f:
        # NOTE: unpickling can execute arbitrary code; only load batch
        # files obtained from a trusted source.
        datadict = p.load(f, **load_kwargs)
    X = np.asarray(datadict['data']).reshape(-1, 3, 32, 32)
    Y = np.array(datadict['labels'])
    return X, Y
def load_CIFAR_Labels(filename):
    """Print the raw lines of a CIFAR metadata file and return them.

    The file is opened in binary mode and split on newlines; the resulting
    list of byte strings is printed and also returned so callers can use it.

    NOTE(review): in the python distribution ``batches.meta`` appears to be
    a pickled dict, so this shows raw pickle bytes rather than decoded
    label names -- confirm whether decoded names are wanted.

    Args:
        filename: path to the metadata file (e.g. ``batches.meta``).

    Returns:
        The list of raw lines read from the file.
    """
    with open(filename, 'rb') as f:
        lines = f.readlines()
    print(lines)
    return lines
if __name__ == "__main__":
    # Local import: the surrounding snippet does not import os at the top.
    import os

    # Path to adjust: the extracted cifar-10-batches-py directory, which
    # contains both batches.meta and data_batch_1.
    cifar_dir = "/home/xn/caffe/data/cifar10/cifar-10-batches-py"
    # Path to adjust: receives the merged RGB images.
    merged_dir = "/home/xn/caffe/examples/images/cifar10/images/"
    # Path to adjust: receives the three separate channel images per picture.
    channel_dir = "/home/xn/caffe/examples/images/cifar10/image/"
    # Only the first 100 pictures are exported; set to None to export the
    # whole batch (this can take a while).
    max_images = 100

    # Create the output directories up front so saving cannot fail on a
    # missing path.
    for out_dir in (merged_dir, channel_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    load_CIFAR_Labels(cifar_dir + "/batches.meta")
    imgX, imgY = load_CIFAR_batch(cifar_dir + "/data_batch_1")
    print(imgX.shape)
    print("正在保存图片:")

    total = imgX.shape[0]
    limit = total if max_images is None else min(max_images, total)
    for i in range(limit):
        # Index i directly: the previous i - 1 made file img0 hold the
        # last picture of the batch.
        imgs = imgX[i]

        # Merge the three channel planes into a single RGB image.
        i0 = Image.fromarray(imgs[0])
        i1 = Image.fromarray(imgs[1])
        i2 = Image.fromarray(imgs[2])
        img = Image.merge("RGB", (i0, i1, i2))
        # The merged file name carries no extension; the format is passed
        # explicitly to save().
        name = "img" + str(i)
        img.save(merged_dir + name, "png")

        # Save every channel separately. Index j directly: the previous
        # j - 1 stored the last channel under suffix 0.
        for j in range(imgs.shape[0]):
            name = "img" + str(i) + str(j) + ".png"
            print("正在保存图片" + name)
            plimg.imsave(channel_dir + name, imgs[j])

    print("保存完毕.")
我们可以在/home/xn/caffe/examples/images/cifar10/images/文件夹下和/home/xn/caffe/examples/images/cifar10/image/文件夹下查看保存的图片,后者图片数量是前者的三倍