因为想查看吴恩达老师深度学习课程里h5数据文件中存储的图片,所以去搜索了h5文件的操作方法。随后发现一位博主有同样的想法,于是把他保存单张图片的代码改成了批量保存的,并且写了一个非本课程也适用的版本。
原博客底下有人说不能运行,是因为缺少了矩阵处理的代码。现在不需要原课程代码也可以读取h5文件了。底下还有另外两个代码块,同课程学习者可以把最下面的代码块直接粘到原代码底下,
运行就可以保存h5文件里的图片——如果你不是这门课程的学生,记得更改数据文件的地址。
这段代码是参考了一些博主所写的。以下是完整代码
需要自行改动地址和h5文件内部对应的键名
import numpy as np
import matplotlib.pyplot as plt
import h5py
import os
#import PIL
# 读取数据集
def load_dataset():
    """Load the image features and labels from a single HDF5 file.

    Edit the file path and the two dataset key names below so they match
    your own HDF5 file before running.

    Returns:
        A tuple ``(destination_set_x_orig, destination_set_y_orig)``: the
        feature array as stored under the features key, and the label array
        reshaped to a single row of shape ``(1, m)``, where ``m`` is the
        length of the stored label array's first axis.
    """
    # Change the path and the key names in the next three lines.
    # The ``with`` block closes the file as soon as both datasets have been
    # copied into in-memory NumPy arrays, so no handle is left open.
    with h5py.File('文件夹/名字.h5', "r") as destination_dataset:
        destination_set_x_orig = np.array(destination_dataset["你h5文件对应的键名destination_set_x"][:])  # your destination set features
        destination_set_y_orig = np.array(destination_dataset["你h5文件对应的键名destination_set_y"][:])  # your destination set labels

    # Turn the labels into a row vector.
    destination_set_y_orig = destination_set_y_orig.reshape((1, destination_set_y_orig.shape[0]))
    return destination_set_x_orig, destination_set_y_orig
# Read the stored image array and its labels.
destination_set_x_orig, destination_set_y = load_dataset()

# Flatten every image into one column: after the transpose, axis 0 holds the
# values of a single image and axis 1 indexes the images.
destination_set_x_flatten = destination_set_x_orig.reshape(
    len(destination_set_x_orig), -1
).T

# Divide by 255 (presumably the images hold 8-bit pixel values, which this
# maps into [0, 1] -- confirm against your own file).
destination_set_x = destination_set_x_flatten / 255
def createFolder(path):
    """Create the directory ``path`` if it does not exist yet.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # ``exist_ok=True`` lets makedirs do the existence check itself, so there
    # is no window between "check" and "create" in which another process
    # could create the directory and make the call fail.
    os.makedirs(path, exist_ok=True)
# Folder that receives the exported images.
file = './destination_images'
createFolder(file)

# Shape of a single image, taken from the stored array so the images do not
# have to be square.
image_shape = destination_set_x_orig.shape[1:]

# destination_set_x holds one image per column, so its column count is the
# number of images. Using it instead of a hard-coded count exports every
# image and never indexes past the end of the data.
for index in range(destination_set_x.shape[1]):
    image = destination_set_x[:, index].reshape(image_shape)
    # Output files are numbered starting from 1.
    plt.imsave(os.path.join(file, str(index + 1) + ".jpg"), image)
    # Comment out the next two lines to skip the pop-up preview of each image.
    plt.imshow(image)
    plt.show()
这下面是完整的代码块,可以单独运行,不需要课程原代码
import numpy as np
import matplotlib.pyplot as plt
import h5py
import os
#import PIL
# 读取数据集
def load_dataset():
    """Load the cat / non-cat training and test sets from their HDF5 files.

    Reads ``datasets/train_catvnoncat.h5`` and ``datasets/test_catvnoncat.h5``
    relative to the current working directory.

    Returns:
        A tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)``. The two ``*_x_orig`` arrays are returned
        as stored; each label array is reshaped to a single row of shape
        ``(1, m)``, where ``m`` is the length of its first axis; ``classes``
        is the array stored under ``list_classes`` in the test file.
    """
    # Each ``with`` block closes its file once the datasets have been copied
    # into in-memory NumPy arrays, so no handle is left open.
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels

    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Turn both label arrays into row vectors.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
# Read the stored image arrays, their labels and the class list.
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# Flatten every image into one column: after the transpose, axis 0 holds the
# values of a single image and axis 1 indexes the images.
train_set_x_flatten = train_set_x_orig.reshape(len(train_set_x_orig), -1).T
test_set_x_flatten = test_set_x_orig.reshape(len(test_set_x_orig), -1).T

# Divide by 255 (presumably the images hold 8-bit pixel values, which this
# maps into [0, 1] -- confirm against the data files).
train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255
def createFolder(path):
    """Create the directory ``path`` if it does not exist yet.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # ``exist_ok=True`` lets makedirs do the existence check itself, so there
    # is no window between "check" and "create" in which another process
    # could create the directory and make the call fail.
    os.makedirs(path, exist_ok=True)
# Folders that receive the exported training and test images.
file1 = './train_images'
file2 = './test_images'
createFolder(file1)
createFolder(file2)

# Export the training set first, then the test set. Each entry pairs an
# output folder with the flattened images (one image per column) and the
# shape of a single image, taken from the stored array so the images do not
# have to be square.
for folder, flat_images, image_shape in (
    (file1, train_set_x, train_set_x_orig.shape[1:]),
    (file2, test_set_x, test_set_x_orig.shape[1:]),
):
    # The column count is the number of images. Using it instead of a
    # hard-coded count exports every image and never indexes past the end.
    for index in range(flat_images.shape[1]):
        image = flat_images[:, index].reshape(image_shape)
        # Output files are numbered starting from 1.
        plt.imsave(os.path.join(folder, str(index + 1) + ".jpg"), image)
        # Comment out the next two lines to skip the pop-up preview of each image.
        plt.imshow(image)
        plt.show()
上面的代码包含了读取h5文件的一系列定义,这些在吴恩达老师的课程代码里已经有了,所以同课程的学习者不必重新定义。不想弹出一堆照片的话,可以把显示图片的部分(plt.imshow 和 plt.show 这两行)注释掉。
对于已有完整课程代码文件的朋友,把下面这段粘到代码后面就行了。
range范围取决于你h5文件存储的图像数量
def createFolder(path):
    """Create the directory ``path`` if it does not exist yet.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # ``exist_ok=True`` lets makedirs do the existence check itself, so there
    # is no window between "check" and "create" in which another process
    # could create the directory and make the call fail.
    os.makedirs(path, exist_ok=True)
# Folders that receive the exported training and test images.
file1 = './train_images'
file2 = './test_images'
createFolder(file1)
createFolder(file2)

# Export the training set first, then the test set. Each entry pairs an
# output folder with the flattened images (one image per column) and the
# shape of a single image, taken from the stored array so the images do not
# have to be square.
for folder, flat_images, image_shape in (
    (file1, train_set_x, train_set_x_orig.shape[1:]),
    (file2, test_set_x, test_set_x_orig.shape[1:]),
):
    # The column count is the number of images. Using it instead of a
    # hard-coded count exports every image and never indexes past the end.
    for index in range(flat_images.shape[1]):
        image = flat_images[:, index].reshape(image_shape)
        # Output files are numbered starting from 1.
        plt.imsave(os.path.join(folder, str(index + 1) + ".jpg"), image)
        # Comment out the next two lines to skip the pop-up preview of each image.
        plt.imshow(image)
        plt.show()
想要读取别的h5文件的话记得改动地址。
同课程学习者也可以另外建一个代码文件,学着老师导入读取数据集函数的方式来读取,这样就不需要定义load_dataset函数的那一块代码了,因为那一块本来就是课程里lr_utils文件中的内容。