代码中难点
- readline()方法:从字面意思可以看出,该方法每次读出一行内容,所以读取时占用内存小,比较适合大文件,该方法返回一个字符串对象。注意:下面的代码实际使用的是 readlines() 方法,它一次性读取文件的全部行,返回一个由各行字符串组成的列表。
相当于把 txt 文件中形如 2028_7.jpg 7 的数据一行行读入,存到 contents 中 - 其余的写在程序中
代码
import tensorflow as tf
from PIL import Image
import numpy as np
import os
# Locations of the MNIST JPEG dataset, relative to the working directory.
# The *_path values end with '/', and the *_txt values name text files.
# NOTE(review): presumably *_path / *_txt are the arguments passed to
# generateds(path, txt), and the *_savepath .npy files hold cached arrays;
# the code that uses them is not visible here -- confirm.

# Training split.
train_path = './mnist_image_label/mnist_train_jpg_60000/'
train_txt = './mnist_image_label/mnist_train_jpg_60000.txt'
x_train_savepath = './mnist_image_label/mnist_x_train.npy'
y_train_savepath = './mnist_image_label/mnist_y_train.npy'
# Test split.
test_path = './mnist_image_label/mnist_test_jpg_10000/'
test_txt = './mnist_image_label/mnist_test_jpg_10000.txt'
x_test_savepath = './mnist_image_label/mnist_x_test.npy'
y_test_savepath = './mnist_image_label/mnist_y_test.npy'
def generateds(path, txt):
    """Load the images listed in a label file into NumPy arrays.

    Each non-blank line of ``txt`` is split on whitespace: the first field
    is an image file name and the second is its integer label, for example
    ``2028_7.jpg 7``. Blank lines are skipped.

    Args:
        path: Directory prefix of the images. It is concatenated with the
            file name as-is, so it must end with a path separator.
        txt: Path of the label file.

    Returns:
        A tuple ``(x, y_)``. ``x`` is ``np.array`` of the per-image arrays,
        each converted to 8-bit grayscale and divided by 255. ``y_`` holds
        the labels as ``np.int64``. Both are empty arrays when the label
        file contains no entries.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If a label cannot be converted to an integer.
    """
    # The context manager closes the label file even if reading fails.
    with open(txt, 'r') as f:
        contents = f.readlines()

    x, y_ = [], []
    for content in contents:
        value = content.split()
        if not value:
            # Blank line (commonly a trailing newline at end of file).
            continue
        img_path = path + value[0]
        # Close each image file promptly; the listing may name tens of
        # thousands of files.
        with Image.open(img_path) as img:
            pixels = np.array(img.convert('L'))
        x.append(pixels / 255.)
        y_.append(value[1])
        print('loading : ' + content)

    x = np.array(x)
    # Labels are collected as strings, so convert them to integers here.
    y_ = np.array(y_).astype(np.int64)
    return x, y_