1.获取文件夹下的所有图片路径
import os
image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
def list_images(basePath, contains=None):
    """Return a generator over the image file paths found under ``basePath``.

    Thin wrapper around ``list_files`` that restricts the walk to the
    extensions listed in the module-level ``image_types`` tuple.

    Args:
        basePath: Root directory to search.
        contains: Optional substring; when given, only file names containing
            it are returned.
    """
    image_paths = list_files(
        basePath,
        validExts=image_types,
        contains=contains,
    )
    return image_paths
def list_files(basePath, validExts=None, contains=None):
    """Recursively yield the paths of matching files under ``basePath``.

    Args:
        basePath: Root directory passed to ``os.walk``.
        validExts: Extension filter. ``None`` accepts every file; otherwise a
            single string or any iterable of strings (tuple, list, set, ...)
            that the lower-cased file extension must end with.
        contains: Optional substring; files whose name does not contain it
            are skipped.

    Yields:
        ``os.path.join(rootDir, filename)`` for every file that passes both
        filters, in ``os.walk`` order.
    """
    # str.endswith only accepts a str or a tuple of str, so normalise lists,
    # sets and other iterables up front instead of raising TypeError later.
    if validExts is None:
        exts = None
    elif isinstance(validExts, str):
        exts = (validExts,)
    else:
        exts = tuple(validExts)

    for (rootDir, dirNames, filenames) in os.walk(basePath):
        # Loop over the file names in the current directory.
        for filename in filenames:
            # Skip files whose name does not contain the requested substring.
            if contains is not None and contains not in filename:
                continue

            # The extension is everything from the last "." onwards; a name
            # without a dot has no extension (rfind returns -1, which would
            # otherwise slice off the last character of the name).
            dot = filename.rfind(".")
            ext = filename[dot:].lower() if dot != -1 else ""

            # Keep the file when no filter is set or the extension matches.
            if exts is None or ext.endswith(exts):
                yield os.path.join(rootDir, filename)
import utils_paths
# Collect every image path under `path` in sorted order; sorted() consumes the
# generator directly, so no intermediate list is needed.
imagePaths = sorted(utils_paths.list_images(path))
2.大量图片分批读入内存,避免OOM:先一次性将所有图片路径读入内存,再制作生成器,按照路径将图片分批读入内存。适用于Keras、png图片,参考:《TensorFlow和Keras解决大数据量内存溢出问题》。
def get_im_cv2(paths, img_rows, img_cols, normalize=True):
    """Load the images at ``paths`` into a single 4-D batch array.

    Each file is read with ``cv2.imread``, resized, and optionally scaled
    from the 8-bit range into ``[0, 1]``.

    Args:
        paths: Sequence of image file paths (must support ``len``).
        img_rows: Size of axis 2 of the returned batch.
        img_cols: Size of axis 1 of the returned batch.
        normalize: When true, convert to ``float32`` and divide by 255.

    Returns:
        Array of shape ``(len(paths), img_cols, img_rows, 3)``.

    Raises:
        ValueError: If an image cannot be read.
    """
    imgs = []
    for path in paths:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"cannot read image: {path}")
        # cv2.resize takes dsize as (width, height) and returns an array of
        # shape (height, width, channels). Resize straight to the per-image
        # shape this function returns, (img_cols, img_rows, 3), so the final
        # reshape only fixes the batch shape. Resizing to (img_cols, img_rows)
        # and then reshaping, as before, scrambled pixels whenever
        # img_rows != img_cols.
        resized = cv2.resize(img, (img_rows, img_cols))
        if normalize:
            # 8-bit pixel values span 0..255 (the previous divisor, 225, was a typo).
            resized = resized.astype('float32') / 255.0
        imgs.append(resized)
    return np.array(imgs).reshape(len(paths), img_cols, img_rows, 3)
def get_train_batch(X_train, y_train, batch_size, img_w, img_h):
    """Yield ``(x, y)`` batches forever, loading images from disk on demand.

    Only the image paths are held in memory; each batch of images is read by
    ``get_im_cv2`` when the batch is requested. The generator cycles over the
    data endlessly, so the consumer must bound it with a step count.

    Args:
        X_train: Sequence of image paths (sliceable, supports ``len``).
        y_train: Labels aligned with ``X_train``; sliced along its first axis.
        batch_size: Number of samples per batch; must be at least 1.
        img_w: Forwarded positionally to ``get_im_cv2`` as ``img_rows``.
        img_h: Forwarded positionally to ``get_im_cv2`` as ``img_cols``.

    Yields:
        Tuples ``(x, y)`` where ``x`` is the image batch returned by
        ``get_im_cv2`` and ``y`` the matching slice of ``y_train``. The last
        batch of each pass may be smaller than ``batch_size``.

    Raises:
        ValueError: On first iteration, if ``batch_size`` < 1 or ``X_train``
            is empty (either would otherwise loop forever without yielding,
            or fail inside ``range``).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if len(X_train) == 0:
        raise ValueError("X_train is empty; nothing to yield")

    while True:
        for start in range(0, len(X_train), batch_size):
            stop = start + batch_size
            x = get_im_cv2(X_train[start:stop], img_w, img_h)
            # First-axis slice: same result as [start:stop, :] for 2-D label
            # arrays, and also works for 1-D arrays and plain lists.
            y = y_train[start:stop]
            yield x, y
# Training. The batch generators never terminate, so Keras needs explicit
# step counts. Ceil division covers every sample exactly once per epoch;
# `len // batch_size + 1` ran one extra (wrapped-around) batch whenever the
# dataset size was an exact multiple of batch_size.
history = model.fit(
    get_train_batch(image_train, label_train,
                    batch_size=batch_size, img_w=img_w, img_h=img_h),
    epochs=1000,
    validation_data=get_train_batch(image_valid, label_valid,
                                    batch_size=batch_size, img_w=img_w, img_h=img_h),
    steps_per_epoch=(len(image_train) + batch_size - 1) // batch_size,
    validation_steps=(len(image_valid) + batch_size - 1) // batch_size,
    verbose=1,
    callbacks=[ckpt, early_stop],
    workers=1)
# Prediction. The generator is endless, so `steps` decides how many batches
# are consumed. Ceil division yields exactly one prediction per test image;
# `len // batch_size + 1` produced an extra batch of duplicate predictions
# whenever len(image_test) was an exact multiple of batch_size.
pre = model.predict(
    get_train_batch(image_test, label_test,
                    batch_size=batch_size, img_w=img_w, img_h=img_h),
    verbose=1,
    steps=(len(image_test) + batch_size - 1) // batch_size)
3.图片预处理:原始图片需要裁剪,但裁剪位置不固定;图片底色为红色,需要裁剪掉多余的红边。
# Crop the red border off every image that has not been processed yet:
# drop each row that consists only of background pixels, then keep a fixed
# set of columns, and write the result under result_path.

# Column indices to keep; identical for every image, so computed once.
# NOTE(review): this evaluates to 130..298 followed by 300 (column 299 is
# skipped because 170 points are spread over 130..300 inclusive). Kept as-is
# to preserve existing output - confirm whether a plain 130:300 slice was
# intended.
crop_cols = np.linspace(130, 300, num=170, dtype=int)

k = 0
for path in imagePaths:
    # path[17:] drops a fixed-length prefix of the input path - presumably
    # the source directory; TODO confirm the prefix is always 17 characters.
    out_path = result_path + path[17:]
    if os.path.isfile(out_path):
        # Already processed on an earlier run.
        continue

    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"cannot read image: {path}")

    # Channels are indexed 0, 1, 2 (B, G, R in OpenCV's default order).
    # A pixel counts as background when it equals (0, 0, 128) or is already
    # pure black - the same rows the original per-pixel loop selected by
    # zeroing channel 2 of (0, 0, 128) pixels and then testing for all-zero
    # rows.
    blue, green, red = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    background = (blue == 0) & (green == 0) & ((red == 128) | (red == 0))
    border_rows = np.flatnonzero(background.all(axis=1))

    img_result = np.delete(img, border_rows, axis=0)[:, crop_cols, :]
    cv2.imwrite(out_path, img_result)

    # `k=+1` assigned +1 on every pass; the counter is meant to accumulate.
    k += 1
    print(f'n = {k} finished')