Python 图片预处理和数据集制作 相关代码备份

1.获取文件夹下的所有图片路径

import os

image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

def list_images(basePath, contains=None):
    """Return the paths of image files found under ``basePath``.

    Delegates to ``list_files`` with ``validExts`` fixed to the module-level
    ``image_types`` tuple; ``contains`` is forwarded unchanged.
    """
    image_paths = list_files(
        basePath,
        validExts=image_types,
        contains=contains,
    )
    return image_paths

def list_files(basePath, validExts=None, contains=None):
    """Walk ``basePath`` recursively and yield the path of each matching file.

    Args:
        basePath: Root directory to walk with ``os.walk``.
        validExts: Extension(s) to keep, compared against the lower-cased
            text from the last "." of the file name onward. May be a single
            string or any iterable of strings (tuple, list, set, ...).
            ``None`` keeps every file.
        contains: If not ``None``, only file names containing this substring
            are kept.

    Yields:
        ``os.path.join(rootDir, filename)`` for every file that passes both
        filters.
    """
    # str.endswith accepts only a str or a tuple of str, so normalise lists,
    # sets and other iterables; a plain string is left as-is so it is not
    # split into single characters.
    if validExts is not None and not isinstance(validExts, str):
        validExts = tuple(validExts)

    for rootDir, _dirNames, filenames in os.walk(basePath):
        for filename in filenames:
            # Ignore files whose name does not contain the requested substring.
            if contains is not None and contains not in filename:
                continue

            # The extension is everything from the last "." to the end,
            # lower-cased so the comparison ignores the file name's case.
            ext = filename[filename.rfind("."):].lower()

            if validExts is None or ext.endswith(validExts):
                yield os.path.join(rootDir, filename)
import utils_paths
# Collect every image path under `path`, in sorted order.
imagePaths = sorted(utils_paths.list_images(path))

2.大量图片分批读入内存,避免OOM:先一次性将所有图片路径读入内存,再制作生成器,按照路径将图片分批读入内存。适用于Keras、png图片。参考:《TensorFlow和Keras解决大数据量内存溢出问题》

def get_im_cv2(paths, img_rows, img_cols, normalize=True):
    """Load the images at ``paths`` with OpenCV and stack them into one batch.

    Args:
        paths: Sequence of image file paths passed to ``cv2.imread``.
        img_rows: Size of axis 2 of the returned batch.
        img_cols: Size of axis 1 of the returned batch.
        normalize: When true, convert each image to float32 and divide by
            255 so 8-bit pixel values land in [0, 1].

    Returns:
        Array of shape ``(len(paths), img_cols, img_rows, 3)``.

    Raises:
        ValueError: If an image cannot be read.
    """
    imgs = []
    for path in paths:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"cannot read image: {path}")
        # cv2.resize takes dsize as (width, height) and returns an array of
        # shape (height, width, 3). Resizing to (img_rows, img_cols) yields
        # (img_cols, img_rows, 3) directly, which is the per-image layout of
        # the returned batch. Resizing the other way round and reshaping
        # afterwards would reinterpret the buffer and scramble the pixels
        # whenever img_rows != img_cols.
        resized = cv2.resize(img, (img_rows, img_cols))
        if normalize:
            resized = resized.astype('float32')
            resized /= 255
        imgs.append(resized)
    # The reshape is a no-op for a non-empty batch; it only pins the shape
    # when `paths` is empty.
    return np.array(imgs).reshape(len(paths), img_cols, img_rows, 3)


def get_train_batch(X_train, y_train, batch_size, img_w, img_h):
    """Endlessly yield ``(x, y)`` batches, loading images one batch at a time.

    Args:
        X_train: Sliceable sequence of image paths.
        y_train: Labels, indexed as ``y_train[start:stop, :]``, so a
            two-dimensional array-like is required.
        batch_size: Number of samples per batch; the last batch of each pass
            may be shorter.
        img_w: Forwarded to ``get_im_cv2`` as its ``img_rows`` argument.
        img_h: Forwarded to ``get_im_cv2`` as its ``img_cols`` argument.

    Yields:
        Tuples ``(x, y)`` where ``x`` is the array returned by ``get_im_cv2``
        for the batch's paths and ``y`` is the matching label slice. After
        the last batch the generator starts over from the first sample.

    Raises:
        ValueError: If ``X_train`` is empty (otherwise the loop below would
            spin forever without ever yielding).
    """
    if len(X_train) == 0:
        raise ValueError("X_train is empty; no batches can be generated")

    while True:
        for start in range(0, len(X_train), batch_size):
            stop = start + batch_size
            x = get_im_cv2(X_train[start:stop], img_w, img_h)
            y = y_train[start:stop, :]
            yield x, y
# Training
# The step counts use ceiling division so each pass covers every sample
# exactly once. `len // batch_size + 1` adds a surplus step whenever the
# sample count is an exact multiple of batch_size, and because the batch
# generator cycles, that step would re-feed the first batch.
history = model.fit(get_train_batch(image_train, label_train, batch_size=batch_size, img_w=img_w, img_h=img_h),
          epochs=1000,
          validation_data=get_train_batch(image_valid, label_valid, batch_size=batch_size, img_w=img_w, img_h=img_h),
          steps_per_epoch=(len(image_train) + batch_size - 1) // batch_size,
          validation_steps=(len(image_valid) + batch_size - 1) // batch_size,
          verbose=1,
          callbacks=[ckpt, early_stop],
          workers=1)
          
# Prediction
# Ceiling division gives exactly the number of batches needed to cover the
# test set once. With `len // batch_size + 1` an exact multiple of batch_size
# would request one extra step, and since the batch generator cycles, the
# output would end with duplicate predictions for the first batch.
pre = model.predict(get_train_batch(image_test, label_test, batch_size=batch_size, img_w=img_w, img_h=img_h),
                          verbose=1,
                          steps=(len(image_test) + batch_size - 1) // batch_size)

3.图片预处理,原始图片需要裁剪,但裁剪位置不定,底色为红色,需要裁剪掉多余的红边。

# Crop the red border off every image in imagePaths and write the result
# under result_path. `k` counts the images processed in this run.
k = 0
for path in imagePaths:
    # Skip images whose output file already exists.
    if not os.path.isfile(result_path+path[17:]):
        img = cv2.imread(path)
        # Scratch copy used only to detect border rows; the pixels written
        # out are taken from the untouched `img`.
        img_1 = img.copy()

        # Background pixels are those whose three channels are exactly
        # (0, 0, 128). Zero their third channel in the scratch copy so a row
        # made up only of background (or already-black) pixels becomes
        # all-zero.
        background = (img[:, :, 0] == 0) & (img[:, :, 1] == 0) & (img[:, :, 2] == 128)
        img_1[background, 2] = 0
        border_rows = np.all(img_1 == 0, axis=(1, 2))

        # Drop the border rows, then keep the selected columns.
        # NOTE(review): linspace includes the endpoint 300, so these 170
        # integer indices are not the contiguous range 130..299 -- confirm
        # that this column selection is intended.
        img_result = img[~border_rows][:,np.linspace(130,300,num=170,dtype=int),:]
        cv2.imwrite(result_path+path[17:], img_result)

        # Release the per-image arrays before loading the next image.
        del img,img_1,img_result,background,border_rows
        gc.collect()
        # Increment the counter (the original `k=+1` reset it to 1 each time).
        k += 1
        print(f'n = {k} finished')
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值