Python 图片预处理和数据集制作相关代码备份

最新推荐文章于 2024-05-15 05:50:33 发布

ALeidy

最新推荐文章于 2024-05-15 05:50:33 发布

阅读量492

点赞数

分类专栏：科研日常文章标签： tensorflow opencv 机器学习

本文链接：https://blog.csdn.net/sinat_32476273/article/details/117224726

版权

科研日常专栏收录该内容

6 篇文章 0 订阅

订阅专栏

1.获取文件夹下的所有图片路径

import os

image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

def list_images(basePath, contains=None):
    """Return a generator over the image files found under ``basePath``.

    This delegates to ``list_files`` with the module-level ``image_types``
    tuple as the accepted extensions.

    Args:
        basePath: Root directory to search.
        contains: Optional substring that a file name must include.

    Returns:
        The generator produced by ``list_files``.
    """
    image_paths = list_files(basePath, validExts=image_types, contains=contains)
    return image_paths

def list_files(basePath, validExts=None, contains=None):
    """Lazily yield the path of every matching file below ``basePath``.

    The directory tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.

    Args:
        basePath: Root directory to walk.
        validExts: ``None`` to accept every file, otherwise a string or tuple
            of strings handed to ``str.endswith`` and compared against the
            lower-cased extension of each file name.
        contains: Optional substring; file names without it are skipped.

    Yields:
        ``os.path.join(directory, filename)`` for each accepted file.
    """
    for rootDir, _dirNames, filenames in os.walk(basePath):
        for filename in filenames:
            # Skip names that lack the requested substring.
            if contains is not None and contains not in filename:
                continue

            # The extension is everything from the last "." onwards; when the
            # name has no dot, rfind() returns -1 and this is just the final
            # character of the name.
            ext = filename[filename.rfind("."):].lower()

            # Skip files whose extension is not in the accepted set.
            if validExts is not None and not ext.endswith(validExts):
                continue

            yield os.path.join(rootDir, filename)

import utils_paths
# Collect every image path under `path`, in sorted order; sorted() consumes
# the generator directly, so no intermediate list is needed.
imagePaths = sorted(utils_paths.list_images(path))

2.大量图片分批读入内存，避免 OOM：先一次性将所有图片的路径读入内存，再制作生成器，按照路径将图片分批读入内存。适用于 Keras 和 png 图片，参考：《TensorFlow和Keras解决大数据量内存溢出问题》。

def get_im_cv2(paths, img_rows, img_cols, normalize=True):
    """Load a batch of images from disk and stack them into one array.

    Args:
        paths: Sized sequence of image file paths (``len(paths)`` is used).
        img_rows: Target size; passed as the second element of the ``dsize``
            tuple given to ``cv2.resize``.
        img_cols: Target size; passed as the first element of the ``dsize``
            tuple given to ``cv2.resize``.
        normalize: When true, each image is converted to ``float32`` and
            divided by 255 so 8-bit pixel values land in ``[0, 1]``.

    Returns:
        A NumPy array reshaped to ``(len(paths), img_cols, img_rows, 3)``.

    Raises:
        ValueError: If ``cv2.imread`` cannot read one of the paths.
    """
    imgs = []
    for path in paths:
        img = cv2.imread(path)
        # cv2.imread signals a missing or undecodable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        if img is None:
            raise ValueError(f"cv2.imread could not read image: {path!r}")
        resized = cv2.resize(img, (img_cols, img_rows))
        if normalize:
            # 8-bit images peak at 255 (the previous divisor, 225, was a typo).
            resized = resized.astype('float32') / 255.0
        imgs.append(resized)
    # NOTE(review): cv2.resize returns arrays shaped (img_rows, img_cols, 3),
    # while this reshape declares (img_cols, img_rows, 3). The two agree only
    # for square targets; for non-square sizes the pixel data is reinterpreted
    # rather than transposed. The shape is kept as-is for existing callers --
    # confirm the intended axis order before using non-square sizes.
    return np.array(imgs).reshape(len(paths), img_cols, img_rows, 3)


def get_train_batch(X_train, y_train, batch_size, img_w, img_h):
    """Endlessly yield ``(images, labels)`` batches, loading images on demand.

    Only the image paths are held in memory; each batch of images is read
    with ``get_im_cv2`` just before it is yielded. After the last batch the
    generator starts again from the first one, so it never terminates on its
    own -- the consumer must bound it (e.g. with a step count).

    Args:
        X_train: Sliceable, sized sequence of image paths.
        y_train: Labels indexed as ``y_train[start:stop, :]``, i.e. an object
            supporting 2-D slicing such as a 2-D NumPy array.
        batch_size: Number of samples per batch; must be at least 1.
        img_w: Forwarded to ``get_im_cv2`` as its ``img_rows`` argument.
        img_h: Forwarded to ``get_im_cv2`` as its ``img_cols`` argument.

    Yields:
        Tuples ``(x, y)`` holding the image array and the matching labels.

    Raises:
        ValueError: On first iteration, if ``batch_size`` is smaller than 1
            or ``X_train`` is empty. Without this check the outer loop would
            spin forever without ever yielding.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")
    if len(X_train) == 0:
        raise ValueError("X_train is empty; there is nothing to batch")

    while True:
        for start in range(0, len(X_train), batch_size):
            stop = start + batch_size
            x = get_im_cv2(X_train[start:stop], img_w, img_h)
            y = y_train[start:stop, :]
            yield x, y

# Training.
# Step counts use ceiling division so each pass covers every sample exactly
# once. The previous `len(x)//batch_size+1` asked for one batch too many
# whenever len(x) was an exact multiple of batch_size; because the generator
# loops forever, that extra step silently re-read the first batch.
history = model.fit(get_train_batch(image_train, label_train, batch_size=batch_size, img_w=img_w, img_h=img_h),
          epochs=1000,
          validation_data=get_train_batch(image_valid, label_valid, batch_size=batch_size, img_w=img_w, img_h=img_h),
          steps_per_epoch = (len(image_train) + batch_size - 1) // batch_size,
          validation_steps = (len(image_valid) + batch_size - 1) // batch_size,
          verbose=1,
          callbacks=[ckpt, early_stop],
          workers=1)

# Prediction.
# The same ceiling division keeps the generator from wrapping around to the
# first batch again, which would feed extra samples into predict().
pre = model.predict(get_train_batch(image_test, label_test, batch_size=batch_size, img_w=img_w, img_h=img_h),
                          verbose=1,
                          steps = (len(image_test) + batch_size - 1) // batch_size)

3.图片预处理，原始图片需要裁剪，但裁剪位置不定，底色为红色，需要裁剪掉多余的红边。

# Crop the border rows off every image in imagePaths and write the result
# under result_path. Images whose output file already exists are skipped, so
# the script can be re-run after an interruption.
k = 0
for path in imagePaths:
    # path[17:] drops a fixed 17-character prefix from the input path.
    # NOTE(review): presumably the length of the source directory prefix --
    # confirm it matches the actual input paths.
    out_path = result_path + path[17:]
    if os.path.isfile(out_path):
        continue

    img = cv2.imread(path)

    # A pixel counts as border when its (B, G, R) value is (0, 0, 128) or
    # (0, 0, 0). The original per-pixel loop zeroed the red channel of
    # (0, 0, 128) pixels in a second copy of the image and then tested each
    # row for all-zero; this mask expresses the same test in one pass.
    border = (
        (img[:, :, 0] == 0)
        & (img[:, :, 1] == 0)
        & ((img[:, :, 2] == 0) | (img[:, :, 2] == 128))
    )
    # Rows made up entirely of border pixels are removed.
    border_rows = border.all(axis=1)

    # Keep the remaining rows, then pick 170 integer column indices spread
    # over 130..300 inclusive.
    # NOTE(review): 130..300 spans 171 columns, so one column is skipped, and
    # the image must be at least 301 pixels wide -- confirm this is intended.
    img_result = img[~border_rows][:, np.linspace(130, 300, num=170, dtype=int), :]
    cv2.imwrite(out_path, img_result)

    # Drop the large arrays before the next image is loaded.
    del img, img_result, border, border_rows
    gc.collect()
    # Count processed images (the previous `k=+1` reset k to 1 every time).
    k += 1
    print(f'n = {k} finished')

ALeidy

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
Python 图片预处理和数据集制作相关代码备份

1.获取文件夹下的所有图片路径import osimage_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")def list_images(basePath, contains=None): # 返回有效的图片路径数据集 return list_files(basePath, validExts=image_types, contains=contains)def list_files(basePath, va
复制链接

扫一扫