在计算机视觉领域对图片进行预处理时，通常需要先计算出数据集中所有图片的像素均值。本文详细给出了计算自定义图片数据集各通道像素均值的 Python 代码。
1. 获取自定义数据集中全部图片的绝对路径
def is_imgfile(filepath):
    """Return True if *filepath* is a regular file whose header looks like an image.

    ``~`` in *filepath* is expanded first. Detection uses the standard-library
    ``imghdr`` module when it can be imported. ``imghdr`` was removed in
    Python 3.13, so when it is missing the first bytes of the file are compared
    against a small set of well-known signatures instead (PNG, JPEG, GIF, BMP,
    TIFF, WebP); that fallback recognises fewer formats than ``imghdr``.

    Files that cannot be opened or read are reported as "not an image" rather
    than raising, so one unreadable file does not abort a directory scan.
    """
    filepath = os.path.expanduser(filepath)
    if not os.path.isfile(filepath):
        return False

    # Imported here, and guarded, because the module no longer exists on
    # Python 3.13+.
    try:
        import imghdr
    except ImportError:
        imghdr = None

    try:
        if imghdr is not None:
            return imghdr.what(filepath) is not None
        with open(filepath, 'rb') as stream:
            header = stream.read(16)
    except OSError:
        return False

    signatures = (
        b'\x89PNG\r\n\x1a\n',  # PNG
        b'\xff\xd8\xff',       # JPEG
        b'GIF87a',             # GIF
        b'GIF89a',
        b'BM',                 # BMP
        b'II*\x00',            # TIFF, little-endian
        b'MM\x00*',            # TIFF, big-endian
    )
    if header.startswith(signatures):
        return True
    # WebP is a RIFF container with "WEBP" at byte offset 8.
    return header[:4] == b'RIFF' and header[8:12] == b'WEBP'
def load_imgpaths_from_dir(dirpath, walk=False):
    """Collect the absolute paths of the image files found under *dirpath*.

    Args:
        dirpath: Directory to scan; ``~`` is expanded and the path is made
            absolute, so the returned paths are absolute even when a relative
            directory is given.
        walk: When True, descend into sub-directories with ``os.walk``;
            otherwise only the direct entries of *dirpath* are examined.

    Returns:
        A sorted list of paths for which ``is_imgfile`` returns a true value.
        Sorting makes the result reproducible, because neither ``os.listdir``
        nor ``os.walk`` guarantees an order.
    """
    dirpath = os.path.abspath(os.path.expanduser(dirpath))
    if walk:
        # Recursive scan: every file below dirpath is a candidate.
        candidates = (
            os.path.join(root, name)
            for root, _, names in os.walk(dirpath)
            for name in names
        )
    else:
        # Flat scan: only the direct entries of dirpath are candidates.
        candidates = (os.path.join(dirpath, name) for name in os.listdir(dirpath))
    return sorted(path for path in candidates if is_imgfile(path))
测试:
if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes ("\d", "\h") that are
    # invalid escape sequences in a normal string literal.
    path = r'G:\dataset\hymenoptera_data'
    img_path_list = load_imgpaths_from_dir(dirpath=path, walk=True)
    print(len(img_path_list))
    print(img_path_list)
结果:
2.计算图片数据集的三个通道的平均像素值
def compute_imgdataset_mean_pixel_value(imgset_all_path):
    """Compute the mean pixel value of each RGB channel over a set of images.

    Every image is converted to RGB, scaled to the range [0, 1] and averaged
    over its height and width; the resulting per-image channel means are then
    averaged with equal weight per image (not per pixel).

    Args:
        imgset_all_path: Sequence holding the path of every image to include.

    Returns:
        A NumPy array of shape (3,) with the mean value of the R, G and B
        channels.

    Raises:
        ValueError: If *imgset_all_path* is empty, since the mean is undefined.
    """
    num_images = len(imgset_all_path)
    if num_images == 0:
        raise ValueError('imgset_all_path is empty; cannot compute a mean pixel value')

    mpv = np.zeros(shape=(3,))
    # Context managers close the progress bar and each image file even if an
    # image fails to load.
    with tqdm(total=num_images, desc='computing mean pixel value of training dataset...') as pbar:
        for imgpath in imgset_all_path:
            with Image.open(imgpath) as img:
                # Force three channels so grayscale, palette and RGBA images
                # all contribute a (3,) mean.
                x = np.asarray(img.convert('RGB'), dtype=np.float64) / 255.
            mpv += x.mean(axis=(0, 1))  # per-channel mean of this image
            pbar.update()
    return mpv / num_images
3.完整代码
import os

try:
    import imghdr
except ImportError:  # imghdr was removed from the standard library in Python 3.13
    imghdr = None

import numpy as np
from PIL import Image
from tqdm import tqdm
def is_imgfile(filepath):
    """Return True if *filepath* is a regular file whose header looks like an image.

    ``~`` in *filepath* is expanded first. Detection uses the standard-library
    ``imghdr`` module when it can be imported. ``imghdr`` was removed in
    Python 3.13, so when it is missing the first bytes of the file are compared
    against a small set of well-known signatures instead (PNG, JPEG, GIF, BMP,
    TIFF, WebP); that fallback recognises fewer formats than ``imghdr``.

    Files that cannot be opened or read are reported as "not an image" rather
    than raising, so one unreadable file does not abort a directory scan.
    """
    filepath = os.path.expanduser(filepath)
    if not os.path.isfile(filepath):
        return False

    # Imported here, and guarded, because the module no longer exists on
    # Python 3.13+.
    try:
        import imghdr
    except ImportError:
        imghdr = None

    try:
        if imghdr is not None:
            return imghdr.what(filepath) is not None
        with open(filepath, 'rb') as stream:
            header = stream.read(16)
    except OSError:
        return False

    signatures = (
        b'\x89PNG\r\n\x1a\n',  # PNG
        b'\xff\xd8\xff',       # JPEG
        b'GIF87a',             # GIF
        b'GIF89a',
        b'BM',                 # BMP
        b'II*\x00',            # TIFF, little-endian
        b'MM\x00*',            # TIFF, big-endian
    )
    if header.startswith(signatures):
        return True
    # WebP is a RIFF container with "WEBP" at byte offset 8.
    return header[:4] == b'RIFF' and header[8:12] == b'WEBP'
def load_imgpaths_from_dir(dirpath, walk=False):
    """Collect the absolute paths of the image files found under *dirpath*.

    Args:
        dirpath: Directory to scan; ``~`` is expanded and the path is made
            absolute, so the returned paths are absolute even when a relative
            directory is given.
        walk: When True, descend into sub-directories with ``os.walk``;
            otherwise only the direct entries of *dirpath* are examined.

    Returns:
        A sorted list of paths for which ``is_imgfile`` returns a true value.
        Sorting makes the result reproducible, because neither ``os.listdir``
        nor ``os.walk`` guarantees an order.
    """
    dirpath = os.path.abspath(os.path.expanduser(dirpath))
    if walk:
        # Recursive scan: every file below dirpath is a candidate.
        candidates = (
            os.path.join(root, name)
            for root, _, names in os.walk(dirpath)
            for name in names
        )
    else:
        # Flat scan: only the direct entries of dirpath are candidates.
        candidates = (os.path.join(dirpath, name) for name in os.listdir(dirpath))
    return sorted(path for path in candidates if is_imgfile(path))
def compute_imgdataset_mean_pixel_value(imgset_all_path):
    """Compute the mean pixel value of each RGB channel over a set of images.

    Every image is converted to RGB, scaled to the range [0, 1] and averaged
    over its height and width; the resulting per-image channel means are then
    averaged with equal weight per image (not per pixel).

    Args:
        imgset_all_path: Sequence holding the path of every image to include.

    Returns:
        A NumPy array of shape (3,) with the mean value of the R, G and B
        channels.

    Raises:
        ValueError: If *imgset_all_path* is empty, since the mean is undefined.
    """
    num_images = len(imgset_all_path)
    if num_images == 0:
        raise ValueError('imgset_all_path is empty; cannot compute a mean pixel value')

    mpv = np.zeros(shape=(3,))
    # Context managers close the progress bar and each image file even if an
    # image fails to load.
    with tqdm(total=num_images, desc='computing mean pixel value of training dataset...') as pbar:
        for imgpath in imgset_all_path:
            with Image.open(imgpath) as img:
                # Force three channels so grayscale, palette and RGBA images
                # all contribute a (3,) mean.
                x = np.asarray(img.convert('RGB'), dtype=np.float64) / 255.
            mpv += x.mean(axis=(0, 1))  # per-channel mean of this image
            pbar.update()
    return mpv / num_images
if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes ("\d", "\h") that are
    # invalid escape sequences in a normal string literal.
    path = r'G:\dataset\hymenoptera_data'
    img_path_list = load_imgpaths_from_dir(dirpath=path, walk=True)
    print(len(img_path_list))
    print(img_path_list)
    # Per-channel mean over every image that was found.
    mean_pixel_value = compute_imgdataset_mean_pixel_value(img_path_list)
    print("mvp:", mean_pixel_value)
结果: