# -*- coding: utf-8 -*-
import os
def all_files_path(rootDir, collected=None):
    """Recursively collect the path of every file below ``rootDir``.

    ``os.walk`` already descends into every sub-directory, so no explicit
    recursion over ``dirs`` is required.

    Args:
        rootDir: Directory to scan.
        collected: Optional list the paths are appended to. When omitted,
            the module-level ``filepaths`` list is used if one exists (the
            behaviour the script entry point relies on); otherwise a fresh
            list is created so the function also works when imported.

    Returns:
        The list the paths were appended to. Every entry is the walked
        directory joined with the file name, so entries are absolute only
        when ``rootDir`` itself is absolute.
    """
    if collected is None:
        # Backward compatibility: fall back to the script-level accumulator.
        collected = globals().get("filepaths")
        if collected is None:
            collected = []

    # root = current directory, _dirs = its sub-directories, files = its files
    for root, _dirs, files in os.walk(rootDir):
        collected.extend(os.path.join(root, name) for name in files)
    return collected
if __name__ == "__main__":
    # Module-level accumulator that all_files_path() appends every path to.
    filepaths = []
    all_files_path('文件路径')
    # Append mode: paths are added after whatever the file already holds,
    # one path per line.
    with open('******.txt', 'a') as out:
        out.writelines(path + '\n' for path in filepaths)
import os
from PIL import Image
import numpy as np
import tqdm
def main():
    """Estimate the per-channel mean and std of an image dataset and print them.

    Reads image paths (one per line) from a text file, loads every image as
    RGB scaled to [0, 1], and averages the per-image channel means and
    standard deviations over all images.

    Note: the reported std is the average of the per-image stds, which is
    an approximation and not the std taken over all pixels of the dataset.

    Raises:
        FileNotFoundError: If the path-list file does not exist.
    """
    # Number of channels in the dataset (must match the 'RGB' conversion below).
    img_channels = 3
    # Each line of the txt file is one image path.
    txt = '***.txt'
    # Explicit raise instead of `assert` so the check survives `python -O`.
    if not os.path.exists(txt):
        raise FileNotFoundError(f"'{txt}' does not exist.")

    # Read one path per line, skipping blank lines (e.g. a trailing newline)
    # that would otherwise be handed to Image.open as an empty path.
    with open(txt, 'r') as f:
        imgs_path = [line.strip('\n') for line in f if line.strip()]

    # Report the dataset size.
    print(f"INFO: {len(imgs_path)} imgs in total")
    if not imgs_path:
        # Nothing to average; bail out instead of dividing by zero (NaN).
        print(f"WARNING: '{txt}' contains no image paths, nothing to compute.")
        return

    # Running sums of the per-image mean and std, one slot per channel.
    # Channel order is RGB (PIL's order; cv2.imread would give BGR).
    cumulative_mean = np.zeros(img_channels)
    cumulative_std = np.zeros(img_channels)

    for img_path in tqdm.tqdm(imgs_path, total=len(imgs_path)):
        # The context manager closes the underlying file; convert('RGB')
        # guarantees an HWC array with 3 channels even for grayscale,
        # palette or RGBA inputs.
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert('RGB')) / 255.
        # Reduce over height and width, leaving one value per channel.
        cumulative_mean += img.mean(axis=(0, 1))
        cumulative_std += img.std(axis=(0, 1))

    mean = cumulative_mean / len(imgs_path)
    std = cumulative_std / len(imgs_path)
    print(f"mean: {mean}")
    print(f"std: {std}")
# Run the statistics computation only when executed as a script.
if __name__ == "__main__":
    main()