比如统计数据集中图片数量(其他文件做法相同),实质上遍历多级目录
#----------------------------------------------------#
# path is your top-level (first-level) directory
# layout: path -> second_category -> third_category
#----------------------------------------------------#
def get_image_num(path, train_own_data):
    """Count the entries stored in the leaf folders of a dataset tree.

    The data is always looked up under ``<path>/second_catecory`` (the
    directory name is used verbatim, spelling included, because it is a
    runtime path).

    Args:
        path: Top-level directory of the dataset.
        train_own_data: If truthy, the tree is two levels deep:
            ``second_catecory/<character>/<files>``. Otherwise it is three
            levels deep: ``second_catecory/<alphabet>/<character>/<files>``.

    Returns:
        int: Total number of entries found in all leaf (character) folders.
        Every entry of a leaf folder is counted, whatever its type.

    Raises:
        OSError: If ``<path>/second_catecory`` does not exist or cannot be
            listed (propagated from ``os.listdir``).
    """
    train_path = os.path.join(path, 'second_catecory')

    if train_own_data:
        # Two-level layout: the character folders sit directly below.
        character_dirs = _subdirs(train_path)
    else:
        # Three-level layout: descend through each alphabet (broad category)
        # to reach its character (fine category) folders.
        character_dirs = [
            character_dir
            for alphabet_dir in _subdirs(train_path)
            for character_dir in _subdirs(alphabet_dir)
        ]

    # os.listdir returns the entries of a folder as a list; len() of that
    # list is the number of items in the folder.
    return sum(len(os.listdir(character_dir)) for character_dir in character_dirs)


def _subdirs(parent):
    """Return the full paths of the sub-directories directly inside *parent*.

    Plain files (e.g. ``.DS_Store``, ``Thumbs.db``, a README) are skipped so
    that a stray file at an intermediate level does not make the traversal
    fail with ``NotADirectoryError``.
    """
    candidates = (os.path.join(parent, name) for name in os.listdir(parent))
    return [candidate for candidate in candidates if os.path.isdir(candidate)]