需求:递归遍历多层文件夹中的所有 txt 文件,统计这些文件里出现的 id 的类别数(即不重复的 id 数量)。
txt 的内容如下:(原文此处的示例缺失;从下面的代码推断,每一行应是一个字典字面量,例如 {"goods": [{"id": 1}, {"id": 2}]})
# -*- coding:utf-8 -*-
import os
# Root directory handed to get_all_abs_path() by find_txt(); it is walked
# recursively to locate the txt files.
txt_path="/data/"
# Walk a directory tree and collect the path of every file in it.
def get_all_abs_path(source_dir):
    """Return the paths of all files found under ``source_dir``.

    The tree is traversed with ``os.walk``. Each returned path is the
    containing directory joined with the file name, so the results are
    absolute only when ``source_dir`` itself is absolute.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(source_dir)
        for filename in filenames
    ]
# Collect every line of every .txt file found under txt_path.
def find_txt():
    """Return all lines from every ``.txt`` file found under ``txt_path``.

    Candidate files are discovered with ``get_all_abs_path``. Lines are
    returned in the order they are read and keep their trailing newline
    characters.

    Returns:
        list[str]: the lines of all matching files, concatenated.
    """
    txt_list = []
    for txt_path_ in get_all_abs_path(txt_path):
        # Test the suffix of the path itself instead of splitting on "\\",
        # which is a path separator on Windows only.
        if not txt_path_.endswith(".txt"):
            continue
        # NOTE(review): latin1 decodes any byte sequence without raising, but
        # non-ASCII text stored as UTF-8 would be mis-decoded -- confirm the
        # real encoding of the data files.
        with open(txt_path_, "r", encoding="latin1") as f:
            # Iterating the file yields the same lines as readlines(); the
            # with-block closes the file, so no explicit close() is needed.
            txt_list.extend(f)
    return txt_list
# Collect the distinct "id" values found across all txt files.
def count_label():
    """Return the set of distinct ``id`` values listed under ``goods``.

    Every non-blank line returned by ``find_txt`` is evaluated as a Python
    expression that must yield a mapping with a ``'goods'`` entry; each
    element of that entry must provide an ``"id"``.

    Returns:
        set: the unique ids; its length is the number of id categories.

    Raises:
        SyntaxError: if a non-blank line is not a valid Python expression.
        KeyError: if a record lacks ``'goods'`` or an item lacks ``"id"``.
    """
    sku_label = set()
    for label in find_txt():
        # A blank line (e.g. a trailing empty line in a file) is not a valid
        # expression and would make eval() raise SyntaxError, so skip it.
        if not label.strip():
            continue
        # SECURITY NOTE: eval() executes arbitrary code contained in the data
        # files. If the files are not fully trusted, switch to
        # ast.literal_eval() or json.loads() after confirming the data format.
        c = eval(label)
        sku_label.update(label_one["id"] for label_one in c['goods'])
    return sku_label
# Script entry point: print each distinct id on its own line.
if __name__ == '__main__':
    unique_ids = count_label()
    for sku_id in unique_ids:
        print(sku_id)