这次做了一个工具：递归遍历指定目录，把其中特定类型文件的内容采集成数据集。直接上代码：
import os
def get_filename(path, filetype, output=None):
    """Recursively append the lines of every matching file under *path* to an output file.

    The directory tree rooted at *path* is walked in sorted order. Each
    regular file whose name ends with *filetype* is read as UTF-8, and every
    line is echoed to stdout and written to *output*.

    Args:
        path: Directory to scan.
        filetype: File-name suffix to collect, e.g. ``'.c'``.
        output: Writable text file object that receives the lines. When
            omitted, the module-level ``file`` handle (opened by the script
            entry point) is used.

    Returns:
        The return value of the last ``write`` call performed during this
        call (including nested directories), or ``None`` if no line was
        written. The same value is stored in the module-level ``result``.
    """
    global result
    if output is None:
        # Fall back to the handle the script entry point opens at module level.
        output = file

    last_write = None
    # Sort so the order of the collected data does not depend on the OS.
    for name in sorted(os.listdir(path)):
        # os.path.join keeps this working on every platform, not only Windows.
        full_path = os.path.join(path, name)
        if os.path.isdir(full_path):
            nested = get_filename(full_path, filetype, output)
            if nested is not None:
                last_write = nested
        elif name.endswith(filetype):
            # Only matching files are opened, so unrelated (possibly binary)
            # files can no longer trigger decode errors; `with` closes the
            # source file as soon as it has been copied.
            with open(full_path, encoding='utf-8') as source:
                for line in source:
                    print(line)
                    last_write = output.write(line)

    # Always assign, so `result` exists even when nothing matched.
    result = last_write
    return result
if __name__ == '__main__':
    path = 'E:\\linux-2.0.1\\arch\\alpha\\boot'  # directory to scan
    filetype = '.c'  # type of file to collect
    # Open the output file in append mode. The name `file` is bound at module
    # level because get_filename writes to it by default. `with` guarantees
    # the collected data is flushed and the handle closed, even on error.
    with open('C:\\Users\\84097\\Desktop\\alpha', 'a+', encoding='utf-8') as file:
        get_filename(path, filetype)