def read_imdb2(data_dir, dest_dir='F:\\dll1'):
    """Copy every file in ``data_dir`` whose content starts with ``MZ``.

    The first two bytes of each regular file directly inside ``data_dir``
    are compared against the signature ``b'MZ'``. Matching files are copied
    into ``dest_dir`` under their original file name.

    Args:
        data_dir: Directory whose immediate entries are inspected
            (sub-directories are skipped, not descended into).
        dest_dir: Directory that receives the matching files. It is created
            on demand when the first match is found. Defaults to the
            location the function has always used.

    Returns:
        The number of files that matched and were copied.

    Raises:
        OSError: If ``data_dir`` cannot be listed, a file cannot be read,
            or a copy fails.
    """
    matched = 0
    for name in os.listdir(data_dir):
        src_path = os.path.join(data_dir, name)
        # os.listdir also yields sub-directories, which cannot be opened
        # for reading; only regular files can carry the signature.
        if not os.path.isfile(src_path):
            continue

        # Only the leading bytes matter, so avoid loading the whole file.
        # Three bytes are read because the progress output shows three.
        with open(src_path, 'rb') as f:
            header = f.read(3)

        # Slicing (rather than indexing) keeps empty and one-byte files
        # from raising IndexError; they simply do not match.
        if header[:2] != b'MZ':
            continue

        print(header)
        dst_path = os.path.join(dest_dir, name)
        print(dst_path)
        # Create the destination lazily so nothing is created when no
        # file matches.
        os.makedirs(dest_dir, exist_ok=True)
        shutil.copy(src_path, dst_path)
        matched += 1

    print("共有文件{}".format(matched))
    return matched
根据文件内容进行文件提取分类
最新推荐文章于 2023-07-12 16:43:55 发布