import hashlib
import os
def md5_vaule(dir):
    """Return the hexadecimal MD5 digest of a file's contents.

    Args:
        dir: Path of the file to hash. Despite the name, this is passed
            straight to ``open`` and must refer to a file, not a directory.

    Returns:
        The MD5 digest of the file's bytes as a lowercase hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.md5()
    # The context manager closes the handle even if reading fails; hashing in
    # fixed-size chunks avoids loading the whole file into memory.
    with open(dir, 'rb') as afile:
        for chunk in iter(lambda: afile.read(65536), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
def file_name(file_dir):
    """Collect the paths of all files under a directory tree.

    Args:
        file_dir: Root directory to walk with ``os.walk``.

    Returns:
        A list of file paths, one per file found in ``file_dir`` and its
        subdirectories, in the order ``os.walk`` yields them. The list is
        empty when no files are found.
    """
    paths = []
    for root, _dirs, files in os.walk(file_dir):
        # os.path.join uses the platform's separator, so the returned paths
        # can be opened on any OS rather than only where "\\" is valid.
        paths.extend(os.path.join(root, name) for name in files)
    return paths
if __name__ == '__main__':
    # Interactive entry point: read a folder path from stdin, hash every file
    # beneath it, and report how many files share a digest with another file.
    # The prompt below is in Chinese and asks for the folder path to check.
    print("\n检测某一文件夹中有多少个相同的文件\n\n输入要检查的文件夹路径.如:C:\\test (注意:必须是斜杠“\\”)")
    folder_name = input()
    print("\n")
    file_names = file_name(folder_name)
    all_md5 = []
    for i in file_names:
        all_md5.append(md5_vaule(i))
        # Echo each path once it has been hashed.
        print(i)
    # Only the number of distinct digests is needed, so a plain set is enough;
    # ordering the unique digests by first occurrence would cost O(n^2) and
    # its result was never used.
    unique_md5 = set(all_md5)
    # Duplicate count = total files minus distinct digests.
    print("有多少重复: ", len(all_md5) - len(unique_md5))
    # Block until Enter is pressed (presumably to keep a console window open).
    input()