# Adapted from an excerpt: an adjusted approach to MD5-based duplicate-file removal.
import os, hashlib
class MD5():
    """Remove duplicate files by comparing MD5 digests of file contents.

    ``path1`` is the reference folder: its files designate which contents
    should be deleted elsewhere.  ``path2`` is the folder that files are
    actually deleted from.
    """

    def __init__(self, path1, path2):
        # path1: folder holding the files that designate what to delete.
        # path2: folder whose files are candidates for deletion.
        self.path1 = path1
        self.path2 = path2

    def getMD5(self, path):
        """Return the hexadecimal MD5 digest of the file at ``path``.

        The file is read in fixed-size binary chunks so that large files do
        not have to be loaded into memory at once.  Errors from ``open`` or
        ``read`` (e.g. ``OSError``) propagate to the caller.
        """
        digest = hashlib.md5()
        with open(path, 'rb') as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for block in iter(lambda: f.read(8096), b''):
                digest.update(block)
        return digest.hexdigest()

    def md5list(self):
        """Return a dict mapping MD5 digest -> file path for files under ``path1``.

        The folder is walked recursively.  When several files share the same
        content, only the path visited last is kept for that digest.
        """
        digests = {}
        for root, _dirs, files in os.walk(self.path1, topdown=False):
            for name in files:
                fpath = os.path.join(root, name)
                digests[self.getMD5(fpath)] = fpath
        return digests

    @staticmethod
    def _is_under(directory, root):
        """Return True if ``directory`` is ``root`` itself or lies inside it.

        Both arguments are expected to be already-resolved absolute paths.
        """
        try:
            # ``root`` goes first so the result carries its spelling on
            # case-insensitive platforms.
            return os.path.commonpath([root, directory]) == root
        except ValueError:
            # Raised e.g. for paths on different drives: cannot be nested.
            return False

    def del_met(self):
        """Delete every file under ``path2`` whose content also exists under ``path1``.

        Files located inside the reference folder itself are never deleted.
        Without this guard, overlapping folders (``path2 == path1`` or
        ``path1`` nested in ``path2``) would wipe the reference files,
        because each of them trivially matches its own digest.
        """
        md5_src = self.md5list()
        ref_root = os.path.realpath(self.path1)
        for root, _dirs, files in os.walk(self.path2, topdown=False):
            if self._is_under(os.path.realpath(root), ref_root):
                # This directory belongs to the reference folder: keep it.
                continue
            for name in files:
                fpath = os.path.join(root, name)
                if self.getMD5(fpath) in md5_src:
                    print('删除重复文件:', fpath)
                    os.remove(fpath)

    def del_repeat(self):
        """Delete duplicate files inside ``path2``, keeping one file per content.

        The first file seen for a given digest (in ``os.walk`` bottom-up
        order) is kept; every later file with the same digest is removed.
        """
        seen = {}
        for root, _dirs, files in os.walk(self.path2, topdown=False):
            for name in files:
                fpath = os.path.join(root, name)
                digest = self.getMD5(fpath)
                if digest in seen:
                    print('删除重复文件:', fpath)
                    os.remove(fpath)
                else:
                    seen[digest] = fpath
if __name__ == '__main__':
    # Script entry point.  The two folders come from the command line:
    #   argv[1] -> path1, the reference folder (designates what to delete)
    #   argv[2] -> path2, the folder files are deleted from
    # Previously path1/path2 were never defined, so the script always
    # failed with NameError.
    import sys

    if len(sys.argv) != 3:
        sys.exit('usage: {} REFERENCE_DIR TARGET_DIR'.format(sys.argv[0]))
    path1, path2 = sys.argv[1], sys.argv[2]
    m = MD5(path1, path2)
    # Delete files in path2 whose content also exists in path1.
    m.del_met()
    # Delete duplicate files within path2 itself.
    m.del_repeat()