生活中会下载很多MP3文件资源,但是这些资源中不可避免地会出现很多重复的文件,于是自己利用Python 3.8写了一个自动删除重复文件的脚本。运行的时候还请注意文件路径的填写格式。话不多说,直接上脚本。
import os
import sys
import hashlib
from os.path import getsize
class ClearRepeat(object):
    """Find and delete duplicate files beneath a directory tree.

    Two files are treated as duplicates when both their size in bytes and
    the MD5 digest of their raw contents match. The first file seen with a
    given ``(size, digest)`` pair is kept; every later one is removed from
    disk.

    Usage: call ``getSource(path)`` to collect the files, then
    ``findRepeat()`` to delete the duplicates.
    """

    def __init__(self):
        # Root directory accepted by the last successful getSource() call,
        # or None when no usable directory has been loaded.
        self.file_path = None
        # Full paths of the files gathered by getSource() (None until then).
        self.file_collection = None
        # (size, md5 digest) -> path of the file that was kept.
        self.file_origin = {}
        # (size, md5 digest) -> list of duplicate paths that were deleted.
        self.file_repeat = {}

    def getSource(self, file_path):
        """Collect the path of every file found under *file_path*.

        On success ``self.file_path`` is set to *file_path* and
        ``self.file_collection`` holds the full path of each file found by
        walking the tree. If *file_path* does not exist, or an error occurs,
        ``self.file_path`` is left as None so that ``findRepeat()`` refuses
        to run.
        """
        # Clear the result of any earlier call first; otherwise a failed
        # call would leave findRepeat() operating on the previous directory.
        self.file_path = None
        self.file_collection = []
        try:
            if not os.path.exists(file_path):
                return
            collected = []
            for dirpath, _dirnames, filenames in os.walk(file_path):
                for file in filenames:
                    collected.append(os.path.join(dirpath, file))
                    # '\r' with end='' redraws the counter on a single line.
                    print('File Collection Success.Total File:%d\r' % len(collected), end='')
            print('\n')
            self.file_path = file_path
            self.file_collection = collected
        except (OSError, TypeError, ValueError) as error:
            self.file_path = None
            print(error)

    def findRepeat(self):
        """Delete every collected file that duplicates an earlier one.

        Files are visited in collection order. The first file for each
        ``(size, digest)`` key is recorded in ``self.file_origin``; any later
        file with the same key is deleted and recorded in
        ``self.file_repeat``. A file that cannot be read or removed is
        reported and skipped.
        """
        if self.file_path is None or not self.file_collection:
            print("\nPlease Check File Path Is Correctly!")
            return

        # Start every run with an empty index. Reusing the index of a
        # previous run would make each surviving original match its own
        # entry and be deleted.
        self.file_origin = {}
        self.file_repeat = {}
        remaining = []
        file_count = 0
        for file in self.file_collection:
            deleted = False
            try:
                compound_key = (getsize(file), self.createChecksum(file))
                if compound_key in self.file_origin:
                    print("\nDelete Repete File %s" % file)
                    os.remove(file)
                    deleted = True
                    self.file_repeat.setdefault(compound_key, []).append(file)
                else:
                    self.file_origin[compound_key] = file
            except OSError as error:
                print(error)
            if not deleted:
                remaining.append(file)
            file_count += 1
            print("Check File Count:%d\r" % file_count, end='')
        # Forget paths that no longer exist so a later run does not trip
        # over them.
        self.file_collection = remaining
        print("\nDelete Repeat File Success!")

    def createChecksum(self, path):
        """Return the MD5 digest (bytes) of the raw contents of *path*.

        The file is read in binary mode in 8 KiB chunks, so the digest
        covers every byte and does not depend on any text encoding.
        MD5 is used here only to compare contents, not for security.

        Raises OSError if the file cannot be opened or read.
        """
        checksum = hashlib.md5()
        with open(path, 'rb') as fp:
            for chunk in iter(lambda: fp.read(8192), b''):
                checksum.update(chunk)
        return checksum.digest()
if __name__ == '__main__':
    # Directory to scan: the first command-line argument when one is given,
    # otherwise the original hard-coded default. Backslashes in a Windows
    # path must be escaped (or a raw string used) when editing the default.
    file_path = sys.argv[1] if len(sys.argv) > 1 else 'E:\\我的共享\\车载音乐'
    obj_clear = ClearRepeat()
    obj_clear.getSource(file_path)
    obj_clear.findRepeat()