前言
最近整理群晖 NAS 上的文件时,发现影音库里有很多重复的资源,于是自己写了一个 Python 脚本来自动处理。
import os
import hashlib
# Only files with these extensions are checked for duplicates; add more
# entries if you want other types handled.  Stored as a set for O(1)
# membership tests.  BUGFIX: 'mpeg', 'mpg' and 'img' were missing the
# leading dot, so they could never match the output of
# os.path.splitext() (which always includes the dot).
file_type = {'.jpg', '.jpeg', '.png', '.gif', '.psd', '.bmp', '.webp',
             '.mp4', '.mkv', '.avi', '.mov', '.mpeg', '.mpg',
             '.rar', '.zip', '.img'}
# Accumulates the absolute paths of every candidate file found by the scan.
check_files = []
def remove_reapt_files(root_dir=r'/Users/yuqianjun/Downloads/'):
    """Find and delete duplicate files under ``root_dir``.

    Walks ``root_dir`` recursively, collects every file whose extension is
    listed in the module-level ``file_type``, groups files by the MD5 digest
    of their contents, and whenever two files share a digest deletes the
    copy with the *later* ``st_ctime`` (the copy with the earliest ctime is
    kept).

    Args:
        root_dir: Directory to scan. Defaults to the path that was
            previously hard-coded, so existing callers are unaffected.

    Side effects:
        Deletes files on disk, appends candidate paths to the module-level
        ``check_files`` list, and prints progress to stdout.
    """
    # Phase 1: collect candidate paths.  Results go into the module-level
    # ``check_files`` list to preserve the original module interface.
    # NOTE: the original version also matched *directory* names against
    # ``file_type`` and queued them for hashing; directories cannot be
    # opened or os.remove()d, so that branch only "worked" because a
    # blanket except swallowed the errors.  It has been dropped.
    for root, dirs, files in os.walk(root_dir):
        for name in files:
            ext = os.path.splitext(name)[1]
            if ext in file_type:
                check_files.append(os.path.join(root, name))

    files_dict = {}  # md5 hexdigest -> path of the copy currently kept
    r_index = 0      # number of duplicate files deleted
    print('Files Num:%s' % len(check_files))
    for value in check_files:
        md5_hash = hashlib.md5()
        try:
            # "rb", not "rb+": we only read, so read-only files work too.
            with open(value, "rb") as f:
                for byte_block in iter(lambda: f.read(4096), b""):
                    md5_hash.update(byte_block)
            file_md5 = md5_hash.hexdigest()
            print('Check file MD5:%s' % value)
            if files_dict.get(file_md5) is None:
                # First time we see this content: remember it.
                files_dict[file_md5] = value
            else:
                # Duplicate content: keep the copy with the earlier ctime.
                d_path = files_dict[file_md5]
                if os.stat(d_path).st_ctime > os.stat(value).st_ctime:
                    os.remove(d_path)
                    files_dict[file_md5] = value
                    print('Delete File:', d_path)
                else:
                    os.remove(value)
                    print('Delete File:', value)
                r_index += 1
        except OSError:
            # Narrowed from a silent `except Exception: pass`: only
            # filesystem errors (file vanished between walk and open/stat/
            # remove, permission denied, ...) are expected here.
            print('File does not exist or has been deleted')
    print('File Count:%s, Repeat Files Num:%s. All deleted!' % (len(check_files), str(r_index)))
# Script entry point: kick off the duplicate scan only when this file is
# executed directly, never on import.
if __name__ == '__main__':
    remove_reapt_files()