微信存储占空间太大怎么办？用Python批量净化！

最新推荐文章于 2024-08-03 19:27:22 发布

Ghost_Jing

最新推荐文章于 2024-08-03 19:27:22 发布

阅读量359

点赞数 6

文章标签： python 微信 开发语言

本文链接：https://blog.csdn.net/weixin_61428726/article/details/135989626

版权

原理：

递归遍历当前文件夹下的所有文件，先把文件权限改为可写，再计算每个文件的 MD5 值并存入临时字典；如果发现某个文件的 MD5 值与已记录的重复，则删除该文件。代码如下：

import os, stat
import hashlib


def calculate_md5(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is opened in binary mode and consumed in 8192-byte chunks, so
    the whole file never has to be held in memory at once.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()


def is_readable(file_path):
    """Report whether ``file_path`` can be opened for binary reading.

    Returns True when the open succeeds (the handle is closed right away)
    and False when opening raises IOError.
    """
    try:
        handle = open(file_path, "rb")
    except IOError:
        return False
    handle.close()
    return True


def find_duplicate_files(folder_path):
    """Walk ``folder_path`` recursively and delete files with duplicate content.

    Files are compared by their MD5 digest. The first file encountered
    during the walk with a given digest is kept; every later file with the
    same digest is removed from disk. Files that cannot be read, hashed or
    removed are reported with ``print`` and skipped, so one bad file does
    not abort the whole scan.

    Args:
        folder_path: Root directory to scan.

    Returns:
        None. Progress is reported on standard output.
    """
    md5_dict = {}

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)

            # Add the write bit on top of the existing mode. Passing only
            # stat.S_IWRITE would replace the whole mode and strip read
            # permission on POSIX systems, making the file unreadable.
            try:
                current_mode = os.stat(file_path).st_mode
                os.chmod(file_path, current_mode | stat.S_IWRITE)
            except OSError:
                # Best effort: the file may still be readable/removable even
                # if its mode cannot be changed (e.g. owned by another user);
                # the checks below report it if it really is inaccessible.
                pass

            if not is_readable(file_path):
                # Unreadable file: skip it.
                print(f"无法访问文件: {file_path}")
                continue

            try:
                file_md5 = calculate_md5(file_path)
            except OSError:
                # Covers PermissionError as well as files that disappear or
                # fail with an I/O error between the check and the read.
                print(f"无法访问文件: {file_path}")
                continue

            if file_md5 not in md5_dict:
                # First time this content is seen: remember it and keep it.
                md5_dict[file_md5] = file_path
                continue

            try:
                os.remove(file_path)
            except OSError:
                print(f"无法访问文件: {file_path}")
                continue
            # Report the deletion only after it actually succeeded.
            print(f"删除重复文件: {file_path}")


if __name__ == "__main__":
    # Directory to scan; "." means the current working directory.
    target_dir = "."
    find_duplicate_files(target_dir)