【Python】查找指定路径下的所有大文件

最新推荐文章于 2023-05-06 15:49:09 发布

小明2766

最新推荐文章于 2023-05-06 15:49:09 发布

阅读量180

点赞数

分类专栏： Python 文章标签： python pandas

本文链接：https://blog.csdn.net/bill2766/article/details/126882212

版权

Python 专栏收录该内容

14 篇文章 0 订阅

订阅专栏

代码如下：

import os
import math
import pandas as pd

def byte_to_read(byte):
    """Format a byte count as a short human-readable string.

    The value is divided by 1000 until it drops below 1000 or the largest
    supported unit (GB) is reached, then rounded to two decimals and
    suffixed with the unit, e.g. ``1500`` -> ``'1.5KB'``.

    Args:
        byte: Size in bytes.

    Returns:
        The rounded value followed by one of ``B``, ``KB``, ``MB``, ``GB``.
    """
    # Per the original author's note: on Apple macOS, 1 KB = 1000 B,
    # so decimal (not 1024-based) steps are used.
    step = 1000.0
    value = byte
    for unit in ('B', 'KB', 'MB'):
        if value < step:
            return str(round(value, 2)) + unit
        value = value / step
    # Anything that is still >= 1000 MB is reported in GB, however large.
    return str(round(value, 2)) + 'GB'

def getAllBig(path, byte):
    """Find every file under *path* whose size is at least *byte* bytes.

    The directory tree is walked with ``os.walk``. Matching files are
    returned largest first.

    Args:
        path: Root directory to scan recursively.
        byte: Minimum file size, in bytes, for a file to be reported.

    Returns:
        A ``pandas.DataFrame`` with the columns ``path`` (containing
        directory), ``name`` (file name) and ``size`` (human-readable size
        produced by ``byte_to_read``), sorted by size in descending order
        with a fresh 0-based index. The frame is empty (but has the same
        columns) when nothing matches.
    """
    # Rows are gathered in a plain list and the frame is built once:
    # DataFrame.append was removed in pandas 2.0, and appending row by row
    # copied the whole frame on every match.
    rows = []
    for folderName, _subFolders, fileNames in os.walk(path):
        for filename in fileNames:
            try:
                filesize = os.path.getsize(os.path.join(folderName, filename))
            except OSError:
                # Broken symlink, or a file that vanished / is unreadable
                # mid-scan: skip it instead of aborting the whole walk.
                continue
            if filesize >= byte:
                rows.append({'path': folderName, 'name': filename, 'file_size': filesize})

    # Explicit columns keep the schema stable even when no file matched.
    res = pd.DataFrame(rows, columns=['path', 'name', 'file_size'])
    res['size'] = res['file_size'].map(byte_to_read)
    # Sort on the numeric size, then drop it so only the readable form remains.
    res.sort_values('file_size', ascending=False, inplace=True)
    del res['file_size']
    res.reset_index(drop=True, inplace=True)
    return res

# Change scan_root to the directory you want to scan.
scan_root = '/Users/trent2766/Documents/办公文件'
# Size threshold in bytes; adjust as needed.
# Here: 100 MB = 100 * 1024 KB = 100 * 1024 * 1024 B.
min_bytes = 100 * 1024 * 1024

bigs = getAllBig(scan_root, min_bytes)
print(bigs)
# Persist the report next to the script; the utf_8_sig codec writes a
# UTF-8 byte-order mark at the start of the file.
bigs.to_csv('big_files.csv', encoding='utf_8_sig')