import os
import re
from subprocess import getoutput
import numpy as np
import pandas as pd
from tqdm import tqdm
import time
def filesize_float_function(filepath):
    """Return the size of a file in mebibytes as a float.

    Args:
        filepath: Path of the file to measure.

    Returns:
        The file size in bytes divided by 1024 ** 2.
    """
    bytes_per_mb = float(1024 ** 2)
    size_in_bytes = os.stat(filepath).st_size
    return size_in_bytes / bytes_per_mb
def filesize_to_MB_function(filepath):
    """Return the size of a file as a string expressed in MB.

    Args:
        filepath: Path of the file to measure.

    Returns:
        The size (bytes / 1024 ** 2) formatted with ten decimal places and
        followed by " MB".
    """
    size_in_mb = os.stat(filepath).st_size / float(1024 ** 2)
    return "{:.10f} MB".format(size_in_mb)
def filesize_to_GB_function(filepath):
    """Return the size of a file as a string expressed in GB.

    Args:
        filepath: Path of the file to measure.

    Returns:
        The size (bytes / 1024 ** 3) formatted with ten decimal places and
        followed by " GB".
    """
    size_in_gb = os.stat(filepath).st_size / float(1024 ** 3)
    return "{:.10f} GB".format(size_in_gb)
def filesize_to_TB_function(filepath):
    """Return the size of a file as a string expressed in TB.

    Args:
        filepath: Path of the file to measure.

    Returns:
        The size (bytes / 1024 ** 4) formatted with ten decimal places and
        followed by " TB".
    """
    size_in_tb = os.stat(filepath).st_size / float(1024 ** 4)
    return "{:.10f} TB".format(size_in_tb)
def get_filename_function(filepath):
    """Return the file name (final path component) of a path.

    The previous implementation returned ``os.path.splitext(filepath)[-1]``,
    which is the extension (e.g. ".csv"), not the file name.

    Args:
        filepath: Path to extract the file name from.

    Returns:
        The last component of ``filepath``, including its extension. An empty
        string is returned when the path ends with a separator.
    """
    return os.path.basename(filepath)
def get_filetype_function(filepath,
                          types=('py', 'txt', 'log', 'csv',
                                 'docx', 'doc',
                                 'dp', 'pdf', 'ipynb', 'jpeg',
                                 'raw', 'jpg', 'bmp',
                                 'svg', 'png', 'tif', 'gif',
                                 'pcx', 'tga', 'exif', 'fpx',
                                 'psd', 'cdr', 'pcd', 'dxf',
                                 'ufo', 'eps', 'ai', 'WMF',
                                 'webp')):
    """Return the file's extension if it is one of the accepted types.

    Args:
        filepath: Path (or bare file name) whose extension is inspected.
        types: Iterable of accepted extensions. Entries may be written with
            or without a leading dot ("py" and ".py" are equivalent).
            Matching is case-sensitive.

    Returns:
        The extension without its leading dot when it is listed in ``types``,
        otherwise ``False``.
    """
    # splitext yields ".ext" (or ""); keep only the text after the dot.
    file_type = os.path.splitext(filepath)[-1].split('.')[-1]
    # The extracted extension never carries a dot, so strip leading dots from
    # the accepted entries too; otherwise an entry such as ".py" can never
    # match anything.
    accepted = {candidate.lstrip('.') for candidate in types}
    if file_type in accepted:
        return file_type
    return False
def get_time_stamp_to_time_function(timestamp):
    """Format a timestamp as a local-time string.

    Args:
        timestamp: Seconds since the epoch, as accepted by ``time.localtime``.

    Returns:
        The local time rendered as "YYYY-MM-DD HH:MM:SS".
    """
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))
def get_access_time_function(filepath):
    """Return the last access time of a file.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        The access time as a float number of seconds since the epoch.
    """
    file_status = os.stat(filepath)
    return file_status.st_atime
def get_create_time_function(filepath):
    """Return the ctime of a file.

    Note: the meaning of ctime is platform-dependent. It is the creation time
    on Windows, but the time of the last metadata change on Unix-like systems.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        The ctime as a float number of seconds since the epoch.
    """
    file_status = os.stat(filepath)
    return file_status.st_ctime
def get_modify_time_function(filepath):
    """Return the last modification time of a file.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        The modification time as a float number of seconds since the epoch.
    """
    file_status = os.stat(filepath)
    return file_status.st_mtime
def floa_time_function(strptime):
    """Convert a local-time string into a float timestamp.

    Args:
        strptime: Time string in the form "YYYY-MM-DD HH:MM:SS", interpreted
            as local time.

    Returns:
        The corresponding number of seconds since the epoch, as a float.
    """
    parsed_time = time.strptime(strptime, '%Y-%m-%d %H:%M:%S')
    return time.mktime(parsed_time)
def filter_file_paths(dir_path=os.path.expanduser("~/")):
    """Walk a directory tree and collect per-file metadata into a DataFrame.

    Each regular file found under ``dir_path`` contributes one row. Files that
    cannot be stat'ed (for example broken symlinks, files removed while the
    walk is running, or permission errors) are skipped instead of aborting the
    whole scan.

    Note on column contents, which are kept exactly as before: the
    "创建时间" / "访问时间" / "修改时间" columns hold raw float timestamps,
    while the "创建时间戳" / "访问时间戳" / "修改时间戳" columns hold the
    formatted "YYYY-MM-DD HH:MM:SS" strings.

    Args:
        dir_path: Root directory to scan. Defaults to the user's home
            directory.

    Returns:
        A pandas DataFrame with the columns 文件地址, 文件类型, 创建时间,
        访问时间, 修改时间, 文件大小浮点数, 文件大小mb, 创建时间戳,
        访问时间戳 and 修改时间戳.
    """
    import stat  # local import: only this function needs the mode helpers

    result = {"文件地址": [], "文件类型": [], "创建时间": [],
              "访问时间": [], "修改时间": [], "文件大小浮点数": [],
              "文件大小mb": [], "创建时间戳": [], "访问时间戳": [],
              "修改时间戳": []}
    bytes_per_mb = float(1024 ** 2)
    for dir_, _folders, files in tqdm(os.walk(dir_path)):
        for file in files:
            temp_path = os.path.join(dir_, file)
            # Stat once per file: every value in the row then comes from the
            # same snapshot, and a file that vanishes mid-walk cannot raise
            # halfway through building the row.
            try:
                info = os.stat(temp_path)
            except (OSError, ValueError):
                continue
            if not stat.S_ISREG(info.st_mode):
                continue
            size_mb = info.st_size / bytes_per_mb
            result["文件地址"].append(temp_path)
            result["文件类型"].append(get_filetype_function(temp_path))
            result["创建时间"].append(info.st_ctime)
            result["访问时间"].append(info.st_atime)
            result["修改时间"].append(info.st_mtime)
            result["创建时间戳"].append(get_time_stamp_to_time_function(info.st_ctime))
            result["访问时间戳"].append(get_time_stamp_to_time_function(info.st_atime))
            result["修改时间戳"].append(get_time_stamp_to_time_function(info.st_mtime))
            result["文件大小浮点数"].append(size_mb)
            result["文件大小mb"].append("%.10f MB" % size_mb)
    return pd.DataFrame(result)
# Scan the current working directory when the module is executed; the
# resulting DataFrame is not stored.
filter_file_paths(dir_path=os.path.expanduser(os.getcwd()))
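# NOTE(review): the two lines below look like stray page residue (possibly a
# date and a counter), not code; "05-10" is a SyntaxError in Python 3, so they
# are kept here as comments.
# 05-10
# 1578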