目录
获取多级父目录:
import os
# Sample file path used to demonstrate climbing up the directory tree.
path = '/home/user/projects/example_project/subdir/file.txt'
# Append three parent-directory components and let abspath collapse them,
# which yields the directory three levels above the file itself.
third_level_parent = os.path.abspath(os.path.join(path, *(os.pardir,) * 3))
print(f"The third level parent directory of {path} is {third_level_parent}")
Linux:获取目录大小(包含隐藏目录)
import glob
import subprocess
import os
def get_directory_size_with_du(directory):
    """Return the human-readable disk usage of *directory* as printed by ``du -sh``.

    Args:
        directory: Path of the file or directory to measure.

    Returns:
        The size token printed by ``du`` (for example ``"1.2G"``), or ``None``
        when ``du`` cannot be started or prints no size at all.
    """
    try:
        # Run `du -sh` and capture its output.
        result = subprocess.run(['du', '-sh', directory], capture_output=True, text=True, check=True)
        output = result.stdout
    except subprocess.CalledProcessError as e:
        print(f"Error occurred: {e}")
        # du exits non-zero when part of the tree is unreadable (e.g. permission
        # denied) but still prints the total for what it could read, so keep
        # that output instead of throwing it away.
        output = e.stdout or ""
    except OSError as e:
        # du is missing or not executable on this system.
        print(f"Error occurred: {e}")
        return None
    # Output looks like "1.2G\t/path/to/directory"; the first field is the size.
    fields = output.split()
    return fields[0] if fields else None
def find_first_level_subdirs(directory):
    """List the immediate sub-directories of *directory*.

    Args:
        directory: Path of the directory to inspect.

    Returns:
        A list with the joined path of every direct child directory. The list
        is empty when ``os.walk`` yields nothing for *directory*.
    """
    # Only the first tuple produced by os.walk describes the top level, so
    # take that one and never advance the generator into deeper levels.
    top_level = next(os.walk(directory), None)
    if top_level is None:
        return []
    parent, child_names, _ = top_level
    return [os.path.join(parent, child) for child in child_names]
if __name__ == '__main__':
    base_dir = "/home/lixiang/workspace/giga-generativeai"
    # base_dir = "/home/lixiang/workspace/giga-generativeai/giga-train/gt_projects"
    # Visible entries plus hidden ones ('*' does not match names starting with a dot).
    entries = glob.glob(base_dir + '/*') + glob.glob(base_dir + '/.*')
    for entry in entries:
        size = get_directory_size_with_du(entry)
        if not size:
            # The size helper returns None on failure; skip instead of
            # crashing on size[-1].
            continue
        unit = size[-1]
        if unit in ("G", "T", "P"):
            # Gigabyte-sized or larger entries are always reported.
            print(f"dir size: {size}", entry)
        elif unit == "M":
            # Report megabyte-sized entries only above 1M. Some locales print
            # a decimal comma ("1,5M"), which float() would reject.
            size_number = float(size[:-1].replace(",", "."))
            if size_number > 1:
                print(f"dir size: {size}", entry)
    # Finally list the direct sub-directories of the base directory.
    subdirs = find_first_level_subdirs(base_dir)
    for subdir in subdirs:
        print(subdir)
Windows 系统:
import math
import subprocess
def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: Non-negative number of bytes.

    Returns:
        ``"0B"`` for zero; otherwise the value scaled to the largest fitting
        unit and rounded to two decimals, e.g. ``"1.5 KB"``. Values beyond the
        largest known unit are expressed in that largest unit.

    Raises:
        ValueError: If ``size_bytes`` is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError(f"size_bytes must be non-negative, got {size_bytes}")
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    # Scale by repeated division instead of a floating-point logarithm: the
    # unit index can then never run past the table, never go negative for
    # values below one byte, and exact powers of 1024 always land on the
    # intended unit.
    value = float(size_bytes)
    i = 0
    while value >= 1024 and i < len(size_name) - 1:
        value /= 1024
        i += 1
    s = round(value, 2)
    return f"{s} {size_name[i]}"
def get_directory_size_with_powershell(directory):
    """Return the total size in bytes of all files under *directory* via PowerShell.

    Args:
        directory: Path of the directory to measure.

    Returns:
        The summed file length in bytes, ``0`` when PowerShell reports no
        files and no error, or ``None`` when PowerShell cannot be run, fails,
        or prints something that is not an integer.
    """
    # Inside a single-quoted PowerShell string the only character needing
    # escaping is the quote itself, which is written doubled.
    quoted = str(directory).replace("'", "''")
    # -LiteralPath: do not treat characters such as [ ] in the path as wildcards.
    # -Force: include hidden and system items in the total.
    # -File: feed only files (which carry a Length) to Measure-Object.
    command = (
        f"(Get-ChildItem -LiteralPath '{quoted}' -Recurse -Force -File"
        " | Measure-Object -Property Length -Sum).Sum"
    )
    try:
        # -NoProfile keeps profile scripts from writing into the captured output.
        result = subprocess.run(['powershell', '-NoProfile', '-Command', command], capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing powershell executable.
        print(f"Error occurred: {e}")
        return None
    output = result.stdout.strip()
    if not output:
        errors = result.stderr.strip()
        if errors:
            # Nothing was measured and PowerShell complained (e.g. the path
            # does not exist).
            print(f"Error occurred: {errors}")
            return None
        # No files were found, so the sum is empty rather than a number.
        return 0
    try:
        size_in_bytes = int(output)
    except ValueError as e:
        print(f"Error occurred: {e}")
        return None
    return size_in_bytes
if __name__ == '__main__':
    # Example usage.
    directory_path =r"F:\biadu_down\jacke121-yolov5-v3.0_class"
    size_in_bytes = get_directory_size_with_powershell(directory_path)
    if size_in_bytes is None:
        # The size helper returns None on failure; formatting None would crash.
        print(f"Could not determine the size of {directory_path}")
    else:
        size_human_readable = convert_size(size_in_bytes)
        # convert_size already appends the unit, so nothing is added after it.
        print(f"Directory size: {size_human_readable}")
问题:某个目录占用的空间很大,但它的各个可见子目录占用的空间都很小
解决方法:查看隐藏文件:
ls -la /path/to/a
发现有个隐藏目录 .git 很大,
查看 .git 大小:
du -sh /path/to/a/.git
删除 .git 目录(注意:这会丢失该仓库的全部版本历史,删除前请确认不再需要):
rm -R /path/to/a/.git
获取目录下所有文件的后缀(扩展名)
import os
def get_all_file_extensions(directory):
    """Collect the distinct file extensions found anywhere under *directory*.

    Args:
        directory: Root of the tree to scan recursively.

    Returns:
        A set of extensions as returned by ``os.path.splitext`` (leading dot
        included). Files without an extension contribute nothing.
    """
    suffixes = (
        os.path.splitext(filename)[1]
        for _, _, filenames in os.walk(directory)
        for filename in filenames
    )
    # Drop the empty suffix produced by files that have no extension.
    return {suffix for suffix in suffixes if suffix}
if __name__ == '__main__':
    dir_path = r"E:\project\depth\linre\linre"
    # Print every distinct extension found under the tree.
    for extension in get_all_file_extensions(dir_path):
        print(extension)
    # Collect files whose names end with one of these suffixes (no leading dot
    # is required for a match), with backslashes in the folder part
    # normalised to forward slashes.
    wanted_suffixes = ('cu', 'xpng', 'xjpeg')
    img_files = []
    for folder, _, filenames in os.walk(dir_path):
        folder = folder.replace("\\", "/")
        for filename in filenames:
            if filename.endswith(wanted_suffixes):
                img_files.append('%s/%s' % (folder, filename))
    for file in img_files:
        print(file)