Traversing HDFS Paths in a Shell Script to Report Directory Sizes by Level

The script below first writes a color-coded size summary of every top-level HDFS directory, then drills down through any directory above a per-level threshold (1 TB, then 80 GB, then 50 GB) and appends each level's breakdown to hdfs_detail.txt.

#!/bin/bash 

# Resolve the working directory: $(dirname $0) is the parent
# directory of the currently executing script file
workdir=$(cd "$(dirname "$0")"; pwd)
date=$(date +%Y-%m-%d-%H:%M:%S)

init(){
    # Remove the previous report so each run starts from a clean file
    rm -f $workdir/hdfs_detail.txt
    touch $workdir/hdfs_detail.txt
    chmod 777 $workdir/hdfs_detail.txt
    echo "[Init Time]:$date" >> $workdir/hdfs_detail.txt
    echo "--" >> $workdir/hdfs_detail.txt
    echo "--" >> $workdir/hdfs_detail.txt

}
hdfs_collect(){
    echo "                                        ----[ 汇总 ]----                                                " >> $workdir/hdfs_detail.txt
    echo "" >> $workdir/hdfs_detail.txt
    echo "|    大小    |   当前目录   |" >> $workdir/hdfs_detail.txt
    hadoop fs -du / |sort -r -n| awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2);}}}}' >> $workdir/hdfs_detail.txt
    echo "" >> $workdir/hdfs_detail.txt
    echo "" >> $workdir/hdfs_detail.txt
}

hdfs_detail(){
    echo "                                       ----[ 明细 ]----                                                " >> $workdir/hdfs_detail.txt
    echo "" >> $workdir/hdfs_detail.txt
    #touch $workdir/hdfsfirst.txt
    #touch $workdir/hdfssecond.txt
    #touch $workdir/hdfsthird.txt
    #chmod 777 $workdir/hdfsfirst.txt
    #chmod 777 $workdir/hdfssecond.txt
    #chmod 777 $workdir/hdfsthird.txt
   #hdfs1=`hadoop fs -ls / | awk '{print $8}'`
   #大于1T
   #hdfs1=`hadoop fs -du / |awk '{if($1 >1099511627776 && $2 != "/spark2-history"){print $2}}' >> $workdir/hdfsfirst.txt`

   for first in `cat $workdir/hdfsfirst.txt`;
   do
       hadoop fs -du  $first  |sort $1 -r -n |awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2);}}}}' >> $workdir/hdfs_detail.txt
   done
   for second in `cat $workdir/hdfsfirst.txt`;
   do
      #大于80G
    #  hadoop fs -du $second |awk '{if($1 >85899345920){print $2}}' >> $workdir/hdfssecond.txt
      hadoop fs -du  $second  |sort $1 -r -n |awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2);}}}}' >> $workdir/hdfs_detail.txt
   done
   for third in `cat $workdir/hdfssecond.txt`;
   do
      #大于50G
     # hadoop fs -du $third |awk '{if($1 >53687091200){print $2}}' >> $workdir/hdfsthird.txt
      hadoop fs -du $third  |sort $1 -r -n |awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2);}}}}' >> $workdir/hdfs_detail.txt
   done




:<<!
   for line in $hdfs1;
   do
            hadoop fs -du $line |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2,"'$line'");}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2,"'$line'");}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2,"'$line'");}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2,"'$line'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
        for line1 in $hdfs2;
        do
          hadoop fs -du $line1 |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2,"'$line1'");}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2,"'$line1'");}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2,"'$line1'");}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2,"'$line1'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
           for line2 in $hdfs3;
            do
                hadoop fs -du $line2  |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%10.3f KB\t%s\n",size,$2,"'$line2'");}else{size=size/1024;if(size<1024){printf("\033[36m%10.3f MB\t%s\n\033[0m",size,$2,"'$line2'");}else{size=size/1024;if(size<1024){printf("\033[35m%10.3f GB\t%s\n\033[0m",size,$2,"'$line2'");}else{size=size/1024;printf("\033[31m%10.3f TB\t%s\n\033[0m",size,$2,"'$line2'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
            done
        done
       echo "" >> $workdir/hdfs_detail.txt     
   done
!
}
init
hdfs_collect
hdfs_detail
echo "SUCCESS"

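The cascading divide-by-1024 conversion inside the awk blocks is the heart of the report. Here is a minimal sketch of the same logic that can be tested without a cluster; the printf line stands in for hadoop fs -du output, and the byte counts are made-up examples:

    printf '512 /tmp\n5368709120 /user/hive\n2199023255552 /data\n' |
    sort -r -n | awk '{
        size = $1/1024                                 # bytes -> KB
        if (size < 1024) { printf("%10.3f KB\t%s\n", size, $2) }
        else if ((size = size/1024) < 1024) { printf("%10.3f MB\t%s\n", size, $2) }
        else if ((size = size/1024) < 1024) { printf("%10.3f GB\t%s\n", size, $2) }
        else { printf("%10.3f TB\t%s\n", size/1024, $2) }
    }'

Largest first, this prints 2.000 TB /data, 5.000 GB /user/hive, and 0.500 KB /tmp.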
 
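Two version-dependent caveats worth verifying on your own cluster: newer hadoop fs -du releases accept -h, which prints human-readable sizes directly, and on Hadoop 3.x the command emits three columns (size, disk space consumed including replication, path), so the $2 path references in the awk scripts would need to become $NF there. With a recent client, the summary section can be approximated without any awk at all:

    hadoop fs -du -h /               # per-directory human-readable sizes
    hadoop fs -du -s -h /user/hive   # one summarized line for a whole subtree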
