[hadoop] Pull Hive table data to the local filesystem, convert it to GB18030, and compress it

#!/bin/bash
# Root directory for all output
result_dir=/data/yzw/formal_result
rm -rf $result_dir
mkdir $result_dir
# Output paths for each result set
parent_dir=$result_dir
merge_dir=$result_dir/merge
distinct_dir=$result_dir/distinct
report_grouper_dir=$result_dir/report_grouper
# Create the per-result directories (all three are used below)
rm -rf $report_grouper_dir $merge_dir $distinct_dir
mkdir $report_grouper_dir $merge_dir $distinct_dir
# Write the full HDFS path of every result file into a list file per table
echo -n "" > $merge_dir/hdfs_file_name
echo -n "" > $distinct_dir/hdfs_file_name
echo -n "" > $report_grouper_dir/hdfs_file_name
hdfs dfs -ls /user/hive/warehouse/ads.db/ads_get_finalindicatorame_and_zhidingunit_and_zhengshivalue | grep -v "Found" | awk -F " " '{print $8}' >>$merge_dir/hdfs_file_name
hdfs dfs -ls /user/hive/warehouse/ads.db/ads_distinct_zhibiao_nian_diqu | grep -v "Found" | awk -F " " '{print $8}'>>$distinct_dir/hdfs_file_name
hdfs dfs -ls /user/hive/warehouse/ads.db/ads_report_grouper_aggre_year | grep -v "Found" | awk -F " " '{print $8}'>>$report_grouper_dir/hdfs_file_name
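# Note: each data line of `hdfs dfs -ls` has 8 whitespace-separated fields
# (permissions, replication, owner, group, size, date, time, path), so $8 is
# the full HDFS path; grep -v "Found" drops the "Found N items" header line.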
for dir in $report_grouper_dir $distinct_dir $merge_dir
do
names=`cat $dir/hdfs_file_name`
# Each name is a full HDFS path
for name in $names
do
tail_name=`basename $name`
# Base name of the HDFS file, e.g. 000000_0
hdfs dfs -get $name $dir/$tail_name
echo "get $name IS FINISHED"
# Convert to GB18030 (iconv -o overwrites the target, so no pre-truncation is needed)
iconv -f UTF-8 -t GB18030 $dir/$tail_name -o $dir/$tail_name.txt
echo "$name change to GB18030 IS FINISHED"
done
# Compress the converted files
cd $dir
# Archive name = directory base name (report_grouper, distinct, or merge);
# basename stays correct even if result_dir is moved to a different depth
compress_file_name=`basename $dir`
echo "$compress_file_name"
rm -rf $parent_dir/$compress_file_name.tar.gz
find . -name "*.txt" | xargs tar -zcvf $parent_dir/$compress_file_name.tar.gz
# Archive members must be relative paths, hence the cd above and the relative find
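# Caveat: if the .txt list ever exceeded the xargs argument limit, xargs would
# invoke tar more than once and later invocations would overwrite the archive.
# A null-safe single-invocation variant (GNU tar) would be, e.g.:
#   find . -name "*.txt" -print0 | tar --null -T - -zcvf $parent_dir/$compress_file_name.tar.gz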
echo "$compress_file_name.tar.gz IS FINISHED"
done
echo "ALL FINISHED"