1、HDFS 小文件合并脚本
#!/bin/bash
#
# Merge the small part-00* files of each hourly HDFS partition into one file.
#
# Usage: <script> <bizdate> <path>
#   bizdate  value used for the bizdate=<bizdate> partition directory
#   path     HDFS base directory, addressed as /<path>
#
# For every hour 00..23 and every log type (access, action) the directory
#   /<path>/bizdate=<bizdate>/bizhour=<HH>/log_type=<type>
# is checked. When it exists and contains part-00* files, they are
# concatenated into part-11 in the same directory, and only after a
# successful merge are the part-00* files removed.
#
# Exit status: 0 when every partition was handled without error,
#              1 when at least one partition failed,
#              2 on bad usage.

# No `set -e`: a failing partition must not stop the remaining ones.
# pipefail makes the cat | copyFromLocal pipeline fail if either side fails.
set -uo pipefail

readonly MERGED_NAME='part-11'
readonly SOURCE_GLOB='part-00*'
readonly LOG_TYPES=(access action)

bizdate=${1:-}
path=${2:-}
# The paths below already start with "/", so drop one supplied by the caller
# to avoid building "//<path>/...".
path=${path#/}

if [[ -z "$bizdate" || -z "$path" ]]; then
  printf 'Usage: %s <bizdate> <path>\n' "${0##*/}" >&2
  exit 2
fi

#######################################
# Merge the part-00* files of one partition directory into part-11.
# Globals:   MERGED_NAME, SOURCE_GLOB (read)
# Arguments: $1 - absolute HDFS path of the partition directory
# Outputs:   progress on stdout, diagnostics on stderr
# Returns:   0 when merged or when there is nothing to do, 1 on error
#######################################
merge_partition() {
  local dir=$1
  local target="${dir}/${MERGED_NAME}"
  # Kept quoted everywhere so the pattern reaches hdfs unexpanded by the
  # local shell.
  local sources="${dir}/${SOURCE_GLOB}"
  local rc=0

  # `hdfs dfs -test -e` exits 0 when the path exists and 1 when it does not.
  # Anything else (e.g. 127 because hdfs is not installed) is a real error
  # and must not be mistaken for "exists".
  hdfs dfs -test -e "${dir}/" || rc=$?
  case "$rc" in
    0) ;;
    1) return 0 ;;
    *)
      printf 'error: cannot check %s (exit status %s)\n' "$dir" "$rc" >&2
      return 1
      ;;
  esac

  echo "进行hdfs操作"

  # ls exits non-zero when the pattern matches nothing; in that case there
  # is nothing to merge and no empty part-11 should be created.
  if ! hdfs dfs -ls "$sources" >/dev/null 2>&1; then
    return 0
  fi

  # copyFromLocal cannot write onto an existing file. Stop here so the
  # source files are kept instead of being deleted after a failed merge.
  if hdfs dfs -test -e "$target"; then
    printf 'error: %s already exists, leaving %s untouched\n' \
      "$target" "$sources" >&2
    return 1
  fi

  if ! hdfs dfs -cat "$sources" | hdfs dfs -copyFromLocal - "$target"; then
    printf 'error: merge into %s failed, keeping source files\n' "$target" >&2
    # The target did not exist before this attempt, so whatever is there
    # now is a partial file from the failed merge.
    hdfs dfs -rm -f "$target" >/dev/null 2>&1
    return 1
  fi

  # Sources are removed only after the merged file was written successfully.
  # `-rm -r` replaces the deprecated `-rmr`.
  if ! hdfs dfs -rm -r "$sources"; then
    printf 'error: merged into %s but could not remove %s\n' \
      "$target" "$sources" >&2
    return 1
  fi
}

failed=0
for (( hour = 0; hour < 24; hour++ )); do
  # Zero-pad to two digits: 00, 01, ..., 23.
  printf -v bizhour '%02d' "$hour"
  echo "${bizdate} ${bizhour}"
  for log_type in "${LOG_TYPES[@]}"; do
    merge_partition \
      "/${path}/bizdate=${bizdate}/bizhour=${bizhour}/log_type=${log_type}" \
      || failed=1
  done
done

exit "$failed"