1. 依赖检查(check)
# Wait until a Hive table partition reports a size greater than zero.
#
# Arguments:
#   $1 - table to check
#   $2 - value of the ds partition column
#   $3 - value of the hh partition column
#   $4 - value of the mi partition column
# Exit status:
#   0   - the partition size became greater than 0
#   255 - arguments are missing, or the partition never became ready
if [ "$#" -lt 4 ]; then
  # Without all four values the partition spec below cannot be built, so fail
  # fast with the same status the timeout path uses.
  echo "Usage: $0 <chk_table> <ds> <hh> <mi>" >&2
  exit 255
fi
chk_table=$1
ds=$2
hh=$3
mi=$4
echo "Check table ${chk_table} partition(ds=${ds},hh=${hh},mi=${mi})."
# The counter advances by 5 per attempt and the loop stops at 180, and every
# failed attempt sleeps 60 seconds: at most 36 polls, one minute apart.
# NOTE(review): the original note said "check every 5 minutes for 1 hour",
# which does not match this timing -- confirm which one is intended.
i=0
while [ "$i" -lt 180 ]; do
  i=$((i + 5))
  echo '===i的值是===' "$i"
  # Third field of the line(s) containing "size" in the partition description.
  table_partitionsize_1=$(hive -e "desc ${chk_table} partition(ds=${ds},hh=${hh},mi=${mi});" \
    | grep -i size \
    | awk '{print $3}')
  # Empty, multi-line or otherwise non-numeric output (e.g. the partition does
  # not exist yet) is treated as "not ready" instead of making `[` error out.
  partition_ready=0
  case "$table_partitionsize_1" in
    '' | *[!0-9]*) ;;
    *)
      if [ "$table_partitionsize_1" -gt 0 ]; then
        partition_ready=1
      fi
      ;;
  esac
  if [ "$partition_ready" -eq 1 ]; then
    echo "$(date)" " Ready NOW, table ${chk_table} partition ${ds}${hh}${mi} size is ${table_partitionsize_1} . "
    exit 0
  fi
  echo "$(date)" " NOT ready !"
  echo "sleep 1 min"
  sleep 60
done
echo "Tried too many times, exit."
# `exit -1` is not portable; 255 is the status bash reports for it.
exit 255
2.自动创建分区
一般在将数据实时 sink 到 Hive 时需要使用
2.1 15分钟分区
# Pre-create the 15-minute partitions of one day: 24 hours x 4 slots = 96
# "add if not exists partition" statements, each printed and then executed.
# `args` and `o` are not defined in this snippet; presumably they are the
# scheduler parameters and the PyODPS entry object provided by the platform
# -- TODO confirm.
# NOTE(review): the partition column is named `mm` here, while the shell check
# in these notes uses `mi` -- confirm this matches the target table.
# import datetime
tomorrow = args['tomorrow']
table = args['table']
for hour in range(0, 24, 1):
    hh = str(hour).zfill(2)  # left-pad with zeros to two digits
    for minute in range(0, 60, 15):
        mi = str(minute).zfill(2)
        sql = 'alter table {} add if not exists partition (ds=\'{}\', hh=\'{}\',mm=\'{}\')'.format(table, tomorrow, hh, mi)
        print(sql)
        o.execute_sql(sql)
2.2 5分钟分区
# Pre-create the 5-minute partitions of one day: 24 hours x 12 slots = 288
# "add if not exists partition" statements, each printed and then executed.
# `args` and `o` are not defined in this snippet; presumably they are the
# scheduler parameters and the PyODPS entry object provided by the platform
# -- TODO confirm.
# NOTE(review): the partition column is named `mm` here, while the shell check
# in these notes uses `mi` -- confirm this matches the target table.
import datetime
tomorrow = args['tomorrow']
table = args['table']
for hour in range(0, 24, 1):
    hh = str(hour).zfill(2)  # left-pad with zeros to two digits
    # The step is written as 5, not 05: a leading-zero literal is a
    # SyntaxError on Python 3 and an octal literal on Python 2.
    for minute in range(0, 60, 5):
        mi = str(minute).zfill(2)
        sql = 'alter table {} add if not exists partition (ds=\'{}\', hh=\'{}\',mm=\'{}\')'.format(table, tomorrow, hh, mi)
        print(sql)
        o.execute_sql(sql)