#!/bin/bash
# Create one Hive table per day in the inclusive range [$1, $2].
# Usage: ./create_tables.sh 20200901 20200907
# Arguments: $1 - begin date, $2 - end date (any format `date -d` accepts).
beg_date=$(date -d "${1}" +%s)
end_date=$(date -d "${2}" +%s)
# Abort when the range is inverted (begin after end).
# Original message said "beg_date < end_date", contradicting the condition,
# and exited 0; report the real error to stderr and exit non-zero.
if (( beg_date > end_date )); then
  echo "error: beg_date must be <= end_date" >&2
  exit 1
fi
currentDate=""
# Walk the range one day (86400 s) at a time over epoch seconds.
# NOTE(review): a fixed 86400 s step can drift across DST changes — confirm
# the cluster runs in a DST-free timezone or use `date -d "... +1 day"`.
for (( i = beg_date; i <= end_date; i += 86400 )); do
  currentDate=$(date -d @"${i}" +%Y%m%d)
  echo "-----create /${currentDate}-----"
  hive -e "use data;create table order_status_log${currentDate}(id string,order_id string,order_status string,operate_time string) row format delimited fields terminated by ',';"
done
自动化创建 Hive 表的脚本(见上图)
#!/bin/bash
# Advance the date embedded in the DataX job JSON by one day, then run DataX.
# Step 1: read the current date (e.g. 20200901) from the state file; the file
# was seeded once by hand with: echo 20200901 > /root/sh/date
# (renamed from `date` to avoid shadowing/confusing with the date command)
cur_date=$(cat /root/sh/date)
# Step 2: compute the next day's date. Replaced the deprecated $[ ... ]
# arithmetic form with the standard $(( ... )).
afterday_timestamp=$(( $(date -d "${cur_date}" +%s) + 86400 ))
afterday=$(date -d @"${afterday_timestamp}" +%Y%m%d)
# Global replace: rewrite every table-name reference in the job file,
# e.g. order_status_log20200901 -> order_status_log20200902.
sed -i "s/order_status_log${cur_date}/order_status_log${afterday}/g" /root/datax/job/log.json
# Keep the state file in sync with the date now inside the job file.
echo "${afterday}" > /root/sh/date
# Run the DataX job.
python /root/datax/bin/datax.py /root/datax/job/log.json
自动化采集数据到 Hive 的脚本(见上图)
#!/bin/bash
# For every day in the inclusive range [$1, $2], insert the daily table
# data.order_status_log<YYYYMMDD> into the partitioned table
# data.order_status_log_fenqu (partition column day = date(operate_time)),
# then dump per-day row counts to /root/sh/result.
# Usage: ./insert_partitions.sh 20200901 20200907
beg_date=$(date -d "${1}" +%s)
end_date=$(date -d "${2}" +%s)
# Abort on an inverted range: correct message, stderr, non-zero exit
# (original printed a contradictory message and exited 0).
if (( beg_date > end_date )); then
  echo "error: beg_date must be <= end_date" >&2
  exit 1
fi
currentDate=""
for (( i = beg_date; i <= end_date; i += 86400 )); do
  currentDate=$(date -d @"${i}" +%Y%m%d)
  # BUG FIX: the valid mode value is "nonstrict" (original said "nostrict",
  # which Hive ignores, leaving strict mode on and breaking dynamic-partition
  # inserts that specify no static partition value).
  hive -e "
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.exec.max.dynamic.partitions=1000;
insert into table data.order_status_log_fenqu partition(day) select id,order_id,order_status,operate_time,date(operate_time) as day from data.order_status_log${currentDate};"
done
# -S (silent) keeps Hive log chatter out of the result file.
hive -S -e "select day,count(id) from data.order_status_log_fenqu group by day;" > /root/sh/result
自动化插入分区表的脚本(见上图)
#!/bin/bash
# Load the "<day> <count>" pairs from /root/sh/result into MySQL table test.fys.
# NOTE(review): credentials are hardcoded and the password is passed on the
# command line (visible to other users via `ps`); prefer --defaults-extra-file
# or a ~/.my.cnf — flagged, not changed, to keep the script self-contained.
user="root"
password="123456"
host="192.168.174.9"
port=5550
# Build the client invocation as an array so each option stays a single word
# (the original string-concatenated form breaks if any value contains spaces).
mysql_conn=(mysql -h "${host}" -u "${user}" -P "${port}" -p"${password}")
# Read the result file directly (no `cat |` subshell); -r keeps backslashes
# literal. Each line splits into the first field (day) and the rest (count).
while read -r day count; do
  "${mysql_conn[@]}" -e "INSERT INTO test.fys VALUES('$day','$count')"
done < /root/sh/result
自动化采集数据到 9 号虚拟机(192.168.174.9)的脚本(见上图)
DROP TABLE [IF EXISTS] table_name;
删除 Hive 表的语句(方括号表示 IF EXISTS 为可选项,实际执行时不要输入方括号)