方式一:直接写在脚本中
load_track_logs.sh:
#!/bin/bash
## Load yesterday's track log files into db_track.yhd_log_load, one Hive
## partition (date, hour) per file, by passing the statement inline to hive -e.
## bash is required: the ${var:offset:length} expansions below are not POSIX sh.

## Make the system-wide environment variables take effect
. /etc/profile

## HIVE HOME
HIVE_HOME=/opt/cdh-5.3.6/hive-0.13.1-cdh5.3.6
## Log directory
LOG_DIR=/datas/tracklogs

## Yesterday's date, e.g. 20161118
yesterday=$(date -d -1days '+%Y%m%d')
day_dir="${LOG_DIR}/${yesterday}"

## Stop early when yesterday's directory is missing instead of looping over nothing
if [ ! -d "${day_dir}" ]; then
    echo "log directory ${day_dir} does not exist" >&2
    exit 1
fi

## File names are expected to start with yyyyMMddHH, e.g. 2016111819
name_re='^[0-9]{10}'
failed=0

## Iterate over the files in the directory (glob instead of parsing ls output,
## so names containing whitespace are handled correctly)
for path in "${day_dir}"/*
do
    ## An unmatched glob stays literal; skip it when the directory is empty
    [ -e "${path}" ] || continue
    line=${path##*/}

    ## Refuse to derive partition values from a name without the expected prefix
    if [[ ! ${line} =~ ${name_re} ]]; then
        echo "skipping ${line}: name does not start with yyyyMMddHH" >&2
        continue
    fi

    echo "loading ${line} to db_track.yhd_log_load"
    date=${line:0:8}
    hour=${line:8:2}

    ## Keep loading the remaining files on failure, but remember that one failed
    if ! "${HIVE_HOME}/bin/hive" -e "load data local inpath '${path}' into table db_track.yhd_log_load partition (date = '${date}', hour = '${hour}') ;"; then
        echo "failed to load ${line}" >&2
        failed=1
    fi
done

## Non-zero exit status when at least one file could not be loaded
exit "${failed}"
方式二:脚本调用sql文件
load_track_logs.sh:
#!/bin/bash
## Load yesterday's track log files into db_track.yhd_log_load, one Hive
## partition (date, hour) per file, by running load_data.sql with the file
## path, date and hour passed in as --hiveconf variables.
## bash is required: the ${var:offset:length} expansions below are not POSIX sh.

## Make the system-wide environment variables take effect
. /etc/profile

## HIVE HOME
HIVE_HOME=/opt/cdh-5.3.6/hive-0.13.1-cdh5.3.6
## Log directory
LOG_DIR=/datas/tracklogs
## Directory that holds load_data.sql
SCRIPT_DIR=/home/beifeng

## Yesterday's date, e.g. 20161118
yesterday=$(date -d -1days '+%Y%m%d')
day_dir="${LOG_DIR}/${yesterday}"

## Stop early when yesterday's directory is missing instead of looping over nothing
if [ ! -d "${day_dir}" ]; then
    echo "log directory ${day_dir} does not exist" >&2
    exit 1
fi

## File names are expected to start with yyyyMMddHH, e.g. 2016111819
name_re='^[0-9]{10}'
failed=0

## Iterate over the files in the directory (glob instead of parsing ls output,
## so names containing whitespace are handled correctly)
for load_file in "${day_dir}"/*
do
    ## An unmatched glob stays literal; skip it when the directory is empty
    [ -e "${load_file}" ] || continue
    line=${load_file##*/}

    ## Refuse to derive partition values from a name without the expected prefix
    if [[ ! ${line} =~ ${name_re} ]]; then
        echo "skipping ${line}: name does not start with yyyyMMddHH" >&2
        continue
    fi

    echo "loading ${line} to db_track.yhd_log_load"
    date=${line:0:8}
    hour=${line:8:2}

    ## Quote every argument so each --hiveconf value reaches hive as one word;
    ## keep loading the remaining files on failure, but remember that one failed
    if ! "${HIVE_HOME}/bin/hive" \
            --hiveconf "LOAD_FILE_PARAM=${load_file}" \
            --hiveconf "DATE_PARAM=${date}" \
            --hiveconf "HOUR_PARAM=${hour}" \
            -f "${SCRIPT_DIR}/load_data.sql"; then
        echo "failed to load ${line}" >&2
        failed=1
    fi
done

## Non-zero exit status when at least one file could not be loaded
exit "${failed}"
load_data.sql:
-- Load one local file into a single (date, hour) partition of
-- db_track.yhd_log_load. The file path and both partition values are
-- substituted from the hiveconf variables LOAD_FILE_PARAM, DATE_PARAM
-- and HOUR_PARAM.
LOAD DATA LOCAL INPATH '${hiveconf:LOAD_FILE_PARAM}'
INTO TABLE db_track.yhd_log_load
PARTITION (date = '${hiveconf:DATE_PARAM}', hour = '${hiveconf:HOUR_PARAM}');