因工作需求,要将MySQL的数据导入到Hive中通过Spark SQL进行分析,经过一番周折,完成了这个自动化脚本,算是实现需求的第一步,也算是在Shell编程这条路上迈出了第一步。
mysql中每天一张表,hive中一个表,按时间分区,如dt=180820.
以下是导数据的Shell脚本:
#!/bin/bash
# Import daily MySQL tables (brsface.t_person_passinfo_<yymmdd>) into HDFS via
# Sqoop, then register each day as a partition (dt=<yymmdd>) of the Hive
# external table brsface.t_person_passinfo.
#
# Usage: ./script.sh <start_date> <end_date>
#   start_date  any form accepted by GNU `date -d` (e.g. "2018-08-20" or 180820)
#   end_date    upper bound in yymmdd form, inclusive (e.g. 180825)
source /etc/profile

# Fail fast on missing arguments; the original looped forever (and broke the
# numeric test) when $2 was empty.
if [ $# -ne 2 ]; then
    echo "Usage: $0 <start_date> <end_date(yymmdd)>" >&2
    exit 1
fi

readonly host=127.0.0.1
readonly hdfs_base=hdfs://hadoop01:9000/data01/mysql2hdfs/brsface/t_person_passinfo

# Create the external table once, up front. The original re-ran this DDL
# (guarded by IF NOT EXISTS) on every loop iteration, starting a fresh Hive
# session per day for no benefit.
hive -e "
use brsface;
CREATE EXTERNAL TABLE IF NOT EXISTS brsface.t_person_passinfo (
pass_id string,
pass_time string,
camera_id string,
camera_code bigint,
device_id string,
server_id string,
face_img_url string,
environ_img_url string,
quality_score double,
age int,
gender int,
attractive int,
eyeglass int,
sunglass int,
smile int,
mask int,
race int,
eyeopen int,
mouthopen int,
beard int,
feature string,
create_time string
)PARTITIONED BY (
dt string
)
row format delimited fields terminated by '\001'
stored as textfile
location '${hdfs_base}/';
" || { echo "Hive create table failed" >&2; exit 1; }

i=0
while :; do
    # yymmdd partition key for the i-th day after the start date.
    # NOTE: two-digit year, so -gt comparison is only valid within a century.
    day=$(date "+%y%m%d" -d "$1 $i day") || exit 1
    if [ "${day}" -gt "$2" ]; then
        break
    fi

    # \$CONDITIONS is a Sqoop placeholder and must reach sqoop unexpanded.
    sql="select pass_id,pass_time,camera_id,camera_code,device_id,server_id,face_img_url,environ_img_url,quality_score,age,gender,attractive,eyeglass,sunglass,smile,mask,race,eyeopen,mouthopen,beard,feature,create_time from brsface.t_person_passinfo_${day} where \$CONDITIONS"

    # SECURITY: plaintext credentials on the command line are visible in `ps`
    # and shell history; prefer --password-file or --options-file. Left as-is
    # because the credential store is outside this script's scope.
    # With -m 1 a single mapper runs; --split-by is kept for when -m is raised.
    if sqoop import --connect "jdbc:mysql://${host}:3306/brsface" \
        --username root \
        --password 123456 \
        --query "${sql}" \
        --fields-terminated-by '\001' \
        --delete-target-dir \
        --target-dir "${hdfs_base}/${day}/" \
        --split-by pass_id \
        -m 1; then
        echo "Sqoop import data:${day} success..."
    else
        # Original printed "success" unconditionally; abort on real failure.
        echo "Sqoop import data:${day} FAILED" >&2
        exit 1
    fi

    if hive -e "
use brsface;
ALTER TABLE brsface.t_person_passinfo ADD IF NOT EXISTS partition(dt='${day}') location '${day}';
"; then
        echo "Hive add partition: dt=${day} ok..."
    else
        echo "Hive add partition: dt=${day} FAILED" >&2
        exit 1
    fi

    i=$((i + 1))
done