实时同步抽数ETL---Shell脚本

方式一(推荐): 

sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  'test'  /user/hive/warehouse/s66_postgres.db/test_miaodazhuang1/dt=20210107  "SELECT CAST(EXTRACT(EPOCH FROM COALESCE(update_time,create_time)::TIMESTAMP WITH TIME ZONE)* 1000 AS int8)||'|9' AS event_timestamp,CAST('INSERT' AS VARCHAR) AS event_type,tt.* FROM test_miaodazhuang1 AS tt WHERE ((create_time >= '2021-01-07 00:00:00' and create_time<= '2021-01-08') OR (update_time >= '2021-01-07 00:00:00' and update_time<= '2021-01-08'))"  create_time  test_miaodazhuang1  root.etl 

etl.sh 详细代码如下: 

#!/bin/bash
export LNAG=zh_CN.UTF-8

# 补数逻辑,pg => hdfs
# -- --schema <name>

connect=$1
username=$2
password=$3
hdfs_path=$4
query_condition=$5
alias_time=$6
tablename=$7
queue=$8

hdfs_path_tmp=${hdfs_path}_tmp
query_condition="${query_condition}"

echo "query_condition==> ${query_condition}"

sqoop_sql="sqoop import -D  mapred.job.queue.name=${queue}\
    --connect ${connect}?connectTimeout=300  \
    --username ${username}  \
    --password ${password}  \
    --target-dir ${hdfs_path_tmp}   \
    --fields-terminated-by '\t'  \
    --hive-delims-replacement ' '   \
    --null-string '\\\\N'  \
    --null-non-string '\\\\N' \
    --query \"${query_condition} AND \\\$CONDITIONS\" \
    --split-by ${alias_time} \
     "
echo "执行的sqoop脚本==> ${sqoop_sql}"
eval $sqoop_sql

if [ $? -eq 0 ] ;then
    echo "`date "+%Y-%m-%d %H:%M:%S"` ==> 提交抽数任务到 hdfs ${hdfs_path_tmp} 成功"

    echo "==>准备删除_SUCCESS文件"
    rm_sucess_file=" hdfs dfs -rm ${hdfs_path_tmp}/_SUCCESS "
    echo "rm_sucess_file == ${rm_sucess_file}"
    eval $rm_sucess_file
    if [ $? -eq 0 ] ;then
        echo "删除${rm_sucess_file}文件成功"
    else
        echo "删除${rm_sucess_file}文件失败"
    fi

    echo "==>判断${hdfs_path}路径是否存在"
    hdfs dfs -test -e ${hdfs_path}
    if [ $? -eq 0 ] ;then
        echo "${hdfs_path} exist"
    else
        echo "${hdfs_path} is not exist! create it"
        hdfs dfs -mkdir -p ${hdfs_path}
    fi
    
    echo "==>数据从临时路径${hdfs_path_tmp} 移动到${hdfs_path}"
        files=`hadoop fs -ls ${hdfs_path_tmp} | awk -F " " '{print $8}'`
        for file in ${files}
              do        
                        time_stamp=`date +%s`
                        filename=`echo ${file} | awk -F "part-m-" '{print $2}'`
                        mv_sql=" hdfs dfs -mv ${file} ${hdfs_path}/${tablename}-etl-${time_stamp}"
                        echo "mv_sql == ${mv_sql}"
                        eval $mv_sql
                        if [ $? -eq 0 ] ;then
                                echo "移动${file}到${hdfs_path}成功"
                        else
                                echo "移动${file}到${hdfs_path}失败"
                        fi
              done

    echo "==>删除临时文件夹${hdfs_path_tmp}"
    rm_tmp_dir="hdfs dfs -rm -r ${hdfs_path_tmp}"
    echo "rm_tmp_dir == ${rm_tmp_dir}"
    eval $rm_tmp_dir
    if [ $? -eq 0 ] ;then
        echo "删除临时文件夹${hdfs_path_tmp}成功"
    else
        echo "删除临时文件夹${hdfs_path_tmp}失败"
    fi
    echo "`date "+%Y-%m-%d %H:%M:%S"` ==> 抽数数据到 hdfs ${hdfs_path} 结束"
else
    echo "`date "+%Y-%m-%d %H:%M:%S"` ==> 抽数库中数据到 hdfs ${hdfs_path_tmp} 失败"
    exit 1
fi

生成的sqoop抽数的命令如下所示: 

sqoop import -D mapred.job.queue.name=root.etl	--connect jdbc:postgresql://192.168.0.36:5432/postgres?connectTimeout=300  	--username test       --password test  	--target-dir /user/hive/warehouse/s66_postgres.db/test_miaodazhuang1/dt=20210107_tmp   	--fields-terminated-by '\t'  	--hive-delims-replacement ' '   	--null-string '\\N'  	--null-non-string '\\N' 	--query "SELECT CAST(EXTRACT(EPOCH FROM COALESCE(update_time,create_time)::TIMESTAMP WITH TIME ZONE)* 1000 AS int8)||'|9' AS event_timestamp,CAST('INSERT' AS VARCHAR) AS event_type,tt.* FROM test_miaodazhuang1 AS tt WHERE ((create_time >= '2021-01-07 00:00:00' and create_time<= '2021-01-08') OR (update_time >= '2021-01-07 00:00:00' and update_time<= '2021-01-08')) AND \$CONDITIONS" 	--split-by create_time

方式二:

params_tmp.txt 

sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl
sh  /data/dfp/etl/etl.sh  jdbc:postgresql://192.168.0.36:5432/postgres  test  test  /user/hive/warehouse/s66_postgres.db/test_part/dt=20201209  "SELECT CASE WHEN update_time !=null THEN update_time ELSE create_time END as event_timestamp,'INSERT' AS event_type,tt.* FROM test_part AS tt WHERE ((create_time >= '2020-12-09 00:00:00' and create_time<= '2020-12-10') OR (update_time >= '2020-12-09 00:00:00' and update_time<= '2020-12-10')) AND \\\\\\$CONDITIONS"  create_time  test_part  root.etl

#!/bin/bash
export LNAG=zh_CN.UTF-8
# 补数逻辑,读取文件 array.txt=> line
cat params_tmp.txt|while read line

do
	echo "sqoop_sql==>${line}"
	eval $line

	if [ $? -eq 0 ] ;then
		echo "`date "+%Y-%m-%d %H:%M:%S"` ==> 提交抽数任务到 hdfs ${hdfs_path_tmp} 成功"
	else
		echo "`date "+%Y-%m-%d %H:%M:%S"` ==> 抽数库中数据到 hdfs ${hdfs_path_tmp} 失败"
		exit 1
	fi
	sleep 1
done

echo "==>准备删除_参数文件"
	rm_sucess_file="rm -f /data/dfp/etl/params_tmp.txt"
	echo "rm_sucess_file == ${rm_sucess_file}"
	eval $rm_sucess_file
echo "==>删除_参数文件成功!"

  • 0
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

莱恩大数据

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值