#!/bin/bash
#
# Export the latest hourly partition of test.hn_csgcjs_spgl_xmjbxxb from Hive
# to SQL Server via sqoop. This section handles the "valid" rows
# (GBHYDMFBND = '2017'); the invalid-row section follows below.
#
# NOTE: the scheduler host's system clock must be correct. Start this script
# only after the source table's partition data has been fully ingested.

set -u  # abort on unset variables; hive/sqoop exit codes are checked explicitly

echo 'job start...'

nowtime=$(date -d today "+%Y-%m-%d-%H")
echo "nowtime=${nowtime}"

#grepCondition="2020-05-14-22-25"
grepCondition=$nowtime

# List partitions of the source table. Header printing must be disabled so the
# grep below sees only partition names (original script had the typo "flase",
# which Hive silently ignored).
partition="set hive.cli.print.header=false;show partitions test.hn_csgcjs_spgl_xmjbxxb;"

# Newest partition matching the current hour.
v_partition=$(hive -e "$partition" | grep "data_dt=${grepCondition}" | sort | tail -n 1)
echo "$v_partition"

# Guard: without this, an empty match would overwrite the Hive staging table
# with nothing and sqoop-export a bogus path.
if [ -z "$v_partition" ]; then
  echo "ERROR: no partition found for data_dt=${grepCondition}, aborting" >&2
  exit 1
fi

# Partition value = everything after "data_dt=" (replaces the brittle
# fixed-offset substring ${v_partition:8:16}).
partition_nums=${v_partition#*=}
echo "partition_nums=${partition_nums}"

#=================== valid-data logic start ==========================================
echo 'correct data job start....'

# Create the sqoop staging table (partitioned, \001-delimited to match sqoop).
HIVE_SQL_CREATE_TABLE="CREATE TABLE IF NOT EXISTS test.hn_csgcjs_spgl_xmjbxxb_transfer_sqoop_p2 (
  dfsjzj string, xzqhdm string, xmdm string, xmmc string, gcdm string,
  gcfw string, qjdgcdm string, xmtzly INT, tdhqfs INT, tdsfdsjfa INT,
  sfwcqypg INT, splclx INT, lxlx INT, gcfl INT, jsxz INT, xmzjsx INT,
  gbhydmfbnd string, gbhy string, nkgsj date, njcsj date, xmsfwqbj INT,
  xmwqbjsj TIMESTAMP, ztze DOUBLE, jsddxzqh string, jsdd string,
  xmjsddx DOUBLE, xmjsddy DOUBLE, jsgmjnr string, ydmj DOUBLE, jzmj DOUBLE,
  sbsj TIMESTAMP, splcbm string, splbbh DOUBLE, sjyxbs INT, sjwxyy string,
  SJSCZT INT, SBYY string, ctime TIMESTAMP, utime TIMESTAMP,
  SYNC_TIME TIMESTAMP, SystemSource string, First_Sync_time TIMESTAMP
) partitioned by(data_dt string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001';"

# Populate the staging partition: normalize XZQHDM via the district-code map,
# rebuild GBHY from the two-level industry-code hierarchy, keep only rows
# whose code table edition is 2017.
HIVE_SQL_INSERT_TABLE="insert overwrite TABLE test.hn_csgcjs_spgl_xmjbxxb_transfer_sqoop_p2 partition(data_dt='$partition_nums')
SELECT DFSJZJ,
  CASE WHEN m.country_xzqhdm IS NOT NULL THEN m.country_xzqhdm ELSE a.XZQHDM END AS XZQHDM,
  XMDM, XMMC, GCDM, GCFW, QJDGCDM, XMTZLY, TDHQFS, TDSFDSJFA, SFWCQYPG,
  SPLCLX, LXLX, GCFL, JSXZ, XMZJSX, GBHYDMFBND,
  CONCAT (
    CASE WHEN c.priv_code IS NULL THEN b.priv_code ELSE c.priv_code END,
    CAST ( SUBSTRING ( regexp_replace (
      CASE WHEN c.priv_code IS NULL THEN b.priv_code ELSE c.priv_code END,
      'X', '' ), 2, 6 ) AS VARCHAR (6) )
  ) AS GBHY,
  NKGSJ, NJCSJ, XMSFWQBJ, XMWQBJSJ, ZTZE, JSDDXZQH, JSDD, XMJSDDX, XMJSDDY,
  JSGMJNR, YDMJ, JZMJ, SBSJ, SPLCBM, SPLBBH, SJYXBS, SJWXYY, SJSCZT,
  '' AS SBYY, CTIME, UTIME,
  from_unixtime(unix_timestamp()) AS SYNC_TIME,
  '' AS SystemSource,
  from_unixtime(unix_timestamp()) AS First_Sync_time
FROM test.HN_CSGCJS_SPGL_XMJBXXB a
LEFT JOIN test.hn_zwfw_xzqhdm_map m ON a.XZQHDM = m.city_xzqhdm
LEFT JOIN (
  SELECT a.hy_name, a.hy_code, a.old_code, regexp_replace (c.p_code, 'X', '') AS priv_code
  FROM test.hn_zwfw_hyfl a
  LEFT JOIN test.hn_zwfw_hyfl b ON a.p_code = b.hy_code
  LEFT JOIN test.hn_zwfw_hyfl c ON b.p_code = c.hy_code
) b ON a.GBHY = b.old_code
LEFT JOIN (
  SELECT a.hy_name, a.hy_code, a.old_code, regexp_replace (c.p_code, 'X', '') AS priv_code
  FROM test.hn_zwfw_hyfl a
  LEFT JOIN test.hn_zwfw_hyfl b ON a.p_code = b.hy_code
  LEFT JOIN test.hn_zwfw_hyfl c ON b.p_code = c.hy_code
) c ON a.GBHY = c.hy_code
WHERE GBHYDMFBND='2017' and a.data_dt='$partition_nums';"

# Run the HQL; abort on failure instead of exporting a stale/empty partition.
hive -e "$HIVE_SQL_CREATE_TABLE" || { echo 'ERROR: create table failed' >&2; exit 1; }
echo 'insert correct data start....'
hive -e "$HIVE_SQL_INSERT_TABLE" || { echo 'ERROR: insert correct data failed' >&2; exit 1; }
echo 'insert correct data end...'

echo 'correct data job sqoop start...'
# SECURITY: credentials are hard-coded here and visible in `ps`/logs; move to
# a sqoop --password-file or options-file.
sqoop export \
  --connect 'jdbc:sqlserver://59.212.146.201:1433;DatabaseName=Exchange_GCJSXM_HNS_TEST' \
  --username 'GCJSXM_GGFW' \
  --password 'GCJSXM_GGFW!@#$20200511' \
  --table 'SPGL_XMJBXXB' \
  --columns 'DFSJZJ,XZQHDM,XMDM,XMMC,GCDM,GCFW,QJDGCDM,XMTZLY,TDHQFS,TDSFDSJFA,SFWCQYPG,SPLCLX,LXLX,GCFL,JSXZ,XMZJSX,GBHYDMFBND,GBHY,NKGSJ,NJCSJ,XMSFWQBJ,XMWQBJSJ,ZTZE,JSDDXZQH,JSDD,XMJSDDX,XMJSDDY,JSGMJNR,YDMJ,JZMJ,SBSJ,SPLCBM,SPLBBH,SJYXBS,SJWXYY,SJSCZT,SBYY,CTIME,UTIME,SYNC_TIME,SystemSource,First_Sync_time' \
  --fields-terminated-by '\001' \
  --export-dir "hdfs://hainan/apps/hive/warehouse/test.db/hn_csgcjs_spgl_xmjbxxb_transfer_sqoop_p2/data_dt=${partition_nums}" \
  --input-null-string '\\N' \
  --input-null-non-string '\\N' \
  --hive-partition-key data_dt \
  --hive-partition-value "$partition_nums" \
  || { echo 'ERROR: sqoop export (correct data) failed' >&2; exit 1; }
echo 'correct data job sqoop end...'
echo 'correct data job end....'
#===================不合法数据逻辑开始========================================== echo 'correct data job start....' #创建sqoop分区表 HIVE_SQL_CREATE_TABLE_INVALID="CREATE TABLE IF NOT EXISTS test.hn_csgcjs_spgl_xmjbxxb_invalid_sqoop_p2 ( dfsjzj string, xzqhdm string, xmdm string, xmmc string, gcdm string, gcfw string, qjdgcdm string, xmtzly INT, tdhqfs INT, tdsfdsjfa INT, sfwcqypg INT, splclx INT, lxlx INT, gcfl INT, jsxz INT, xmzjsx INT, gbhydmfbnd string, gbhy string, nkgsj date, njcsj date, xmsfwqbj INT, xmwqbjsj TIMESTAMP, ztze DOUBLE, jsddxzqh string, jsdd string, xmjsddx DOUBLE, xmjsddy DOUBLE, jsgmjnr string, ydmj DOUBLE, jzmj DOUBLE, sbsj TIMESTAMP, splcbm string, splbbh DOUBLE, sjyxbs INT, sjwxyy string, SJSCZT INT, SBYY string, ctime TIMESTAMP, utime TIMESTAMP, SYNC_TIME TIMESTAMP, SystemSource string, First_Sync_time TIMESTAMP ) partitioned by(data_dt string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001';" #插入分区数据 HIVE_SQL_INSERT_TABLE_INVALID="insert overwrite TABLE test.hn_csgcjs_spgl_xmjbxxb_invalid_sqoop_p2 partition(data_dt='$partition_nums') SELECT DFSJZJ, CASE WHEN m.country_xzqhdm IS NOT NULL THEN m.country_xzqhdm ELSE a.XZQHDM END AS XZQHDM, XMDM, XMMC, GCDM, GCFW, QJDGCDM, XMTZLY, TDHQFS, TDSFDSJFA, SFWCQYPG, SPLCLX, LXLX, GCFL, JSXZ, XMZJSX, GBHYDMFBND, CONCAT ( CASE WHEN c.priv_code IS NULL THEN b.priv_code ELSE c.priv_code END, CAST ( SUBSTRING ( regexp_replace ( CASE WHEN c.priv_code IS NULL THEN b.priv_code ELSE c.priv_code END, 'X', '' ), 2, 6 ) AS VARCHAR (6) ) ) AS GBHY, NKGSJ, NJCSJ, XMSFWQBJ, XMWQBJSJ, ZTZE, JSDDXZQH, JSDD, XMJSDDX, XMJSDDY, JSGMJNR, YDMJ, JZMJ, SBSJ, SPLCBM, SPLBBH, SJYXBS, SJWXYY, SJSCZT, '' AS SBYY, CTIME, UTIME, from_unixtime(unix_timestamp()) AS SYNC_TIME, '' AS SystemSource, from_unixtime(unix_timestamp()) AS First_Sync_time FROM test.HN_CSGCJS_SPGL_XMJBXXB a LEFT JOIN test.hn_zwfw_xzqhdm_map m ON a.XZQHDM = m.city_xzqhdm LEFT JOIN ( SELECT a.hy_name, a.hy_code, a.old_code, regexp_replace (c.p_code, 'X', '') AS priv_code 
FROM test.hn_zwfw_hyfl a LEFT JOIN test.hn_zwfw_hyfl b ON a.p_code = b.hy_code LEFT JOIN test.hn_zwfw_hyfl c ON b.p_code = c.hy_code ) b ON a.GBHY = b.old_code LEFT JOIN ( SELECT a.hy_name, a.hy_code, a.old_code, regexp_replace (c.p_code, 'X', '') AS priv_code FROM test.hn_zwfw_hyfl a LEFT JOIN test.hn_zwfw_hyfl b ON a.p_code = b.hy_code LEFT JOIN test.hn_zwfw_hyfl c ON b.p_code = c.hy_code ) c ON a.GBHY = c.hy_code WHERE (GBHYDMFBND<>'2017' or GBHYDMFBND is null) and a.data_dt='$partition_nums';" #执行hql hive -e "$HIVE_SQL_CREATE_TABLE_INVALID" echo 'insert invalid data start....' hive -e "$HIVE_SQL_INSERT_TABLE_INVALID" echo 'insert invalid data end...' echo 'invalid data job sqoop start...' #错误的数据目前还没确定同步到哪里 #sqoop export --connect 'jdbc:sqlserver://59.212.146.201:1433;DatabaseName=Exchange_GCJSXM_HNS_TEST' --username 'GCJSXM_GGFW' --password 'GCJSXM_GGFW!@#$20200511' --table 'SPGL_XMJBXXB' --columns 'DFSJZJ,XZQHDM,XMDM,XMMC,GCDM,GCFW,QJDGCDM,XMTZLY,TDHQFS,TDSFDSJFA,SFWCQYPG,SPLCLX,LXLX,GCFL,JSXZ,XMZJSX,GBHYDMFBND,GBHY,NKGSJ,NJCSJ,XMSFWQBJ,XMWQBJSJ,ZTZE,JSDDXZQH,JSDD,XMJSDDX,XMJSDDY,JSGMJNR,YDMJ,JZMJ,SBSJ,SPLCBM,SPLBBH,SJYXBS,SJWXYY,SJSCZT,SBYY,CTIME,UTIME,SYNC_TIME,SystemSource,First_Sync_time' --fields-terminated-by '\001' --export-dir 'hdfs://hainan/apps/hive/warehouse/test.db/hn_csgcjs_spgl_xmjbxxb_invalid_sqoop_p2/data_dt='$partition_nums'' --input-null-string '\\N' --input-null-non-string '\\N' --hive-partition-key data_dt --hive-partition-value $partition_nums echo 'invalid data job sqoop end...' echo 'invalid data job end....' echo 'job end...' |