#!/bin/bash
########################################################################################################################
# Creater :
# Creation Time :
# Description :
# Modify By :
# Modify Time :
# Modify Content :
# Script Version :1.0.3
########################################################################################################################
#===============================================================================
# 机构名称
organ_params=$1
# 调度周期
data_day_str=$2
hive -e "
set mapred.job.priority=NORMAL;
set mapred.output.compress=true;
set hive.exec.compress.output=true;
SET hive.default.fileformat=Orc;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
--set hive.auto.convert.join = true;
--set mapreduce.reduce.memory.mb=12288;
--set mapreduce.reduce.java.opts=-Xmx11788M;
drop table if exists tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""";
CREATE TABLE tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" AS
SELECT
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
A.OPEN_TIME,
A.CLOSE_TIME,
OPEN_TIME AS BEG_DATE,
'99991231' AS END_DATE,
A.DT,
A.MODEL_TYPE
FROM STG.STG_C0000000000001_CUST_PRO_ACC_I_D A
WHERE dt = '"""$data_day_str"""' and A.MODEL_TYPE = 'N'
UNION ALL
SELECT Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT,
MODEL_TYPE
FROM STG.STG_C0000000000001_CUST_PRO_ACC_I_D B
WHERE dt = '"""$data_day_str"""' and B.MODEL_TYPE = 'M' ;
drop table tmp.TMP_S_CUST_PRO_ACC_00"""$organ_params""";
CREATE TABLE tmp.TMP_S_CUST_PRO_ACC_00"""$organ_params""" AS
SELECT
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
C.OPEN_TIME,
C.CLOSE_TIME,
C.BEG_DATE,
C.END_DATE,
C.DT,
C.MODEL_TYPE,
COUNT(1) AS repeats_num
FROM (select
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT,
MODEL_TYPE
from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" C
WHERE C.BEG_DATE <= C.END_DATE
AND NOT EXISTS (SELECT 1
FROM odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
WHERE C.Org_code = D.Org_code
AND C.Acc_no = D.Acc_no
AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
AND ( C.BEG_DATE = D.BEG_DATE AND C.END_DATE = D.END_DATE
) )
union all
select c.* from (
select
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT,
MODEL_TYPE
from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" C
WHERE C.BEG_DATE <= C.END_DATE ) c
left join odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
on C.Org_code = D.Org_code
AND C.Acc_no = D.Acc_no
AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
where not (
C.BEG_DATE < D.BEG_DATE AND C.END_DATE > D.BEG_DATE
)
union all
select c.* from (
select
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT,
MODEL_TYPE
from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" C
WHERE C.BEG_DATE <= C.END_DATE ) c
left join odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
on C.Org_code = D.Org_code
AND C.Acc_no = D.Acc_no
AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
where ( C.BEG_DATE >= D.BEG_DATE AND C.BEG_DATE < D.END_DATE AND C.END_DATE > D.END_DATE
)
)c
GROUP BY Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
C.OPEN_TIME,
C.CLOSE_TIME,
C.BEG_DATE,
C.END_DATE,
C.DT,
C.MODEL_TYPE;
-- 2.STAGE异常数据(包括重复报、跨链、错误数据)
drop table if exists tmp.TMP_S_CUST_PRO_ACC_01"""$organ_params""";
CREATE TABLE tmp.TMP_S_CUST_PRO_ACC_01"""$organ_params""" AS
SELECT
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
C.OPEN_TIME,
C.CLOSE_TIME,
C.BEG_DATE,
C.END_DATE,
C.DT,
C.MODEL_TYPE,
CASE WHEN C.BEG_DATE > C.END_DATE THEN 'BEN>END' ELSE 'KL' END AS YCSJ, -- 异常数据标识
COUNT(1) AS REPEATS_NUM
FROM (
select
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT,
MODEL_TYPE
from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" C
WHERE (C.BEG_DATE > C.END_DATE)
union all
select c.* from (
select
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT,
MODEL_TYPE
from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" C
WHERE C.BEG_DATE <= C.END_DATE ) c -- 错误数据
left join odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
on C.Org_code = D.Org_code
AND C.Acc_no = D.Acc_no
AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
where (( C.BEG_DATE = D.BEG_DATE AND C.END_DATE = D.END_DATE ) -- 对应上图中1.两区间相等(重复报)
)
union all
select c.* from ( select
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT,
MODEL_TYPE
from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" C
WHERE (C.BEG_DATE <= C.END_DATE ) ) c -- 错误数据
left join odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
on C.Org_code = D.Org_code
AND C.Acc_no = D.Acc_no
AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
where
( C.BEG_DATE < D.BEG_DATE AND C.END_DATE > D.BEG_DATE ) -- 对应上图中2&3&4:,stage的开始时间在ODM的开始时间之前,且stage的结束时间在ODM的开始时间之后(包括stage.end_date >= odm.end_date的情况)
union all
select c.* from (
select
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT,
MODEL_TYPE
from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" C
WHERE (C.BEG_DATE <= C.END_DATE ) ) c -- 错误数据
left join odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
on C.Org_code = D.Org_code
AND C.Acc_no = D.Acc_no
AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
where (( C.BEG_DATE >= D.BEG_DATE AND C.BEG_DATE < D.END_DATE AND C.END_DATE > D.END_DATE ) -- 对应上图中5&6.stage的开始时间在odm的区间之内(stage.beg_date >= odm.beg_date),stage的结束时间大于odm的结束时间
)
) c
GROUP BY
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
C.OPEN_TIME,
C.CLOSE_TIME,
C.BEG_DATE,
C.END_DATE,
C.DT,
C.MODEL_TYPE;
-- 3.临时表合并ODM与STAGE数据,按beg_date与end_date获取该主键的上下条数据
DROP TABLE tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""";
CREATE TABLE tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""" AS
SELECT C.etl_date,
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
C.OPEN_TIME,
C.CLOSE_TIME,
C.BEG_DATE,
C.END_DATE,
C.DT,
MODEL_TYPE,
C.CHAIN_TYPE,
C.TAB_NAME,
c.repeats_num,
LAG(C.BEG_DATE, 1, NULL) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS PRE_BEG_DATE,
LAG(C.END_DATE, 1, NULL) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS PRE_END_DATE,
LAG(C.TAB_NAME, 1, C.TAB_NAME) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS PRE_TAB_NAME, -- 不存在上一条的,默认为本条的tab_name
LEAD(C.BEG_DATE, 1, NULL) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS NEXT_BEG_DATE,
LEAD(C.END_DATE, 1, NULL) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS NEXT_END_DATE,
LEAD(C.TAB_NAME, 1, C.TAB_NAME) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS NEXT_TAB_NAME -- 不存在下一条的,默认为本条的tab_name
FROM (SELECT etl_date,
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
stg_DT as dt ,
MODEL_TYPE,
CHAIN_TYPE,
1 AS repeats_num,
'ODM' TAB_NAME
FROM odm.odm_C0000000000001_CUST_PRO_ACC_H_D A
UNION ALL
SELECT current_timestamp() as etl_date ,
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
END_DATE,
DT ,
MODEL_TYPE,
NULL AS CHAIN_TYPE,
repeats_num,
'STAGE' TAB_NAME
FROM tmp.TMP_S_CUST_PRO_ACC_00"""$organ_params""" B ) C;
-- 4.插入到ODM表中,按END_DATE分区
-- 包括:4.1 ODM中被更新且更新后的数据,有效标志为Y
-- 4.2 ODM中被更新且更新前的数据,有效标志为N
-- 4.3 临时表1的数据(STAGE中正常的数据),有效标志为N
-- 4.4 临时表2的数据(STAGE中异常的数据),有效标志为N
INSERT OVERWRITE TABLE odm.odm_C0000000000001_CUST_PRO_ACC_H_D PARTITION(END_DATE)
SELECT
current_timestamp(),
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
CASE
WHEN D.PRE_TAB_NAME = 'ODM' AND D.NEXT_TAB_NAME = 'ODM' THEN D.BEG_DATE
WHEN D.PRE_TAB_NAME = 'ODM' AND D.NEXT_TAB_NAME = 'STAGE' THEN D.BEG_DATE
WHEN D.PRE_TAB_NAME = 'STAGE' AND D.NEXT_TAB_NAME = 'ODM' THEN D.PRE_END_DATE
WHEN D.PRE_TAB_NAME = 'STAGE' AND D.NEXT_TAB_NAME = 'STAGE' THEN D.PRE_END_DATE
ELSE NULL END AS BEG_DATE,
MODEL_TYPE,
CASE WHEN D.PRE_BEG_DATE IS NULL THEN 'N' ELSE 'M' END AS CHAIN_TYPE,
'Y' AS YXBZ,
dt ,
-- 'ODM_NEW' AS TAB_SRC,
1 AS REPEATS_NUM,
CASE
WHEN D.PRE_TAB_NAME = 'ODM' AND D.NEXT_TAB_NAME = 'ODM' THEN D.END_DATE
WHEN D.PRE_TAB_NAME = 'ODM' AND D.NEXT_TAB_NAME = 'STAGE' THEN D.NEXT_BEG_DATE
WHEN D.PRE_TAB_NAME = 'STAGE' AND D.NEXT_TAB_NAME = 'ODM' THEN D.END_DATE
WHEN D.PRE_TAB_NAME = 'STAGE' AND D.NEXT_TAB_NAME = 'STAGE' THEN D.NEXT_BEG_DATE
ELSE NULL END AS END_DATE
-- 其余字段取ODM字段
FROM tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""" D
WHERE D.TAB_NAME = 'ODM' -- ODM拆链后的数据及无变化的数据处理为有效
AND NVL(D.PRE_END_DATE, '99991231') <> NVL(D.NEXT_BEG_DATE, '19900101') -- 去除ODM一段链被STAGE拆成多段,且STAGE多段区间相加等于ODM的情况
UNION ALL
SELECT
current_timestamp(),
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
BEG_DATE,
MODEL_TYPE,
'M' AS CHAIN_TYPE,
'N' AS YXBZ,
dt ,
-- 'ODM_OLD' AS TAB_SRC,
1 AS REPEATS_NUM,
END_DATE
-- 其余字段取ODM字段
FROM tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""" D
WHERE D.TAB_NAME = 'ODM'
AND NOT (D.PRE_END_DATE = D.BEG_DATE AND D.NEXT_BEG_DATE = D.END_DATE) -- ODM拆链前的数据处理为无效
UNION ALL
SELECT
current_timestamp(),
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
E.BEG_DATE,
MODEL_TYPE,
CASE WHEN E.PRE_BEG_DATE IS NULL THEN 'N' ELSE 'M' END AS CHAIN_TYPE,
'Y' AS YXBZ,
dt ,
-- 'STAGE_NOR' AS TAB_SRC,
E.REPEATS_NUM AS REPEATS_NUM,
END_DATE
-- 其余字段取stage字段
FROM tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""" E
WHERE E.TAB_NAME = 'STAGE'
UNION ALL
SELECT current_timestamp(),
Org_code ,
Acc_type ,
Acc_name ,
Acc_no ,
Set_Bank_hq ,
Set_Bank_hq_no ,
OPEN_TIME,
CLOSE_TIME,
F.BEG_DATE,
MODEL_TYPE,
NULL AS CHAIN_TYPE,
'N' YXBZ,
dt ,
-- 'STAGE_UNNOR' AS TAB_SRC,
F.REPEATS_NUM,
F.END_DATE
FROM tmp.TMP_S_CUST_PRO_ACC_01"""$organ_params""" F ;
"