有一段sql

9 篇文章 0 订阅
#!/bin/bash
########################################################################################################################
#  Creater        :
#  Creation Time  :
#  Description    :
#  Modify By      :
#  Modify Time    :
#  Modify Content :
#  Script Version :1.0.3
########################################################################################################################
#===============================================================================
# 机构名称
organ_params=$1

# 调度周期
data_day_str=$2




hive -e "

set mapred.job.priority=NORMAL;
set mapred.output.compress=true;
set hive.exec.compress.output=true;
SET hive.default.fileformat=Orc;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
--set hive.auto.convert.join = true;
--set mapreduce.reduce.memory.mb=12288;
--set mapreduce.reduce.java.opts=-Xmx11788M;

drop table if exists tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""";
CREATE TABLE tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params""" AS 
SELECT 
                Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
               A.OPEN_TIME,
               A.CLOSE_TIME,
               OPEN_TIME AS BEG_DATE,
               '99991231' AS END_DATE,
               A.DT,
               A.MODEL_TYPE
          FROM STG.STG_C0000000000001_CUST_PRO_ACC_I_D A
         WHERE dt = '"""$data_day_str"""' and  A.MODEL_TYPE = 'N'
        UNION ALL
        SELECT  Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
                OPEN_TIME,
                CLOSE_TIME,
                BEG_DATE,
                END_DATE,
                DT,
                MODEL_TYPE
          FROM STG.STG_C0000000000001_CUST_PRO_ACC_I_D B
         WHERE dt = '"""$data_day_str"""' and  B.MODEL_TYPE = 'M'  ; 


drop table tmp.TMP_S_CUST_PRO_ACC_00"""$organ_params"""; 
CREATE TABLE tmp.TMP_S_CUST_PRO_ACC_00"""$organ_params""" AS 
SELECT 
       Org_code       ,
       Acc_type       ,
       Acc_name       ,
       Acc_no         ,
       Set_Bank_hq    ,
       Set_Bank_hq_no ,
       C.OPEN_TIME,
       C.CLOSE_TIME,
       C.BEG_DATE,
       C.END_DATE,
       C.DT,
       C.MODEL_TYPE,
       COUNT(1) AS repeats_num 
  FROM (select 
                Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
                OPEN_TIME,
                CLOSE_TIME,
                BEG_DATE,
                END_DATE,
                DT,
                MODEL_TYPE
        from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params"""  C
 WHERE C.BEG_DATE <= C.END_DATE 
  AND NOT EXISTS (SELECT 1
          FROM odm.odm_C0000000000001_CUST_PRO_ACC_H_D  D
         WHERE C.Org_code = D.Org_code
           AND C.Acc_no = D.Acc_no
           AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
           AND ( C.BEG_DATE = D.BEG_DATE AND C.END_DATE = D.END_DATE      
          ) ) 
           union all 
           select c.* from ( 
           select 
                Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
                OPEN_TIME,
                CLOSE_TIME,
                BEG_DATE,
                END_DATE,
                DT,
                MODEL_TYPE
        from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params"""  C
 WHERE C.BEG_DATE <= C.END_DATE ) c 
  left join odm.odm_C0000000000001_CUST_PRO_ACC_H_D  D
         on  C.Org_code = D.Org_code
           AND C.Acc_no = D.Acc_no
           AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
           where not (
             C.BEG_DATE < D.BEG_DATE AND C.END_DATE > D.BEG_DATE   
           )  
           
           union all 
           select c.* from ( 
           select 
                Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
                OPEN_TIME,
                CLOSE_TIME,
                BEG_DATE,
                END_DATE,
                DT,
                MODEL_TYPE
        from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params"""  C
 WHERE C.BEG_DATE <= C.END_DATE ) c  
  left join odm.odm_C0000000000001_CUST_PRO_ACC_H_D  D
         on  C.Org_code = D.Org_code
           AND C.Acc_no = D.Acc_no
           AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
           where ( C.BEG_DATE >= D.BEG_DATE AND C.BEG_DATE < D.END_DATE AND C.END_DATE > D.END_DATE  
           )  
  )c 
GROUP BY   Org_code       ,
           Acc_type       ,
           Acc_name       ,
           Acc_no         ,
           Set_Bank_hq    ,
           Set_Bank_hq_no ,
          C.OPEN_TIME,
          C.CLOSE_TIME,
          C.BEG_DATE,
          C.END_DATE,
          C.DT,
          C.MODEL_TYPE;

-- 2.STAGE异常数据(包括重复报、跨链、错误数据)
drop table if exists tmp.TMP_S_CUST_PRO_ACC_01"""$organ_params"""; 
CREATE TABLE tmp.TMP_S_CUST_PRO_ACC_01"""$organ_params""" AS 
SELECT 
       Org_code       ,
       Acc_type       ,
       Acc_name       ,
       Acc_no         ,
       Set_Bank_hq    ,
       Set_Bank_hq_no ,
       C.OPEN_TIME,
       C.CLOSE_TIME,
       C.BEG_DATE,
       C.END_DATE,
       C.DT,
       C.MODEL_TYPE,
       CASE WHEN C.BEG_DATE > C.END_DATE THEN 'BEN>END' ELSE  'KL' END AS YCSJ, -- 异常数据标识
       COUNT(1) AS REPEATS_NUM
  FROM (
  select 
                Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
                OPEN_TIME,
                CLOSE_TIME,
                BEG_DATE,
                END_DATE,
                DT,
                MODEL_TYPE
        from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params"""  C
 WHERE (C.BEG_DATE > C.END_DATE)
 union all 
 select c.* from ( 
  select 
                Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
                OPEN_TIME,
                CLOSE_TIME,
                BEG_DATE,
                END_DATE,
                DT,
                MODEL_TYPE
        from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params"""  C
 WHERE C.BEG_DATE <= C.END_DATE ) c   -- 错误数据
 left join  odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
         on  C.Org_code = D.Org_code
           AND C.Acc_no = D.Acc_no
           AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
           where (( C.BEG_DATE = D.BEG_DATE AND C.END_DATE = D.END_DATE )     -- 对应上图中1.两区间相等(重复报)
           ) 
           union all 
           select c.* from (  select 
                Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
                OPEN_TIME,
                CLOSE_TIME,
                BEG_DATE,
                END_DATE,
                DT,
                MODEL_TYPE
        from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params"""  C
 WHERE (C.BEG_DATE <= C.END_DATE ) ) c  -- 错误数据
 left join  odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
         on C.Org_code = D.Org_code
           AND C.Acc_no = D.Acc_no
           AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
           where  
            ( C.BEG_DATE < D.BEG_DATE AND C.END_DATE > D.BEG_DATE )    -- 对应上图中2&3&4:,stage的开始时间在ODM的开始时间之前,且stage的结束时间在ODM的开始时间之后(包括stage.end_date >= odm.end_date的情况)
           union all 
           select c.* from ( 
           select 
                Org_code       ,
                Acc_type       ,
                Acc_name       ,
                Acc_no         ,
                Set_Bank_hq    ,
                Set_Bank_hq_no ,
                OPEN_TIME,
                CLOSE_TIME,
                BEG_DATE,
                END_DATE,
                DT,
                MODEL_TYPE
        from tmp.TMP_S_CUST_PRO_ACC_0000"""$organ_params"""  C
 WHERE (C.BEG_DATE <= C.END_DATE ) )  c    -- 错误数据
 left join odm.odm_C0000000000001_CUST_PRO_ACC_H_D D
        on C.Org_code = D.Org_code
       AND C.Acc_no = D.Acc_no
       AND C.Set_Bank_hq_no = D.Set_Bank_hq_no
     where (( C.BEG_DATE >= D.BEG_DATE AND C.BEG_DATE < D.END_DATE AND C.END_DATE > D.END_DATE )  -- 对应上图中5&6.stage的开始时间在odm的区间之内(stage.beg_date >= odm.beg_date),stage的结束时间大于odm的结束时间
           ) 
           
    ) c        
 GROUP BY 
          Org_code       ,
          Acc_type       ,
          Acc_name       ,
          Acc_no         ,
          Set_Bank_hq    ,
          Set_Bank_hq_no ,
          C.OPEN_TIME,
          C.CLOSE_TIME,
          C.BEG_DATE,
          C.END_DATE,
          C.DT,
          C.MODEL_TYPE;

-- 3.临时表合并ODM与STAGE数据,按beg_date与end_date获取该主键的上下条数据
DROP TABLE tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""";
CREATE TABLE tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""" AS 
   SELECT         C.etl_date,
                  Org_code       ,
                  Acc_type       ,
                  Acc_name       ,
                  Acc_no         ,
                  Set_Bank_hq    ,
                  Set_Bank_hq_no ,
                C.OPEN_TIME,
                C.CLOSE_TIME,
                C.BEG_DATE,
                C.END_DATE,
                C.DT,
                MODEL_TYPE,
                C.CHAIN_TYPE,
                C.TAB_NAME,
                c.repeats_num,
                LAG(C.BEG_DATE, 1, NULL) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS PRE_BEG_DATE, 
                LAG(C.END_DATE, 1, NULL) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS PRE_END_DATE,
                LAG(C.TAB_NAME, 1, C.TAB_NAME) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS PRE_TAB_NAME, -- 不存在上一条的,默认为本条的tab_name
                LEAD(C.BEG_DATE, 1, NULL) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS NEXT_BEG_DATE,
                LEAD(C.END_DATE, 1, NULL) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS NEXT_END_DATE,
                LEAD(C.TAB_NAME, 1, C.TAB_NAME) OVER(PARTITION BY C.Org_code, C.Acc_no, C.Set_Bank_hq_no ORDER BY C.BEG_DATE, C.END_DATE) AS NEXT_TAB_NAME  -- 不存在下一条的,默认为本条的tab_name
          FROM (SELECT etl_date,
                        Org_code       ,
                        Acc_type       ,
                        Acc_name       ,
                        Acc_no         ,
                        Set_Bank_hq    ,
                        Set_Bank_hq_no ,
                        OPEN_TIME,
                        CLOSE_TIME,
                        BEG_DATE,
                        END_DATE,
                        stg_DT as dt ,
                        MODEL_TYPE,
                        CHAIN_TYPE,
                        1 AS repeats_num,
                        'ODM' TAB_NAME
                   FROM odm.odm_C0000000000001_CUST_PRO_ACC_H_D A
                 UNION ALL
                 SELECT current_timestamp() as etl_date ,
                          Org_code       ,
                          Acc_type       ,
                          Acc_name       ,
                          Acc_no         ,
                          Set_Bank_hq    ,
                          Set_Bank_hq_no ,
                        OPEN_TIME,
                        CLOSE_TIME,
                        BEG_DATE,
                        END_DATE,
                        DT ,
                        MODEL_TYPE,
                        NULL AS CHAIN_TYPE,
                        repeats_num,
                        'STAGE' TAB_NAME
                   FROM tmp.TMP_S_CUST_PRO_ACC_00"""$organ_params""" B ) C;
 
-- 4.插入到ODM表中,按END_DATE分区
-- 包括:4.1 ODM中被更新且更新后的数据,有效标志为Y
-- 4.2 ODM中被更新且更新前的数据,有效标志为N
-- 4.3 临时表1的数据(STAGE中正常的数据),有效标志为N
-- 4.4 临时表2的数据(STAGE中异常的数据),有效标志为N
INSERT OVERWRITE TABLE odm.odm_C0000000000001_CUST_PRO_ACC_H_D PARTITION(END_DATE) 
SELECT   
         current_timestamp(),
         Org_code       ,
         Acc_type       ,
         Acc_name       ,
         Acc_no         ,
         Set_Bank_hq    ,
         Set_Bank_hq_no ,
         OPEN_TIME,
         CLOSE_TIME,
       CASE
         WHEN D.PRE_TAB_NAME = 'ODM' AND D.NEXT_TAB_NAME = 'ODM' THEN D.BEG_DATE
         WHEN D.PRE_TAB_NAME = 'ODM' AND D.NEXT_TAB_NAME = 'STAGE' THEN D.BEG_DATE
         WHEN D.PRE_TAB_NAME = 'STAGE' AND D.NEXT_TAB_NAME = 'ODM' THEN D.PRE_END_DATE
         WHEN D.PRE_TAB_NAME = 'STAGE' AND D.NEXT_TAB_NAME = 'STAGE' THEN D.PRE_END_DATE
         ELSE NULL END AS BEG_DATE,
        MODEL_TYPE,
        CASE WHEN D.PRE_BEG_DATE IS NULL THEN 'N' ELSE 'M' END AS CHAIN_TYPE,
       'Y' AS YXBZ,
         dt , 
     --  'ODM_NEW' AS TAB_SRC,
       1 AS REPEATS_NUM,
        CASE
         WHEN D.PRE_TAB_NAME = 'ODM' AND D.NEXT_TAB_NAME = 'ODM' THEN D.END_DATE
         WHEN D.PRE_TAB_NAME = 'ODM' AND D.NEXT_TAB_NAME = 'STAGE' THEN D.NEXT_BEG_DATE
         WHEN D.PRE_TAB_NAME = 'STAGE' AND D.NEXT_TAB_NAME = 'ODM' THEN D.END_DATE
         WHEN D.PRE_TAB_NAME = 'STAGE' AND D.NEXT_TAB_NAME = 'STAGE' THEN D.NEXT_BEG_DATE
         ELSE NULL END AS END_DATE
-- 其余字段取ODM字段
  FROM tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""" D
 WHERE D.TAB_NAME = 'ODM' -- ODM拆链后的数据及无变化的数据处理为有效
   AND NVL(D.PRE_END_DATE, '99991231') <> NVL(D.NEXT_BEG_DATE, '19900101') -- 去除ODM一段链被STAGE拆成多段,且STAGE多段区间相加等于ODM的情况
UNION ALL 
SELECT   
       current_timestamp(),
       Org_code       ,
       Acc_type       ,
       Acc_name       ,
       Acc_no         ,
       Set_Bank_hq    ,
       Set_Bank_hq_no ,
       OPEN_TIME,
       CLOSE_TIME,
       BEG_DATE,
       MODEL_TYPE,
       'M' AS CHAIN_TYPE,
       'N' AS YXBZ,
       dt , 
        -- 'ODM_OLD' AS TAB_SRC,
       1 AS REPEATS_NUM,
       END_DATE
-- 其余字段取ODM字段
  FROM tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""" D
 WHERE D.TAB_NAME = 'ODM'
   AND NOT (D.PRE_END_DATE = D.BEG_DATE AND D.NEXT_BEG_DATE = D.END_DATE) -- ODM拆链前的数据处理为无效
UNION ALL
SELECT   
         current_timestamp(),
         Org_code       ,
         Acc_type       ,
         Acc_name       ,
         Acc_no         ,
         Set_Bank_hq    ,
         Set_Bank_hq_no ,
         OPEN_TIME,
         CLOSE_TIME,
        E.BEG_DATE,
         MODEL_TYPE,
         CASE WHEN E.PRE_BEG_DATE IS NULL THEN 'N' ELSE 'M' END AS CHAIN_TYPE,
       'Y' AS YXBZ,
        dt , 
      -- 'STAGE_NOR' AS TAB_SRC,
       E.REPEATS_NUM AS REPEATS_NUM,
        END_DATE
-- 其余字段取stage字段
  FROM tmp.TMP_ODM_CUST_PRO_ACC_00"""$organ_params""" E
 WHERE E.TAB_NAME = 'STAGE'
UNION ALL
SELECT   current_timestamp(),
         Org_code       ,
         Acc_type       ,
         Acc_name       ,
         Acc_no         ,
         Set_Bank_hq    ,
         Set_Bank_hq_no ,
         OPEN_TIME,
         CLOSE_TIME,
         F.BEG_DATE,
         MODEL_TYPE,
         NULL AS CHAIN_TYPE,
     
       'N' YXBZ,
         dt ,
      -- 'STAGE_UNNOR' AS TAB_SRC,
       F.REPEATS_NUM,
       F.END_DATE
  FROM tmp.TMP_S_CUST_PRO_ACC_01"""$organ_params""" F ;

"

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值