自动化生成标准ETL脚本

前言

在做ETL开发的时候,如果针对每一个字段的处理逻辑基本相同,需要处理的字段和模型又特别多。在这种情况下我们可以通过此ETL生成器,批量生成标准的ETL。

脚本

利用这个思路,可以对模板进行任意的改造

#!/bin/bash
# generate_sql.sh
function detail_mask(){
    FILE=$1
    TABLE=`tail -n1 $FILE|xargs`
    DB=`echo $TABLE | awk -F "_" '{print $1}'`
    SQL="SELECT \n"
    CNT=0;
    for word in `sed  '1d;/^from/,+5d' $FILE |xargs|sed 's/,//g'`; 
    do  
        if (( $CNT==0 ))
        then
            CNT=1
            LINE="\t RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.${word},'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.${word},'') AS STRING)),-2,2)),7,'*') AS ${word} \n"
        else
            LINE="\t,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.${word},'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.${word},'') AS STRING)),-2,2)),7,'*') AS ${word} \n"
        fi
        SQL=${SQL}${LINE}
    done
    SQL=${SQL}"FROM ${DB}.${TABLE} a \n
        JOIN dev.trd_pty_carr_bus_tmp b \n
        ON   a.carrier_name = b.carr_bus_name \n
        WHERE a.dp='ACTIVE' \n
        AND   a.is_delete=1 \n
        AND   a.create_time>'2021-08-24' \n
        LIMIT 1000"
    echo "-----------------------------------------------------------明细数据要求-----------------------------------------------------------" > ${TABLE}".sql"
    echo -e ${SQL} >>  ${TABLE}".sql"
}


function non_blank_rate(){
    FILE=$1
    TABLE=`tail -n1 $FILE|xargs`
    DB=`echo $TABLE | awk -F "_" '{print $1}'`
    sub_sql1="SELECT \n"
    sub_sql2="FROM \n\t( \n
                          \t SELECT\n"
    CNT=0
    for word in `sed  '1d;/^from/,+5d' $FILE |xargs|sed 's/,//g'`; 
    do 
        if (( $CNT==0 ))
        then
            CNT=1
            sub_sql1=${sub_sql1}"\t CAST(SUM(nonblank_${word})/COUNT(1) AS DECIMAL(5,2)) AS ${word}\n"
            sub_sql2=${sub_sql2}"\t\t CASE 
                                 \t   WHEN ${word} IS NULL OR TRIM(CAST(${word} AS STRING))='' THEN 0
                                 \t   ELSE                                                              1
                                 \t   END nonblank_${word}\n"
        else
            sub_sql1=${sub_sql1}"\t,CAST(SUM(nonblank_${word})/COUNT(1) AS DECIMAL(5,2)) AS ${word}\n"
            sub_sql2=${sub_sql2}"\t\t,CASE 
                                 \t   WHEN ${word} IS NULL OR TRIM(CAST(${word} AS STRING))='' THEN 0
                                 \t   ELSE                                                              1
                                 \t   END nonblank_${word}\n"
        fi
    done
    SQL=${sub_sql1}${sub_sql2}"\t\tFROM ${DB}.${TABLE} \n
                               \t\tWHERE dp='ACTIVE'\n
                               ) tmp"
    echo -e "\n" >> ${TABLE}".sql"
    echo "-----------------------------------------------------------汇总数据要求-----------------------------------------------------------" >> ${TABLE}".sql"
    echo -e ${SQL} >>  ${TABLE}".sql"
}

USAGE="Usage: $0 {file_path} {n}"

function main(){
    if (( $#<2 ))
    then
        echo $USAGE
        return;
    fi

    case $1 in 
        1)
            detail_mask $2
        ;;
        2)
            non_blank_rate $2
        ;;
        *)
            echo "Usage: $0 {file_path} {1|2}"
    esac

}

function entry(){
    if (( $#<1 ))
    then
        echo "Usage: $0 {file_path}"
        return;
    fi
   main 1  $1
   main 2  $1
}

entry $@

可以讲一下格式数据生成标准的ETL

select
  start_date,
  change_code,
  business_group_id,
  organization_id,
  name,
  date_from,
  date_to,
  short_code,
  set_of_books_id,
  default_legal_context_id,
  usable_flag
from
  dwd_tms_tfc_trans_bill

生成的ETL

-----------------------------------------------------------明细数据要求-----------------------------------------------------------
SELECT 
	 RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.start_date,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.start_date,'') AS STRING)),-2,2)),7,'*') AS start_date 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.change_code,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.change_code,'') AS STRING)),-2,2)),7,'*') AS change_code 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.business_group_id,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.business_group_id,'') AS STRING)),-2,2)),7,'*') AS business_group_id 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.organization_id,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.organization_id,'') AS STRING)),-2,2)),7,'*') AS organization_id 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.name,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.name,'') AS STRING)),-2,2)),7,'*') AS name 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.date_from,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.date_from,'') AS STRING)),-2,2)),7,'*') AS date_from 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.date_to,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.date_to,'') AS STRING)),-2,2)),7,'*') AS date_to 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.short_code,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.short_code,'') AS STRING)),-2,2)),7,'*') AS short_code 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.set_of_books_id,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.set_of_books_id,'') AS STRING)),-2,2)),7,'*') AS set_of_books_id 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.default_legal_context_id,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.default_legal_context_id,'') AS STRING)),-2,2)),7,'*') AS default_legal_context_id 
	,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.usable_flag,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.usable_flag,'') AS STRING)),-2,2)),7,'*') AS usable_flag 
FROM dwd.dwd_tms_tfc_trans_bill a 
 JOIN dev.trd_pty_carr_bus_tmp b 
 ON a.carrier_name = b.carr_bus_name 
 WHERE a.dp='ACTIVE' 
 AND a.is_delete=1 
 AND a.create_time>'2021-08-24' 
 LIMIT 1000


-----------------------------------------------------------汇总数据要求-----------------------------------------------------------
SELECT 
	 CAST(SUM(nonblank_start_date)/COUNT(1) AS DECIMAL(5,2)) AS start_date
	,CAST(SUM(nonblank_change_code)/COUNT(1) AS DECIMAL(5,2)) AS change_code
	,CAST(SUM(nonblank_business_group_id)/COUNT(1) AS DECIMAL(5,2)) AS business_group_id
	,CAST(SUM(nonblank_organization_id)/COUNT(1) AS DECIMAL(5,2)) AS organization_id
	,CAST(SUM(nonblank_name)/COUNT(1) AS DECIMAL(5,2)) AS name
	,CAST(SUM(nonblank_date_from)/COUNT(1) AS DECIMAL(5,2)) AS date_from
	,CAST(SUM(nonblank_date_to)/COUNT(1) AS DECIMAL(5,2)) AS date_to
	,CAST(SUM(nonblank_short_code)/COUNT(1) AS DECIMAL(5,2)) AS short_code
	,CAST(SUM(nonblank_set_of_books_id)/COUNT(1) AS DECIMAL(5,2)) AS set_of_books_id
	,CAST(SUM(nonblank_default_legal_context_id)/COUNT(1) AS DECIMAL(5,2)) AS default_legal_context_id
	,CAST(SUM(nonblank_usable_flag)/COUNT(1) AS DECIMAL(5,2)) AS usable_flag
FROM 
	( 
 	 SELECT
		 CASE 	 WHEN start_date IS NULL OR TRIM(CAST(start_date AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_start_date
		,CASE 	 WHEN change_code IS NULL OR TRIM(CAST(change_code AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_change_code
		,CASE 	 WHEN business_group_id IS NULL OR TRIM(CAST(business_group_id AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_business_group_id
		,CASE 	 WHEN organization_id IS NULL OR TRIM(CAST(organization_id AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_organization_id
		,CASE 	 WHEN name IS NULL OR TRIM(CAST(name AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_name
		,CASE 	 WHEN date_from IS NULL OR TRIM(CAST(date_from AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_date_from
		,CASE 	 WHEN date_to IS NULL OR TRIM(CAST(date_to AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_date_to
		,CASE 	 WHEN short_code IS NULL OR TRIM(CAST(short_code AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_short_code
		,CASE 	 WHEN set_of_books_id IS NULL OR TRIM(CAST(set_of_books_id AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_set_of_books_id
		,CASE 	 WHEN default_legal_context_id IS NULL OR TRIM(CAST(default_legal_context_id AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_default_legal_context_id
		,CASE 	 WHEN usable_flag IS NULL OR TRIM(CAST(usable_flag AS STRING))='' THEN 0 	 ELSE 1 	 END nonblank_usable_flag
		FROM dwd.dwd_tms_tfc_trans_bill
 		WHERE dp='ACTIVE'
 ) tmp

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值