前言
在做ETL开发的时候,如果针对每一个字段的处理逻辑基本相同,需要处理的字段和模型又特别多。在这种情况下我们可以通过此ETL生成器,批量生成标准的ETL。
脚本
利用这个思路,可以对模板进行任意的改造
#!/bin/bash
# generate_sql.sh
function detail_mask(){
FILE=$1
TABLE=`tail -n1 $FILE|xargs`
DB=`echo $TABLE | awk -F "_" '{print $1}'`
SQL="SELECT \n"
CNT=0;
for word in `sed '1d;/^from/,+5d' $FILE |xargs|sed 's/,//g'`;
do
if (( $CNT==0 ))
then
CNT=1
LINE="\t RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.${word},'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.${word},'') AS STRING)),-2,2)),7,'*') AS ${word} \n"
else
LINE="\t,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.${word},'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.${word},'') AS STRING)),-2,2)),7,'*') AS ${word} \n"
fi
SQL=${SQL}${LINE}
done
SQL=${SQL}"FROM ${DB}.${TABLE} a \n
JOIN dev.trd_pty_carr_bus_tmp b \n
ON a.carrier_name = b.carr_bus_name \n
WHERE a.dp='ACTIVE' \n
AND a.is_delete=1 \n
AND a.create_time>'2021-08-24' \n
LIMIT 1000"
echo "-----------------------------------------------------------明细数据要求-----------------------------------------------------------" > ${TABLE}".sql"
echo -e ${SQL} >> ${TABLE}".sql"
}
function non_blank_rate(){
FILE=$1
TABLE=`tail -n1 $FILE|xargs`
DB=`echo $TABLE | awk -F "_" '{print $1}'`
sub_sql1="SELECT \n"
sub_sql2="FROM \n\t( \n
\t SELECT\n"
CNT=0
for word in `sed '1d;/^from/,+5d' $FILE |xargs|sed 's/,//g'`;
do
if (( $CNT==0 ))
then
CNT=1
sub_sql1=${sub_sql1}"\t CAST(SUM(nonblank_${word})/COUNT(1) AS DECIMAL(5,2)) AS ${word}\n"
sub_sql2=${sub_sql2}"\t\t CASE
\t WHEN ${word} IS NULL OR TRIM(CAST(${word} AS STRING))='' THEN 0
\t ELSE 1
\t END nonblank_${word}\n"
else
sub_sql1=${sub_sql1}"\t,CAST(SUM(nonblank_${word})/COUNT(1) AS DECIMAL(5,2)) AS ${word}\n"
sub_sql2=${sub_sql2}"\t\t,CASE
\t WHEN ${word} IS NULL OR TRIM(CAST(${word} AS STRING))='' THEN 0
\t ELSE 1
\t END nonblank_${word}\n"
fi
done
SQL=${sub_sql1}${sub_sql2}"\t\tFROM ${DB}.${TABLE} \n
\t\tWHERE dp='ACTIVE'\n
) tmp"
echo -e "\n" >> ${TABLE}".sql"
echo "-----------------------------------------------------------汇总数据要求-----------------------------------------------------------" >> ${TABLE}".sql"
echo -e ${SQL} >> ${TABLE}".sql"
}
USAGE="Usage: $0 {file_path} {n}"
function main(){
if (( $#<2 ))
then
echo $USAGE
return;
fi
case $1 in
1)
detail_mask $2
;;
2)
non_blank_rate $2
;;
*)
echo "Usage: $0 {file_path} {1|2}"
esac
}
function entry(){
if (( $#<1 ))
then
echo "Usage: $0 {file_path}"
return;
fi
main 1 $1
main 2 $1
}
entry $@
可以讲一下格式数据生成标准的ETL
select
start_date,
change_code,
business_group_id,
organization_id,
name,
date_from,
date_to,
short_code,
set_of_books_id,
default_legal_context_id,
usable_flag
from
dwd_tms_tfc_trans_bill
生成的ETL
-----------------------------------------------------------明细数据要求-----------------------------------------------------------
SELECT
RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.start_date,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.start_date,'') AS STRING)),-2,2)),7,'*') AS start_date
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.change_code,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.change_code,'') AS STRING)),-2,2)),7,'*') AS change_code
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.business_group_id,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.business_group_id,'') AS STRING)),-2,2)),7,'*') AS business_group_id
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.organization_id,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.organization_id,'') AS STRING)),-2,2)),7,'*') AS organization_id
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.name,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.name,'') AS STRING)),-2,2)),7,'*') AS name
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.date_from,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.date_from,'') AS STRING)),-2,2)),7,'*') AS date_from
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.date_to,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.date_to,'') AS STRING)),-2,2)),7,'*') AS date_to
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.short_code,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.short_code,'') AS STRING)),-2,2)),7,'*') AS short_code
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.set_of_books_id,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.set_of_books_id,'') AS STRING)),-2,2)),7,'*') AS set_of_books_id
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.default_legal_context_id,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.default_legal_context_id,'') AS STRING)),-2,2)),7,'*') AS default_legal_context_id
,RPAD(CONCAT(SUBSTR(TRIM(CAST(NVL(a.usable_flag,'') AS STRING)),1,2),'***',SUBSTR(TRIM(CAST(NVL(a.usable_flag,'') AS STRING)),-2,2)),7,'*') AS usable_flag
FROM dwd.dwd_tms_tfc_trans_bill a
JOIN dev.trd_pty_carr_bus_tmp b
ON a.carrier_name = b.carr_bus_name
WHERE a.dp='ACTIVE'
AND a.is_delete=1
AND a.create_time>'2021-08-24'
LIMIT 1000
-----------------------------------------------------------汇总数据要求-----------------------------------------------------------
SELECT
CAST(SUM(nonblank_start_date)/COUNT(1) AS DECIMAL(5,2)) AS start_date
,CAST(SUM(nonblank_change_code)/COUNT(1) AS DECIMAL(5,2)) AS change_code
,CAST(SUM(nonblank_business_group_id)/COUNT(1) AS DECIMAL(5,2)) AS business_group_id
,CAST(SUM(nonblank_organization_id)/COUNT(1) AS DECIMAL(5,2)) AS organization_id
,CAST(SUM(nonblank_name)/COUNT(1) AS DECIMAL(5,2)) AS name
,CAST(SUM(nonblank_date_from)/COUNT(1) AS DECIMAL(5,2)) AS date_from
,CAST(SUM(nonblank_date_to)/COUNT(1) AS DECIMAL(5,2)) AS date_to
,CAST(SUM(nonblank_short_code)/COUNT(1) AS DECIMAL(5,2)) AS short_code
,CAST(SUM(nonblank_set_of_books_id)/COUNT(1) AS DECIMAL(5,2)) AS set_of_books_id
,CAST(SUM(nonblank_default_legal_context_id)/COUNT(1) AS DECIMAL(5,2)) AS default_legal_context_id
,CAST(SUM(nonblank_usable_flag)/COUNT(1) AS DECIMAL(5,2)) AS usable_flag
FROM
(
SELECT
CASE WHEN start_date IS NULL OR TRIM(CAST(start_date AS STRING))='' THEN 0 ELSE 1 END nonblank_start_date
,CASE WHEN change_code IS NULL OR TRIM(CAST(change_code AS STRING))='' THEN 0 ELSE 1 END nonblank_change_code
,CASE WHEN business_group_id IS NULL OR TRIM(CAST(business_group_id AS STRING))='' THEN 0 ELSE 1 END nonblank_business_group_id
,CASE WHEN organization_id IS NULL OR TRIM(CAST(organization_id AS STRING))='' THEN 0 ELSE 1 END nonblank_organization_id
,CASE WHEN name IS NULL OR TRIM(CAST(name AS STRING))='' THEN 0 ELSE 1 END nonblank_name
,CASE WHEN date_from IS NULL OR TRIM(CAST(date_from AS STRING))='' THEN 0 ELSE 1 END nonblank_date_from
,CASE WHEN date_to IS NULL OR TRIM(CAST(date_to AS STRING))='' THEN 0 ELSE 1 END nonblank_date_to
,CASE WHEN short_code IS NULL OR TRIM(CAST(short_code AS STRING))='' THEN 0 ELSE 1 END nonblank_short_code
,CASE WHEN set_of_books_id IS NULL OR TRIM(CAST(set_of_books_id AS STRING))='' THEN 0 ELSE 1 END nonblank_set_of_books_id
,CASE WHEN default_legal_context_id IS NULL OR TRIM(CAST(default_legal_context_id AS STRING))='' THEN 0 ELSE 1 END nonblank_default_legal_context_id
,CASE WHEN usable_flag IS NULL OR TRIM(CAST(usable_flag AS STRING))='' THEN 0 ELSE 1 END nonblank_usable_flag
FROM dwd.dwd_tms_tfc_trans_bill
WHERE dp='ACTIVE'
) tmp