select
a.db_name,a.table_name,a.table_comment,a.source_db,a.source_table,sum(d.param_value) as param_value, SUBSTR(c.part_name,4,10) as part_date,
'dmp' as create_by,'dmp' as update_by
from
(select
stg_task.db_name,stg_task.table_name,stg_task.table_comment,stg_map.source_db,stg_map.source_table
from
(select db_name,table_name,table_comment from dw_etl.syn_analysis_stg_task where task_status=1) stg_task
inner join
(select target_db,target_table,source_db,source_table from dw_etl.syn_stg_map group by target_db,target_table,source_db,source_table) stg_map
on stg_task.db_name=stg_map.target_db and stg_task.table_name=stg_map.target_table
) a
inner join
(select tbl_id,tbl_name from hive.TBLS) b
on b.tbl_name=a.table_name
inner join
(select tbl_id,part_id,part_name from hive.PARTITIONS) c
on c.tbl_id=b.tbl_id
inner join
(select part_id,param_key,param_value from hive.PARTITION_PARAMS where PARAM_KEY='numRows') d
on d.part_id=c.part_id
group by
a.db_name,a.table_name,a.table_comment,a.source_db,a.source_table, SUBSTR(c.part_name,4,10)
on duplicate key update db_name=values(db_name),table_name=values(table_name),table_comment=values(table_comment),
source_db=values(source_db),source_table=values(source_table),param_value=values(param_value),part_date=values(part_date),
create_by=values(create_by),update_by=values(update_by);
hive分区表分区数据量统计
于 2023-01-09 11:37:36 首次发布