-- 下面是 单字符串 测试 解析 sql语句中的 表名
with cte_test (
select REGEXP_replace(
'select aa.user_name from jk.ods_test_user aa
left join jk.ods_test_dept bb
on aa.user_id = bb.user_id
left join (select * from ods_test_course
) cc
on aa.user_id = cc.user_id','join','from') as replace_str
)
,cte_split_row_str as (
select
-- regexp_replace(
-- regexp_replace(
regexp_replace(bb.from_split_str,'[\t\n\r\\s]+','&')
-- ,' ','&')
-- ,'\\(|\\)','&')
as split_str
from cte_test
lateral view explode(split(replace_str,'from')) bb as from_split_str
)
,cte_row_table_name_str as (
select
if(position('select' in split_str)>0,null,split(split_str,'&')[1]) as row_table_name
from cte_split_row_str
)
select
regexp_replace(concat_ws(',',collect_set(row_table_name)),'\\(|\\)','') as table_name_strs
from cte_row_table_name_str
where row_table_name is not null
;
-- 下面是真实场景解析,注意 这个没有对临时表进行 回溯解析
-- spark
with cte_test (
select
report_work_id
,REGEXP_replace(
REGEXP_replace(
REGEXP_replace(
REGEXP_replace(sql_str
,'`',''
)
,'JOIN|FROM|join','from'
)
,'SELECT','select'
)
,'WHERE','where'
) as replace_str
from test_extract_sql_table_name
)
,cte_split_row_str as (
select
report_work_id
,regexp_replace(bb.from_split_str,'[\t\n\r\\s]+','&') as split_str
from cte_test
lateral view explode(split(replace_str,'from')) bb as from_split_str
)
,cte_row_table_name_str as (
select
report_work_id
,split_str
,case when position('select' in split_str)=0 then split(split_str,'&')[1]
when position('select' in split_str)>0 and position('where' in split_str)>0 then split(split_str,'&')[1]
when position('select' in split_str)>0 then null
else null end as row_table_name
from cte_split_row_str
)
select
report_work_id
,regexp_replace(concat_ws(',',collect_set(row_table_name)),'\\(|\\)','') as table_name_strs
from cte_row_table_name_str
where row_table_name is not null
and length(row_table_name)>2
and position('::' in row_table_name) =0
group by report_work_id
;