用hive sql 解析sql字符串中的表名

每天喝牛奶

已于 2023-10-12 13:55:35 修改

阅读量288

点赞数 1

文章标签： hive sql hadoop

于 2023-10-09 13:43:27 首次发布

本文链接：https://blog.csdn.net/qq_20942097/article/details/133700214

版权

文章介绍了如何在Spark环境中使用SQL语句处理，通过正则表达式和CTE(CommonTableExpressions)来解析查询中的表名，包括临时表的处理。方法涉及替换关键字、拆分字符串和集合操作来获取表名列表。

摘要由CSDN通过智能技术生成

-- 下面是单字符串测试解析 sql语句中的表名
with cte_test (
select REGEXP_replace(
'select aa.user_name from jk.ods_test_user aa
left join jk.ods_test_dept bb
on aa.user_id = bb.user_id
left join (select * from ods_test_course
) cc
on aa.user_id = cc.user_id','join','from') as replace_str
)

,cte_split_row_str as (
select
-- regexp_replace(
-- regexp_replace(
   regexp_replace(bb.from_split_str,'[\t\n\r\\s]+','&')

           --    ,' ','&')
   -- ,'\\(|\\)','&')
as split_str
from cte_test
lateral view explode(split(replace_str,'from')) bb as from_split_str
)

,cte_row_table_name_str as (
select
if(position('select' in split_str)>0,null,split(split_str,'&')[1]) as row_table_name
from cte_split_row_str
)

select
regexp_replace(concat_ws(',',collect_set(row_table_name)),'\\(|\\)','') as table_name_strs
from cte_row_table_name_str
where row_table_name is not null
;

-- 下面是真实场景解析，注意这个没有对临时表进行回溯解析

-- spark
with cte_test (
select
report_work_id
,REGEXP_replace(
REGEXP_replace(
REGEXP_replace(
REGEXP_replace(sql_str
,'`',''
)
,'JOIN|FROM|join','from'
)
,'SELECT','select'
)
,'WHERE','where'
) as replace_str
from test_extract_sql_table_name
)

,cte_split_row_str as (
select
report_work_id
,regexp_replace(bb.from_split_str,'[\t\n\r\\s]+','&') as split_str
from cte_test
lateral view explode(split(replace_str,'from')) bb as from_split_str
)

,cte_row_table_name_str as (
select
report_work_id
,split_str
,case when position('select' in split_str)=0 then split(split_str,'&')[1]
when position('select' in split_str)>0 and position('where' in split_str)>0 then split(split_str,'&')[1]
when position('select' in split_str)>0 then null
else null end as row_table_name
from cte_split_row_str
)

select
report_work_id
,regexp_replace(concat_ws(',',collect_set(row_table_name)),'\\(|\\)','') as table_name_strs
from cte_row_table_name_str
where row_table_name is not null
and length(row_table_name)>2
and position('::' in row_table_name) =0
group by report_work_id
;