Hive 临时表、regexp_replace、get_json_object、explode、split、regexp_extract、substring_index 混用

对array数据处理

-- Inline sample data: two JSON documents describing group conditions plus one
-- plain-text row that is not JSON. String literals use straight single quotes;
-- the typographic quotes in the scraped text are not valid HiveQL delimiters.
with tablea as (
    select '{"ver":"2.0","con":[{"op":1,"fd":"client_name","cmp":7,"val":"陈洋勇","desc":"客户姓名 包含 陈洋勇","c_has_operator":"3"},{"op":1,"fd":"client_age","cmp":4,"val":"29","desc":"年龄 = 29岁","c_has_operator":"1"}]}' as group_condition
    union all
    select '{"ver":"2.0","con":[{"op":1,"sub":[{"op":1,"fd":"client_sex","cmp":4,"val":"0","desc":"客户性别 = 男","c_has_operator":"0"},{"op":"0","fd":"client_sex","cmp":4,"val":"1","desc":"客户性别 = 女","c_has_operator":"0"}]}]}' as group_condition
    union all
    select '中间状态客群名' as group_condition
)
select group_condition
from tablea;
在这里插入图片描述

-- Inline sample data: two JSON documents plus one plain-text (non-JSON) row.
with tablea as (
    select '{"ver":"2.0","con":[{"op":1,"fd":"client_name","cmp":7,"val":"陈洋勇","desc":"客户姓名 包含 陈洋勇","c_has_operator":"3"},{"op":1,"fd":"client_age","cmp":4,"val":"29","desc":"年龄 = 29岁","c_has_operator":"1"}]}' as group_condition
    union all
    select '{"ver":"2.0","con":[{"op":1,"sub":[{"op":1,"fd":"client_sex","cmp":4,"val":"0","desc":"客户性别 = 男","c_has_operator":"0"},{"op":"0","fd":"client_sex","cmp":4,"val":"1","desc":"客户性别 = 女","c_has_operator":"0"}]}]}' as group_condition
    union all
    select '中间状态客群名' as group_condition
)
-- Field names found directly under "con" ($.con.fd). Rows for which the path
-- yields NULL are filtered out.
select
    group_condition,
    get_json_object(group_condition, '$.con.fd') as arr
from tablea
where get_json_object(group_condition, '$.con.fd') is not null
union all
-- Field names found one level deeper, under "con" -> "sub" ($.con.sub.fd).
select
    group_condition,
    get_json_object(group_condition, '$.con.sub.fd') as arr
from tablea
where get_json_object(group_condition, '$.con.sub.fd') is not null;

在这里插入图片描述

-- Inline sample data: two JSON documents plus one plain-text (non-JSON) row.
with tablea as (
    select '{"ver":"2.0","con":[{"op":1,"fd":"client_name","cmp":7,"val":"陈洋勇","desc":"客户姓名 包含 陈洋勇","c_has_operator":"3"},{"op":1,"fd":"client_age","cmp":4,"val":"29","desc":"年龄 = 29岁","c_has_operator":"1"}]}' as group_condition
    union all
    select '{"ver":"2.0","con":[{"op":1,"sub":[{"op":1,"fd":"client_sex","cmp":4,"val":"0","desc":"客户性别 = 男","c_has_operator":"0"},{"op":"0","fd":"client_sex","cmp":4,"val":"1","desc":"客户性别 = 女","c_has_operator":"0"}]}]}' as group_condition
    union all
    select '中间状态客群名' as group_condition
)
-- One output row per field name found under $.con.fd.
--   arr      : raw get_json_object result
--   arr_flat : arr with double quotes and square brackets stripped
--   c1       : one element of arr_flat after splitting on ','
-- The regex is written '\\[|\\]' because Hive unescapes the string literal
-- before the regex engine sees it; a single backslash would be lost.
-- No "is not null" filter is needed: a plain (non-OUTER) lateral view emits
-- nothing for a row whose exploded array is NULL.
select
    group_condition,
    get_json_object(group_condition, '$.con.fd') as arr,
    regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.fd'), '"', ''), '\\[|\\]', '') as arr_flat,
    c1
from tablea
lateral view explode(
    split(regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.fd'), '"', ''), '\\[|\\]', ''), ',')
) b as c1
union all
-- Same expansion for field names nested under $.con.sub.fd.
select
    group_condition,
    get_json_object(group_condition, '$.con.sub.fd') as arr,
    regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.sub.fd'), '"', ''), '\\[|\\]', '') as arr_flat,
    c1
from tablea
lateral view explode(
    split(regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.sub.fd'), '"', ''), '\\[|\\]', ''), ',')
) b as c1;
在这里插入图片描述

-- Inline sample data: two JSON documents plus one plain-text (non-JSON) row.
with tablea as (
    select '{"ver":"2.0","con":[{"op":1,"fd":"client_name","cmp":7,"val":"陈洋勇","desc":"客户姓名 包含 陈洋勇","c_has_operator":"3"},{"op":1,"fd":"client_age","cmp":4,"val":"29","desc":"年龄 = 29岁","c_has_operator":"1"}]}' as group_condition
    union all
    select '{"ver":"2.0","con":[{"op":1,"sub":[{"op":1,"fd":"client_sex","cmp":4,"val":"0","desc":"客户性别 = 男","c_has_operator":"0"},{"op":"0","fd":"client_sex","cmp":4,"val":"1","desc":"客户性别 = 女","c_has_operator":"0"}]}]}' as group_condition
    union all
    select '中间状态客群名' as group_condition
),
-- asd: one row per field name, taken from both $.con.fd and $.con.sub.fd.
--   arr      : raw get_json_object result
--   arr_flat : arr with double quotes and square brackets stripped
--   c1       : one element of arr_flat after splitting on ','
-- The regex is written '\\[|\\]' because Hive unescapes the string literal
-- before the regex engine sees it. A plain (non-OUTER) lateral view emits
-- nothing for a row whose exploded array is NULL, so no explicit
-- "is not null" filter is required.
asd as (
    select
        group_condition,
        get_json_object(group_condition, '$.con.fd') as arr,
        regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.fd'), '"', ''), '\\[|\\]', '') as arr_flat,
        c1
    from tablea
    lateral view explode(
        split(regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.fd'), '"', ''), '\\[|\\]', ''), ',')
    ) b as c1
    union all
    select
        group_condition,
        get_json_object(group_condition, '$.con.sub.fd') as arr,
        regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.sub.fd'), '"', ''), '\\[|\\]', '') as arr_flat,
        c1
    from tablea
    lateral view explode(
        split(regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.sub.fd'), '"', ''), '\\[|\\]', ''), ',')
    ) b as c1
)
select
    group_condition,
    arr,
    arr_flat,
    c1
from asd;
在这里插入图片描述

-- Inline sample data: two JSON documents plus one plain-text (non-JSON) row.
with tablea as (
    select '{"ver":"2.0","con":[{"op":1,"fd":"client_name","cmp":7,"val":"陈洋勇","desc":"客户姓名 包含 陈洋勇","c_has_operator":"3"},{"op":1,"fd":"client_age","cmp":4,"val":"29","desc":"年龄 = 29岁","c_has_operator":"1"}]}' as group_condition
    union all
    select '{"ver":"2.0","con":[{"op":1,"sub":[{"op":1,"fd":"client_sex","cmp":4,"val":"0","desc":"客户性别 = 男","c_has_operator":"0"},{"op":"0","fd":"client_sex","cmp":4,"val":"1","desc":"客户性别 = 女","c_has_operator":"0"}]}]}' as group_condition
    union all
    select '中间状态客群名' as group_condition
),
-- asd: one row per field name, taken from both $.con.fd and $.con.sub.fd.
--   arr      : raw get_json_object result
--   arr_flat : arr with double quotes and square brackets stripped
--   c1       : one element of arr_flat after splitting on ','
-- The regex is written '\\[|\\]' because Hive unescapes the string literal
-- before the regex engine sees it. A plain (non-OUTER) lateral view emits
-- nothing for a row whose exploded array is NULL, so no explicit
-- "is not null" filter is required.
asd as (
    select
        group_condition,
        get_json_object(group_condition, '$.con.fd') as arr,
        regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.fd'), '"', ''), '\\[|\\]', '') as arr_flat,
        c1
    from tablea
    lateral view explode(
        split(regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.fd'), '"', ''), '\\[|\\]', ''), ',')
    ) b as c1
    union all
    select
        group_condition,
        get_json_object(group_condition, '$.con.sub.fd') as arr,
        regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.sub.fd'), '"', ''), '\\[|\\]', '') as arr_flat,
        c1
    from tablea
    lateral view explode(
        split(regexp_replace(regexp_replace(get_json_object(group_condition, '$.con.sub.fd'), '"', ''), '\\[|\\]', ''), ',')
    ) b as c1
)
-- Number of occurrences of each field name across all sample documents.
select
    c1,
    count(1) as cnt
from asd
group by c1;
在这里插入图片描述
截取数字、去除固定文字、取文字中数字
-- Regex demos on literal inputs (no table needed).
select
    -- First run of digits in a fund code.
    regexp_extract('501001.OF', '([0-9]+)', 1),
    regexp_extract('F050004.OF', '([0-9]+)', 1),
    -- Strip a trailing share-class suffix from a fund name. Every alternative
    -- is anchored with '$' so only a suffix at the very end is removed.
    regexp_replace('华夏纯债债券型证券投资基金A类', '(AB)$|(A$)|(B$)|(C$)|(A类)$|(B类)$|(C类)$|(Y类)$|(A/B类)$|(A/E类)$|(A/C类)$|(A/B)$|(E类)$|(A级)$|(A1)$|(I类)$', ''),
    regexp_replace('华夏纯债债券型证券投资基金I类', '(AB)$|(A$)|(B$)|(C$)|(A类)$|(B类)$|(C类)$|(Y类)$|(A/B类)$|(A/E类)$|(A/C类)$|(A/B)$|(E类)$|(A级)$|(A1)$|(I类)$', ''),
    -- Unanchored pattern: removes 'AB' wherever it occurs.
    regexp_replace('华夏纯债债券型证券投资基金AB类', '(AB)', ''),
    -- Take the text after '商品编号:' and pull the first run of digits from it.
    regexp_extract(split('客户姓名李明(客户号139009)为【20230924 14:09:03】策略商城产品购买断点客户,客户点击【产品名称:黄金眼(商品编号:19)】签约按钮但未成功下单(签约),请尽快跟进服务!', '商品编号:')[1], '([0-9]+)', 1);
在这里插入图片描述
-- Extract the link wrapped in {{ ... }} from a notification message.
-- The middle group must be the lazy '(.*?)': it consumes the URL body and
-- stops at the first '}}'. The scraped '(.?)' can match at most one character
-- and therefore never reaches '}}'.
select
    '尊敬的客户,1月新年有礼的活动已经开始报名啦,报名即可领银豆,任意福利达标可领更多奖励,具体活动详情您可登录APP查看,快捷报名入口:{{https://vip.com:8088/cms-h5/index.html?works_link_id=AErjx3MVyB}}。投资有风险,入市需谨慎。' as asd,
    -- Group 0 is the whole match, so the result still ends with '}}'. With two
    -- links in the text, the lazy quantifier keeps only the first one.
    regexp_extract('尊敬的客户,1月新年有礼的活动已经开始报名啦,报名即可领银豆,任意福利达标可领更多奖励,具体活动详情您可登录APP查看,快捷报名入口:{{https://vip.com:8088/cms-h5/index.html?works_link_id=AErjx3MVyB}},{{https://vip.com:8088/cms-h5/index.html?works_link_id=AErjx3MVyB}}。投资有风险,入市需谨慎。', '(https)(.*?)(}})', 0),
    -- Same extraction, then drop the closing braces to leave the bare URL.
    replace(regexp_extract('尊敬的客户,1月新年有礼的活动已经开始报名啦,报名即可领银豆,任意福利达标可领更多奖励,具体活动详情您可登录APP查看,快捷报名入口:{{https://vip.com:8088/cms-h5/index.html?works_link_id=AErjx3MVyB}}', '(https)(.*?)(}})', 0), '}', '');
结果
-- Pull individual fields out of the raw JSON event column v_data for
-- 'dl_pageview' events of one etl_date partition value.
select
    v_data,
    get_json_object(v_data, '$.distinct_id') as distinct_id,
    get_json_object(v_data, '$.properties.dl_eventid') as dl_eventid,
    get_json_object(v_data, '$.properties.dl_time') as dl_time,
    get_json_object(v_data, '$.properties.dl_num') as dl_num,
    get_json_object(v_data, '$.properties.ua') as ua,
    get_json_object(v_data, '$.properties.host') as host,
    get_json_object(v_data, '$.properties.longUrl') as longUrl,
    get_json_object(v_data, '$.properties.shortUrl') as shortUrl,
    -- The "$ip" key is extracted with string functions instead of a JSON path.
    -- substring_index(..., '$ip', -1) keeps the text AFTER the last '$ip'
    -- (the delimiter itself is dropped), so the first comma-separated piece
    -- looks like  ":"<address>" . Remove the '":"' separator and then the
    -- remaining quote. (Searching for 'ip":"' here could never match and left
    -- a leading ':' in the result.)
    replace(replace(split(substring_index(v_data, '$ip', -1), ',')[0], '":"', ''), '"', '') as ip
from ods.ods_isales_mongodb_sensors_stat_event_dd
where etl_date = 20240618
  and event = 'dl_pageview';

结果
{"distinct_id":"RNiHlL","time":1718651827371,"type":"track","event":"dl_pageview","properties":{"shortUrl":"https://t.bocichina.com/1ymiIn-RNiHlL","host":"116.210.236.211","longUrl":"https://vip.bocichina.com:8088/h5-outer/#/?target=points","ua":"{\"x-real-ip\":\"116.210.236.211\",\"host\":\"t.bocichina.com\",\"connection\":\"close\",\"x-forwarded-for\":\"116.210.236.211\",\"accept-encoding\":\"gzip\",\"user-agent\":\"antispam/1.0.0\",\"via\":\"1.1 ID-0016035524053044 uproxy-5\"}","dl_eventid":"1ymiIn","dl_num":"97871700","dl_time":"2024-06-18 03:17:07","$ip":"172.19.244.241","$is_login_id":false,"$city":"保留IP","$province":"保留IP","$country":"保留IP"},"lib":{"$lib":"NoLib","$lib_version":"unknown","$lib_method":"unknown","$lib_detail":"unknown","$app_version":"unknown"},"possible_remapping_data":true,"dtk":["dl_time"],"map_id":"RNiHlL","user_id":3186726444591,"recv_time":1718651802953,"extractor":{"f":"sdf_input_topic","o":4656918483,"n":"sdf_input_topic","s":4679099653,"c":4679099653,"p":0,"e":"data01.dcanalytics.sa"},"project_id":3,"project":"production","ver":2}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值