数据解析之json串解析

用hive做数据统计

 select 
    summary_data,
    card,
    query,
    query_total_search_amount,
    query_total_click_amount,
    current_card_total_click_amount,
    click_ratio,
    current_card_click_ratio
  from
  (
   select 
     '202109' summary_data,
     card,
     query,
     sum(query_search_amount) query_total_search_amount,
     sum(query_click_amount) query_total_click_amount,
     sum(current_card_click_amount) current_card_total_click_amount,
     round(sum(current_card_click_amount)/sum(query_search_amount)*100,2) click_ratio,
     round(sum(current_card_click_amount)/sum(query_click_amount)*100,2) current_card_click_ratio,
     rank() over(partition by card ORDER BY round(sum(current_card_click_amount)/sum(query_search_amount)*100,2) desc) rk 
   from ads_search_query_card_all_d 
   where pt_d rlike '202109' 
   group by card,query
   ) t 
  where rk<=1000
  and card='Shop Card'

非结构化数据中的json串

{
“@hostAddr”:“10.2.43.96”
,“message”:“{“cpCooperationMode”:3,“date”:“2021-04-20 11:38:16”
,“copyright”:“CN”
,“langInfo”:[{“country”:“CN”,“lang”:“zh”}]
,“eventCategory”:“a001”
,“channel”:201,“rating”:[{“system”:“China”
,“level”:“Adults” ,“ratingAge”:0}]
,“source”:“SnapShotCarShow”
,“hiBoardDocId”:“826140605_901_kuran1034=4628021517418579”
,“multiUrlDistribution”:0 ,“dislikeReasonCodes”:[]
,“dislikeReasons”:[]
,“image”:“https://contentcenter.dbankcdn.com/pub_1/AssistService_WiseInfo_100_1/89/v3/yI726Xe7Rym6PEcAt7IN8w/6eaa26b4-8205-4352-bad6-38f9880bded9.jpg”
,“hiBoardCpType”:1
,“contentSyncPolicy”:0,“dtype”:21,“pageLevel”:“1”
,“eventType”:“WITHTAGCONTENT”,“hwDocId”:“kuran1034=4628021517418579”
,“expirationAt”:“20591213”,“labels”:“奔驰,德系,轿车”,“tags”:[]
,“isGreen”:“false”,“functionID”:“0”,“ctype”:“video_live”
,“huaweiVideoSpId”:“12”,“authority”:0,“pageQuality”:“1”,“businessTag”:[]
,“isHot”:“2”,“simHash”:[2184298192,371685435],“status”:1,“eventID”:“2d961d8f184ed5ba96c1c6aa6fbb1b0438e36359b44f472a”
,“isGreat”:“2”,“subcat”:“汽车随拍”,“title”:“Mercedes-BenzE55AMGW211BrutalExhaust”,“multiShareUrlData”:[]
,“multiUrlData”:[],“duration”:294
,“iconOfSource”:“https://cinema-pics-drcn.dbankcdn.com/mmp/img/categ5/20190511/11/15575445519923467.jpg”
,“videoUrl”:”{\“spVolumeId\”:\“1034=4628021517418579\”,\“mvId\”:\“kuran1034=4628021517418579\”
,\“volumeId\”:\“kuran1034=4628021517418579\”,\“spId\”:\“12\”}“,“eventTime”:“20210420154533”
,“wiseInfoEntities”:{“Mercedes”:15.123446271751328},“categories”:“xg06000000”,“realDate”:“2021-04-20
11:38:16”
,“timestamp”:1618904733507,“sourceID”:“201905111115518170001125000000”,“summary”:“Mercedes-BenzE55AMGW211BrutalExhaust”
,“green”:false,“cpID”:“901”
,“docid”:“kuran1034=4628021517418579”
,“url”:“https://portal-drcn.video.cloud.huawei.com/share/shortvod/929/kuran1034=4628021517418579.html”
,“videoSize”:{},“imageUrls”:[],“recommPeriod”:“1”,“locations”:[],“category”:“汽车”
,“effectiveFrom”:“20210420”}”
,“@hostName”:“ncn2a-hwassistant-DataCenterService-2-43-96”
,“@logType”:“wiseinfo-feedcontent-to-hive”}

SQL解析以上的json串,获取如下字段

select 
  regexp_replace(get_json_object(get_json_object(content,'$.message'),'$.hiBoardDocId'),'\\n|\\t','') as item_id,
  regexp_replace(get_json_object(get_json_object(content,'$.message'),'$.text'),'\\n|\\t','') as text,
  regexp_replace(get_json_object(get_json_object(content,'$.message'),'$.title'),'\\n|\\t','') as title,
  regexp_replace(get_json_object(get_json_object(content,'$.message'),'$.summary'),'\\n|\\t','') as summary,
  regexp_replace(get_json_object(get_json_object(content,'$.message'),'$.ctype'),'\\n|\\t','') as ctype,
  regexp_replace(get_json_object(get_json_object(content,'$.message'),'$.categories'),'\\n|\\t','') as categories
from ods_feedcontent_h WHERE pt_h rlike '$date';

json串:param={“info”:“{“type”:3,“curPosition”:23731,“duration”:23731,“pos”:7}”}

解析:

select distinct param
       ,get_json_object(param,'$.info') as info
       ,get_json_object(get_json_object(param,'$.info'),'$.duration') as  duration1
       ,regexp_replace(get_json_object(get_json_object(param,'$.info'),'$.duration'),'\\n|\\t','') as duration2
       ,explode(regexp_replace(get_json_object(get_json_object(param,'$.info'),'$.duration'),'\\n|\\t','')) as duration3
  from dwd_feed_useraction_tmp_h
 where pt_h='2021112400' 
   and subStr(action_time,1,10) = '2021-11-23'
   and action_type ='5'  --播放
 limit 10
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值