hive解析json

-- json数据
{"OTHER_INFO":{"MOBILE_ADDR":"四川省内江市潘咱陛路1233号闷羞小区19单元484室","DATA2":{"CERT_ADDR":"宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092室"},"DATA1":{"MOBILE_NUM":"15360425546"},"MOBILE_NUM":"15902875723"},"APP_NO":"RVVG5252500393","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"YLYC99019292814246548503","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Edmund","MAIL":"m74e5czt2y@rvhts.pdu","CERT_TYPE":"2","CERT_NO":"446760199707032392","IDEN_VALID_DT":"99991231","CUST_NAME":"独孤拘害","AGE":"5","MOBILE":"15744158190"},"SCENARIO_INFO":{"ADDRESS":"云南省临沧市猫逐还路1265号兢顶朔小区3单元376室","CARD_NUM":"6223887916967795"},"APPLY_TIME":"1628067497730"}
{"OTHER_INFO":{"MOBILE_ADDR":"青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491室","DATA2":{"CERT_ADDR":"安徽省淮南市蚂鄙系路2476号时究小区10单元2465室"},"DATA1":{"MOBILE_NUM":"15210663115"},"MOBILE_NUM":"15290171152"},"APP_NO":"FZGR9373403233","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"KYTD89059389151000106477","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Frederica","MAIL":"y0jogwn7vv@bnryy.4cp","CERT_TYPE":"2","CERT_NO":"423047199210041239","IDEN_VALID_DT":"99991231","CUST_NAME":"尉迟蛀","AGE":"5","MOBILE":"13857964198"},"SCENARIO_INFO":{"ADDRESS":"山东省济宁市迂佬藻路1489号展镁小区7单元1617室","CARD_NUM":"6229127368018050"},"APPLY_TIME":"1628067579234"}
{"OTHER_INFO":{"MOBILE_ADDR":"浙江省杭州市藻极路1103号缄奉小区5单元1080室","DATA2":{"CERT_ADDR":"甘肃省天水市褥茨路735号废输剑小区14单元490室"},"DATA1":{"MOBILE_NUM":"15065179482"},"MOBILE_NUM":"13427757308"},"APP_NO":"CQEL4819162613","CHANNEL_SOURCE":"APP","BUSINESS_CODE":"TEST01","STEP_TYPE":"004","SESSION_ID":"LECY94935395143750820224","CUST_INFO":{"GENDER_CD":"1","NAME_EN":"Ursula","MAIL":"ximjrvukl5@43sw4.xvg","CERT_TYPE":"1","CERT_NO":"141043198504099090","IDEN_VALID_DT":"99991231","CUST_NAME":"高晚","AGE":"1","MOBILE":"13658608487"},"SCENARIO_INFO":{"ADDRESS":"河北省唐山市圭布懂路4597号骡昏小区8单元544室","CARD_NUM":"6224871441029643"},"APPLY_TIME":"1628067745581"}
-- hive get_json_objec解析json练习
CREATE TABLE json_t( lines string ) comment'json测试表';

load data local inpath '/root/json.txt' overwrite into table json_t;

0: jdbc:hive2://wxt01:10000> select lines from json_t;
OK
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
|                                                                                                                                                                                                                                                                                                                            lines                                                                                                                                                                                                                                                                                                                             |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
| {"OTHER_INFO":{"MOBILE_ADDR":"四川省内江市潘咱陛路1233号闷羞小区19单元484室","DATA2":{"CERT_ADDR":"宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092室"},"DATA1":{"MOBILE_NUM":"15360425546"},"MOBILE_NUM":"15902875723"},"APP_NO":"RVVG5252500393","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"YLYC99019292814246548503","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Edmund","MAIL":"m74e5czt2y@rvhts.pdu","CERT_TYPE":"2","CERT_NO":"446760199707032392","IDEN_VALID_DT":"99991231","CUST_NAME":"独孤拘害","AGE":"5","MOBILE":"15744158190"},"SCENARIO_INFO":{"ADDRESS":"云南省临沧市猫逐还路1265号兢顶朔小区3单元376室","CARD_NUM":"6223887916967795"},"APPLY_TIME":"1628067497730"}    |
| {"OTHER_INFO":{"MOBILE_ADDR":"青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491室","DATA2":{"CERT_ADDR":"安徽省淮南市蚂鄙系路2476号时究小区10单元2465室"},"DATA1":{"MOBILE_NUM":"15210663115"},"MOBILE_NUM":"15290171152"},"APP_NO":"FZGR9373403233","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"KYTD89059389151000106477","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Frederica","MAIL":"y0jogwn7vv@bnryy.4cp","CERT_TYPE":"2","CERT_NO":"423047199210041239","IDEN_VALID_DT":"99991231","CUST_NAME":"尉迟蛀","AGE":"5","MOBILE":"13857964198"},"SCENARIO_INFO":{"ADDRESS":"山东省济宁市迂佬藻路1489号展镁小区7单元1617室","CARD_NUM":"6229127368018050"},"APPLY_TIME":"1628067579234"}  |
| {"OTHER_INFO":{"MOBILE_ADDR":"浙江省杭州市藻极路1103号缄奉小区5单元1080室","DATA2":{"CERT_ADDR":"甘肃省天水市褥茨路735号废输剑小区14单元490室"},"DATA1":{"MOBILE_NUM":"15065179482"},"MOBILE_NUM":"13427757308"},"APP_NO":"CQEL4819162613","CHANNEL_SOURCE":"APP","BUSINESS_CODE":"TEST01","STEP_TYPE":"004","SESSION_ID":"LECY94935395143750820224","CUST_INFO":{"GENDER_CD":"1","NAME_EN":"Ursula","MAIL":"ximjrvukl5@43sw4.xvg","CERT_TYPE":"1","CERT_NO":"141043198504099090","IDEN_VALID_DT":"99991231","CUST_NAME":"高晚","AGE":"1","MOBILE":"13658608487"},"SCENARIO_INFO":{"ADDRESS":"河北省唐山市圭布懂路4597号骡昏小区8单元544室","CARD_NUM":"6224871441029643"},"APPLY_TIME":"1628067745581"}            |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
3 rows selected (0.1 seconds)
0: jdbc:hive2://wxt01:10000> 

-- get_json_object()
SELECT
get_json_object(lines,'$.APP_NO') as APP_NO,
get_json_object(lines,'$.CHANNEL_SOURCE') as CHANNEL_SOURCE,
get_json_object(lines,'$.SESSION_ID') as SESSION_ID
FROM json_t
;
+-----------------+-----------------+---------------------------+--+
|     app_no      | channel_source  |        session_id         |
+-----------------+-----------------+---------------------------+--+
| RVVG5252500393  | 微信              | YLYC99019292814246548503  |
| FZGR9373403233  | 微信              | KYTD89059389151000106477  |
| CQEL4819162613  | APP             | LECY94935395143750820224  |
+-----------------+-----------------+---------------------------+--+

-- json_tuple
select
APP_NO,
CHANNEL_SOURCE,
SESSION_ID
from json_t
lateral view json_tuple(lines,'APP_NO','CHANNEL_SOURCE','SESSION_ID') b as APP_NO,CHANNEL_SOURCE,SESSION_ID;
+-----------------+-----------------+---------------------------+--+
|     app_no      | channel_source  |        session_id         |
+-----------------+-----------------+---------------------------+--+
| RVVG5252500393  | 微信              | YLYC99019292814246548503  |
| FZGR9373403233  | 微信              | KYTD89059389151000106477  |
| CQEL4819162613  | APP             | LECY94935395143750820224  |
+-----------------+-----------------+---------------------------+--+


SELECT
get_json_object(lines,'$.OTHER_INFO.MOBILE_ADDR') as MOBILE_ADDR,
get_json_object(lines,'$.OTHER_INFO.DATA2.CERT_ADDR') as CERT_ADDR,
get_json_object(lines,'$.OTHER_INFO.DATA1.MOBILE_NUM') as MOBILE_NUM_one,
get_json_object(lines,'$.OTHER_INFO.MOBILE_NUM') as MOBILE_NUM_two,
get_json_object(lines,'$.APP_NO') as APP_NO,
get_json_object(lines,'$.CHANNEL_SOURCE') as CHANNEL_SOURCE,
get_json_object(lines,'$.BUSINESS_CODE') as BUSINESS_CODE,
get_json_object(lines,'$.STEP_TYPE') as STEP_TYPE,
get_json_object(lines,'$.SESSION_ID') as SESSION_ID,
get_json_object(lines,'$.APPLY_TIME') as APPLY_TIME,
get_json_object(lines,'$.CUST_INFO.GENDER_CD') as GENDER_CD,
get_json_object(lines,'$.CUST_INFO.NAME_EN') as NAME_EN,
get_json_object(lines,'$.CUST_INFO.MAIL') as MAIL,
get_json_object(lines,'$.CUST_INFO.CERT_TYPE') as CERT_TYPE,
get_json_object(lines,'$.CUST_INFO.CERT_NO') as CERT_NO,
get_json_object(lines,'$.CUST_INFO.IDEN_VALID_DT') as IDEN_VALID_DT,
get_json_object(lines,'$.CUST_INFO.CUST_NAME') as CUST_NAME,
get_json_object(lines,'$.CUST_INFO.AGE') as AGE,
get_json_object(lines,'$.CUST_INFO.MOBILE') as MOBILE,
get_json_object(lines,'$.SCENARIO_INFO.ADDRESS') as ADDRESS,
get_json_object(lines,'$.SCENARIO_INFO.CARD_NUM') as CARD_NUM
FROM json_t
;

+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+
|           mobile_addr           |            cert_addr             | mobile_num_one  | mobile_num_two  |     app_no      | channel_source  | business_code  | step_type  |        session_id         |   apply_time   | gender_cd  |  name_en   |         mail          | cert_type  |       cert_no       | iden_valid_dt  | cust_name  | age  |    mobile    |           address            |     card_num      |
+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+
| 四川省内江市潘咱陛路1233号闷羞小区19单元484| 宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092| 15360425546     | 15902875723     | RVVG5252500393  | 微信              | TEST01         | 002        | YLYC99019292814246548503  | 1628067497730  | 2          | Edmund     | m74e5czt2y@rvhts.pdu  | 2          | 446760199707032392  | 99991231       | 独孤拘害       | 5    | 15744158190  | 云南省临沧市猫逐还路1265号兢顶朔小区3单元376| 6223887916967795  |
| 青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491| 安徽省淮南市蚂鄙系路2476号时究小区10单元2465| 15210663115     | 15290171152     | FZGR9373403233  | 微信              | TEST01         | 002        | KYTD89059389151000106477  | 1628067579234  | 2          | Frederica  | y0jogwn7vv@bnryy.4cp  | 2          | 423047199210041239  | 99991231       | 尉迟蛀        | 5    | 13857964198  | 山东省济宁市迂佬藻路1489号展镁小区7单元1617| 6229127368018050  |
| 浙江省杭州市藻极路1103号缄奉小区5单元1080| 甘肃省天水市褥茨路735号废输剑小区14单元490| 15065179482     | 13427757308     | CQEL4819162613  | APP             | TEST01         | 004        | LECY94935395143750820224  | 1628067745581  | 1          | Ursula     | ximjrvukl5@43sw4.xvg  | 1          | 141043198504099090  | 99991231       | 高晚         | 1    | 13658608487  | 河北省唐山市圭布懂路4597号骡昏小区8单元544| 6224871441029643  |
+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+

select
get_json_object('{"name":"张三","age":"44"}','$.name') as name,
get_json_object('{"name":"张三","age":"44"}','$.age') as age
;
+-------+------+--+
| name  | age  |
+-------+------+--+
| 张三    | 44   |
+-------+------+--+

-- json格式不正确或不完整的放回null
select
get_json_object('{"name":"张三","age":""}','$.name') as name,
get_json_object('{"name":"张三","age":""}','$.age') as age
;
+-------+------+--+
| name  | age  |
+-------+------+--+
| 张三    |      |
+-------+------+--+

select
get_json_object('{"name":}','$.name') as name
;
+-------+--+
| name  |
+-------+--+
| NULL  |
+-------+--+

select
get_json_object('{"name":null}','$.name') as name
;
+-------+--+
| name  |
+-------+--+
| NULL  |
+-------+--+

select
get_json_object('{"name":张三}','$.name') as name
;
+-------+--+
| name  |
+-------+--+
| NULL  |
+-------+--+

select
get_json_object('{"name":zhangsang}','$.name') as name
;

select
get_json_object('{"age":4}','$.age') as age
;
+------+--+
| age  |
+------+--+
| 4    |
+------+--+

-- json不完整的返回null
select
get_json_object('{"name":"张三","age":"44"','$.name') as name,
get_json_object('{"name":"张三","age":"44"}','$.age') as age
;
+-------+------+--+
| name  | age  |
+-------+------+--+
| NULL  | 44   |
+-------+------+--+

-- json中有数组
{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}

select
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.code') as code,
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.msg') as msg,
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.data') as data
;
+-------+------+-------------------------------------------------------------+--+
| code  | msg  |                            data                             |
+-------+------+-------------------------------------------------------------+--+
| 200   | OK   | ["3148120404262382","1443363211304208","4322133941384257"]  |
+-------+------+-------------------------------------------------------------+--+

--获取json中的数组中第一个数据
select
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.data[0]') as data
;
+-------------------+--+
|       data        |
+-------------------+--+
| 3148120404262382  |
+-------------------+--+

{"store":{"fruit":[{"weight":8,"type":"apple"}, {"weight":9,"type":"pear"}],  "bicycle":{"price":19.95,"color":"red"}}, "email":"amy@only_for_json_udf_test.net", "owner":"amy" }
-- 去到从json中解析出第一个weight字段
select
get_json_object('{"store":{"fruit":[{"weight":8,"type":"apple"}, {"weight":9,"type":"pear"}],  "bicycle":{"price":19.95,"color":"red"}}, "email":"amy@only_for_json_udf_test.net", "owner":"amy" }','$.store.fruit[0].weight') as weight
;
+---------+--+
| weight  |
+---------+--+
| 8       |
+---------+--+
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值