-- json数据
{"OTHER_INFO":{"MOBILE_ADDR":"四川省内江市潘咱陛路1233号闷羞小区19单元484室","DATA2":{"CERT_ADDR":"宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092室"},"DATA1":{"MOBILE_NUM":"15360425546"},"MOBILE_NUM":"15902875723"},"APP_NO":"RVVG5252500393","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"YLYC99019292814246548503","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Edmund","MAIL":"m74e5czt2y@rvhts.pdu","CERT_TYPE":"2","CERT_NO":"446760199707032392","IDEN_VALID_DT":"99991231","CUST_NAME":"独孤拘害","AGE":"5","MOBILE":"15744158190"},"SCENARIO_INFO":{"ADDRESS":"云南省临沧市猫逐还路1265号兢顶朔小区3单元376室","CARD_NUM":"6223887916967795"},"APPLY_TIME":"1628067497730"}
{"OTHER_INFO":{"MOBILE_ADDR":"青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491室","DATA2":{"CERT_ADDR":"安徽省淮南市蚂鄙系路2476号时究小区10单元2465室"},"DATA1":{"MOBILE_NUM":"15210663115"},"MOBILE_NUM":"15290171152"},"APP_NO":"FZGR9373403233","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"KYTD89059389151000106477","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Frederica","MAIL":"y0jogwn7vv@bnryy.4cp","CERT_TYPE":"2","CERT_NO":"423047199210041239","IDEN_VALID_DT":"99991231","CUST_NAME":"尉迟蛀","AGE":"5","MOBILE":"13857964198"},"SCENARIO_INFO":{"ADDRESS":"山东省济宁市迂佬藻路1489号展镁小区7单元1617室","CARD_NUM":"6229127368018050"},"APPLY_TIME":"1628067579234"}
{"OTHER_INFO":{"MOBILE_ADDR":"浙江省杭州市藻极路1103号缄奉小区5单元1080室","DATA2":{"CERT_ADDR":"甘肃省天水市褥茨路735号废输剑小区14单元490室"},"DATA1":{"MOBILE_NUM":"15065179482"},"MOBILE_NUM":"13427757308"},"APP_NO":"CQEL4819162613","CHANNEL_SOURCE":"APP","BUSINESS_CODE":"TEST01","STEP_TYPE":"004","SESSION_ID":"LECY94935395143750820224","CUST_INFO":{"GENDER_CD":"1","NAME_EN":"Ursula","MAIL":"ximjrvukl5@43sw4.xvg","CERT_TYPE":"1","CERT_NO":"141043198504099090","IDEN_VALID_DT":"99991231","CUST_NAME":"高晚","AGE":"1","MOBILE":"13658608487"},"SCENARIO_INFO":{"ADDRESS":"河北省唐山市圭布懂路4597号骡昏小区8单元544室","CARD_NUM":"6224871441029643"},"APPLY_TIME":"1628067745581"}
-- hive get_json_objec解析json练习
CREATE TABLE json_t( lines string ) comment'json测试表';
load data local inpath '/root/json.txt' overwrite into table json_t;
0: jdbc:hive2://wxt01:10000> select lines from json_t;
OK
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
| lines |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
| {"OTHER_INFO":{"MOBILE_ADDR":"四川省内江市潘咱陛路1233号闷羞小区19单元484室","DATA2":{"CERT_ADDR":"宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092室"},"DATA1":{"MOBILE_NUM":"15360425546"},"MOBILE_NUM":"15902875723"},"APP_NO":"RVVG5252500393","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"YLYC99019292814246548503","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Edmund","MAIL":"m74e5czt2y@rvhts.pdu","CERT_TYPE":"2","CERT_NO":"446760199707032392","IDEN_VALID_DT":"99991231","CUST_NAME":"独孤拘害","AGE":"5","MOBILE":"15744158190"},"SCENARIO_INFO":{"ADDRESS":"云南省临沧市猫逐还路1265号兢顶朔小区3单元376室","CARD_NUM":"6223887916967795"},"APPLY_TIME":"1628067497730"} |
| {"OTHER_INFO":{"MOBILE_ADDR":"青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491室","DATA2":{"CERT_ADDR":"安徽省淮南市蚂鄙系路2476号时究小区10单元2465室"},"DATA1":{"MOBILE_NUM":"15210663115"},"MOBILE_NUM":"15290171152"},"APP_NO":"FZGR9373403233","CHANNEL_SOURCE":"微信","BUSINESS_CODE":"TEST01","STEP_TYPE":"002","SESSION_ID":"KYTD89059389151000106477","CUST_INFO":{"GENDER_CD":"2","NAME_EN":"Frederica","MAIL":"y0jogwn7vv@bnryy.4cp","CERT_TYPE":"2","CERT_NO":"423047199210041239","IDEN_VALID_DT":"99991231","CUST_NAME":"尉迟蛀","AGE":"5","MOBILE":"13857964198"},"SCENARIO_INFO":{"ADDRESS":"山东省济宁市迂佬藻路1489号展镁小区7单元1617室","CARD_NUM":"6229127368018050"},"APPLY_TIME":"1628067579234"} |
| {"OTHER_INFO":{"MOBILE_ADDR":"浙江省杭州市藻极路1103号缄奉小区5单元1080室","DATA2":{"CERT_ADDR":"甘肃省天水市褥茨路735号废输剑小区14单元490室"},"DATA1":{"MOBILE_NUM":"15065179482"},"MOBILE_NUM":"13427757308"},"APP_NO":"CQEL4819162613","CHANNEL_SOURCE":"APP","BUSINESS_CODE":"TEST01","STEP_TYPE":"004","SESSION_ID":"LECY94935395143750820224","CUST_INFO":{"GENDER_CD":"1","NAME_EN":"Ursula","MAIL":"ximjrvukl5@43sw4.xvg","CERT_TYPE":"1","CERT_NO":"141043198504099090","IDEN_VALID_DT":"99991231","CUST_NAME":"高晚","AGE":"1","MOBILE":"13658608487"},"SCENARIO_INFO":{"ADDRESS":"河北省唐山市圭布懂路4597号骡昏小区8单元544室","CARD_NUM":"6224871441029643"},"APPLY_TIME":"1628067745581"} |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--+
3 rows selected (0.1 seconds)
0: jdbc:hive2://wxt01:10000>
-- get_json_object()
SELECT
get_json_object(lines,'$.APP_NO') as APP_NO,
get_json_object(lines,'$.CHANNEL_SOURCE') as CHANNEL_SOURCE,
get_json_object(lines,'$.SESSION_ID') as SESSION_ID
FROM json_t
;
+-----------------+-----------------+---------------------------+--+
| app_no | channel_source | session_id |
+-----------------+-----------------+---------------------------+--+
| RVVG5252500393 | 微信 | YLYC99019292814246548503 |
| FZGR9373403233 | 微信 | KYTD89059389151000106477 |
| CQEL4819162613 | APP | LECY94935395143750820224 |
+-----------------+-----------------+---------------------------+--+
-- json_tuple
select
APP_NO,
CHANNEL_SOURCE,
SESSION_ID
from json_t
lateral view json_tuple(lines,'APP_NO','CHANNEL_SOURCE','SESSION_ID') b as APP_NO,CHANNEL_SOURCE,SESSION_ID;
+-----------------+-----------------+---------------------------+--+
| app_no | channel_source | session_id |
+-----------------+-----------------+---------------------------+--+
| RVVG5252500393 | 微信 | YLYC99019292814246548503 |
| FZGR9373403233 | 微信 | KYTD89059389151000106477 |
| CQEL4819162613 | APP | LECY94935395143750820224 |
+-----------------+-----------------+---------------------------+--+
SELECT
get_json_object(lines,'$.OTHER_INFO.MOBILE_ADDR') as MOBILE_ADDR,
get_json_object(lines,'$.OTHER_INFO.DATA2.CERT_ADDR') as CERT_ADDR,
get_json_object(lines,'$.OTHER_INFO.DATA1.MOBILE_NUM') as MOBILE_NUM_one,
get_json_object(lines,'$.OTHER_INFO.MOBILE_NUM') as MOBILE_NUM_two,
get_json_object(lines,'$.APP_NO') as APP_NO,
get_json_object(lines,'$.CHANNEL_SOURCE') as CHANNEL_SOURCE,
get_json_object(lines,'$.BUSINESS_CODE') as BUSINESS_CODE,
get_json_object(lines,'$.STEP_TYPE') as STEP_TYPE,
get_json_object(lines,'$.SESSION_ID') as SESSION_ID,
get_json_object(lines,'$.APPLY_TIME') as APPLY_TIME,
get_json_object(lines,'$.CUST_INFO.GENDER_CD') as GENDER_CD,
get_json_object(lines,'$.CUST_INFO.NAME_EN') as NAME_EN,
get_json_object(lines,'$.CUST_INFO.MAIL') as MAIL,
get_json_object(lines,'$.CUST_INFO.CERT_TYPE') as CERT_TYPE,
get_json_object(lines,'$.CUST_INFO.CERT_NO') as CERT_NO,
get_json_object(lines,'$.CUST_INFO.IDEN_VALID_DT') as IDEN_VALID_DT,
get_json_object(lines,'$.CUST_INFO.CUST_NAME') as CUST_NAME,
get_json_object(lines,'$.CUST_INFO.AGE') as AGE,
get_json_object(lines,'$.CUST_INFO.MOBILE') as MOBILE,
get_json_object(lines,'$.SCENARIO_INFO.ADDRESS') as ADDRESS,
get_json_object(lines,'$.SCENARIO_INFO.CARD_NUM') as CARD_NUM
FROM json_t
;
+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+
| mobile_addr | cert_addr | mobile_num_one | mobile_num_two | app_no | channel_source | business_code | step_type | session_id | apply_time | gender_cd | name_en | mail | cert_type | cert_no | iden_valid_dt | cust_name | age | mobile | address | card_num |
+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+
| 四川省内江市潘咱陛路1233号闷羞小区19单元484室 | 宁夏回族自治区固原市薄精路3992号裸糕氏小区9单元1092室 | 15360425546 | 15902875723 | RVVG5252500393 | 微信 | TEST01 | 002 | YLYC99019292814246548503 | 1628067497730 | 2 | Edmund | m74e5czt2y@rvhts.pdu | 2 | 446760199707032392 | 99991231 | 独孤拘害 | 5 | 15744158190 | 云南省临沧市猫逐还路1265号兢顶朔小区3单元376室 | 6223887916967795 |
| 青海省海北藏族自治州砰役厩路3499号唇瞪小区9单元491室 | 安徽省淮南市蚂鄙系路2476号时究小区10单元2465室 | 15210663115 | 15290171152 | FZGR9373403233 | 微信 | TEST01 | 002 | KYTD89059389151000106477 | 1628067579234 | 2 | Frederica | y0jogwn7vv@bnryy.4cp | 2 | 423047199210041239 | 99991231 | 尉迟蛀 | 5 | 13857964198 | 山东省济宁市迂佬藻路1489号展镁小区7单元1617室 | 6229127368018050 |
| 浙江省杭州市藻极路1103号缄奉小区5单元1080室 | 甘肃省天水市褥茨路735号废输剑小区14单元490室 | 15065179482 | 13427757308 | CQEL4819162613 | APP | TEST01 | 004 | LECY94935395143750820224 | 1628067745581 | 1 | Ursula | ximjrvukl5@43sw4.xvg | 1 | 141043198504099090 | 99991231 | 高晚 | 1 | 13658608487 | 河北省唐山市圭布懂路4597号骡昏小区8单元544室 | 6224871441029643 |
+---------------------------------+----------------------------------+-----------------+-----------------+-----------------+-----------------+----------------+------------+---------------------------+----------------+------------+------------+-----------------------+------------+---------------------+----------------+------------+------+--------------+------------------------------+-------------------+--+
select
get_json_object('{"name":"张三","age":"44"}','$.name') as name,
get_json_object('{"name":"张三","age":"44"}','$.age') as age
;
+-------+------+--+
| name | age |
+-------+------+--+
| 张三 | 44 |
+-------+------+--+
-- json格式不正确或不完整的放回null
select
get_json_object('{"name":"张三","age":""}','$.name') as name,
get_json_object('{"name":"张三","age":""}','$.age') as age
;
+-------+------+--+
| name | age |
+-------+------+--+
| 张三 | |
+-------+------+--+
select
get_json_object('{"name":}','$.name') as name
;
+-------+--+
| name |
+-------+--+
| NULL |
+-------+--+
select
get_json_object('{"name":null}','$.name') as name
;
+-------+--+
| name |
+-------+--+
| NULL |
+-------+--+
select
get_json_object('{"name":张三}','$.name') as name
;
+-------+--+
| name |
+-------+--+
| NULL |
+-------+--+
select
get_json_object('{"name":zhangsang}','$.name') as name
;
select
get_json_object('{"age":4}','$.age') as age
;
+------+--+
| age |
+------+--+
| 4 |
+------+--+
-- json不完整的返回null
select
get_json_object('{"name":"张三","age":"44"','$.name') as name,
get_json_object('{"name":"张三","age":"44"}','$.age') as age
;
+-------+------+--+
| name | age |
+-------+------+--+
| NULL | 44 |
+-------+------+--+
-- json中有数组
{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}
select
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.code') as code,
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.msg') as msg,
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.data') as data
;
+-------+------+-------------------------------------------------------------+--+
| code | msg | data |
+-------+------+-------------------------------------------------------------+--+
| 200 | OK | ["3148120404262382","1443363211304208","4322133941384257"] |
+-------+------+-------------------------------------------------------------+--+
--获取json中的数组中第一个数据
select
get_json_object('{"code": 200,"msg": "OK","data": ["3148120404262382","1443363211304208","4322133941384257"]}','$.data[0]') as data
;
+-------------------+--+
| data |
+-------------------+--+
| 3148120404262382 |
+-------------------+--+
{"store":{"fruit":[{"weight":8,"type":"apple"}, {"weight":9,"type":"pear"}], "bicycle":{"price":19.95,"color":"red"}}, "email":"amy@only_for_json_udf_test.net", "owner":"amy" }
-- 去到从json中解析出第一个weight字段
select
get_json_object('{"store":{"fruit":[{"weight":8,"type":"apple"}, {"weight":9,"type":"pear"}], "bicycle":{"price":19.95,"color":"red"}}, "email":"amy@only_for_json_udf_test.net", "owner":"amy" }','$.store.fruit[0].weight') as weight
;
+---------+--+
| weight |
+---------+--+
| 8 |
+---------+--+
hive解析json
最新推荐文章于 2023-04-11 11:46:24 发布