第一种
如果数据格式是Json格式,如果按照常规的思路我们要使用自定义函数去解析, 那么在这里使用JSonSerde就可以直接读取Json格式的文件,不需要做转化。
如果是第三方jar包或者是自己写的,就必须要先加载jar包:
hive> add jar /hivedata/json-serde-1.3.8-jar-with-dependencies.jar;
第二种
json格式数据表需要通过serde机制处理
1 在hive-site.xml中设置三方jar包
<property>
<name>hive.aux.jars.path</name>
<value>/usr/local/hive-2.1.1/lib/</value>
</property>
2 在hive.aux.jars.path设置的路径中增加hive-hcatalog-core-2.1.1.jar 拷贝路径为$HIVE_HOME/hcatalog/share/hcatalog/hive-hcatalog-core-2.1.1.jar到/usr/local/hive-2.1.1/lib/中,重启hive即可
简单案例演示:
#数据内容:
{"pid":1,"content":"this is pid of 1 content"}
{"pid":2,"content":"this is pid of 2 content"}
#创建表
create table if not exists js1(
pid int,
content string
)
row format serde "org.openx.data.jsonserde.JsonSerDe";
#导入数据
load data local inpath '/opt/data/js.json' into table js1;
复杂案例演示
#数据内容
{"uid":1,"uname":"zs","belong":["zs1","zs2","zs3"],"tax":{"shebao":
[220,280,300],"gongjijin":[600,1200,2400]}}
{"uid":2,"uname":"ls","belong":["ls1","ls2","ls3"],"tax":{"shebao":
[260,300,360],"gongjijin":[800,1600,2600]}}
#创建表
create table if not exists complex(
uid int,
uname string,
belong array<String>,
tax map<String,array<double>>
)
row format serde "org.openx.data.jsonserde.JsonSerDe";
#插入表格
load data local inpath '/opt/data/complex.json' into table complex;
#查询
select c.*
from complex c
where size(c.belong) = 3 and c.tax["gongjijin"][1] > 1200;