Hive 中的 json_tuple 和 parse_url_tuple 函数
json_tuple
{"movie" :"2022" , "rate" :"5" , "time" :"956716207" , "userid" :"6040" }
select json_tuple( '{"movie":"2022","rate":"5","time":"956716207","userid":"6040"}' , "movie" , "rate" , "time" , "userid" )
as ( movieid, rate, time , userid) ;
movieid rate time userid
2022 5 956716207 6040
parse_url_tuple
parse_url_tuple 函数用来解析 URL
http:
select parse_url_tuple( "http://www.aaa.com/bbb/cc.html?sessionid=123456&a=b&c=d" , "HOST" , "PATH" , "QUERY" , "QUERY:sessionid" , "QUERY:a" , "QUERY:c" )
as ( HOST, PATH, QUERY, SESSIONID, A, C) ;
host path query sessionid a c
www. aaa. com / bbb/ cc. html sessionid= 123456 & a= b& c= d 123456 b d
开窗函数的使用
hive_rownumber. id hive_rownumber. age hive_rownumber. name hive_rownumber. gender
1 18 zhangsan M
2 19 lisi M
3 22 wangwu F
4 16 zhaoliu F
5 30 tianqi M
6 26 wangba F
select id, age, name, gender, r
from
(
select id, age, name, gender,
ROW_NUMBER( ) over ( PARTITION BY gender order by age desc ) r
from hive_rownumber
) t
where t. r <= 2 ;
id age name gender r
6 26 wangba F 1
3 22 wangwu F 2
5 30 tianqi M 1
2 19 lisi M 2
hiveserver2(HS2) 和 beeline
beeline是hive官方推荐的一个命令行客户端,需要先启动hiveserver2(HS2)服务
cd ${HIVE_HOME} /bin
nohup ./hiveserver2 &
./beeline -u jdbc:hive2://hadoop001:10000/default -n hadoop
复杂数据类型
arrays:数组,一个数组中装的元素类型是一样的
zhangsan beijing, shanghai, tianjin, hangzhou
lisi changchun, chengdu, wuhan, beijing
create table hive_array(
name string,
work_locations array< string>
) row format
delimited fields terminated by '\t'
collection items terminated by ',' ;
hive_array. name hive_array. work_locations
zhangsan [ "beijing" , "shanghai" , "tianjin" , "hangzhou" ]
lisi [ "changchun" , "chengdu" , "wuhan" , "beijing" ]
work_locations[ index ]
size( work_locations)
array_contains( work_locations, "hangzhou" ) ;
1 , zhangsan, father:xiaoming
2 , lisi, father:mayun
3 , wangwu, father:wangjianlin
4 , mayun, father:mayongzhen
create table hive_map(
id int ,
name string,
members map< string, string> ,
age int
) row format
delimited fields terminated by ','
collection items terminated by '#'
map keys terminated by ':' ;
hive_map. id hive_map. name hive_map. members hive_map. age
1 zhangsan {"father" :"xiaoming" , "mother" :"xiaohuang" , "brother" :"xiaoxu" } 28
2 lisi {"father" :"mayun" , "mother" :"huangyi" , "brother" :"guanyu" } 22
3 wangwu {"father" :"wangjianlin" , "mother" :"ruhua" , "sister" :"jingtian" } 29
4 mayun {"father" :"mayongzhen" , "mother" :"angelbaby" } 26
members[ "father" ]
map_keys( members)
map_values( members)
size( members)
192.168 .1 .1
192.168 .1 .2
192.168 .1 .3
192.168 .1 .4
create table hive_struct(
ip string,
userinfo struct< name:string, age:int > )
row format
delimited fields terminated by '#'
collection items terminated by ':' ;
hive_struct. ip hive_struct. userinfo
192.168 .1 .1 {"name" :"zhangsan" , "age" :40 }
192.168 .1 .2 {"name" :"lisi" , "age" :50 }
192.168 .1 .3 {"name" :"wangwu" , "age" :60 }
192.168 .1 .4 {"name" :"zhaoliu" , "age" :70 }
userinfo. name
分区表
10703007267488 2014 - 05 - 01 06 :01 :12.334 + 01
10101043505096 2014 - 05 - 01 07 :28 :12.342 + 01
10103043509747 2014 - 05 - 01 07 :50 :12.33 + 01
10103043501575 2014 - 05 - 01 09 :27 :12.33 + 01
10104043514061 2014 - 05 - 01 09 :03 :12.324 + 01
create table order_partition(
order_no string,
order_time string
)
partitioned by ( event_month string)
row format delimited fields terminated by '\t' ;
load data local inpath 'xxx' into table order_partition partition ( event_month= '2014-05' ) ;
order_partition. order_no order_partition. order_time order_partition. event_month
10703007267488 2014 - 05 - 01 06 :01 :12.334 + 01 2014 - 05
10101043505096 2014 - 05 - 01 07 :28 :12.342 + 01 2014 - 05
10103043509747 2014 - 05 - 01 07 :50 :12.33 + 01 2014 - 05
10103043501575 2014 - 05 - 01 09 :27 :12.33 + 01 2014 - 05
10104043514061 2014 - 05 - 01 09 :03 :12.324 + 01 2014 - 05
hdfs dfs - mkdir / user / hive/ warehouse/ order_partition/ event_month= 2014 - 06
hdfs dfs - put xxx / user / hive/ warehouse/ order_partition/ event_month= 2014 - 06
msck repair table order_partition
create table order_multi_partition(
order_no string,
order_time string
)
partitioned by ( event_month string, step string)
row format delimited fields terminated by '\t' ;
load data local inpath 'xxx' into table order_partition partition ( event_month= '2014-05' , step= 1 ) ;
order_multi_partition. order_no order_multi_partition. order_time order_multi_partition. event_month order_multi_partition. step
10703007267488 2014 - 05 - 01 06 :01 :12.334 + 01 2014 - 05 1
10101043505096 2014 - 05 - 01 07 :28 :12.342 + 01 2014 - 05 1
10103043509747 2014 - 05 - 01 07 :50 :12.33 + 01 2014 - 05 1
10103043501575 2014 - 05 - 01 09 :27 :12.33 + 01 2014 - 05 1
10104043514061 2014 - 05 - 01 09 :03 :12.324 + 01 2014 - 05 1
create table emp_partition(
empno int ,
ename string,
job string,
mgr int ,
hiredate string,
sal double ,
comm double
) partitioned by ( deptno int )
row format
delimited fields terminated by '\t' ;
insert into table emp_partition partition ( deptno= 10 )
select empno, ename, job, mgr, hiredate, sal, comm from emp
where deptno= 10 ;
insert into table emp_dynamic_partition partition ( deptno)
select empno, ename, job, mgr, hiredate, sal, comm, deptno from emp;