作业:
1.自己建立多级分区,并观察hdfs上的目录
CREATE TABLE `sqq.emp_pars`(
`empno` decimal(4,0),
`ename` varchar(10),
`job` varchar(9),
`mgr` decimal(4,0),
`hiredate` date,
`sal` decimal(7,2),
`comm` decimal(7,2),
`deptno` decimal(2,0))
PARTITIONED BY (
`year` string,
`mounth` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'field.delim'='\t',
'serialization.format'='\t')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://sqq:9000/user/hive/warehouse/sqq.db/emp_pars'
TBLPROPERTIES (
'bucketing_version'='2',
'transient_lastDdlTime'='1649141922');
insert into table emp_pars partition(year='2022',mounth='04')
select * from emp
2.lateral view的用法
平铺数据,解析数组
select case when phone is not null then phone else '000000' end as phone
from json
LATERAL VIEW json_tuple(line,'phone') t as phone
3.json_tuple和get_json_object的区别,尤其是性能方面
select json_tuple(line,'ceo','company','address','phone')
as (ceo,company,address,phone)
from json
select get_json_object(line,'$.ceo') as ceo,
get_json_object(line,'$.company') as company,
get_json_object(line,'$