altertable emp_part1 drop partition (day='20170306',hour='0');load data local inpath '/home/user01/emp.txt'intotable emp_part1 partition (day='20170308',hour='9');load data local inpath '/home/user01/emp.txt'intotable emp_part1 partition (day='20170308',hour='10');load data local inpath '/home/user01/emp.txt'intotable emp_part1 partition (day='20170308',hour='14');load data local inpath '/home/user01/emp.txt'intotable emp_part1 partition (day='20170309',hour='10');
查询分区数据
select * from emp_part1 whereday='20170308'select * from emp_part1 whereday='20170308'andhour='14';
查询所有的分区信息
show partitons emp_part1;
分区可以理解为分类,通过分类把不同类型,时间,地域的数据放到不同的目录下。
分类的标准就是分区字段,可以一个,也可以多个。
分区表的意义在于优化查询。查询时尽量利用分区字段。如果不使用分区字段,就会全表扫描。
-
动态分区表:多维度数据处理及查询 严格模式:static partitioned by (county string,states string) 非严格模式:partitioned by (county string,states string)
需要设置以下参数: //是否开启动态分区功能 0.13版本默认开启
set hive.exec.dynamic.partition=true;
动态分区的模式,默认strict,表示必须指定至少一个分区为静态分区,nonstrict模式表示允许所有的分区字段都可以使用动态分区
set hive.exec.dynamic.partition.mode=nostrict;createtable dypart(
id int,
name string
)
partitioned by (addr string)
row format delimited fields terminated by'\;';
//使用特殊字符作为分隔符时需要转义
//动态分区必须使用mapreduce才能完成,所以不能使用load方式加载
insertintotable dypart partition (addr) select deptno,deptname,addr as addr from dept;
insertintotable dypart2 partition (country='usa',province) select empno,empname,empjob,mgno,birthday,salary,bonus,depno,depno as province from emp;
通过子查询方式
insertintotable dypart2 partition (country='usa',province) select c.empno,c.empname,c.empjob,c.mgno,c.birthday,c.salary,c.bonus,c.depno,c.deptname as province from (select * from emp a join dept b on a.depno=b.deptno) c;
覆盖导入方式,此时overwrite和into不能连用
insert overwrite table dypart2 partition (country='china',province) select a.empno,a.empname,a.empjob,a.mgno,a.birthday,a.salary,a.bonus,a.depno,b.deptname as province from emp a join dept b ona.depno=b.deptno;