使用动态分区插入数据时,无需指定分区键值,系统根据插入的数据,自动分配分区。
动态分区需注意以下几点:
- 需有一个同构的普通表做为源表;
- 分区键值和源表字段之间是根据位置来判断的,而不是根据命名来匹配的,分区键值一般对应SELECT后的最后一个字段;
- 动态分区默认是关闭的,使用前要设置相关参数;
下面是一个动态分区的例子:
# 创建分区表和普通表
create table myhive.student_dynamic_partition
(
stu_no int,
stu_name string
) partitioned by (class_no int)
row format delimited fields terminated by ' ';
create table myhive.student
(
stu_no int,
stu_name string,
class_no int
)
row format delimited fields terminated by ' ';
# 上传数据文件到HDFS
[hadoop@node01 hiveData]$ hdfs dfs -put student.txt /
[hadoop@node01 hiveData]$ hdfs dfs -cat /student.txt
1001 john 1
1002 susan 1
1003 smith 2
1004 tom 2
1005 simen 3
# 普通表导入数据
hive (myhive)> load data inpath '/student.txt' overwrite into table student;
hive (myhive)> select * from student;
student.stu_no student.stu_name student.class_no
1001 john 1
1002 susan 1
1003 smith 2
1004 tom 2
1005 simen 3
# 使用动态分区插入数据到分区表中
hive (myhive)> set hive.exec.dynamic.partition=true; #打开动态分区
hive (myhive)> set hive.exec.dynamic.partition.mode=nonstrict; #动态分区模式设置为非严格
hive (myhive)> set hive.exec.max.dynamic.partitions.pernode=1000; #设置每个mapper或reducer的最大动态分区个数
hive (myhive)> insert overwrite table student_dynamic_partition
> partition (class_no)
> select stu_no,stu_name,class_no
> from student;
hive (myhive)> select * from student_dynamic_partition;
student_dynamic_partition.stu_no student_dynamic_partition.stu_name student_dynamic_partition.class_no
1001 john 1
1002 susan 1
1003 smith 2
1004 tom 2
1005 simen 3