hive 动态分区
按照某个字段自动的将数据加载到指定的分区中
即在插入数据时,不指定具体的分区列值,而是仅仅指定分区字段
1.建临时表
create table demo(
id int,
birthday string,
cost int
)
row format delimited fields terminated by '\t';
2.加载数据
load data local inpath '/root/demo' into table demo;
3.数据
1 2010-11-12 120
2 2010-11-12 121
3 2010-11-12 122
4 2010-11-12 124
5 2010-11-13 122
6 2010-11-13 120
7 2010-11-14 123
8 2010-11-13 1202
9 2010-11-12 1202
10 2010-11-12 1201
4.创建分区表
create table demo2(
id int,
cost int,
birthday string
)
partitioned by(bt string)
row format delimited fields terminated by '\t';
5.设置参数
set hive.exec.dynamic.partition=true; //使用动态分区
set hive.exec.dynamic.partition.mode=nonstrick;//无限制模式,如果模式是strict,则必须有一个静态分区且放在最前面
set hive.exec.max.dynamic.partitions.pernode=10000;//每个节点生成动态分区的最大个数
set hive.exec.max.dynamic.partitions=100000;//生成动态分区的最大个数
set hive.exec.max.created.files=150000;//一个任务最多可以创建的文件数目
set dfs.datanode.max.xcievers=8192;//限定一次最多打开的文件数
set hive.merge.mapfiles=true; //map端的结果进行合并
set mapred.reduce.tasks =20000; //设置reduce task个数
6.加载数据
最后多出一个字段用来动态分区
insert into table demo2 partition(bt)
select id,cost,birthday,birthday from demo; --注意顺序
7.查询分区
0: jdbc:hive2://wxt01:10000> show partitions demo2;
OK
+----------------+--+
| partition |
+----------------+--+
| bt=2010-11-12 |
| bt=2010-11-13 |
| bt=2010-11-14 |
+----------------+--+
3 rows selected (0.626 seconds)
8.整个分区表的数据
0: jdbc:hive2://wxt01:10000> select * from demo2;
OK
+-----------+-------------+-----------------+-------------+--+
| demo2.id | demo2.cost | demo2.birthday | demo2.bt |
+-----------+-------------+-----------------+-------------+--+
| 1 | 120 | 2010-11-12 | 2010-11-12 |
| 2 | 121 | 2010-11-12 | 2010-11-12 |
| 3 | 122 | 2010-11-12 | 2010-11-12 |
| 4 | 124 | 2010-11-12 | 2010-11-12 |
| 9 | 1202 | 2010-11-12 | 2010-11-12 |
| 10 | 1201 | 2010-11-12 | 2010-11-12 |
| 5 | 122 | 2010-11-13 | 2010-11-13 |
| 6 | 120 | 2010-11-13 | 2010-11-13 |
| 8 | 1202 | 2010-11-13 | 2010-11-13 |
| 7 | 123 | 2010-11-14 | 2010-11-14 |
+-----------+-------------+-----------------+-------------+--+
10 rows selected (2.418 seconds)
0: jdbc:hive2://wxt01:10000>
9.查询分区数据
0: jdbc:hive2://wxt01:10000> select id,cost,birthday from demo2 where bt='2010-11-13';
OK
+-----+-------+-------------+--+
| id | cost | birthday |
+-----+-------+-------------+--+
| 5 | 122 | 2010-11-13 |
| 6 | 120 | 2010-11-13 |
| 8 | 1202 | 2010-11-13 |
+-----+-------+-------------+--+
3 rows selected (0.44 seconds)
0: jdbc:hive2://wxt01:10000>