hive分区表操作指南_动态分区在哪一层-CSDN博客

本文链接：https://blog.csdn.net/dbc_zt/article/details/109726266

hive分区表操作

分区参数介绍

-- Session settings for dynamic partitioning. Enabling it normally needs only
-- the first two parameters; the remaining ones raise limits for large loads.

-- Enable dynamic partitioning (default is false before Hive 0.9.0, true since).
set hive.exec.dynamic.partition=true;
-- strict:    static or semi-dynamic inserts only; at least one static partition
--            column is required and it must come first.
-- nonstrict: static, semi-dynamic and fully dynamic inserts; a fully dynamic
--            insert requires this mode.
set hive.exec.dynamic.partition.mode=nonstrict;
-- Maximum total number of dynamic partitions that may be created. Default 1000.
set hive.exec.max.dynamic.partitions=10000;
-- Maximum number of dynamic partitions that each mapper/reducer node may create. Default 100.
set hive.exec.max.dynamic.partitions.pernode=10000;
-- Maximum number of files that one job may create.
set hive.exec.max.created.files=150000;
-- Upper bound on the number of files that can be open at the same time.
-- NOTE(review): this is an HDFS DataNode property (newer Hadoop names it
-- dfs.datanode.max.transfer.threads) that is normally configured in
-- hdfs-site.xml; confirm that setting it per session has any effect.
set dfs.datanode.max.xcievers=9182;

分区表创建

-- Partitioned table for user info, partitioned by month (pt_mon) and day (pt_day).
-- Data files are comma-delimited text.
CREATE TABLE IF NOT EXISTS cl_ods.ods_msg_user_info_di
(
    user_id   BIGINT COMMENT '用户id',
    user_msg  STRING COMMENT '用户信息',
    user_mail STRING COMMENT '用户邮箱'
)
COMMENT '用户信息表'
PARTITIONED BY
(
    pt_mon STRING COMMENT '月分区',
    pt_day STRING COMMENT '天分区'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- To inspect an existing table's DDL, run:
--   show create table cl_ods.ods_msg_user_info_di;
--
-- Non-partitioned table in which pt_mon / pt_day are ordinary columns.
-- The insert examples in this article select from it to fill the partitioned
-- table cl_ods.ods_msg_user_info_di.
-- "if not exists" keeps the statement re-runnable.
create table if not exists cl_ods.ods_msg_user_info_df(
user_id bigint comment '用户id',
user_msg string comment '用户信息',
user_mail string comment '用户邮箱',
pt_mon string comment '月',
pt_day string comment '天') comment '用户信息表'
row format delimited fields terminated by ',';
-- ...

插入数据

静态分区插入数据

静态分区是在插入数据前就指定好的，一般通过 insert into、insert overwrite、load data 等方式插入

-- Static-partition insert: both partition values are fixed in the PARTITION
-- clause, so the SELECT list contains only the non-partition columns.
-- INSERT INTO appends to the partition. To replace the data already in that
-- partition, write the first line as
--   INSERT OVERWRITE TABLE cl_ods.ods_msg_user_info_di
-- (OVERWRITE takes the place of INTO; "INSERT OVERWRITE INTO" is not valid).
INSERT INTO TABLE cl_ods.ods_msg_user_info_di
PARTITION (pt_mon = '201801', pt_day = '20180101')
SELECT
    user_id,
    user_msg,
    user_mail
FROM cl_ods.ods_msg_user_info_df
WHERE pt_mon = '201801'
  AND pt_day = '20180101';

混合分区插入数据

一部分分区是静态，一部分是动态分区，需要设置动态分区模式

-- Mixed (semi-dynamic) insert: pt_mon is static, pt_day is dynamic.
-- strict mode is sufficient because the leading partition column is static.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=strict;

-- The static partition column (pt_mon) comes first in the PARTITION clause.
-- The dynamic one (pt_day) takes its value from the last column of the SELECT list.
-- INSERT INTO appends. To replace existing data, write the first line as
--   INSERT OVERWRITE TABLE cl_ods.ods_msg_user_info_di
INSERT INTO TABLE cl_ods.ods_msg_user_info_di
PARTITION (pt_mon = '201801', pt_day)
SELECT
    user_id,
    user_msg,
    user_mail,
    pt_day
FROM cl_ods.ods_msg_user_info_df
WHERE pt_mon = '201801'
  AND pt_day = '20180101';

动态分区插入数据

分区从查询字段中自动获取

-- Fully dynamic insert: no partition value is fixed, which requires nonstrict mode.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

-- Both pt_mon and pt_day are dynamic. Their values come from the trailing
-- columns of the SELECT list, in the same order as in the PARTITION clause.
-- INSERT INTO appends. To replace existing data, write the first line as
--   INSERT OVERWRITE TABLE cl_ods.ods_msg_user_info_di
INSERT INTO TABLE cl_ods.ods_msg_user_info_di
PARTITION (pt_mon, pt_day)
SELECT
    user_id,
    user_msg,
    user_mail,
    pt_mon,
    pt_day
FROM cl_ods.ods_msg_user_info_df
WHERE pt_mon = '201801'
  AND pt_day = '20180101';

补充

查看分区命令

-- Syntax:
--   SHOW PARTITIONS table_name [PARTITION (partition_key = partition_val, ...)];
-- The PARTITION clause is optional; without it every partition is listed.
SHOW PARTITIONS cl_ods.ods_msg_user_info_di;
-- e.g. list only the partitions under month 201801
SHOW PARTITIONS cl_ods.ods_msg_user_info_di PARTITION (pt_mon = '201801');