hive笔记整理2020-12-01
hive分区:
静态分区步骤
1) 前提有静态数据
2) 创建分区表
3) 将静态数据导入到指定的分区中
创建普通表导入本地数据
drop table tb_log ; --删除表
create table tb_log(
log_id string ,
url string ,
ct string
)
row format delimited fields terminated by ',' ; --格式化以,分割
load data local inpath "/data/log/" into table tb_log ; --导入数据到tb
创建一级分区表
create table tb_partition_log(
log_id string ,
url string ,
ct string
)
partitioned by(dt string) -- 指定分区字段
row format delimited fields terminated by ',' ;
-----------------将静态数据导入分区表中--------------
load data local inpath "/data/log/20201128.log" into table tb_partition_log partition(dt='20201128') ;
load data local inpath "/data/log/20201129.log" into table tb_partition_log partition(dt='20201129') ;
load data local inpath "/data/log/20201130.log" into table tb_partition_log partition(dt='20201130') ;
指定三级分区
-----------------年月日三级分区------------
create table tb_partition_log2(
log_id string ,
url string ,
ct string
)
partitioned by(y string , m string , d string) -- 指定年月日三个分区字段
row format delimited fields terminated by ',' ;
load data local inpath "/data/log/20201128.log" into table tb_partition_log2 partition(y='2020',m='11' , d='28') ;
load data local inpath "/data/log/20191010.log" into table tb_partition_log2 partition(y='2019',m='10' , d='10') ;
动态分区步骤
1) 建普通表
2) 导入数据
3) 分区表
4) 开启动态分区支持
5) 通过select insert的方式导入数据
创建普通表
create table tb_user_log(
uid int ,
name string ,
city string
)
row format delimited fields terminated by ',' ;
load data local inpath "/data/user_log/" into table tb_user_log ;
创建动态表
CREATE TABLE tb_dynamic_partition_user_log(
id int ,
name string ,
city string
)
partitioned by(p_city string , p_uid int ) ; --动态分区字段
开启动态分区支持
set hive.exec.dynamic.partition=true ;
set hive.exec.dynamic.partition.mode=nonstrick;
insert into tb_dynamic_partition_user_log partition(p_city ,p_uid)
select uid , name , city , city as p_city , uid as p_uid from tb_user_log ;
修改分区表
-- 删除分区
alter table tb_partition_log drop partition(dt='20201111') ;
-- 添加分区
alter table tb_partition_log add partition(dt='20201111') ;
alter table tb_partition_log add partition(dt='20201112') partition(dt='20201113') ;
-- 查看表的所有分区
show partitions tb_partition_log