hive高阶语句

最新推荐文章于 2024-05-14 15:23:36 发布

晚春初夏的你

最新推荐文章于 2024-05-14 15:23:36 发布

阅读量208

点赞数 1

文章标签： hive 大数据

本文链接：https://blog.csdn.net/weixin_42834505/article/details/107935867

版权

（1）ctas:as select

create table ctas_employee as select * from employee;

（2）cte:with … as

create table cte_employee as 
with
r1 as (select name from employee where name ='Michael'),
r2 as (select name from employee where sex_age.sex='MAle'),
r3 as (select name from employee where sex_age.sex='Female')
select * from r1 union all select * from r3

（3）like：复制表结构

create table employee_like like employee;
或者
create table employee_like as select * from employee where 1=2;

（4）创建临时表：表只针对当前的session有效，如果出现重名的化，临时表的优先级高于已有的表

create temporary table temp_table(id int);

create temporary table temp_employee as select * from employee;

（5）删除表：with perge表示不进入垃圾回收站，彻底删除

drop table if exists 表名[with perge];

（6）清空表数据（慎用）

truncate table 表名;

（7）修改表

--alter针对元数据
ALTER TABLE employee RENAME TO new_employee;
ALTER TABLE c_employee SET TBLPROPERTIES ('comment'='New name, comments');
ALTER TABLE employee_internal SET SERDEPROPERTIES ('field.delim' = '$');
ALTER TABLE c_employee SET FILEFORMAT RCFILE; -- 修正表文件格式
-- 修改表的列操作
ALTER TABLE employee_internal CHANGE old_name new_name STRING; -- 修改列名
ALTER TABLE c_employee ADD COLUMNS (work string); -- 添加列
ALTER TABLE c_employee REPLACE COLUMNS (name string); -- 替换所有列为一个列

（8）静态分区（分区的本质就是建立多个层级的文件夹）

--建表
create table if not exists employee_partitioned(
 name string,
 work_place array<string>,
 sex_age struct<sex:string,age:int>,
 skill_score map<string,int>,
 depart_title map<string,array<string>>
)
partitioned by (year int,month int)
row format delimited
fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
stored as textfile;

--添加静态分区：
alter table employee_partitioned add
partition(year=2019,month=7)
partition(year=2020,month=7)
partition(year=2020,month=8)

--删除分区
alter table employee_partitioned drop
partition(year=2019,month=7)

--查看分区表有多少分区
show partitions employee_partitioned

--加载数据到分区表中
load data local inpath '/opt/text/employee.txt'
into table employee_partitioned 
partition(year=2020,month=7)

（9）动态分区（分区的本质就是建立多个层级的文件夹）

--创建内部表
create table if not exists employee_hr(
 name string,
 employee_id int,
 number string,
 start_date date
)
row formate delimited 
fields terminated by '|'
stored as textfile;

--加载数据
load data local inpath '/opt/text/employee_hr.txt'
into table employee_hr

--开启动态分区
set hive.exec.dynamic.partiton=true;--开启分区
set hive.exec.dynamic.partition.mode=nonstrict;--设置分区模式为非严格模式

--动态分区建表语句和静态分区相同
create table if not exists employee_dynamtic(
 name string,
 employee_id int,
 number string
)
partitioned by (year int,month int)
row format delimited
stored as textfile;

--动态分区插入数据（只能使用  insert 方式添加动态分区数据）
insert into employee_dynamtic partition(year,month)
select name,employee_id,number,year(start_date),month(start_date) 
from employee_hr;

（10）分桶表（分桶对应HDFS中的文件）

--作用：
更高的查询处理效率
使抽样（sampling）更高效
一般根据“桶列”的哈希函数将数据进行分桶

--分桶只有动态分桶,开启参数
set hive.enforce.bucketing=true;
set map.reduce.tasks=分桶数；

--定义分桶（分桶的列是表中已有的列，分桶数最好是2的n次方）
clustered by(employee_id) into 2 buckets;

--创建分桶表
create table if not exists employee_bucket(
 name string,
 work_place Array<string>,
 sex_age struct<sex:string,age:int>,
 skills_score map<string,int>,
 depart_title map<string,Array<string>>
)
clustered by (name) into 2 buckets
row format delimited 
fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
stored as textfile;

--注意：必须使用insert方式加载数据
insert into table employee_bucket
select * from employee;

（11）分桶抽样（sampling）

假设我们在创建表的时候分了16个桶
16/32=1/2个桶，从第3个桶开始取，如果取2分样本，即我们拿到的桶是3，3.5

select * from table_name tablesample(bucket 3 out of 32 on rend()) s;

假设我们在创建表的时候分了2个桶
2/32=1/16个桶，从第1个桶开始取，如果取2分样本，即我们拿到的桶是1,1+1/16

select * from table_name tablesample(bucket 1 out of 32 on id) s;

（12）视图

只是建立了一个计算的逻辑，并不存储数据，数据还是在原来的表中，不会在hdfs上面创建文件夹

--创建视图
create view view_employee as select * from employee;

--概述
通过隐藏子查询、连接和函数来简化查询的逻辑结构
只保存定义，不存储数据
如果删除或更改基础表，则查询视图将失败
视图是只读的，不能插入或装载数据

--应用场景
将特定的列提供给客户，保护数据隐私
用于查询语句复杂的场景

（13）侧视图

select name,skills_score,wps from employee
lateral view explode(work_place) test as wps

晚春初夏的你

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫