hive高阶语句

(1)ctas:as select
create table ctas_employee as select * from employee;
(2)cte:with … as
create table cte_employee as 
with
r1 as (select name from employee where name ='Michael'),
r2 as (select name from employee where sex_age.sex='MAle'),
r3 as (select name from employee where sex_age.sex='Female')
select * from r1 union all select * from r3
(3)like:复制表结构
create table employee_like like employee;
或者
create table employee_like as select * from employee where 1=2;
(4)创建临时表:表只针对当前的session有效,如果出现重名的化,临时表的优先级高于已有的表
create temporary table temp_table(id int);

create temporary table temp_employee as select * from employee;
(5)删除表:with perge表示不进入垃圾回收站,彻底删除
drop table if exists 表名[with perge];
(6)清空表数据(慎用)
truncate table 表名;
(7)修改表
--alter针对元数据
ALTER TABLE employee RENAME TO new_employee;
ALTER TABLE c_employee SET TBLPROPERTIES ('comment'='New name, comments');
ALTER TABLE employee_internal SET SERDEPROPERTIES ('field.delim' = '$');
ALTER TABLE c_employee SET FILEFORMAT RCFILE; -- 修正表文件格式
-- 修改表的列操作
ALTER TABLE employee_internal CHANGE old_name new_name STRING; -- 修改列名
ALTER TABLE c_employee ADD COLUMNS (work string); -- 添加列
ALTER TABLE c_employee REPLACE COLUMNS (name string); -- 替换所有列为一个列
(8)静态分区(分区的本质就是建立多个层级的文件夹)
--建表
create table if not exists employee_partitioned(
 name string,
 work_place array<string>,
 sex_age struct<sex:string,age:int>,
 skill_score map<string,int>,
 depart_title map<string,array<string>>
)
partitioned by (year int,month int)
row format delimited
fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
stored as textfile;

--添加静态分区:
alter table employee_partitioned add
partition(year=2019,month=7)
partition(year=2020,month=7)
partition(year=2020,month=8)

--删除分区
alter table employee_partitioned drop
partition(year=2019,month=7)

--查看分区表有多少分区
show partitions employee_partitioned

--加载数据到分区表中
load data local inpath '/opt/text/employee.txt'
into table employee_partitioned 
partition(year=2020,month=7)
(9)动态分区(分区的本质就是建立多个层级的文件夹)
--创建内部表
create table if not exists employee_hr(
 name string,
 employee_id int,
 number string,
 start_date date
)
row formate delimited 
fields terminated by '|'
stored as textfile;

--加载数据
load data local inpath '/opt/text/employee_hr.txt'
into table employee_hr

--开启动态分区
set hive.exec.dynamic.partiton=true;--开启分区
set hive.exec.dynamic.partition.mode=nonstrict;--设置分区模式为非严格模式

--动态分区建表语句和静态分区相同
create table if not exists employee_dynamtic(
 name string,
 employee_id int,
 number string
)
partitioned by (year int,month int)
row format delimited
stored as textfile;

--动态分区插入数据(只能使用  insert 方式添加动态分区数据)
insert into employee_dynamtic partition(year,month)
select name,employee_id,number,year(start_date),month(start_date) 
from employee_hr;

(10)分桶表(分桶对应HDFS中的文件)
--作用:
更高的查询处理效率
使抽样(sampling)更高效
一般根据“桶列”的哈希函数将数据进行分桶

--分桶只有动态分桶,开启参数
set hive.enforce.bucketing=true;
set map.reduce.tasks=分桶数;

--定义分桶(分桶的列是表中已有的列,分桶数最好是2的n次方)
clustered by(employee_id) into 2 buckets;

--创建分桶表
create table if not exists employee_bucket(
 name string,
 work_place Array<string>,
 sex_age struct<sex:string,age:int>,
 skills_score map<string,int>,
 depart_title map<string,Array<string>>
)
clustered by (name) into 2 buckets
row format delimited 
fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
stored as textfile;

--注意:必须使用insert方式加载数据
insert into table employee_bucket
select * from employee;
(11)分桶抽样(sampling)
假设我们在创建表的时候分了16个桶
16/32=1/2个桶,从第3个桶开始取,如果取2分样本,即我们拿到的桶是3,3.5

select * from table_name tablesample(bucket 3 out of 32 on rend()) s;

假设我们在创建表的时候分了2个桶
2/32=1/16个桶,从第1个桶开始取,如果取2分样本,即我们拿到的桶是1,1+1/16

select * from table_name tablesample(bucket 1 out of 32 on id) s;
(12)视图
只是建立了一个计算的逻辑,并不存储数据,数据还是在原来的表中,不会在hdfs上面创建文件夹

--创建视图
create view view_employee as select * from employee;

--概述
通过隐藏子查询、连接和函数来简化查询的逻辑结构
只保存定义,不存储数据
如果删除或更改基础表,则查询视图将失败
视图是只读的,不能插入或装载数据

--应用场景
将特定的列提供给客户,保护数据隐私
用于查询语句复杂的场景
(13)侧视图
select name,skills_score,wps from employee
lateral view explode(work_place) test as wps
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值