DDL
-- 显示当前hive中有多少函数可用
SHOW FUNCTIONS [LIKE "<pattern>"];
-- 显示函数的描述信息
desc function concat;
-- 显示函数的扩展描述信息
desc function extended concat;
SHOW LOCKS <table_name>;
SHOW LOCKS <table_name> EXTENDED;
SHOW LOCKS <table_name> PARTITION (<partition_spec>);
SHOW LOCKS <table_name> PARTITION (<partition_spec>) EXTENDED;
SHOW LOCKS (DATABASE|SCHEMA) database_name;
解锁
unlock table <table_name>;
- 分桶表
-- 分桶表 分桶数必须是2的n次幂
create table if not exists ${table} (
user_id BIGINT COMMENT "user_id",
age BIGINT COMMENT "age",
name STRING COMMENT "name"
) partitioned by (p_date BIGINT COMMENT "日期分区") clustered by (user_id) sorted by (age) into 8 buckets ;
- 动态分区插入
-- 动态插入
--启动动态分区功能
set hive.exec.dynamic.partition=true;
--允许全部分区都是动态分区
-- set hive.exec.dynamic.partition.mode=nostrick;
insert overwrite table ${t1} partition (p_date = 20231226, p_hour)
select
user_id,
name,
p_hour
from ${t1};
- 一次查询多次插入
-- 插入多个表,多个分区
with
view_data as (
select
user_id,
name,
p_date
from ${t1}
)
from view_data
insert overwrite table ${t1} partition(p_date = 20231226)select
user_id,
name
where p_date = 20231226
insert overwrite table ${t2} partition(p_date = 20231226)select
user_id,
name
where p_date = 20231226;`
DQL
- select 字段可使用正则表达式
- json
- get_json_object
select get_json_object('{
"store": {
"fruit": [
{
"weight": 8,
"type": "apple"
},
{
"weight": 9,
"type": "pear"
}
],
"bicycle": {
"price": 19.95,
"color": "red"
}
},
"email": "amy@only_for_json_udf_test.net",
"owner": "amy"
}','$.store.fruit[*].weight');
- json_tupe
select json_tuple('{
"store": {
"fruit": [
{
"weight": 8,
"type": "apple"
},
{
"weight": 9,
"type": "pear"
}
],
"bicycle": {
"price": 19.95,
"color": "red"
}
},
"email": "amy@only_for_json_udf_test.net",
"owner": "amy"
}','store','email','owner');
- group
-- grouping__id
-- grouping__id 计算逻辑 不包含参与计算
with
t as (
select
1 as a,
2 as b,
3 as c,
4 as d
union all
select
11 as a,
12 as b,
13 as c,
14 as d
union all
select
21 as a,
22 as b,
23 as c,
24 as d
)
select
a,
b,
c,
case
when grouping__id = 1 then 'a,b'
when grouping__id = 3 then 'a'
when grouping__id = 0 then 'abc'
else 'unknow'
end,
grouping__id
from t
group by a,--4
b,--2
c --1
grouping sets ( a, ( a, b), ( a, b, c));
select
a,
b,
c,
case
when grouping__id = 1 then 'a,b'
when grouping__id = 3 then 'a'
when grouping__id = 0 then 'abc'
else 'unknow'
end,
grouping__id
from t
group by a,--4
b,--2
c --1
with cube;
select
a,
b,
c,
case
when grouping__id = 1 then 'a,b'
when grouping__id = 3 then 'a'
when grouping__id = 0 then 'abc'
else 'unknow'
end,
grouping__id
from t
group by a,--4
b,--2
c --1
with rollup;
-- -,a,ab,abc
- 开启本地模式
set hive.exec.mode.local.auto=true;
- join优化
/*+ STREAMTABLE(<TABLE_NAME>)*/
标记大表- left semi join 只读取左表
/*+MAPJOIN(<TABLE_NAME>)*/
- 宏命令
create temporary macro demo(x double) x + 10.3;
select demo(4.0)
UDF
udf官方文档
可使用UDFType优化自定义UDF
- udf
- udaf
- udtf
- json_tuple
- explode
- inline
- stack
- win(窗口函数)