HSQL 记录

最新推荐文章于 2024-07-17 17:53:42 发布

tcoding

最新推荐文章于 2024-07-17 17:53:42 发布

阅读量235

点赞数

文章标签：大数据 hive

本文链接：https://blog.csdn.net/qq_23934475/article/details/134894430

版权

DDL

hiveDDL官方文档


-- 显示当前hive中有多少函数可用
SHOW FUNCTIONS [LIKE "<pattern>"];
-- 显示函数的描述信息
desc function concat;

-- 显示函数的扩展描述信息
desc function extended concat;

SHOW LOCKS <table_name>;
SHOW LOCKS <table_name> EXTENDED;
SHOW LOCKS <table_name> PARTITION (<partition_spec>);
SHOW LOCKS <table_name> PARTITION (<partition_spec>) EXTENDED;
SHOW LOCKS (DATABASE|SCHEMA) database_name;  

解锁
unlock table <table_name>；

分桶表

-- 分桶表 分桶数必须是2的n次幂
create table if not exists ${table} (
    user_id BIGINT COMMENT "user_id",
    age BIGINT COMMENT "age",
    name    STRING COMMENT "name"
) partitioned by (p_date BIGINT COMMENT "日期分区") clustered by (user_id) sorted by (age) into 8 buckets ;

动态分区插入

-- 动态插入

--启动动态分区功能
set hive.exec.dynamic.partition=true;
--允许全部分区都是动态分区
-- set hive.exec.dynamic.partition.mode=nostrick;

insert overwrite table ${t1} partition (p_date = 20231226, p_hour)
select
    user_id,
    name,
    p_hour
from ${t1};

一次查询多次插入

-- 插入多个表，多个分区

with
    view_data as (
        select
            user_id,
            name,
            p_date
        from ${t1}
    )
from view_data
insert overwrite table ${t1} partition(p_date = 20231226)select
    user_id,
    name
where p_date = 20231226
insert overwrite table ${t2} partition(p_date = 20231226)select
    user_id,
    name
where p_date = 20231226;`

DQL

select 字段可使用正则表达式
json
- get_json_object

select get_json_object('{
  "store": {
    "fruit": [
      {
        "weight": 8,
        "type": "apple"
      },
      {
        "weight": 9,
        "type": "pear"
      }
    ],
    "bicycle": {
      "price": 19.95,
      "color": "red"
    }
  },
  "email": "amy@only_for_json_udf_test.net",
  "owner": "amy"
}','$.store.fruit[*].weight');

json_tupe

select json_tuple('{
  "store": {
    "fruit": [
      {
        "weight": 8,
        "type": "apple"
      },
      {
        "weight": 9,
        "type": "pear"
      }
    ],
    "bicycle": {
      "price": 19.95,
      "color": "red"
    }
  },
  "email": "amy@only_for_json_udf_test.net",
  "owner": "amy"
}','store','email','owner');

group


-- grouping__id
-- grouping__id 计算逻辑 不包含参与计算

with
    t as (
        select
            1 as a,
            2 as b,
            3 as c,
            4 as d
        union all
        select
            11 as a,
            12 as b,
            13 as c,
            14 as d
        union all
        select
            21 as a,
            22 as b,
            23 as c,
            24 as d
    )
select
    a,
    b,
    c,
    case
        when grouping__id = 1 then 'a,b'
        when grouping__id = 3 then 'a'
        when grouping__id = 0 then 'abc'
                              else 'unknow'
        end,
    grouping__id
from t
group by a,--4
         b,--2
         c --1
    grouping sets ( a, ( a, b), ( a, b, c));

select
    a,
    b,
    c,
    case
        when grouping__id = 1 then 'a,b'
        when grouping__id = 3 then 'a'
        when grouping__id = 0 then 'abc'
                              else 'unknow'
        end,
    grouping__id
from t
group by a,--4
         b,--2
         c --1
with cube;

select
    a,
    b,
    c,
    case
        when grouping__id = 1 then 'a,b'
        when grouping__id = 3 then 'a'
        when grouping__id = 0 then 'abc'
                              else 'unknow'
        end,
    grouping__id
from t
group by a,--4
         b,--2
         c --1
with rollup;
-- -,a,ab,abc

开启本地模式 set hive.exec.mode.local.auto=true;
join优化
- /*+ STREAMTABLE(<TABLE_NAME>)*/标记大表
- left semi join 只读取左表
- /*+MAPJOIN(<TABLE_NAME>)*/
宏命令

  create temporary macro demo(x double) x + 10.3;

  select demo(4.0)

UDF

udf官方文档
可使用UDFType优化自定义UDF

udf
udaf
udtf
- json_tuple
- explode
- inline
- stack
win（窗口函数）

tcoding

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
HSQL 记录

可使用UDFType优化自定义UDF。
复制链接

扫一扫

HSQL 记录

DDL

DQL

UDF

“相关推荐”对你有帮助么？