HiveSQL高级进阶10大技巧

最新推荐文章于 2023-01-30 15:35:26 发布

尚硅谷铁粉

最新推荐文章于 2023-01-30 15:35:26 发布

阅读量637

点赞数

分类专栏：大数据文章标签： sql 数据库 database

原文链接：http://www.atguigu.com

版权

大数据专栏收录该内容

104 篇文章 11 订阅

订阅专栏

直接上干货，HiveSQL高级进阶技巧，重要性不言而喻。掌握这10个技巧，你的SQL水平将有一个质的提升，达到一个较高的层次！

1.删除：

insert overwrite tmp 
select * from tmp where id != '666';

2.更新：

直接上干货，HiveSQL高级进阶技巧，重要性不言而喻。掌握这10个技巧，你的SQL水平将有一个质的提升，达到一个较高的层次！

insert overwrite tmp 
select id,label,
       if(id = '1' and label = 'grade','25',value) as value 
from tmp where id != '666';

3.行转列：

-- Step03：最后将info的内容切分
select id,split(info,':')[0] as label,split(info,':')[1] as value
from 
(
-- Step01：先将数据拼接成“heit:180,weit:60,age:26”
    select id,concat('heit',':',height,',','weit',':',weight,',','age',':',age) as value 
    from tmp
) as tmp
-- Step02：然后在借用explode函数将数据膨胀至多行
lateral view explode(split(value,',')) mytable as info;

4.列转行1：

select 
tmp1.id as id,tmp1.value as height,tmp2.value as weight,tmp3.value as age 
from 
(select id,label,value from tmp2 where label = 'heit') as tmp1
join
on tmp1.id = tmp2.id
(select id,label,value from tmp2 where label = 'weit') as tmp2
join
on tmp1.id = tmp2.id
(select id,label,value from tmp2 where label = 'age') as tmp3
on tmp1.id = tmp3.id;

5.列转行2：

select
id,tmpmap['height'] as height,tmpmap['weight'] as weight,tmpmap['age'] as age
from 
(
    select id,
           str_to_map(concat_ws(',',collect_set(concat(label,':',value))),',',':') as tmpmap  
    from tmp2 group by id
) as tmp1;

6.分析函数1：

select id,label,value,
       lead(value,1,0)over(partition by id order by label) as lead,
       lag(value,1,999)over(partition by id order by label) as lag,
       first_value(value)over(partition by id order by label) as first_value,
       last_value(value)over(partition by id order by label) as last_value
from tmp;

7.分析函数2：


select id,label,value,
       row_number()over(partition by id order by value) as row_number,
       rank()over(partition by id order by value) as rank,
       dense_rank()over(partition by id order by value) as dense_rank
from tmp;

8.多维分析1：

select col1,col2,col3,count(1),
       Grouping__ID 
from tmp 
group by col1,col2,col3
grouping sets(col1,col2,col3,(col1,col2),(col1,col3),(col2,col3),())

9.多维分析2：

select col1,col2,col3,count(1),
       Grouping__ID 
from tmp 
group by col1,col2,col3
with cube;

10.数据倾斜groupby：

select label,sum(cnt) as all from 
(
    select rd,label,sum(1) as cnt from 
    (
        select id,round(rand(),2) as rd,value from tmp1
    ) as tmp
    group by rd,label
) as tmp
group by label;

11.数据倾斜join：

select label,sum(value) as all from 
(
    select rd,label,sum(value) as cnt from
    (
        select tmp1.rd as rd,tmp1.label as label,tmp1.value*tmp2.value as value 
        from 
        (
            select id,round(rand(),1) as rd,label,value from tmp1
        ) as tmp1
        join
        (
            select id,rd,label,value from tmp2
            lateral view explode(split('0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9',',')) mytable as rd
        ) as tmp2
        on tmp1.rd = tmp2.rd and tmp1.label = tmp2.label
    ) as tmp1
    group by rd,label
) as tmp1
group by label;

尚硅谷铁粉

关注

0
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
HiveSQL高级进阶10大技巧

直接上干货，HiveSQL高级进阶技巧，重要性不言而喻。掌握这10个技巧，你的SQL水平将有一个质的提升，达到一个较高的层次！1.删除：insert overwrite tmp select * from tmp where id != '666';2.更新：直接上干货，HiveSQL高级进阶技巧，重要性不言而喻。掌握这10个技巧，你的SQL水平将有一个质的提升，达到一个较高的层次！insert overwrite tmp select id,label, if(i
复制链接

扫一扫