hive 中复杂 sql 的使用

Hql中生僻的 sql

1,一列多行转一行多列

create table dev_updated.costomer_wujb5(
customer string,
product  string,
monetary  string
);

truncate table costomer;

insert into dev_updated.costomer_wujb5 values('Mary','Fin_pro_1',10000);
insert into dev_updated.costomer_wujb5 values('Mary','Fin_pro_2',5000);
insert into dev_updated.costomer_wujb5 values('Mary','Fin_pro_3',15000);
insert into dev_updated.costomer_wujb5 values('Tom','Fin_pro_1',8000);
insert into dev_updated.costomer_wujb5 values('Tom','Fin_pro_2',4000);
insert into dev_updated.costomer_wujb5 values('Tom','Fin_pro_3',20000);

select * from dev_updated.costomer_wujb5;

select customer,
max(case when product = 'Fin_pro_1' then monetary else null end) Fin_pro_1,
max(case when product = 'Fin_pro_2' then monetary else null end) Fin_pro_2,
max(case when product = 'Fin_pro_3' then monetary else null end) Fin_pro_3
from dev_updated.costomer_wujb5 group by customer
 

 

2,去除特殊字符

select regexp_replace('\t abc \n def \r hij', '\n|\t|\r', '');

3,计算当前月最后一天的日期

select 
 date_sub(
     concat(
        concat(
            (case when month(current_date())=12 then year(current_date())+1 else year(current_date()) end),
            '-',
            (case when month(current_date())=12 then '01'
                  when month(current_date())=11 then '12'
                  else concat('0',month(current_date())+1) end)),
            '-',
            '01'),
        1)    as month_last_day; 
        
--------------这个简单,几个单词搞定
        select last_day(current_date);
 

4,每个月周五最大的一天

select 
case when t.a=5 then '2019-03-31'
     when t.a=7 then date_sub('2019-03-31',2)
     when t.a=6 then date_sub('2019-03-31',1)
     when t.a=4 then date_sub('2019-03-31',6)
     when t.a=3 then date_sub('2019-03-31',5)
     when t.a=2 then date_sub('2019-03-31',4)
     else date_sub('2019-03-31',3) end max_friday
from(
select pmod(datediff('2019-03-31','2018-01-01')+1,7) a
) t;

4.1,每个月的第一个周末

select case when t.a=0 and t.b<8 then 1 else 0 end 
from(
select pmod(datediff(substr(cast(current_date as string),1,10),'2018-01-01')+1,7) a,day(current_date) b
) t

可参考链接:https://blog.csdn.net/hechaoyong12345/article/details/106481596   --  pmod()的用法

5,分区的使用

show partitions dev_updated.wuwang_test_udl -----------查看分区

alter table dev_updated.wuwang_test_udl drop partition (batch_number="true") -------删除分区,同时会将该分区下的数据删除

6,行转列,列转行

-------hive中的列转行-----concat_ws(collect_set())
SELECT update_timestamp,concat_ws('===',collect_set(name)) name FROM `dev_updated`.`test_stu_info_udl` GROUP BY update_timestamp


------mysql中的列转行-----group_concat
select id,group_concat(name separator ';') from aa group by id;

------mysql中 concat_ws 只是一个单纯的拼接----
select concat_ws(',','11','22',NULL);
--结果:11,22

-------- 列转行

select split(name,'===')[0],update_timestamp from (
SELECT update_timestamp,concat_ws('===',collect_set(name)) name FROM `dev_updated`.`test_stu_info_udl` GROUP BY update_timestamp) a
union all
select split(name,'===')[1],update_timestamp from (
SELECT update_timestamp,concat_ws('===',collect_set(name)) name FROM `dev_updated`.`test_stu_info_udl` GROUP BY update_timestamp) a

7,嵌套json 的使用

 select get_json_object('{"store": {"fruit":\[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}], "bicycle":{"price":19.95,"color":"red"} }, 
"email":"amy@only_for_json_udf_test.net", 
"owner":"amy" }',"$.store.bicycle.color")

8,正则表达式的使用

SELECT * FROM `dev_updated`.`test_stu_info_udl` where update_timestamp like '%$%'
select regexp_extract(update_timestamp,'^(/d{4})(-)(/d{2})(.*)$',0) from `dev_updated`.`test_stu_info_udl`

9,时间类型的转换

SELECT from_unixtime('1536313460')

--获取当天时间
select from_unixtime(cast(unix_timestamp() as BIGINT),'yyyyMMdd')

select date_add(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),-1)

select distinct from_unixtime(cast(update_timestamp as BIGINT),'yyyy-MM-dd HH:mm:ss') a FROM `cdl`.`bw_fi_zorpcc06l_cdl` ORDER BY  a;

select unix_timestamp(now())


SELECT from_unixtime(unix_timestamp(now()))


select unix_timestamp('2018-03-26 10:51:36')

select (unix_timestamp(now())-206*24*60*60)*1000

select update_timestamp from cdl.bw_fi_zocbps07l_cdl_test2 limit 10

select cast(from_unixtime(1517910714) as string)

---截取年份
select substr(cast(from_unixtime(unix_timestamp(now())) as string),1,4)

---截取月份
select substr(cast(from_unixtime(unix_timestamp(now())) as string),6,2)

---截取天
select substr(cast(from_unixtime(unix_timestamp(now())) as string),9,2)

10,添加分隔符

alter table `prd_idl`.`sfdc_ms_emea_account_idl` set SERDEPROPERTIES('field.delim'='\t');

11,insert overwrite 、row_number()

insert overwrite table idl.sfdc_ms_na_account_idl 
select 
annualrevenue    ,
currencyisocode    ,
ownerid    ,
createddate    ,
createdbyid    ,
lastmodifieddate    ,
lastmodifiedbyid    ,
systemmodstamp    ,
lastactivitydate    ,
sub_segment__c
from idl.sfdc_ms_na_account_idl a  
left join
(
SELECT *
from
( select *,row_number()OVER(PARTITION BY SAP_BPID ORDER BY SFDC_Account_ID desc) as row  
FROM  `idl`.` test_wuwang_idl` ) t1
where t1.row = 1
)b  
on a.id = b.SFDC_Account_ID
 

12,insert overwrite   partition(xxx) 等同与  dense_rank() -- 可以使用未分区的语句实现分区覆盖的作用

from (
select `(rn)?+.+`
from 
(
select * ,dense_rank() over(partition by ${primary_columns} order by ${order_key} desc) as rn
from
(
select * from ${hive_db_udl}.${table_name}_udl
union all
select * from ${hive_db_idl}.${table_name}_idl
where ${normal_filter} 
) a
where coalesce(${primary_columns})  is not null
) b
where rn = 1 and ${delete_filter}       ---- 如果没有删除条件 传入 ${delete_filter} = TRUE
) c
insert overwrite table  ${hive_db_udl}.${table_name}_udl
select *

13,hive 中修复分区信息

msck repair table table_name

14,hive中存储不同类型的字段

create table cdl.test_array(
id int,
name string,
hobby array<string>,
friend map<string,string>,
mark struct<math:int,english:string>
)
row format delimited fields terminated by ','
collection items terminated by '_'
map keys terminated by ';'
lines terminated by '\n'

select * from cdl.test_array

-- MR 和 spark 都可以执行
insert into cdl.test_array 
select 
3,'xiaohua',array('basketball','read'),str_to_map('xiaoming:no,xiaohong:no'),named_struct('math',90,'english','90')

-- MR 报错,spark 执行成功
insert into cdl.test_array values(
1,'xiaohua',array('basketball','read'),str_to_map('xiaoming:no,xiaohong:no'),named_struct('math',90,'english','90'))

15,同步statistic

analyze table prd_updated.bw_sc_zoidns03l_udl compute statistics;

16,查看hive中改 schema 中的所有表select count(*) from (
select * from information_schema.tables where table_schema='prd_pii'
) a 
17,

view 视图的使用

--- 创建视图

CREATE VIEW prd_idl.view_test_text as
select
id ,
name,
batch_number
from prd_idl.hive_test_parquet

--- view字段增强 

alter VIEW  prd_idl.view_test_text as    
select     
id ,   
name,     
age,   
zdate,
batch_number   
from prd_idl.hive_test_parquet

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值