Hql中生僻的 sql
1,一列多行转一行多列
create table dev_updated.costomer_wujb5(
customer string,
product string,
monetary string
);
truncate table costomer;
insert into dev_updated.costomer_wujb5 values('Mary','Fin_pro_1',10000);
insert into dev_updated.costomer_wujb5 values('Mary','Fin_pro_2',5000);
insert into dev_updated.costomer_wujb5 values('Mary','Fin_pro_3',15000);
insert into dev_updated.costomer_wujb5 values('Tom','Fin_pro_1',8000);
insert into dev_updated.costomer_wujb5 values('Tom','Fin_pro_2',4000);
insert into dev_updated.costomer_wujb5 values('Tom','Fin_pro_3',20000);
select * from dev_updated.costomer_wujb5;
select customer,
max(case when product = 'Fin_pro_1' then monetary else null end) Fin_pro_1,
max(case when product = 'Fin_pro_2' then monetary else null end) Fin_pro_2,
max(case when product = 'Fin_pro_3' then monetary else null end) Fin_pro_3
from dev_updated.costomer_wujb5 group by customer
2,去除特殊字符
select regexp_replace('\t abc \n def \r hij', '\n|\t|\r', '');
3,计算当前月最后一天的日期
select
date_sub(
concat(
concat(
(case when month(current_date())=12 then year(current_date())+1 else year(current_date()) end),
'-',
(case when month(current_date())=12 then '01'
when month(current_date())=11 then '12'
else concat('0',month(current_date())+1) end)),
'-',
'01'),
1) as month_last_day;
--------------这个简单,几个单词搞定
select last_day(current_date);
4,每个月周五最大的一天
select
case when t.a=5 then '2019-03-31'
when t.a=7 then date_sub('2019-03-31',2)
when t.a=6 then date_sub('2019-03-31',1)
when t.a=4 then date_sub('2019-03-31',6)
when t.a=3 then date_sub('2019-03-31',5)
when t.a=2 then date_sub('2019-03-31',4)
else date_sub('2019-03-31',3) end max_friday
from(
select pmod(datediff('2019-03-31','2018-01-01')+1,7) a
) t;
4.1,每个月的第一个周末
select case when t.a=0 and t.b<8 then 1 else 0 end
from(
select pmod(datediff(substr(cast(current_date as string),1,10),'2018-01-01')+1,7) a,day(current_date) b
) t
可参考链接:https://blog.csdn.net/hechaoyong12345/article/details/106481596 -- pmod()的用法
5,分区的使用
show partitions dev_updated.wuwang_test_udl -----------查看分区
alter table dev_updated.wuwang_test_udl drop partition (batch_number="true") -------删除分区,同时会将该分区下的数据删除
6,行转列,列转行
-------hive中的列转行-----concat_ws(collect_set())
SELECT update_timestamp,concat_ws('===',collect_set(name)) name FROM `dev_updated`.`test_stu_info_udl` GROUP BY update_timestamp
------mysql中的列转行-----group_concat
select id,group_concat(name separator ';') from aa group by id;
------mysql中 concat_ws 只是一个单纯的拼接----
select concat_ws(',','11','22',NULL);
--结果:11,22
-------- 列转行
select split(name,'===')[0],update_timestamp from (
SELECT update_timestamp,concat_ws('===',collect_set(name)) name FROM `dev_updated`.`test_stu_info_udl` GROUP BY update_timestamp) a
union all
select split(name,'===')[1],update_timestamp from (
SELECT update_timestamp,concat_ws('===',collect_set(name)) name FROM `dev_updated`.`test_stu_info_udl` GROUP BY update_timestamp) a
7,嵌套json 的使用
select get_json_object('{"store": {"fruit":\[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}], "bicycle":{"price":19.95,"color":"red"} },
"email":"amy@only_for_json_udf_test.net",
"owner":"amy" }',"$.store.bicycle.color")
8,正则表达式的使用
SELECT * FROM `dev_updated`.`test_stu_info_udl` where update_timestamp like '%$%'
select regexp_extract(update_timestamp,'^(/d{4})(-)(/d{2})(.*)$',0) from `dev_updated`.`test_stu_info_udl`
9,时间类型的转换
SELECT from_unixtime('1536313460')
--获取当天时间
select from_unixtime(cast(unix_timestamp() as BIGINT),'yyyyMMdd')
select date_add(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),-1)
select distinct from_unixtime(cast(update_timestamp as BIGINT),'yyyy-MM-dd HH:mm:ss') a FROM `cdl`.`bw_fi_zorpcc06l_cdl` ORDER BY a;
select unix_timestamp(now())
SELECT from_unixtime(unix_timestamp(now()))
select unix_timestamp('2018-03-26 10:51:36')
select (unix_timestamp(now())-206*24*60*60)*1000
select update_timestamp from cdl.bw_fi_zocbps07l_cdl_test2 limit 10
select cast(from_unixtime(1517910714) as string)
---截取年份
select substr(cast(from_unixtime(unix_timestamp(now())) as string),1,4)
---截取月份
select substr(cast(from_unixtime(unix_timestamp(now())) as string),6,2)
---截取天
select substr(cast(from_unixtime(unix_timestamp(now())) as string),9,2)
10,添加分隔符
alter table `prd_idl`.`sfdc_ms_emea_account_idl` set SERDEPROPERTIES('field.delim'='\t');
11,insert overwrite 、row_number()
insert overwrite table idl.sfdc_ms_na_account_idl
select
annualrevenue ,
currencyisocode ,
ownerid ,
createddate ,
createdbyid ,
lastmodifieddate ,
lastmodifiedbyid ,
systemmodstamp ,
lastactivitydate ,
sub_segment__c
from idl.sfdc_ms_na_account_idl a
left join
(
SELECT *
from
( select *,row_number()OVER(PARTITION BY SAP_BPID ORDER BY SFDC_Account_ID desc) as row
FROM `idl`.` test_wuwang_idl` ) t1
where t1.row = 1
)b
on a.id = b.SFDC_Account_ID
12,insert overwrite partition(xxx) 等同与 dense_rank() -- 可以使用未分区的语句实现分区覆盖的作用
from (
select `(rn)?+.+`
from
(
select * ,dense_rank() over(partition by ${primary_columns} order by ${order_key} desc) as rn
from
(
select * from ${hive_db_udl}.${table_name}_udl
union all
select * from ${hive_db_idl}.${table_name}_idl
where ${normal_filter}
) a
where coalesce(${primary_columns}) is not null
) b
where rn = 1 and ${delete_filter} ---- 如果没有删除条件 传入 ${delete_filter} = TRUE
) c
insert overwrite table ${hive_db_udl}.${table_name}_udl
select *
13,hive 中修复分区信息
msck repair table table_name
14,hive中存储不同类型的字段
create table cdl.test_array(
id int,
name string,
hobby array<string>,
friend map<string,string>,
mark struct<math:int,english:string>
)
row format delimited fields terminated by ','
collection items terminated by '_'
map keys terminated by ';'
lines terminated by '\n'
select * from cdl.test_array
-- MR 和 spark 都可以执行
insert into cdl.test_array
select
3,'xiaohua',array('basketball','read'),str_to_map('xiaoming:no,xiaohong:no'),named_struct('math',90,'english','90')
-- MR 报错,spark 执行成功
insert into cdl.test_array values(
1,'xiaohua',array('basketball','read'),str_to_map('xiaoming:no,xiaohong:no'),named_struct('math',90,'english','90'))
15,同步statistic
analyze table prd_updated.bw_sc_zoidns03l_udl compute statistics;
16,查看hive中改 schema 中的所有表select count(*) from (
select * from information_schema.tables where table_schema='prd_pii'
) a
17,
view 视图的使用
--- 创建视图
CREATE VIEW prd_idl.view_test_text as
select
id ,
name,
batch_number
from prd_idl.hive_test_parquet
--- view字段增强
alter VIEW prd_idl.view_test_text as
select
id ,
name,
age,
zdate,
batch_number
from prd_idl.hive_test_parquet