-
类型转换函数
select cast(“5” as int) ;
select cast(“2017-08-03” as date) ;
select cast(current_timestamp as date); -
数学运算函数
select round(5.4); ## 5 四舍五入
select round(5.1345,3) ; ##5.135
select ceil(5.4) ; // select ceiling(5.4) from dual; ## 6 向上取整
select floor(5.4); ## 5 向下取整
select abs(-5.4) ; ## 5.4 绝对值
select greatest(3,5,6) ; ## 6
select least(3,5,6) from dual; ##求多个输入参数中的最小值 -
字符串函数
substr(string str, int start) ## 截取子串
substring(string str, int start)
示例:select substr(“abcdefg”,2) from dual;
substr(string, int start, int len)
substring(string, int start, int len)
示例:select substr(“abcdefg”,2,3) from dual;concat(string A, string B…) ## 拼接字符串
concat_ws(string SEP, string A, string B…)
示例:select concat(“ab”,“xy”) from dual; ## abxy
select concat_ws(".",“192”,“168”,“33”,“44”) from dual; ## 192.168.33.44length(string A)
示例:select length(“192.168.33.44”) from dual; ## 13split(string str, string pat)
示例:select split(“192.168.33.44”,".") from dual; 错误的,因为.号是正则语法中的特定字符
select split(“192.168.33.44”,"\.") from dual;upper(string str) ##转大写
lower(string str) -
时间函数
select current_timestamp; ## 获取当前的时间戳(详细时间信息)
select current_date; ## 获取当前的日期取当前时间的秒数时间戳–(距离格林威治时间1970-1-1 0:0:0秒的差距)
select unix_timestamp();
unix时间戳转字符串
from_unixtime(bigint unixtime[, string format])
示例:select from_unixtime(unix_timestamp());
select from_unixtime(unix_timestamp(),“yyyy/MM/dd HH:mm:ss”);字符串转unix时间戳
unix_timestamp(string date, string pattern)
示例: select unix_timestamp(“2017-08-10 17:50:30”);
select unix_timestamp(“2017-08-10 17:50:30”,“yyyy-MM-dd HH:mm:ss”);将字符串转成日期date
select to_date(“2017-09-17 16:58:32”);
-
条件控制函数
a. case when
select id,name,
case
when age<28 then ‘youngth’
when age>27 and age<40 then ‘zhongnian’
else ‘old’
end
from t_user;
b. if
select id,if(age>25,‘working’,‘worked’) from t_user;select moive_name,if(array_contains(actors,'吴刚'),'好电影',’烂片儿’) from t_movie;
-
集合函数
a. array_contains(Array,value)返回boolean值
select moive_name,array_contains(actors,‘吴刚’) from t_movie;
select array_contains(array(‘a’,‘b’,‘c’),‘c’) from dual;
b. sort_array(Array)返回排序后的数组
select sort_array(array(‘c’,‘b’,‘a’)) from dual;
select ‘haha’,sort_array(array(‘c’,‘b’,‘a’)) as xx from (select 0) tmp;
c. size(Array) 返回一个集合的长度,int值
i. select moive_name,size(actors) as actor_number from t_movie; -
行转列函数:explode()
a. 数据:
1,zhangsan,化学:物理:数学:语文
2,lisi,化学:数学:生物:生理:卫生
3,wangwu,化学:语文:英语:体育:生物
b. 创建表
create table t_stu_subject(id int,name string,subjects array)
row format delimited fields terminated by ‘,’
collection items terminated by ‘:’;
c. 使用explode()对数组字段"炸裂"
i. select explode(subjects) as sub from t_stu_subject
ii. 利用这个结果,求去重的课程
select distinct tmp.sub
from
(select explode(subjects) as sub from t_stu_subject) tmp; -
表生成函数
select id,name,tmp.sub
from t_stu_subject lateral view explode(subjects) tmp as sub;
理解: lateral view 相当于两个表在join
左表:是原表
右表:是explode(某个集合字段)之后产生的表
而且:这个join只在同一行的数据间进行那样,可以方便做更多的查询:
比如,查询选修了生物课的同学
select a.id,a.name,a.sub from
(select id,name,tmp.sub as sub from t_stu_subject lateral view explode(subjects) tmp as sub) a
where sub=‘生物’; -
json解析函数
a. 创建一个原始表用来对应原始的json数据(下面括号里面的json为字段名)
create table t_json(json string);
load data local inpath ‘/root/rating.json’ into table t_json;
b. 利用json_tube进行json数据解析
i. select json_tuple(json,‘movie’,‘rate’,‘timeStamp’,‘uid’) as(movie,rate,ts,uid) from t_json limit 10;
c. 真正解析整张json表,将解析结果数据插入一张新表
create table t_movie_rate
as
select json_tuple(json,‘movie’,‘rate’,‘timeStamp’,‘uid’) as(movie,rate,ts,uid) from t_json;
d. 利用json_tuple从原始json数据表中,etl出一个详细信息表:
create table t_rate
as
select
uid,
movie,
rate,
year(from_unixtime(cast(ts as bigint))) as year,
month(from_unixtime(cast(ts as bigint))) as month,
day(from_unixtime(cast(ts as bigint))) as day,
hour(from_unixtime(cast(ts as bigint))) as hour,
minute(from_unixtime(cast(ts as bigint))) as minute,
from_unixtime(cast(ts as bigint)) as ts
from
(select
json_tuple(rateinfo,‘movie’,‘rate’,‘timeStamp’,‘uid’) as(movie,rate,ts,uid)
from t_json) tmp
; -
分析函数
a. 数据:
1,18,a,male
2,19,b,male
3,22,c,female
4,16,d,female
5,30,e,male
6,26,f,female
b. 详解:
row_number() OVER (PARTITION BY COL1 ORDERBY COL2)表示根据COL1分组,在分组内部根据COL2排序,而此函数计算的值就表示每组内部排序后的顺序编号(该编号在组内是连续并且唯一的)。c. 需要查询出每种性别中年龄最大的2条数据(创建表)
create table t_rn(id int,age int,name string,sex string)
row format delimited fields terminated by ‘,’;d. 实现(打标号)
使用row_number函数,对表中的数据按照性别分组,按照年龄倒序排序并进行标记hql代码: select id,age,name,sex, row_number() over(partition by sex order by age desc) as rank from t_rn i. 利用上述结果操作,满足最终要求 select id,age,name,sex from (select id,age,name,sex, row_number() over(partition by sex order by age desc) as rank from t_rn) tmp where rank<=2;
-
分析函数---->sum聚合,避免join
select username,month,m_sale,
sum(m_sale) over (partition by username order by month rows between unbounded preceding and current row)
from
(select username,substr(day,1,7) as month ,sum(msale) as m_sale
from t_sale
group by username,substr(day,1,7))tmp;a. 按照partition后面的字符分组,order by后面的字符排序,求和,(
Hive进阶-----------------------函数篇(常用函数)
最新推荐文章于 2022-11-20 17:39:35 发布