hive内置函数

最新推荐文章于 2024-07-25 11:27:49 发布

weixin_73117535

最新推荐文章于 2024-07-25 11:27:49 发布

阅读量943

点赞数 14

文章标签： hive hadoop 数据仓库

本文链接：https://blog.csdn.net/weixin_73117535/article/details/139159008

版权

1. 日期函数

-- 当前前日期

select current_date;
select unix_timestamp(); 

-- 建议使用current_timestamp，有没有括号都可以 
select current_timestamp();

-- 时间戳转日期

select from_unixtime(1505456567); 
select from_unixtime(1505456567, 'yyyyMMdd'); 
select from_unixtime(1505456567, 'yyyy-MM-dd HH:mm:ss');

-- 日期转时间戳

select unix_timestamp('2019-09-15 14:23:00');

-- 计算时间差

select datediff('2020-04-18','2019-11-21'); 
select datediff('2019-11-21', '2020-04-18');

-- 查询当月第几天

select dayofmonth(current_date);

-- 计算月末:

select last_day(current_date);

-- 当月第1天:

select date_sub(current_date, dayofmonth(current_date)-1)
-- 貌似是2020-6-27 - 2020-6-26 = 2020-6-1 直接拿结尾相减了

-- 下个月第1天:

select add_months(date_sub(current_date, dayofmonth(current_date)-1), 1)

-- 貌似是2020-6-27 - 2020-6-26 = 2020-6-1， 然后再在月份上+1 成了2020-7-1

-- 字符串转时间（字符串必须为：yyyy-MM-dd格式）

select to_date('2020-01-01'); select to_date('2020-01-01 12:12:12');

-- 日期、时间戳、字符串类型格式化输出标准时间格式

select date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss'); 
select date_format(current_date(), 'yyyyMMdd'); 
select date_format('2020-06-01', 'yyyy-MM-dd HH:mm:ss');

-- 计算emp表中，每个人的工龄

select *, round(datediff(current_date, hiredate)/365,1) workingyears from emp;

2. 字符串函数

-- 转小写。

--lower 
select lower("HELLO WORLD");

-- 转大写。

--upper 
select lower(ename), ename from emp;

-- 求字符串长度。

--length 
select length(ename), ename from emp;

-- 字符串拼接。 concat / ||

select empno || " " ||ename idname from emp;
 
select concat(empno, " " ,ename) idname from emp;

-- 指定分隔符。 concat_ws(separator, [string | array(string)]+)
--前面写一个连接符，后面写要把什么连起来

SELECT concat_ws('.', 'www', array('Hadoop', 'com')); 
select concat_ws(" ", ename, job) from emp;

-- 求子串。

substr SELECT substr('abcdefghabcdefgh', 5); 
SELECT substr('abcdefghabcdefgh', -5); 
SELECT substr('abcdefghabcdefgh', 5, 5);

-- 字符串切分。split，注意 '.' 要加\\转义

select split("abcdefghabcdefgh", "\\.");

3. 数学函数

-- 四舍五入。round

-- round 
select round(314.15926); 
select round(314.15926, 2); -- 保留两位小数
select round(314.15926, -2); -- 小数点前面两位上做四舍五入，变300

-- 向上取整。ceil

select ceil(3.1415926);  -- 4

-- 向下取整。floor

select floor(3.1415926); -- 3

-- 其他数学函数包括：绝对值、平方、开方、对数运算、三角运算等

4. 条件函数

-- if (boolean testCondition（if条件）, T valueTrue (如果满足if条件), T valueFalseOrNull（如果不满足if条件）)

select sal, if (sal<1500, 1, if (sal < 3000, 2, 3)) from emp;
-- 如果sal<1500，那么就是1
-- <3000就是2
-- else 3

-- CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END

-- 将emp表的员工工资等级分类：0-1500、1500-3000、3000以上 
select sal, if (sal<=1500, 1, if (sal <= 3000, 2, 3)) from emp;

-- CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END
-- 复杂条件用 case when 更直观

select sal, case when sal<=1500 then 1                 
when sal<=3000 then 2                 
else 3 end sallevel  from emp;  -- sallevel是自己定义的列名

-- 以下语句等价

select ename, deptno,
        case deptno when 10 then 'accounting'
                   when 20 then 'research'
                   when 30 then 'sales'
                   else 'unknown' end deptname  
from emp; 


select ename, deptno,
        case when deptno=10 then 'accounting'
            when deptno=20 then 'research'
            when deptno=30 then 'sales' 
           else 'unknown' end deptname  
from emp;

-- COALESCE(T v1, T v2, ...)。返回参数中的第一个非空值；如果所有值都为 NULL，那么返回NULL

select sal, coalesce(comm, 0) from emp;
-- 如果comm是null，那么就返回0，因为此时0就是第一个非空的，不设0的话如果里面参数都是空，就返回空

-- isnull(a)空isnotnull(a)非空

select * from emp where isnull(comm);
 
select * from emp where isnotnull(comm);

-- nvl(T value, T default_value)

select empno, ename, job, mgr, hiredate, deptno, sal + nvl(comm,0) sumsal  
from emp;

-- nullif(x, y) 相等为空，否则为a

SELECT nullif("b", "b"), nullif("b", "a");
-- 返回空、b

5. UDTF函数

UDTF : User Defined Table-Generating Functions。用户定义表生成函数，一行输入，多行输出。

5.1 explode

炸裂函数 -- 就是将一行中复杂的 array 或者 map 结构拆分成多行

输入是一行，输出是一列

select explode(array('A','B','C')) as col;
 
select explode(map('a', 8, 'b', 88, 'c', 888));

-- UDTF's are not supported outside the SELECT clause, nor nested in expressions
不支持以下格式：

-- 不能有其他字段，像pageid
SELECT pageid, explode(adid_list) AS myCol... is not supported 

-- explode不能嵌套：
SELECT explode(explode(adid_list)) AS myCol... is not supported

5.2 lateral view (测试图的方式)

常与 表生成函数explode结合使用来解决上面的不支持的格式问题
-- lateral view 语法：

lateralView: 
LATERAL VIEW udtf(expression) -- udtf要写在LATERAL VIEW后 面
tableAlias    --表名
AS columnAlias (',' columnAlias)*  --列名
fromClause: FROM baseTable (lateralView)*    -- 基表

-- lateral view 的基本使用

with t1 as (
select 'OK' cola, split('www.lagou.com', '\\.') colb) 
select cola, word  
from t1    
lateral view explode(colb) t2 as word; --t2是起的别名，colc是起的列名

5.3 UDTF 案例1：

问题和数据

-- 数据(id tags)：
1 1,2,3 
2 2,3 
3 1,2

--编写sql,实现如下结果： 
1 1 
1 2 
1 3 
2 2 
2 3 
3 1 
3 2

-- 建表加载数据

create table tab1(id int, tags string) -- 建表 
row format delimited fields terminated by '\t'; --换行作为行分隔符
load data local inpath '/home/hadoop/data/tab1.dat' into table tab1; --加载数据到tab1表

-- SQL

select id, split(tags, ',')  --这只能拆成 1 2 3 分开
 from tab1;

select id, tag  -- 需要两次拆分，第一次拆掉，第二次由行变列
  from tab1
       lateral view explode(split(tags, ",")) t1 as tag;

码出这个lateral view的过程

5.4 UDTF 案例2：找到每个学员的好成绩

-- 数据

姓名|成绩
lisi|Chinese:90,Math:80,English:70 
wangwu|Chinese:88,Math:90,English:96 
maliu|Chinese:99,Math:65,English:60

创建表

create table studscore(
   name string
  ,score map<String,string>) 
row format delimited
fields terminated by '|'  --字段与字段之间拿|分隔
collection items terminated by ',' --集合类型，元素和元素之间的分隔
map keys terminated by ':'; --元素中，key value的分隔

加载数据

load data local inpath '/home/hadoop/data/score.dat' 
overwrite into table studscore;

第一步，使用 explode 函数将map结构拆分为多行

select explode(score) as (subject, socre) from studscore; 
--(subject, socre)要加括号，否则explode(score) as subject会被当成一个语句
--而socre from studscore会被当成一个语句，会从studscore中找socre字段，肯定找不到

拆分成功

第二步：关联name, subject, score1三个字段

-- explode常与 lateral view 函数联用，这两个函数结合在一起能关联其他字段

--但是这里缺少了学员姓名，之间加上学员姓名后不符合语法，出错。下面的语句有是错的
select name, explode(score) as (subject, socre) from studscore;  

-- 在这个语句基础上去修改
select name, subject, score1 as score from studscore 
lateral view explode(score) t1 as subject, score1; --t1是表的别名。as后跟列名

关联成功，蓝色是拆分后的，红色是原数据

第三步：找到每个学员的好成绩，两种方法

-- 方法1：子查询
select name, max(mark) maxscore
  from (select name, subject, mark
          from studscore lateral view explode(score) t1 as
 subject, mark) t1 
group by name;

--方法2: 把第二步的表作为临时表tmp，然后从临时表tmp中查询
with tmp as (
 select name, subject, mark
  from studscore lateral view explode(score) t1 as subject,
 mark 
) 
select name, max(mark) maxscore
  from tmp 
group by name;

weixin_73117535

关注

14
点赞
踩
17

收藏

觉得还不错? 一键收藏
1
评论
hive内置函数

- explode常与 lateral view 函数联用，这两个函数结合在一起能。split，注意 '.' 要加\\转义。码出这个lateral view的过程。关联成功，蓝色是拆分后的，红色是原数据。输入是一行，输出是一列。
复制链接

扫一扫