函数(没有自定义函数)

--查看自带函数
show functions ;
-- 查看自带函数的用法
desc function upper;
desc function date_add;
-- 详细显示用法
desc function extended upper;
--空字段赋值    NVL( value,default_value)
--如果value为NULL,则NVL函数返回default_value的值,否则返回value的值,
-- 如果两个参数都为NULL ,则返回NULL。
select comm,nvl(comm,-1) from emp;
select comm,nvl(comm,mgr) from emp;
-- case when then else end
create table emp_sex(
    name string,
    dept_id string,
    sex string
)row format delimited fields terminated by '\t';
load data local inpath '/opt/module/hive/datas/emp_sex.txt' into table emp_sex;
-- 求出不同部门男女各多少人
select * from emp_sex;
select dept_id,
       sum(case when sex ='男' then 1 else 0 end ) men,
       sum(case when sex='女' then 1 else 0 end ) women
from emp_sex
group by dept_id;
-- 或
select dept_id,
       sum(case sex when '男' then 1 else 0 end ) men,
       sum(case sex when '女' then 1 else 0 end ) women
from emp_sex
group by dept_id;
-- 行转列
-- 字符串连接:CONCAT(string A/col, string B/col…)
-- 加分隔符的字符串连接:CONCAT_WS(separator, str1, str2,...)
-- 将某字段去重汇总转化成array:COLLECT_SET(col)
create table person_info(
name string,
constellation string,
blood_type string)
row format delimited fields terminated by "\t";
load data local inpath "/opt/module/hive/datas/persion_info.txt" into table person_info;
select * from person_info;
-- 把星座和血型一样的人归类到一起
select concat_ws(',',constellation,blood_type) xx,
       concat_ws('|',collect_set(name)) from person_info
group by constellation,blood_type;
-- 列转行
-- array或者map结构拆分成多行:explode(col)
-- LATERAL VIEW udtf(expression) tableAlias AS columnAlias
-- 用于和split, explode等UDTF一起使用,它能够将一列数据拆成多行数据
create table movie_info(
    movie string,
    category string)
row format delimited fields terminated by "\t";
load data local inpath "/opt/module/hive/datas/movie_info.txt" into table movie_info;
select * from movie_info;
select movie,category_name from movie_info
lateral view explode(split(category,',')) movie_info as category_name;
-- lateral view为侧视图,
-- 意义是为了配合UDTF来使用,把某一行数据拆分成多行数据.
-- 不加lateral view的UDTF只能提取单个字段拆分,并不能塞会原来数据表中.
-- 加上lateral view就可以将拆分的单个字段数据与原始表数据关联上.
-- 因此上面使用lateral view 的能运行,下面的不能
-- select movie,explode(split(category,',')) category_name from movie_info;

-- 窗口函数(开窗函数)
-- OVER():指定分析函数工作的数据窗口大小,这个数据窗口大小可能会随着行的改变而变化。
-- CURRENT ROW:当前行
-- n PRECEDING:往前n行数据
-- n FOLLOWING:往后n行数据
-- UNBOUNDED:起点,
-- 	UNBOUNDED PRECEDING 表示从前面的起点,
--  UNBOUNDED FOLLOWING表示到后面的终点
-- LAG(col,n,default_val):往前第n行数据
-- LEAD(col,n, default_val):往后第n行数据
-- NTILE(n):把有序窗口的行分发到指定数据的组中,各个组有编号,编号从1开始,对于每一行,NTILE返回此行所属的组的编号。注意:n必须为int类型。
create table if not exists business(
name string,
orderdate string,
cost int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
load data local inpath "/opt/module/hive/datas/business.txt" into table business;
select * from business;
-- (1)查询在2017年4月份购买过的顾客及总人数
select name,count(*) from business
where orderdate like '2017-04%'
group by name;

select distinct name,count(*) over (partition by name)
from business
where substring(orderdate,1,7)='2017-04'
-- 累计要加上order by  count(*) over (partition by xxx order by)
-- (2)查询顾客的购买明细及月购买总额
select * from business;
select name,orderdate,cost,
       sum(cost) over (partition by month(orderdate))
from business;
-- (3)上述的场景, 将每个顾客的cost按照日期进行累加
select * from business;
-- 按name分组组内数据相加
select name,orderdate,cost,
       sum(cost) over(partition by name) as sum
from business;
--按照anme分组,组内数据累加
select name,orderdate,cost,
       sum(cost) over(partition by name order by orderdate) as sum
from business;
-- sum(cost) over(partition by name order by orderdate rows between
-- UNBOUNDED PRECEDING and current row ) as sample4 ,--和sample3一样,由起点到当前行的聚合
-- sum(cost) over(partition by name order by orderdate rows between 1 PRECEDING and current row) as sample5, --当前行和前面一行做聚合
-- sum(cost) over(partition by name order by orderdate rows between 1 PRECEDING AND 1 FOLLOWING ) as sample6,--当前行和前边一行及后面一行
-- sum(cost) over(partition by name order by orderdate rows between current row and UNBOUNDED FOLLOWING ) as sample7 --当前行及后面所有行
-- (4)查询每个顾客上次的购买时间
-- 往前第几行
select * from business;
select name,orderdate,cost,
       lag(orderdate,1,'1900-01-01') over (partition by name order by orderdate) time1,
       lag(orderdate,2) over (partition by name order by orderdate) time2
from business;
select name,orderdate,cost,
       lag(orderdate,1) over (partition by name order by orderdate) tt
from business;
-- (5)查询前20%时间的订单信息
select * from (
  select name,orderdate,cost,
         ntile(5) over (order by orderdate) sorted
  from business) t
where sorted=1;
-- 使用PERCENT_RANK来计算时间上的百分位
SELECT *
FROM (
  SELECT name,orderdate, cost,
         percent_rank() over(order by orderdate) percentile_rank
  FROM business
) subquery
WHERE percentile_rank <= 0.2
ORDER BY orderdate;
-- CTE
WITH ranked_orders AS (
  SELECT name, orderdate, cost,
         percent_rank() over (order by orderdate) AS percentile_rank
  FROM business
)
SELECT *
FROM ranked_orders
WHERE percentile_rank <= 0.2
ORDER BY orderdate;
-- Rank
-- RANK() 排序相同时会重复,总数不会变,会把相同的越过去,相同的数据也占排序位
-- DENSE_RANK() 排序相同时会重复,总数会减少,相同的数据不占排序位
-- ROW_NUMBER() 会根据顺序计算排序,或组内排序,流畅
create table score(
name string,
subject string,
score int)
row format delimited fields terminated by "\t";
load data local inpath '/opt/module/hive/datas/score.txt' into table score;
select * from score;
-- 计算每门学科成绩排名。
select name,subject,score,
       rank() over (partition by subject order by score desc ) aa,
       dense_rank() over (partition by subject order by score desc ) bb,
       row_number() over (partition by subject order by score desc ) cc
from score;
-- 其他常用函数没做记录
-- 自定义函数,udf和udtf常用
-- (1)UDF(User-Defined-Function)
-- 	一进一出
-- (2)UDAF(User-Defined Aggregation Function)
-- 	聚集函数,多进一出
-- 	类似于:count/max/min
-- (3)UDTF(User-Defined Table-Generating Functions)
-- 	一进多出
-- 	如lateral view explode()
-- 编程步骤
-- 1.继承类,2.实现类中抽象方法3.创建函数
-- 不自定义函数统计字段出现次数input.txt
-- 对于这种比较乱的数据用一个字段,后续再分开
create table if not exists input(
    word string
)
row format delimited fields terminated by '\t';
load data local inpath '/opt/module/hive/datas/input.txt' into table input;
select * from input;
select t.a,count(*)
from (select explode(split(word,',')) a from input) t
group by t.a;

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值