创建表,导入数据
create table person_info(
name string,
age int,
constellation string,
blood_type string)
row format delimited fields terminated by " ";
load data local inpath "/root/test.txt" into table person_info;
test.txt 据信息
孙悟空 20 白羊座 A
大海 19 射手座 A
宋宋 36 白羊座 B
猪八戒 40 白羊座 A
凤姐 18 射手座 A
行转列
concat
select concat(name,"-",age,"-",constellation,"-",blood_type)
from person_info;
concat(string A/col, string B/col…):返回输入字符串连接后的结果,支持任意个输入
字符串;
这里把每一行的列都用“-”拼接成一列信息
concat_ws
concat_ws(separator, str1, str2,…):它是一个特殊形式的 concat()。
separator:分隔符。将str1、str2…都用separator连接
str必须是string或array,否则报错
比如:
我们可以把age转成string类型
select concat_ws("-",name,cast(age as string)) from person_in;
concat_set
concat_set(col):函数只接受基本数据类型,它的主要作用是将某字段的值进行去
重汇总,产生 array 类型字段。
select collect_set(constellation) from person_info;
concat、concat_ws、collect_set连用
我先查看不同星座不同血型有哪些人,这些人用“|”分割
首先把不同星座不同血型拼接到一起
select
name,
concat(constellation, ",", blood_type) base
from
person_info
select
t1.base,
concat_ws('|', collect_set(t1.name)) name
from
(select
name,
concat(constellation, ",", blood_type) base
from
person_info) t1
group by
t1.base;
列转行
explode
explode(col):将 hive 一列中复杂的 array 或者 map 结构拆分成多行。
later view
用法:later view udtf(expression) tableAlias AS columnAlias
解释:用于和 split, explode 等 UDTF 一起使用,它能够将一列数据拆成多行数据,在此
基础上可以对拆分后的数据进行聚合。
create table movie_info(
movie string,
category array<string>)
row format delimited fields terminated by "\t"
collection items terminated by ",";
load data local inpath "/opt/module/datas/movie.txt" into table
movie_info;
炸裂开movie_info中category列
select explode(category) from movie_info;
如果想在前面加电影名,下面写法报错,这是需要lateral view
select
movie,
category_name
from
movie_info
lateral view explode(category) table_tmp as category_name;
lateral view explode(category) table_tmp as category_name;
把explode(category) 变成table_tmp ,加上lateral view 把列名叫做category_name
再举个例子1
user friends
小张 王五,李四
小红 张三,李四
select
user,friend
from ods_user_friends
lateral view explode(split(friends,",")) s as friend
user friend
小张 王五
小张 李四
小红 张三
小红 李四
例子2
select
create_by,
max(case when dt='2021-11-23' then cnt else 0 end) as '2021-11-23' ,
max(case when dt='2021-11-24' then cnt else 0 end) as '2021-11-24' ,
max(case when dt='2021-11-25' then cnt else 0 end) as '2021-11-25' ,
max(case when dt='2021-11-26' then cnt else 0 end) as '2021-11-26' ,
max(case when dt='2021-11-27' then cnt else 0 end) as '2021-11-27' ,
max(case when dt='2021-11-28' then cnt else 0 end) as '2021-11-28' ,
max(case when dt='2021-11-29' then cnt else 0 end) as '2021-11-29' ,
max(case when dt='2021-11-30' then cnt else 0 end) as '2021-11-30' ,
max(case when dt='2021-12-01' then cnt else 0 end) as '2021-12-01' ,
max(case when dt='2021-12-02' then cnt else 0 end) as '2021-12-02' ,
max(case when dt='2021-12-03' then cnt else 0 end) as '2021-12-03' ,
max(case when dt='2021-12-04' then cnt else 0 end) as '2021-12-04' ,
max(case when dt='2021-12-05' then cnt else 0 end) as '2021-12-05' ,
max(case when dt='2021-12-06' then cnt else 0 end) as '2021-12-06' ,
max(case when dt='2021-12-07' then cnt else 0 end) as '2021-12-07' ,
max(case when dt='2021-12-08' then cnt else 0 end) as '2021-12-08' ,
max(case when dt='2021-12-09' then cnt else 0 end) as '2021-12-09' ,
max(case when dt='2021-12-10' then cnt else 0 end) as '2021-12-10' ,
max(case when dt='2021-12-11' then cnt else 0 end) as '2021-12-11' ,
max(case when dt='2021-12-12' then cnt else 0 end) as '2021-12-12' ,
max(case when dt='2021-12-13' then cnt else 0 end) as '2021-12-13' ,
max(case when dt='2021-12-14' then cnt else 0 end) as '2021-12-14' ,
max(case when dt='2021-12-15' then cnt else 0 end) as '2021-12-15' ,
max(case when dt='2021-12-16' then cnt else 0 end) as '2021-12-16' ,
max(case when dt='2021-12-17' then cnt else 0 end) as '2021-12-17' ,
max(case when dt='2021-12-18' then cnt else 0 end) as '2021-12-18' ,
max(case when dt='2021-12-19' then cnt else 0 end) as '2021-12-19' ,
max(case when dt='2021-12-20' then cnt else 0 end) as '2021-12-20' ,
max(case when dt='2021-12-21' then cnt else 0 end) as '2021-12-21' ,
max(case when dt='2021-12-22' then cnt else 0 end) as '2021-12-22'
from
tmp
group by create_by