If
hive> select if(1>0,1,0);
OK
1
Time taken: 0.454 seconds, Fetched: 1 row(s)
hive> select if(1>0,if(-1>0,-1,1),0);
OK
1
Time taken: 0.438 seconds, Fetched: 1 row(s)
hive> select score,if(score>120,'优秀',if(score>100,'良好',if(score>90,'及格','不及格'))) as pingfen from score limit 20;
OK
98 及格
5 不及格
137 优秀
29 不及格
85 不及格
52 不及格
139 优秀
102 良好
44 不及格
18 不及格
46 不及格
91 及格
48 不及格
132 优秀
41 不及格
32 不及格
7 不及格
99 及格
147 优秀
69 不及格
Time taken: 0.029 seconds, Fetched: 20 row(s)
COALESCE
hive> select COALESCE(null,'1','2');
OK
1
Time taken: 0.442 seconds, Fetched: 1 row(s)
hive> select COALESCE('1',null,'2');
OK
1
Time taken: 0.447 seconds, Fetched: 1 row(s)
hive> create table testCoalesce(
> col1 string,
> col2 string,
> col3 string
> )
> row format delimited fields terminated by ',';
OK
Time taken: 0.027 seconds
hive> insert into testCoalesce values(1,2,3),(null,4,5),(null,null,6);
hive> select * from testCoalesce;
OK
1 2 3
NULL 4 5
NULL NULL 6
Time taken: 0.034 seconds, Fetched: 3 row(s)
hive> select coalesce(col1,col2,col3) from testCoalesce;
OK
1
4
6
Time taken: 0.032 seconds, Fetched: 3 row(s)
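Since coalesce returns its first non-NULL argument, appending a literal gives a guaranteed fallback even when every column is NULL. A minimal sketch against the same table (the 'N/A' default is purely illustrative):

select coalesce(col1, col2, col3, 'N/A') as first_non_null from testCoalesce;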
case when
hive> select score
> ,case when score>120 then '优秀'
> when score>100 then '良好'
> when score>90 then '及格'
> else '不及格'
> end as pingfen
> from score limit 20;
OK
98 及格
5 不及格
137 优秀
29 不及格
85 不及格
52 不及格
139 优秀
102 良好
44 不及格
18 不及格
46 不及格
91 及格
48 不及格
132 优秀
41 不及格
32 不及格
7 不及格
99 及格
147 优秀
69 不及格
Time taken: 0.048 seconds, Fetched: 20 row(s)
hive>
> select name
> ,case name when "施笑槐" then "槐ge"
> when "吕金鹏" then "鹏ge"
> when "单乐蕊" then "蕊jie"
> else "算了不叫了"
> end as nickname
> from students limit 10;
OK
施笑槐 槐ge
吕金鹏 鹏ge
单乐蕊 蕊jie
葛德曜 算了不叫了
宣谷芹 算了不叫了
边昂雄 算了不叫了
尚孤风 算了不叫了
符半双 算了不叫了
沈德昌 算了不叫了
羿彦昌 算了不叫了
Time taken: 0.027 seconds, Fetched: 10 row(s)
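Note the two CASE forms used above: the first query is a searched CASE (boolean conditions tested top to bottom, first match wins), while the second is a simple CASE (one expression compared for equality against each value). Skeletons of both:

case when <cond1> then <v1> when <cond2> then <v2> else <default> end        -- searched CASE
case <expr> when <val1> then <v1> when <val2> then <v2> else <default> end   -- simple CASE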
String functions
-- concat: concatenates strings; returns NULL if any argument is NULL
select concat('123','456'); -- 123456
select concat('123','456',null); -- NULL
-- concat_ws: takes a separator as its first argument and automatically ignores NULLs
select concat_ws('#','a','b','c'); -- a#b#c
select concat_ws('#','a','b','c',NULL); -- a#b#c
select concat_ws("|",cast(id as string),name,cast(age as string),gender,clazz) from students limit 10;
-- substring: whenever HQL deals with positions, counting starts from 1
select substring("abcdefg",1); -- abcdefg
-- '2021/01/14' -> '2021-01-14'
select concat_ws("-",substring('2021/01/14',1,4),substring('2021/01/14',6,2),substring('2021/01/14',9,2));
-- better: use the date functions for date conversions
select from_unixtime(unix_timestamp('2021/01/14','yyyy/MM/dd'),'yyyy-MM-dd');
select split("abcde,fgh",","); -- ["abcde","fgh"]
select split("a,b,c,d,e,f",",")[2]; -- c
select explode(split("abcde,fgh",",")); -- abcde
                                        -- fgh
-- parse JSON-formatted data
select get_json_object('{"name":"zhangsan","age":18,"score":[{"course_name":"math","score":100},{"course_name":"english","score":60}]}',"$.score[0].score"); -- 100
hive> select concat_ws('#','a','b','c');
OK
a#b#c
Time taken: 0.428 seconds, Fetched: 1 row(s)
hive> select concat_ws('#','a','b','c',NULL);
OK
a#b#c
Time taken: 0.431 seconds, Fetched: 1 row(s)
hive> select substring('2022-04-06',6,5);
OK
04-06
Time taken: 0.428 seconds, Fetched: 1 row(s)
hive> select split('abc,efg,hijk',',');
OK
["abc","efg","hijk"]
Time taken: 0.438 seconds, Fetched: 1 row(s)
hive> select split('abc,efg,hijk',',')[1];
OK
efg
Time taken: 0.44 seconds, Fetched: 1 row(s)
hive> select explode(split('abc,efg,hijk',','));
OK
abc
efg
hijk
Time taken: 0.029 seconds, Fetched: 3 row(s)
hive> create table wordCount(
> words string
> )
> row format delimited fields terminated by '\t';
OK
Time taken: 0.028 seconds
hive> load data local inpath '/usr/local/soft/data/words.txt' into table wordCount;
Loading data to table test5.wordcount
Table test5.wordcount stats: [numFiles=1, totalSize=435]
OK
Time taken: 0.507 seconds
hive> select * from wordCount;
OK
java,c,c++,sql,scale,python
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
Time taken: 0.031 seconds, Fetched: 17 row(s)
hive> select split(words,',') from wordCount;
OK
["java","c","c++","sql","scale","python"]
["mysql","springboot","redis"]
["hadoop","hive","hbase","spark","flink"]
["kafka","sqoop","flume","datax","kettle","flink"]
["phoenix"]
["mysql","springboot","redis"]
["hadoop","hive","hbase","spark","flink"]
["kafka","sqoop","flume","datax","kettle","flink"]
["phoenix"]
["mysql","springboot","redis"]
["hadoop","hive","hbase","spark","flink"]
["kafka","sqoop","flume","datax","kettle","flink"]
["phoenix"]
["mysql","springboot","redis"]
["hadoop","hive","hbase","spark","flink"]
["kafka","sqoop","flume","datax","kettle","flink"]
["phoenix"]
Time taken: 0.031 seconds, Fetched: 17 row(s)
hive> select explode(split(words,',')) from wordCount;
OK
java
c
c++
sql
scale
python
mysql
springboot
redis
hadoop
hive
hbase
spark
flink
kafka
sqoop
flume
datax
kettle
flink
phoenix
mysql
springboot
redis
hadoop
hive
hbase
spark
flink
kafka
sqoop
flume
datax
kettle
flink
phoenix
mysql
springboot
redis
hadoop
hive
hbase
spark
flink
kafka
sqoop
flume
datax
kettle
flink
phoenix
mysql
springboot
redis
hadoop
hive
hbase
spark
flink
kafka
sqoop
flume
datax
kettle
flink
phoenix
Time taken: 0.022 seconds, Fetched: 66 row(s)
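Note that explode is a UDTF, so Hive rejects a query that selects ordinary columns next to a bare explode (e.g. select words, explode(split(words,',')) fails). Either wrap the explode in a subquery, as the word count below does, or join each exploded row back to its source row with lateral view (a sketch):

select words, word
from wordCount lateral view explode(split(words,',')) t as word;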
hive> select word
> ,count(*) as cnt
> from(
> select explode(split(words,',')) as word
> from wordCount
> )t1 group by word;
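The same word count can also be written with lateral view and sorted by frequency (a sketch; not shown in the transcript above):

select word
      ,count(*) as cnt
from wordCount lateral view explode(split(words,',')) t as word
group by word
order by cnt desc;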
hive> select get_json_object('{"name":"zhangsan","age":18,"score":[{"course_name":"math","score":100},{"course_name":"english","score":60}]}',"$.score[0].score");
OK
100
Time taken: 0.458 seconds, Fetched: 1 row(s)
hive> select get_json_object('{"name":"zhangsan","age":18,"score":[{"course_name":"math","score":100},{"course_name":"english","score":60}]}',"$.score[0]");
OK
{"course_name":"math","score":100}
Time taken: 0.444 seconds, Fetched: 1 row(s)
hive> select get_json_object('{"name":"zhangsan","age":18,"score":[{"course_name":"math","score":100},{"course_name":"english","score":60}]}',"$.score");
OK
[{"course_name":"math","score":100},{"course_name":"english","score":60}]
Time taken: 0.436 seconds, Fetched: 1 row(s)
hive> select get_json_object('{"name":"zhangsan","age":18,"score":[{"course_name":"math","score":100},{"course_name":"english","score":60}]}',"$.name");
OK
zhangsan
Time taken: 0.428 seconds, Fetched: 1 row(s)
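When several top-level keys are needed at once, json_tuple avoids repeated get_json_object calls (a sketch; json_tuple only reaches top-level keys, so nested paths like $.score[0].score still need get_json_object):

select json_tuple('{"name":"zhangsan","age":18}', 'name', 'age'); -- zhangsan  18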
hive> select t1.id,
> t1.name,
> t1.age,
> t1.gender,
> t1.clazz,
> t2.max_age
> from students t1
> left join(
> select clazz,
> max(age)as max_age
> from students
> group by clazz
> )t2 on t1.clazz=t2.clazz
> order by t1.clazz;
hive>
> select id
> ,name
> ,age
> ,gender
> ,clazz
> ,max(age) over(partition by clazz order by clazz) as max_age
> from students;
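Both queries attach each class's maximum age to every student row: the first with a self-join against an aggregated subquery, the second in a single pass with max() over a partition (the order by clazz inside the over() clause is redundant, since the partition already fixes clazz). A related pattern, keeping only the oldest student per class, is sketched below (row_number breaks ties arbitrarily; use rank to keep all tied rows):

select id, name, age, gender, clazz
from (
    select id, name, age, gender, clazz
          ,row_number() over (partition by clazz order by age desc) as rn
    from students
) t1
where rn = 1;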
row_number: sequential ranking with no ties
Usage: select xxxx, row_number() over(partition by group_column order by sort_column desc) as rn from tb
dense_rank: tied values share a rank, and ranks increase consecutively (no gaps)
rank: tied values share a rank, but ranks are not consecutive (gaps follow ties)
percent_rank: (rank - 1) / (number of rows in the partition - 1)
cume_dist: the cumulative distribution of a value within a window or partition.
Assuming ascending order, the cumulative distribution is: (number of rows <= the current value x) / (total number of rows in the window or partition), where x is the current row's value of the column named in the order by clause.
NTILE(n): splits the rows of a partition into n groups and labels each row with its group number
max, min, avg, count, sum: compute the corresponding aggregate over the rows of each partition
Window frame: selects a specified set of rows from the partition for the window function to process (see the sketch below)
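Common frame specifications, as a reference sketch (with an order by and no explicit frame, Hive defaults to range between unbounded preceding and current row; the query below uses an explicit rows frame):

rows between unbounded preceding and current row          -- running aggregate
rows between unbounded preceding and unbounded following  -- the whole partition
rows between 2 preceding and 2 following                  -- sliding window of up to 5 rows
range between 3 preceding and 11 following                -- bounds by the order-by column's value, not row count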
hive> create table new_score(
> id int
> ,score int
> ,clazz string
> ,department string
> ) row format delimited fields terminated by ",";
OK
Time taken: 0.031 seconds
hive> load data local inpath '/usr/local/soft/data/new_score.txt' into table new_score;
Loading data to table test5.new_score
Table test5.new_score stats: [numFiles=1, numRows=0, totalSize=520, rawDataSize=0]
OK
Time taken: 0.51 seconds
hive> select * from new_score;
OK
111 69 class1 department1
112 80 class1 department1
113 74 class1 department1
114 94 class1 department1
115 93 class1 department1
121 74 class2 department1
122 86 class2 department1
123 78 class2 department1
124 70 class2 department1
211 93 class1 department2
212 83 class1 department2
213 94 class1 department2
214 94 class1 department2
215 82 class1 department2
216 74 class1 department2
221 99 class2 department2
222 78 class2 department2
223 74 class2 department2
224 80 class2 department2
225 85 class2 department2
Time taken: 0.018 seconds, Fetched: 20 row(s)
hive> SELECT id
> ,score
> ,clazz
> ,SUM(score) OVER w as sum_w
> ,round(avg(score) OVER w,3) as avg_w
> ,count(score) OVER w as cnt_w
> FROM new_score
> WINDOW w AS (PARTITION BY clazz ORDER BY score rows between 2 PRECEDING and 2 FOLLOWING);
Query ID = root_20220407112250_0a09ad5d-6a2c-4c1a-83af-af503ca25b2f
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1649211406017_0026, Tracking URL = http://master:8088/proxy/application_1649211406017_0026/
Kill Command = /usr/local/soft/hadoop-2.7.6/bin/hadoop job -kill job_1649211406017_0026
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2022-04-07 11:22:58,635 Stage-1 map = 0%, reduce = 0%
2022-04-07 11:23:03,875 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 0.67 sec
2022-04-07 11:23:10,015 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 2.27 sec
MapReduce Total cumulative CPU time: 2 seconds 270 msec
Ended Job = job_1649211406017_0026
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 2.27 sec HDFS Read: 9806 HDFS Write: 506 SUCCESS
Total MapReduce CPU Time Spent: 2 seconds 270 msec
OK
111 69 class1 217 72.333 3
113 74 class1 297 74.25 4
216 74 class1 379 75.8 5
112 80 class1 393 78.6 5
215 82 class1 412 82.4 5
212 83 class1 431 86.2 5
211 93 class1 445 89.0 5
115 93 class1 457 91.4 5
213 94 class1 468 93.6 5
114 94 class1 375 93.75 4
214 94 class1 282 94.0 3
124 70 class2 218 72.667 3
121 74 class2 296 74.0 4
223 74 class2 374 74.8 5
222 78 class2 384 76.8 5
123 78 class2 395 79.0 5
224 80 class2 407 81.4 5
225 85 class2 428 85.6 5
122 86 class2 350 87.5 4
221 99 class2 270 90.0 3
Time taken: 21.628 seconds, Fetched: 20 row(s)
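As a sanity check on the frame (rows between 2 PRECEDING and 2 FOLLOWING): for the first class1 row (id 111, score 69) there are no preceding rows, so the frame holds only scores 69, 74, 74, giving sum_w = 69 + 74 + 74 = 217, avg_w = 217 / 3 ≈ 72.333, and cnt_w = 3, exactly as the first output row shows.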
hive> select id
> ,score
> ,clazz
> ,department
> ,row_number() over (partition by clazz order by score desc) as rn_rk
> ,dense_rank() over (partition by clazz order by score desc) as dense_rk
> ,rank() over (partition by clazz order by score desc) as rk
> ,percent_rank() over (partition by clazz order by score desc) as percent_rk
> ,round(cume_dist() over (partition by clazz order by score desc),3) as cume_rk
> ,NTILE(3) over (partition by clazz order by score desc) as ntile_num
> ,max(score) over (partition by clazz order by score desc range between 3 PRECEDING and 11 FOLLOWING) as max_p
> from new_score;
Query ID = root_20220407112959_8db64286-c278-4a8b-b557-c54fb17ab190
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1649211406017_0027, Tracking URL = http://master:8088/proxy/application_1649211406017_0027/
Kill Command = /usr/local/soft/hadoop-2.7.6/bin/hadoop job -kill job_1649211406017_0027
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2022-04-07 11:30:06,604 Stage-1 map = 0%, reduce = 0%
2022-04-07 11:30:11,809 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 0.6 sec
2022-04-07 11:30:18,949 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 1.91 sec
MapReduce Total cumulative CPU time: 1 seconds 910 msec
Ended Job = job_1649211406017_0027
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 1.91 sec HDFS Read: 13656 HDFS Write: 946 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 910 msec
OK
114 94 class1 department1 1 1 1 0.0 0.273 1 94
214 94 class1 department2 2 1 1 0.0 0.273 1 94
213 94 class1 department2 3 1 1 0.0 0.273 1 94
211 93 class1 department2 4 2 4 0.3 0.455 1 94
115 93 class1 department1 5 2 4 0.3 0.455 2 94
212 83 class1 department2 6 3 6 0.5 0.545 2 83
215 82 class1 department2 7 4 7 0.6 0.636 2 83
112 80 class1 department1 8 5 8 0.7 0.727 2 83
113 74 class1 department1 9 6 9 0.8 0.909 3 74
216 74 class1 department2 10 6 9 0.8 0.909 3 74
111 69 class1 department1 11 7 11 1.0 1.0 3 69
221 99 class2 department2 1 1 1 0.0 0.111 1 99
122 86 class2 department1 2 2 2 0.125 0.222 1 86
225 85 class2 department2 3 3 3 0.25 0.333 1 86
224 80 class2 department2 4 4 4 0.375 0.444 2 80
123 78 class2 department1 5 5 5 0.5 0.667 2 80
222 78 class2 department2 6 5 5 0.5 0.667 2 80
121 74 class2 department1 7 6 7 0.75 0.889 3 74
223 74 class2 department2 8 6 7 0.75 0.889 3 74
124 70 class2 department1 9 7 9 1.0 1.0 3 70
Time taken: 20.502 seconds, Fetched: 20 row(s)
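Checking the class1 rows against the formulas above: id 211 (score 93) is rank 4 among 11 rows, so percent_rk = (4 - 1) / (11 - 1) = 0.3; with the descending order, 5 of the 11 rows have score >= 93, so cume_rk = 5 / 11 ≈ 0.455. Both match the output.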
hive>
> select id
> ,score
> ,clazz
> ,department
> ,lag(id,2) over (partition by clazz order by score desc) as lag_num
> ,LEAD(id,2) over (partition by clazz order by score desc) as lead_num
> ,FIRST_VALUE(id) over (partition by clazz order by score desc) as first_v_num
> ,LAST_VALUE(id) over (partition by clazz order by score desc) as last_v_num
> ,NTILE(3) over (partition by clazz order by score desc) as ntile_num
> from new_score;
Query ID = root_20220407113407_9459dbac-54ca-4180-a706-8162025832dd
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1649211406017_0028, Tracking URL = http://master:8088/proxy/application_1649211406017_0028/
Kill Command = /usr/local/soft/hadoop-2.7.6/bin/hadoop job -kill job_1649211406017_0028
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2022-04-07 11:34:15,939 Stage-1 map = 0%, reduce = 0%
2022-04-07 11:34:21,172 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 0.59 sec
2022-04-07 11:34:27,428 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 1.66 sec
MapReduce Total cumulative CPU time: 1 seconds 660 msec
Ended Job = job_1649211406017_0028
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 1.66 sec HDFS Read: 12286 HDFS Write: 872 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 660 msec
OK
114 94 class1 department1 NULL 213 114 213 1
214 94 class1 department2 NULL 211 114 213 1
213 94 class1 department2 114 115 114 213 1
211 93 class1 department2 214 212 114 115 1
115 93 class1 department1 213 215 114 115 2
212 83 class1 department2 211 112 114 212 2
215 82 class1 department2 115 113 114 215 2
112 80 class1 department1 212 216 114 112 2
113 74 class1 department1 215 111 114 216 3
216 74 class1 department2 112 NULL 114 216 3
111 69 class1 department1 113 NULL 114 111 3
221 99 class2 department2 NULL 225 221 221 1
122 86 class2 department1 NULL 224 221 122 1
225 85 class2 department2 221 123 221 225 1
224 80 class2 department2 122 222 221 224 2
123 78 class2 department1 225 121 221 222 2
222 78 class2 department2 224 223 221 222 2
121 74 class2 department1 123 124 221 223 3
223 74 class2 department2 222 NULL 221 223 3
124 70 class2 department1 121 NULL 221 124 3
Time taken: 20.61 seconds, Fetched: 20 row(s)
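Two things worth noting in this output. With an order by but no explicit frame, the window defaults to range between unbounded preceding and current row, so LAST_VALUE returns the last row of the current peer group rather than the partition's last row (which is why last_v_num changes from row to row); widening the frame fixes that. Also, lag and lead accept a third argument as a default in place of NULL. A sketch of both:

select id
      ,score
      ,clazz
      -- the partition's true last value: widen the frame explicitly
      ,LAST_VALUE(id) over (partition by clazz order by score desc
                            rows between unbounded preceding and unbounded following) as last_in_clazz
      -- return -1 (purely illustrative) instead of NULL when no row exists 2 positions back
      ,lag(id, 2, -1) over (partition by clazz order by score desc) as lag_with_default
from new_score;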