Hive 行转列(explode / lateral view:把一行中的 array、map 等复杂类型拆分成多行)
hive> create table testArray2(
> name string,
> weight array<string>
> )row format delimited
> fields terminated by '\t'
> COLLECTION ITEMS terminated by ',';
OK
Time taken: 0.172 seconds
hive> load data local inpath '/usr/local/soft/data/testArray2.txt' into table testArray2;
Loading data to table test6.testarray2
Table test6.testarray2 stats: [numFiles=1, totalSize=50]
OK
Time taken: 0.288 seconds
hive> select * from testArray2;
OK
志凯 ["\"150\"","\"170\"","\"180\""]
上单 ["\"150\"","\"180\"","\"190\""]
Time taken: 0.14 seconds, Fetched: 2 row(s)
hive> select explode(weight) from testArray2;
OK
"150"
"170"
"180"
"150"
"180"
"190"
Time taken: 0.062 seconds, Fetched: 6 row(s)
hive> select name
> ,col1
> from testArray2
> lateral view explode(weight) v1 as col1;
OK
志凯 "150"
志凯 "170"
志凯 "180"
上单 "150"
上单 "180"
上单 "190"
Time taken: 0.043 seconds, Fetched: 6 row(s)
hive> select posexplode(weight) from testArray2;
OK
0 "150"
1 "170"
2 "180"
0 "150"
1 "180"
2 "190"
Time taken: 0.032 seconds, Fetched: 6 row(s)
hive> select name
> ,col1
> ,val
> from testArray2
> lateral view posexplode(weight) v1 as col1,val;
OK
志凯 0 "150"
志凯 1 "170"
志凯 2 "180"
上单 0 "150"
上单 1 "180"
上单 2 "190"
Time taken: 0.034 seconds, Fetched: 6 row(s)
hive> select key from (select explode(map('key1',1,'key2',2,'key3',3)) as (key,value)) t;
OK
key1
key2
key3
Time taken: 0.453 seconds, Fetched: 3 row(s)
hive> select name,col1,col2 from testarray2 lateral view explode(map('key1',1,'key2',2,'key3',3)) t1 as col1,col2;
OK
志凯 key1 1
志凯 key2 2
志凯 key3 3
上单 key1 1
上单 key2 2
上单 key3 3
Time taken: 0.06 seconds, Fetched: 6 row(s)
Hive 列转行(collect_list / collect_set:把多行数据聚合成一行中的数组)
hive> create table testLieToLine(
> name string,
> col1 int
> )row format delimited
> fields terminated by '\t';
OK
Time taken: 0.045 seconds
hive> load data local inpath '/usr/local/soft/data/testLieToLine.txt' into table testLieToLine;
Loading data to table test6.testlietoline
Table test6.testlietoline stats: [numFiles=1, totalSize=66]
OK
Time taken: 0.533 seconds
hive> select * from testLieToLine;
OK
志凯 150
志凯 170
志凯 180
上单 150
上单 180
上单 190
Time taken: 0.021 seconds, Fetched: 6 row(s)
hive> select name,collect_list(col1) from testLieToLine group by name;
OK
上单 [150,180,190]
志凯 [150,170,180]
Time taken: 21.914 seconds, Fetched: 2 row(s)
hive> select t1.name
> ,collect_list(t1.col1)
> from (
> select name
> ,col1
> from testarray2
> lateral view explode(weight) t1 as col1
> ) t1 group by t1.name;
OK
上单 ["\"150\"","\"180\"","\"190\""]
志凯 ["\"150\"","\"170\"","\"180\""]
Hive自定义函数UserDefineFunction
UDF:一进一出
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.2.1</version>
</dependency>
<repositories>
<repository>
<id>spring</id>
<url>https://maven.aliyun.com/repository/spring</url>
</repository>
</repositories>
package com.shujia.hive;
import org.apache.hadoop.hive.ql.exec.UDF;
// Custom Hive UDF: extend the UDF class and implement an evaluate method.
public class Demo02HiveUDF extends UDF {
    /**
     * Extracts the substring of {@code str} from {@code start} (inclusive)
     * to {@code end} (exclusive), mirroring {@link String#substring(int, int)}.
     *
     * The declared return type here becomes the result type seen in HQL,
     * and the parameter types are the types of the values passed in the
     * HQL call, e.g. {@code select mySubString('2022-04-07', 5, 7)}.
     *
     * @param str   input string; Hive passes SQL NULL as a Java null
     * @param start start index, inclusive (0-based)
     * @param end   end index, exclusive
     * @return the requested substring, or null when the input is NULL or
     *         the indices are out of range — returning NULL for a bad row
     *         is the Hive convention; throwing would fail the whole query
     */
    public String evaluate(String str, int start, int end) {
        // Guard against SQL NULL: the original code would throw a
        // NullPointerException here and abort the query.
        if (str == null) {
            return null;
        }
        // Guard against StringIndexOutOfBoundsException for rows whose
        // value is shorter than the requested range.
        if (start < 0 || end > str.length() || start > end) {
            return null;
        }
        return str.substring(start, end);
    }
}
hive> add jar /usr/local/soft/jars/HIve-1.0-SNAPSHOT.jar
> ;
Added [/usr/local/soft/jars/HIve-1.0-SNAPSHOT.jar] to class path
Added resources: [/usr/local/soft/jars/HIve-1.0-SNAPSHOT.jar]
hive> create temporary function mySubString as 'com.shujia.hive.Demo02HiveUDF';
OK
Time taken: 0.006 seconds
hive> select mySubString('2022-04-07',5,7);
OK
04
Time taken: 0.435 seconds, Fetched: 1 row(s)
hive> select substring('2022-04-07',6,2);
OK
04
Time taken: 0.431 seconds, Fetched: 1 row(s)
hive> select * from deal_tb;
OK
1 2019-02-08 6214.23
1 2019-02-08 6247.32
1 2019-02-09 85.63
1 2019-02-09 967.36
1 2019-02-10 85.69
1 2019-02-12 769.85
1 2019-02-13 943.86
1 2019-02-14 538.42
1 2019-02-15 369.76
1 2019-02-16 369.76
1 2019-02-18 795.15
1 2019-02-19 715.65
1 2019-02-21 537.71
2 2019-02-08 6214.23
2 2019-02-08 6247.32
2 2019-02-09 85.63
2 2019-02-09 967.36
2 2019-02-10 85.69
2 2019-02-12 769.85
2 2019-02-13 943.86
2 2019-02-14 943.18
2 2019-02-15 369.76
2 2019-02-18 795.15
2 2019-02-19 715.65
2 2019-02-21 537.71
3 2019-02-08 6214.23
3 2019-02-08 6247.32
3 2019-02-09 85.63
3 2019-02-09 967.36
3 2019-02-10 85.69
3 2019-02-12 769.85
3 2019-02-13 943.86
3 2019-02-14 276.81
3 2019-02-15 369.76
3 2019-02-16 369.76
3 2019-02-18 795.15
3 2019-02-19 715.65
3 2019-02-21 537.71
Time taken: 0.03 seconds, Fetched: 38 row(s)
hive> select mySubString(datestr,8,10) from deal_tb;
OK
08
08
09
09
10
12
13
14
15
16
18
19
21
08
08
09
09
10
12
13
14
15
18
19
21
08
08
09
09
10
12
13
14
15
16
18
19
21
Time taken: 0.041 seconds, Fetched: 38 row(s)
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
public class Test01 {
    public static void main(String[] args) throws ParseException {
        // Date conversion demo: parse a Chinese-formatted date string,
        // subtract 7 days, and re-format it as yyyy/MM/dd.
        String dateStr = "公元2022年阳历04月05日";
        // Parse with the pattern to show the raw epoch millis, exactly as
        // the original demo printed them.
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("公元yyyy年阳历MM月dd日");
        Date date = simpleDateFormat.parse(dateStr);
        System.out.println(date.getTime());
        System.out.println(minusSevenDays(dateStr));
    }

    /**
     * Parses a date string like "公元2022年阳历04月05日", subtracts seven
     * days, and returns the result formatted as {@code yyyy/MM/dd}.
     *
     * The Chinese characters in the pattern are not pattern letters, so
     * SimpleDateFormat matches them literally.
     *
     * @param dateStr date string in the "公元yyyy年阳历MM月dd日" format
     * @return the date seven days earlier, formatted as yyyy/MM/dd
     * @throws ParseException if the input does not match the pattern
     */
    public static String minusSevenDays(String dateStr) throws ParseException {
        SimpleDateFormat inFormat = new SimpleDateFormat("公元yyyy年阳历MM月dd日");
        Date date = inFormat.parse(dateStr);
        // Use a long literal: 7 * 24 * 60 * 60 * 1000 as int arithmetic
        // happens to fit for 7 days, but the same idiom overflows int for
        // roughly 25 days or more — 7L keeps the whole product in long.
        long newDateTime = date.getTime() - 7L * 24 * 60 * 60 * 1000;
        SimpleDateFormat outFormat = new SimpleDateFormat("yyyy/MM/dd");
        return outFormat.format(new Date(newDateTime));
    }
}
UDTF:一进多出
hive> select "key1:value1,key2:value2,key3:value3";
OK
key1:value1,key2:value2,key3:value3
Time taken: 0.429 seconds, Fetched: 1 row(s)
hive> select split("key1:value1,key2:value2,key3:value3",',');
OK
["key1:value1","key2:value2","key3:value3"]
Time taken: 0.429 seconds, Fetched: 1 row(s)
hive> select explode(split("key1:value1,key2:value2,key3:value3",','));
OK
key1:value1
key2:value2
key3:value3
Time taken: 0.431 seconds, Fetched: 3 row(s)
hive> select explode(split("key1:value1,key2:value2,key3:value3",','))as kv;
OK
key1:value1
key2:value2
key3:value3
Time taken: 0.428 seconds, Fetched: 3 row(s)
hive> select split(t1.kv,',') from (select explode(split("key1:value1,key2:value2,key3:value3",','))as kv)t1;
OK
["key1:value1"]
["key2:value2"]
["key3:value3"]
Time taken: 0.029 seconds, Fetched: 3 row(s)
hive> select split(t1.kv,':') from (select explode(split("key1:value1,key2:value2,key3:value3",','))as kv)t1;
OK
["key1","value1"]
["key2","value2"]
["key3","value3"]
Time taken: 0.432 seconds, Fetched: 3 row(s)
hive> select split(t1.kv,':')[0],split(t1.kv,':')[1] from (select explode(split("key1:value1,key2:value2,key3:value3",','))as kv)t1;
OK
key1 value1
key2 value2
key3 value3
Time taken: 0.432 seconds, Fetched: 3 row(s)