1. DQL
1.1 join
- inner join : 只返回两边连接条件匹配上的数据
- outer join
- left outer join 以左边表为基准
- right outer join 以右边表为基准
- full outer join (可以用于数据质量核查)
join 的 on 条件中支持AND多条件,不支持OR
1.2 复杂数据类型
对于复杂数据类型,需要掌握的是如何存(建表时定义,数据导入)和如何取(掌握常用的查询方法)
ARRAY<T>
泛型,数据类型要一样
数据内容如下:
[ruoze@rzdata001 data]$ cat hive_array.txt
pk beijing,shanghai,tianjin,hangzhou
jepson changchu,chengdu,wuhan,beijing
[ruoze@rzdata001 data]$
简单使用:
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> create table hive_array(name string, work_locations array<string>)
. . . . . . . . . . . . . . . . . . . . . . > row format delimited
. . . . . . . . . . . . . . . . . . . . . . > fields terminated by '\t'
. . . . . . . . . . . . . . . . . . . . . . > collection items terminated by ',';
No rows affected (0.13 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> load data local inpath '/home/ruoze/data/hive_array.txt' overwrite into table hive_array;
No rows affected (0.357 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select * from hive_array limit 3;
+------------------+----------------------------------------------+--+
| hive_array.name | hive_array.work_locations |
+------------------+----------------------------------------------+--+
| pk | ["beijing","shanghai","tianjin","hangzhou"] |
| jepson | ["changchu","chengdu","wuhan","beijing"] |
+------------------+----------------------------------------------+--+
2 rows selected (0.066 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
2 rows selected (0.066 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select name ,work_locations[0]
. . . . . . . . . . . . . . . . . . . . . . > from hive_array limit 3;
+---------+-----------+--+
| name | _c1 |
+---------+-----------+--+
| pk | beijing |
| jepson | changchu |
+---------+-----------+--+
2 rows selected (0.078 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select name ,size(work_locations)
. . . . . . . . . . . . . . . . . . . . . . > from hive_array limit 3;
+---------+------+--+
| name | _c1 |
+---------+------+--+
| pk | 4 |
| jepson | 4 |
+---------+------+--+
2 rows selected (0.077 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select * from hive_array where array_contains(work_locations,'beijing') limit 5;
+------------------+----------------------------------------------+--+
| hive_array.name | hive_array.work_locations |
+------------------+----------------------------------------------+--+
| pk | ["beijing","shanghai","tianjin","hangzhou"] |
| jepson | ["changchu","chengdu","wuhan","beijing"] |
+------------------+----------------------------------------------+--+
2 rows selected (0.236 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
MAP<primitive_type,data_type>
k-v类型
数据内容如下:
[ruoze@rzdata001 data]$ cat hive_map.txt
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
[ruoze@rzdata001 data]$
简单使用:
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> create table hive_map(
. . . . . . . . . . . . . . . . . . . . . . > id int,
. . . . . . . . . . . . . . . . . . . . . . > name string,
. . . . . . . . . . . . . . . . . . . . . . > members map<string,string>,
. . . . . . . . . . . . . . . . . . . . . . > age int
. . . . . . . . . . . . . . . . . . . . . . > )
. . . . . . . . . . . . . . . . . . . . . . > row format delimited
. . . . . . . . . . . . . . . . . . . . . . > fields terminated by ','
. . . . . . . . . . . . . . . . . . . . . . > collection items terminated by '#'
. . . . . . . . . . . . . . . . . . . . . . > map keys terminated by ':';
No rows affected (0.083 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> load data local inpath '/home/ruoze/data/hive_map.txt' overwrite into table hive_map;
No rows affected (0.222 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select * from hive_map limit 3;
+--------------+----------------+----------------------------------------------------+---------------+--+
| hive_map.id | hive_map.name | hive_map.members | hive_map.age |
+--------------+----------------+----------------------------------------------------+---------------+--+
| 1 | zhangsan | {"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"} | 28 |
| 2 | lisi | {"father":"mayun","mother":"huangyi","brother":"guanyu"} | 22 |
| 3 | wangwu | {"father":"wangjianlin","mother":"ruhua","sister":"jingtian"} | 29 |
+--------------+----------------+----------------------------------------------------+---------------+--+
3 rows selected (0.073 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select id,name,members['father'] as father,members['mother'] as mother,age
. . . . . . . . . . . . . . . . . . . . . . > from hive_map;
+-----+-----------+--------------+-------------+------+--+
| id | name | father | mother | age |
+-----+-----------+--------------+-------------+------+--+
| 1 | zhangsan | xiaoming | xiaohuang | 28 |
| 2 | lisi | mayun | huangyi | 22 |
| 3 | wangwu | wangjianlin | ruhua | 29 |
| 4 | mayun | mayongzhen | angelababy | 26 |
+-----+-----------+--------------+-------------+------+--+
4 rows selected (0.079 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select id,name,map_keys(members) as relation, age
. . . . . . . . . . . . . . . . . . . . . . > from hive_map;
+-----+-----------+--------------------------------+------+--+
| id | name | relation | age |
+-----+-----------+--------------------------------+------+--+
| 1 | zhangsan | ["father","mother","brother"] | 28 |
| 2 | lisi | ["father","mother","brother"] | 22 |
| 3 | wangwu | ["father","mother","sister"] | 29 |
| 4 | mayun | ["father","mother"] | 26 |
+-----+-----------+--------------------------------+------+--+
4 rows selected (0.061 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select id,name,map_values(members) as names, age
. . . . . . . . . . . . . . . . . . . . . . > from hive_map;
+-----+-----------+-------------------------------------+------+--+
| id | name | names | age |
+-----+-----------+-------------------------------------+------+--+
| 1 | zhangsan | ["xiaoming","xiaohuang","xiaoxu"] | 28 |
| 2 | lisi | ["mayun","huangyi","guanyu"] | 22 |
| 3 | wangwu | ["wangjianlin","ruhua","jingtian"] | 29 |
| 4 | mayun | ["mayongzhen","angelababy"] | 26 |
+-----+-----------+-------------------------------------+------+--+
4 rows selected (0.056 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select id,name,size(members) as msize
. . . . . . . . . . . . . . . . . . . . . . > from hive_map;
+-----+-----------+--------+--+
| id | name | msize |
+-----+-----------+--------+--+
| 1 | zhangsan | 3 |
| 2 | lisi | 3 |
| 3 | wangwu | 3 |
| 4 | mayun | 2 |
+-----+-----------+--------+--+
4 rows selected (0.068 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> # 查询出有兄弟的人以及他的兄弟的名字 方式1
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select id,name,members['brother'] as brother
. . . . . . . . . . . . . . . . . . . . . . > from hive_map
. . . . . . . . . . . . . . . . . . . . . . > where array_contains(map_keys(members),'brother') ;
+-----+-----------+---------+--+
| id | name | brother |
+-----+-----------+---------+--+
| 1 | zhangsan | xiaoxu |
| 2 | lisi | guanyu |
+-----+-----------+---------+--+
2 rows selected (0.07 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> # 查询出有兄弟的人以及他的兄弟的名字(直接查)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select id,name,members['brother'] as brother
. . . . . . . . . . . . . . . . . . . . . . > from hive_map
. . . . . . . . . . . . . . . . . . . . . . > where members['brother'] is not null;
+-----+-----------+----------+--+
| id | name | brother |
+-----+-----------+----------+--+
| 1 | zhangsan | xiaoxu |
| 2 | lisi | guanyu |
+-----+-----------+----------+--+
2 rows selected (0.056 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> # 查询出有兄弟的人以及他的兄弟的名字(使用子查询实现)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select id,name,members['brother'] as brother
. . . . . . . . . . . . . . . . . . . . . . > from (
. . . . . . . . . . . . . . . . . . . . . . > select id,name,members from hive_map where members['brother'] is not null
. . . . . . . . . . . . . . . . . . . . . . > ) t;
+-----+-----------+----------+--+
| id | name | brother |
+-----+-----------+----------+--+
| 1 | zhangsan | xiaoxu |
| 2 | lisi | guanyu |
+-----+-----------+----------+--+
2 rows selected (0.049 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
STRUCT<col_name : data_type>
内部的数据类型可以不同
数据内容如下:
[ruoze@rzdata001 data]$ cat hive_struct.txt
192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70
[ruoze@rzdata001 data]$
简单使用:
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> create table hive_struct(
. . . . . . . . . . . . . . . . . . . . . . > ip string,
. . . . . . . . . . . . . . . . . . . . . . > info struct<name:string,age:int>
. . . . . . . . . . . . . . . . . . . . . . > )
. . . . . . . . . . . . . . . . . . . . . . > row format delimited
. . . . . . . . . . . . . . . . . . . . . . > fields terminated by '#'
. . . . . . . . . . . . . . . . . . . . . . > collection items terminated by ':';
No rows affected (0.937 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> load data local inpath '/home/ruoze/data/hive_struct.txt' overwrite into table hive_struct;
No rows affected (0.778 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select * from hive_struct;
+-----------------+-------------------------------+--+
| hive_struct.ip | hive_struct.info |
+-----------------+-------------------------------+--+
| 192.168.1.1 | {"name":"zhangsan","age":40} |
| 192.168.1.2 | {"name":"lisi","age":50} |
| 192.168.1.3 | {"name":"wangwu","age":60} |
| 192.168.1.4 | {"name":"zhaoliu","age":70} |
+-----------------+-------------------------------+--+
4 rows selected (0.246 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select ip, info.name, info.age
. . . . . . . . . . . . . . . . . . . . . . > from hive_struct;
+--------------+-----------+------+--+
| ip | name | age |
+--------------+-----------+------+--+
| 192.168.1.1 | zhangsan | 40 |
| 192.168.1.2 | lisi | 50 |
| 192.168.1.3 | wangwu | 60 |
| 192.168.1.4 | zhaoliu | 70 |
+--------------+-----------+------+--+
4 rows selected (0.13 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
1.3 查询过程中生成特殊类型
数据样例:
[ruoze@rzdata001 data]$ cat ad_list.txt
ad_101 http://www.google.com catalog8|catalog1
ad_102 http://www.sohu.com catalog6|catalog3
ad_103 http://www.baidu.com catalog7
ad_104 http://www.qq.com catalog5|catalog1|catalog4|catalog9
ad_105 http://sina.com
[ruoze@rzdata001 data]$
[ruoze@rzdata001 data]$ cat click_log.txt
11 ad_101 2014-05-01 06:01:12.334+01
22 ad_102 2014-05-01 07:28:12.342+01
33 ad_103 2014-05-01 07:50:12.33+01
11 ad_104 2014-05-01 09:27:12.33+01
22 ad_103 2014-05-01 09:03:12.324+01
33 ad_102 2014-05-02 19:10:12.343+01
11 ad_101 2014-05-02 09:07:12.344+01
35 ad_105 2014-05-03 11:07:12.339+01
22 ad_104 2014-05-03 12:59:12.743+01
77 ad_103 2014-05-03 18:04:12.355+01
99 ad_102 2014-05-04 00:36:39.713+01
33 ad_101 2014-05-04 19:10:12.343+01
11 ad_101 2014-05-05 09:07:12.344+01
35 ad_102 2014-05-05 11:07:12.339+01
22 ad_103 2014-05-05 12:59:12.743+01
77 ad_104 2014-05-05 18:04:12.355+01
99 ad_105 2014-05-05 20:36:39.713+01
[ruoze@rzdata001 data]$
建表,导入数据:
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> create table ad_list(
. . . . . . . . . . . . . . . . . . . . . . > ad_id string,
. . . . . . . . . . . . . . . . . . . . . . > url string,
. . . . . . . . . . . . . . . . . . . . . . > catalogs string
. . . . . . . . . . . . . . . . . . . . . . > )
. . . . . . . . . . . . . . . . . . . . . . > row format delimited
. . . . . . . . . . . . . . . . . . . . . . > fields terminated by '\t';
No rows affected (0.169 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> load data local inpath '/home/ruoze/data/ad_list.txt' overwrite into table ad_list;
No rows affected (0.135 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select * from ad_list;
+----------------+------------------------+--------------------------------------+--+
| ad_list.ad_id | ad_list.url | ad_list.catalogs |
+----------------+------------------------+--------------------------------------+--+
| ad_101 | http://www.google.com | catalog8|catalog1 |
| ad_102 | http://www.sohu.com | catalog6|catalog3 |
| ad_103 | http://www.baidu.com | catalog7 |
| ad_104 | http://www.qq.com | catalog5|catalog1|catalog4|catalog9 |
| ad_105 | http://sina.com | NULL |
+----------------+------------------------+--------------------------------------+--+
5 rows selected (0.18 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> create table click_log (
. . . . . . . . . . . . . . . . . . . . . . > cookie_id string,
. . . . . . . . . . . . . . . . . . . . . . > ad_id string,
. . . . . . . . . . . . . . . . . . . . . . > time string
. . . . . . . . . . . . . . . . . . . . . . > )
. . . . . . . . . . . . . . . . . . . . . . > row format delimited
. . . . . . . . . . . . . . . . . . . . . . > fields terminated by '\t';
No rows affected (0.08 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> load data local inpath '/home/ruoze/data/click_log.txt' overwrite into table click_log;
No rows affected (0.16 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select * from click_log limit 5;
+----------------------+------------------+-----------------------------+--+
| click_log.cookie_id | click_log.ad_id | click_log.time |
+----------------------+------------------+-----------------------------+--+
| 11 | ad_101 | 2014-05-01 06:01:12.334+01 |
| 22 | ad_102 | 2014-05-01 07:28:12.342+01 |
| 33 | ad_103 | 2014-05-01 07:50:12.33+01 |
| 11 | ad_104 | 2014-05-01 09:27:12.33+01 |
| 22 | ad_103 | 2014-05-01 09:03:12.324+01 |
+----------------------+------------------+-----------------------------+--+
5 rows selected (0.064 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
Examples
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> # 每个人访问每个广告的次数以及对应广告的类目
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select click.cookie_id,click.ad_id,click.amount,ad_list.catalogs
. . . . . . . . . . . . . . . . . . . . . . > from (
. . . . . . . . . . . . . . . . . . . . . . > select cookie_id, ad_id, count(1) amount from click_log group by cookie_id, ad_id
. . . . . . . . . . . . . . . . . . . . . . > ) click join ad_list
. . . . . . . . . . . . . . . . . . . . . . > on ad_list.ad_id = click.ad_id;
2019-12-19 05:07:03 Dump the side-table for tag: 1 with group count: 5 into file: file:/tmp/ruoze/5353762b-cd3b-415c-9fc7-58e47c9350e2/hive_2019-12-19_17-06-42_607_4705675263615256455-2/-local-10004/HashTable-Stage-4/MapJoin-mapfile01--.hashtable
2019-12-19 05:07:03 Uploaded 1 File to: file:/tmp/ruoze/5353762b-cd3b-415c-9fc7-58e47c9350e2/hive_2019-12-19_17-06-42_607_4705675263615256455-2/-local-10004/HashTable-Stage-4/MapJoin-mapfile01--.hashtable (466 bytes)
2019-12-19 05:07:03 End of local task; Time Taken: 0.899 sec.
+------------------+--------------+---------------+--------------------------------------+--+
| click.cookie_id | click.ad_id | click.amount | ad_list.catalogs |
+------------------+--------------+---------------+--------------------------------------+--+
| 11 | ad_101 | 3 | catalog8|catalog1 |
| 11 | ad_104 | 1 | catalog5|catalog1|catalog4|catalog9 |
| 22 | ad_102 | 1 | catalog6|catalog3 |
| 22 | ad_103 | 2 | catalog7 |
| 22 | ad_104 | 1 | catalog5|catalog1|catalog4|catalog9 |
| 33 | ad_101 | 1 | catalog8|catalog1 |
| 33 | ad_102 | 1 | catalog6|catalog3 |
| 33 | ad_103 | 1 | catalog7 |
| 35 | ad_102 | 1 | catalog6|catalog3 |
| 35 | ad_105 | 1 | NULL |
| 77 | ad_103 | 1 | catalog7 |
| 77 | ad_104 | 1 | catalog5|catalog1|catalog4|catalog9 |
| 99 | ad_102 | 1 | catalog6|catalog3 |
| 99 | ad_105 | 1 | NULL |
+------------------+--------------+---------------+--------------------------------------+--+
14 rows selected (33.439 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
1.4 行转列&列转行
行转列
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select * from click_log ;
+----------------------+------------------+-----------------------------+--+
| click_log.cookie_id | click_log.ad_id | click_log.time |
+----------------------+------------------+-----------------------------+--+
| 11 | ad_101 | 2014-05-01 06:01:12.334+01 |
| 22 | ad_102 | 2014-05-01 07:28:12.342+01 |
| 33 | ad_103 | 2014-05-01 07:50:12.33+01 |
| 11 | ad_104 | 2014-05-01 09:27:12.33+01 |
| 22 | ad_103 | 2014-05-01 09:03:12.324+01 |
| 33 | ad_102 | 2014-05-02 19:10:12.343+01 |
| 11 | ad_101 | 2014-05-02 09:07:12.344+01 |
| 35 | ad_105 | 2014-05-03 11:07:12.339+01 |
| 22 | ad_104 | 2014-05-03 12:59:12.743+01 |
| 77 | ad_103 | 2014-05-03 18:04:12.355+01 |
| 99 | ad_102 | 2014-05-04 00:36:39.713+01 |
| 33 | ad_101 | 2014-05-04 19:10:12.343+01 |
| 11 | ad_101 | 2014-05-05 09:07:12.344+01 |
| 35 | ad_102 | 2014-05-05 11:07:12.339+01 |
| 22 | ad_103 | 2014-05-05 12:59:12.743+01 |
| 77 | ad_104 | 2014-05-05 18:04:12.355+01 |
| 99 | ad_105 | 2014-05-05 20:36:39.713+01 |
+----------------------+------------------+-----------------------------+--+
17 rows selected (0.054 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> # 每个人访问的所有ad_id 去重
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select cookie_id,collect_set(ad_id)
. . . . . . . . . . . . . . . . . . . . . . > from click_log group by cookie_id;
+------------+-------------------------------+--+
| cookie_id | _c1 |
+------------+-------------------------------+--+
| 11 | ["ad_101","ad_104"] |
| 22 | ["ad_102","ad_103","ad_104"] |
| 33 | ["ad_103","ad_102","ad_101"] |
| 35 | ["ad_105","ad_102"] |
| 77 | ["ad_103","ad_104"] |
| 99 | ["ad_102","ad_105"] |
+------------+-------------------------------+--+
6 rows selected (19.221 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> # 每个人访问的所有ad_id 不去重
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select cookie_id,collect_list(ad_id)
. . . . . . . . . . . . . . . . . . . . . . > from click_log group by cookie_id;
+------------+----------------------------------------+--+
| cookie_id | _c1 |
+------------+----------------------------------------+--+
| 11 | ["ad_101","ad_104","ad_101","ad_101"] |
| 22 | ["ad_102","ad_103","ad_104","ad_103"] |
| 33 | ["ad_103","ad_102","ad_101"] |
| 35 | ["ad_105","ad_102"] |
| 77 | ["ad_103","ad_104"] |
| 99 | ["ad_102","ad_105"] |
+------------+----------------------------------------+--+
6 rows selected (17.263 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select cookie_id,concat_ws(',',collect_list(ad_id)) as ad_str
. . . . . . . . . . . . . . . . . . . . . . > from click_log group by cookie_id;
+------------+------------------------------+--+
| cookie_id | ad_str |
+------------+------------------------------+--+
| 11 | ad_101,ad_104,ad_101,ad_101 |
| 22 | ad_102,ad_103,ad_104,ad_103 |
| 33 | ad_103,ad_102,ad_101 |
| 35 | ad_105,ad_102 |
| 77 | ad_103,ad_104 |
| 99 | ad_102,ad_105 |
+------------+------------------------------+--+
6 rows selected (19.675 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>
列转行
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+LateralView
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb> select ad_id, catalog
. . . . . . . . . . . . . . . . . . . . . . > from ad_list
. . . . . . . . . . . . . . . . . . . . . . > lateral view outer explode(split(catalogs,'\\|')) t as catalog;
+---------+-----------+--+
| ad_id | catalog |
+---------+-----------+--+
| ad_101 | catalog8 |
| ad_101 | catalog1 |
| ad_102 | catalog6 |
| ad_102 | catalog3 |
| ad_103 | catalog7 |
| ad_104 | catalog5 |
| ad_104 | catalog1 |
| ad_104 | catalog4 |
| ad_104 | catalog9 |
| ad_105 | NULL |
+---------+-----------+--+
10 rows selected (0.063 seconds)
0: jdbc:hive2://rzdata001:10086/ruoze_hivedb>