原始数据:
a b 1,2,3
c d 4,5,6
建表:
create table col2row_21(col1 string,col2 string,col3 string)
row format delimited
fields terminated by '\t';
表结构如下:
+------------------+------------------+------------------+--+
| col2row_21.col1 | col2row_21.col2 | col2row_21.col3 |
+------------------+------------------+------------------+--+
| a | b | 1,2,3 |
| c | d | 4,5,6 |
+------------------+------------------+------------------+--+
#使用hive sql把上述表的结果变成下述的结构
+-----------------+-----------------+-----------------+--+
| a | b | 1 |
| a | b | 2 |
| a | b | 3 |
| c | d | 4 |
| c | d | 5 |
| c | d | 6 |
+-----------------+-----------------+-----------------+--+
#首先二话不说想把第三个字段炸开, explode 其参数类型:Array map
#但是第三个字段目前是字符串 如何变成Array 进而炸开呢? split
select split(col3,',') from col2row_21;
select explode(split(col3,',')) from col2row_21;
+------+--+
| col |
+------+--+
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
+------+--+
#再配合使用lateral view和原来的表进行关联
select col2row_21.col1,col2row_21.col2,tem_table.* from col2row_21 lateral view explode(split(col3,',')) tem_table;
+------------------+------------------+----------------+--+
| col2row_21.col1 | col2row_21.col2 | tem_table.col |
+------------------+------------------+----------------+--+
| a | b | 1 |
| a | b | 2 |
| a | b | 3 |
| c | d | 4 |
| c | d | 5 |
| c | d | 6 |
+------------------+------------------+----------------+--+