目录
一列转多行:
oracle实现方式:
注意:以下只有单行数据或拆分单个字符串时候适用:
WITH test AS
(
SELECT '11' COL1,'zhang,wang,li,huang' COL2 FROM DUAL
)
SELECT test.col1,test.col2,
regexp_count(test.col2, '\,') + 1 as col2_num,
regexp_substr(test.col2,'[^,]+',1,level) col2_new,
level
FROM test
connect by level <= length(test.col2)-length(replace(test.col2,',',''))+1
表中存在多行数据,需要拆分多行时用以下方法:
方法1:
with test as(
select '11' COL1, '100.111.222' COL2 from dual
union all
select '22' COL1, '111.333' COL2 from dual
union all
select '33' COL1, '444.111' COL2 from dual
)
select COL1,
level,
regexp_count(COL2, '\.') + 1 as COL2_num,
regexp_substr(COL2, '[^.]+', 1, level) as COL2_new
from test t
connect by level <= regexp_count(COL2, '\.') + 1
and t.COL1 = prior t.COL1
and prior dbms_random.value > 0;
方法2:借助伪列,注意创建的伪列行数,要大于其中字符串拆分后数量最大值,本示例中字符串拆分后最大值是3,所以只要伪列行数大于3即可
with test as(
select '11' COL1, '100.111.222' COL2 from dual
union all
select '22' COL1, '111.333' COL2 from dual
union all
select '33' COL1, '444.111' COL2 from dual
),
z_level as(
select level lv from dual connect by level < 10
)
select t.COL1,
a.lv,
regexp_count(t.COL2, '\.') + 1 as COL2_num,
regexp_substr(t.COL2, '[^.]+', 1, a.lv) as COL2_new
from test t
inner join z_level a
on a.lv <= regexp_count(t.COL2, '\.') + 1;
regexp_count(col2, ',') 统计字符都好分割的个数
regexp_substr(col2, '[^,]+', 1, level) col2字段从位置1开始使用字符逗号拆分;
level指取拆分后第level个字符串
注意:在实际操作中报错ORA-00900 ,源表加上distinct即可。
hive实现方式:
with test as (
select 1 as id,'lisi,wanger,lili' as name
union all
select 2 as id,'wangwu,wangwang,susu,erha' as name
union all
select 3 as id,'qq' as name
)
select a.id,a.name, tt.name_new
from test a
LATERAL VIEW explode ( split(a.name,',')) tt as name_new ;
impala实现方式:
暂无
多行转一列:
oracle实现方式:
with tab_test as(
select '11' COL1, '222' COL2 from dual
union all
select '11' COL1, '100' COL2 from dual
union all
select '11' COL1, '111' COL2 from dual
union all
select '22' COL1, '111' COL2 from dual
union all
select '22' COL1, '333' COL2 from dual
union all
select '33' COL1, '111' COL2 from dual
union all
select '33' COL1, '444' COL2 from dual
)
select COL1, to_char(wmsys.wm_concat(COL2)) COL2_new
from tab_test group by COL1;
hive实现方式:
with test3 as (
select 1 uid,'aa' as tag
union all
select 1 uid,'bb' as tag
union all
select 1 uid,'vv' as tag
union all
select 2 uid,'vv' as tag
union all
select 2 uid,'dd' as tag
union all
select 3 uid,'aa' as tag
)
select uid , concat_ws ( ',' , collect_set ( tag ) ) from test3 group by uid ;
impala实现方式:
with test3 as (
select 1 uid,'aa' as tag
union all
select 1 uid,'bb' as tag
union all
select 1 uid,'vv' as tag
union all
select 2 uid,'vv' as tag
union all
select 2 uid,'dd' as tag
union all
select 3 uid,'aa' as tag
)
select uid , group_concat ( cast ( tag as string ) , ',' ) as tag_list from test3 group by uid ;