数据样例:
U3 A
U1 B
U2 A
U4 A
--结果: U1,U2 A,B
U1,U3 A
U1,U4 A
U2,U3 A
U2,U4 A
U3,U4 A
要点:先按照用户分区排序,使用窗口函数做出标记,然后按照左表rank值小于右表rank值和两两用户不为相同用户的规则关联,使用列转行的方式(collect_set)将value值合并
select concat_ws(",",a.user,b.user) as user,concat_ws(",",collect_set(a.goods)) as goods
from
(select row_number() over( order by user) rank,user,goods from tmp.normal) a
inner join
(select row_number() over(order by user) rank,user,goods from tmp.normal) b
on a.goods = b.goods
where a.rank<b.rank and levenshtein(a.user,b.user) !=0
group by concat_ws(",",a.user,b.user)