-- collect_set drops duplicate elements; collect_list keeps duplicates.
-- For each gender, both functions gather the `children` values into an array,
-- and concat_ws(',', ...) joins that array into one comma-separated string.
-- NOTE: Spark documents both aggregates as non-deterministic in element order
-- (it depends on row order after a shuffle), so do not rely on the ordering
-- inside either string.
select
    gender,
    concat_ws(',', collect_set(children)),
    concat_ws(',', collect_list(children))
from Affairs
group by gender
1
2
3
4
5
6
7
8
9
10
11
12
13
|
// Register `data` as the temporary view "Affairs" so it can be queried via SQL.
data.createOrReplaceTempView("Affairs")

// Per gender, build two comma-separated strings of the `children` values:
// one from collect_set (duplicates removed) and one from collect_list
// (duplicates kept).
val df3 = spark.sql(
  "select gender,concat_ws(',',collect_set(children)),concat_ws(',',collect_list(children)) from Affairs group by gender"
)
df3: org.apache.spark.sql.DataFrame = [gender: string, concat_ws(,, collect_set(children)): string ... 1 more field]
// Print the aggregated rows to the console.
df3.show
// collect_set drops duplicate elements; collect_list keeps duplicates.
+------+-----------------------------------+------------------------------------+
|gender|concat_ws(,, collect_set(children))|concat_ws(,, collect_list(children))|
+------+-----------------------------------+------------------------------------+
|female|                             no,yes|                    no,yes,no,no,yes|
|  male|                             no,yes|                    no,yes,no,yes,no|
+------+-----------------------------------+------------------------------------+
|