def main(args: Array[String]): Unit = {
  // Local Spark session for demonstrating LATERAL VIEW EXPLODE over a DataFrame.
  val conf = new SparkConf()
  conf.setMaster("local").setAppName("spark-sql-test")
  val sparkSession = SparkSession.builder().config(conf).getOrCreate()
  try {
    // Two columns, each cell holding a comma-separated pair of values.
    val df2 = sparkSession.createDataFrame(Seq(
      ("A,a", "D,d"),
      ("B,b", "E,e"),
      ("C,c", "F,f")
    )).toDF("col3", "col4")
    df2.show()
    df2.createOrReplaceTempView("df2")
    // Each LATERAL VIEW must carry its own table alias (t1/t2); reusing one
    // alias for both views is ambiguous. Exploding both split columns yields
    // the cross product of the col3 parts and the col4 parts per input row.
    val sql =
      """select col3, col4, col_s, col_s1 from df2
        |lateral view explode(split(col3, ',')) t1 as col_s
        |lateral view explode(split(col4, ',')) t2 as col_s1""".stripMargin
    val result = sparkSession.sql(sql)
    result.show()
  } finally {
    // Release the session (and its local executor threads) even on failure.
    sparkSession.stop()
  }
}
lateral view explode(split(col3,",")) t as col_s
将 col3 字段内容按 "," 分割后拆分成多行，拆分后的结果字段命名为 col_s（若想拆分多个字段，再追加一段同样的句式：lateral view explode(func(column)) tmpName as new_column）
结果输出
df2.show()
+----+----+
|col3|col4|
+----+----+
| A,a| D,d|
| B,b| E,e|
| C,c| F,f|
+----+----+
result.show()
+----+----+-----+------+
|col3|col4|col_s|col_s1|
+----+----+-----+------+
| A,a| D,d|    A|     D|
| A,a| D,d|    A|     d|
| A,a| D,d|    a|     D|
| A,a| D,d|    a|     d|
| B,b| E,e|    B|     E|
| B,b| E,e|    B|     e|
| B,b| E,e|    b|     E|
| B,b| E,e|    b|     e|
| C,c| F,f|    C|     F|
| C,c| F,f|    C|     f|
| C,c| F,f|    c|     F|
| C,c| F,f|    c|     f|
+----+----+-----+------+