package test0816
import org.apache.spark.sql.SparkSession
/**
 * Demonstrates set-style operations (difference, intersection, union) on two
 * small Spark DataFrames that share the columns `id` and `name`.
 *
 * The program uses an explicit `main` method rather than `scala.App`: the
 * Spark documentation advises against `App` for Spark applications because
 * its delayed initialisation may not work correctly.
 *
 * @author WGY
 */
object Test {

  /**
   * Builds a local SparkSession, prints both input DataFrames and then the
   * results of the set operations between them. The session is always closed,
   * even when one of the Spark actions fails.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession =
      SparkSession.builder().master("local[2]").appName("mysql").getOrCreate()

    try {
      // Declare the two DataFrames first.
      val df1 = spark
        .createDataFrame(List((1, "zhangsan"), (2, "lisi"), (3, "wangwu")))
        .toDF("id", "name")
      df1.show()

      val df2 = spark
        .createDataFrame(List((1, "zhangsan"), (2, "lisi"), (4, "zhaoliu")))
        .toDF("id", "name")
      df2.show()

      // Difference
      println("差集")
      df1.except(df2).show() // rows present in df1 but not in df2
      df2.except(df1).show() // rows present in df2 but not in df1

      // Intersection
      println("交集")
      df1.intersect(df2).show()

      // Union
      println("并集")
      // Dataset.union behaves like SQL UNION ALL and keeps duplicate rows;
      // distinct() is required to obtain a true set union, consistent with the
      // de-duplicating except/intersect calls above.
      df1.union(df2).distinct().show()
    } finally {
      // Release the session even if any action above threw.
      spark.close()
    }
  }
}
spark学习--求DataFrame的交集、差集、并集
最新推荐文章于 2024-01-27 19:41:39 发布