// Spark local test demo
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
// Session used by the rest of the script: master is "local[*]", and getOrCreate()
// hands back an already-existing session instead of building a second one.
val spark: SparkSession = {
  val localBuilder = SparkSession.builder().master("local[*]")
  localBuilder.getOrCreate()
}
import spark.implicits._
// Sample (name, fruit) pairs. "Alice" appears three times so that grouping by
// name yields a multi-element set for her and single-element sets for the others.
val data: Seq[(String, String)] = Seq(
  "Alice" -> "Apple",
  "Bob" -> "Banana",
  "Charlie" -> "Cherry",
  "Alice" -> "Apricot",
  "Alice" -> "Avocado"
)
// Turn the local pairs into a two-column DataFrame ("name", "fruit") via the
// spark.implicits._ conversions imported above.
val df = data.toDF("name", "fruit")
// Expose the DataFrame to SQL queries under the view name "fruits_table".
df.createOrReplaceTempView("fruits_table")
// Collect the distinct fruits of each person through the "fruits_table" view,
// then add a boolean column telling whether that set contains "Apple".
//
// The pipeline is wrapped in a block expression instead of continuing on a line
// that starts with ".withColumn": when the script is pasted line by line into
// spark-shell, a leading-dot line can be evaluated as a separate statement,
// leaving `result` without the "contains_apple" column.
val result = {
  val fruitsByName = spark.sql(
    "SELECT name, collect_set(fruit) AS fruits FROM fruits_table GROUP BY name")
  fruitsByName.withColumn("contains_apple", array_contains(col("fruits"), "Apple"))
}
// Disable truncation: show() cuts cells longer than 20 characters by default,
// which would hide part of the three-fruit set collected for "Alice".
result.show(truncate = false)