// Build a small test DataFrame with two columns: c1 (Int) and c2 (String).
val df1 = sc.parallelize(Seq((1,"abcd"), (2,"defg"), (3, "ghij"),(4,"xyzz"),(5,"lmnop"),(6,"pqrst"),(7,"wxyz"),(8,"lmnoa"),(9,"jklm"))).toDF("c1","c2")
// Whitelist of c2 values to keep.
val given_list = List("abcd","defg","ghij")
// Keep rows whose c2 is in the list. `isin` takes varargs, so the List must be
// splatted with `: _*`.
df1.filter(($"c2").isin(given_list: _*)).show()
// or: `isInCollection` accepts a Scala collection directly — no varargs splat
// needed (NOTE(review): presumably requires a newer Spark version than `isin`;
// confirm against the cluster's Spark release).
df1.filter(($"c2").isInCollection(given_list)).show()
在 spark-shell 中执行后的输出如下：
scala> val df1 = sc.parallelize(Seq((1,"abcd"), (2,"defg"), (3, "ghij"),(4,"xyzz"),(5,"lmnop"),(6,"pqrst"),(7,"wxyz"),(8,"lmnoa"),(9,"jklm"))).toDF("c1","c2")
df1: org.apache.spark.sql.DataFrame = [c1: int, c2: string]
scala> val given_list = List("abcd","defg","ghij")
given_list: List[String] = List(abcd, defg, ghij)
scala> df1.filter(($"c2").isin(given_list: _*)).show()
+---+----+
| c1| c2|
+---+----+
| 1|abcd|
| 2|defg|
| 3|ghij|
+---+----+
scala> //或者
scala> df1.filter(($"c2").isInCollection(given_list)).show()
+---+----+
| c1| c2|
+---+----+
| 1|abcd|
| 2|defg|
| 3|ghij|
+---+----+