参考:
how to filter out a null value from spark dataframe
scala> df.show()
+----+---+--------+
| age| id| name|
+----+---+--------+
|null| 1| Michael|
| 30| 1| Tom|
| 19| 2| Justin|
+----+---+--------+
- 每列不为null
// Build one predicate requiring every column to be non-null, then keep only
// the rows that satisfy it. (Equivalent one-liner: df.na.drop().)
val filterCond: Column = df.columns
  .map(colName => col(colName).isNotNull)
  .reduce((acc, cond) => acc && cond)
val filteredDf = df.filter(filterCond)
filteredDf.show()
scala> filterCond
res16: org.apache.spark.sql.Column = (((age IS NOT NULL) AND (id IS NOT NULL)) AND (name IS NOT NULL))
scala> filteredDf.show()
+---+---+--------+
|age| id| name|
+---+---+--------+
| 30|  1|     Tom|
| 19| 2| Justin|
+---+---+--------+
- age、id 值大于 1,name 不为 null
// Per-column predicate: `name` must be non-null, every other column must be > 1.
// All per-column conditions are AND-ed into a single filter expression.
val filterCond2 = df.columns
  .map { colName =>
    if (colName == "name") col(colName).isNotNull
    else col(colName) > 1
  }
  .reduce((acc, cond) => acc && cond)
val filteredDf2 = df.filter(filterCond2)
scala> val filterCond2=df.columns.map {
| case x@"name" => col(x).isNotNull
| case x => col(x) > 1
| }.reduce(_ && _)
filterCond2: org.apache.spark.sql.Column = (((age > 1) AND (id > 1)) AND (name IS NOT NULL))
scala> df.filter(filterCond2).show()
+---+---+------+
|age| id| name|
+---+---+------+
| 19| 2|Justin|
+---+---+------+