import org. apache. spark. sql. SparkSession
import org. apache. spark. sql. Dataset
import org. apache. spark. sql. Row
import org. apache. spark. sql. DataFrame
import org. apache. spark. sql. Column
import org. apache. spark. sql. DataFrameReader
import org. apache. spark. rdd. RDD
import org. apache. spark. sql. catalyst. encoders. ExpressionEncoder
import org. apache. spark. sql. Encoder
import org. apache. spark. sql. functions. _
import org. apache. spark. sql. DataFrameStatFunctions
import org. apache. spark. ml. linalg. Vectors
// Demonstrate NaN semantics: the square root of a negative number is NaN,
// and isNaN is the correct way to detect it (NaN != NaN, so == cannot be used).
math.sqrt(-1.0)
// res43: Double = NaN   (REPL output — kept as a comment; it is not valid Scala code)
math.sqrt(-1.0).isNaN
// res44: Boolean = true (REPL output — kept as a comment; it is not valid Scala code)
// Assign column names to the raw affairs dataset.
val data1 = data.toDF(
  "affairs", "gender", "age", "yearsmarried", "children",
  "religiousness", "education", "occupation", "rating")

// Drop any row containing at least one null/NaN value (all columns considered).
val resNull = data1.na.drop()

// Drop rows with a null/NaN in either of the two listed columns.
val res = data1.na.drop(Array("gender", "yearsmarried"))

// Keep only rows having at least 10 non-null values among the listed columns.
// NOTE(review): threshold 10 over only 2 columns would drop every row — confirm intent.
data1.na.drop(10, Array("gender", "yearsmarried"))

// Replace nulls in every string column with a fixed marker value.
val res123 = data1.na.fill("wangxiao123")

// Replace nulls only in the listed columns.
val res2 = data1.na.fill("wangxiao111", Array("gender", "yearsmarried"))

// Per-column replacement values via a Map.
val res3 = data1.na.fill(Map("gender" -> "wangxiao222", "yearsmarried" -> "wangxiao567"))
// Rows whose gender is null, via a SQL-expression filter string.
val genderNullRows = data1.filter("gender is null").select("gender")
genderNullRows.limit(10).show()

// Rows whose gender is not null.
val genderNotNullRows = data1.filter("gender is not null").select("gender")
genderNotNullRows.limit(10).show()

// Same null check expressed with the Column API instead of a SQL string.
val genderNullViaColumn = data1.filter(data1("gender").isNull).select("gender")
genderNullViaColumn.limit(10).show()

// Rows whose gender is a non-empty string (SQL '<>' means not-equal).
val genderNonEmptyRows = data1.filter("gender<>''").select("gender")
genderNonEmptyRows.limit(10).show()