// Build the input DataFrame from the `test` table: every column is stringified,
// the first two become "id" and "name", and all remaining columns are parsed as
// doubles and packed into a dense "features" vector.
val testdata = sql("select * from test").map { row =>
  val cells = row.toSeq.map(_.toString).toArray
  val numericCells = cells.drop(2).map(_.toDouble)
  (cells(0), cells(1), Vectors.dense(numericCells))
}.toDF("id", "name", "features")
// StandardScaler
// Fit a StandardScaler on the "features" column with both mean-centering and
// standard-deviation scaling enabled; results are written to "scaledFeatures".
val scaler1 = new StandardScaler().
  setInputCol("features").
  setOutputCol("scaledFeatures").
  setWithMean(true).
  setWithStd(true).
  fit(testdata)
val scaledData = scaler1.transform(testdata)
// Keep only the id and its scaled vector for each row.
val featuresdatatran = scaledData.map { row =>
  val id = row.getAs[String]("id")
  val scaled = row.getAs[Vector]("scaledFeatures")
  (id, scaled)
}
featuresdatatran.collect()
// Output: (1001,[-1.0,-1.0]), (1002,[0.0,0.0]), (1003,[1.0,1.0])
// MinMaxScaler
// Configure a MinMaxScaler that reads "features" and writes "scaledFeatures".
// No explicit min/max is set here, so the scaler's own defaults apply.
val scaler = new MinMaxScaler().
  setInputCol("features").
  setOutputCol("scaledFeatures")
val scalerModel = scaler.fit(testdata)
// Rescale each feature to the range [min, max].
val scaledData = scalerModel.transform(testdata)
// Debugging aids, left disabled:
// scaledData.printSchema()
// val featuresdata = scaledData.select($"scaledFeatures")
// Keep only the id and its rescaled vector for each row.
val featuresdatatran = scaledData.map { row =>
  val id = row.getAs[String]("id")
  val rescaled = row.getAs[Vector]("scaledFeatures")
  (id, rescaled)
}
featuresdatatran.collect()
// Output: (1001,[0.0,0.0]), (1002,[0.5,0.5]), (1003,[1.0,1.0])
// Normalizer
// Rebuild the input as plain (id, name, features) tuples (no DataFrame here):
// every column is stringified, and everything after the first two columns is
// parsed as a double and packed into a dense vector.
val testdata = sql("select * from test").map { row =>
  val cells = row.toSeq.map(_.toString).toArray
  (cells(0), cells(1), Vectors.dense(cells.drop(2).map(_.toDouble)))
}
// Normalize each feature vector with the Normalizer's default norm.
// The id is kept next to its normalized vector so the result has the same
// (id, vector) shape as the scaler examples; mapping to `_._3` first would
// discard the id and return bare vectors only.
val normalizer = new Normalizer()
val featuresdatatran = testdata.map { case (id, _, features) =>
  (id, normalizer.transform(features))
}
featuresdatatran.collect()
// Output: (1001,[0.9805806756909202,0.19611613513818404]), (1002,[0.9889363528682975,0.14834045293024462]), (1003,[0.9912279006826347,0.13216372009101796])