- Normalization speeds up gradient descent's search for the optimal solution, because features on comparable scales give a better-conditioned loss surface (see the sketch after this list);
- Normalization may also improve model accuracy.
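A minimal sketch of what the standardization itself computes (the sample values below are hypothetical, not from the article): with `withMean = false`, each value is only divided by the column's sample standard deviation; with `withMean = true`, the mean is subtracted first, i.e. z = (x - mean) / stddev.

```scala
// Hypothetical sample values, for illustration only.
val xs = Seq(1.0, 2.0, 3.0, 4.0, 5.0)
val mean = xs.sum / xs.size
// Sample (n - 1) standard deviation, matching Spark's StandardScaler.
val stddev = math.sqrt(xs.map(v => math.pow(v - mean, 2)).sum / (xs.size - 1))
val withStdOnly = xs.map(_ / stddev)                 // setWithMean(false): scale only
val centeredToo = xs.map(v => (v - mean) / stddev)   // setWithMean(true): center, then scale
println(withStdOnly)
println(centeredToo)
```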
```scala
def StandardScalerTest(): Unit = {
  import org.apache.spark.ml.feature.StandardScaler
  import org.apache.spark.sql.SparkSession

  val spark: SparkSession = SparkSession.builder()
    .appName("StandardScalerTest")
    .master("local[2]")
    .getOrCreate()

  val dataFrame = spark.read.format("libsvm")
    .load("F:\\sparkData/mllib/sample_libsvm_data.txt")

  val scaler = new StandardScaler()
    .setInputCol("features")
    .setOutputCol("scaledFeatures")
    .setWithStd(true)   // scale each feature to unit standard deviation
    .setWithMean(false) // do not center, which keeps sparse vectors sparse

  // Compute per-feature summary statistics by fitting the StandardScaler.
  val scalerModel = scaler.fit(dataFrame)

  // Rescale each feature to unit standard deviation.
  val scaledData = scalerModel.transform(dataFrame)
  scaledData.collect().foreach(println)

  spark.stop()
}
```
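StandardScaler is an Estimator: fit() scans the data once to compute each feature's standard deviation (and mean, if requested), and the resulting StandardScalerModel applies those statistics in transform(). setWithMean(false) is the default and is the right choice for libsvm input here, since subtracting the mean would turn the sparse feature vectors dense.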
Output (each printed row now includes the appended scaledFeatures column):