// Note: these example programs all follow the same boilerplate.
import org.apache.spark.ml.feature.PCA
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkContext, SparkConf}
/**
 * Created by fhqplzj on 2016-07-18 at 11:18 AM.
 */
/**
 * Minimal PCA demo: fits a PCA model on three in-memory 5-dimensional
 * vectors and prints the 3-component projection.
 */
object TestPCA {
  /**
   * Entry point. Builds a local SparkContext, wraps the sample vectors in a
   * single-column DataFrame named "features", reduces them to 3 principal
   * components, and shows the result without truncation.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("TestPCA")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)
      // Three sample rows; the first is sparse (only indices 1 and 3 set),
      // so sparse and dense vectors are mixed in the same column.
      val data = Array(
        Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
        Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
        Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
      )
      // Each vector becomes a one-field Row; the column name must match
      // the PCA estimator's input column below.
      val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
      val projected = new PCA()
        .setInputCol("features")
        .setOutputCol("pca")
        .setK(3) // keep 3 principal components out of the 5 dimensions
        .fit(dataFrame)
        .transform(dataFrame)
      projected.show(false) // false => do not truncate long vector strings
    } finally {
      // Always release local Spark resources, even if the job above fails.
      sc.stop()
    }
  }
}