import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkContext, SparkConf}
/**
 * Created by fhqplzj on 16-7-18 at 11:02 AM.
 *
 * Local driver that fits a two-cluster KMeans model on a text file of
 * whitespace-separated numeric rows and prints the resulting cluster centers.
 */
object TestKMeans {
  /** Input file used when no path is supplied on the command line. */
  private val DefaultInput = "/home/fhqplzj/github/spark-1.6.1/data/mllib/kmeans_data.txt"

  /**
   * Runs the KMeans example.
   *
   * @param args optional; `args(0)` is the path of the input file. When absent,
   *             [[DefaultInput]] is used.
   */
  def main(args: Array[String]): Unit = {
    val input = args.headOption.getOrElse(DefaultInput)
    val conf = new SparkConf().setMaster("local[*]").setAppName("TestKMeans")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)
      // Trim before filtering so whitespace-only lines are dropped and leading
      // whitespace does not produce an empty first token for toDouble.
      val rowRDD = sc.textFile(input)
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(line => Vectors.dense(line.split("\\s+").map(_.toDouble)))
        .zipWithIndex()
        .map(_.swap) // (vector, index) -> (index, vector) to match the column order below
      val dataset = sqlContext.createDataFrame(rowRDD).toDF("id", "features")
      val model = new KMeans()
        .setK(2)
        .setFeaturesCol("features")
        .setPredictionCol("prediction")
        .fit(dataset)
      println("Cluster centers:")
      model.clusterCenters.foreach(println)
    } finally {
      // Always release the context, even when reading or fitting fails.
      sc.stop()
    }
  }
}
// spark: KMeans test
// Latest recommended article published on 2023-11-22 19:50:03