// K-means example implementation code
// Software environment: Scala 2.10.4 + Spark 1.6.3
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.{SparkConf, SparkContext}
object MyTest {
def main(args:Array[String]): Unit ={
// 屏蔽不必要的日志显示在终端上
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)
val conf = new SparkConf().setMaster("local[2]").setAppName("Kmeans test...")
val sc = new SparkContext(conf)
//装载数据集
val mydata = sc.textFile("kmeans_data.txt",1)
val testdata = sc.textFile("kmeans_data_test.txt",1)
val parsedData = mydata.map(s => Vectors.dense(s.split(' ').map(_.toDouble)))
//将数据集聚类,2个类,20次迭代,