Spark - K-means

/**
 * Sample input data set: one 2-D point per line, coordinates separated by a space.
 *
 * 1.0 1.0
 * 1.0 2.0
 * 2.0 1.0
 * 2.0 2.0
 * 3.0 3.0
 * 3.0 4.0
 * 4.0 3.0
 * 4.0 4.0
 */
package com.spark.test

import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.SparkContext._

import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

/**
 * Minimal Spark MLlib K-means example.
 *
 * Reads whitespace-separated numeric points from a text file, trains a K-means model on them,
 * prints the Within Set Sum of Squared Errors (WSSSE) and saves the cluster index predicted for
 * every input point.
 */
object ObKMeans {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    run()
  }

  /**
   * Runs the full pipeline: load points, train the model, report the cost and save predictions.
   *
   * The input path (`/ruson/kmean.txt`) and output path (`/ruson/KMeansResult`) are fixed.
   * The SparkContext is always stopped before returning, even if a step throws.
   */
  def run(): Unit = {
    val conf = new SparkConf().setAppName("ObKMeansTest")
    val sc = new SparkContext(conf)

    try {
      // Load and parse the data. Lines are trimmed and split on any run of whitespace so that
      // blank lines, leading/trailing spaces or repeated separators do not produce empty tokens
      // (which would make `toDouble` throw a NumberFormatException).
      val data = sc.textFile("/ruson/kmean.txt")
      val parsedData = data
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(line => Vectors.dense(line.split("\\s+").map(_.toDouble)))
        .cache() // reused by training (iterative), cost computation and prediction

      // Cluster the data into `numClusters` classes using KMeans
      val numIterations = 20
      val numClusters = 4
      val clusters = KMeans.train(parsedData, numClusters, numIterations)

      // Evaluate clustering by computing Within Set Sum of Squared Errors
      val WSSSE = clusters.computeCost(parsedData)
      println("Within Set Sum of Squared Errors = " + WSSSE)

      // Predict the cluster index of every point and write one index per line
      val result = clusters.predict(parsedData)
      val resultFile = "/ruson/KMeansResult"
      result.saveAsTextFile(resultFile)
      println("Result file : " + resultFile)
    } finally {
      // Release the context's resources whether or not the job succeeded
      sc.stop()
    }
  }
}

上面是数据集;运行之后结果分为四类如下




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值