K-Means 聚类分析实现类源码

37 篇文章 0 订阅

数据文件来自 :http://archive.ics.uci.edu/ml/datasets/Wholesale+customers?cm_mc_uid=21918109261714715776095&cm_mc_sid_50200000=1476090999


import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
/**
 * Spark MLlib K-Means clustering example.
 *
 * Trains a K-Means model on a tab-separated training file, prints the cluster
 * centers, then prints the predicted cluster index for every row of a test file.
 */
object KMeansClustering {

  /**
   * Program entry point.
   *
   * Expects at least five arguments, in this order:
   *   0. trainingDataFilePath - path passed to `SparkContext.textFile` for training data
   *   1. testDataFilePath     - path passed to `SparkContext.textFile` for test data
   *   2. numClusters          - number of clusters, parsed with `toInt`
   *   3. numIterations        - iteration count, parsed with `toInt`
   *   4. runTimes             - parsed with `toInt` and passed as the last argument
   *                             of `KMeans.train`
   *
   * Prints a usage message and exits with status 1 when fewer than five
   * arguments are given.
   *
   * @param args command-line arguments as described above
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 5) {
      // The message is built from two literals: a plain string literal
      // must not contain a raw line break.
      println("Usage:KMeansClustering trainingDataFilePath testDataFilePath numClusters " +
        "numIterations runTimes")
      sys.exit(1)
    }

    val conf = new SparkConf().setAppName("Spark MLlib Exercise:K-Means Clustering")
    val sc = new SparkContext(conf)

    try {
      // Training data: drop header/blank lines, parse each row, and cache the
      // result (it is handed to KMeans.train below).
      val parsedTrainingData = sc.textFile(args(0))
        .filter(line => isDataLine(line))
        .map(line => parseFeatures(line))
        .cache()

      // Cluster the training data into `numClusters` groups using KMeans.
      val numClusters = args(2).toInt
      val numIterations = args(3).toInt
      val runTimes = args(4).toInt
      val clusters: KMeansModel =
        KMeans.train(parsedTrainingData, numClusters, numIterations, runTimes)

      println("Cluster Number:" + clusters.clusterCenters.length)

      println("Cluster Centers Information Overview:")
      // zipWithIndex supplies the running index without a mutable counter.
      clusters.clusterCenters.zipWithIndex.foreach { case (center, index) =>
        println(s"Center Point of Cluster $index:")
        println(center)
      }

      // Check which cluster each test row belongs to. The same line filter as
      // for the training data is applied so a header row in the test file is
      // skipped instead of failing in toDouble.
      sc.textFile(args(1))
        .filter(line => isDataLine(line))
        .map(line => parseFeatures(line))
        .collect()
        .foreach { features =>
          val predictedClusterIndex: Int = clusters.predict(features)
          println(s"The data $features belongs to cluster $predictedClusterIndex")
        }

      println("Spark MLlib K-means clustering test finished.")
    } finally {
      // Release the Spark context even when training or prediction throws.
      sc.stop()
    }
  }

  /**
   * Tells whether a line is the column-name (header) line.
   *
   * @param line a raw input line; may be null
   * @return true when the line is non-null and contains the text "Channel"
   */
  private def isColumnNameLine(line: String): Boolean =
    line != null && line.contains("Channel")

  /** True for non-null, non-blank lines that are not the column-name line. */
  private def isDataLine(line: String): Boolean =
    line != null && line.trim.nonEmpty && !isColumnNameLine(line)

  /**
   * Parses one tab-separated row into a dense MLlib vector. Fields are trimmed
   * and empty fields are dropped before conversion with `toDouble`, which
   * throws NumberFormatException for a non-numeric field.
   */
  private def parseFeatures(line: String) =
    Vectors.dense(line.split("\t").map(_.trim).filter(_.nonEmpty).map(_.toDouble))
}
 


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值