Scala 学习笔记(6)- 程序例子:Scala K-means 聚类算法

之前用 Java 写过一个 K-means 算法,这次想用 Scala 重写这个例子练练手。总结下来就是对 Scala 还不是很熟悉,还需要慢慢加强;对于 Scala 中的 List、数组、Map 等集合也还需要深入了解。

Scala 中的 foreach 用起来还是比较方便的,定义数据类型也比较方便,相比 Java 更简单。

for 循环中遇到一个开闭区间的问题:下面代码中的 `0 until D_LEN` 是左闭右开区间,不包含 D_LEN;如果换成 `0 to D_LEN` 则是闭区间,包含 D_LEN(用作数组下标时会越界)。这一点需要注意。

        // `until` builds a half-open range, so i takes the values 0 .. D_LEN - 1.
        for (i <- 0 until D_LEN) {
          t(i) = add(t(i), list.point(i))
        }

下面是 Scala 版的 K-means 程序:


清单1.

package com.test.zhuoer

/**
 * Data point used by the K-means clustering code.
 *
 * Each constructor argument is copied into a mutable member of the same
 * meaning, so callers can read and reassign `point`, `attrStr` and `id`.
 *
 * Open question left by the author: what should be done when the
 * constructor grows many parameters?
 *
 * @param pointArg   coordinates of the point (stored by reference, not copied)
 * @param attrStrArg attribute string describing the point
 * @param idArg      identifier of the point
 */
class KEntity(pointArg: Array[Double], attrStrArg: String, idArg: Int) {

  /** Coordinates of the point. */
  var point: Array[Double] = pointArg

  /** Attribute string. */
  var attrStr: String = attrStrArg

  /** Identifier. */
  var id: Int = idArg

}

清单2.

package com.test.zhuoer

import scala.util.control.Breaks._

object Keams {

  /** Number of clusters. */
  var K: Int = 3

  /** Current cluster centers, keyed by cluster index. */
  var initCluster: Map[Int, Array[Double]] = Map.empty

  /** Input data set. */
  var datasource: List[KEntity] = Nil

  /** Number of dimensions of each data point. */
  var D_LEN: Int = 4

  /** Data points grouped by the cluster index they are assigned to. */
  var k_Cluster: Map[Int, List[KEntity]] = Map.empty

  /**
   * Per-cluster distance between the previous and the recomputed center.
   * Allocated once, with the value K has when the object is initialized.
   */
  var ctDistance: Array[Double] = new Array[Double](K)

  /** Precision threshold that the entries of `ctDistance` are compared against. */
  var DISTANCE: Double = 0d

  /**
   * Initializes the clustering state.
   *
   * Picks K distinct data points at random and uses their coordinates as the
   * initial cluster centers. For every cluster index it also stores an empty
   * member list in `k_Cluster` and sets its `ctDistance` entry to -1, meaning
   * that no center movement has been measured yet.
   *
   * Any previous content of `initCluster` and `k_Cluster` is discarded.
   *
   * @throws IllegalArgumentException if `datasource` holds fewer than K points
   *                                  (K distinct seeds cannot be drawn)
   */
  def init: Unit = {
    require(
      datasource.size >= K,
      s"need at least K=$K data points to pick initial centers, got ${datasource.size}")

    // ctDistance is allocated when the object is created; re-allocate it if K
    // was changed afterwards so that every cluster index has a slot.
    if (ctDistance.length != K) ctDistance = new Array[Double](K)

    initCluster = Map[Int, Array[Double]]()
    k_Cluster = Map[Int, List[KEntity]]()

    // Shuffling and taking the first K elements selects K distinct positions
    // of the data set, i.e. the randomly chosen points really become the seeds.
    val seeds = scala.util.Random.shuffle(datasource).take(K)
    seeds.zipWithIndex.foreach { case (seed, k) =>
      initCluster += (k -> seed.point) // initial center
      k_Cluster += (k -> List()) // initial (empty) member list
      ctDistance(k) = -1 // no movement measured yet
    }
  }

  /**
   * Resets `k_Cluster` so that every cluster index in `0 until K` maps to an
   * empty list of data points.
   */
  def initKCluster: Unit = {
    k_Cluster = (0 until K).map(i => i -> List.empty[KEntity]).toMap
  }

  /**
   * Addition.
   *
   * @return the sum `d1 + d2`
   */
  def add(d1: Double, d2: Double): Double = d1 + d2

  /**
   * Subtraction.
   *
   * @return the difference `d1 - d2`
   */
  def sub(d1: Double, d2: Double): Double = d1 - d2

  /**
   * Multiplication.
   *
   * @return the product `d1 * d2`
   */
  def mul(d1: Double, d2: Double): Double = d1 * d2

  /**
   * Division.
   *
   * Plain floating-point division: dividing by zero yields an infinity or NaN
   * rather than throwing.
   *
   * @return the quotient `d1 / d2`
   */
  def div(d1: Double, d2: Double): Double = d1 / d2

  /**
   * Euclidean distance between two points.
   *
   * The points may have any number of dimensions as long as both have the
   * same one. A mismatch is reported instead of being answered with 0, since
   * a distance of 0 would make the mismatching point look like a perfect match.
   *
   * @param d1 coordinates of the first point
   * @param d2 coordinates of the second point
   * @return the square root of the summed squared coordinate differences
   * @throws IllegalArgumentException if the two arrays differ in length
   */
  def distance(d1: Array[Double], d2: Array[Double]): Double = {
    require(
      d1.length == d2.length,
      s"dimension mismatch: ${d1.length} vs ${d2.length}")

    var sum = 0d
    var i = 0
    while (i < d1.length) {
      val diff = d1(i) - d2(i)
      sum += diff * diff
      i += 1
    }

    math.sqrt(sum)
  }

  /**
   * Recomputes every cluster center as the mean of the points currently
   * assigned to that cluster.
   *
   * For each cluster the distance between the previous center and the new
   * one is stored in `ctDistance`, and the new center replaces the old one in
   * `initCluster`.
   *
   * A cluster without members keeps its previous center and is recorded as
   * not having moved (distance 0). Averaging zero points would divide by
   * zero and produce NaN coordinates and a NaN distance.
   */
  def newCenter(): Unit = {

    k_Cluster.foreach { case (clusterIdx, members) =>
      if (members.isEmpty) {
        // Nothing to average: leave the center where it is.
        ctDistance(clusterIdx) = 0d
      } else {
        // Coordinate-wise sum over all members of the cluster.
        val sums = new Array[Double](D_LEN)
        members.foreach { member =>
          for (i <- 0 until D_LEN) {
            sums(i) = add(sums(i), member.point(i))
          }
        }

        val centroid = sums.map(s => div(s, members.size))

        // Measure the movement before the old center is overwritten.
        ctDistance(clusterIdx) = distance(initCluster(clusterIdx), centroid)

        initCluster += (clusterIdx -> centroid)
      }
    }

  }

  /**
   * Assignment step: rebuilds `k_Cluster` by putting every data point into
   * the cluster whose center is nearest to it.
   *
   * Every cluster index in `0 until K` is present in the result, with an
   * empty list when no point was assigned to it. Within a cluster the points
   * keep the order they have in `datasource`.
   */
  def order(): Unit = {

    // Index of the center closest to `p`. Ties go to the lowest index, and
    // index 0 is the fallback when no distance compares smaller (e.g. NaN).
    def nearest(p: Array[Double]): Int = {
      var best = 0
      // Start from +Infinity rather than a magic "large" number so that
      // arbitrarily distant points are still assigned to their true nearest center.
      var bestDistance = Double.PositiveInfinity
      for (j <- 0 until K) {
        val dj = distance(p, initCluster(j))
        if (dj < bestDistance) {
          bestDistance = dj
          best = j
        }
      }
      best
    }

    // One pass over the list; avoids indexed access and list appends, both
    // of which are linear-time on List.
    val grouped = datasource.groupBy(entity => nearest(entity.point))
    k_Cluster = (0 until K).map(k => k -> grouped.getOrElse(k, Nil)).toMap

  }

  /**
   * Runs the clustering loop.
   *
   * Each round reassigns the data points to their nearest center (`order`)
   * and then checks the center movements recorded in `ctDistance`. The loop
   * stops once every recorded movement lies in the range 0 .. DISTANCE;
   * otherwise the centers are recomputed (`newCenter`) and the iteration
   * number is printed.
   *
   * Negative entries (the -1 written by `init`) mean that no movement has
   * been measured yet and therefore never count as converged. With the
   * default DISTANCE of 0 the check is the same as testing for exactly 0.
   */
  def exec: Unit = {

    var c = 0
    var converged = false

    while (!converged) {

      order() // reassign every point to its nearest center

      // Stop when no center moved by more than the precision threshold.
      converged = ctDistance.forall(d => d >= 0 && d <= DISTANCE)

      if (!converged) {
        newCenter() // recompute the centers from the new assignment

        println("--------------迭代次数:" + c)
        c = c + 1
      }
    }
  }

  /**
   * Entry point: loads the built-in sample data, runs the clustering and
   * prints the members of every cluster.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    println("-------------Start")

    // Sample data: one row per data point.
    val d: Array[Array[Double]] = Array(

      Array(6, 12, 8929, 1474),
      Array(7, 12, 9149, 9952),
      Array(4, 12, 3992, 5822),
      Array(3, 12, 1626, 360),
      Array(32, 12, 3563, 39630),
      Array(38, 12, 303451, 34083239),
      Array(66, 12, 133102, 6468),
      Array(14, 12, 38860, 15140),
      Array(128, 72, 271390, 39019349),
      Array(111, 12, 0, 0),
      Array(61, 12, 18626, 664),
      Array(40, 12, 3626, 660),
      Array(63, 42, 2290136, 3419991),
      Array(1, 12, 0, 14000),
      Array(5, 12, 5723, 998),
      Array(6, 12, 9032, 1512),
      Array(102, 72, 20134467, 25894663),
      Array(5, 12, 5723, 5998),
      Array(101, 72, 621319, 15322448),
      Array(6, 12, 9095, 1542),
      Array(6, 12, 9095, 1542),
      Array(6, 12, 9095, 1542),
      Array(6, 12, 9095, 1542),
      Array(6, 12, 9095, 1542),
      Array(4, 12, 3626, 660),
      Array(6, 12, 9095, 1542),
      Array(3, 12, 1626, 360),
      Array(2, 12, 500, 19100),
      Array(100, 12, 15420, 8208707),
      Array(100, 12, 8927659, 38163823),
      Array(11, 12, 7708, 5546),
      Array(6, 12, 8849, 33459849),
      Array(4, 12, 3626, 89160),
      Array(14, 12, 38860, 14140),
      Array(1, 12, 0, 79000),
      Array(30, 12, 225482, 90391),
      Array(31, 12, 230754, 119948),
      Array(3, 12, 1626, 360),
      Array(27, 12, 287635, 148541),
      Array(11, 12, 7890, 13594),
      Array(7, 12, 9095, 26942),
      Array(30, 36, 311375, 73711),
      Array(2, 12, 500, 84100),
      Array(1, 12, 0, 228000),
      Array(6, 12, 8992, 1504),
      Array(3, 12, 1626, 360),
      Array(4, 12, 3626, 8660),
      Array(18, 12, 69041, 17594),
      Array(18, 12, 69358, 16593),
      Array(4, 12, 3706, 694),
      Array(2, 12, 500, 3100),
      Array(35, 12, 2907, 206039),
      Array(8, 12, 7446, 10562),
      Array(2, 12, 500, 3100),
      Array(21, 12, 189051, 20076),
      Array(21, 12, 188065, 19604),
      Array(100, 12, 805762, 19934040),
      Array(15, 12, 40589, 13905),
      Array(8, 12, 7549, 2600),
      Array(6, 12, 9095, 1542),
      Array(15, 12, 39589, 7305),
      Array(18, 12, 66326, 10959),
      Array(7, 12, 9095, 9942),
      Array(22, 12, 184273, 27756),
      Array(8, 12, 7708, 8646),
      Array(23, 12, 223512, 33280),
      Array(4, 12, 3626, 660),
      Array(4, 12, 3626, 660),
      Array(6, 12, 8786, 80428),
      Array(3, 12, 1626, 360),
      Array(21, 12, 5515, 21260),
      Array(1, 12, 0, 3000),
      Array(1, 12, 0, 41000),
      Array(30, 12, 330716, 91039),
      Array(4, 12, 3626, 660),
      Array(7, 12, 9329, 2022),
      Array(1, 12, 0, 3000),
      Array(2, 12, 500, 178700),
      Array(2, 12, 500, 132900),
      Array(2, 12, 500, 18500),
      Array(1, 12, 0, 8200),
      Array(2, 12, 500, 8300),
      Array(2, 12, 500, 45900),
      Array(2, 12, 500, 18900),
      Array(2, 12, 500, 9500),
      Array(2, 12, 500, 96500),
      Array(2, 12, 500, 30700),
      Array(2, 12, 500, 20100),
      Array(8, 12, 7577, 67104),
      Array(58, 36, 1397618, 17879602),
      Array(11, 12, 8909, 11400),
      Array(6, 12, 9181, 1594),
      Array(2, 12, 563, 130),
      Array(32, 12, 19756, 186422),
      Array(6, 12, 8786, 6428),
      Array(5, 12, 5786, 1028),
      Array(6, 12, 8786, 1428),
      Array(6, 12, 8786, 40006428),
      Array(12, 12, 68456, 95240),
      Array(2, 12, 500, 20100),
      Array(11, 12, 7708, 7546),
      Array(4, 12, 3786, 728),
      Array(8, 12, 7577, 2604),
      Array(5, 12, 5849, 1058),
      Array(5, 12, 5786, 1028),
      Array(6, 12, 8889, 1466),
      Array(9, 12, 7708, 4246),
      Array(9, 12, 7708, 4246),
      Array(7, 12, 9095, 1942),
      Array(4, 12, 3626, 660),
      Array(7, 12, 8500, 1700),
      Array(4, 12, 3500, 600),
      Array(2, 12, 563, 130),
      Array(9, 12, 7708, 4246),
      Array(4, 12, 13000, 400),
      Array(1, 12, 0, 3000),
      Array(3, 12, 1500, 300),
      Array(7, 12, 9095, 1942),
      Array(10, 12, 6879, 4774),
      Array(4, 12, 3626, 27660),
      Array(11, 12, 7141, 7308),
      Array(9, 12, 7403, 5092),
      Array(19, 12, 107219, 22580),
      Array(15, 12, 39589, 14305),
      Array(32, 12, 8945, 229632),
      Array(7, 12, 9212, 1982),
      Array(1, 12, 0, 2000),
      Array(2, 12, 500, 5100),
      Array(1, 12, 0, 5000),
      Array(2, 12, 500, 5100),
      Array(23, 12, 134301, 37817),
      Array(3, 12, 1626, 360),
      Array(2, 12, 626, 160),
      Array(32, 12, 174216, 160294),
      Array(1, 12, 0, 342000),
      Array(2, 12, 500, 120100),
      Array(21, 12, 8515, 19560),
      Array(15, 12, 40854, 12462),
      Array(4, 12, 3706, 694),
      Array(60, 12, 0, 0),
      Array(60, 12, 0, 0),
      Array(14, 12, 38860, 9140),
      Array(21, 12, 7689, 32237),
      Array(4, 12, 3626, 660),
      Array(4, 12, 3626, 660),
      Array(6, 12, 8889, 362466),
      Array(3, 12, 1626, 360),
      Array(1, 12, 0, 3000),
      Array(2, 12, 563, 5130),
      Array(2, 12, 500, 62100),
      Array(4, 12, 3626, 660),
      Array(45, 12, 1508096, 8341432),
      Array(1, 12, 0, 114000),
      Array(4, 12, 3626, 12660),
      Array(1, 12, 0, 9000),
      Array(4, 12, 3626, 234660),
      Array(1, 12, 0, 132000),
      Array(60, 12, 3293822, 40645),
      Array(12, 12, 68658, 7290),
      Array(6, 12, 8786, 8428),
      Array(4, 12, 3786, 2728),
      Array(1, 12, 0, 5000),
      Array(1, 12, 0, 400000000),
      Array(1, 12, 0, 2000),
      Array(2, 12, 626, 5160),
      Array(32, 12, 273220, 152515),
      Array(2, 12, 500, 5100),
      Array(1, 12, 0, 8000),
      Array(1, 12, 0, 5000))

    // Wrap every row in a KEntity. The attribute string is the row's values
    // joined by single spaces. Prepending keeps each insertion O(1); as a
    // result `datasource` holds the rows in reverse order.
    d.foreach { x =>
      val ke = new KEntity(x, x.mkString(" "), 0)
      datasource = ke :: datasource
    }

    init // set up the initial centers

    exec // run the clustering

    // Print the members of every cluster, one "[...]" line per data point.
    k_Cluster.foreach { case (clusterIdx, members) =>
      println("---------------K" + clusterIdx)
      members.foreach(x => println("[" + x.attrStr + "]"))
    }

  }

}


清单3.

执行结果

-------------Start
--------------迭代次数:0
--------------迭代次数:1
--------------迭代次数:2
--------------迭代次数:3
--------------迭代次数:4
--------------迭代次数:5
---------------K0
[1.0 12.0 0.0 5000.0]
[1.0 12.0 0.0 8000.0]
[2.0 12.0 500.0 5100.0]
[32.0 12.0 273220.0 152515.0]
[2.0 12.0 626.0 5160.0]
[1.0 12.0 0.0 2000.0]
[1.0 12.0 0.0 5000.0]
[4.0 12.0 3786.0 2728.0]
[6.0 12.0 8786.0 8428.0]
[12.0 12.0 68658.0 7290.0]
[60.0 12.0 3293822.0 40645.0]
[1.0 12.0 0.0 132000.0]
[4.0 12.0 3626.0 234660.0]
[1.0 12.0 0.0 9000.0]
[4.0 12.0 3626.0 12660.0]
[1.0 12.0 0.0 114000.0]
[45.0 12.0 1508096.0 8341432.0]
[4.0 12.0 3626.0 660.0]
[2.0 12.0 500.0 62100.0]
[2.0 12.0 563.0 5130.0]
[1.0 12.0 0.0 3000.0]
[3.0 12.0 1626.0 360.0]
[6.0 12.0 8889.0 362466.0]
[4.0 12.0 3626.0 660.0]
[4.0 12.0 3626.0 660.0]
[21.0 12.0 7689.0 32237.0]
[14.0 12.0 38860.0 9140.0]
[60.0 12.0 0.0 0.0]
[60.0 12.0 0.0 0.0]
[4.0 12.0 3706.0 694.0]
[15.0 12.0 40854.0 12462.0]
[21.0 12.0 8515.0 19560.0]
[2.0 12.0 500.0 120100.0]
[1.0 12.0 0.0 342000.0]
[32.0 12.0 174216.0 160294.0]
[2.0 12.0 626.0 160.0]
[3.0 12.0 1626.0 360.0]
[23.0 12.0 134301.0 37817.0]
[2.0 12.0 500.0 5100.0]
[1.0 12.0 0.0 5000.0]
[2.0 12.0 500.0 5100.0]
[1.0 12.0 0.0 2000.0]
[7.0 12.0 9212.0 1982.0]
[32.0 12.0 8945.0 229632.0]
[15.0 12.0 39589.0 14305.0]
[19.0 12.0 107219.0 22580.0]
[9.0 12.0 7403.0 5092.0]
[11.0 12.0 7141.0 7308.0]
[4.0 12.0 3626.0 27660.0]
[10.0 12.0 6879.0 4774.0]
[7.0 12.0 9095.0 1942.0]
[3.0 12.0 1500.0 300.0]
[1.0 12.0 0.0 3000.0]
[4.0 12.0 13000.0 400.0]
[9.0 12.0 7708.0 4246.0]
[2.0 12.0 563.0 130.0]
[4.0 12.0 3500.0 600.0]
[7.0 12.0 8500.0 1700.0]
[4.0 12.0 3626.0 660.0]
[7.0 12.0 9095.0 1942.0]
[9.0 12.0 7708.0 4246.0]
[9.0 12.0 7708.0 4246.0]
[6.0 12.0 8889.0 1466.0]
[5.0 12.0 5786.0 1028.0]
[5.0 12.0 5849.0 1058.0]
[8.0 12.0 7577.0 2604.0]
[4.0 12.0 3786.0 728.0]
[11.0 12.0 7708.0 7546.0]
[2.0 12.0 500.0 20100.0]
[12.0 12.0 68456.0 95240.0]
[6.0 12.0 8786.0 1428.0]
[5.0 12.0 5786.0 1028.0]
[6.0 12.0 8786.0 6428.0]
[32.0 12.0 19756.0 186422.0]
[2.0 12.0 563.0 130.0]
[6.0 12.0 9181.0 1594.0]
[11.0 12.0 8909.0 11400.0]
[8.0 12.0 7577.0 67104.0]
[2.0 12.0 500.0 20100.0]
[2.0 12.0 500.0 30700.0]
[2.0 12.0 500.0 96500.0]
[2.0 12.0 500.0 9500.0]
[2.0 12.0 500.0 18900.0]
[2.0 12.0 500.0 45900.0]
[2.0 12.0 500.0 8300.0]
[1.0 12.0 0.0 8200.0]
[2.0 12.0 500.0 18500.0]
[2.0 12.0 500.0 132900.0]
[2.0 12.0 500.0 178700.0]
[1.0 12.0 0.0 3000.0]
[7.0 12.0 9329.0 2022.0]
[4.0 12.0 3626.0 660.0]
[30.0 12.0 330716.0 91039.0]
[1.0 12.0 0.0 41000.0]
[1.0 12.0 0.0 3000.0]
[21.0 12.0 5515.0 21260.0]
[3.0 12.0 1626.0 360.0]
[6.0 12.0 8786.0 80428.0]
[4.0 12.0 3626.0 660.0]
[4.0 12.0 3626.0 660.0]
[23.0 12.0 223512.0 33280.0]
[8.0 12.0 7708.0 8646.0]
[22.0 12.0 184273.0 27756.0]
[7.0 12.0 9095.0 9942.0]
[18.0 12.0 66326.0 10959.0]
[15.0 12.0 39589.0 7305.0]
[6.0 12.0 9095.0 1542.0]
[8.0 12.0 7549.0 2600.0]
[15.0 12.0 40589.0 13905.0]
[21.0 12.0 188065.0 19604.0]
[21.0 12.0 189051.0 20076.0]
[2.0 12.0 500.0 3100.0]
[8.0 12.0 7446.0 10562.0]
[35.0 12.0 2907.0 206039.0]
[2.0 12.0 500.0 3100.0]
[4.0 12.0 3706.0 694.0]
[18.0 12.0 69358.0 16593.0]
[18.0 12.0 69041.0 17594.0]
[4.0 12.0 3626.0 8660.0]
[3.0 12.0 1626.0 360.0]
[6.0 12.0 8992.0 1504.0]
[1.0 12.0 0.0 228000.0]
[2.0 12.0 500.0 84100.0]
[30.0 36.0 311375.0 73711.0]
[7.0 12.0 9095.0 26942.0]
[11.0 12.0 7890.0 13594.0]
[27.0 12.0 287635.0 148541.0]
[3.0 12.0 1626.0 360.0]
[31.0 12.0 230754.0 119948.0]
[30.0 12.0 225482.0 90391.0]
[1.0 12.0 0.0 79000.0]
[14.0 12.0 38860.0 14140.0]
[4.0 12.0 3626.0 89160.0]
[11.0 12.0 7708.0 5546.0]
[100.0 12.0 15420.0 8208707.0]
[2.0 12.0 500.0 19100.0]
[3.0 12.0 1626.0 360.0]
[6.0 12.0 9095.0 1542.0]
[4.0 12.0 3626.0 660.0]
[6.0 12.0 9095.0 1542.0]
[6.0 12.0 9095.0 1542.0]
[6.0 12.0 9095.0 1542.0]
[6.0 12.0 9095.0 1542.0]
[6.0 12.0 9095.0 1542.0]
[5.0 12.0 5723.0 5998.0]
[6.0 12.0 9032.0 1512.0]
[5.0 12.0 5723.0 998.0]
[1.0 12.0 0.0 14000.0]
[63.0 42.0 2290136.0 3419991.0]
[40.0 12.0 3626.0 660.0]
[61.0 12.0 18626.0 664.0]
[111.0 12.0 0.0 0.0]
[14.0 12.0 38860.0 15140.0]
[66.0 12.0 133102.0 6468.0]
[32.0 12.0 3563.0 39630.0]
[3.0 12.0 1626.0 360.0]
[4.0 12.0 3992.0 5822.0]
[7.0 12.0 9149.0 9952.0]
[6.0 12.0 8929.0 1474.0]
---------------K1
[1.0 12.0 0.0 4.0E8]
---------------K2
[6.0 12.0 8786.0 4.0006428E7]
[58.0 36.0 1397618.0 1.7879602E7]
[100.0 12.0 805762.0 1.993404E7]
[6.0 12.0 8849.0 3.3459849E7]
[100.0 12.0 8927659.0 3.8163823E7]
[101.0 72.0 621319.0 1.5322448E7]
[102.0 72.0 2.0134467E7 2.5894663E7]
[128.0 72.0 271390.0 3.9019349E7]
[38.0 12.0 303451.0 3.4083239E7]



  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值