scala实现高斯混合聚类

 scala实现的高斯混合聚类,效果还不错,原理参考西瓜书p206-210

import breeze.linalg.{DenseMatrix, DenseVector, det, inv}
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ArrayBuffer

object GaussCluster {
  var a1 = 1.0/3.0
  var a2 = 1.0/3.0
  var a3 = 1.0/3.0

  var cov1 = DenseMatrix((0.1, 0.0), (0.0, 0.1))
  var cov2 = DenseMatrix((0.1, 0.0), (0.0, 0.1))
  var cov3 = DenseMatrix((0.1, 0.0), (0.0, 0.1))

  var u1 = DenseVector(0.403, 0.237)
  var u2 = DenseVector(0.714, 0.346)
  var u3 = DenseVector(0.532, 0.472)

  var array = Array(0.0, 0.0, 0.0)
  var result = new collection.mutable.ArrayBuffer[Array[Double]]()
  def getPx(cov:DenseMatrix[Double],x:DenseVector[Double],u:DenseVector[Double]):Double = {
    val exp =  (mMv(inv(cov),(x-u))).t * u
    val px = (-0.5*Math.exp(exp)) / (2*Math.PI)*Math.sqrt(det(cov))
    px
  }

  def mMv(m:DenseMatrix[Double],v:DenseVector[Double]):DenseVector[Double] = {
    val cols = v.length
    val rows = m.rows
    val array = new collection.mutable.ArrayBuffer[Double]()
    val tv = v.t
    for(i <- 0 to cols - 1){
      var sum = 0.0
      for(j <- 0 to rows - 1){
        sum += tv(j)*m(j,i)
      }
      array.append(sum)
    }
//    println(array.toArray)
    DenseVector(array.toArray)
  }

  def getPm(x:DenseVector[Double]):Array[Double] = {
    val p1 = a1 * getPx(cov1,x,u1)
    val p2 = a2 * getPx(cov2,x,u2)
    val p3 = a3 * getPx(cov3,x,u3)

    val pm1 = p1 / (p1 + p2 + p3)
    val pm2 = p2 / (p1 + p2 + p3)
    val pm3 = p3 / (p1 + p2 + p3)

    array = Array(pm1,pm2,pm3)
//    var max = 0.0
//    for(i <- 0 to array.length-1){
//      if(array(i)>max){
//        max = array(i)
//      }
//    }

//    println(pm1 + ":" + pm2 + ":" + pm3)

    array
  }

  def updateCoef(x:Array[DenseVector[Double]]) = {
    val pmArray = new collection.mutable.ArrayBuffer[Array[Double]]()
    val rarray = new collection.mutable.ArrayBuffer[Array[Double]]()
    val m = x.length.toDouble
    for(i <- 0 to x.length-1){
      pmArray.append(getPm(x(i)))
      rarray.append(getPm(x(i)))
    }

    result = rarray

    var pmSum1 = 0.0
    var pmX1 = DenseVector(0.0,0.0)
    for(j <- 0 to pmArray.length-1){
      pmSum1 += pmArray(j)(0)
//      println((x(j) * pmArray(j)(0)))
      pmX1 = pmX1 :+ (x(j) * pmArray(j)(0))
    }
    u1 = pmX1 :/ pmSum1

    var pmSum2 = 0.0
    var pmX2 = DenseVector(0.0,0.0)
    for(j <- 0 to pmArray.length-1){
      pmSum2 += pmArray(j)(0)
      pmX2 =  pmX2 + (x(j) * pmArray(j)(1))
    }
    u2 = pmX2 :/ pmSum2

    var pmSum3 = 0.0
    var pmX3 = DenseVector(0.0,0.0)
    for(j <- 0 to pmArray.length-1){
      pmSum3 += pmArray(j)(0)
      pmX3 = pmX3 + (x(j) * pmArray(j)(2))
    }
    u3 = pmX3 :/ pmSum3

    a1 = pmSum1 / m
    a2 = pmSum2 / m
    a3 = pmSum3 / m

    var Ncov1 = DenseMatrix((0.0,0.0),(0.0,0.0))
    var Ncov2 = DenseMatrix((0.0,0.0),(0.0,0.0))
    var Ncov3 = DenseMatrix((0.0,0.0),(0.0,0.0))


    for(k <- 0 to x.length-1 ){
      Ncov1 = Ncov1 :+ pmArray(k)(0)*((x(k)-u1) * (x(k)-u1).t)
      Ncov2 = Ncov2 :+ pmArray(k)(1)*((x(k)-u1) * (x(k)-u2).t)
      Ncov3 = Ncov3 :+ pmArray(k)(2)*((x(k)-u1) * (x(k)-u3).t)

    }

    cov1 = Ncov1 :/ pmSum1
    cov2 = Ncov2 :/ pmSum2
    cov3 = Ncov3 :/ pmSum3


  }

  def getNewCov(pmSum:Double,pmArray:ArrayBuffer[Double],u:DenseVector[Double],x:Array[DenseVector[Double]]) = {
  }

  def main(args: Array[String]): Unit = {
//    val m = DenseMatrix((1.0,3.0),(2.0,4.0))
//    val v = DenseVector(1.0,2.0)
//    val temp = mMv(m,v)
//    val result = temp.t * v
//    println(result)
    val conf = new SparkConf().setMaster("local[4]").setAppName(s"${this.getClass.getSimpleName}")
    val sc = new SparkContext(conf)
    sc.setLogLevel("ERROR")

    val oriData = sc.textFile("C:\\Users\\dell\\Desktop\\data\\gaussCluster.txt")
    val data = oriData.map(_.split(" ")).map(s => s.map(_.toDouble)).map(s => {
      DenseVector(s(1),s(2))
    })
    var c = 0
    val x = data.collect()

    while(c<5){
      for (i <-0 to x.length-1){
        getPm(x(i))
      }
      updateCoef(x)
      c += 1
    }

    for (i <- 0 to result.length-1){
      val temp = result(i)
      var max = 0
      for (j <- 1 to temp.length-1){
        if(temp(j)>temp(max)){
          max = j
        }
      }
      println(max)
    }
  }

}

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
利用scala实现的k-means 包含数据集 0 1 22 9 181 5450 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 8 8 0.00 0.00 0.00 0.00 1.00 0.00 0.00 9 9 1.00 0.00 0.11 0.00 0.00 0.00 0.00 0.00 0 1 22 9 239 486 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 8 8 0.00 0.00 0.00 0.00 1.00 0.00 0.00 19 19 1.00 0.00 0.05 0.00 0.00 0.00 0.00 0.00 0 1 22 9 235 1337 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 8 8 0.00 0.00 0.00 0.00 1.00 0.00 0.00 29 29 1.00 0.00 0.03 0.00 0.00 0.00 0.00 0.00 0 1 22 9 219 1337 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 6 6 0.00 0.00 0.00 0.00 1.00 0.00 0.00 39 39 1.00 0.00 0.03 0.00 0.00 0.00 0.00 0.00 0 1 22 9 217 2032 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 6 6 0.00 0.00 0.00 0.00 1.00 0.00 0.00 49 49 1.00 0.00 0.02 0.00 0.00 0.00 0.00 0.00 0 1 22 9 217 2032 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 6 6 0.00 0.00 0.00 0.00 1.00 0.00 0.00 59 59 1.00 0.00 0.02 0.00 0.00 0.00 0.00 0.00 0 1 22 9 212 1940 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 2 0.00 0.00 0.00 0.00 1.00 0.00 1.00 1 69 1.00 0.00 1.00 0.04 0.00 0.00 0.00 0.00 0 1 22 9 159 4087 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 5 5 0.00 0.00 0.00 0.00 1.00 0.00 0.00 11 79 1.00 0.00 0.09 0.04 0.00 0.00 0.00 0.00 0 1 22 9 210 151 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 8 8 0.00 0.00 0.00 0.00 1.00 0.00 0.00 8 89 1.00 0.00 0.12 0.04 0.00 0.00 0.00 0.00 0 1 22 9 212 786 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 8 8 0.00 0.00 0.00 0.00 1.00 0.00 0.00 8 99 1.00 0.00 0.12 0.05 0.00 0.00 0.00 0.00 0 1 22 9 210 624 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 18 18 0.00 0.00 0.00 0.00 1.00 0.00 0.00 18 109 1.00 0.00 0.06 0.05 0.00 0.00 0.00 0.00 0 1 22 9 177 1985 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0.00 0.00 0.00 0.00 1.00 0.00 0.00 28 119 1.00 0.00 0.04 0.04 0.00 0.00 0.00 0.00 0 1 22 9 222 773 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 11 11 0.00 0.00 0.00 0.00 1.00 0.00 0.00 38 129 1.00 0.00 0.03 0.04 0.00 0.00 0.00 0.00 0 1 22 9 256 1169 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 4 4 0.00 0.00 0.00 0.00 1.00 0.00 0.00 4 139 1.00 0.00 0.25 0.04 0.00 0.00 0.00 0.00 0 1 22 9 241 259 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0.00 0.00 0.00 0.00 1.00 0.00 0.00 14 149 1.00 0.00 0.07 0.04 0.00 0.00 0.00 0.00 0 1 22 9 260 1837 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 11 11 0.00 0.00 0.00 0.00 1.00 0.00 0.00 24 159 1.00 0.00 0.04 0.04 0.00 0.00 0.00 0.00 0 1 22 9 241 261 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 2 0.00 0.00 0.00 0.00 1.00 0.00 0.00 34 169 1.00 0.00 0.03 0.04 0.00 0.00 0.00 0.00 0 1 22 9 257 818 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 12 12 0.00 0.00 0.00 0.00 1.00 0.00 0.00 44 179 1.00 0.00 0.02 0.03 0.00 0.00 0.00 0.00 0 1 22 9 233 255 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 8 0.00 0.00 0.00 0.00 1.00 0.00 0.25 54 189 1.00 0.00 0.02 0.03 0.00 0.00 0.00 0.00 0 1 22 9 233 504 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 7 7 0.00 0.00 0.00 0.00 1.00 0.00 0.00 64 199 1.00 0.00 0.02 0.03 0.00 0.00 0.00 0.00 0 1 22 9 256 1273 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 17 17 0.00 0.00 0.00 0.00 1.00 0.00 0.00 74 209 1.00 0.00 0.01 0.03 0.00 0.00 0.00 0.00 0 1 22 9 234 255 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 5 5 0.00 0.00 0.00 0.00 1.00 0.00 0.00 84 219 1.00 0.00 0.01 0.03 0.00 0.00 0.00 0.00 0 1 22 9 241 259 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 12 12 0.00 0.00 0.00 0.00 1.00 0.00 0.00 94 229 1.00 0.00 0.01 0.03 0.00 0.00 0.00 0.00 0 1 22 9 239 968 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 3 3 0.00 0.00 0.00 0.00 1.00 0.00 0.00 3 239 1.00 0.00 0.33 0.03 0.00 0.00 0.00 0.00 0 1 22 9 245 1919 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 13 13 0.00 0.00 0.00 0.00 1.00 0.00 0.00 13 249 1.00 0.00 0.08 0.03 0.00 0.00 0.00 0.00 0 1 22 9 248 2129 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 23 23 0.00 0.00 0.00 0.00 1.00 0.00 0.00 23 255 1.00 0.00 0.04 0.03 0.00 0.00 0.00 0.00 0 1 22 9 354 1752 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 2 0.00 0.00 0.00 0.00 1.00 0.00 0.00 5 255 1.00 0.00 0.20 0.04 0.00 0.00 0.00 0.00 0 1 22 9 193 3991 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0.00 0.00 0.00 0.00 1.00 0.00 0.00 1 255 1.00 0.00 1.00 0.05 0.00 0.00 0.00 0.00 0 1 22 9 214 14959 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 6 6 0.00 0.00 0.00 0.00 1.00 0.00 0.00 11 255 1.00 0.00 0.09 0.05 0.00 0.00 0.00 0.00 0 1 22 9 212 1309 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 10 0.00 0.00 0.00 0.00 1.00 0.00 0.20 21 255 1.00 0.00 0.05 0.05 0.00 0.00 0.00 0.00 0 1 22 9 215 3670 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 3 3 0.00 0.00 0.00 0.00 1.00 0.00 0.00 31 255 1.00 0.00 0.03 0.05 0.00 0.00 0.00 0.00 0 1 22 9 217 18434 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 2 0.00 0.00 0.00 0.00 1.00 0.00 0.00 41 255 1.00 0.00 0.02 0.05 0.00 0.00 0.00 0.00 0 1 22 9 205 424 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 25 0.00 0.00 0.00 0.00 1.00 0.00 0.12 2 255 1.00 0.00 0.50 0.05 0.00 0.00 0.00 0.00 0 1 22 9 155 424 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 3 13 0.00 0.00 0.00 0.00 1.00 0.00 0.15 12 255 1.00 0.00 0.08 0.05 0.00 0.00 0.00 0.00 0 1 22 9 202 424 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 3 3 0.00 0.00 0.00 0.00 1.00 0.00 0.00 22 255 1.00 0.00 0.05 0.05 0.00 0.00 0.00 0.00 0 1 22 9 235 6627 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0.00 0.00 0.00 0.00 1.00 0.00 0.00 32 255 1.00 0.00 0.03 0.05 0.00 0.00 0.00 0.00 0 1 22 9 259 3917 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0.00 0.00 0.00 0.00 1.00 0.00 0.00 42 255 1.00 0.00 0.02 0.05 0.00 0.00 0.00 0.00 0 1 22 9 301 2653 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 2 0.00 0.00 0.00 0.00 1.00 0.00 0.00 52 255 1.00 0.00 0.02 0.05 0.00 0.00 0.00 0.00 0 1 22 9 322 424 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 2 0.00 0.00 0.00 0.00 1.00 0.00 0.00 62 255 1.00 0.00 0.02 0.05 0.00 0.00 0.00 0.00 0 1 22 9 370 520 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 3 3 0.00 0.00 0.00 0.00 1.00 0.00 0.00 72 255 1.00 0.00 0.01 0.04 0.00 0.00 0.00 0.00 0 1 22 9 370 520 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 3 3 0.00 0.00 0.00 0.00 1.00 0.00 0.00 82 255 1.00 0.00 0.01 0.04 0.00 0.00 0.00 0.00 0 1 22 9 172 5884 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 6 6 0.00 0.00 0.00 0.00 1.00 0.00 0.00 10 255 1.00 0.00 0.10 0.05 0.00 0.00 0.00 0.00 0 1 22 9 264 16123 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 13 0.00 0.00 0.00 0.00 1.00 0.00 0.23 20 255 1.00 0.00 0.05 0.05 0.00 0.00 0.00 0.00 0 1 22 9 255 1948 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 4 14 0.00 0.00 0.00 0.00 1.00 0.00 0.14 30 255 1.00 0.00 0.03 0.05 0.00 0.00 0.00 0.00 0 1 22 9 274 19790 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 6 6 0.00 0.00 0.00 0.00 1.00 0.00 0.00 40 255 1.00 0.00 0.03 0.05 0.00 0.00 0.00 0.00 0 1 22 9 313 293 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 3 3 0.00 0.00 0.00 0.00 1.00 0.00 0.00 3 255 1.00 0.00 0.33 0.05 0.00 0.00 0.00 0.00 0 1 22 9 145 4466 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 4 4 0.00 0.00 0.00 0.00 1.00 0.00 0.00 13 255 1.00 0.00 0.08 0.05 0.00 0.00 0.00 0.00 0 1 22 9 290 460 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0.00 0.00 0.00 0.00 1.00 0.00 0.00 23 255 1.00 0.00 0.04 0.05 0.00 0.00 0.00 0.00 0 1 22 9 309 17798 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 2 0.00 0.00 0.00 0.00 1.00 0.00 0.00 2 255 1.00 0.00 0.50 0.06 0.00 0.00 0.00 0.00 0 1 22 9 317 2075 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 4 4 0.00 0.00 0.00 0.00 1.00 0.00 0.00 8 255 1.00 0.00 0.12 0.06 0.00 0.00 0.00 0.00

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值