《Data Algorithms: Recipes for Scaling up with Hadoop and Spark》Chapter 5: Order Inversion Pattern

Scala 版本算法实现

package com.bbw5.dataalgorithms.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.ArrayBuffer
import org.apache.spark.SparkContext._

object SparkRelativeFrequency {

  /**
   * Order Inversion pattern: computes, for every word, the relative frequency
   * of each neighbouring word within a +/- `window` token window.
   *
   * A special marker pair (word, "*") carries the total neighbour count for
   * `word`, which is later used as the denominator for the relative frequency.
   *
   * @param args optional; args(0) is the window size (defaults to 2, the
   *             original hard-coded value, so existing invocations behave
   *             identically)
   */
  def main(args: Array[String]): Unit = {
    // Generalized: window size may be supplied as the first CLI argument.
    val window = if (args.nonEmpty) args(0).toInt else 2

    val sparkConf = new SparkConf().setAppName("SparkRelativeFrequency")
    val sc = new SparkContext(sparkConf)
    try {
      val bWin = sc.broadcast(window)
      val data = sc.parallelize(Seq("W1 W2 W3 W4 W5 W6", "W1 W2 W3 W4 W5 W6"), 2)

      val results = data.flatMap { line =>
        // Tokenise: strip non-word characters and drop empty tokens.
        val tokens = line.split(" ").map(_.replaceAll("\\W+", "")).filter(_.nonEmpty)
        val items = new ArrayBuffer[((String, String), Int)]
        for (i <- tokens.indices) {
          // Clamp the co-occurrence window to the token array bounds.
          val start = math.max(0, i - bWin.value)
          val end = math.min(tokens.length - 1, i + bWin.value)
          for (j <- start to end; if i != j) {
            items += (tokens(i) -> tokens(j)) -> 1
          }
          // (end - start) is the neighbour count: the inclusive range has
          // end - start + 1 positions, minus position i itself.
          items += (tokens(i) -> "*") -> (end - start)
        }
        items
      }.reduceByKey(_ + _)
        // Cached because two separate actions below consume `results`;
        // without this the whole lineage is recomputed for the second action.
        .cache()

      results.sortByKey(true, 1).foreach(println)

      // Relative frequency: divide each (word, neighbour) count by the total
      // neighbour count stored under the (word, "*") marker.
      results
        .map { case ((word, other), count) => word -> (other -> count) }
        .groupByKey()
        .flatMap { case (word, counts) =>
          // The "*" marker is always emitted per word, so the lookup succeeds;
          // getOrElse(0) guards against a malformed upstream partition.
          val total = counts.find(_._1 == "*").map(_._2).getOrElse(0)
          counts.filter(_._1 != "*").map { case (other, count) =>
            (word -> other) -> count / total.toFloat
          }
        }
        .sortByKey(true, 1)
        .foreach(println)
    } finally {
      // Always release the SparkContext, even if a job above fails.
      sc.stop()
    }
  }
}

