【Data Algorithms_Recipes for Scaling up with Hadoop and Spark】Chapter 9 Recommendation Items

:scala版算法实现

package com.bbw5.dataalgorithms.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.HashMap
import scala.collection.mutable.ArrayBuffer

/**
 * SparkRecommendItems is a Spark program that implements two basic item
 * recommendation algorithms: "customers who bought this item also bought"
 * (input CWBTIAB.txt) and "frequently bought together" (input FBT.txt).
 * CWBTIAB.txt:
 * user1,item1
 * user1,item2
 * user1,item3
 * user1,item2
 * user1,item3
 * user2,item2
 * user2,item4
 * user2,item5
 * user3,item1
 * user3,item2
 * user4,item4
 * user4,item5
 *
 * FBT.txt
 * T1:P1,P2,P3
 * T2:P2,P3,P4
 * T3:P4,P3,P8
 * T4:P2,P3,P9,P10
 * T5:P4,P3,P8,P12
 * T6:P1,P3,P8
 * T7:P2,P3,P8
 *
 * @author baibaiw5
 *
 */
object SparkRecommendItems {

  /** Default input of `customersWhoBoughtThisItemAlsoBought`: one `user,item` pair per line. */
  private val DefaultPurchasesPath = "G:/temp/data/CWBTIAB.txt"

  /** Default input of `frequentlyBoughtTogether`: one `transactionId:item,item,...` per line. */
  private val DefaultTransactionsPath = "G:/temp/data/FBT.txt"

  /**
   * Program entry point: runs both recommendation jobs and always releases the
   * SparkContext afterwards.
   *
   * @param args optional overrides: `args(0)` is the purchases file, `args(1)`
   *             the transactions file; missing entries fall back to the defaults
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("SparkRecommendItems")
    val sc = new SparkContext(sparkConf)
    try {
      val purchasesPath = args.lift(0).getOrElse(DefaultPurchasesPath)
      val transactionsPath = args.lift(1).getOrElse(DefaultTransactionsPath)
      customersWhoBoughtThisItemAlsoBought(sc, purchasesPath)
      frequentlyBoughtTogether(sc, transactionsPath)
    } finally {
      // Stop the context even when a job fails so the application does not hang.
      sc.stop()
    }
  }

  /**
   * "Customers who bought this item also bought": for every item, counts how
   * many users bought each other item as well and prints the `topN` most
   * frequent companions. Intermediate results are printed too.
   *
   * @param sc   the SparkContext used to read the input
   * @param path text file with one `user,item` pair per line
   * @param topN maximum number of recommendations printed per item
   */
  def customersWhoBoughtThisItemAlsoBought(
    sc: SparkContext,
    path: String = DefaultPurchasesPath,
    topN: Int = 2): Unit = {
    // Malformed lines are skipped instead of failing the whole job.
    val purchases = sc.textFile(path).flatMap(parsePurchase)

    // Per user: every bought item paired with the other items of the same user.
    val perUser = purchases.groupByKey().flatMap { case (_, items) => coPurchases(items) }
    // collect() pulls everything to the driver; acceptable only for small sample data.
    perUser.collect().foreach(println)

    // Per item: sum the per-user companion counts.
    val perItem = perUser.reduceByKey(mergeCounts)
    perItem.collect().foreach(println)

    // Keep the top-N companions of every item.
    val recommendations = perItem.mapValues(counts => topRecommendations(counts, topN))
    recommendations.collect().foreach(println)
  }

  /**
   * "Frequently bought together": enumerates every non-empty item combination
   * of each transaction, counts in how many transactions each combination
   * occurs, and prints the combinations reaching the minimum support.
   *
   * Note: a transaction with k distinct items yields 2^k - 1 combinations, so
   * this is only practical for short transactions.
   *
   * @param sc         the SparkContext used to read the input
   * @param path       text file with one `transactionId:item1,item2,...` per line
   * @param minSupport minimum number of transactions a combination must occur in
   */
  def frequentlyBoughtTogether(
    sc: SparkContext,
    path: String = DefaultTransactionsPath,
    minSupport: Int = 2): Unit = {
    val candidates = sc.textFile(path)
      .flatMap(parseTransaction)
      .flatMap(itemsets)
      .map(itemset => (itemset, 1))
    // collect() pulls everything to the driver; acceptable only for small sample data.
    candidates.collect().foreach(println)

    val frequent = candidates
      .reduceByKey(_ + _)
      .filter { case (_, support) => support >= minSupport }
    frequent.collect().foreach(println)
  }

  /** Parses a `user,item` line; returns None when either field is missing or blank. */
  private def parsePurchase(line: String): Option[(String, String)] =
    line.split(",") match {
      case Array(user, item, _*) if user.trim.nonEmpty && item.trim.nonEmpty =>
        Some((user.trim, item.trim))
      case _ => None
    }

  /**
   * Pairs each distinct item of one user with a count of 1 for every other
   * distinct item of that user. Repeated purchases of the same item are
   * collapsed first so that one user contributes at most 1 to any item pair.
   */
  private def coPurchases(items: Iterable[String]): Seq[(String, Map[String, Int])] = {
    val basket = items.toSet
    basket.toSeq.map { item =>
      (item, (basket - item).map(other => (other, 1)).toMap)
    }
  }

  /** Adds the counts of `right` to those of `left`, key by key. */
  private def mergeCounts(left: Map[String, Int], right: Map[String, Int]): Map[String, Int] =
    right.foldLeft(left) { case (acc, (item, count)) =>
      acc.updated(item, acc.getOrElse(item, 0) + count)
    }

  /** Returns the `n` highest counts, ties broken by item name for a deterministic result. */
  private def topRecommendations(counts: Map[String, Int], n: Int): List[(String, Int)] =
    counts.toList.sortBy { case (item, count) => (-count, item) }.take(n)

  /** Parses `transactionId:item1,item2,...`; returns None when the line has no items. */
  private def parseTransaction(line: String): Option[Seq[String]] =
    line.split("[:]") match {
      case Array(_, itemList, _*) =>
        val items = itemList.split(",").map(_.trim).filter(_.nonEmpty).toSeq
        if (items.isEmpty) None else Some(items)
      case _ => None
    }

  /**
   * Enumerates every non-empty combination of the distinct items.
   * The items are sorted once up front so that the same set of items always
   * produces the same key, and each combination is turned into a List because
   * arrays compare by reference and could not serve as a reduceByKey key.
   */
  private def itemsets(items: Seq[String]): Iterator[List[String]] = {
    val sorted = items.distinct.sorted
    (1 to sorted.size).iterator
      .flatMap(size => sorted.combinations(size))
      .map(_.toList)
  }
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值