1 数据
2 源码
package wc
import scala.actors.{Actor, Future}
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source
/**
 * Worker actor that counts word occurrences in a single file.
 *
 * Protocol:
 *  - `SubmitTask(filename)`: reads the file, splits every line on a single
 *    space, and replies to the sender with a `ResultTask` holding the
 *    per-word counts for that file.
 *  - `StopTask`: terminates the actor.
 */
class Task extends Actor {
  override def act(): Unit = {
    loop {
      // `react` takes a partial function and, unlike `receive`, lets the
      // underlying thread be reused while the actor waits for a message.
      react {
        case SubmitTask(filename) =>
          val source = Source.fromFile(filename)
          val result =
            try {
              // `toList` drains the line iterator, so every line has been read
              // before the source is closed in the `finally` below.
              source.getLines()
                .flatMap(_.split(" "))
                .map((_, 1))
                .toList
                .groupBy(_._1)
                // Strict `map` (instead of the lazy `mapValues` view) so the
                // reply carries a fully computed immutable Map.
                .map { case (word, hits) => (word, hits.size) }
            } finally {
              // Always release the file handle, even if reading fails.
              source.close()
            }
          sender ! ResultTask(result)
        case StopTask =>
          exit()
      }
    }
  }
}
/** Request message: asks a worker to count the words in the file at `filename`. */
final case class SubmitTask(filename: String)

/** Reply message: the per-word occurrence counts computed for one file. */
final case class ResultTask(result: Map[String, Int])

/** Control message: asks a worker to terminate. */
case object StopTask
/**
 * Word count over several files using one `Task` actor per file.
 *
 * Map phase: every file is submitted to its own worker with `!!`, which sends
 * the message asynchronously and returns a future for the reply.
 * Reduce phase: the per-file counts are collected and merged into a single
 * word -> total map, which is printed.
 */
object ActorWordCount {
  def main(args: Array[String]): Unit = {
    val files = Array[String]("d://words1.txt", "d://words2.txt")

    // Map phase: start all workers first so the files are processed concurrently.
    // The worker reference is kept next to its future so it can be stopped later.
    val pending = files.toList.map { file =>
      val worker = new Task
      worker.start()
      (worker, worker !! SubmitTask(file))
    }

    // Reduce phase, step 1: collect the partial results.
    // Applying the future blocks until the reply is available, so no
    // isSet/sleep polling loop is needed.
    val partials = pending.map { case (worker, reply) =>
      val partial = reply() match {
        case r: ResultTask => r
        case other =>
          throw new IllegalStateException("Unexpected reply from Task: " + other)
      }
      // The worker has finished its only job; let it exit instead of leaving
      // it parked in `react`.
      worker ! StopTask
      partial
    }

    /*
     * Reduce phase, step 2: merge the per-file maps. Illustrative data only:
     *   partials = List(
     *     Map(hello -> 1, tom -> 2, jerry -> 4),
     *     Map(hello -> 4, jerry -> 2)
     *   )
     * is flattened to (word, count) pairs, grouped by word, and the counts
     * of each group are summed.
     */
    val fr = partials
      .flatMap(_.result)
      .groupBy(_._1)
      .mapValues(_.foldLeft(0)(_ + _._2))
    println(fr)
  }
}
运行结果:
Map(henny -> 1, jack -> 1, tom -> 3, pick -> 1, hello -> 10, jerry -> 4)
3 局部分析
scala> import scala.io.Source
import scala.io.Source
scala> Source.fromFile("d://words1.txt")
res1: scala.io.BufferedSource = non-empty iterator
scala> Source.fromFile("d://words1.txt").getLines()
res2: Iterator[String] = non-empty iterator
scala> Source.fromFile("d://words1.txt").getLines().flatMap(_.split(" "))
res5: Iterator[String] = non-empty iterator
scala> Source.fromFile("d://words1.txt").getLines().flatMap(_.split(" ")).map((_,1))
res6: Iterator[(String, Int)] = non-empty iterator
scala> Source.fromFile("d://words1.txt").getLines().flatMap(_.split(" ")).map((_,1)).toList
res7: List[(String, Int)] = List((hello,1), (tom,1), (hello,1), (jerry,1), (hello,1), (henny,1), (hello,1), (tom,1))
scala> Source.fromFile("d://words1.txt").getLines().flatMap(_.split(" ")).map((_,1)).toList.groupBy(_._1)
res8: scala.collection.immutable.Map[String,List[(String, Int)]] =
Map(henny -> List((henny,1)),
tom -> List((tom,1), (tom,1)), jerry -> List((jerry,1)),
hello -> List((hello,1), (hello,1), (hello,1), (hello,1)))
scala> Source.fromFile("d://words1.txt").getLines().flatMap(_.split(" ")).map((_,1)).toList.groupBy(_._1).mapValues(_.size)
res9: scala.collection.immutable.Map[String,Int] =
Map(henny -> 1, tom -> 2, jerry -> 1, hello -> 4)