import org.apache.spark.{SparkConf, SparkContext}

/**
 * Created by MC on 2018/6/11.
 * Demonstrates caching an RDD in memory with persist():
 * the first action computes and caches the word-count result,
 * subsequent actions should read it straight from memory.
 */
object PersisTest {

  /**
   * Runs a word count over the input file three times, timing each run.
   *
   * The counted RDD is marked with persist(), so the first action computes
   * it and populates the cache; the second and third actions should be
   * much faster because they read the cached partitions.
   *
   * @param sc the SparkContext to run the job on
   * @return a summary of the three timings in milliseconds
   */
  def WordCount(sc: SparkContext): String = {
    val start = System.currentTimeMillis()
    val file = sc.textFile("D://B/c.txt")

    // Split on whitespace, count each word, sort by key, and mark the
    // result for in-memory caching (persist() is lazy: nothing is cached
    // until an action runs).
    val counts = file
      .flatMap(_.split("\\s"))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortByKey()
      .persist()

    // First action: computes the RDD and fills the cache.
    counts.foreach(println(_))
    val end = System.currentTimeMillis()
    println("--------------" + (end - start) + "--------------")

    // BUG FIX: the original bound `val key = rsp.foreach(...)` (a Unit) and
    // then called key.toString() here, which never re-ran the job — so the
    // second and third timings measured nothing. Re-run the action so the
    // cached RDD is actually read.
    counts.foreach(println(_))
    val end1 = System.currentTimeMillis()
    println("--------------" + (end1 - end) + "--------------")

    // Third action: another read from the cache.
    counts.foreach(println(_))
    val end2 = System.currentTimeMillis()
    println("--------------" + (end2 - end1) + "--------------")

    // The original returned Unit.toString ("()"); return something useful.
    s"first=${end - start}ms, second=${end1 - end}ms, third=${end2 - end1}ms"
  }

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("GroupTopN")
    val sc = new SparkContext(conf)
    val result = WordCount(sc)
    println(result)
    sc.stop() // side-effecting call: keep the parentheses
  }
}

// Observed results (from the original author's run):
// first run — computes the RDD:
// --------------464--------------
// run that computes and caches in memory:
// --------------757--------------
// read directly from the in-memory cache:
// --------------15--------------
// Sample word-count output from one run:
// (Job,1)
// (My,3)
// (Second,,1)
// (So,1)
// (Thank,1)
// (Third,1)
// (a,4)
// (an,1)
// (and,5)
// (architect,1)
// (be,2)
// (become,1)
// (believe,1)
// (big,1)
// (can,4)
// (crazy.,1)
// (data,1)
// (data,,1)
// (dream,1)
// (excellent,1)
// (family,,1)
// (find,1)
// (first,1)
// (for,1)
// (from,1)
// (future.,1)
// (great,1)