// 第1关: WordCount - 词频统计 (Level 1: WordCount — word frequency count)
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object WordCount {
  /** Counts word occurrences in a text file and prints them in descending
    * frequency order. Runs on a local-mode SparkContext.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(conf)
    val path = "file:///root/files/wordcount.txt"
    /********* Begin *********/
    // Load the input file as an RDD of lines.
    val lines = sc.textFile(path)
    // Split each line on spaces and flatten into individual words.
    val tokens: RDD[String] = lines.flatMap(line => line.split(" "))
    // Pair every word with an initial count of 1.
    val pairs: RDD[(String, Int)] = tokens.map(word => (word, 1))
    // Sum the counts for each distinct word.
    val counts: RDD[(String, Int)] = pairs.reduceByKey((a, b) => a + b)
    // Order by count, most frequent first.
    val ranked: RDD[(String, Int)] = counts.sortBy(pair => pair._2, ascending = false)
    // Print each (word, count) pair (driver-side, since master is "local").
    ranked.foreach(println)
    /********* End *********/
    sc.stop()
  }
}
// 第1关: SparkContext初始化 (Level 1: SparkContext initialization)
import org.apache.spark.{SparkConf, SparkContext}
object Test {
  /** Demonstrates the SparkContext lifecycle: configure, create, use, stop. */
  def main(args: Array[String]): Unit = {
    /********** Begin **********/
    // Build the Spark configuration (local master, app name "educoder").
    val conf = new SparkConf().setMaster("local").setAppName("educoder")
    // Initialize the SparkContext from the configuration.
    val sc = new SparkContext(conf)
    /********** End **********/
    // Distribute a small word list and print each element on the executors.
    val greetings = sc.parallelize(List("hello", "world"))
    greetings.foreach(print)
    /********** Begin **********/
    // Shut down the SparkContext and release its resources.
    sc.stop()
    /********** End **********/
  }
}