package clw.spark.day01

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object ScalaWordCount {
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      println(
        """
          |Usage: clw.spark.day01.ScalaWordCount <srcPath> <desPath>
        """.stripMargin)
      sys.exit(-1)
    }
    val Array(srcPath, desPath) = args
    // Create the Spark configuration and set the application name
    val sparkConf = new SparkConf()
      .setAppName("ScalaWordCount")
      .setMaster("local[*]")
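    // Note: "local[*]" runs Spark locally with one worker thread per CPU core;
    // production jobs usually omit setMaster here and pass --master to spark-submit instead.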
    // Create the execution entry point: the SparkContext
    val sc = new SparkContext(sparkConf)
    // Specify where to read the data from and create the source RDD
    val data: RDD[String] = sc.textFile(srcPath)
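    // textFile accepts local paths, HDFS/S3 URIs, and glob patterns,
    // and yields one RDD element per line of input.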
    // Split each line into words and flatten the results
    val words: RDD[String] = data.flatMap(_.split(","))
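    // This assumes comma-delimited input; for whitespace-separated text,
    // split on "\\s+" instead.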
    // Pair each word with an initial count of 1
    val wordAndOne: RDD[(String, Int)] = words.map((_, 1))
    // Aggregate the counts by key (word)
    val result: RDD[(String, Int)] = wordAndOne.reduceByKey(_ + _)
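    // reduceByKey combines values map-side before the shuffle,
    // so it moves far less data than groupByKey followed by a sum.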
    // Sort by count in descending order
    val resultSort: RDD[(String, Int)] = result.sortBy(-_._2)
    // Save the sorted result to the target path
    // (the original saved the unsorted `result`, leaving `resultSort` unused)
    resultSort.saveAsTextFile(desPath)
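    // saveAsTextFile writes a directory of part-* files and
    // fails if the target path already exists.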
    // Release resources
    sc.stop()
  }
}
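
// Example local run with spark-submit; the jar name and paths below are
// illustrative placeholders, not part of the original project:
//
//   spark-submit \
//     --class clw.spark.day01.ScalaWordCount \
//     target/scala-word-count.jar \
//     input/words.txt output/wordcount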