import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Classic Spark word count: reads a text file, splits every line into
 * whitespace-separated words, and prints each (word, count) pair to stdout.
 *
 * Usage: the first CLI argument, if present, is the input file path;
 * otherwise the original hard-coded default path is used.
 *
 * @author YaPeng Li
 * @version 0.0.1
 **/
object WordCount {

  /** Fallback input path, kept for backward compatibility with the original job. */
  private val DefaultInputPath = "/Users/liyapeng/Desktop/test_data.txt"

  def main(args: Array[String]): Unit = {
    // Allow the input file to be supplied on the command line; fall back to
    // the previous hard-coded path so existing invocations keep working.
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    val conf: SparkConf = new SparkConf().setAppName("SparkWordCount").setMaster("local[*]")
    val sc: SparkContext = new SparkContext(conf)
    try {
      // textFile already returns one record per line, so the original
      // split("\n") was a no-op and the job counted whole LINES, not words.
      // Split on runs of whitespace instead and discard empty tokens
      // (leading whitespace on a line produces an empty first token).
      val lines: RDD[String] = sc.textFile(inputPath, 2)
      val words: RDD[String] = lines.flatMap(_.split("\\s+")).filter(_.nonEmpty)
      val tuples: RDD[(String, Int)] = words.map((_, 1))
      val counts: RDD[(String, Int)] = tuples.reduceByKey(_ + _)
      // collect() is safe here only because word counts for a small test
      // file fit in driver memory; for large inputs write to storage instead.
      counts.collect().foreach(println)
    } finally {
      // Always release the SparkContext, even if an action above fails.
      sc.stop()
    }
  }
}