Word-frequency counting (word count) on Apache Spark, implemented in Scala.
package scala.spark.com.wordCount
import org.apache.spark.{SparkConf, SparkContext}
/**
* /usr/local/spark-2.2.0-bin-2.6.0-cdh5.7.0/bin/spark-submit \
* --class scala.spark.com.wordCount.WordCount \
* --num-executors 1 \
* --driver-memory 1000m \
* --executor-memory 100m \
* --executor-cores 1 \
* scalaSpark-1.0.jar
*
*/
/**
 * Word-count driver: reads a text file, splits lines on single spaces,
 * and prints each distinct word with its occurrence count.
 *
 * Usage: WordCount [inputPath]
 *   inputPath — optional; defaults to the local sample file below so the
 *               original zero-argument invocation still works unchanged.
 */
object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WordCount")
    // Only default to local mode when no master was supplied externally.
    // Hard-coding setMaster("local") would override the --master flag of
    // the spark-submit command shown in the header comment.
    if (!conf.contains("spark.master")) {
      conf.setMaster("local")
    }
    val sc = new SparkContext(conf)
    try {
      // Input path is now parameterized; the previous hard-coded local
      // path remains the default for backward compatibility.
      //val lines = sc.textFile("hdfs://hadoop001:9000/input/test.txt",1)
      val inputPath = args.headOption.getOrElse(
        "file:/Users/zhudechao/gitee/bigdata/scalaSpark/input/test.txt")
      val lines = sc.textFile(inputPath)
      val words = lines.flatMap { line => line.split(" ") }
      val pairs = words.map { word => (word, 1) }
      // Sum the per-word 1s; reduceByKey combines map-side before shuffling.
      val wc = pairs.reduceByKey { _ + _ }
      // NOTE: on a real cluster this prints on the executors, not the
      // driver; acceptable here since the default master is local.
      wc.foreach(w => println(w._1 + " " + w._2))
      //wc.saveAsTextFile("hdfs://hadoop001:9000/output/test.txt")
    } finally {
      // Ensure the context is released even if an action above fails.
      sc.stop()
    }
  }
}
Input data: a plain-text file (by default input/test.txt) whose lines contain space-separated words.
Output data: one "word count" pair per line, printed to standard output (or saved to HDFS if the commented-out saveAsTextFile line is enabled).