package main.scala.scala.operator

import org.apache.spark.{SparkConf, SparkContext}

object GroupByKey {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GroupByKey").setMaster("local")
    val sc = new SparkContext(conf)

    // Read from HDFS instead, if desired:
    // val lines = sc.textFile("hdfs://hadoop001:9000/input/test.txt", 1)
    val lines = sc.textFile("file:/Users/zhudechao/gitee/bigdata/scalaSpark/input/test.txt")

    // Split each line into words and pair each word with a count of 1,
    // then group the counts by word and sum each word's list of 1s.
    val words = lines.flatMap { line => line.split(" ") }
    val pairs = words.map { word => (word, 1) }
    val wc = pairs.groupByKey().map(w => (w._1, w._2.sum))

    wc.foreach(w => println(w._1 + " " + w._2))
    sc.stop()
  }
}
Input data:
a b c
uu uu 1
h k l
Output data (order is not guaranteed, since the pairs are printed as partitions are processed):
a 1
k 1
b 1
h 1
uu 2
1 1
l 1
c 1
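For this kind of word count, reduceByKey is usually preferred over groupByKey, because it combines values on each partition before the shuffle instead of moving every (word, 1) pair across the network. A minimal sketch of the same pipeline, assuming the same input path as above (the object name ReduceByKeyWordCount is just for illustration):

import org.apache.spark.{SparkConf, SparkContext}

object ReduceByKeyWordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ReduceByKeyWordCount").setMaster("local")
    val sc = new SparkContext(conf)

    // Assumed: same local input file as the groupByKey example above.
    val lines = sc.textFile("file:/Users/zhudechao/gitee/bigdata/scalaSpark/input/test.txt")

    // reduceByKey merges per-word counts map-side before shuffling,
    // avoiding the full per-key value lists that groupByKey materializes.
    val wc = lines.flatMap(_.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    wc.foreach(w => println(w._1 + " " + w._2))
    sc.stop()
  }
}

On the sample input above, this prints the same counts as the groupByKey version.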