Spark (Scala version)
Experiment 4-2
import org.apache.spark.{SparkConf, SparkContext}

object case2 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("reduce")
      .set("spark.driver.host", "localhost")
    val sc = new SparkContext(conf)
    sc.setLogLevel("ERROR")
    // Load the input data
    val two = sc.textFile("file:///usr/local/spark/text_4/sec")
    two.filter(_.trim().length > 0)     // trim() strips leading/trailing whitespace; keep only non-blank lines
      .map(line => (line.trim, ""))     // use the whole line as the key, paired with an empty placeholder value
      .groupByKey()                     // grouping by key merges duplicate lines across partitions, deduplicating them
      .sortByKey()                      // sort by the keys' natural order
      .keys.collect().foreach(println)  // collect() gathers the results into an array on the driver
  }
}
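
A note on the approach: the map/groupByKey/keys chain above is one way to remove duplicate lines, and Spark also offers distinct() for the same purpose. Below is a minimal alternative sketch, assuming the same input path and local setup (the object name case2Distinct is invented for illustration):

import org.apache.spark.{SparkConf, SparkContext}

object case2Distinct {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("distinct-dedup")
      .set("spark.driver.host", "localhost")
    val sc = new SparkContext(conf)
    sc.setLogLevel("ERROR")
    sc.textFile("file:///usr/local/spark/text_4/sec")
      .filter(_.trim().length > 0)  // drop blank lines
      .map(_.trim)
      .distinct()                   // distinct() replaces the map/groupByKey/keys chain
      .sortBy(identity)             // sortBy with the identity function gives natural ordering
      .collect()
      .foreach(println)
    sc.stop()
  }
}

Internally, distinct() is built on reduceByKey, which combines duplicates on the map side before the shuffle, so it typically moves less data than groupByKey.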
Scala SDK version: 2.11.8
Spark version: 2.1.0
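
For reference, assuming the program is packaged into a jar (the jar path below is hypothetical), it can be launched with spark-submit like this:

/usr/local/spark/bin/spark-submit --class case2 --master local /path/to/case2.jar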