Sample input data (yyyy-MM-dd temperature):
1990-01-01 -5
1990-06-18 35
1990-03-20 8
1989-05-04 23
1989-11-11 -3
1989-07-05 38
1990-07-30 37
import org.apache.spark.{SparkConf, SparkContext}
/**
 * For each year in the input, finds the maximum temperature and the day
 * on which it occurred.
 *
 * Input (args(0)): text file with whitespace-separated records of the form
 * "yyyy-MM-dd temperature", e.g. "1990-06-18 35".
 * Output: prints one (year, maxTemp, day) tuple per year.
 */
object MaxTemp {
  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "usage: MaxTemp <input-path>")

    // Location of the Hadoop native binaries on Windows dev boxes;
    // NOTE(review): hard-coded path — harmless on other platforms but
    // consider making it configurable.
    System.setProperty("hadoop.home.dir", "D:\\soft\\hadoop\\hadoop-2.7.3")

    val conf = new SparkConf().setMaster("local[*]").setAppName("MaxTemp")
    val sc = new SparkContext(conf)
    try {
      // Parse each line exactly once into (year, (temp, day)) instead of
      // re-splitting the raw line repeatedly inside the reduce step.
      // "\\s+" tolerates runs of whitespace between the two fields.
      val parsed = sc.textFile(args(0)).map { line =>
        val fields = line.split("\\s+")
        val day = fields(0)
        val temp = fields(1).toInt
        (day.substring(0, 4), (temp, day))
      }

      // reduceByKey combines map-side, so only one (temp, day) pair per
      // year per partition is shuffled — unlike groupByKey, which ships
      // every record for a year across the network.
      val maxPerYear = parsed
        .reduceByKey((a, b) => if (a._1 >= b._1) a else b)
        .map { case (year, (temp, day)) => (year, temp, day) }

      maxPerYear.foreach(println)
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}