创建RDD(由本地数组并行化得到)
// Sample data: one (name, Int, Int, Int) tuple per student; the three Ints are the
// scores that the queries below add up into a total.
scala> val a = sc.parallelize(Seq(
| ("zhangsan", 99, 98, 100),
| ("lisi", 99, 98, 100),
| ("wangwu", 99, 98, 100)))
a: org.apache.spark.rdd.RDD[(String, Int, Int, Int)] = ParallelCollectionRDD[28] at parallelize at <console>:24
要求:在名字以zhang开头的学生中,找出总分最高的一位(输出姓名和总分)
一、
// Approach 1: compute each matching student's total, then ask for the single largest
// pair with top(1), ordered by the total. This avoids sorting the whole RDD just to
// read one element, and avoids a negated sort key (negation overflows for Int.MinValue).
scala> a.filter(_._1.startsWith("zhang")).
| map { case (name, s1, s2, s3) => (name, s1 + s2 + s3) }.
| top(1)(Ordering.by[(String, Int), Int](_._2))
res30: Array[(String, Int)] = Array((zhangsan,297))
二、
// Approach 2: fold all matching (name, total) pairs down to the one with the highest
// total. A plain reduce is used rather than reduceByKey on the name, because keying by
// the full name would only compare records that share the same name. reduce is an
// action, so the winning pair is returned directly.
// Note: reduce fails on an empty RDD, i.e. when no name starts with "zhang".
scala> a.filter(_._1.startsWith("zhang")).
| map { case (name, s1, s2, s3) => (name, s1 + s2 + s3) }.
| reduce((x, y) => if (x._2 > y._2) x else y)
res28: (String, Int) = (zhangsan,297)
三、
// Approach 3: put the total first so the default tuple ordering compares totals
// (ties fall back to the name), then take the maximum.
scala> a.filter { case (name, _, _, _) => name.startsWith("zhang") }.
| map { case (name, s1, s2, s3) => (s1 + s2 + s3, name) }.
| max
res31: (Int, String) = (297,zhangsan)
四、
// Approach 4: key every matching record by the shared prefix "zhang" so that
// reduceByKey compares all of them and keeps the (name, total) with the highest total.
scala> a.filter { case (name, _, _, _) => name.startsWith("zhang") }.
| map { case (name, s1, s2, s3) => ("zhang", (name, s1 + s2 + s3)) }.
| reduceByKey { (best, other) => if (best._2 > other._2) best else other }.
| collect
res34: Array[(String, (String, Int))] = Array((zhang,(zhangsan,297)))
// Approach 4, key dropped: key by the shared prefix "zhang", reduce to the pair with
// the highest total, then keep only the (name, total) value.
scala> a.filter { case (name, _, _, _) => name.startsWith("zhang") }.
| map { case (name, s1, s2, s3) => ("zhang", (name, s1 + s2 + s3)) }.
| reduceByKey { (best, other) => if (best._2 > other._2) best else other }.
| values.
| collect
res35: Array[(String, Int)] = Array((zhangsan,297))
五、
// Approach 5: group the matching (name, total) pairs by their 5-character prefix and
// pick the pair with the highest total inside each group.
// maxBy is used instead of tracking a running best that starts at 0, which would
// yield ("", 0) whenever no total is positive. Every group produced by groupBy has at
// least one element, so maxBy cannot hit an empty collection here.
scala> a.filter(_._1.startsWith("zhang")).
| map { case (name, s1, s2, s3) => (name, s1 + s2 + s3) }.
| groupBy(pair => pair._1.substring(0, 5)).
| map { case (_, group) => group.maxBy(_._2) }.
| collect()
res3: Array[(String, Int)] = Array((zhangsan,297))