object TopN {
  // Required on Windows so Hadoop can locate winutils.exe; harmless on other platforms.
  System.setProperty("hadoop.home.dir", "D:\\hadoop\\hadoop-2.9.2")

  import org.apache.spark.{SparkConf, SparkContext}

  /**
   * Prints the top-N values (rank and value, one per line) of the third
   * comma-separated column of the input file.
   *
   * Two alternative implementations considered:
   *   - Method 1: map to (Int, "") pairs + sortByKey(false) + take(n)
   *   - Method 2: sortBy(_.toInt, ascending = false) + take(n)
   * Method 3 (used here): rdd.top(n) — simplest and avoids a full sort.
   *
   * @param args args(0) = input file path; optional args(1) = N (defaults to 5)
   */
  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "usage: TopN <inputPath> [n]")
    // Backward compatible: N defaults to the original hard-coded 5.
    val n = if (args.length > 1) args(1).toInt else 5

    val conf = new SparkConf().setMaster("local").setAppName("TopN")
    val sc = new SparkContext(conf)
    try {
      val lines = sc.textFile(args(0))
      // Keep only non-blank rows that have exactly 4 comma-separated fields,
      // parse the 3rd field as Int, and take the n largest values.
      lines
        .filter(x => x.trim().length > 0 && x.split(",").length == 4)
        .map(line => line.split(",")(2).toInt)
        .top(n)
        .zipWithIndex // rank locally on the driver instead of mutating a var
        .foreach { case (value, i) => println(s"${i + 1} $value") }
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}
Spark TopN
Latest recommended article published on 2023-06-25 21:32:24