package com.uplooking.bigdata.core.p3
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable
/**
 * Scala version of the grouped Top-N job.
 *
 * Reads `<className> <score>` records (whitespace separated) from a text file,
 * groups the scores by class name and prints, for every class, its N highest
 * distinct scores in descending order.
 *
 * Arguments:
 *  - `args(0)`: N, a positive integer (required)
 *  - `args(1)`: input path (optional, defaults to `E:/test/spark/topn.txt`)
 *
 * NOTE(review): this file contains a second, identical definition of
 * `ScalaSparkTopNApp` further down; the two definitions conflict and one of
 * them should be removed.
 */
object ScalaSparkTopNApp {

  import scala.util.Try

  /** Input used when no path is passed on the command line. */
  private val DefaultInputPath = "E:/test/spark/topn.txt"

  /**
   * Entry point: validates the arguments, then runs the job.
   *
   * Prints a usage message to stderr and exits with status -1 when N is
   * missing, not an integer, or not positive.
   *
   * @param args command-line arguments, see the object documentation
   */
  def main(args: Array[String]): Unit = {
    parseTopN(args) match {
      case Some(topN) =>
        val inputPath = if (args.length > 1) args(1) else DefaultInputPath
        run(topN, inputPath)
      case None =>
        System.err.println("Parameter Errors! Usage: <topNum[Integer]> [inputPath]")
        System.exit(-1)
    }
  }

  /** Extracts N from the first argument; `None` if absent, non-numeric or not positive. */
  private def parseTopN(args: Array[String]): Option[Int] =
    for {
      present <- Option(args)
      raw     <- present.headOption
      n       <- Try(raw.trim.toInt).toOption
      if n > 0
    } yield n

  /** Builds the Spark context, runs the Top-N pipeline and always stops the context. */
  private def run(topN: Int, inputPath: String): Unit = {
    val conf = new SparkConf()
      // Name the application after this object rather than an unrelated class.
      .setAppName(getClass.getSimpleName.stripSuffix("$"))
      // Default to local mode, but let an externally supplied master win.
      .setIfMissing("spark.master", "local")
    val sc = new SparkContext(conf)
    try {
      val topNBC = sc.broadcast(topN)
      val topScoresByClass = sc.textFile(inputPath)
        .filter(_.trim.nonEmpty) // blank lines carry no record
        .map(parseRecord)
        .groupByKey()
        .mapValues(scores => topDistinct(scores, topNBC.value))
      // println runs inside the Spark tasks; with the default local master
      // the output appears on this process's stdout.
      topScoresByClass.foreach { case (className, scores) => println(s"$className\t$scores") }
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }

  /**
   * Parses one `<className> <score>` line.
   *
   * @throws IllegalArgumentException if the line has fewer than two fields
   * @throws NumberFormatException    if the score is not an integer
   */
  private def parseRecord(line: String): (String, Int) = {
    val fields = line.trim.split("\\s+")
    require(fields.length >= 2, s"Malformed record, expected '<className> <score>': '$line'")
    (fields(0), fields(1).toInt)
  }

  /**
   * Returns the `n` largest distinct values of `scores`, highest first.
   *
   * The set never holds more than `n + 1` elements: after each insertion the
   * smallest retained value (the last one in descending order) is evicted.
   */
  private def topDistinct(scores: Iterable[Int], n: Int): Iterable[Int] = {
    // Ordering.reverse instead of `y - x`, which overflows for extreme values.
    val best = new mutable.TreeSet[Int]()(Ordering[Int].reverse)
    for (score <- scores) {
      best.add(score)
      if (best.size > n) {
        best.remove(best.last)
      }
    }
    best
  }
}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable
/**
 * Scala version of the grouped Top-N job.
 *
 * Each input line is expected to hold a class name followed by an integer
 * score, separated by whitespace. For every class name the job prints the N
 * highest distinct scores, largest first.
 *
 * Usage: `<topNum[Integer]> [inputPath]`
 *  - `topNum` must be a positive integer.
 *  - `inputPath` is optional and defaults to `E:/test/spark/topn.txt`.
 *
 * NOTE(review): an identical `ScalaSparkTopNApp` is already defined earlier in
 * this file; the two definitions conflict and one of them should be removed.
 */
object ScalaSparkTopNApp {

  private val Usage = "Parameter Errors! Usage: <topNum[Integer]> [inputPath]"
  private val FallbackInput = "E:/test/spark/topn.txt"

  /**
   * Validates the command line and runs the job.
   *
   * Exits with status -1 (after printing the usage text to stderr) when the
   * first argument is missing, is not an integer, or is not positive.
   *
   * @param args command-line arguments: N and, optionally, the input path
   */
  def main(args: Array[String]): Unit = {
    val requested: Option[Int] =
      if (args == null || args.isEmpty) None
      else scala.util.Try(args(0).trim.toInt).toOption.filter(_ > 0)

    if (requested.isEmpty) {
      System.err.println(Usage)
      System.exit(-1)
    }

    requested.foreach { limit =>
      val source = if (args.length > 1) args(1) else FallbackInput

      val conf = new SparkConf()
      // Use this object's own name instead of borrowing another class's.
      conf.setAppName(getClass.getSimpleName.stripSuffix("$"))
      // Local mode is only a default; an externally configured master is kept.
      conf.setIfMissing("spark.master", "local")

      val sc = new SparkContext(conf)
      try {
        val limitBC = sc.broadcast(limit)

        val scoresByClass = sc.textFile(source)
          .map(_.trim)
          .filter(_.nonEmpty) // ignore blank lines
          .map(toPair)
          .groupByKey()

        val best = scoresByClass.mapValues(scores => keepHighest(limitBC.value)(scores))

        // Executed inside the Spark tasks; with the default local master the
        // lines show up on this process's stdout.
        best.foreach(entry => println(s"${entry._1}\t${entry._2}"))
      } finally {
        // Stop the context on failure as well as on success.
        sc.stop()
      }
    }
  }

  /**
   * Splits an already trimmed line into `(className, score)`.
   *
   * @throws IllegalArgumentException if fewer than two fields are present
   * @throws NumberFormatException    if the second field is not an integer
   */
  private def toPair(record: String): (String, Int) =
    record.split("\\s+") match {
      case fields if fields.length >= 2 => (fields(0), fields(1).toInt)
      case _ =>
        throw new IllegalArgumentException(
          s"Malformed record, expected '<className> <score>': '$record'")
    }

  /**
   * Folds `scores` into a descending set capped at `limit` distinct values.
   *
   * After every insertion the lowest retained score is dropped once the set
   * grows past `limit`, so at most `limit + 1` values are held at any time.
   */
  private def keepHighest(limit: Int)(scores: Iterable[Int]): Iterable[Int] =
    // Ordering.reverse avoids the overflow of a subtraction-based comparator.
    scores.foldLeft(mutable.TreeSet.empty[Int](Ordering[Int].reverse)) { (kept, score) =>
      kept += score
      if (kept.size > limit) kept -= kept.last
      kept
    }
}