Spark Core求topN案例

 

 

案例需求:

1、对文本文件内的数字,取最大的前3个。
2、对每个班级内的学生成绩,取出前3名。(分组取topN)

实例:

Java版本:TopN和分组TopN

import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

/**
 * Prints the three largest integers found in a text file.
 *
 * Every line of the input file is parsed as one integer, the values are
 * sorted in descending order with Spark, and the first three are written
 * to standard output, one per line.
 *
 * @author Administrator
 *
 */
public class Top3 {

	/**
	 * Program entry point.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		SparkConf conf = new SparkConf()
				.setAppName("Top3")
				.setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);

		// Release the context even when a Spark action fails (for example when
		// a line cannot be parsed as an integer).
		try {
			JavaRDD<String> lines = sc.textFile("C://Users//Administrator//Desktop//top.txt");

			// Key every line by its numeric value so sortByKey can order it.
			JavaPairRDD<Integer, String> pairs = lines.mapToPair(

					new PairFunction<String, Integer, String>() {

						private static final long serialVersionUID = 1L;

						@Override
						public Tuple2<Integer, String> call(String t) throws Exception {
							return new Tuple2<Integer, String>(Integer.valueOf(t), t);
						}

					});

			// false = descending, so the largest numbers come first.
			JavaPairRDD<Integer, String> sortedPairs = pairs.sortByKey(false);

			// Only the numeric key is needed from here on.
			JavaRDD<Integer> sortedNumbers = sortedPairs.map(

					new Function<Tuple2<Integer, String>, Integer>() {

						private static final long serialVersionUID = 1L;

						@Override
						public Integer call(Tuple2<Integer, String> v1) throws Exception {
							return v1._1;
						}

					});

			List<Integer> sortedNumberList = sortedNumbers.take(3);

			for (Integer num : sortedNumberList) {
				System.out.println(num);
			}
		} finally {
			sc.close();
		}
	}

}
import java.util.Arrays;
import java.util.Iterator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

/**
 * Prints the three highest scores of every class.
 *
 * Each input line has the form "className score", separated by a single
 * space. Scores are grouped by class name and, for every class, the top
 * scores are printed in descending order. A class with fewer than three
 * scores prints only the scores it actually has.
 *
 * @author Administrator
 *
 */
public class GroupTop3 {

	/** Maximum number of scores kept per class. */
	private static final int TOP_N = 3;

	/**
	 * Program entry point.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		SparkConf conf = new SparkConf()
				.setAppName("Top3")
				.setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);

		// Release the context even when a Spark action fails.
		try {
			JavaRDD<String> lines = sc.textFile("C://Users//Administrator//Desktop//score.txt");

			// "className score" -> (className, score)
			JavaPairRDD<String, Integer> pairs = lines.mapToPair(

					new PairFunction<String, String, Integer>() {

						private static final long serialVersionUID = 1L;

						@Override
						public Tuple2<String, Integer> call(String line) throws Exception {
							String[] lineSplited = line.split(" ");
							return new Tuple2<String, Integer>(lineSplited[0],
									Integer.valueOf(lineSplited[1]));
						}

					});

			JavaPairRDD<String, Iterable<Integer>> groupedPairs = pairs.groupByKey();

			JavaPairRDD<String, Iterable<Integer>> top3Score = groupedPairs.mapToPair(

					new PairFunction<Tuple2<String, Iterable<Integer>>, String, Iterable<Integer>>() {

						private static final long serialVersionUID = 1L;

						@Override
						public Tuple2<String, Iterable<Integer>> call(
								Tuple2<String, Iterable<Integer>> classScores)
								throws Exception {
							// Kept sorted in descending order; unused slots stay
							// null and are always at the tail.
							Integer[] top = new Integer[TOP_N];

							String className = classScores._1;
							Iterator<Integer> scores = classScores._2.iterator();

							while (scores.hasNext()) {
								Integer score = scores.next();

								for (int i = 0; i < TOP_N; i++) {
									if (top[i] == null) {
										// First free slot: everything before it is >= score.
										top[i] = score;
										break;
									} else if (score > top[i]) {
										// Shift the smaller entries one slot down,
										// dropping the last one, then insert.
										for (int j = TOP_N - 1; j > i; j--) {
											top[j] = top[j - 1];
										}

										top[i] = score;

										break;
									}
								}
							}

							// Count the filled prefix so that classes with fewer
							// than TOP_N scores do not expose null entries.
							int filled = 0;
							while (filled < TOP_N && top[filled] != null) {
								filled++;
							}

							return new Tuple2<String,
									Iterable<Integer>>(className,
											Arrays.asList(Arrays.copyOf(top, filled)));
						}

					});

			top3Score.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {

				private static final long serialVersionUID = 1L;

				@Override
				public void call(Tuple2<String, Iterable<Integer>> t) throws Exception {
					System.out.println("class: " + t._1);
					Iterator<Integer> scoreIterator = t._2.iterator();
					while (scoreIterator.hasNext()) {
						Integer score = scoreIterator.next();
						System.out.println(score);
					}
					System.out.println("=======================================");
				}

			});
		} finally {
			sc.close();
		}
	}

}

Scala版本:

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext


/**
 * Prints the three largest integers found in a text file.
 *
 * Every line of the input file is parsed as one integer, the values are
 * sorted in descending order with Spark, and the first three are printed,
 * one per line.
 */
object Top3 {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("Top3")
      .setMaster("local")
    val sc = new SparkContext(conf)

    // Stop the context even when a Spark action fails (for example when a
    // line cannot be parsed as an integer).
    try {
      val lines = sc.textFile("C://Users//Administrator//Desktop//top.txt", 1)

      val top3Number = lines
        .map(line => (line.toInt, line)) // key by numeric value for sortByKey
        .sortByKey(ascending = false)    // largest first
        .map(_._1)
        .take(3)

      top3Number.foreach(println)
    } finally {
      sc.stop()
    }
  }

}

Scala版本:分组取TopN(本例每组取前5名)

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Prints the five highest scores of every class.
 *
 * Each input line has the form "className score", separated by a single
 * space. Scores are grouped by class name and, for every class, the top
 * scores are printed in descending order. A class with fewer than five
 * scores prints only the scores it actually has.
 */
object GroupTopN {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Number of highest scores kept per class.
    val topN = 5

    val conf = new SparkConf().setAppName("groupTopN").setMaster("local")
    val sc = new SparkContext(conf)

    // Stop the context even when a Spark action fails.
    try {
      // Sample input data is listed at the end of this file.
      val baseRDD = sc.textFile("D:\\score.txt").cache()

      // "className score" -> (className, score); split each line only once.
      val pairRDD = baseRDD.map { line =>
        val fields = line.split(" ")
        (fields(0), fields(1).toInt)
      }

      val groupRDD = pairRDD.groupByKey()

      // Dump the raw groups before reducing them to the top scores.
      groupRDD.foreach(println(_))

      // Sort each group's scores in descending order and keep the first topN.
      // Ordering.reverse is used instead of negating the score so that every
      // Int value, including negatives, is ordered correctly.
      val groupTopRDD = groupRDD.mapValues { scores =>
        scores.toSeq.sorted(Ordering[Int].reverse).take(topN)
      }

      groupTopRDD.foreach { case (className, top) =>
        println("================================")
        top.foreach(score => println(className + " : " + score))
      }
    } finally {
      sc.stop()
    }
  }
}

/*
class1 77
class3 67
class2 81
class3 73
class1 91
class2 69
class2 97
class3 90
class1 57
class3 55
class1 57
class2 81
class1 93
class3 79
class2 81
class1 99
class3 99
class1 91
class2 81
class1 99
class3 79
class2 81
 */

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值