RDD算子:reduceByKey、foldByKey、sortByKey

reduceByKey
聚合操作:对相同 key 的 value 按给定的二元函数进行合并,每个 key 得到一个聚合结果。

// Signature of the Java-API reduceByKey as rendered by a decompiler/IDE stub
// (the body is elided as "compiled code"). It takes a Function2 that merges two
// values of type V into one V and returns a JavaPairRDD with the same key and
// value types. Presumably a member of JavaPairRDD[K, V] -- confirm against the
// Spark API docs.
def reduceByKey(func : 
org.apache.spark.api.java.function.Function2[V, V, V]) : 
org.apache.spark.api.java.JavaPairRDD[K, V] = 
{ /* compiled code */ }

Scala版

import org.apache.spark.{SparkConf, SparkContext}

/** Word-count demo for `reduceByKey`.
  *
  * Reads `data/words.txt`, splits each line on runs of whitespace, pairs every
  * non-empty token with 1, sums the counts per token and prints each
  * `(word, count)` pair on the driver.
  */
object reduceByKey {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[1]").setAppName("demo")
    val sc = new SparkContext(conf)
    try {
      val counts = sc.textFile("data/words.txt")
        // Split on any whitespace run and drop empty tokens so that blank lines,
        // leading whitespace or repeated spaces are not counted as a "" word.
        .flatMap(_.split("\\s+"))
        .filter(_.nonEmpty)
        .map(word => (word, 1))
        .reduceByKey(_ + _)
      // collect() pulls every pair to the driver; fine for a small demo file.
      counts.collect().foreach(println)
    } finally {
      // Release the context even if the job fails.
      sc.stop()
    }
  }
}

Java版:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import scala.Tuple2;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Word-count demo for {@code reduceByKey} using the Java RDD API.
 *
 * <p>Reads {@code data/words.txt}, splits each line on runs of whitespace, pairs every
 * non-empty token with 1, sums the counts per token and prints each pair on the driver.
 */
public class reduceByKey {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[1]").setAppName("reduceByKey");
        // JavaSparkContext is Closeable: try-with-resources stops it even if the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // Load the input file; each RDD element is one line.
            JavaRDD<String> words = sc.textFile("data/words.txt");

            // Turn every line into (token, 1) pairs.
            JavaPairRDD<String, Integer> wordsPair = words.flatMapToPair(new PairFlatMapFunction<String, String, Integer>() {
                @Override
                public Iterator<Tuple2<String, Integer>> call(String s) throws Exception {
                    List<Tuple2<String, Integer>> pairs = new ArrayList<>();
                    for (String token : s.split("\\s+")) {
                        // split() yields an empty first token for blank lines or
                        // leading whitespace; do not count it as a word.
                        if (!token.isEmpty()) {
                            pairs.add(new Tuple2<>(token, 1));
                        }
                    }
                    return pairs.iterator();
                }
            });

            // Sum the 1s of each token.
            JavaPairRDD<String, Integer> reduceByKey = wordsPair.reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) throws Exception {
                    return i1 + i2;
                }
            });

            // collect() pulls every pair to the driver; fine for a small demo file.
            List<Tuple2<String, Integer>> collect = reduceByKey.collect();
            for (Tuple2<String, Integer> tuple2 : collect) {
                System.out.println(tuple2);
            }
        }
    }
}

foldByKey
Scala版本

/** Word-count demo for `foldByKey`.
  *
  * Same pipeline as the `reduceByKey` demo, but the per-key aggregation starts
  * from an explicit zero value. The zero value may be folded in more than once
  * per key (once per partition that holds the key), so it must be neutral for
  * the merge function: 0 for addition.
  */
object foldByKey {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[1]").setAppName("demo")
    val sc = new SparkContext(conf)
    try {
      val counts = sc.textFile("data/words.txt")
        // Split on any whitespace run and drop empty tokens so that blank lines
        // or repeated spaces are not counted as a "" word.
        .flatMap(_.split("\\s+"))
        .filter(_.nonEmpty)
        .map(word => (word, 1))
        // A zero value of 1 would inflate every count by one per partition.
        .foldByKey(0)(_ + _)
      // collect() pulls every pair to the driver; fine for a small demo file.
      counts.collect().foreach(println)
    } finally {
      // Release the context even if the job fails.
      sc.stop()
    }
  }
}

sortByKey
按照 key 排序。sortByKey 是全局排序:先通过范围分区(RangePartitioner)使各分区之间的 key 有序,再在每个分区内部排序。
sortByKey() 默认升序,sortByKey(false) 降序。
Scala版本:

import org.apache.spark.{SparkConf, SparkContext}

/** Demo for `sortByKey`: sorts a small in-memory pair RDD by its integer key
  * in the default (ascending) order and prints the pairs on the driver.
  */
object sortByKey {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[1]").setAppName("demo")
    val sc = new SparkContext(conf)
    try {
      val pairs = sc.makeRDD(List((3, "c"), (1, "a"), (2, "b"), (5, "e"), (4, "d")))
      // No argument means ascending; pass `false` for descending order.
      val sorted = pairs.sortByKey()
      sorted.collect().foreach(println)
    } finally {
      // Release the context even if the job fails.
      sc.stop()
    }
  }
}

Java版本

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Demo for {@code sortByKey} using the Java RDD API: builds a small pair RDD from an
 * in-memory list and prints it sorted by key in descending order.
 */
public class SortByKey {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[1]").setAppName("sortByKey");

        List<Tuple2<Integer, String>> list = new ArrayList<>();
        list.add(new Tuple2<>(3, "sam"));
        list.add(new Tuple2<>(1, "sally"));
        list.add(new Tuple2<>(4, "john"));
        list.add(new Tuple2<>(2, "mary"));
        list.add(new Tuple2<>(5, "bruce"));
        list.add(new Tuple2<>(6, "sana"));

        // JavaSparkContext is Closeable: try-with-resources stops it even if the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // parallelizePairs builds the JavaPairRDD directly from the tuple list,
            // so no identity mapToPair step is needed.
            JavaPairRDD<Integer, String> pairs = sc.parallelizePairs(list);

            // sortByKey() sorts ascending by default; sortByKey(false) sorts descending.
            JavaPairRDD<Integer, String> sort = pairs.sortByKey(false);

            List<Tuple2<Integer, String>> collect = sort.collect();
            for (Tuple2<Integer, String> tup2 : collect) {
                System.out.println(tup2);
            }
        }
    }
}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值