Spark PairRDD 转化二

package edu.berkeley.simple_project;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.storage.StorageLevel;

import com.google.common.base.Optional;

import scala.Tuple2;

/**
 * Hello world!
 *
 */
public class App {
	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setAppName("Simple Application");
		JavaSparkContext sc = new JavaSparkContext(conf);

		// convert from other RDD
		JavaRDD<String> line1 = sc.parallelize(Arrays.asList("1 aa", "2 bb", "4 cc", "3 dd"));
		JavaPairRDD<String, String> prdd = line1.mapToPair(new PairFunction<String, String, String>() {
			public Tuple2<String, String> call(String x) throws Exception {
				return new Tuple2(x.split(" ")[0], x);
			}
		});
		System.out.println("111111111111mapToPair:");
		prdd.foreach(new VoidFunction<Tuple2<String, String>>() {
			public void call(Tuple2<String, String> x) throws Exception {
				System.out.println(x);
			}
		});

		// parallelizePairs
		Tuple2 t1 = new Tuple2(1, 2);
		Tuple2 t2 = new Tuple2(3, 4);
		Tuple2 t3 = new Tuple2(3, 6);
		List list1 = new ArrayList<Tuple2>();
		list1.add(t1);
		list1.add(t2);
		list1.add(t3);
		JavaPairRDD<Integer, Integer> line2 = sc.parallelizePairs(list1);
		line2.persist(StorageLevel.MEMORY_ONLY());

		Tuple2 t4 = new Tuple2(3, 9);
		List list2 = new ArrayList<Tuple2>();
		list2.add(t4);
		JavaPairRDD<Integer, Integer> line3 = sc.parallelizePairs(list2);
		line3.persist(StorageLevel.MEMORY_ONLY());

		// subtractByKey
		JavaPairRDD<Integer, Integer> line4 = line2.subtractByKey(line3);
		System.out.println("22222222222222subtractByKey:");
		line4.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
			public void call(Tuple2<Integer, Integer> x) throws Exception {
				System.out.println(x);
			}
		});

		// join
		JavaPairRDD<Integer, Tuple2<Integer, Integer>> line5 = line2.join(line3);
		System.out.println("33333333333333join:");
		line5.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Integer, Integer>>>() {
			public void call(Tuple2<Integer, Tuple2<Integer, Integer>> x) throws Exception {
				System.out.println(x);
			}
		});

		// rightOuterJoin
		JavaPairRDD<Integer, Tuple2<Optional<Integer>, Integer>> line6 = line2.rightOuterJoin(line3);
		System.out.println("444444444444444444rightOuterJoin:");
		line6.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Optional<Integer>, Integer>>>() {
			public void call(Tuple2<Integer, Tuple2<Optional<Integer>, Integer>> x) throws Exception {
				System.out.println(x);
			}
		});

		// leftOuterJoin
		JavaPairRDD<Integer, Tuple2<Integer, Optional<Integer>>> line7 = line2.leftOuterJoin(line3);
		System.out.println("555555555555555leftOuterJoin:");
		line7.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Integer, Optional<Integer>>>>() {
			public void call(Tuple2<Integer, Tuple2<Integer, Optional<Integer>>> x) throws Exception {
				System.out.println(x);
			}
		});

		// cogroup
		JavaPairRDD<Integer, Tuple2<Iterable<Integer>, Iterable<Integer>>> line8 = line2.cogroup(line3);
		System.out.println("66666666666666666cogroup:");
		line8.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Iterable<Integer>, Iterable<Integer>>>>() {
			public void call(Tuple2<Integer, Tuple2<Iterable<Integer>, Iterable<Integer>>> x) throws Exception {
				System.out.println(x);
			}
		});

		// combineByKey,聚合
		// 1. createCombiner, if key already exists, then do mergeValue.
		// a[1]:(2,1), a[3]:(4,1)
		Function<Integer, AvgCount> ca = new Function<Integer, AvgCount>() {
			public AvgCount call(Integer x) throws Exception {
				return new AvgCount(x, 1);
			}
		};
		// 2. mergeValue
		// a[3]:(a[3],6) => (10,2)
		Function2<AvgCount, Integer, AvgCount> addAndCount = new Function2<AvgCount, Integer, AvgCount>() {
			public AvgCount call(AvgCount x, Integer y) throws Exception {
				x.total += y;
				x.num += 1;
				return x;
			}
		};
		// 3.mergeCombiners in different partitions
		// if (4,1) and (6,1) are different partitions , then =>(10,2)
		Function2<AvgCount, AvgCount, AvgCount> combine = new Function2<AvgCount, AvgCount, AvgCount>() {
			public AvgCount call(AvgCount x, AvgCount y) throws Exception {
				x.total += y.total;
				x.num += y.num;
				return x;
			}
		};
		JavaPairRDD<Integer, AvgCount> avgCounts = line2.combineByKey(ca, addAndCount, combine);
		System.out.println("7777777777combineByKey:");
		avgCounts.foreach(new VoidFunction<Tuple2<Integer, AvgCount>>() {
			public void call(Tuple2<Integer, AvgCount> x) throws Exception {
				System.out.println(x._1 + " " + x._2.avg());
			}
		});

		// sortByKey
		JavaPairRDD<Integer, Integer> sortRDD = line2.sortByKey(new  MyComparator());
		System.out.println("88888888888888888sortByKey:");
		sortRDD.foreach(new VoidFunction<Tuple2<Integer, Integer>>() {
			public void call(Tuple2<Integer, Integer> x) throws Exception {
				System.out.println(x);
			}
		});
	}
}


package edu.berkeley.simple_project;

import java.io.Serializable;
import java.util.Comparator;

/**
 * Serializable comparator that orders {@code Integer} keys in ascending
 * numeric order.
 *
 * <p>The comparison is delegated to {@link Integer#compareTo(Integer)} rather
 * than computed by subtraction, because a subtraction overflows when the two
 * operands are far apart (for example {@code Integer.MIN_VALUE} and {@code 1})
 * and then reports the wrong sign.
 */
public class MyComparator implements Comparator<Integer>, Serializable{
	/**
	 * Compares two keys numerically.
	 *
	 * @param x first key, must not be {@code null}
	 * @param y second key, must not be {@code null}
	 * @return a negative value if {@code x < y}, zero if they are equal,
	 *         a positive value if {@code x > y}
	 * @throws NullPointerException if either argument is {@code null}
	 */
	@Override
	public int compare(Integer x, Integer y) {
		return x.compareTo(y);
	}

}

111111111111mapToPair:
16/02/02 14:01:41 WARN SizeEstimator: Failed to check whether UseCompressedOops is set; assuming yes
[Stage 0:>                                                          (0 + 0) / 4](2,2 bb)
(1,1 aa)
(3,3 dd)
(4,4 cc)
22222222222222subtractByKey:                                                    
(1,2)
33333333333333join:
(3,(4,9))
(3,(6,9))
444444444444444444rightOuterJoin:
(3,(Optional.of(4),9))
(3,(Optional.of(6),9))
555555555555555leftOuterJoin:
(1,(2,Optional.absent()))
(3,(4,Optional.of(9)))
(3,(6,Optional.of(9)))
66666666666666666cogroup:
(1,([2],[]))
(3,([4, 6],[9]))
7777777777combineByKey:
1 2.0
3 5.0
88888888888888888sortByKey:
(3,4)
(3,6)
(1,2)


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值