zip 拉链函数
[测试 zip 函数]返回结果:[(1,a), (2,b), (3,c), (4,d), (5,e), (6,f), (7,g), (8,h), (9,i), (10,j)]
intersection 取得两个RDD的交集
[测试 intersection 函数]返回结果:[10, 9, 8, 7, 6]
keyBy
[测试 keyBy 函数]返回结果:[(3,dog), (3,rat), (6,salmon), (6,salmon), (8,elephant)]
[测试 keys 函数]返回结果:[3, 3, 6, 6, 8]
1.[代码][Java]代码
/**
* zip 拉链函数
* [测试 zip 函数]返回结果:[(1,a), (2,b), (3,c), (4,d), (5,e), (6,f), (7,g), (8,h), (9,i), (10,j)]
* @param spark
*/
public static void zipTest(JavaSparkContext spark){
    // Two equally sized inputs: ten integers and ten single-letter strings.
    // Explicit type arguments (instead of raw JavaRDD) let the compiler check
    // the element types that flow into the zipped pairs.
    JavaRDD<Integer> numbers = spark.parallelize(Arrays.asList(1,2,3,4,5,6,7,8,9,10));
    JavaRDD<String> letters = spark.parallelize(Arrays.asList("a","b","c","d","e","f","g","h","i","j"));

    // zip pairs the elements positionally: (1,a), (2,b), ...
    // NOTE(review): Spark documents that zip expects both RDDs to have the same
    // partitioning and element counts; both inputs here are built the same way.
    JavaPairRDD<Integer, String> zipped = numbers.zip(letters);

    // collect() brings the pairs back to the driver so they can be printed.
    System.out.println("[测试 zip 函数]返回结果:"+zipped.collect());
}
/**
* intersection 取得两个RDD的交集
* [测试 intersection 函数]返回结果:[10, 9, 8, 7, 6]
* @param spark
*/
public static void intersectionTest(JavaSparkContext spark){
    // Exercise the intersection function: the two inputs share the values 6..10.
    // Explicit type arguments replace the raw JavaRDD so the sort key is typed.
    JavaRDD<Integer> left = spark.parallelize(Arrays.asList(1,2,3,4,5,6,7,8,9,10));
    JavaRDD<Integer> right = spark.parallelize(Arrays.asList(6,7,8,9,10,11));

    // Keep only the elements present in both RDDs.
    JavaRDD<Integer> common = left.intersection(right);

    // Sort by the element itself, descending (ascending = false).
    // The original requested 0 partitions; Spark's range partitioner treats
    // 0 like a single partition, so 1 states the same thing explicitly.
    JavaRDD<Integer> result = common.sortBy(t -> t, false, 1);

    System.out.println("[测试 intersection 函数]返回结果:"+result.collect());
}
/**
* keyBy
* [测试 keyBy 函数]返回结果:[(3,dog), (3,rat), (6,salmon), (6,salmon), (8,elephant)]
* [测试 keys 函数]返回结果:[3, 3, 6, 6, 8]
* @param spark
*/
public static void keyByTest(JavaSparkContext spark){
    // Exercise the keyBy function (followed by sortByKey and keys).
    // The element type must be declared: with a raw JavaRDD the key function's
    // argument is typed Object and length() cannot be resolved on it.
    JavaRDD<String> words = spark.parallelize(Arrays.asList("dog", "salmon", "salmon", "rat", "elephant"));

    // Key every word by its length, then order the pairs by that key.
    JavaPairRDD<Integer, String> result = words.keyBy(String::length).sortByKey();
    System.out.println("[测试 keyBy 函数]返回结果:"+result.collect());

    // keys() drops the values and keeps only the word lengths.
    JavaRDD<Integer> keysRdd = result.keys();
    System.out.println("[测试 keys 函数]返回结果:"+keysRdd.collect());
}