Spark算子(八)

Point 1:CartesianOperator

package com.spark.operator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * Demonstrates the {@code cartesian} operator: every element of one RDD is paired
 * with every element of another RDD.
 *
 * <p>Created by Administrator on 2017/07/20.
 */
public class CartesianOperator {
    /**
     * Builds two five-element RDDs (tops and trousers), computes their Cartesian
     * product and prints each resulting pair on the driver.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("CartesianOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            List<String> clothes = Arrays.asList("T恤衫","夹克","皮大衣","衬衫","毛衣");
            List<String> trousers = Arrays.asList("西裤","内裤","铅笔裤","皮裤","牛仔裤");
            JavaRDD<String> clothesRDD = sc.parallelize(clothes);
            JavaRDD<String> trousersRDD = sc.parallelize(trousers);

            // Each top is combined with each pair of trousers.
            JavaPairRDD<String, String> pairs = clothesRDD.cartesian(trousersRDD);

            // collect() brings all pairs back to the driver so they can be printed locally.
            for (Tuple2<String, String> result : pairs.collect()) {
                System.out.println(result);
            }
        } finally {
            // Release the context even when the job fails; the original never closed it.
            sc.close();
        }
    }
}

Point 2:CollectOperator

package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

/**
 * Demonstrates the {@code collect} action: the elements of an RDD are pulled back
 * to the driver as a local {@link List}.
 */
public class CollectOperator {

    /**
     * Doubles the numbers 1 to 5 with {@code map}, collects the result to the
     * driver and prints each value.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // The application name matches the class, as in the other operator examples.
        SparkConf conf = new SparkConf().setAppName("CollectOperator")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // A collection holding the numbers 1 to 5; each one is doubled below.
            List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5);
            JavaRDD<Integer> numbers = sc.parallelize(numberList);

            JavaRDD<Integer> doubleNumbers = numbers.map(new Function<Integer, Integer>() {

                private static final long serialVersionUID = 1L;

                @Override
                public Integer call(Integer v) throws Exception {
                    return v * 2;
                }
            });

            // The foreach action iterates over the RDD elements where they live, on the cluster.
            // collect, by contrast, pulls the distributed data back to the local driver.
            // That is not recommended for large data sets: it causes heavy network transfer
            // and may even run the driver out of memory, so foreach is usually preferred.
            List<Integer> doubleNumberList = doubleNumbers.collect();
            for (Integer num : doubleNumberList) {
                System.out.println(num);
            }
        } finally {
            // Close the context even if the job throws.
            sc.close();
        }
    }
}

Point 3:CogroupOperator

package com.spark.operator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * Demonstrates the {@code cogroup} operator: two pair RDDs are grouped by key, and
 * for each key the values from both sides are returned together.
 *
 * <p>Created by Administrator on 2017/07/20.
 */
public class CogroupOperator {
    /**
     * Cogroups a (student id, name) RDD with a (student id, score) RDD and prints,
     * for every id, the names and the scores that share that id.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("CogroupOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            List<Tuple2<String,String>> studentsList = Arrays.asList(
                    new Tuple2<String,String>("1","xuruyun"),
                    new Tuple2<String,String>("2","wangfei"),
                    new Tuple2<String,String>("3","lixin"));
            List<Tuple2<String,String>> scoreList = Arrays.asList(
                    new Tuple2<String,String>("1","100"),
                    new Tuple2<String,String>("2","90"),
                    new Tuple2<String,String>("3","80"),
                    new Tuple2<String,String>("1","70"),
                    new Tuple2<String,String>("2","60"),
                    new Tuple2<String,String>("3","50"));
            JavaPairRDD<String,String> students = sc.parallelizePairs(studentsList);
            JavaPairRDD<String,String> scores = sc.parallelizePairs(scoreList);

            // cogroup aggregates by key: each key maps to the values from the left RDD
            // and the values from the right RDD. A key that is absent from one side gets
            // an empty Iterable for that side (not null).
            JavaPairRDD<String, Tuple2<Iterable<String>, Iterable<String>>> result = students.cogroup(scores);
            result.foreach(new VoidFunction<Tuple2<String, Tuple2<Iterable<String>, Iterable<String>>>>() {
                @Override
                public void call(Tuple2<String, Tuple2<Iterable<String>, Iterable<String>>> tuple) throws Exception {
                    System.out.println("id:"+tuple._1);
                    System.out.println("name:"+tuple._2._1);
                    System.out.println("score:"+tuple._2._2);
                }
            });
        } finally {
            // Release the context even when the job fails; the original never closed it.
            sc.close();
        }
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值