Spark之join

import org.apache.spark.HashPartitioner;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;
import scala.Tuple2;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Created by hadoop on 17-10-18.
 */
/**
 * Demonstrates an inner join of two pair RDDs with Spark's Java API.
 *
 * <p>Builds two RDDs keyed by user id — one mapping id to user name, one
 * mapping id to a visited URL — hash-partitions and caches the first, then
 * joins them and prints each {@code (userId, (userName, url))} tuple.
 * Runs entirely in local mode; no cluster is required.
 */
public class JoinSparkJava {

    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("join").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Ensure the context is stopped even if the job throws.
        try {
            // (userId, userName) pairs.
            List<Tuple2<String, String>> users = new ArrayList<Tuple2<String, String>>();
            users.add(new Tuple2<String, String>("1212", "zhouqinru"));
            users.add(new Tuple2<String, String>("1213", "lixiaofang"));
            users.add(new Tuple2<String, String>("1214", "zhaosi"));
            users.add(new Tuple2<String, String>("1215", "ligang"));
            users.add(new Tuple2<String, String>("1216", "wangwu"));
            JavaPairRDD<String, String> userRdd = sc.parallelizePairs(users, 2);

            // Hash-partition into 3 partitions and cache the result; giving the
            // join side a known partitioner lets Spark avoid re-shuffling it.
            JavaPairRDD<String, String> partitionedUsers =
                    userRdd.partitionBy(new HashPartitioner(3)).persist(StorageLevel.MEMORY_ONLY());

            // Partitioning demo: coalesce with shuffle=true may INCREASE the
            // partition count (plain coalesce can only reduce it).
            JavaPairRDD<String, String> reshuffled = userRdd.coalesce(30, true);
            System.out.println("分区个数为:" + reshuffled.getNumPartitions());

            // (userId, visitedUrl) pairs.
            List<Tuple2<String, String>> infos = new ArrayList<Tuple2<String, String>>();
            infos.add(new Tuple2<String, String>("1212", "http:www.baidu.com"));
            infos.add(new Tuple2<String, String>("1213", "http:www.taobao.com"));
            infos.add(new Tuple2<String, String>("1214", "http:www.Ali.comi"));
            infos.add(new Tuple2<String, String>("1215", "http:www.Tengxun.com"));
            infos.add(new Tuple2<String, String>("1216", "http:www.Jjngdong.comu"));

            // BUG FIX: the original parallelized `users` again here, so the
            // join produced (name, name) pairs and the `infos` list was unused.
            JavaPairRDD<String, String> infoRdd = sc.parallelizePairs(infos);

            // Inner join on user id: (userId, (userName, url)).
            JavaPairRDD<String, Tuple2<String, String>> joined = partitionedUsers.join(infoRdd);
            joined.collect().forEach(System.out::println);
        } finally {
            sc.stop();
        }
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值