Spark之广播变量

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/tangshiweibbs/article/details/70240549
/**
 * Demonstrates a map-side lookup ("small table join") with a Spark broadcast variable.
 *
 * <p>A small key/value table is collected to the driver, turned into a {@code Map},
 * broadcast, and then consulted inside a transformation over the larger RDD. Each
 * well-formed input row is printed as {@code field0 + " " + lookedUpName + " " + field2
 * + " " + field3}; rows with fewer than four comma-separated fields are dropped.
 *
 * @param sc the Spark context used to create the RDDs and the broadcast variable
 */
private static void broadCastOps(JavaSparkContext sc) {
    // Load the user table into an RDD. The second comma-separated field is the key
    // that is looked up in the small table below.
    JavaRDD<String> linesRDD = sc.parallelize(Arrays.asList("1,3,张三,河北", "2,1,李四,北京", "3,0,王五,天津", "4,1,赵六,广东"));
    // Small lookup table, one "key,name" entry per element.
    // NOTE(review): these entries carry no name after the comma — presumably the
    // names were lost from the sample data; confirm and restore them.
    JavaRDD<String> sexLineRDD = sc.parallelize(Arrays.asList("1,", "0,"));

    // Pull the small table's RDD onto the driver and load it into a data structure
    // suited for lookups, ready to be wrapped in a broadcast variable.
    List<String> sexList = sexLineRDD.collect();
    Map<String, String> sexMap = new HashMap<>();
    for (String sexLine : sexList) {
        // Use an explicit limit: plain split(",") drops trailing empty strings, so
        // "1," would yield a one-element array and indexing [1] would throw.
        String[] sexSplits = sexLine.split(",", 2);
        if (sexSplits.length < 2) {
            continue; // no separator at all
        }
        String key = sexSplits[0].trim();
        String name = sexSplits[1].trim();
        if (key.isEmpty() || name.isEmpty()) {
            // Incomplete entry: skip it so lookups fall back to the default below
            // instead of producing a blank column.
            continue;
        }
        sexMap.put(key, name);
    }

    Broadcast<Map<String, String>> sexMapBC = sc.broadcast(sexMap);

    // Referencing the driver-side map directly inside a transformation is discouraged;
    // read it through the broadcast variable instead.
    JavaRDD<String> retRDD = linesRDD
            .map(line -> line.split(","))
            // Drop malformed rows rather than emitting null elements into the RDD.
            .filter(fields -> fields.length >= 4)
            .map(fields -> {
                String sid = fields[1].trim();
                String sName = sexMapBC.value().getOrDefault(sid, "未知");
                return fields[0] + " " + sName + " " + fields[2] + " " + fields[3];
            });

    // Keep this as a lambda: the method reference System.out::println would capture the
    // non-serializable PrintStream instance and fail when Spark serializes the closure.
    retRDD.foreach(str -> System.out.println(str));
}
展开阅读全文

没有更多推荐了,返回首页