Java Spark 自定义累加器

Java Spark 自定义累加器(AccumulatorV2)实现


import org.apache.spark.util.AccumulatorV2;
import scala.runtime.BoxedUnit;

import java.util.*;

/**
 * Custom Spark accumulator that extends the plain string accumulation of
 * {@code AccumulatorV2<String, String>} with two extra containers: a
 * {@code Map<String, Object>} and a {@code List<Object>}.
 *
 * <p>The string part is fed through {@link #add(String)} and read through
 * {@link #value()}; the map and list parts are fed through
 * {@link #addMap(Map)} / {@link #addList(Object)} and read through
 * {@link #getMapValue()} / {@link #getListValue()}.
 *
 * <p>All three parts take part in {@link #isZero()}, {@link #copy()},
 * {@link #reset()} and {@link #merge(AccumulatorV2)} so that no part is
 * silently dropped or shared between accumulator instances.
 *
 * <p>Written against Spark 2.3.1.
 *
 * @author Mr.杜子腾
 */
public class VectorAccumulatorV2 extends AccumulatorV2<String, String> {
    /** Accumulated key/value pairs. Public for backward compatibility with existing callers. */
    public Map<String, Object> map = new HashMap<>();
    /** Accumulated list elements. Public for backward compatibility with existing callers. */
    public List<Object> list = new ArrayList<>();
    /** Accumulated string; every added value is appended as {@code "\t" + value}. */
    private String string = "";

    /**
     * Reports whether this accumulator holds its zero value.
     *
     * @return {@code true} only when the string, the map and the list are all empty
     */
    @Override
    public boolean isZero() {
        return string.isEmpty() && map.isEmpty() && list.isEmpty();
    }

    /**
     * Creates an independent copy of this accumulator.
     *
     * <p>The map and the list are copied into new collections rather than shared:
     * Spark's {@code copyAndReset()} calls {@code copy()} and then {@code reset()}
     * on the result, so a shared collection would be cleared in the original too.
     *
     * @return a new accumulator holding the same string, map entries and list elements
     */
    @Override
    public AccumulatorV2<String, String> copy() {
        VectorAccumulatorV2 duplicate = new VectorAccumulatorV2();
        duplicate.string = this.string;
        duplicate.map = new HashMap<>(this.map);
        duplicate.list = new ArrayList<>(this.list);
        return duplicate;
    }

    /** Resets the string, the map and the list to empty so that {@link #isZero()} holds. */
    @Override
    public void reset() {
        string = "";
        map.clear();
        list.clear();
    }

    /**
     * Appends a value to the accumulated string, preceded by a tab character.
     *
     * @param o the value to append
     * @throws NullPointerException if {@code o} is {@code null}
     */
    @Override
    public void add(String o) {
        // Reject null explicitly instead of letting it become the literal text "null".
        Objects.requireNonNull(o, "o");
        string = string + "\t" + o;
    }

    /**
     * Adds every entry of the given map; existing keys are overwritten.
     *
     * @param mpaPara the entries to add
     * @throws NullPointerException if {@code mpaPara} is {@code null}
     */
    public void addMap(Map<String, Object> mpaPara) {
        map.putAll(mpaPara);
    }

    /**
     * Appends one element to the accumulated list.
     *
     * @param o the element to append
     */
    public void addList(Object o) {
        list.add(o);
    }

    /**
     * Merges another accumulator of the same type into this one: its string is
     * appended, its map entries are put into this map (overwriting equal keys)
     * and its list elements are appended to this list.
     *
     * @param other the accumulator to merge in; must be a {@code VectorAccumulatorV2}
     * @throws UnsupportedOperationException if {@code other} is not a {@code VectorAccumulatorV2}
     */
    @Override
    public void merge(AccumulatorV2<String, String> other) {
        if (!(other instanceof VectorAccumulatorV2)) {
            throw new UnsupportedOperationException("AccumulatorV2 merge failed!");
        }
        VectorAccumulatorV2 that = (VectorAccumulatorV2) other;
        this.string += that.string;
        this.map.putAll(that.map);
        this.list.addAll(that.list);
    }

    /**
     * Returns the accumulated string.
     *
     * @return the concatenation of all added values, each preceded by a tab
     */
    @Override
    public String value() {
        return string;
    }

    /**
     * Returns a snapshot of the accumulated map.
     *
     * @return a new map containing the current entries; changes to it do not affect this accumulator
     */
    public Map<String, Object> getMapValue() {
        return new HashMap<>(map);
    }

    /**
     * Returns a snapshot of the accumulated list.
     *
     * @return a new list containing the current elements; changes to it do not affect this accumulator
     */
    public List<Object> getListValue() {
        return new ArrayList<>(list);
    }

}

调用示例

  // Usage example: build an RDD from a driver-side collection `list` (defined outside this snippet).
  JavaRDD<Tuple2<String, Map<String, String>>> parallelizeRDD = sc.parallelize(list);
        // Create the custom accumulator and register it on the underlying SparkContext
        // under the name "myAccumulator".
        VectorAccumulatorV2 vector = new VectorAccumulatorV2();
        sc.sc().register(vector, "myAccumulator");

        // For every input tuple, look up geocode data and emit zero or more (key, attribute map) pairs.
        JavaPairRDD<String, Map<String, Object>> batchJavaPairRDD = parallelizeRDD.flatMapToPair(new PairFlatMapFunction<Tuple2<String, Map<String, String>>, String, Map<String, Object>>() {

            private static final long serialVersionUID = 1L;
            // Geocoding helper held as a field of this function object.
            // NOTE(review): presumably GaoD must be serializable to ship with the closure — confirm.
            public GaoD gaoD = new GaoD();

            @Override
            public Iterator<Tuple2<String, Map<String, Object>>> call(Tuple2<String, Map<String, String>> tuple2) throws Exception {
                List<Tuple2<String, Map<String, Object>>> tuple2List = new ArrayList<>();
                String inputAddress = "";
                String inputId = "";
                // Each iteration overwrites both variables, so only the last entry of the
                // input map is kept (key -> inputId, value -> inputAddress).
                for (Map.Entry entry : tuple2._2.entrySet()) {
                    inputAddress = (String) entry.getValue();
                    inputId = (String) entry.getKey();
                }

                // Nothing to emit when either value is missing; note that this early return
                // also skips the accumulator update further down.
                if (isNull(inputAddress) || isNull(inputId)) {
                    return tuple2List.iterator();
                }

                Map<String, Object> geocodeList = gaoD.getGeocodeList(inputAddress, inputId);
                if (geocodeList.size() > 0) {
                    for (Map.Entry entry : geocodeList.entrySet()) {
                        Map<String, Object> map = new HashMap<>();
                        // Each value is cast to a Map and read for province / city / district.
                        Map allMap = (Map) entry.getValue();

                        map.put("address_province", getdefult(allMap.get("province")));
                        map.put("address_city", getdefult(allMap.get("city")));
                        map.put("address_area", getdefult(allMap.get("district")));

                        tuple2List.add(new Tuple2<>(entry.getKey().toString(), map));
                    }
                }
                // NOTE(review): reset() discards whatever this accumulator instance collected for
                // earlier records before adding gaoD.getFinalMap(); earlier data survives only if
                // getFinalMap() is itself cumulative — confirm this is intended.
                vector.reset();
                vector.addMap(gaoD.getFinalMap());
                return tuple2List.iterator();
            }

        }).filter(new Function<Tuple2<String, Map<String, Object>>, Boolean>() {
            // Keep only pairs whose attribute map has more than one entry. The maps built
            // above always receive three distinct keys.
            @Override
            public Boolean call(Tuple2<String, Map<String, Object>> tuple2) throws Exception {
                return tuple2._2.size() > 1;
            }
        }).persist(StorageLevel.DISK_ONLY());

        // count() is the action that triggers the computation; the accumulator's map is read
        // on the driver afterwards.
        logger.warn("batchJavaPairRDD.count()=======" + batchJavaPairRDD.count());
        logger.warn("vector map====" + vector.getMapValue());

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

懒脖积泥

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值