一套玩转Datahub,多shard订阅数据(阿里云K8S与Datahub实操)

写在前面:Datahub是什么?它是阿里云的流式数据处理平台——数据总线(DataHub)。

如何轻松构建基于流式数据的分析和应用_数据总线 DataHub(DataHub)-阿里云帮助中心 (aliyun.com)

最近接了一个小活:之前同事通过请求接口的方式获取上游系统数据,轮询一时爽,改bug火葬场。没办法,只能用上Datahub。看了阿里云提供的SDK源码和官方demo,感觉有点拉胯;又翻遍了全网(github、csdn、gitee、掘金等)的诸多文章,没几篇能讲明白、做到开箱即用的。于是自己写了一个小demo部署到k8s上,效果妥妥滴,代码如下:

ps:文章末尾有Python版本的🔗链接

package cn.cicoding.mybatisplus.consume;

import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureConsoutage;
import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureTranoutage;
import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureTranoutageCopy;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureConsoutageMapper;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureTranoutageCopyMapper;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureTranoutageMapper;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.model.*;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Configuration;
import org.springframework.transaction.annotation.Transactional;

import javax.annotation.Resource;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

@Slf4j
@Configuration
public class ReadExample {

    @Resource
    private PowerMeasurationDxPowerFailureConsoutageMapper consoutageMapper;

    @Resource
    private PowerMeasurationDxPowerFailureTranoutageMapper tranoutageMapper;

    @Resource
    private PowerMeasurationDxPowerFailureTranoutageCopyMapper tranoutageCopyMapper;

    private static final ConcurrentHashMap<Long, Boolean> PROCESSED_IDS = new ConcurrentHashMap<>();


    private static DatahubClient datahubClient;
    private static RecordSchema recordSchema;


    public static void init() {
        // 创建DataHubClient实例
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig("https://datahub.xxx.cn",
                                // 是否开启二进制传输,服务端2.12版本开始支持
                                new AliyunAccount("wxxxV", "fLxxxx"), true))
                .build();

        // 获取schema
        recordSchema = datahubClient.getTopic("xzx", "xxxx").getRecordSchema();
    }

    public void getShardIds() {
        ListShardResult listShardResult = datahubClient.listShard("xxxx", "rxxxx");
        List<ShardEntry> shards = listShardResult.getShards();
        System.out.println("shardId的数量为:=====>" + shards.size());
        ExecutorService executor = Executors.newFixedThreadPool(shards.size()); // 创建一个线程池
        shards.forEach(shardEntry -> {
            String shardId = shardEntry.getShardId();
            executor.submit(() -> {
                // 上述example()函数的逻辑,但替换掉固定的shardId为当前的shardId
                example(shardId); // 假设example()接受shardId作为参数
            });
        });
        executor.shutdown(); // 等待所有任务完成
    }


    @Transactional(rollbackFor = Exception.class)
    public void example(String shardId) {
        //每次最多读取数据量
        int recordLimit = 10;
//        String shardId = "1";
        // 获取cursor, 这里获取有效数据中时间最久远的record游标
        // 注: 正常情况下,getCursor只需在初始化时获取一次,然后使用getRecords的nextCursor进行下一次读取
        String cursor = datahubClient.getCursor("xxxx", "rxxxxt", shardId, CursorType.LATEST).getCursor();

//        HashSet<Long> set = new HashSet<>();
        //一共向Mysql插入多少条
        int i = 0, j = 0;
        while (true) {
            try {
                GetRecordsResult result = datahubClient.getRecords("xxx", "rxxxxt", shardId, recordSchema, cursor, recordLimit);
                if (result.getRecordCount() <= 0) {
//                    Thread.sleep(1000);
                    continue;
                }
                for (RecordEntry entry : result.getRecords()) {
                    TupleRecordData data = (TupleRecordData) entry.getRecordData();
                    if (ObjectUtil.isEmpty(data.getDataSize()) || data.getDataSize() <= 0) {
                        log.error("=====>{}", data.getDataSize());
                        continue;
                    }
                    Object field = data.getField(0);
                    JSONObject jsonObject = JSONUtil.parseObj(field);
                    JSONArray payload = (JSONArray) jsonObject.get("payload");
                    if (ObjectUtil.isEmpty(payload)) {
                        System.out.println("<数据为空>" + field);
                        continue;
                    }
                    Object o = payload.get(0);
                    log.info("field=====>{}", field.toString());
                    JSONObject js = JSONUtil.parseObj(o);


                    String eventType = (String) js.get("eventType");
                    if (StrUtil.equals(eventType, "ConsOutage")) {
                        //先判断是否在179w的数据表中
                        String psrid = (String) js.get("psrId");

                        //先在map查找,如果查到则进行,如果找不到则查询mysql,如果mysql也没有,则剔除掉
                        if (!PROCESSED_IDS.contains(Long.valueOf(psrid))){
                            Long resultId = tranoutageMapper.selectListCustomByEsyh(psrid);
                            //若是数据库中也没有,则不符合条件,剔除掉这条数据
                            if (ObjectUtil.isEmpty(resultId)) {
                                continue;
                            }
                            PROCESSED_IDS.put(Long.valueOf(psrid), true);
                        }
                        PowerMeasurationDxPowerFailureConsoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureConsoutage.class);
                        try {

                            List<PowerMeasurationDxPowerFailureConsoutage> list1 = consoutageMapper.selectList(new LambdaQueryWrapper<PowerMeasurationDxPowerFailureConsoutage>()
                                    .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureConsoutage::getPsrId, bean.getPsrId())
                                    .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureConsoutage::getPsrType, bean.getPsrType())
                                    .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureConsoutage::getStartTime, bean.getStartTime()));

                            if (CollectionUtil.isEmpty(list1)){
                                //如果为null,则新增
                                int insert = consoutageMapper.insert(bean);
                                i = i + insert;
                            }else {
                                if (ObjectUtil.isEmpty(list1.get(0).getEndTime())){
                                    consoutageMapper.update(bean, new LambdaUpdateWrapper<PowerMeasurationDxPowerFailureConsoutage>()
                                            .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureConsoutage::getPsrId, bean.getPsrId())
                                            .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureConsoutage::getPsrType, bean.getPsrType())
                                            .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureConsoutage::getStartTime, bean.getStartTime()));
                                    System.out.println("修改数据为===>" + bean);
                                }
                            }

//                            int count = consoutageMapper.selectCount(new LambdaQueryWrapper<PowerMeasurationDxPowerFailureConsoutage>()
//                                    .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureConsoutage::getPsrId, bean.getPsrId())
//                                    .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureConsoutage::getPsrType, bean.getPsrType())
//                                    .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureConsoutage::getStartTime, bean.getStartTime()));
//                            if (count > 0) {
//                                if (count == 1) {
//                                    consoutageMapper.update(bean, new LambdaUpdateWrapper<PowerMeasurationDxPowerFailureConsoutage>()
//                                            .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureConsoutage::getPsrId, bean.getPsrId())
//                                            .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureConsoutage::getPsrType, bean.getPsrType())
//                                            .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureConsoutage::getStartTime, bean.getStartTime()));
//                                    System.out.println("修改数据为===>" + bean);
//                                } else {
//                                    log.error("查到多条数据---->" + count + "条数据--->" + bean);
//                                }
//                            } else {
//                                int insert = consoutageMapper.insert(bean);
//                                i = i + insert;
//                            }


                        } catch (Exception e) {
                            log.error("<Mysql异常信息======>" + e.getMessage());
                        }
                    } else {

                        PowerMeasurationDxPowerFailureTranoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTranoutage.class);
                        PowerMeasurationDxPowerFailureTranoutageCopy copy = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTranoutageCopy.class);
                        tranoutageCopyMapper.insert(copy);
                        try {

                            List<PowerMeasurationDxPowerFailureTranoutage> list2 = tranoutageMapper.selectList(new LambdaQueryWrapper<PowerMeasurationDxPowerFailureTranoutage>()
                                    .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureTranoutage::getPsrId, bean.getPsrId())
                                    .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureTranoutage::getPsrType, bean.getPsrType())
                                    .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureTranoutage::getStartTime, bean.getStartTime()));


                            if(CollectionUtil.isEmpty(list2)){
                                int insert = tranoutageMapper.insert(bean);
                                j = j + insert;
                            }else {
                                if (ObjectUtil.isEmpty(list2.get(0).getEndTime())){
                                    tranoutageMapper.update(bean, new LambdaUpdateWrapper<PowerMeasurationDxPowerFailureTranoutage>()
                                            .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureTranoutage::getPsrId, bean.getPsrId())
                                            .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureTranoutage::getPsrType, bean.getPsrType())
                                            .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureTranoutage::getStartTime, bean.getStartTime()));
                                    System.out.println("修改数据为===>" + bean);
                                }

                            }

//                            int count = tranoutageMapper.selectCount(new LambdaQueryWrapper<PowerMeasurationDxPowerFailureTranoutage>()
//                                    .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureTranoutage::getPsrId, bean.getPsrId())
//                                    .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureTranoutage::getPsrType, bean.getPsrType())
//                                    .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureTranoutage::getStartTime, bean.getStartTime()));
//                            if (count > 0) {
//                                if (count == 1) {
//                                    tranoutageMapper.update(bean, new LambdaUpdateWrapper<PowerMeasurationDxPowerFailureTranoutage>()
//                                            .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureTranoutage::getPsrId, bean.getPsrId())
//                                            .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureTranoutage::getPsrType, bean.getPsrType())
//                                            .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureTranoutage::getStartTime, bean.getStartTime()));
//                                    j = j + count;
//                                    System.out.println("<修改数据为====>" + bean);
//                                } else {
//                                    log.error("查到多条数据---->" + count + "条数据--->" + bean);
//                                }
//                            } else {
//                                int insert = tranoutageMapper.insert(bean);
//                                j = j + insert;
//                            }


                        } catch (Exception e) {
                            log.error("<Mysql异常信息======>" + e.getMessage());
                        }
                    }
                }
                log.info("一共向consoutage中新增=====>{}", i);
                log.info("一共向tranoutage中新增=====>{}", j);

                // 拿到下一个游标
                cursor = result.getNextCursor();
            } catch (Exception e) {
                log.error("异常信息======>" + e.getMessage());
            }
        }
    }


//    public static void main(String[] args) {
//        init();
//        example();
//    }
}
package cn.cicoding.mybatisplus.consume;

import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureConsoutage;
import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureTranoutage;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureConsoutageMapper;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureTranoutageMapper;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.model.*;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Configuration;

import javax.annotation.Resource;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

@Slf4j
@Configuration
public class ReadExample {

    // 使用ConcurrentHashMap替换HashMap
//    private final ConcurrentHashMap<String, String> map = new ConcurrentHashMap<>();

//    private volatile Map<String, String> keyMap;

    @Resource
    private PowerMeasurationDxPowerFailureConsoutageMapper consoutageMapper;

    @Resource
    private PowerMeasurationDxPowerFailureTranoutageMapper tranoutageMapper;


    private static DatahubClient datahubClient;
    private static RecordSchema recordSchema;


    public static void init() {
        // 创建DataHubClient实例
        datahubClient = DatahubClientBuilder.newBuilder()
                .setDatahubConfig(
                        new DatahubConfig("https://datahub.xxx.com.cn",
                                // 是否开启二进制传输,服务端2.12版本开始支持
                                new AliyunAccount("appid", "appkey"), true))
                .build();

        // 获取schema
        recordSchema = datahubClient.getTopic("xxx", "xxxx").getRecordSchema();
    }

    public void getShardIds() {
//        Map<String, String> keyMap = new ConcurrentHashMap<>();
//        List<Long> integers = testMapper.selectListCustomByEsyh();
//        System.out.println("user长度"+integers.size());
//        integers.forEach(integer -> {
//            keyMap.put(String.valueOf(integer),null);
//        });
//        System.out.println("map中的数据===>"+keyMap.size());

        ListShardResult listShardResult = datahubClient.listShard("xxx", "xxxx");
        List<ShardEntry> shards = listShardResult.getShards();
        System.out.println("shardId的数量为:=====>" + shards.size());
        ExecutorService executor = Executors.newFixedThreadPool(shards.size()); // 创建一个线程池
        shards.forEach(shardEntry -> {
            String shardId = shardEntry.getShardId();
            executor.submit(() -> {
                // 上述example()函数的逻辑,但替换掉固定的shardId为当前的shardId
                example(shardId); // 假设example()接受shardId作为参数
            });
        });
        executor.shutdown(); // 等待所有任务完成
    }


    public void example(String shardId) {
        //每次最多读取数据量
        int recordLimit = 10;
//        String shardId = "1";
        // 获取cursor, 这里获取有效数据中时间最久远的record游标
        // 注: 正常情况下,getCursor只需在初始化时获取一次,然后使用getRecords的nextCursor进行下一次读取
        String cursor = datahubClient.getCursor("xxxx", "xxxxx", shardId, CursorType.LATEST).getCursor();

        HashSet<Long> set = new HashSet<>();
        //一共向Mysql插入多少条
        int i = 0, j = 0;
        while (true) {
            try {
                GetRecordsResult result = datahubClient.getRecords("xxx", "xxxxx", shardId, recordSchema, cursor, recordLimit);
                if (result.getRecordCount() <= 0) {
//                    Thread.sleep(1000);
                    continue;
                }
                for (RecordEntry entry : result.getRecords()) {
                    TupleRecordData data = (TupleRecordData) entry.getRecordData();
                    if (ObjectUtil.isEmpty(data.getDataSize()) || data.getDataSize() <= 0) {
                        log.error("=====>{}", data.getDataSize());
                        continue;
                    }
                    Object field = data.getField(0);
                    JSONObject jsonObject = JSONUtil.parseObj(field);
                    JSONArray payload = (JSONArray) jsonObject.get("payload");
                    if (ObjectUtil.isEmpty(payload)) {
                        System.out.println("<数据为空>" + field);
                        continue;
                    }
                    Object o = payload.get(0);
                    log.info("field=====>{}", field.toString());
                    JSONObject js = JSONUtil.parseObj(o);

                    String psrid = (String) js.get("psrId");
                    boolean contains = set.contains(Long.valueOf(psrid));
                    if (!contains){
                        //在set中找不到,则去数据库中查找
                        Long resultId = tranoutageMapper.selectListCustomByEsyh(Long.valueOf(psrid));
                        //若是数据库中也没有,则不符合条件,剔除掉这条数据
                        if (ObjectUtil.isEmpty(resultId)){
                            continue;
                        }
                        set.add(resultId);
                    }


                    String eventType = (String) js.get("eventType");
                    if (StrUtil.equals(eventType, "ConsOutage")) {
                        PowerMeasurationDxPowerFailureConsoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureConsoutage.class);
                        try {
                            int insert = consoutageMapper.insert(bean);
                            System.out.println("<新增数据为====>" + bean);
                            i = i + insert;
                        } catch (Exception e) {
                            log.error("<Mysql异常信息======>" + e.getMessage());
                        }
                    }else {
                        PowerMeasurationDxPowerFailureTranoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTranoutage.class);
                        try {
                            int insert = tranoutageMapper.insert(bean);
                            System.out.println("<新增数据为====>" + bean);
                            j = j + insert;
                        } catch (Exception e) {
                            log.error("<Mysql异常信息======>" + e.getMessage());
                        }
                    }
//                    if (StrUtil.equals(eventType, "TranOutage")) {
//                        PowerMeasurationDxPowerFailureTranoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTranoutage.class);
//                        try {
//                            int insert = tranoutageMapper.insert(bean);
//                            System.out.println("<新增数据为====>" + bean);
//                            j = j + insert;
//                        } catch (Exception e) {
//                            log.error("<Mysql异常信息======>" + e.getMessage());
//                        }
//                    }

//                    PowerMeasurationDxPowerFailureTest bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTest.class);
//                    try {
//                        int insert = testMapper.insert(bean);
//                        System.out.println("<新增数据为====>" + bean);
//                        i = i + insert;
//                    } catch (Exception e) {
//                        log.error("<Mysql异常信息======>" + e.getMessage());
//                    }
                }
                log.info("一共向consoutage中新增=====>{}", i);
                log.info("一共向tranoutage中新增=====>{}", j);

                // 拿到下一个游标
                cursor = result.getNextCursor();
            } catch (Exception e) {
                log.error("异常信息======>" + e.getMessage());
            }
        }
    }

//    public static void main(String[] args) {
//        init();
//        example();
//    }
}

参考地址:如何使用JavaSDK_数据总线 DataHub(DataHub)-阿里云帮助中心 (aliyun.com)

Python版本:阿里云 datahub python sdk补充(多shard订阅数据)_datahub 不同的shard 数据是不是不同?-CSDN博客

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值