写在前面:DataHub是什么?它是阿里云的流数据处理平台——数据总线。
如何轻松构建基于流式数据的分析和应用_数据总线 DataHub(DataHub)-阿里云帮助中心 (aliyun.com)
最近接了一个小活,之前同事通过请求接口的形式获取上游系统数据,轮询一时爽,改bug火葬场。没办法,只能用上DataHub。看了阿里云提供的SDK源码及其demo,感觉有点拉胯;又看了全网如GitHub、CSDN、Gitee、掘金等的诸多文章,也没几个能讲明白、开箱即用的。没办法,只好自己写了一个小demo放到k8s上,效果妥妥滴,代码如下:
ps:文章末尾有Python版本的🔗链接
package cn.cicoding.mybatisplus.consume;
import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureConsoutage;
import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureTranoutage;
import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureTranoutageCopy;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureConsoutageMapper;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureTranoutageCopyMapper;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureTranoutageMapper;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.model.*;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Configuration;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.Resource;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@Slf4j
@Configuration
public class ReadExample {
@Resource
private PowerMeasurationDxPowerFailureConsoutageMapper consoutageMapper;
@Resource
private PowerMeasurationDxPowerFailureTranoutageMapper tranoutageMapper;
@Resource
private PowerMeasurationDxPowerFailureTranoutageCopyMapper tranoutageCopyMapper;
private static final ConcurrentHashMap<Long, Boolean> PROCESSED_IDS = new ConcurrentHashMap<>();
private static DatahubClient datahubClient;
private static RecordSchema recordSchema;
public static void init() {
// 创建DataHubClient实例
datahubClient = DatahubClientBuilder.newBuilder()
.setDatahubConfig(
new DatahubConfig("https://datahub.xxx.cn",
// 是否开启二进制传输,服务端2.12版本开始支持
new AliyunAccount("wxxxV", "fLxxxx"), true))
.build();
// 获取schema
recordSchema = datahubClient.getTopic("xzx", "xxxx").getRecordSchema();
}
public void getShardIds() {
ListShardResult listShardResult = datahubClient.listShard("xxxx", "rxxxx");
List<ShardEntry> shards = listShardResult.getShards();
System.out.println("shardId的数量为:=====>" + shards.size());
ExecutorService executor = Executors.newFixedThreadPool(shards.size()); // 创建一个线程池
shards.forEach(shardEntry -> {
String shardId = shardEntry.getShardId();
executor.submit(() -> {
// 上述example()函数的逻辑,但替换掉固定的shardId为当前的shardId
example(shardId); // 假设example()接受shardId作为参数
});
});
executor.shutdown(); // 等待所有任务完成
}
@Transactional(rollbackFor = Exception.class)
public void example(String shardId) {
//每次最多读取数据量
int recordLimit = 10;
// String shardId = "1";
// 获取cursor, 这里获取有效数据中时间最久远的record游标
// 注: 正常情况下,getCursor只需在初始化时获取一次,然后使用getRecords的nextCursor进行下一次读取
String cursor = datahubClient.getCursor("xxxx", "rxxxxt", shardId, CursorType.LATEST).getCursor();
// HashSet<Long> set = new HashSet<>();
//一共向Mysql插入多少条
int i = 0, j = 0;
while (true) {
try {
GetRecordsResult result = datahubClient.getRecords("xxx", "rxxxxt", shardId, recordSchema, cursor, recordLimit);
if (result.getRecordCount() <= 0) {
// Thread.sleep(1000);
continue;
}
for (RecordEntry entry : result.getRecords()) {
TupleRecordData data = (TupleRecordData) entry.getRecordData();
if (ObjectUtil.isEmpty(data.getDataSize()) || data.getDataSize() <= 0) {
log.error("=====>{}", data.getDataSize());
continue;
}
Object field = data.getField(0);
JSONObject jsonObject = JSONUtil.parseObj(field);
JSONArray payload = (JSONArray) jsonObject.get("payload");
if (ObjectUtil.isEmpty(payload)) {
System.out.println("<数据为空>" + field);
continue;
}
Object o = payload.get(0);
log.info("field=====>{}", field.toString());
JSONObject js = JSONUtil.parseObj(o);
String eventType = (String) js.get("eventType");
if (StrUtil.equals(eventType, "ConsOutage")) {
//先判断是否在179w的数据表中
String psrid = (String) js.get("psrId");
//先在map查找,如果查到则进行,如果找不到则查询mysql,如果mysql也没有,则剔除掉
if (!PROCESSED_IDS.contains(Long.valueOf(psrid))){
Long resultId = tranoutageMapper.selectListCustomByEsyh(psrid);
//若是数据库中也没有,则不符合条件,剔除掉这条数据
if (ObjectUtil.isEmpty(resultId)) {
continue;
}
PROCESSED_IDS.put(Long.valueOf(psrid), true);
}
PowerMeasurationDxPowerFailureConsoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureConsoutage.class);
try {
List<PowerMeasurationDxPowerFailureConsoutage> list1 = consoutageMapper.selectList(new LambdaQueryWrapper<PowerMeasurationDxPowerFailureConsoutage>()
.eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureConsoutage::getPsrId, bean.getPsrId())
.eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureConsoutage::getPsrType, bean.getPsrType())
.eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureConsoutage::getStartTime, bean.getStartTime()));
if (CollectionUtil.isEmpty(list1)){
//如果为null,则新增
int insert = consoutageMapper.insert(bean);
i = i + insert;
}else {
if (ObjectUtil.isEmpty(list1.get(0).getEndTime())){
consoutageMapper.update(bean, new LambdaUpdateWrapper<PowerMeasurationDxPowerFailureConsoutage>()
.eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureConsoutage::getPsrId, bean.getPsrId())
.eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureConsoutage::getPsrType, bean.getPsrType())
.eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureConsoutage::getStartTime, bean.getStartTime()));
System.out.println("修改数据为===>" + bean);
}
}
// int count = consoutageMapper.selectCount(new LambdaQueryWrapper<PowerMeasurationDxPowerFailureConsoutage>()
// .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureConsoutage::getPsrId, bean.getPsrId())
// .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureConsoutage::getPsrType, bean.getPsrType())
// .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureConsoutage::getStartTime, bean.getStartTime()));
// if (count > 0) {
// if (count == 1) {
// consoutageMapper.update(bean, new LambdaUpdateWrapper<PowerMeasurationDxPowerFailureConsoutage>()
// .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureConsoutage::getPsrId, bean.getPsrId())
// .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureConsoutage::getPsrType, bean.getPsrType())
// .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureConsoutage::getStartTime, bean.getStartTime()));
// System.out.println("修改数据为===>" + bean);
// } else {
// log.error("查到多条数据---->" + count + "条数据--->" + bean);
// }
// } else {
// int insert = consoutageMapper.insert(bean);
// i = i + insert;
// }
} catch (Exception e) {
log.error("<Mysql异常信息======>" + e.getMessage());
}
} else {
PowerMeasurationDxPowerFailureTranoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTranoutage.class);
PowerMeasurationDxPowerFailureTranoutageCopy copy = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTranoutageCopy.class);
tranoutageCopyMapper.insert(copy);
try {
List<PowerMeasurationDxPowerFailureTranoutage> list2 = tranoutageMapper.selectList(new LambdaQueryWrapper<PowerMeasurationDxPowerFailureTranoutage>()
.eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureTranoutage::getPsrId, bean.getPsrId())
.eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureTranoutage::getPsrType, bean.getPsrType())
.eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureTranoutage::getStartTime, bean.getStartTime()));
if(CollectionUtil.isEmpty(list2)){
int insert = tranoutageMapper.insert(bean);
j = j + insert;
}else {
if (ObjectUtil.isEmpty(list2.get(0).getEndTime())){
tranoutageMapper.update(bean, new LambdaUpdateWrapper<PowerMeasurationDxPowerFailureTranoutage>()
.eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureTranoutage::getPsrId, bean.getPsrId())
.eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureTranoutage::getPsrType, bean.getPsrType())
.eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureTranoutage::getStartTime, bean.getStartTime()));
System.out.println("修改数据为===>" + bean);
}
}
// int count = tranoutageMapper.selectCount(new LambdaQueryWrapper<PowerMeasurationDxPowerFailureTranoutage>()
// .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureTranoutage::getPsrId, bean.getPsrId())
// .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureTranoutage::getPsrType, bean.getPsrType())
// .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureTranoutage::getStartTime, bean.getStartTime()));
// if (count > 0) {
// if (count == 1) {
// tranoutageMapper.update(bean, new LambdaUpdateWrapper<PowerMeasurationDxPowerFailureTranoutage>()
// .eq(ObjectUtil.isNotEmpty(bean.getPsrId()), PowerMeasurationDxPowerFailureTranoutage::getPsrId, bean.getPsrId())
// .eq(ObjectUtil.isNotEmpty(bean.getPsrType()), PowerMeasurationDxPowerFailureTranoutage::getPsrType, bean.getPsrType())
// .eq(ObjectUtil.isNotEmpty(bean.getStartTime()), PowerMeasurationDxPowerFailureTranoutage::getStartTime, bean.getStartTime()));
// j = j + count;
// System.out.println("<修改数据为====>" + bean);
// } else {
// log.error("查到多条数据---->" + count + "条数据--->" + bean);
// }
// } else {
// int insert = tranoutageMapper.insert(bean);
// j = j + insert;
// }
} catch (Exception e) {
log.error("<Mysql异常信息======>" + e.getMessage());
}
}
}
log.info("一共向consoutage中新增=====>{}", i);
log.info("一共向tranoutage中新增=====>{}", j);
// 拿到下一个游标
cursor = result.getNextCursor();
} catch (Exception e) {
log.error("异常信息======>" + e.getMessage());
}
}
}
// public static void main(String[] args) {
// init();
// example();
// }
}
package cn.cicoding.mybatisplus.consume;
import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureConsoutage;
import cn.cicoding.mybatisplus.bean.PowerMeasurationDxPowerFailureTranoutage;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureConsoutageMapper;
import cn.cicoding.mybatisplus.mapper.PowerMeasurationDxPowerFailureTranoutageMapper;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.model.*;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Configuration;
import javax.annotation.Resource;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@Slf4j
@Configuration
public class ReadExample {
// 使用ConcurrentHashMap替换HashMap
// private final ConcurrentHashMap<String, String> map = new ConcurrentHashMap<>();
// private volatile Map<String, String> keyMap;
@Resource
private PowerMeasurationDxPowerFailureConsoutageMapper consoutageMapper;
@Resource
private PowerMeasurationDxPowerFailureTranoutageMapper tranoutageMapper;
private static DatahubClient datahubClient;
private static RecordSchema recordSchema;
public static void init() {
// 创建DataHubClient实例
datahubClient = DatahubClientBuilder.newBuilder()
.setDatahubConfig(
new DatahubConfig("https://datahub.xxx.com.cn",
// 是否开启二进制传输,服务端2.12版本开始支持
new AliyunAccount("appid", "appkey"), true))
.build();
// 获取schema
recordSchema = datahubClient.getTopic("xxx", "xxxx").getRecordSchema();
}
public void getShardIds() {
// Map<String, String> keyMap = new ConcurrentHashMap<>();
// List<Long> integers = testMapper.selectListCustomByEsyh();
// System.out.println("user长度"+integers.size());
// integers.forEach(integer -> {
// keyMap.put(String.valueOf(integer),null);
// });
// System.out.println("map中的数据===>"+keyMap.size());
ListShardResult listShardResult = datahubClient.listShard("xxx", "xxxx");
List<ShardEntry> shards = listShardResult.getShards();
System.out.println("shardId的数量为:=====>" + shards.size());
ExecutorService executor = Executors.newFixedThreadPool(shards.size()); // 创建一个线程池
shards.forEach(shardEntry -> {
String shardId = shardEntry.getShardId();
executor.submit(() -> {
// 上述example()函数的逻辑,但替换掉固定的shardId为当前的shardId
example(shardId); // 假设example()接受shardId作为参数
});
});
executor.shutdown(); // 等待所有任务完成
}
public void example(String shardId) {
//每次最多读取数据量
int recordLimit = 10;
// String shardId = "1";
// 获取cursor, 这里获取有效数据中时间最久远的record游标
// 注: 正常情况下,getCursor只需在初始化时获取一次,然后使用getRecords的nextCursor进行下一次读取
String cursor = datahubClient.getCursor("xxxx", "xxxxx", shardId, CursorType.LATEST).getCursor();
HashSet<Long> set = new HashSet<>();
//一共向Mysql插入多少条
int i = 0, j = 0;
while (true) {
try {
GetRecordsResult result = datahubClient.getRecords("xxx", "xxxxx", shardId, recordSchema, cursor, recordLimit);
if (result.getRecordCount() <= 0) {
// Thread.sleep(1000);
continue;
}
for (RecordEntry entry : result.getRecords()) {
TupleRecordData data = (TupleRecordData) entry.getRecordData();
if (ObjectUtil.isEmpty(data.getDataSize()) || data.getDataSize() <= 0) {
log.error("=====>{}", data.getDataSize());
continue;
}
Object field = data.getField(0);
JSONObject jsonObject = JSONUtil.parseObj(field);
JSONArray payload = (JSONArray) jsonObject.get("payload");
if (ObjectUtil.isEmpty(payload)) {
System.out.println("<数据为空>" + field);
continue;
}
Object o = payload.get(0);
log.info("field=====>{}", field.toString());
JSONObject js = JSONUtil.parseObj(o);
String psrid = (String) js.get("psrId");
boolean contains = set.contains(Long.valueOf(psrid));
if (!contains){
//在set中找不到,则去数据库中查找
Long resultId = tranoutageMapper.selectListCustomByEsyh(Long.valueOf(psrid));
//若是数据库中也没有,则不符合条件,剔除掉这条数据
if (ObjectUtil.isEmpty(resultId)){
continue;
}
set.add(resultId);
}
String eventType = (String) js.get("eventType");
if (StrUtil.equals(eventType, "ConsOutage")) {
PowerMeasurationDxPowerFailureConsoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureConsoutage.class);
try {
int insert = consoutageMapper.insert(bean);
System.out.println("<新增数据为====>" + bean);
i = i + insert;
} catch (Exception e) {
log.error("<Mysql异常信息======>" + e.getMessage());
}
}else {
PowerMeasurationDxPowerFailureTranoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTranoutage.class);
try {
int insert = tranoutageMapper.insert(bean);
System.out.println("<新增数据为====>" + bean);
j = j + insert;
} catch (Exception e) {
log.error("<Mysql异常信息======>" + e.getMessage());
}
}
// if (StrUtil.equals(eventType, "TranOutage")) {
// PowerMeasurationDxPowerFailureTranoutage bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTranoutage.class);
// try {
// int insert = tranoutageMapper.insert(bean);
// System.out.println("<新增数据为====>" + bean);
// j = j + insert;
// } catch (Exception e) {
// log.error("<Mysql异常信息======>" + e.getMessage());
// }
// }
// PowerMeasurationDxPowerFailureTest bean = JSONUtil.toBean(js, PowerMeasurationDxPowerFailureTest.class);
// try {
// int insert = testMapper.insert(bean);
// System.out.println("<新增数据为====>" + bean);
// i = i + insert;
// } catch (Exception e) {
// log.error("<Mysql异常信息======>" + e.getMessage());
// }
}
log.info("一共向consoutage中新增=====>{}", i);
log.info("一共向tranoutage中新增=====>{}", j);
// 拿到下一个游标
cursor = result.getNextCursor();
} catch (Exception e) {
log.error("异常信息======>" + e.getMessage());
}
}
}
// public static void main(String[] args) {
// init();
// example();
// }
}
参考地址:如何使用JavaSDK_数据总线 DataHub(DataHub)-阿里云帮助中心 (aliyun.com)
Python版本:阿里云 datahub python sdk补充(多shard订阅数据)_datahub 不同的shard 数据是不是不同?-CSDN博客