Writing a Hive UDF: converting Canal message-format data to JSON

If you are not familiar with the message format, see my blog post: https://blog.csdn.net/weixin_46034893/article/details/114640629

1. Import dependencies

If you are using an Alibaba Cloud product, import this one when pulling in the UDF package:

        <dependency>
            <groupId>com.aliyun.odps</groupId>
            <artifactId>odps-sdk-udf</artifactId>
            <version>0.31.4-public</version>
        </dependency>

If not, the one below will do:

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.2.1</version>
</dependency>
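
Besides the UDF dependency, the code in the next section also uses the canal protocol classes (CanalEntry, CanalMessageDeserializer), protobuf's ByteString, and fastjson, so those jars need to be on the classpath too. Here is a rough sketch of the extra Maven dependencies; the versions are assumptions, so match them to your canal deployment, and note that CanalMessageDeserializer may live in a different canal module depending on the version you use.

<dependency>
    <groupId>com.alibaba.otter</groupId>
    <artifactId>canal.protocol</artifactId>
    <version>1.1.4</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.68</version>
</dependency>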

2. Code implementation

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.alibaba.otter.canal.protocol.CanalEntry;
import com.google.protobuf.ByteString;
import org.apache.hadoop.hive.ql.exec.UDF;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;

// CanalMessageDeserializer comes from the canal client; import it from the package your
// canal version provides (on MaxCompute, extend com.aliyun.odps.udf.UDF instead).
public class MessageToJson extends UDF {

    public String evaluate(byte[] b) throws Exception {

        // Deserialize the raw canal message and iterate over its entries
        Iterator<CanalEntry.Entry> entries = CanalMessageDeserializer.deserializer(b).getEntries().iterator();

        ArrayList<Object> arrayList = new ArrayList<>();

        while (entries.hasNext()) {
            HashMap<String, Object> map = new HashMap<>();
            CanalEntry.Entry entry = entries.next();

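            // Flatten the entry header (binlog file/offset, schema, table, event type, ...) into a map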
            HashMap<String, Object> headerMap = new HashMap<>();
            CanalEntry.Header header = entry.getHeader();
            headerMap.put("version", header.getVersion());
            headerMap.put("logfileName", header.getLogfileName());
            headerMap.put("logfileOffset", header.getLogfileOffset());
            headerMap.put("serverId", header.getServerId());
            headerMap.put("serverenCode", header.getServerenCode());
            headerMap.put("executeTime", header.getExecuteTime());
            headerMap.put("sourceType", header.getSourceType());
            headerMap.put("schemaName", header.getSchemaName());
            headerMap.put("tableName", header.getTableName());
            headerMap.put("eventLength", header.getEventLength());
            headerMap.put("eventType", header.getEventType());
            headerMap.put("gtid", header.getGtid());

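            // The store value is a protobuf-encoded RowChange; parse it to get the row-level changes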
            HashMap<String, Object> storeValueMap = new HashMap<>();
            ByteString storeValue = entry.getStoreValue();
            CanalEntry.RowChange rowChange = CanalEntry.RowChange.parseFrom(storeValue);

            storeValueMap.put("tableId", rowChange.getTableId());
            storeValueMap.put("eventType", rowChange.getEventType());
            storeValueMap.put("isDdl", rowChange.getIsDdl());
            storeValueMap.put("sql", rowChange.getSql());
            storeValueMap.put("ddlSchemaName", rowChange.getDdlSchemaName());

            ArrayList<Object> rowDatasMapList = new ArrayList<>();

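            // Each RowData carries the before/after column images of one changed row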
            List<CanalEntry.RowData> rowDatasList = rowChange.getRowDatasList();
            for (CanalEntry.RowData rowData : rowDatasList) {
                HashMap<String, Object> oneRowData = new HashMap<>();
                ArrayList<IdentityHashMap> bl = new ArrayList<>();
                ArrayList<IdentityHashMap> al = new ArrayList<>();

                List<CanalEntry.Column> afterColumns = rowData.getAfterColumnsList();
                List<CanalEntry.Column> beforeColumns = rowData.getBeforeColumnsList();
                for (CanalEntry.Column column : beforeColumns) {
                    IdentityHashMap<String, Object> beforeColumn = new IdentityHashMap<>();

                    beforeColumn.put("index", column.getIndex());
                    beforeColumn.put("sqlType", column.getSqlType());
                    beforeColumn.put("name", column.getName());
                    beforeColumn.put("isKey", column.getIsKey());
                    beforeColumn.put("updated", column.getUpdated());
                    beforeColumn.put("isNull", column.getIsNull());
                    beforeColumn.put("value", column.getValue());
                    beforeColumn.put("length", column.getLength());
                    beforeColumn.put("mysqlType", column.getMysqlType());

                    bl.add(beforeColumn);
                }
                for (CanalEntry.Column column : afterColumns) {
                    IdentityHashMap<String, Object> afterColumn = new IdentityHashMap<>();

                    afterColumn.put("index", column.getIndex());
                    afterColumn.put("sqlType", column.getSqlType());
                    afterColumn.put("name", column.getName());
                    afterColumn.put("isKey", column.getIsKey());
                    afterColumn.put("updated", column.getUpdated());
                    afterColumn.put("isNull", column.getIsNull());
                    afterColumn.put("value", column.getValue());
                    afterColumn.put("length", column.getLength());
                    afterColumn.put("mysqlType", column.getMysqlType());

                    al.add(afterColumn);
                }

                oneRowData.put("afterColumns", al);
                oneRowData.put("beforeColumns", bl);
                rowDatasMapList.add(oneRowData);
            }

            storeValueMap.put("rowDatas", rowDatasMapList);

            map.put("header", headerMap);
            map.put("entryType", entry.getEntryType());
            map.put("storeValue", storeValueMap);
            arrayList.add(map);
        }
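        // DisableCircularReferenceDetect stops fastjson from replacing repeated object
        // references with "$ref" placeholders in the serialized JSON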
        JSONArray json = new JSONArray(arrayList);

        return JSON.toJSONString(json, SerializerFeature.DisableCircularReferenceDetect);
    }
}
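
Once the class is packaged into a jar, it can be registered and called in Hive like any other UDF. A minimal usage sketch is shown below; the jar path, function name, table name and column name are made up for illustration, and the column is assumed to be BINARY holding the raw canal message bytes.

ADD JAR /path/to/message-to-json-udf.jar;
CREATE TEMPORARY FUNCTION message_to_json AS 'MessageToJson';

-- msg: BINARY column with the raw canal message (hypothetical table/column)
SELECT message_to_json(msg) FROM canal_messages LIMIT 10;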

My personal advice is to get a thorough understanding of both the message format and the JSON format before starting on the code.
If the map-building part is hard to follow, see my blog post: https://blog.csdn.net/weixin_46034893/article/details/114643509

If this article helped you, remember to give it a like 🤞
