Implementing real-time MySQL field statistics with Flink CDC

Requirements

I recently needed to compute a real-time count over a status column in a MySQL table and display the result on a data dashboard.

Approach

First, use Flink CDC to listen to the target MySQL table.
Then apply some processing logic to derive the result data.
Finally, write the result data back into a MySQL result table through Flink.

Technology Stack

  • java 1.8
  • mysql 8.0.28
  • flink 1.11.3
  • flink cdc 1.1.0
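
One prerequisite worth checking before wiring anything up: the MySQL CDC connector reads the binlog through Debezium, so the binlog must be enabled and written in ROW format, and the connecting account needs replication privileges (MySQL 8.0 ships with the binlog on and in ROW format by default). A quick sanity check, as a sketch:

SHOW VARIABLES LIKE 'log_bin';        -- expected: ON
SHOW VARIABLES LIKE 'binlog_format';  -- expected: ROW
-- only needed when connecting with a non-root account; 'flink_user' is a placeholder
GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'flink_user'@'%';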

Implementation

Data

order_info table (the source data to be processed):
(table screenshot omitted)

flink_count table (the aggregated result):
(table screenshot omitted)
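
Since the screenshots aren't reproduced here, the following DDL is a minimal sketch of the two tables as the code below uses them: the job only looks at the order_state column of order_info (assumed to take the two values a and b), and the sink writes flink_count with "update flink_count set num=? where order_state=?". Any column other than order_state and num is an assumption.

-- minimal sketch of the source table; the real order_info has more columns
CREATE TABLE order_info (
    id          BIGINT PRIMARY KEY AUTO_INCREMENT,  -- assumed key column
    order_state VARCHAR(10) NOT NULL                -- the status column being counted ('a' or 'b')
);

-- result table matching the UPDATE statement used by the sink
CREATE TABLE flink_count (
    order_state VARCHAR(10) PRIMARY KEY,
    num         BIGINT NOT NULL DEFAULT 0
);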

Java Code

pom.xml
Not all of these dependencies are actually needed for this demo; the Hive, Hadoop, ORC, Parquet, MyBatis and Druid entries are leftovers from other experiments and can be dropped. Also note that several Flink dependencies are declared with provided scope, so to run the job from the IDE you either change them to compile scope or enable the option that adds provided-scope dependencies to the classpath in the run configuration.

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.antg</groupId>
    <artifactId>TestFlinkCDC</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <java.version>1.8</java.version>
        <flink.version>1.11.3</flink.version>
        <scala.binary.version>2.11</scala.binary.version>
        <log4j.version>2.17.0</log4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.22</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_2.11</artifactId>
            <version>1.11.3</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <!-- add the dependency matching your database -->
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>1.1.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>30.1-jre</version>
        </dependency>


        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.1.0</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_2.11</artifactId>
            <version>1.11.3</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-sql-connector-hive-1.2.2_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-parquet_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>1.2.2</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.2.2</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.orc</groupId>
            <artifactId>orc-core</artifactId>
            <version>1.4.3</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-orc_2.11</artifactId>
            <version>1.11.3</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.mybatis/mybatis -->
        <dependency>
            <groupId>org.mybatis</groupId>
            <artifactId>mybatis</artifactId>
            <version>3.4.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.1</version>
        </dependency>
    </dependencies>


</project>

Project structure

(screenshot of the project layout omitted; the packages used are com.antg.main, com.antg.entity, com.antg.sink and com.antg.deserialization)

Demo01.java

package com.antg.main;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;
import com.antg.entity.FlinkCount;
import com.antg.sink.OutputSkin;
import com.antg.deserialization.MyDeserialization;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.util.Collector;

public class Demo01 {
    public static void main(String[] args) throws Exception {
        //create the CDC source that listens to MySQL
        SourceFunction<String> sourceFunction = MySQLSource.<String>builder()
                .hostname("127.0.0.1")
                .port(3306)
//                .databaseList("test") // monitor all tables in the test database
                .tableList("test.order_info") //the table name must be qualified with the database name, because several databases can be monitored at once and different ones may contain tables with the same name
                .username("root")
                .password("123456")
                .deserializer(new MyDeserialization()) // converts each SourceRecord into a JSON string
                .build();
        //get the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //attach the CDC source; every change event arrives as a JSON string
        DataStreamSource<String> ds = env.addSource(sourceFunction);
        SingleOutputStreamOperator<FlinkCount> res = ds
                .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                    @Override
                    public void flatMap(String line, Collector<Tuple2<String, Long>> out) throws Exception {
                        //parse the JSON string produced by MyDeserialization
                        JSONObject item = (JSONObject) JSON.parse(line);
                        String before = item.getJSONObject("before").getString("order_state");
                        String after = item.getJSONObject("after").getString("order_state");
                        //insert ("create") and initial-snapshot ("read") records: count the new row's state
                        if (item.getString("op").equals("create") || item.getString("op").equals("read")) {
                            out.collect(new Tuple2<String, Long>(after, 1L));
                        //update records
                        } else if (item.getString("op").equals("update")) {
                            //if only some other column changed, order_state is unchanged and the counts stay the same
                            if(before.equals(after)){
                                return;
                            }
                            if (after.equals("a")) {
                                out.collect(new Tuple2<String, Long>("a", 1L));
                                out.collect(new Tuple2<String, Long>("b", -1L));
                            } else { //with only the two states a and b, the row must have changed from a to b
                                out.collect(new Tuple2<String, Long>("a", -1L));
                                out.collect(new Tuple2<String, Long>("b", 1L));
                            }
                        //delete records: decrement the count of the deleted row's state
                        } else if (item.getString("op").equals("delete")) {
                            out.collect(new Tuple2<String, Long>(before, -1L));
                        }
                        //further cases could be added here, e.g. soft deletes via a delete_flag column
                    }
                })
                .keyBy(0)
                .sum(1)
                .map(new MapFunction<Tuple2<String, Long>, FlinkCount>() {
                    @Override
                    public FlinkCount map(Tuple2<String, Long> value) throws Exception {

                        FlinkCount res = new FlinkCount();
                        res.setOrderState(value.f0);
                        res.setCount(value.f1);
                        return res;
                    }
                });
        //write the results to MySQL
        res.addSink(new OutputSkin());
        //ds.print();

        //start the job
        env.execute();
    }
}

MyDeserialization.java

package com.antg.deserialization;

import com.alibaba.fastjson.JSONObject;
import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

public class MyDeserialization implements DebeziumDeserializationSchema<String> {
    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
        //the topic contains the database and table names (format: serverName.databaseName.tableName)
        String topic = sourceRecord.topic();
        String[] arr = topic.split("\\.");
        String db = arr[1];
        String tableName = arr[2];
        //get the operation type: READ, CREATE, UPDATE or DELETE
        Envelope.Operation operation =
                Envelope.operationFor(sourceRecord);
        //get the value and cast it to a Struct
        Struct value = (Struct) sourceRecord.value();

        //the row image before the change (null for inserts and snapshot reads)
        Struct before = value.getStruct("before");
        JSONObject beforeJson = new JSONObject();
        if(before != null){
            for (Field field : before.schema().fields()) {
                Object o = before.get(field);
                beforeJson.put(field.name(), o);
            }

        }
        //the row image after the change (null for deletes)
        Struct after = value.getStruct("after");
        //JSON object holding the after image
        JSONObject afterJson = new JSONObject();
        if(after != null){
            for (Field field : after.schema().fields()) {
                Object o = after.get(field);
                afterJson.put(field.name(), o);
            }
        }
        //assemble the final JSON record that is sent downstream
        JSONObject result = new JSONObject();
        result.put("op", operation.toString().toLowerCase());
        result.put("before", beforeJson);
        result.put("after", afterJson);
        result.put("db", db);
        result.put("table", tableName);
        //emit the record to downstream operators
        collector.collect(result.toJSONString());
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return TypeInformation.of(String.class);
    }
}
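
For reference, a rough sketch of the string this schema emits when order_state changes from a to b (any other columns of the row also appear inside before and after; the ellipses are placeholders, and field order is not guaranteed):

{"op":"update","before":{"order_state":"a",...},"after":{"order_state":"b",...},"db":"test","table":"order_info"}

This is the format that the flatMap in Demo01 above parses with fastjson.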

OutputSkin.java

package com.antg.sink;

import com.antg.entity.FlinkCount;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class OutputSkin extends RichSinkFunction<FlinkCount> {
    Connection connection;
    PreparedStatement pstmt;

    //create a JDBC connection to the result database
    private Connection getConnection(){
        Connection conn = null;
        try{
            //register the MySQL driver (com.mysql.cj.jdbc.Driver is the class name for mysql-connector-java 8.x)
            Class.forName("com.mysql.cj.jdbc.Driver");

            //connection URL; serverTimezone/useSSL are commonly needed with the 8.x driver
            String url = "jdbc:mysql://localhost:3306/test?useSSL=false&serverTimezone=UTC";

            //connection credentials
            conn = DriverManager.getConnection(url, "root", "123456");

        }catch (Exception e){
            e.printStackTrace();
        }
        return conn;
    }


    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);

        connection = getConnection();
        //the sink only issues UPDATEs, so flink_count must already contain one row per order_state value
        String sql = "update flink_count set num=? where order_state=?";
        pstmt = connection.prepareStatement(sql);
    }

    //called once for every incoming record
    @Override
    public void invoke(FlinkCount value, Context context) throws Exception {
        //bind the placeholders: num = count, order_state = key
        pstmt.setString(2,value.getOrderState());
        pstmt.setLong(1,value.getCount());
        pstmt.executeUpdate();
    }

    @Override
    public void close() throws Exception {
        super.close();

        if(pstmt != null){
            pstmt.close();
        }

        if (connection != null){
            connection.close();
        }
    }
}
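
Note that the sink only ever issues UPDATE statements, so flink_count has to be seeded with one row per state before the job starts; otherwise the updates match zero rows and nothing shows up. A sketch of the seeding, plus an upsert alternative (which would require a PRIMARY KEY or UNIQUE index on order_state and a matching change to the SQL and parameter binding in OutputSkin):

-- seed one row per state so the UPDATE in OutputSkin has something to match
INSERT INTO flink_count (order_state, num) VALUES ('a', 0), ('b', 0);

-- alternative: an upsert that works without pre-seeded rows
INSERT INTO flink_count (order_state, num) VALUES (?, ?)
ON DUPLICATE KEY UPDATE num = VALUES(num);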

FlinkCount.java

package com.antg.entity;

import lombok.Data;

@Data
public class FlinkCount {
    private String orderState;
    private Long count;
}

Results

Whenever the order_state column of order_info changes, the data in flink_count is updated dynamically:
If a new row is inserted, the count of the corresponding state is incremented.
If a row is deleted, the count of its state is decremented.
If a row changes from state a to b (or from b to a), both counts are adjusted.
If only other columns change, the result table stays the same.
Because the initial snapshot emits every existing row as a "read" event, the counts start out reflecting the current contents of the table rather than zero.
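
As a concrete illustration (a sketch only; the id values and the other_column name are made up, and the real order_info has more columns), suppose flink_count reads a = 2, b = 1 after the initial snapshot:

INSERT INTO order_info (order_state) VALUES ('a');      -- flink_count: a = 3, b = 1
UPDATE order_info SET order_state = 'b' WHERE id = 5;   -- a = 2, b = 2 (the row moved from a to b)
UPDATE order_info SET other_column = 'x' WHERE id = 5;  -- no change, order_state untouched
DELETE FROM order_info WHERE id = 5;                    -- a = 2, b = 1 (the deleted row was in state b)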

Summary

This was a first taste of Flink CDC; it is an impressive component, and I want to dig into how it is implemented under the hood when I get the chance.
If you spot any problems in the code above, feel free to leave a comment.
