flink_CDC

2章 Flink CDC案例实操

2.1 DataStream方式的应用

2.1.1 导入依赖

<dependencies>

    <dependency>

        <groupId>org.apache.flink</groupId>

        <artifactId>flink-java</artifactId>

        <version>1.12.0</version>

    </dependency>

    <dependency>

        <groupId>org.apache.flink</groupId>

        <artifactId>flink-streaming-java_2.12</artifactId>

        <version>1.12.0</version>

    </dependency>

    <dependency>

        <groupId>org.apache.flink</groupId>

        <artifactId>flink-clients_2.12</artifactId>

        <version>1.12.0</version>

    </dependency>

    <dependency>

        <groupId>org.apache.hadoop</groupId>

        <artifactId>hadoop-client</artifactId>

        <version>3.1.3</version>

    </dependency>

    <dependency>

        <groupId>mysql</groupId>

        <artifactId>mysql-connector-java</artifactId>

        <version>5.1.49</version>

</dependency>

<dependency>

        <groupId>org.apache.flink</groupId>

        <artifactId>flink-table-planner-blink_2.12</artifactId>

        <version>1.12.0</version>

</dependency>

<dependency>

        <groupId>com.ververica</groupId>

        <artifactId>flink-connector-mysql-cdc</artifactId>

        <version>2.0.0</version>

</dependency>

    <dependency>

        <groupId>com.alibaba</groupId>

        <artifactId>fastjson</artifactId>

        <version>1.2.75</version>

    </dependency>

</dependencies>

<build>

    <plugins>

        <plugin>

            <groupId>org.apache.maven.plugins</groupId>

            <artifactId>maven-assembly-plugin</artifactId>

            <version>3.0.0</version>

            <configuration>

                <descriptorRefs>

                    <descriptorRef>jar-with-dependencies</descriptorRef>

                </descriptorRefs>

            </configuration>

            <executions>

                <execution>

                    <id>make-assembly</id>

                    <phase>package</phase>

                    <goals>

                        <goal>single</goal>

                    </goals>

                </execution>

            </executions>

        </plugin>

    </plugins>

</build>

2.1.2 编写代码

import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;

import com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction;

import com.alibaba.ververica.cdc.debezium.StringDebeziumDeserializationSchema;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;

import org.apache.flink.runtime.state.filesystem.FsStateBackend;

import org.apache.flink.streaming.api.CheckpointingMode;

import org.apache.flink.streaming.api.datastream.DataStreamSource;

import org.apache.flink.streaming.api.environment.CheckpointConfig;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.Properties;

public class FlinkCDC {

    public static void main(String[] args) throws Exception {

        //1.创建执行环境

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setParallelism(1);

        //2.Flink-CDC将读取binlog的位置信息以状态的方式保存在CK,如果想要做到断点续传,需要从Checkpoint或者Savepoint启动程序

        //2.1 开启Checkpoint,每隔5秒钟做一次CK

        env.enableCheckpointing(5000L);

        //2.2 指定CK的一致性语义

env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);

        //2.3 设置任务关闭的时候保留最后一次CK数据

env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        //2.4 指定从CK自动重启策略

        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000L));

        //2.5 设置状态后端

        env.setStateBackend(new FsStateBackend("hdfs://hadoop102:8020/flinkCDC"));

        //2.6 设置访问HDFS的用户名

        System.setProperty("HADOOP_USER_NAME", "atguigu");

        //3.创建Flink-MySQL-CDC的Source

        //initial (default): Performs an initial snapshot on the monitored database tables upon first startup, and continue to read the latest binlog.

        //latest-offset: Never to perform snapshot on the monitored database tables upon first startup, just read from the end of the binlog which means only have the changes since the connector was started.

        //timestamp: Never to perform snapshot on the monitored database tables upon first startup, and directly read binlog from the specified timestamp. The consumer will traverse the binlog from the beginning and ignore change events whose timestamp is smaller than the specified timestamp.

        //specific-offset: Never to perform snapshot on the monitored database tables upon first startup, and directly read binlog from the specified offset.

        DebeziumSourceFunction<String> mysqlSource = MySQLSource.<String>builder()

                .hostname("hadoop102")

                .port(3306)

                .username("root")

                .password("000000")

                .databaseList("gmall-flink")

                .tableList("gmall-flink.z_user_info") //可选配置项,如果不指定该参数,则会读取上一个配置下的所有表的数据,注意:指定的时候需要使用"db.table"的方式

                .startupOptions(StartupOptions.initial())

                .deserializer(new StringDebeziumDeserializationSchema())

                .build();

        //4.使用CDC Source从MySQL读取数据

        DataStreamSource<String> mysqlDS = env.addSource(mysqlSource);

        //5.打印数据

        mysqlDS.print();

        //6.执行任务

        env.execute();

    }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值