一、开启Binlog日志
编辑:vi /etc/my.cnf
追加如下内容
#数据库id
server-id = 1
#启动binlog,该参数的值会作为binlog的文件名
log-bin=mysql-bin
#binlog类型,Flink CDC 要求为 row 类型
binlog_format=row
#启用binlog的数据库,需根据实际情况作出修改
binlog-do-db=gmall
binlog-do-db=gmall-config
二、DataStream
package com.hpsk.flink.cdc;
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Struct;
/**
 * DataStream-API demo: reads MySQL changelog events via Flink CDC and prints
 * each event as a flat JSON string.
 */
public class Flink_CDC {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism 1 preserves the binlog event order in the printed output.
        env.setParallelism(1);
        // MySQL CDC source: StartupOptions.initial() first snapshots existing rows,
        // then switches to tailing the binlog.
        MySqlSource<String> mySqlSource = MySqlSource.<String>builder()
                .hostname("ip")
                .port(3306)
                .username("用户名")
                .password("密码")
                .databaseList("数据库")
                .tableList("数据库.表名") // must be fully qualified: database.table
                .startupOptions(StartupOptions.initial())
                .deserializer(new CustomerDeserializationSchema())
                .build();
        DataStreamSource<String> mysqlDS = env.fromSource(mySqlSource,
                WatermarkStrategy.noWatermarks(),
                "mySqlSource");
        mysqlDS.print();
        env.execute("FlinkCDCDataStream");
    }

    /**
     * Converts a Debezium {@link SourceRecord} into a JSON string of the shape
     * {"database":..,"table":..,"operation":..,"before":{..},"after":{..}}.
     */
    public static class CustomerDeserializationSchema implements DebeziumDeserializationSchema<String> {
        @Override
        public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
            // Debezium topic format: <server-name>.<database>.<table>
            String topic = sourceRecord.topic();
            String[] arr = topic.split("\\.");
            String db = arr[1];
            String tableName = arr[2];
            Envelope.Operation operation = Envelope.operationFor(sourceRecord);
            Struct value = (Struct) sourceRecord.value();
            // "before" is null for inserts/snapshot reads; "after" is null for
            // deletes — both must be null-checked (the original NPE'd on deletes).
            JSONObject beforeJson = structToJson(value.getStruct("before"));
            JSONObject afterJson = structToJson(value.getStruct("after"));
            JSONObject result = new JSONObject();
            result.put("database", db);
            result.put("table", tableName);
            result.put("operation", operation.toString().toLowerCase());
            result.put("before", beforeJson);
            result.put("after", afterJson);
            collector.collect(result.toJSONString());
        }

        /** Copies every field of a Debezium Struct into a JSONObject; returns an empty object for null. */
        private static JSONObject structToJson(Struct struct) {
            JSONObject json = new JSONObject();
            if (struct != null) {
                for (Field field : struct.schema().fields()) {
                    json.put(field.name(), struct.get(field));
                }
            }
            return json;
        }

        @Override
        public TypeInformation<String> getProducedType() {
            return TypeInformation.of(String.class);
        }
    }
}
三、SQL
package com.hpsk.flink.cdc;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
/**
 * SQL-API demo: declares a mysql-cdc table over MySQL's binlog and streams its
 * changelog to stdout.
 */
public class FlinkSQL_CDC {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // PRIMARY KEY must be declared NOT ENFORCED: Flink cannot enforce
        // constraints on externally-owned data, and validation rejects a plain
        // PRIMARY KEY ("Flink doesn't support ENFORCED mode for PRIMARY KEY").
        tableEnv.executeSql("CREATE TABLE activity_info (" +
                " `id` INT," +
                " `activity_name` STRING," +
                " `activity_type` STRING," +
                " `activity_desc` STRING," +
                " `start_time` STRING," +
                " `end_time` STRING," +
                " `create_time` STRING," +
                " PRIMARY KEY (`id`) NOT ENFORCED" +
                ") WITH (" +
                " 'connector' = 'mysql-cdc'," +
                " 'hostname' = 'ip地址'," +
                " 'port' = '3306'," +
                " 'username' = '用户名'," +
                " 'password' = '密码'," +
                " 'database-name' = '数据库'," +
                " 'table-name' = '表名'" +
                ")");
        // executeSql submits its own job and print() blocks streaming results.
        // No env.execute() here: this topology has no DataStream operators, so
        // calling it would throw "No operators defined in streaming topology".
        tableEnv.executeSql("select * from activity_info").print();
    }
}