Flink: Reading and Writing MySQL with Flink

Maven dependency

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-jdbc_2.11</artifactId>
    <version>1.10.0</version>
</dependency>
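
The MySQL JDBC driver itself also has to be on the classpath, since the job loads com.mysql.jdbc.Driver at runtime. A minimal sketch of the extra dependency (the version shown is an assumption; use whatever matches your MySQL server):

<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.47</version>
</dependency>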

Flink MySQL Source and Sink

package flink.batch;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.io.jdbc.JDBCOutputFormat;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.types.Row;

public class BankStatement {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Database connection info
		String driverName = "com.mysql.jdbc.Driver";
		String dbURL = "jdbc:mysql://host:3306/db_name";
		String username = "user_name";
		String password = "password";

		//SQL
		String selectSQL = "select email_receive_time,raw_data from table";
		String insertSQL = "insert into table_new (email_receive_time,transaction_time,transaction_amount) VALUES (?, ?, ?)";

		// Field types of the two selected columns (email_receive_time, raw_data)
		TypeInformation[] fieldsTypes = new TypeInformation[]{
				BasicTypeInfo.DATE_TYPE_INFO,
				BasicTypeInfo.STRING_TYPE_INFO
		};
		RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldsTypes);

		// JDBC Source
		JDBCInputFormat inputFormat = JDBCInputFormat.buildJDBCInputFormat()
                .setDrivername(driverName)
                .setDBUrl(dbURL)
                .setUsername(username)
                .setPassword(password)
                .setQuery(selectSQL)
                .setRowTypeInfo(rowTypeInfo)
                .finish();
        // Read from MySQL
        DataSource<Row> dataSource = env.createInput(inputFormat);

        // Transform: parse the raw_data JSON and build rows matching the INSERT placeholders
        DataSet<Row> ds = dataSource.map(new MapFunction<Row, Row>() {
            @Override
            public Row map(Row value) throws Exception {
                Object emailReceiveTime = value.getField(0);
                String rawData = value.getField(1).toString();
                JSONObject json = JSON.parseObject(rawData);

                // Three fields, matching (email_receive_time, transaction_time, transaction_amount)
                return Row.of(emailReceiveTime,
                        json.getString("transaction_time"),
                        json.getBigDecimal("transaction_amount"));
            }
        });

		//JDBC Sink
        JDBCOutputFormat outputFormat = JDBCOutputFormat.buildJDBCOutputFormat()
                .setDrivername(driverName)
                .setDBUrl(dbURL)
                .setUsername(username)
                .setPassword(password)
                .setQuery(insertSQL)
                .finish();
        // Write to MySQL
        ds.output(outputFormat);

        env.execute("Flink-MySQL batch");
	}
}
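
If the target columns need explicit JDBC types (for example, when a field may be null), the JDBCOutputFormat builder also accepts them via setSqlTypes. A sketch of the same sink with the types spelled out; the java.sql.Types choices below are assumptions about the target table's schema:

JDBCOutputFormat typedOutputFormat = JDBCOutputFormat.buildJDBCOutputFormat()
        .setDrivername(driverName)
        .setDBUrl(dbURL)
        .setUsername(username)
        .setPassword(password)
        .setQuery(insertSQL)
        // assumed column types: DATE, VARCHAR, DECIMAL
        .setSqlTypes(new int[]{java.sql.Types.DATE, java.sql.Types.VARCHAR, java.sql.Types.DECIMAL})
        .finish();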

# Submit command
./bin/flink run \
-m yarn-cluster \
-yn 1 \
-yjm 1024 \
-ytm 1024 \
-p 2 \
-c flink.batch.BankStatement \
/opt/jar/bigdata.jar

Below is a minimal sketch of reading the MySQL binlog with Flink and forwarding the change events to Kafka. It assumes the flink-connector-mysql-cdc connector (from the flink-cdc-connectors project) and the Flink Kafka connector are on the classpath; host, port, database/table names, credentials, and the topic name are all placeholders.

package flink.stream;

import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;
import com.alibaba.ververica.cdc.debezium.StringDebeziumDeserializationSchema;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

import java.util.Properties;

public class MySQLBinlogToKafka {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		// Checkpointing is required so the binlog offset can be restored after a failure
		env.enableCheckpointing(1000L);

		// MySQL binlog (CDC) source; connection details are placeholders
		SourceFunction<String> binlogSource = MySQLSource.<String>builder()
				.hostname("localhost")
				.port(3306)
				.databaseList("test")
				.tableList("test.user")
				.username("root")
				.password("root")
				.deserializer(new StringDebeziumDeserializationSchema()) // emit change events as strings
				.build();

		DataStream<String> changeStream = env.addSource(binlogSource);

		// Forward each change event to a Kafka topic
		Properties kafkaProps = new Properties();
		kafkaProps.setProperty("bootstrap.servers", "localhost:9092");
		changeStream.addSink(new FlinkKafkaProducer<>("binlog-topic", new SimpleStringSchema(), kafkaProps));

		env.execute("MySQL binlog to Kafka");
	}
}

This example covers consuming the MySQL binlog with Flink and writing the change events to Kafka.
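
The binlog source above comes from the flink-cdc-connectors project; a sketch of the corresponding Maven dependency (the version is an assumption):

<dependency>
    <groupId>com.alibaba.ververica</groupId>
    <artifactId>flink-connector-mysql-cdc</artifactId>
    <version>1.1.0</version>
</dependency>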