1、pom.xml
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.33</version>
</dependency>
2、MysqlReader(source)
package com.steven.flink;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
public class MysqlReader extends RichSourceFunction<Tuple2<String, String>> {
    private Connection connection = null;
    private PreparedStatement ps = null;
    // volatile: cancel() is invoked from a different thread than run(),
    // so the flag must be visible across threads
    private volatile boolean isRunning = true;

    /**
     * Opens the JDBC connection and prepares the query once per parallel task.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        Class.forName("com.mysql.jdbc.Driver"); // load the MySQL JDBC driver
        connection = DriverManager.getConnection("jdbc:mysql://localhost:3306", "root", "root");
        ps = connection.prepareStatement("select id,dq,ah,ay from flink.ajxx_xs");
    }

    /**
     * Executes the query and emits one Tuple2(column 1, column 2) per row.
     * Stops early when cancel() flips the running flag.
     */
    @Override
    public void run(SourceContext<Tuple2<String, String>> sourceContext) throws Exception {
        // try-with-resources: the ResultSet is closed even if collect() throws
        // (the original code never closed it)
        try (ResultSet resultSet = ps.executeQuery()) {
            while (isRunning && resultSet.next()) {
                sourceContext.collect(Tuple2.of(resultSet.getString(1), resultSet.getString(2)));
            }
        }
    }

    /**
     * Signals run() to stop. Resource cleanup belongs in close(), which the
     * Flink runtime always calls; the original closed JDBC resources here
     * (and in the wrong order: connection before statement).
     */
    @Override
    public void cancel() {
        isRunning = false;
    }

    /**
     * Releases JDBC resources: statement first, then connection.
     */
    @Override
    public void close() throws Exception {
        try {
            if (ps != null) {
                ps.close();
            }
            if (connection != null) {
                connection.close();
            }
        } finally {
            super.close();
        }
    }
}
3、MysqlWriter(sink)
自定义sink,首先想到的是implements SinkFunction,即实现flink自带的SinkFunction接口,在其中实现invoke方法,实现如下:
public class MysqlSink implements
        SinkFunction<Tuple2<String, String>> {
    private static final long serialVersionUID = 1L;
    // NOTE(review): these strings look like configuration KEYS rather than real
    // credential values, yet they are passed to DriverManager verbatim — confirm
    // they are resolved from configuration before this sink is used.
    String username = "mysql.user";
    String password = "mysql.password";
    String drivername = "mysql.driver";
    String dburl = "mysql.url";

    /**
     * Writes one tuple per call. A fresh connection is opened and closed on
     * every invocation (intentionally naive — see the RichSinkFunction variant
     * for connection reuse). try-with-resources guarantees the statement and
     * connection are closed even when executeUpdate() throws; the original
     * closed them only on the success path and leaked both on failure.
     */
    @Override
    public void invoke(Tuple2<String, String> value) throws Exception {
        Class.forName(drivername);
        String sql = "insert into table(name,nickname) values(?,?)";
        try (Connection connection = DriverManager.getConnection(dburl, username, password);
             PreparedStatement preparedStatement = connection.prepareStatement(sql)) {
            preparedStatement.setString(1, value.f0);
            preparedStatement.setString(2, value.f1);
            preparedStatement.executeUpdate();
        }
    }
}
这样实现有个问题,每一条数据,都要打开mysql连接,再关闭,比较耗时,这个可以使用flink中比较好的Rich方式来实现。
Rich方式的优点在于,有个open和close方法,在初始化的时候建立一次连接,之后一直使用这个连接即可。
我的项目代码如下:
package com.steven.flink;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
/**
* @author steven
*/
public class MysqlWriter extends RichSinkFunction<Tuple2<String, String>> {
    private Connection connection = null;
    private PreparedStatement ps = null;

    /**
     * Opens the JDBC connection once per parallel task and prepares the insert
     * statement, which is then reused for every invoke() call.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        if (connection == null) {
            Class.forName("com.mysql.jdbc.Driver"); // load the MySQL JDBC driver
            connection = DriverManager.getConnection("jdbc:mysql://localhost:3306", "root", "root");
        }
        ps = connection.prepareStatement("insert into flink.ajxx_xs2 values (?,?)");
    }

    /**
     * Inserts one row per incoming tuple using the prepared statement opened
     * in open().
     */
    @Override
    public void invoke(Tuple2<String, String> value, Context context) throws Exception {
        try {
            ps.setString(1, value.f0);
            ps.setString(2, value.f1);
            ps.executeUpdate();
        } catch (Exception e) {
            // NOTE(review): failures are printed and swallowed, so a bad row is
            // silently dropped instead of failing the job — confirm this
            // best-effort behavior is intended.
            e.printStackTrace();
        }
    }

    /**
     * Closes the statement first, then the connection. The original called
     * super.close() twice (once at the top, once at the bottom); it is now
     * called exactly once, and always, via finally.
     */
    @Override
    public void close() throws Exception {
        try {
            if (ps != null) {
                ps.close();
            }
            if (connection != null) {
                connection.close();
            }
        } finally {
            super.close();
        }
    }
}
4、main
package com.steven.flink;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
* @Description: MySql读和写
* @author: : Steven
* @Date: 2020/3/20 13:44
*/
public class Demo7 {
    public static void main(String[] args) throws Exception {
        // Submit against a locally running cluster, shipping the job jar.
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createRemoteEnvironment(
                        "localhost", 8081, "D:\\flink-steven\\target\\flink-0.0.1-SNAPSHOT.jar");
        env.setParallelism(8);

        // Read rows from MySQL, echo them to stdout, then write them back out.
        final DataStreamSource<Tuple2<String, String>> rows = env.addSource(new MysqlReader());
        rows.print();
        rows.addSink(new MysqlWriter());

        env.execute("Flink cost DB data to write Database");
    }
}
5、问题
5.1、java.lang.ClassNotFoundException: com.mysql.jdbc.Driver 问题
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.33</version>
</dependency>
pom中已经加了,且打包到jar中,为什么运行报:
Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
虽然直接把mysql-connector-java-5.1.31.jar拷贝到\apache-flink-1.9.2\lib下可以解决这个问题,但是本人还是有疑问为啥会出现这个问题,请各位大神指教。
5.2、是不是可以控制MySQL写入的速度
以上案例本人认为写入还是慢的,虽然设置了并行度。
解决办法有以下几种:
1、在checkpoint的时候保存数据
2、因为 RichSinkFunction 中如果 sink 一条数据到 mysql 中就会调用 invoke 方法一次,所以如果要实现批量写的话,我们最好在 sink 之前就把数据聚合一下。(RichSinkFunction<Student>改为RichSinkFunction<List<Student>>)
参考:Flink 读取 Kafka 数据批量写入到 MySQL
3、在RichSinkFunction中每1000条,利用PreparedStatement的批处理(addBatch/executeBatch)提交,代码如下:
分4个并行度执行的:
基本解决的批量提交的问题,但是还是有点小瑕疵:
原因是最后的count不足1000所以没有执行,这个待业务优化,或者其他功能实现。
临时的解决办法: