(11) Flink in Practice: Reading from and Writing to MySQL (source + sink)

1、pom.xml

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.33</version>
        </dependency>

2、MysqlReader(source)

package com.steven.flink;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class MysqlReader extends RichSourceFunction<Tuple2<String, String>> {


    private Connection connection = null;
    private PreparedStatement ps = null;


    //This method opens the database connection; in a real project the settings below would come from a configuration class.
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        Class.forName("com.mysql.jdbc.Driver");  //load the JDBC driver
        connection = DriverManager.getConnection("jdbc:mysql://localhost:3306", "root", "root");  //obtain a connection
        ps = connection.prepareStatement("select id,dq,ah,ay from flink.ajxx_xs");

    }


    @Override
    public void run(SourceContext<Tuple2<String, String>> sourceContext) throws Exception {
        ResultSet resultSet = ps.executeQuery();
        while (resultSet.next()) {
            Tuple2<String, String> tuple = new Tuple2<String, String>();
            tuple.setFields(resultSet.getString(1), resultSet.getString(2));
            sourceContext.collect(tuple);
        }
    }

    @Override
    public void cancel() {
        // Nothing to interrupt here: run() executes a single query and then returns.
    }

    @Override
    public void close() throws Exception {
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
        super.close();
    }
}
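As written, run() executes the query once and returns, so the source finishes after a single pass over the table. If the table should be polled continuously instead, the usual pattern is a volatile running flag that cancel() flips. Below is a minimal sketch of that variant, replacing run() and cancel() above (the 5-second poll interval is an assumption, not something from the original code):

    private volatile boolean isRunning = true;

    @Override
    public void run(SourceContext<Tuple2<String, String>> sourceContext) throws Exception {
        while (isRunning) {
            try (ResultSet resultSet = ps.executeQuery()) {
                while (resultSet.next()) {
                    sourceContext.collect(Tuple2.of(resultSet.getString(1), resultSet.getString(2)));
                }
            }
            Thread.sleep(5000);  // poll the table again after 5 seconds
        }
    }

    @Override
    public void cancel() {
        // Only stop the polling loop; the JDBC resources are released in close().
        isRunning = false;
    }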

3、MysqlWriter(sink)

To build a custom sink, the first idea is to implement Flink's built-in SinkFunction interface and provide the invoke method, like this:

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class MysqlSink implements SinkFunction<Tuple2<String, String>> {
 
    private static final long serialVersionUID = 1L;
    private Connection connection;
    private PreparedStatement preparedStatement;
    // Placeholder values; in a real job these would be read from configuration.
    String username = "mysql.user";
    String password = "mysql.password";
    String drivername = "mysql.driver";
    String dburl = "mysql.url";
 
    @Override
    public void invoke(Tuple2<String,String> value) throws Exception {
        Class.forName(drivername);
        connection = DriverManager.getConnection(dburl, username, password);
        String sql = "insert into table(name,nickname) values(?,?)";
        preparedStatement = connection.prepareStatement(sql);
        preparedStatement.setString(1, value.f0);
        preparedStatement.setString(2, value.f1);
        preparedStatement.executeUpdate();
        if (preparedStatement != null) {
            preparedStatement.close();
        }
        if (connection != null) {
            connection.close();
        }
 
    }
 
}

The problem with this implementation is that a MySQL connection is opened and closed again for every single record, which is expensive. Flink's Rich functions offer a better way to do this.

The advantage of the Rich variant is that it provides open and close methods: the connection is established once during initialization and then reused for every record.

My project code is as follows:

package com.steven.flink;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

/**
 * @author steven
 */
public class MysqlWriter extends RichSinkFunction<Tuple2<String, String>> {
    private Connection connection = null;
    private PreparedStatement ps = null;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        if (connection == null) {
            Class.forName("com.mysql.jdbc.Driver");  //load the JDBC driver
            connection = DriverManager.getConnection("jdbc:mysql://localhost:3306", "root", "root");  //obtain a connection
        }

        ps = connection.prepareStatement("insert into flink.ajxx_xs2 values (?,?)");
    }

    @Override
    public void invoke(Tuple2<String, String> value, Context context) throws Exception {
        //Write the record emitted by MysqlReader
        try {
            ps.setString(1, value.f0);
            ps.setString(2, value.f1);
            ps.executeUpdate();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void close() throws Exception {
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
        super.close();
    }
}

4、main

package com.steven.flink;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * @Description: MySQL read and write
 * @author Steven
 * @Date: 2020/3/20 13:44
 */
public class Demo7 {
    public static void main(String[] args) throws Exception {
//        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 8081, "D:\\flink-steven\\target\\flink-0.0.1-SNAPSHOT.jar");
        env.setParallelism(8);
        DataStreamSource<Tuple2<String, String>> dataStream = env.addSource(new MysqlReader());
        dataStream.print();
        dataStream.addSink(new MysqlWriter());
        env.execute("Flink reads DB data and writes it to the database");

    }
}

 

5、Problems

5.1、The java.lang.ClassNotFoundException: com.mysql.jdbc.Driver problem

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.33</version>
        </dependency>

The dependency is already declared in the pom and is packaged into the jar, so why does running the job still report:

Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver

Simply copying mysql-connector-java-5.1.31.jar into \apache-flink-1.9.2\lib does work around the problem, but I am still puzzled why it happens in the first place; advice from readers is welcome. (A likely explanation is that either the job jar does not really bundle the driver classes unless a shade/assembly plugin is used, or JDBC driver loading runs into Flink's parent/child classloader separation; placing the connector jar in Flink's lib directory sidesteps both.)

5.2、Can the speed of writing to MySQL be controlled?

In the example above I find the writes still slow, even though the parallelism is set.

There are several ways to address this:

1、Save the buffered data when a checkpoint is taken, as sketched after the reference below.

Reference: 《flink写入mysql的两种方式》 (two ways of writing from Flink to MySQL)
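A minimal sketch of this idea (not the referenced article's code), assuming checkpointing is enabled with env.enableCheckpointing(...): the sink only buffers rows in invoke and flushes them to MySQL in one JDBC batch each time a checkpoint is taken. The class name and the reuse of MysqlWriter's connection settings are illustrative assumptions.

package com.steven.flink;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.List;

public class CheckpointedMysqlWriter extends RichSinkFunction<Tuple2<String, String>>
        implements CheckpointedFunction {

    private transient Connection connection;
    private transient PreparedStatement ps;
    private final List<Tuple2<String, String>> buffer = new ArrayList<>();

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        Class.forName("com.mysql.jdbc.Driver");
        connection = DriverManager.getConnection("jdbc:mysql://localhost:3306", "root", "root");
        ps = connection.prepareStatement("insert into flink.ajxx_xs2 values (?,?)");
    }

    @Override
    public void invoke(Tuple2<String, String> value, Context context) {
        buffer.add(value);  // only buffer here; the write happens at checkpoint time
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        // Flush everything collected since the last checkpoint in one JDBC batch.
        for (Tuple2<String, String> value : buffer) {
            ps.setString(1, value.f0);
            ps.setString(2, value.f1);
            ps.addBatch();
        }
        ps.executeBatch();
        buffer.clear();
    }

    @Override
    public void initializeState(FunctionInitializationContext context) {
        // A production sink would also keep the buffer in operator state so that
        // buffered rows survive a failure; omitted to keep the sketch short.
    }

    @Override
    public void close() throws Exception {
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
        super.close();
    }
}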

 

 

2、Because a RichSinkFunction calls invoke once for every record it sinks to MySQL, batch writes are best achieved by aggregating the data before the sink (change RichSinkFunction<Student> to RichSinkFunction<List<Student>>); see the sketch after the reference below.

Reference: 《Flink 读取 Kafka 数据批量写入到 MySQL》 (Flink reads Kafka data and writes it to MySQL in batches)
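A minimal sketch of this approach using the types from this article (Tuple2<String, String> instead of Student): a count window of 1,000 collects records into a List, and the sink then receives one whole List per invoke. The empty sink body marks where the JDBC batch write would go.

package com.steven.flink;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.List;

public class BatchBeforeSinkDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Tuple2<String, String>> dataStream = env.addSource(new MysqlReader());

        dataStream
                .countWindowAll(1000)  // group 1,000 records into one window
                .apply(new AllWindowFunction<Tuple2<String, String>, List<Tuple2<String, String>>, GlobalWindow>() {
                    @Override
                    public void apply(GlobalWindow window,
                                      Iterable<Tuple2<String, String>> values,
                                      Collector<List<Tuple2<String, String>>> out) {
                        List<Tuple2<String, String>> batch = new ArrayList<>();
                        for (Tuple2<String, String> value : values) {
                            batch.add(value);
                        }
                        out.collect(batch);  // emit the whole batch as one element
                    }
                })
                .addSink(new RichSinkFunction<List<Tuple2<String, String>>>() {
                    @Override
                    public void invoke(List<Tuple2<String, String>> batch, Context context) {
                        // Write the whole batch with one PreparedStatement batch here,
                        // opening/closing the connection in open()/close() as in MysqlWriter.
                    }
                });

        env.execute("batch before sink");
    }
}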

 

3、Inside the RichSinkFunction, collect every 1,000 records into a PreparedStatement batch and commit them in one go; a sketch of this approach is shown below.

The job was executed with a parallelism of 4.
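A minimal sketch of such a batch-committing sink; it reuses the connection settings and target table assumed for MysqlWriter above and is an illustration of the idea, not the original code:

package com.steven.flink;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class MysqlBatchCommitWriter extends RichSinkFunction<Tuple2<String, String>> {

    private static final int BATCH_SIZE = 1000;

    private transient Connection connection;
    private transient PreparedStatement ps;
    private int count = 0;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        Class.forName("com.mysql.jdbc.Driver");
        connection = DriverManager.getConnection("jdbc:mysql://localhost:3306", "root", "root");
        ps = connection.prepareStatement("insert into flink.ajxx_xs2 values (?,?)");
    }

    @Override
    public void invoke(Tuple2<String, String> value, Context context) throws Exception {
        ps.setString(1, value.f0);
        ps.setString(2, value.f1);
        ps.addBatch();
        count++;
        if (count >= BATCH_SIZE) {
            ps.executeBatch();  // one round trip for 1,000 rows
            count = 0;
        }
    }

    @Override
    public void close() throws Exception {
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
        super.close();
    }
}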

This basically solves the batch-commit problem, but there is still a small flaw: the final count falls short of 1,000, so the last partial batch is never executed. This remains to be addressed by the business logic or by some other mechanism.

A temporary workaround is sketched below.
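One possible workaround is to flush whatever is still pending before the statement is closed; a minimal sketch, replacing close() in the batch sink sketch above:

    @Override
    public void close() throws Exception {
        if (ps != null) {
            if (count > 0) {
                ps.executeBatch();  // push the final partial batch of fewer than 1,000 rows
                count = 0;
            }
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
        super.close();
    }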

 
