DataStream API: Sink

1.KafkaSink

To use the Kafka sink, first add the dependencies:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.12</artifactId>
    <version>1.13.1</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.75</version>
</dependency>

The key step is creating a FlinkKafkaProducer for the Flink job:

FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>("hadoop102:9092", "first", new SimpleStringSchema());

The String type parameter is the type of the records you write to Kafka.

package net.cyan.Sink;
import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class Demo1_kafkaSink {
    public static void main(String[] args) {
        //Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Create the FlinkKafkaProducer
        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>("hadoop102:9092", "first", new SimpleStringSchema());

        env.socketTextStream("hadoop103", 9999)
                //Transform each input line
                .map(new MapFunction<String, String>() {
                    @Override
                    public String map(String s) throws Exception {
                        //Split the input line
                        String[] split = s.split(",");
                        //Concatenate the fields and encode as a JSON string
                        return JSON.toJSONString(split[0].concat(split[1]).concat(split[2]));
                    }
                })
                //Attach the sink
                .addSink(kafkaProducer);

        try {
            //Launch the job
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
}

When the records you write to Kafka are Strings, this works fine as-is. But when you wrap your data in a custom POJO (equivalent to a bean), the default SimpleStringSchema above can no longer serialize it, so you need one of the producer's overloaded constructors.
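
FlinkKafkaProducer offers an overload that takes a default topic, a KafkaSerializationSchema, producer Properties, and a delivery Semantic. Below is a minimal sketch, assuming the WaterSensor POJO used in the later examples and fastjson for the JSON bytes; the topic name and semantic are illustrative choices (it additionally needs imports for java.util.Properties, org.apache.kafka.clients.producer.ProducerRecord and org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema):

//Sketch only: serialize the POJO to JSON bytes ourselves
Properties props = new Properties();
props.setProperty("bootstrap.servers", "hadoop102:9092");

FlinkKafkaProducer<WaterSensor> pojoProducer = new FlinkKafkaProducer<>(
        "first", //default topic
        new KafkaSerializationSchema<WaterSensor>() {
            @Override
            public ProducerRecord<byte[], byte[]> serialize(WaterSensor element, Long timestamp) {
                //fastjson turns the POJO into UTF-8 JSON bytes
                return new ProducerRecord<>("first", JSON.toJSONBytes(element));
            }
        },
        props,
        FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);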

2.Custom MySQL Sink

Define a custom sink that writes data to MySQL.

Step 1: import the dependency

<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.49</version>
</dependency>

Step 2: define the custom Sink class

/*
Only the RichXXXFunction variants have open and close methods.
Writing to MySQL requires at least one Connection per Task.
open: called once when the Task starts; create the connection here
close: called once when the Task ends; release the connection here
MySink extends RichSinkFunction, which indirectly extends AbstractRichFunction and implements SinkFunction<IN>
 */
private static class MySink extends RichSinkFunction<WaterSensor> {

    private Connection connection;

    @Override
    //Called once when the Task starts
    public void open(Configuration parameters) throws Exception {
        connection = DriverManager.getConnection("jdbc:mysql://hadoop102:3306/uba_db?useUnicode=true&characterEncoding=UTF-8",
                "root",
                "123321");
    }

    @Override
    //Called once when the Task shuts down
    public void close() throws Exception {
        //Release the connection if one was opened
        if (connection != null) {
            connection.close();
        }
    }

    @Override
    //Called once per record
    public void invoke(WaterSensor value, Context context) throws Exception {
        //Idempotent write: replace into overwrites the row with the same primary key
        String sql = "replace into tableName values(?,?,?)";
        //Precompile, and close the statement when done
        try (PreparedStatement preparedStatement = connection.prepareStatement(sql)) {
            //Fill the placeholders: the first argument is the placeholder position,
            //the second is the value bound to it
            preparedStatement.setString(1, value.getId());
            preparedStatement.setLong(2, value.getTs());
            preparedStatement.setInt(3, value.getVc());
            preparedStatement.executeUpdate();
        }
    }
}

Step 3: simply pass an instance to addSink

    //Create the execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.socketTextStream("hadoop103", 9999)
            .map(new MapFunction<String, WaterSensor>() {
                @Override
                public WaterSensor map(String s) throws Exception {
                    String[] split = s.split(",");
                    return new WaterSensor(split[0], Long.valueOf(split[1]), Integer.valueOf(split[2]));
                }
            }).addSink(new MySink());


    try {
        //Launch the job
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
    }

}
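
The examples here and below assume a simple WaterSensor POJO; net.cyan.POJO.WaterSensor is never shown in the post, but a minimal sketch of what it presumably looks like is:

public class WaterSensor {
    private String id;   //sensor id
    private Long ts;     //event timestamp
    private Integer vc;  //water level

    public WaterSensor() {}

    public WaterSensor(String id, Long ts, Integer vc) {
        this.id = id;
        this.ts = ts;
        this.vc = vc;
    }

    public String getId() { return id; }
    public Long getTs() { return ts; }
    public Integer getVc() { return vc; }
    public void setId(String id) { this.id = id; }
    public void setTs(Long ts) { this.ts = ts; }
    public void setVc(Integer vc) { this.vc = vc; }
}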

3.RedisSink

The main work for a RedisSink is creating the FlinkJedisPoolConfig connection pool and importing the dependency; note the connector has only this one release and is no longer updated:

<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-redis -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.1.5</version>
</dependency>

//Create a FlinkJedisPoolConfig connection pool
FlinkJedisPoolConfig jedis = new FlinkJedisPoolConfig.Builder()
        .setHost("hadoop102") //host name
        .setPort(6379)  //port
        .setDatabase(0)   //database id
        .setMaxTotal(20)  //max total connections
        .setMaxIdle(10)   //max idle connections
        .setMinIdle(5)  //min idle connections
        .setTimeout(60000) //timeout, 60s
        .build();

Then implement a RedisMapper and attach the RedisSink with addSink:

package net.cyan.Sink;

import com.alibaba.fastjson.JSON;
import net.cyan.POJO.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

/*
A custom sink that writes data to Redis
 */
public class Demo3_RedisSink {
    public static void main(String[] args) {
        //Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Create a FlinkJedisPoolConfig connection pool
        FlinkJedisPoolConfig jedis = new FlinkJedisPoolConfig.Builder()
                .setHost("hadoop102") //host name
                .setPort(6379)  //port
                .setDatabase(0)   //database id
                .setMaxTotal(20)  //max total connections
                .setMaxIdle(10)   //max idle connections
                .setMinIdle(5)  //min idle connections
                .setTimeout(60000) //timeout, 60s
                .build();


        env.socketTextStream("hadoop103", 9999)
                .map(new MapFunction<String, WaterSensor>() {
                    @Override
                    public WaterSensor map(String s) throws Exception {
                        String[] split = s.split(",");

                        return new WaterSensor(split[0], Long.valueOf(split[1]), Integer.valueOf(split[2]));
                    }
                })
                //Attach the RedisSink
                .addSink(new RedisSink<WaterSensor>(jedis, new MyStringMapping()));


        try {
            //Launch the job
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    private static class MyStringMapping implements RedisMapper<WaterSensor> {

        @Override
        //Describes the command used to write to Redis
        public RedisCommandDescription getCommandDescription() {
            //Single-argument constructor: just the command
            return new RedisCommandDescription(RedisCommand.SET);
        }

        @Override
        //Which part of the record to use as the key
        public String getKeyFromData(WaterSensor waterSensor) {
            return waterSensor.getId();
        }

        @Override
        //Which part to use as the value
        public String getValueFromData(WaterSensor waterSensor) {
            return JSON.toJSONString(waterSensor);
        }


    }
}

List and Set work much the same as String: set the RedisMapper's type parameter to the corresponding type and switch the command to the matching one (e.g. RPUSH for a list, SADD for a set), as sketched below.
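
For example, a list-flavored mapper has exactly the same shape; a minimal sketch (MyListMapping is a hypothetical name, RPUSH appends each record to a Redis list keyed by sensor id):

private static class MyListMapping implements RedisMapper<WaterSensor> {

    @Override
    public RedisCommandDescription getCommandDescription() {
        //RPUSH instead of SET: append to a list
        return new RedisCommandDescription(RedisCommand.RPUSH);
    }

    @Override
    public String getKeyFromData(WaterSensor waterSensor) {
        return waterSensor.getId();
    }

    @Override
    public String getValueFromData(WaterSensor waterSensor) {
        return JSON.toJSONString(waterSensor);
    }
}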

4.JDBCSink (ClickHouse example)

ClickHouse table DDL (ReplacingMergeTree deduplicates rows with the same order-by key during background merges, so retried inserts do not leave lasting duplicates):

create table if not exists watersensor(
    id String,
    ts UInt64,
    vc UInt32
) engine = ReplacingMergeTree(ts)
order by (id, ts);

Again, import the dependencies first. Two are needed this time: one for JDBC and one for ClickHouse.

<dependency>
    <groupId>ru.yandex.clickhouse</groupId>
    <artifactId>clickhouse-jdbc</artifactId>
    <version>0.1.55</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-jdbc_2.11</artifactId>
    <version>1.13.6</version>
</dependency>

Then construct a JdbcSink. The relevant factory method signature is:

public static <T> SinkFunction<T> sink(
        String sql,
        JdbcStatementBuilder<T> statementBuilder,
        JdbcConnectionOptions connectionOptions) {
    return sink(sql, statementBuilder, JdbcExecutionOptions.defaults(), connectionOptions);
}

The full code:

package net.cyan.Sink;

import net.cyan.POJO.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.connector.jdbc.*;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.sql.PreparedStatement;
import java.sql.SQLException;

public class Demo5_JDBCSink {
    public static void main(String[] args) {
        //Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Build the four arguments JdbcSink.sink needs
        JdbcStatementBuilder<WaterSensor> statementBuilder = new JdbcStatementBuilder<WaterSensor>() {
            @Override
            //Called once for every WaterSensor in the stream
            public void accept(PreparedStatement preparedStatement, WaterSensor waterSensor) throws SQLException {
                //Fill the placeholders
                preparedStatement.setString(1, waterSensor.getId());
                preparedStatement.setLong(2, waterSensor.getTs());
                preparedStatement.setInt(3, waterSensor.getVc());
            }
        };
        JdbcExecutionOptions executionOptions = JdbcExecutionOptions.builder()
                .withBatchSize(200) //records per batch
                .withMaxRetries(3)  //max retries on failure; the write should ideally be idempotent
                .withBatchIntervalMs(2000) //how long to wait before flushing a partial batch
                .build();
        JdbcConnectionOptions connectionOptions = new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .withUrl("jdbc:clickhouse://hadoop102:8123/upp220608") //ClickHouse address
                .build();
        //Assemble the sink from the four arguments
        SinkFunction<WaterSensor> sink = JdbcSink.sink(
                "insert into watersensor values(?,?,?)",
                statementBuilder,
                executionOptions,
                connectionOptions
        );

        env.socketTextStream("hadoop103", 9999)
                .map(new MapFunction<String, WaterSensor>() {
                    @Override
                    public WaterSensor map(String s) throws Exception {
                        String[] split = s.split(",");
                        return new WaterSensor(split[0], Long.valueOf(split[1]), Integer.valueOf(split[2]));
                    }
                })
                .addSink(sink);

        try {
            //Launch the job
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
}

Once the job is running, just send data to the socket from the virtual machine (for example a line such as ws_001,1577844001,45 on hadoop103:9999) and it will be written to ClickHouse.
