Flink Examples: Sink


Version Information

  • Flink 1.7.2
  • Java 1.8.0_231
  • Scala 2.11.12

Maven Dependencies

  • pom.xml dependencies (the ${flink.version} property is assumed to be defined in <properties> and to match the 1.7.2 release listed above)
    <properties>
        <!-- assumed to match the Flink release from the version table above -->
        <flink.version>1.7.2</flink.version>
    </properties>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    

Custom SourceFunction

  • A simple SourceFunction that the following examples use as their test data source
    public class CustomSourceFunction extends RichSourceFunction<Tuple2<String, Long>> {

        // volatile because cancel() is invoked from a different thread than run()
        private volatile boolean flag = true;

        @Override
        public void run(SourceContext<Tuple2<String, Long>> ctx) throws Exception {
            List<String> data = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
            Random random = new Random();
            while (flag) {
                Thread.sleep(100);
                // pick a random key
                String key = data.get(random.nextInt(data.size()));
                long value = System.currentTimeMillis();
                ctx.collect(Tuple2.of(key, value));
            }
        }

        @Override
        public void cancel() {
            flag = false;
        }

    }
    

Print Sink Example

  • Code: PrintSinkDemo
    public class PrintSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            
            // custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction);

            // customDS.print("stream");

            // as the source code shows, print() simply wraps a PrintSinkFunction
            PrintSinkFunction<Tuple2<String, Long>> sinkFunction = new PrintSinkFunction<>("stream", false);
            customDS.addSink(sinkFunction);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
    }
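  • The no-argument shorthands are equivalent to adding the sink manually; the boolean constructor argument only switches between stdout and stderr. A minimal sketch:
    // equivalent shorthand, no manual PrintSinkFunction needed
    customDS.print();       // write every record to stdout
    customDS.printToErr();  // or write to stderr instead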
    

Kafka Sink Example

  • Maven dependency (pom.xml)
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    
  • Code: KafkaSinkDemo
    public class KafkaSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            // custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            // add the source
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction);
            // map each tuple to a String
            DataStream<String> resultDS = customDS.map(new MapFunction<Tuple2<String, Long>, String>() {
                @Override
                public String map(Tuple2<String, Long> value) throws Exception {
                    return value.f0 + "|" + value.f1;
                }
            });
    
            // create the FlinkKafkaProducer
            FlinkKafkaProducer011<String> kafkaProducer011 = generateKafkaProducer();
    
            // write to Kafka
            resultDS.addSink(kafkaProducer011);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        /**
         * Create the FlinkKafkaProducer
         */
        private static FlinkKafkaProducer011<String> generateKafkaProducer() {
            // create the FlinkKafkaProducer
            FlinkKafkaProducer011<String> kafkaProducer011 = new FlinkKafkaProducer011<>(
                    "192.168.0.101:9092", "topic_01", new SimpleStringSchema()
            );
            // custom serialization schema - example
            // (see the sketch at the end of this section for wiring it into the producer)
            /*
            new KeyedSerializationSchema<String>() {
                @Override
                public byte[] serializeKey(String element) {
                    // may simply return null, or encode the String as the record key
                    return null;
                }

                @Override
                public byte[] serializeValue(String element) {
                    // encode the String as byte[]
                    return element.getBytes(StandardCharsets.UTF_8);
                }

                @Override
                public String getTargetTopic(String element) {
                    // as the source code shows, this has the highest priority;
                    // the topic id passed to FlinkKafkaProducer011 is only the default,
                    // so different records can be routed to different topics here
                    return null;
                }
            };
            */
    
            return kafkaProducer011;
        }
    
    }
    
  • Two-phase commit for exactly-once (checkpointing must also be enabled; see the sketch after this snippet)
    // About transaction timeouts:
    // Kafka brokers cap transactions at 15 min by default: transaction.max.timeout.ms = 900000
    // Flink's producer defaults to a 1 h transaction timeout: transaction.timeout.ms = 3600000
    // If the gap is too large, Kafka may have already aborted a transaction that Flink still considers open,
    // so set Flink's transaction timeout to at most the Kafka broker's maximum
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "192.168.0.101:9092");
    props.setProperty("transaction.timeout.ms", "900000");
    FlinkKafkaProducer011<String> kafkaProducer011 = new FlinkKafkaProducer011<>(
            "topic_01",
            new SimpleStringSchema(),
            props,
            // two-phase commit for exactly-once; the default semantic is AT_LEAST_ONCE
            FlinkKafkaProducer011.Semantic.EXACTLY_ONCE 
    );
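  • EXACTLY_ONCE only takes effect together with checkpointing: the producer's Kafka transaction is committed when a checkpoint completes. A minimal sketch (the 60 s interval is an arbitrary choice):
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // enable checkpointing so the two-phase commit can actually commit transactions
    env.enableCheckpointing(60_000, CheckpointingMode.EXACTLY_ONCE);
    // ... build resultDS as in KafkaSinkDemo above ...
    resultDS.addSink(kafkaProducer011);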
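  • Wiring a custom KeyedSerializationSchema into the producer instead of SimpleStringSchema (a minimal sketch, reusing the broker address and topic name from the examples above):
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "192.168.0.101:9092");

    FlinkKafkaProducer011<String> keyedProducer = new FlinkKafkaProducer011<>(
            "topic_01", // default topic; getTargetTopic() may override it per record
            new KeyedSerializationSchema<String>() {
                @Override
                public byte[] serializeKey(String element) {
                    return null; // no key: let Kafka's partitioner decide
                }

                @Override
                public byte[] serializeValue(String element) {
                    return element.getBytes(StandardCharsets.UTF_8);
                }

                @Override
                public String getTargetTopic(String element) {
                    return null; // null falls back to the default topic above
                }
            },
            props
    );
    resultDS.addSink(keyedProducer);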
    

Elasticsearch Sink Example

  • Maven dependency (pom.xml)
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    
  • Code: ESSinkDemo
    public class ESSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            // custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            // add the source
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction);

            // create the ElasticsearchSink
            ElasticsearchSink<Tuple2<String, Long>> esSink = generateESSink();

            // write to Elasticsearch
            customDS.addSink(esSink);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        /**
         * Create the ElasticsearchSink
         */
        private static ElasticsearchSink<Tuple2<String, Long>> generateESSink() {
            // configure the HttpHost(s)
            List<HttpHost> httpHosts = Collections.singletonList(
                    new HttpHost("192.168.0.120", 9200)
            );
            ElasticsearchSinkFunction<Tuple2<String, Long>> sinkFunction = new ElasticsearchSinkFunction<Tuple2<String, Long>>() {
                @Override
                public void process(Tuple2<String, Long> tuple2, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                    // build the document
                    HashMap<String, String> map = new HashMap<>();
                    map.put("content", tuple2.f0);
                    map.put("eventTime", tuple2.f1.toString());
                    map.put("processTime", String.valueOf(System.currentTimeMillis()));
                    // build the IndexRequest
                    IndexRequest request = Requests.indexRequest()
                            .index("my_index")
                            .type("my_data")
                            .source(map);

                    // hand the request to the indexer
                    requestIndexer.add(request);
                }
            };
    
            ElasticsearchSink<Tuple2<String, Long>> esSink = new ElasticsearchSink.Builder<>(
                    httpHosts, sinkFunction
            ).build();
    
            return esSink;
        }
    
    }
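  • By default the sink buffers actions in a bulk request, so records from a small test job may not show up in Elasticsearch right away. The builder exposes bulk-flush settings; a minimal sketch that flushes after every single action:
    ElasticsearchSink.Builder<Tuple2<String, Long>> builder =
            new ElasticsearchSink.Builder<>(httpHosts, sinkFunction);
    // flush each action immediately instead of waiting for a full bulk request
    builder.setBulkFlushMaxActions(1);
    ElasticsearchSink<Tuple2<String, Long>> esSink = builder.build();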
    

Redis Sink Example

  • Maven dependency (pom.xml)
    <dependency>
        <groupId>org.apache.bahir</groupId>
        <artifactId>flink-connector-redis_2.11</artifactId>
        <version>1.0</version>
    </dependency>
    
  • Code: RedisSinkDemo
    public class RedisSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            // custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            // add the source
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction);

            // create the RedisSink
            RedisSink<Tuple2<String, Long>> redisSink = generateRedisSink();

            // write to Redis
            customDS.addSink(redisSink);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        /**
         * Create the RedisSink
         */
        private static RedisSink<Tuple2<String, Long>> generateRedisSink() {
            // Redis connection-pool configuration
            FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
                    .setMaxTotal(8) // maximum number of pooled connections
                    .setMaxIdle(4) // maximum number of idle connections
                    .setMinIdle(2)
                    .setHost("192.168.0.110")
                    .setPort(6379)
                    .build();
            // Mapper
            RedisMapper<Tuple2<String, Long>> redisMapper = new RedisMapper<Tuple2<String, Long>>() {
                @Override
                public RedisCommandDescription getCommandDescription() {
                    // define how records are written to Redis
                    // (see the sketch after this example for a non-hash command)
                    return new RedisCommandDescription(
                            RedisCommand.HSET, // use the HSET command
                            "my_hash" // name of the hash that holds all fields
                    );
                }
    
                @Override
                public String getKeyFromData(Tuple2<String, Long> tuple2) {
                    return tuple2.f0;
                }
    
                @Override
                public String getValueFromData(Tuple2<String, Long> tuple2) {
                    return tuple2.f1.toString();
                }
            };
    
            return new RedisSink<>(config, redisMapper);
        }
    
    }
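  • The second constructor argument ("my_hash" above) is the additional key that hash and sorted-set commands require; for a plain key-value command it is simply omitted. A minimal sketch:
    // store each record as a plain string key/value instead of a hash field
    RedisCommandDescription setDescription = new RedisCommandDescription(RedisCommand.SET);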
    

Custom Sink Example (JDBC)

  • Maven dependency (pom.xml)
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.38</version>
    </dependency>
    
  • Code: JDBCUtils
    public class JDBCUtils {
    
        static {
            try {
                Class.forName("com.mysql.jdbc.Driver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
        }
    
        public static Connection getConnection() {
            String url = "jdbc:mysql://192.168.0.130:3306/test_db";
            String user = "user_name";
            String password = "123456";
    
            Connection conn = null;
            try {
                conn = DriverManager.getConnection(url, user, password);
            } catch (SQLException e) {
                e.printStackTrace();
            }
    
            if (conn == null) {
                throw new RuntimeException("Failed to create JDBC connection!");
            }
    
            return conn;
        }
    
    }
    
  • Code: JDBCSinkFunction
    public class JDBCSinkFunction extends RichSinkFunction<Tuple2<String, Long>> {
    
        private Connection conn = null;
        private PreparedStatement pst;
    
        @Override
        public void open(Configuration parameters) throws Exception {
            conn = JDBCUtils.getConnection();
            pst = conn.prepareStatement(
                    "INSERT INTO tb_name (content, event_time, update_time) VALUES (?, ?, ?)"
            );
        }
    
        @Override
        public void close() throws Exception {
            pst.close(); // release the prepared statement before closing the connection
            conn.close();
        }
    
        @Override
        public void invoke(Tuple2<String, Long> value, Context context) throws Exception {
            // run the insert
            pst.setString(1, value.f0);
            pst.setLong(2, value.f1);
            pst.setLong(3, System.currentTimeMillis());
            pst.execute();
        }
    
        // batched insert - example (see the sketch after this class for the matching open()/close())
        /*
        private int count = 0;
    
        @Override
        public void invoke(Tuple2<String, Long> value, Context context) throws Exception {
            // buffer the insert
            pst.setString(1, value.f0);
            pst.setLong(2, value.f1);
            pst.setLong(3, System.currentTimeMillis());
            pst.addBatch();

            count++;

            // flush once every 1000 records
            if (count == 1000){
                pst.executeBatch();
                conn.commit(); // commit manually
                pst.clearBatch();
                count = 0;
            }
            // remember to disable auto-commit in open(): conn.setAutoCommit(false);
        }
        */
    
    }
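  • If the batched variant above is used, auto-commit has to be disabled in open() and anything still buffered flushed in close(). A minimal sketch of those two methods:
    @Override
    public void open(Configuration parameters) throws Exception {
        conn = JDBCUtils.getConnection();
        conn.setAutoCommit(false); // commit manually after each executeBatch()
        pst = conn.prepareStatement(
                "INSERT INTO tb_name (content, event_time, update_time) VALUES (?, ?, ?)"
        );
    }

    @Override
    public void close() throws Exception {
        if (count > 0) {
            pst.executeBatch(); // flush the records still sitting in the batch
            conn.commit();
        }
        pst.close();
        conn.close();
    }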
    
  • Code: CustomSinkDemo
    public class CustomSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            // custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            // add the source
            DataStreamSource<Tuple2<String, Long>> dataDS = env.addSource(sourceFunction);

            // create the JDBCSinkFunction
            JDBCSinkFunction jdbcSink = new JDBCSinkFunction();

            // write to MySQL via the custom sink
            dataDS.addSink(jdbcSink);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
    }
    