文章目录
Flink示例——Sink
版本信息
产品 | 版本 |
---|---|
Flink | 1.7.2 |
Java | 1.8.0_231 |
Scala | 2.11.12 |
Maven依赖
- pom.xml 依赖部分
<dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-java</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-streaming-java_2.11</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-clients_2.11</artifactId> <version>${flink.version}</version> </dependency>
自定义SourceFunction
- 提供一个SourceFunction,方便后面测试
public class CustomSourceFunction extends RichSourceFunction<Tuple2<String, Long>> { private boolean flag = true; @Override public void run(SourceContext<Tuple2<String, Long>> ctx) throws Exception { List<String> data = Arrays.asList("a", "b", "c", "d", "e", "f", "g"); Random random = new Random(); while (flag) { Thread.sleep(100); // 随机取一个值 String key = data.get(random.nextInt(data.size())); long value = System.currentTimeMillis(); ctx.collect(Tuple2.of(key, value)); } } @Override public void cancel() { flag = false; } }
Print Sink 示例
- 代码 PrintSinkDemo
public class PrintSinkDemo { public static void main(String[] args) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 自定义SourceFunction CustomSourceFunction sourceFunction = new CustomSourceFunction(); DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction); // dataDS.print("stream"); // 查看源码可知,print内调用了 PrintSinkFunction PrintSinkFunction<Tuple2<String, Long>> sinkFunction = new PrintSinkFunction<>("stream", false); customDS.addSink(sinkFunction); try { env.execute(); } catch (Exception e) { e.printStackTrace(); } } }
Kafka Sink 示例
- Maven导包 pom.xml
<dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-connector-kafka-0.11_2.11</artifactId> <version>${flink.version}</version> </dependency>
- 代码 KafkaSinkDemo
public class KafkaSinkDemo { public static void main(String[] args) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 自定义SourceFunction CustomSourceFunction sourceFunction = new CustomSourceFunction(); // 添加数据源 DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction); // 处理,转为String DataStream<String> resultDS = customDS.map(new MapFunction<Tuple2<String, Long>, String>() { @Override public String map(Tuple2<String, Long> value) throws Exception { return value.f0 + "|" + value.f1; } }); // 创建FlinkKafkaProduce FlinkKafkaProducer011<String> kafkaProducer011 = generateKafkaProducer(); // 发入Kafka resultDS.addSink(kafkaProducer011); try { env.execute(); } catch (Exception e) { e.printStackTrace(); } } /** * 生成 FlinkKafkaProducer */ private static FlinkKafkaProducer011<String> generateKafkaProducer() { // 创建FlinkKafkaProducer FlinkKafkaProducer011<String> kafkaProducer011 = new FlinkKafkaProducer011<>( "192.168.0.101:9092", "topic_01", new SimpleStringSchema() ); // 自定义序列化 - 示例 /* new KeyedSerializationSchema<String>() { @Override public byte[] serializeKey(String element) { // 可以直接为null,也可以为String编码 return null; } @Override public byte[] serializeValue(String element) { // 编码String为byte[] return element.getBytes(StandardCharsets.UTF_8); } @Override public String getTargetTopic(String element) { // 由源码可知,此处优先级最高,FlinkKafkaProducer011中传的topicid是默认值 // 可以在此决定不同的数据发往到不同的topic return null; } }; */ return kafkaProducer011; } }
- 两阶段提交,保证exactly-once
// 关于事务超时问题 // Kafka默认事务最大超时15min,transaction.max.timeout.ms = 900000 // Flink默认事务超时1h, transaction.timeout.ms = 3600000 // 如果时间间隔过长,会导致Kafka已关闭事务,Flink却以为未关闭 // 因此可以将Flink设置小于等于Kafka事务超时时间 Properties props = new Properties(); props.setProperty("bootstrap.servers", "192.168.0.101:9092"); props.setProperty("transaction.timeout.ms", "900000"); FlinkKafkaProducer011<String> kafkaProducer011 = new FlinkKafkaProducer011<>( "topic_01", new SimpleStringSchema(), props, // 两阶段提交,保证exactly-once,默认AT_LEAST_ONCE FlinkKafkaProducer011.Semantic.EXACTLY_ONCE );
ElasticSearch Sink 示例
- Maven导包 pom.xml
<dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-connector-elasticsearch6_2.11</artifactId> <version>${flink.version}</version> </dependency>
- 代码 ESSinkDemo
public class ESSinkDemo { public static void main(String[] args) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 自定义SourceFunction CustomSourceFunction sourceFunction = new CustomSourceFunction(); // 添加数据源 DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction); // 创建ElasticsearchSink ElasticsearchSink<Tuple2<String, Long>> esSink = generateESSink(); // 发入Elasticsearch customDS.addSink(esSink); try { env.execute(); } catch (Exception e) { e.printStackTrace(); } } /** * 生成ElasticsearchSink */ private static ElasticsearchSink<Tuple2<String, Long>> generateESSink() { // 配置HttpHost List<HttpHost> httpHosts = Collections.singletonList( new HttpHost("192.168.0.120", 9200) ); ElasticsearchSinkFunction<Tuple2<String, Long>> sinkFunction = new ElasticsearchSinkFunction<Tuple2<String, Long>>() { @Override public void process(Tuple2<String, Long> tuple2, RuntimeContext runtimeContext, RequestIndexer requestIndexer) { // 封装数据 HashMap<String, String> map = new HashMap<>(); map.put("content", tuple2.f0); map.put("eventTime", tuple2.f1.toString()); map.put("processTime", String.valueOf(System.currentTimeMillis())); // 封装Request IndexRequest request = Requests.indexRequest() .index("my_index") .type("my_data") .source(map); // 发送request requestIndexer.add(request); } }; ElasticsearchSink<Tuple2<String, Long>> esSink = new ElasticsearchSink.Builder<>( httpHosts, sinkFunction ).build(); return esSink; } }
Redis Sink 示例
- Maven导包 pom.xml
<dependency> <groupId>org.apache.bahir</groupId> <artifactId>flink-connector-redis_2.11</artifactId> <version>1.0</version> </dependency>
- 代码 RedisSinkDemo
public class RedisSinkDemo { public static void main(String[] args) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 自定义SourceFunction CustomSourceFunction sourceFunction = new CustomSourceFunction(); // 添加数据源 DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction); // 创建RedisSink RedisSink<Tuple2<String, Long>> redisSink = generateRedisSink(); // 发入Redis customDS.addSink(redisSink); try { env.execute(); } catch (Exception e) { e.printStackTrace(); } } /** * 生成RedisSink */ private static RedisSink<Tuple2<String, Long>> generateRedisSink() { // Redis配置 FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder() .setMaxTotal(8) // 最大实例总数 .setMaxIdle(4) // 实例最多空闲数 .setMinIdle(2) .setHost("192.168.0.110") .setPort(6379) .build(); // Mapper RedisMapper<Tuple2<String, Long>> redisMapper = new RedisMapper<Tuple2<String, Long>>() { @Override public RedisCommandDescription getCommandDescription() { // 定义保存数据到Redis的命令 return new RedisCommandDescription( RedisCommand.HSET, // 使用hset命令 "my_hash" // 表名 ); } @Override public String getKeyFromData(Tuple2<String, Long> tuple2) { return tuple2.f0; } @Override public String getValueFromData(Tuple2<String, Long> tuple2) { return tuple2.f1.toString(); } }; return new RedisSink<>(config, redisMapper); } }
自定义 Sink 示例 (JDBC)
- Maven导包 pom.xml
<dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <version>5.1.38</version> </dependency>
- 代码 JDBCUtils
public class JDBCUtils {

    // Load the MySQL driver once when the class is initialized
    static {
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new MySQL connection to test_db.
     *
     * @return a live {@link Connection}; never null
     * @throws RuntimeException if the connection cannot be established — the
     *         underlying SQLException is attached as the cause (previously it
     *         was only printed and then discarded, losing the failure reason)
     */
    public static Connection getConnection() {
        String url = "jdbc:mysql://192.168.0.130:3306/test_db";
        String user = "user_name";
        String password = "123456";
        Connection conn = null;
        try {
            conn = DriverManager.getConnection(url, user, password);
        } catch (SQLException e) {
            // Preserve the root cause instead of swallowing it
            throw new RuntimeException("JDBC创建连接失败!", e);
        }
        if (conn == null) {
            throw new RuntimeException("JDBC创建连接失败!");
        }
        return conn;
    }
}
- 代码 JDBCSinkFunction
public class JDBCSinkFunction extends RichSinkFunction<Tuple2<String, Long>> { private Connection conn = null; private PreparedStatement pst; @Override public void open(Configuration parameters) throws Exception { conn = JDBCUtils.getConnection(); pst = conn.prepareStatement( "INSERT INTO tb_name (content, event_time, update_time) VALUES (?, ?, ?)" ); } @Override public void close() throws Exception { conn.close(); } @Override public void invoke(Tuple2<String, Long> value, Context context) throws Exception { // 执行插入 pst.setString(1, value.f0); pst.setLong(2, value.f1); pst.setLong(3, System.currentTimeMillis()); pst.execute(); } // 批量插入-示例 /* private int count = 0; @Override public void invoke(Tuple2<String, Long> value, Context context) throws Exception { // 执行插入 pst.setString(1, value.f0); pst.setLong(2, value.f1); pst.setLong(3, System.currentTimeMillis()); pst.addBatch(); count++; // 每1000条记录插入一次 if (count == 1000){ pst.executeBatch(); conn.commit(); // 进行手动提交 pst.clearBatch(); count = 0; } // 记得在open处关闭自动提交[conn.setAutoCommit(false);] } */ }
- 代码 CustomSinkDemo
public class CustomSinkDemo { public static void main(String[] args) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 自定义SourceFunction CustomSourceFunction sourceFunction = new CustomSourceFunction(); // 添加数据源 DataStreamSource<Tuple2<String, Long>> dataDS = env.addSource(sourceFunction); // 创建JDBCSinkFunction JDBCSinkFunction jdbcSink = new JDBCSinkFunction(); // 自定义发入MySQL dataDS.addSink(jdbcSink); try { env.execute(); } catch (Exception e) { e.printStackTrace(); } } }