Flink: Consuming a Kafka Stream and Writing to Redis in Real Time
This document shows how Flink consumes data from Kafka, processes it with a flatMap() operator, and inserts the processed results into Redis.
The relevant code is as follows:
1. Dependencies
<!-- Flink dependencies -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.10</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.10</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.10</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.10</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.10_2.10</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.1.5</version>
</dependency>
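Note that the artifacts above mix Scala suffixes: the Flink modules use _2.10 while the Redis connector uses _2.11. Mixing suffixes can pull in conflicting transitive dependencies, so it is safer to keep the suffix consistent across all Flink modules. A minimal sketch of how the suffix and version could be centralized with Maven properties (the property names are illustrative; verify that the chosen artifact/version combination exists for your Flink release):

<properties>
    <flink.version>1.3.2</flink.version>
    <scala.binary.version>2.10</scala.binary.version>
</properties>
<!-- then reference the properties in each dependency, for example: -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>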
2. Flink stream processing
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class TestTableAPI {

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Enable checkpointing (every 5000 ms)
        env.enableCheckpointing(5000);
        // Set the time characteristic (processing time is used here; event time and ingestion time are also available)
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

        // Kafka configuration
        Properties props = new Properties();
        // Note: replace this with your own Kafka address (e.g. 127.0.0.1:9092)
        props.setProperty("bootstrap.servers", "masterLinux:9092");
        props.setProperty("group.id", "flink-group");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "earliest");
        props.put("session.timeout.ms", "30000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        // args[0] = "test"; // the Kafka topic to consume
        FlinkKafkaConsumer010<String> consumer =
                new FlinkKafkaConsumer010<>("test", new SimpleStringSchema(), props);
        DataStream<String> dataStream = env.addSource(consumer);

        /**
         * Apply the operators
         */
        DataStream<Tuple2<String, Integer>> dataStream1 = dataStream.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String in, Collector<Tuple2<String, Integer>> collector) throws Exception {
                // Split the message on ","
                String[] strs = in.split(",");
                // Emit the processed result as a (key, 1) tuple
                collector.collect(Tuple2.of(strs[1], 1));
            }
        // Group by the first tuple field and sum the counts, computed every 5 seconds
        }).keyBy(0).timeWindow(Time.seconds(5)).sum(1);

        // Print to the console
        dataStream1.print();

        // Create the Redis configuration (standalone Redis here)
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("127.0.0.1").setPort(6379).build();
        // Write the data to Redis
        dataStream1.addSink(new RedisSink<Tuple2<String, Integer>>(conf, new RedisExampleMapper()));

        // Execute the job
        env.execute("TestTableAPI Test");
    }
}
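Before wiring in Kafka and Redis, it can be handy to check the transformation logic locally. The following is a minimal sketch, not part of the original code (the class name LocalPipelineTest and the sample messages are made up), that runs the same flatMap / keyBy / sum logic against a bounded in-memory source:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class LocalPipelineTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Bounded in-memory source instead of the Kafka consumer
        env.fromElements("2020-01-01 10:00:00,apple",
                         "2020-01-01 10:00:01,apple",
                         "2020-01-01 10:00:02,banana")
           .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
               @Override
               public void flatMap(String in, Collector<Tuple2<String, Integer>> out) {
                   String[] strs = in.split(",");
                   out.collect(Tuple2.of(strs[1], 1)); // e.g. ("apple", 1)
               }
           })
           .keyBy(0)
           // Rolling sum instead of the 5-second window: a processing-time window may not
           // fire before a bounded source finishes, so the window step is omitted here
           .sum(1)
           .print(); // prints (apple,1), (apple,2), (banana,1), possibly interleaved

        env.execute("Local pipeline test");
    }
}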
3. RedisExampleMapper
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
public class RedisExampleMapper implements RedisMapper<Tuple2<String, Integer>> {

    @Override
    public RedisCommandDescription getCommandDescription() {
        return new RedisCommandDescription(RedisCommand.HSET, "HASH_NAME");
    }

    @Override
    public String getKeyFromData(Tuple2<String, Integer> data) {
        return data.f0;
    }

    @Override
    public String getValueFromData(Tuple2<String, Integer> data) {
        return data.f1 + "";
    }
}
Example:
Data after the flatMap() operator (screenshot omitted).
Data inserted successfully (screenshot omitted).
Note: after the code above runs, the data written to Redis is stored as a hash.
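The Redis data type is determined by the RedisCommand returned from getCommandDescription(): HSET above produces a hash named HASH_NAME whose fields are the tuple keys, which can be inspected with HGETALL HASH_NAME in redis-cli. As a minimal sketch (a hypothetical variant, not in the original article), a mapper that stores each result as a plain Redis string via SET could look like this, using the single-argument RedisCommandDescription constructor because SET needs no additional hash key:

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

// Hypothetical variant: writes each result as a plain Redis string (SET) instead of a hash field
public class RedisStringMapper implements RedisMapper<Tuple2<String, Integer>> {

    @Override
    public RedisCommandDescription getCommandDescription() {
        // SET takes no additional (hash) key, so the single-argument constructor is used
        return new RedisCommandDescription(RedisCommand.SET);
    }

    @Override
    public String getKeyFromData(Tuple2<String, Integer> data) {
        return data.f0; // the Redis key, e.g. "apple"
    }

    @Override
    public String getValueFromData(Tuple2<String, Integer> data) {
        return String.valueOf(data.f1); // the count, stored as a string value
    }
}

With this variant the result would be read back with GET <key> instead of HGETALL.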