What
Flink实现了各种类型的连接器(Connector)来实现数据在不同平台上的读写。使用Java和Scala编程支持Kafka、Twitter、RabbitMQ、ElasticSearch、Cassandra等各种组件和Flink的整合。Flink既可以把数据输出给Kafka,也可以接收从Kafka输入的数据。
How(Flink -> KAFKA)
- 环境
a. KAFKA: Version 2.13
b. Flink: Version 1.11
- POM配置
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>1.11.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.11</artifactId>
<version>1.11.1</version>
</dependency>
- 代码
package com.xiaohei;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
public class Flink2Kafka {
    /**
     * Reads text lines from a socket source and forwards every line to the
     * Kafka topic "my-topic" through a {@link FlinkKafkaProducer} sink.
     */
    public static void main(String[] args) throws Exception {
        // Set up the streaming execution environment.
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: one record per line received on the socket.
        DataStream<String> socketLines = env.socketTextStream("xx.xx.xx.xx", 9090);

        // Sink: serialize each record as a plain string and publish it to Kafka.
        FlinkKafkaProducer<String> kafkaSink = new FlinkKafkaProducer<String>(
                "xx.xx.xx.xx:9092", // broker list
                "my-topic",         // target topic
                new SimpleStringSchema()
        );

        // Attach the Kafka sink and also echo every record to stdout.
        socketLines.addSink(kafkaSink);
        socketLines.print();

        // Launch the job.
        env.execute("flink2kafka test");
    }
}
How(KAFKA -> Flink)
- 环境
a. KAFKA: Version 2.13
b. Flink: Version 1.11
- POM配置
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>1.11.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.11</artifactId>
<version>1.11.1</version>
</dependency>
- 代码
package com.xiaohei;
import com.xiaohei.rep.PageCount;
import com.xiaohei.util.MapUtil;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import java.util.HashMap;
import java.util.Properties;
public class Kafka2Flink {
    /**
     * Consumes access-log lines from the Kafka topic "my-topic2", parses each
     * line into a {@link PageCount}, and counts hits per URL over a sliding
     * 10-second window that advances every 5 seconds. Results are printed.
     */
    public static void main(String[] args) throws Exception {
        // Set up the streaming execution environment.
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka consumer configuration.
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "xx.xx.xx.xx:9092");
        kafkaProps.setProperty("group.id", "test");

        // Source: raw log lines from Kafka, decoded as plain strings.
        DataStream<String> source = env
                .addSource(new FlinkKafkaConsumer<>("my-topic2", new SimpleStringSchema(), kafkaProps));

        // NOTE: anonymous classes (not lambdas) are kept on purpose so Flink
        // can extract the generic output types of the functions.
        source.flatMap(new FlatMapFunction<String, PageCount>() {
                    @Override
                    public void flatMap(String rawLine, Collector<PageCount> out) throws Exception {
                        // Parse the raw line into a field map; skip lines that
                        // yield no fields.
                        HashMap<String, String> fields = MapUtil.FillValue(rawLine);
                        if (!fields.isEmpty()) {
                            out.collect(new PageCount(
                                    fields.get("request_url"),
                                    1L,
                                    fields.get("access_time")));
                        }
                    }
                })
                // Group hits by URL (POJO field name).
                .keyBy("url")
                // Sliding window: 10s length, 5s slide.
                .timeWindow(Time.seconds(10), Time.seconds(5))
                // Sum counts within a window; keep the first record's timestamp.
                .reduce(new ReduceFunction<PageCount>() {
                    @Override
                    public PageCount reduce(PageCount acc, PageCount cur) {
                        return new PageCount(acc.getUrl(), acc.getCount() + cur.getCount(), acc.getAccess_time());
                    }
                })
                .print().setParallelism(1);

        env.execute("Kafka2Flink test");
    }
}
How(KAFKA -> Flink -> Redis)
- 环境
a. KAFKA: Version 2.13
b. Flink: Version 1.11
- POM配置
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>1.11.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.11</artifactId>
<version>1.11.1</version>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.9.0</version>
</dependency>
- 代码
package com.xiaohei;
import com.xiaohei.logic.HBaseProcess;
import com.xiaohei.logic.RedisProcess;
import com.xiaohei.rep.AccessLog;
import com.xiaohei.rep.PageCount;
import com.xiaohei.util.LogUtil;
import com.xiaohei.util.MapUtil;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import java.util.HashMap;
import java.util.Properties;
public class WriteDB {
public static void main(String[] args) throws Exception {
//创建执行环境
final StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "xx.xx.xx.xx:9092");
properties.setProperty("group.id", "test");
DataStream<String> stream = env
.addSource(new FlinkKafkaConsumer<>("my-topic2", new SimpleStringSchema(), properties));
stream.flatMap(new FlatMapFunction<String, AccessLog>() {
@Override
public void flatMap(String access_str,Collector<AccessLog> out) {
AccessLog log = LogUtil.FillValue(access_str);
out.collect(log);
}
})
.filter(new FilterFunction<AccessLog>(){
@Override
public boolean filter(AccessLog log) throws Exception{
return log.getRequest_url() != null;
}
})
.process(new RedisProcess())
env.execute("flink to redis");
}
}
package com.xiaohei.logic;
import com.xiaohei.rep.AccessLog;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.configuration.Configuration;
import redis.clients.jedis.Jedis;
public class RedisProcess extends ProcessFunction<AccessLog, AccessLog> {
    private static final long serialVersionUID = 1L;

    // Per-subtask Redis connection; created in open(), released in close().
    private Jedis jedis;

    /**
     * Opens the Redis connection once per task instance.
     *
     * BUGFIX: the original caught and swallowed any connection/auth failure,
     * leaving {@code jedis} null and causing an NPE later in
     * {@link #processElement}. Failures now propagate so Flink fails the task
     * (and can restart it) instead of running with a dead connection.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        jedis = new Jedis("xx.xx.xx.xx");
        jedis.auth("xxxx");
        jedis.select(4);
    }

    /**
     * Releases the Redis connection.
     *
     * BUGFIX: guards against {@code jedis} being null when {@link #open}
     * failed before the connection was created.
     */
    @Override
    public void close() throws Exception {
        if (jedis != null) {
            jedis.close();
        }
    }

    /**
     * Increments a Redis counter keyed by {@code access_time + "_" + site_id}
     * for each record and forwards the record downstream unchanged.
     */
    @Override
    public void processElement(AccessLog value, Context ctx, Collector<AccessLog> out)
            throws Exception {
        System.out.println("redis url==" + value.getRequest_url());
        String key = value.getAccess_time() + "_" + value.getSite_id();
        jedis.incr(key);
        out.collect(value);
    }
}
package com.xiaohei.rep;
/**
 * Plain JavaBean representing one parsed access-log record. All fields are
 * kept as strings as delivered by the log parser; a public no-arg constructor
 * and getter/setter pairs are provided so Flink can treat it as a POJO.
 */
public class AccessLog {
    private String client_ip;     // client IP address
    private String access_time;   // access timestamp (string form)
    private String request_body;  // request payload
    private String request_url;   // requested URL
    private String page_id;       // page identifier
    private String site_id;       // site identifier
    private String response_code; // HTTP response code
    private String bytes_sent;    // response size in bytes
    private String refer;         // HTTP referer
    private String browser_type;  // user-agent / browser type

    /** No-arg constructor required for POJO serialization. */
    public AccessLog() {}

    public String getClient_ip() {
        return client_ip;
    }

    public void setClient_ip(String client_ip) {
        this.client_ip = client_ip;
    }

    public String getAccess_time() {
        return access_time;
    }

    public void setAccess_time(String access_time) {
        this.access_time = access_time;
    }

    public String getRequest_body() {
        return request_body;
    }

    public void setRequest_body(String request_body) {
        this.request_body = request_body;
    }

    public String getRequest_url() {
        return request_url;
    }

    public void setRequest_url(String request_url) {
        this.request_url = request_url;
    }

    public String getPage_id() {
        return page_id;
    }

    public void setPage_id(String page_id) {
        this.page_id = page_id;
    }

    public String getSite_id() {
        return site_id;
    }

    public void setSite_id(String site_id) {
        this.site_id = site_id;
    }

    public String getResponse_code() {
        return response_code;
    }

    public void setResponse_code(String response_code) {
        this.response_code = response_code;
    }

    public String getBytes_sent() {
        return bytes_sent;
    }

    public void setBytes_sent(String bytes_sent) {
        this.bytes_sent = bytes_sent;
    }

    public String getRefer() {
        return refer;
    }

    public void setRefer(String refer) {
        this.refer = refer;
    }

    public String getBrowser_type() {
        return browser_type;
    }

    public void setBrowser_type(String browser_type) {
        this.browser_type = browser_type;
    }

    /** Returns the request URL only — kept for log/print readability. */
    @Override // FIX: annotation was missing on this Object override
    public String toString() {
        return this.request_url;
    }
}
Refer
- https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/dev/connectors/kafka.html