Big Data - Integrating Flink with Kafka (Part 6)

What

Flink implements a variety of connectors for reading and writing data across different platforms. Using Java or Scala, Flink can be integrated with components such as Kafka, Twitter, RabbitMQ, Elasticsearch, and Cassandra. Flink can both write data out to Kafka and consume data coming in from Kafka.

How (Flink -> Kafka)


  1. Environment
    a. Kafka: 2.13 (likely the Scala build version of the Kafka distribution, e.g. kafka_2.13-x.y.z)
    b. Flink: 1.11
  2. POM configuration
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.11.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.11.1</version>
</dependency>
  3. Code
package com.xiaohei;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class Flink2Kafka {
    public static void main(String[] args) throws Exception {
        // Create the execution environment
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Create the source: read text lines from a socket
        DataStream<String> lines = env.socketTextStream("xx.xx.xx.xx", 9090);

        // Create the sink: a Kafka producer writing to my-topic
        FlinkKafkaProducer<String> myProducer = new FlinkKafkaProducer<String>(
                "xx.xx.xx.xx:9092",
                "my-topic",
                new SimpleStringSchema()
        );

        // Attach the sink to the stream
        lines.addSink(myProducer);
        lines.print();

        // Execute the Flink job
        env.execute("flink2kafka test");
    }
}
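
The three-argument constructor above gives the default at-least-once behavior. Flink 1.11 also ships a FlinkKafkaProducer constructor that takes producer Properties and a delivery Semantic, which can be configured for exactly-once writes (this also requires checkpointing to be enabled). A minimal sketch, not part of the original example, that could replace the sink construction inside Flink2Kafka.main (broker and topic are placeholders; the imports go at the top of the file):

import java.nio.charset.StandardCharsets;
import java.util.Properties;

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

// Sketch: the Flink 1.11 producer variant with configurable delivery semantics.
Properties props = new Properties();
props.setProperty("bootstrap.servers", "xx.xx.xx.xx:9092");
// For EXACTLY_ONCE the transaction timeout must not exceed the broker's
// transaction.max.timeout.ms (15 minutes by default).
props.setProperty("transaction.timeout.ms", "600000");

FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<>(
        "my-topic",
        new KafkaSerializationSchema<String>() {
            @Override
            public ProducerRecord<byte[], byte[]> serialize(String element, Long timestamp) {
                return new ProducerRecord<>("my-topic", element.getBytes(StandardCharsets.UTF_8));
            }
        },
        props,
        FlinkKafkaProducer.Semantic.EXACTLY_ONCE);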

How (Kafka -> Flink)

  1. Environment
    a. Kafka: 2.13 (likely the Scala build version of the Kafka distribution, e.g. kafka_2.13-x.y.z)
    b. Flink: 1.11
  2. POM configuration
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.11.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.11.1</version>
</dependency>
  3. Code
package com.xiaohei;

import com.xiaohei.rep.PageCount;
import com.xiaohei.util.MapUtil;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;

import java.util.HashMap;
import java.util.Properties;

public class Kafka2Flink {
    public static void main(String[] args) throws Exception {
        // Create the execution environment
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka consumer configuration
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "xx.xx.xx.xx:9092");
        properties.setProperty("group.id", "test");

        // Create the source: consume my-topic2 from Kafka
        DataStream<String> stream = env
                .addSource(new FlinkKafkaConsumer<>("my-topic2", new SimpleStringSchema(), properties));
        // Parse each record into a PageCount, then count page views per URL
        stream.flatMap(new FlatMapFunction<String, PageCount>() {
            @Override
            public void flatMap(String accessLog, Collector<PageCount> out) throws Exception {
                HashMap<String, String> hm = MapUtil.FillValue(accessLog);
                if (!hm.isEmpty()) {
                    String url = hm.get("request_url");
                    String access_time = hm.get("access_time");
                    out.collect(new PageCount(url, 1L, access_time));
                }
                }
            }
        })
        .keyBy("url")
        .timeWindow(Time.seconds(10), Time.seconds(5))
        // Sum the counts for each URL within a window
        .reduce(new ReduceFunction<PageCount>() {
            @Override
            public PageCount reduce(PageCount prev, PageCount next) {
                return new PageCount(prev.getUrl(), prev.getCount() + next.getCount(), prev.getAccess_time());
            }
        }).print().setParallelism(1);

        env.execute("Kafka2Flink test");
    }
}
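
The example above depends on two project-local classes that are not shown: MapUtil.FillValue, which parses an access-log line into a field map, and the PageCount POJO. For the field-expression keyBy("url") to work, PageCount must be a valid Flink POJO: public class, public no-arg constructor, and getters/setters for each field. A minimal hypothetical version consistent with the calls made above:

package com.xiaohei.rep;

// Hypothetical minimal POJO matching the fields the example uses;
// the real project class may carry more fields.
public class PageCount {
    private String url;
    private Long count;
    private String access_time;

    public PageCount() {}

    public PageCount(String url, Long count, String access_time) {
        this.url = url;
        this.count = count;
        this.access_time = access_time;
    }

    public String getUrl() { return url; }
    public void setUrl(String url) { this.url = url; }
    public Long getCount() { return count; }
    public void setCount(Long count) { this.count = count; }
    public String getAccess_time() { return access_time; }
    public void setAccess_time(String access_time) { this.access_time = access_time; }
}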

How (Kafka -> Flink -> Redis)

  1. Environment
    a. Kafka: 2.13 (likely the Scala build version of the Kafka distribution, e.g. kafka_2.13-x.y.z)
    b. Flink: 1.11
  2. POM configuration
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.11.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.11.1</version>
</dependency>
<dependency>
    <groupId>redis.clients</groupId>
    <artifactId>jedis</artifactId>
    <version>2.9.0</version>
</dependency>
  3. Code
package com.xiaohei;

import com.xiaohei.logic.RedisProcess;
import com.xiaohei.rep.AccessLog;
import com.xiaohei.util.LogUtil;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;

import java.util.Properties;

public class WriteDB {
    public static void main(String[] args) throws Exception {
        // Create the execution environment
        final StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka consumer configuration
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "xx.xx.xx.xx:9092");
        properties.setProperty("group.id", "test");

        // Create the source: consume my-topic2 from Kafka
        DataStream<String> stream = env
                .addSource(new FlinkKafkaConsumer<>("my-topic2", new SimpleStringSchema(), properties));
        // Parse each record into an AccessLog
        stream.flatMap(new FlatMapFunction<String, AccessLog>() {
            @Override
            public void flatMap(String access_str, Collector<AccessLog> out) {
                AccessLog log = LogUtil.FillValue(access_str);
                out.collect(log);
            }
        })
        // Keep only records that parsed a request URL
        .filter(new FilterFunction<AccessLog>() {
            @Override
            public boolean filter(AccessLog log) throws Exception {
                return log.getRequest_url() != null;
            }
        })
        // Update per-site counters in Redis via a ProcessFunction
        .process(new RedisProcess());

        env.execute("flink to redis");
    }
}
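
LogUtil.FillValue above is another project-local parser that is not shown here. One caveat worth adding: with checkpointing enabled, the FlinkKafkaConsumer commits offsets to Kafka on checkpoint completion instead of relying on the Kafka client's auto-commit, and the pipeline gains at-least-once guarantees. A one-line sketch, placed right after the environment is created (the 5-second interval is an arbitrary choice):

// Enable checkpointing so Kafka offsets are committed on checkpoint
// completion; 5000 ms is an arbitrary interval for illustration.
env.enableCheckpointing(5000);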

package com.xiaohei.logic;

import com.xiaohei.rep.AccessLog;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.configuration.Configuration;
import redis.clients.jedis.Jedis;

public class RedisProcess extends ProcessFunction<AccessLog, AccessLog> {
    private static final long serialVersionUID = 1L;

    private Jedis jedis;

    @Override
    public void open(Configuration parameters) throws Exception {
        // Open one Redis connection per parallel task instance
        try {
            jedis = new Jedis("xx.xx.xx.xx");
            jedis.auth("xxxx");
            jedis.select(4);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void close() throws Exception {
        // Release the connection when the task shuts down
        jedis.close();
    }

    @Override
    public void processElement(AccessLog value, Context ctx, Collector<AccessLog> out)
            throws Exception {
        System.out.println("redis url==" + value.getRequest_url());
        // Increment a counter keyed by access time and site id
        String key = value.getAccess_time() + "_" + value.getSite_id();
        jedis.incr(key);
        out.collect(value);
    }
}

package com.xiaohei.rep;

public class AccessLog {

    private String client_ip;
    private String access_time;
    private String request_body;
    private String request_url;
    private String page_id;
    private String site_id;
    private String response_code;
    private String bytes_sent;
    private String refer;
    private String browser_type;

    public AccessLog() {}

    public String getClient_ip() {
        return client_ip;
    }

    public void setClient_ip(String client_ip) {
        this.client_ip = client_ip;
    }

    public String getAccess_time() {
        return access_time;
    }

    public void setAccess_time(String access_time) {
        this.access_time = access_time;
    }

    public String getRequest_body() {
        return request_body;
    }

    public void setRequest_body(String request_body) {
        this.request_body = request_body;
    }

    public String getRequest_url() {
        return request_url;
    }

    public void setRequest_url(String request_url) {
        this.request_url = request_url;
    }

    public String getPage_id() {
        return page_id;
    }

    public void setPage_id(String page_id) {
        this.page_id = page_id;
    }

    public String getSite_id() {
        return site_id;
    }

    public void setSite_id(String site_id) {
        this.site_id = site_id;
    }

    public String getResponse_code() {
        return response_code;
    }

    public void setResponse_code(String response_code) {
        this.response_code = response_code;
    }

    public String getBytes_sent() {
        return bytes_sent;
    }

    public void setBytes_sent(String bytes_sent) {
        this.bytes_sent = bytes_sent;
    }

    public String getRefer() {
        return refer;
    }

    public void setRefer(String refer) {
        this.refer = refer;
    }

    public String getBrowser_type() {
        return browser_type;
    }

    public void setBrowser_type(String browser_type) {
        this.browser_type = browser_type;
    }

    @Override
    public String toString() {
        return this.request_url;
    }

}
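
To verify what RedisProcess writes, the counters can be read back with the same Jedis client. A small hypothetical checker, assuming the same host, password, and database as the code above (the sample key is made up):

import redis.clients.jedis.Jedis;

public class RedisCheck {
    public static void main(String[] args) {
        // Connect with the same host/auth/db as RedisProcess (placeholders).
        try (Jedis jedis = new Jedis("xx.xx.xx.xx")) {
            jedis.auth("xxxx");
            jedis.select(4);
            // Keys have the form "<access_time>_<site_id>"; INCR stores
            // the count as a string value.
            String count = jedis.get("2020-09-01_site01");
            System.out.println("page views: " + count);
        }
    }
}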

References

  1. https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/dev/connectors/kafka.html