Flink environment setup and a getting-started demo

I. Environment setup

1. Download

Download page:
https://flink.apache.org/downloads.html#apache-flink-1124
Direct download link for flink-1.12.4:
https://apache.website-solution.net/flink/flink-1.12.4/flink-1.12.4-bin-scala_2.11.tgz
I'm using version 1.12.4. Note that this version must stay consistent with the Flink dependency versions declared in the Java project's pom.xml.
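
One way to keep the versions aligned is to declare the Flink version once as a Maven property and reference it from every Flink dependency; a minimal sketch (the property name is my own choice):

<properties>
    <flink.version>1.12.4</flink.version>
</properties>

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>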

2. Run

Extract the archive and start the cluster:

$ tar -zxvf flink-1.12.4-bin-scala_2.11.tgz
$ cd flink-1.12.4
$ ./bin/start-cluster.sh 
Starting cluster.
Starting standalonesession daemon on host MacdeMacBook-Pro-2.local.
Starting taskexecutor daemon on host MacdeMacBook-Pro-2.local.
$ 
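
To shut the local cluster down again later, use the matching stop script that ships with the distribution:

$ ./bin/stop-cluster.sh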

3. Access the web UI

Open http://localhost:8081/ to access the web dashboard.

4. Submit a job

Package the demo that integrates Flink and submit it to the Flink cluster to run as a job.

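Besides uploading the jar on the "Submit New Job" page of the web dashboard, you can also submit from the command line with the flink CLI. A sketch, assuming the demo below is packaged as rule-engine.jar (the jar name and path are placeholders):

$ ./bin/flink run -c com.rules.engine.task.KafkaMessageDeal3 /path/to/rule-engine.jar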

II. Getting-started demo

1. pom dependencies

<!--        Flink Java / streaming API (Scala 2.11 builds) -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>1.12.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.11</artifactId>
    <version>1.12.4</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.12.4</version>
</dependency>
<!--        Flink Stateful Functions -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>statefun-sdk</artifactId>
    <version>2.2.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>statefun-flink-harness</artifactId>
    <version>3.0.0</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
</dependency>

2. The demo

package com.rules.engine.task;

import com.alibaba.fastjson.JSONObject;
import com.rules.engine.beans.Product;
import com.rules.engine.entity.UserInfoCount;
import com.rules.engine.rule.*;
import com.rules.engine.source.MyRuleSource;
import com.rules.engine.source.RulesSource;
import com.rules.engine.utils.KafkaProducer;
import com.rules.engine.utils.MySqlSink2;
import com.rules.engine.utils.MySqlSink3;
import com.rules.engine.vo.UserInfoVo;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.streaming.api.functions.co.CoFlatMapFunction;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;

import java.util.Properties;

/**
 * @Author dingws
 * @PackageName rule_engine
 * @Package com.rules.engine.task
 * @Date 2021/7/7 7:25 PM
 * @Version 1.0
 */
@Slf4j
public class KafkaMessageDeal3 {
    public static void main(String[] args) throws Exception {
        long delay = 5000L;
        long windowGap = 5000L;

        // 1. Set up the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(6000L);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.setParallelism(1);

        // 2. Configure the Kafka source properties
        Properties props = new Properties();
        props.put("bootstrap.servers", "172.21.88.77:9092");
        props.put("group.id", KafkaProducer.TOPIC_GROUP1);

        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<String>(KafkaProducer.TOPIC_TEST, new SimpleStringSchema(), props);

        // Configure the rule source
        DataStream<Product> rulesStream = env.addSource(new MyRuleSource());

        // Attach the Kafka consumer as a data source
        DataStreamSource<String> dataStreamSource = env.addSource(consumer).setParallelism(1);

        // Read messages from Kafka and convert them into UserInfoVo objects
        DataStream<UserInfoVo> dataStream = dataStreamSource.map(value -> JSONObject.parseObject(value, UserInfoVo.class));

        dataStream
                .connect(rulesStream)
                .flatMap(new CoFlatMapFunction<UserInfoVo, Product, UserInfoVo>() {
            private Product localProduct;
            @Override
            public void flatMap1(UserInfoVo userInfoVo, Collector<UserInfoVo> collector) throws Exception {
                System.out.println("---------userInfoVo = " + userInfoVo);
                String[] id = userInfoVo.getId().split("_");
                String idTime = id[id.length - 1];
                int time = Integer.parseInt(idTime);
                if (localProduct == null) {
                    // no rule has arrived yet on the rule stream, so skip this record
                    return;
                }
                if (localProduct.getDiscount() == 100){
                    if (time % 2 == 0){
                        collector.collect(userInfoVo);
                    }
                } else {
                    if (time > 5){
                        collector.collect(userInfoVo);
                    }
                }
            }

            @Override
            public void flatMap2(Product product, Collector<UserInfoVo> collector) throws Exception {
                localProduct = product;
                System.out.println("---------product = " + product);
            }
        }).addSink(new MySqlSink2()).name("save mysql with rule");

        dataStream
                .keyBy("id")
                .window(SlidingProcessingTimeWindows.of(Time.seconds(60), Time.seconds(60)))
                .aggregate(new AggregateFunction<UserInfoVo, UserInfoCount, UserInfoCount>() {

                    @Override
                    public UserInfoCount createAccumulator() {
                        UserInfoCount count = new UserInfoCount();
                        count.setId(1);
                        count.setUserId(null);
                        count.setCount(0);
                        return count;
                    }

                    @Override
                    public UserInfoCount add(UserInfoVo userInfoVo, UserInfoCount userInfoCount) {
                        if (userInfoCount.getUserId() == null){
                            userInfoCount.setUserId(userInfoVo.getId());
                        }

                        if (userInfoCount.getUserId().equals(userInfoVo.getId())){
                            userInfoCount.setCount(userInfoCount.getCount() + 1);
                        }
                        return userInfoCount;
                    }

                    @Override
                    public UserInfoCount getResult(UserInfoCount userInfoCount) {
                        return userInfoCount;
                    }

                    @Override
                    public UserInfoCount merge(UserInfoCount userInfoCount, UserInfoCount acc1) {
                        // merge two partial accumulators by summing their counts
                        userInfoCount.setCount(userInfoCount.getCount() + acc1.getCount());
                        return userInfoCount;
                    }
                })
                .keyBy("userId")
                .addSink(new MySqlSink3());

        env.execute("KafkaMessageDeal3");
    }

    private static DataStream<MyRule> getRulesUpdateStream(StreamExecutionEnvironment env, ParameterTool parameter) {
        String name = "Rule Source";
        SourceFunction<String> rulesSource = RulesSource.createRulesSource(parameter);
        DataStream<String> rulesStrings = env.addSource(rulesSource).name(name).setParallelism(1);
        return RulesSource.stringsStreamToRules(rulesStrings);
    }
}
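
The job also references a few project classes that are not listed in this post: the KafkaProducer topic constants, the Product rule bean, and MyRuleSource. Here is a minimal sketch of what they might look like, based purely on how the demo uses them (the field names, topic names, and emission logic are assumptions; each class goes into its own file under the package shown in the imports above):

import lombok.Data;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.io.Serializable;

// com.rules.engine.utils.KafkaProducer: only the constants the job reads are sketched here
public class KafkaProducer {
    public static final String TOPIC_TEST = "topic_test";
    public static final String TOPIC_GROUP1 = "topic_group1";
}

// com.rules.engine.beans.Product: the rule bean; the job only calls getDiscount()
@Data
public class Product implements Serializable {
    private int discount;
}

// com.rules.engine.source.MyRuleSource: periodically emits the current rule
public class MyRuleSource implements SourceFunction<Product> {
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Product> ctx) throws Exception {
        while (running) {
            Product product = new Product();
            product.setDiscount(100);   // example rule value checked by the CoFlatMapFunction
            ctx.collect(product);
            Thread.sleep(10_000L);      // refresh the rule every 10 seconds
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}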

UserInfoVo implementation:

import lombok.Data;
import lombok.NoArgsConstructor;

import java.io.Serializable;

/**
 * @Author dingws
 * @PackageName rule_engine
 * @Package com.rules.engine.vo
 * @Date 2021/7/6 6:23 PM
 * @Version 1.0
 */
@Data
@NoArgsConstructor
public class UserInfoVo implements Serializable {
    private String id;
    private String name;
    private String deviceId;
    private Long beginTime;
    private Long endTime;

    public UserInfoVo(UserInfoVo userInfoVo) {
        this.id = userInfoVo.id;
        this.name = userInfoVo.name;
        this.deviceId = userInfoVo.deviceId;
        this.beginTime = userInfoVo.beginTime;
        this.endTime = userInfoVo.endTime;
    }
}
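
UserInfoCount (the accumulator type used by the aggregate function and written by MySqlSink3) is not shown in the post either; a minimal sketch based on the fields the demo accesses:

import lombok.Data;

import java.io.Serializable;

@Data
public class UserInfoCount implements Serializable {
    private Integer id;      // set to 1 in createAccumulator()
    private String userId;   // id of the user whose records are being counted
    private Integer count;   // number of records seen in the window
}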

MySqlSink2 implementation:

import com.rules.engine.vo.UserInfoVo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.Timestamp;
import java.util.List;

/**
 * @Author dingws
 * @PackageName rule_engine
 * @Package com.rules.engine.utils
 * @Date 2021/7/9 2:10 PM
 * @Version 1.0
 */
public class MySqlSink2 extends RichSinkFunction<UserInfoVo> {

//    @Autowired
//    private JdbcTemplate jdbcTemplate;

    private PreparedStatement ps;
    private Connection connection;

    @Override
    public void open(Configuration parameters) throws Exception {
        System.out.println("------MySqlSink2 open");
        super.open(parameters);
        // Get a database connection in preparation for writing
        connection = DbUtils.getConnection();
        String sql = "insert into user_info(id, name, deviceId, beginTime, endTime) values (?, ?, ?, ?, ?); ";
        ps = connection.prepareStatement(sql);
    }

    @Override
    public void close() throws Exception {
        System.out.println("------MySqlSink2 close");
        super.close();
        // Close and release resources: the statement first, then the connection
        if (ps != null) {
            ps.close();
        }

        if (connection != null) {
            connection.close();
        }
    }

    @Override
    public void invoke(UserInfoVo userInfo, Context context) throws Exception {
        System.out.println("------MySqlSink2 invoke");
        ps.setString(1, userInfo.getId());
        ps.setString(2, userInfo.getName());
        ps.setString(3, userInfo.getDeviceId());
        ps.setTimestamp(4, new Timestamp(userInfo.getBeginTime()));
        ps.setTimestamp(5, new Timestamp(userInfo.getEndTime()));
        ps.addBatch();

        // Write the batch in one go
        int[] count = ps.executeBatch();
        System.out.println("-------- Rows written to MySQL: " + count.length);

    }
}

MySqlSink3 implementation:

import com.rules.engine.entity.UserInfoCount;
import com.rules.engine.vo.UserInfoVo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.Timestamp;

/**
 * @Author dingws
 * @PackageName rule_engine
 * @Package com.rules.engine.utils
 * @Date 2021/7/9 2:10 PM
 * @Version 1.0
 */
public class MySqlSink3 extends RichSinkFunction<UserInfoCount> {
    private PreparedStatement ps;
    private Connection connection;

    @Override
    public void open(Configuration parameters) throws Exception {
        System.out.println("------MySqlSink3 open");
        super.open(parameters);
        // Get a database connection in preparation for writing
        connection = DbUtils.getConnection();
        String sql = "insert into user_info_count(userId, count) values (?, ?); ";
        ps = connection.prepareStatement(sql);
    }

    @Override
    public void close() throws Exception {
        System.out.println("------MySqlSink3 close");
        super.close();
        // Close and release resources: the statement first, then the connection
        if (ps != null) {
            ps.close();
        }

        if (connection != null) {
            connection.close();
        }
    }

    @Override
    public void invoke(UserInfoCount userInfoCount, Context context) throws Exception {
        System.out.println("------MySqlSink3 invoke");
//        ps.setInt(1, 1);
        ps.setString(1, userInfoCount.getUserId());
        ps.setInt(2, userInfoCount.getCount());
        ps.addBatch();

        // Write the batch in one go
        int[] count = ps.executeBatch();
        System.out.println("-------- Rows written to MySQL: " + count.length);

    }
}
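
Both sinks call DbUtils.getConnection(), which is also not listed in the post; a minimal sketch using plain JDBC (the URL, user name, and password are placeholders and should match the datasource settings shown in the next section):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class DbUtils {

    private static final String URL =
            "jdbc:mysql://172.1.16.18:3306/geniubot?useSSL=false&serverTimezone=GMT%2B8";
    private static final String USER = "my_username";
    private static final String PASSWORD = "my_password";

    public static Connection getConnection() throws SQLException {
        // DriverManager picks up the MySQL driver (mysql-connector-java) from the classpath
        return DriverManager.getConnection(URL, USER, PASSWORD);
    }
}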

3. Integrating Kafka and MySQL

The demo above uses Kafka as the data source and MySQL as the output sink. If you don't need them, you can comment those parts out. Below is the Kafka and MySQL configuration I use:

  1. pom dependencies
        <!--        Kafka dependency -->
        <dependency>
            <groupId>org.springframework.kafka</groupId>
            <artifactId>spring-kafka</artifactId>
        </dependency>
        <!--        MySQL dependencies -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
            <version>2.3.3.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.16</version>
        </dependency>
  2. Configuration file
spring:
  datasource:
    url: jdbc:mysql://172.1.16.18:3306/geniubot?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true&useSSL=false&serverTimezone=GMT%2B8
    password: my_password
    username: my_username
    hikari:
      transaction-isolation: TRANSACTION_REPEATABLE_READ
    transaction:
      rollback-on-commit-failure: true
  kafka:
    bootstrap-servers: 172.21.88.77:9092
    producer:
      # Number of times a message is resent after a send error.
      retries: 0
      # When several messages are headed for the same partition, the producer groups them into one batch. This sets the batch size in bytes.
      batch-size: 16384
      # Size of the producer's memory buffer.
      buffer-memory: 33554432
      # Key serializer
      key-serializer: org.apache.kafka.common.serialization.StringSerializer
      # Value serializer
      value-serializer: org.apache.kafka.common.serialization.StringSerializer
#      value-serializer: com.rules.engine.utils.JSONSerializer
      # acks=0  : the producer does not wait for any response from the broker before treating the write as successful.
      # acks=1  : the producer gets a success response as soon as the partition leader has received the message.
      # acks=all: the producer only gets a success response once all replicating nodes have received the message.
      acks: 1
    consumer:
      # Auto-commit interval. In Spring Boot 2.x this is a Duration and must follow its format, e.g. 1S, 1M, 2H, 5D.
      auto-commit-interval: 1S
      # What the consumer does when reading a partition with no committed offset, or with an invalid offset:
      # latest (default): start from the newest records (those produced after the consumer started)
      # earliest: start from the beginning of the partition
      auto-offset-reset: earliest
      # Whether to auto-commit offsets (default true). Set it to false and commit manually to avoid duplicates and data loss.
      enable-auto-commit: false
      # Key deserializer
      key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
      # Value deserializer
      value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
#      value-deserializer: com.rules.engine.utils.JSONDeserializer
    listener:
      # Number of threads running in the listener container.
      concurrency: 5
      # The listener is responsible for acks; each call commits immediately.
      ack-mode: manual_immediate
      missing-topics-fatal: false
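
The two sinks write into the user_info and user_info_count tables, whose DDL is not included in the post. A sketch that matches the insert statements above (the column types and lengths are assumptions):

CREATE TABLE user_info (
    id        VARCHAR(64),
    name      VARCHAR(64),
    deviceId  VARCHAR(64),
    beginTime DATETIME,
    endTime   DATETIME
);

CREATE TABLE user_info_count (
    userId  VARCHAR(64),
    `count` INT
);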

4. Run results

(screenshot of the run results)
