Flink Installation and Deployment

This article uses:

  flink-1.10.2-bin-scala_2.11.tgz
  Cluster: three nodes (master, slave1, slave2)

Standalone Mode Deployment

  1. Set up an ordinary distributed Hadoop cluster (omitted).
  2. Configure /etc/hosts and passwordless SSH (omitted).
  3. Extract: tar -zxf flink-1.10.2-bin-scala_2.11.tgz -C /usr/apps
  4. Rename: mv flink-1.10.2-bin-scala_2.11 flink-1.10.2 (usually the archive already extracts as flink-1.10.2).
[root@master flink-1.10.2]# ll
total 576
drwxr-xr-x. 2 root root   4096 Aug 17 2020 bin
drwxr-xr-x. 2 root root   4096 Nov 21 19:42 conf
drwxr-xr-x. 7 root root     71 Aug 17 2020 examples
drwxr-xr-x. 2 root root   4096 Aug 17 2020 lib
-rw-r--r--. 1 root root  11357 Aug 15 2020 LICENSE
drwxr-xr-x. 2 root root   4096 Aug 17 2020 licenses
drwxr-xr-x. 2 root root      6 Aug 16 2020 log
-rw-r--r--. 1 root root 550566 Aug 17 2020 NOTICE
drwxr-xr-x. 2 root root   4096 Aug 17 2020 opt
drwxr-xr-x. 2 root root     23 Aug 15 2020 plugins
-rw-r--r--. 1 root root   1309 Aug 15 2020 README.txt
  5. Edit the configuration files:
    Ⅰ. vim ./conf/masters
[root@master flink-1.10.2]# vim ./conf/masters
# Set the following content:
master:8081

    Ⅱ. vim ./conf/slaves

[root@master flink-1.10.2]# vim ./conf/slaves
# Set the following content:
slave1
slave2

    Ⅲ. vim ./conf/flink-conf.yaml

[root@master flink-1.10.2]# vim ./conf/flink-conf.yaml
# Changes:
# RPC address of the JobManager (the job submission entry point)
jobmanager.rpc.address: master
# JVM heap size for the JobManager
jobmanager.heap.size: 1024m
# Total process memory of each TaskManager (increase this to give tasks more memory)
taskmanager.memory.process.size: 1728m
# Number of task slots per TaskManager (a value of "1" means a single slot)
taskmanager.numberOfTaskSlots: 2
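With two TaskManagers (slave1 and slave2) offering two slots each, this cluster exposes four task slots in total, so a single job can run with a parallelism of up to 4.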
  6. Distribute the installation to slave1 and slave2:
scp -r /usr/apps/flink-1.10.2 slave1:/usr/apps/
scp -r /usr/apps/flink-1.10.2 slave2:/usr/apps/
  7. Start the cluster:
# master
[root@master flink-1.10.2]# ./bin/start-cluster.sh 
Starting cluster.
[INFO] 1 instance(s) of standalonesession are already running on master.
Starting standalonesession daemon on host master.
Starting taskexecutor daemon on host slave1.
Starting taskexecutor daemon on host slave2.
[root@master flink-1.10.2]# jps
23952 StandaloneSessionClusterEntrypoint
12967 NameNode
13751 Master
13048 ResourceManager
25032 Jps
13227 DataNode
24268 TaskManagerRunner
13246 NodeManager
[root@master flink-1.10.2]# 

# slave1
[root@slave1 ~]# jps
3746 NodeManager
3989 Worker
3687 DataNode
14024 Jps
13962 TaskManagerRunner
3933 SecondaryNameNode
[root@slave1 ~]#

# slave2
[root@slave2 ~]# jps
3840 Worker
3603 DataNode
11850 TaskManagerRunner
11898 Jps
3662 NodeManager
[root@slave2 ~]# 
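Once all daemons are up, the Flink web UI should be reachable at http://master:8081 (the address configured in conf/masters).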

HA Mode Deployment

  1. Edit the bundled ZooKeeper configuration file, conf/zoo.cfg:
[root@master flink-1.10.2]# vim ./conf/zoo.cfg

server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
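Flink ships a helper script, ./bin/start-zookeeper-quorum.sh, which can start a ZooKeeper quorum on the hosts listed in conf/zoo.cfg; pointing the cluster at an existing external ZooKeeper ensemble works just as well.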
  2. Edit masters:
[root@master flink-1.10.2]# vim ./conf/masters

master1:8081
master2:8081
  3. Edit slaves:
[root@master flink-1.10.2]# vim ./conf/slaves

master1
master2
slave1
  4. Edit flink-conf.yaml:
[root@master flink-1.10.2]# vim ./conf/flink-conf.yaml

 
jobmanager.rpc.port: 6123

# The heap size for the JobManager JVM
jobmanager.heap.size: 1024m

# The heap size for the TaskManager JVM
taskmanager.heap.size: 1024m

# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline.
taskmanager.numberOfTaskSlots: 2

# The parallelism used for programs that did not specify any other parallelism.
parallelism.default: 1

# The default file system scheme and authority.
#
# By default file paths without scheme are interpreted relative to the local
# root file system 'file:///'. Use this to override the default and interpret
# relative paths relative to a different file system,
# for example 'hdfs://mynamenode:12345'
#
# fs.default-scheme

#==============================================================================
# High Availability
#==============================================================================

# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
#
high-availability: zookeeper

# The path where metadata for master recovery is persisted. While ZooKeeper stores
# the small ground truth for checkpoint and leader election, this location stores
# the larger objects, like persisted dataflow graphs.
#
# Must be a durable file system that is accessible from all nodes
# (like HDFS, S3, Ceph, nfs, ...)
#
high-availability.storageDir: hdfs://jh/flink/ha/

# The list of ZooKeeper quorum peers that coordinate the high-availability
# setup. This must be a list of the form:
# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
#
high-availability.zookeeper.quorum: node7-1:2181,node7-2:2181,node7-3:2181
high-availability.zookeeper.path.root: /flink
high-availability.cluster-id: /flinkCluster

# ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
# The default value is "open" and it can be changed to "creator" if ZK security is enabled
#
# high-availability.zookeeper.client.acl: open

#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================

# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled.
#
# Supported backends are 'jobmanager', 'filesystem', 'rocksdb', or the
# <class-name-of-factory>.
#
state.backend: filesystem

# Directory for checkpoints filesystem, when using any of the default bundled
# state backends.
#
state.checkpoints.dir: hdfs://node7-1/flink/checkpoints

# Default target directory for savepoints, optional.
#
state.savepoints.dir: hdfs://node7-1/flink/savepoints

# Flag to enable/disable incremental checkpoints for backends that
# support incremental checkpoints (like the RocksDB state backend).
#
# state.backend.incremental: false

#==============================================================================
# Web Frontend
#==============================================================================

# The address under which the web-based runtime monitor listens.
#
#web.address: 0.0.0.0

# The port under which the web-based runtime monitor listens.
# A value of -1 deactivates the web server.

rest.port: 8081
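Note: taskmanager.heap.size is the pre-1.10 memory key; Flink 1.10 reworked TaskManager memory configuration around taskmanager.memory.process.size (used in the standalone section above), so the newer key is preferable here as well.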
  5. Edit config.sh in the bin directory:
[root@master flink-1.10.2]# vim ./bin/config.sh


  6. Upload the Hadoop dependency jar into Flink's lib directory:
flink-shaded-hadoop2-uber-1.7.2.jar
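As an alternative to placing a shaded Hadoop uber jar in lib/, the Flink documentation also supports exporting the Hadoop classpath on every node, e.g. export HADOOP_CLASSPATH=$(hadoop classpath).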
  7. Verify:
    Kill the JobManager process on master1; if master2 takes over as the active JobManager, the HA setup works.
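A quick way to run this failover check from the shell (a sketch; the actual PID will differ):

# On master1: find the JobManager PID and kill it
jps | grep StandaloneSessionClusterEntrypoint
kill -9 <pid-from-jps>
# Then open the web UI on master2:8081 - the standby should become the active JobManager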

Updated November 13, 2021

Flink Web UI and a Simple Example

In IDEA:
pom.xml configuration:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>Flinktutorial</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.10.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>1.10.1</version>
        </dependency>
    </dependencies>
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
</project>
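Note that these dependencies are version 1.10.1 while the cluster runs 1.10.2; patch-level drift usually works for submitted jobs, but matching the cluster version exactly is the safer choice.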
# Stream data processing
package AdminLog;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class StreamWordCount {
    public static void main(String[] args) throws Exception{
        // Create the stream execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism: if not set explicitly, the local environment defaults to the machine's CPU core count
        //env.setParallelism(1);

//        // Read data from a file
//        String inputPath = "/usr/apps/flink-1.10.2/file/hello.txt";
//        DataStream<String> inputDataStream = env.readTextFile(inputPath);

        // Use ParameterTool to extract configuration from the program arguments
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        String host = parameterTool.get("host");
        int port = parameterTool.getInt("port");


        // Read data from a socket text stream
        DataStream<String> inputDataStream = env.socketTextStream(host,port);

        // Transform the stream: MyFlatMapper (defined in the companion WordCount class, sketched below) emits (word, 1) pairs
        SingleOutputStreamOperator<Tuple2<String, Integer>> resultStream = inputDataStream.flatMap(new WordCount.MyFlatMapper())
                .keyBy(0)
                .sum(1).setParallelism(2);
        resultStream.print().setParallelism(1);

        // Execute the job
        env.execute();
    }
}
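The MyFlatMapper referenced above is defined in a companion WordCount class that the post does not include. A minimal sketch of what such a tokenizer usually looks like (an assumed reconstruction, not the author's original):

package AdminLog;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class WordCount {
    // Hypothetical implementation: split each line on whitespace and emit (word, 1)
    public static class MyFlatMapper implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
            for (String word : line.split("\\s+")) {
                if (!word.isEmpty()) {
                    out.collect(new Tuple2<>(word, 1));
                }
            }
        }
    }
}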
  • Build the jar package (screenshot omitted); the output path is shown in IDEA's console output at the bottom.
  • Upload it to Flink (screenshot omitted).
  • In IDEA, right-click the project name -> Copy Path -> Copy Reference (screenshot omitted).

Starting/Stopping Jobs from the Command Line

  • Upload the jar to any path on master.
  • Start: [root@master flink-1.10.2]# ./bin/flink run -c com.AdminLog.wc.StreamWordCount -p 3 /usr/apps/flink-1.10.2/jar/Flinktutorial-1.0-SNAPSHOT.jar --host master --port 7777

-c entry class, -p parallelism
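Before submitting, start the socket source the job reads from (otherwise the job fails to connect); for example, in another terminal on master:

nc -lk 7777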

  • The following appears:
[root@master flink-1.10.2]# ./bin/flink run -c com.AdminLog.wc.StreamWordCount -p 3 /usr/apps/flink-1.10.2/jar/Flinktutorial-1.0-SNAPSHOT.jar --host master --port 7777
Job has been submitted with JobID 181cbfb0be7cdc36719367eeefb34472
# The string after "JobID" (181cbfb0be7cdc36719367eeefb34472) is the job ID; it can also be seen in the web UI
# The client blocks in the foreground; Ctrl+C detaches the client, but the job itself keeps running
  • View the job in the web UI (screenshot omitted).

Listing Job IDs

flink list
[root@master flink-1.10.2]# ./bin/flink list
Waiting for response...
------------------ Running/Restarting Jobs -------------------
13.11.2021 18:01:14 : 181cbfb0be7cdc36719367eeefb34472 : Flink Streaming Job (RUNNING)
--------------------------------------------------------------
No scheduled jobs.

Cancelling a Job

flink cancel
[root@master flink-1.10.2]# ./bin/flink cancel 181cbfb0be7cdc36719367eeefb34472
Cancelling job 181cbfb0be7cdc36719367eeefb34472.
Cancelled job 181cbfb0be7cdc36719367eeefb34472.
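To take a savepoint before stopping, this Flink version also supports ./bin/flink stop <jobId> (or ./bin/flink cancel -s <targetDirectory> <jobId>), which writes the savepoint to the state.savepoints.dir configured in the HA section.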