下载安装
# Download the Flink 1.15.0 release (Scala 2.12 build) from the Apache archive
wget https://archive.apache.org/dist/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz
# Alternative: Tsinghua mirror (fixed typo: option is --no-check-certificate)
wget https://mirrors.tuna.tsinghua.edu.cn/apache/flink/flink-1.15.0/flink-1.15.0-bin-scala_2.12.tgz --no-check-certificate
# Extract the tarball
tar -zxvf flink-1.15.0-bin-scala_2.12.tgz
# Create a version-independent symlink so FLINK_HOME can stay stable across upgrades
ln -s flink-1.15.0 flink
配置masters和workers
# Add environment variables (edit /etc/profile): vim /etc/profile
export FLINK_HOME=/home/hadoop/bigdata_software/flink
export PATH=$FLINK_HOME/bin:$PATH
# Configure masters (vim conf/masters): JobManager host and Web UI port
stream-01:8081
# Configure workers (vim conf/workers): one TaskManager host per line
stream-01
stream-02
stream-03
配置flink配置文件 vim conf/flink-conf.yaml
jobmanager.rpc.address: stream-01
# High-availability mode (fixed typo: key was "igh-availability", which Flink
# would silently ignore, leaving HA disabled)
high-availability: zookeeper
# JobManager metadata is stored on DFS; ZooKeeper only keeps a pointer to that path
high-availability.storageDir: hdfs://cluster/flinkha/
# ZooKeeper quorum (hostnames/ports must match the actual ZK deployment)
high-availability.zookeeper.quorum: stream-01:2181,stream-02:2181,stream-03:2181
# (Optional) ZooKeeper root path for this cluster
#high-availability.zookeeper.path.root: /test_dir/test_standalone2_root
# Keep the localhost bind settings below commented out on a multi-node cluster
# jobmanager.bind-host: localhost
# taskmanager.bind-host: localhost
#taskmanager.host: localhost
#rest.address: localhost
#rest.bind-address: localhost
# Restart attempts for the YARN application master in HA mode.
# NOTE: the effective number is capped by yarn.resourcemanager.am.max-attempts
# in yarn-site.xml — keep the two settings consistent.
yarn.application-attempts: 10
修改yarn-site.xml
<property>
  <name>yarn.resourcemanager.am.max-attempts</name>
  <!-- Raised from 4 so it does not cap Flink's yarn.application-attempts
       (set to 10 in flink-conf.yaml); YARN's RM limit is the upper bound. -->
  <value>10</value>
  <description>
    The maximum number of application master execution attempts.
  </description>
</property>
配置zk:vim zoo.cfg
# ZooKeeper ensemble members: host:peer-communication-port:leader-election-port
server.1=stream-01:2888:3888
server.2=stream-02:2888:3888
server.3=stream-03:2888:3888
配置Hadoop集成
1.在三台机器上分别配置HADOOP_CLASSPATH
export HADOOP_CLASSPATH=$HADOOP_HOME/lib:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/tools/*:$HADOOP_HOME/share/hadoop/httpfs/*:$HADOOP_HOME/share/hadoop/kms/*:$HADOOP_HOME/share/hadoop/common/lib/*
2.添加jar包:
wget https://repository.cloudera.com/artifactory/cloudera-repos/org/apache/flink/flink-shaded-hadoop-3-uber/3.1.1.7.2.9.0-173-9.0/flink-shaded-hadoop-3-uber-3.1.1.7.2.9.0-173-9.0.jar
复制到worker(slave)节点
# Distribute the configured Flink installation to the worker nodes
scp -r /home/hadoop/bigdata_software/flink-1.15.0/ stream-02:/home/hadoop/bigdata_software/
scp -r /home/hadoop/bigdata_software/flink-1.15.0/ stream-03:/home/hadoop/bigdata_software/
启动并访问 stream-01:8081查看flink web ui是否正常启动
./bin/start-cluster.sh
job 测试
./bin/flink run -yjm 502m -ytm 1024m -ys 2 ./examples/batch/WordCount.jar
添加需要的cdcjar包
这里添加了 mysql ,oracle ,elasticsearch, jdbc
# MySQL CDC connector
wget https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-mysql-cdc/2.2.1/flink-sql-connector-mysql-cdc-2.2.1.jar
# Oracle CDC connector
wget https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-oracle-cdc/2.2.1/flink-sql-connector-oracle-cdc-2.2.1.jar
# Elasticsearch 7 SQL connector (fixed: URL was missing the .jar extension)
wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7/1.15.0/flink-sql-connector-elasticsearch7-1.15.0.jar
# JDBC connector (listed as required above but its download was missing)
wget https://repo1.maven.org/maven2/org/apache/flink/flink-connector-jdbc/1.15.0/flink-connector-jdbc-1.15.0.jar