1 Install
Download Flink from the Apache website and extract it to /opt/module.
2 Configuration
2.1 Set environment variables
[root@hadoop001 ~]# cd /opt/module/flink-1.12.7/
[root@hadoop001 flink-1.12.7]# echo export FLINK_HOME=`pwd` >> /etc/profile
[root@hadoop001 flink-1.12.7]# echo export PATH=\$PATH:\$FLINK_HOME/bin >> /etc/profile
[root@hadoop001 flink-1.12.7]# echo export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop >> /etc/profile
[root@hadoop001 flink-1.12.7]# source /etc/profile
2.2 Configure flink-conf.yaml
Edit conf/flink-conf.yaml, change the following options to:
jobmanager.rpc.address: hadoop001
taskmanager.numberOfTaskSlots: 2
web.submit.enable: true
jobmanager.archive.fs.dir: hdfs://ns/flink/completed-jobs/
historyserver.web.address: hadoop001
historyserver.web.port: 8082
historyserver.archive.fs.dir: hdfs://ns/flink/completed-jobs/
2.3 set masters
Update masters to hadoop001:8081
[root@hadoop001 conf]# echo hadoop001:8081 > masters
[root@hadoop001 conf]# cat masters
hadoop001:8081
2.4 set workers
Add hadoop001/2/3 to workers:
[root@hadoop001 conf]# echo "hadoop001
> hadoop002
> hadoop003" > workers
[root@hadoop001 conf]# cat workers
hadoop001
hadoop002
hadoop003
2.5 distribute to hadoop002/3
[root@hadoop001 module]# scp -r flink-1.12.7 hadoop002:`pwd`
[root@hadoop001 module]# scp -r flink-1.12.7 hadoop003:`pwd`
[root@hadoop001 module]# scp /etc/profile hadoop002:/etc
[root@hadoop001 module]# scp /etc/profile hadoop003:/etc
2.6 create job directory
[root@hadoop001 module]# hdfs dfs -mkdir -p hdfs://ns/flink/completed-jobs/
[root@hadoop001 module]# hdfs dfs -chmod 777 hdfs://ns/flink/completed-jobs/
3. Start cluster
3.1 Start cluster and history server
[root@hadoop001 module]# start-cluster.sh
[root@hadoop001 module]# historyserver.sh start
Starting historyserver daemon on host hadoop001.
3.2 check in web ui
Open http://hadoop001:8081/ in a browser.
![Flink web UI](/post_img/flink-webui.jpg)
4 Run tasks
Launch an example word count task:
[root@hadoop001 flink-1.12.7]# flink run ./examples/batch/WordCount.jar \
--input README.txt \
--output /root/readme-count.txt \
--parallelism 2
Job has been submitted with JobID 16f8f54c12b4c26ea8a95d719b63cdaa
Program execution finished
Job with JobID 16f8f54c12b4c26ea8a95d719b63cdaa has finished.
Job Runtime: 670 ms
According to the web UI, the task ran on hadoop002, so check the output there. ![Flink task detail](/post_img/flink-task-detail.jpg)
[root@hadoop002 ~]# ll
total 1574376
-rwxr-xr-x 1 root root 1612145664 Jun 27 21:38 bigdata.tar
drwxrwxrwx 8 501 games 4096 May 31 16:16 hadoop-install
-rw-r--r-- 1 root root 969 Jun 28 11:53 readme-count.txt
[root@hadoop002 ~]# more readme-count.txt
1 1
13 1
5d002 1
740 1
about 1
account 1
administration 1
algorithms 1
and 7
another 1
any 2
apache 5
as 1
ask 1
asymmetric 1
...
Flink successfully launched a task and output the result.
5 Flink on YARN
5.1 Session Mode
Features:
- A session must be started, and its resources requested, before any jobs are submitted
- Resources do not need to be requested again before each job, which improves efficiency
- The Job Manager and Task Managers keep running after jobs finish (waiting for the next job)
![Flink session mode](/post_img/flink-session.jpg)
5.1.1 Start a session
[root@hadoop001 flink-1.12.7]# ./bin/yarn-session.sh -n 2 -tm 400 -s 1 -d
2022-06-28 17:01:56,478 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: jobmanager.rpc.address, hadoop001
2022-06-28 17:01:56,481 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: jobmanager.rpc.port, 6123
2022-06-28 17:01:56,482 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: jobmanager.memory.process.size, 1600m
2022-06-28 17:01:56,482 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: taskmanager.memory.process.size, 1728m
2022-06-28 17:01:56,482 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: taskmanager.numberOfTaskSlots, 2
2022-06-28 17:01:56,482 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: parallelism.default, 1
2022-06-28 17:01:56,482 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: jobmanager.execution.failover-strategy, region
2022-06-28 17:01:56,482 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: web.submit.enable, true
2022-06-28 17:01:56,483 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: jobmanager.archive.fs.dir, hdfs://ns/flink/completed-jobs/
2022-06-28 17:01:56,483 INFO org.apache.flink.configuration.GlobalConfiguration [] - Loading configuration property: historyserver.web.address, hadoop001
2022-06-28 17:01:56,483 INFO org.apache.flink.configuration.GlobalConfiguration []