Flink 调用 Python 脚本

最新推荐文章于 2024-07-11 16:34:57 发布

scan724

最新推荐文章于 2024-07-11 16:34:57 发布

阅读量762

点赞数

分类专栏： Flink实时计算 文章标签： 大数据 linux python

本文链接：https://blog.csdn.net/zhaoyangjian724/article/details/130673747

版权

Flink实时计算专栏收录该内容

102 篇文章 1 订阅

订阅专栏

提交方式：./flink run -m localhost:8081 -py <Python 编写的 Flink 程序路径>

[root@centos6 bin]# ./flink run -m localhost:8081 -py /root/flink/count_word.py
Job has been submitted with JobID 7a975665fbd8ff49861f99a695c80348
Program execution finished
Job with JobID 7a975665fbd8ff49861f99a695c80348 has finished.
Job Runtime: 1190 ms

[root@centos6 bin]# cat /tmp/output
xx   2
yy   1
zz   1

[root@centos6 bin]# cat /tmp/input
xx
yy
zz
xx

[root@centos6 flink]# cat count_word.py
from pyflink.table import BatchTableEnvironment, EnvironmentSettings
from pyflink.table import DataTypes
from pyflink.table.descriptors import Schema, OldCsv, FileSystem
# Word-count job: read words from /tmp/input, count occurrences of each word,
# and write "word<TAB>count" rows to /tmp/output.

# Table environment configured for batch mode with the Blink planner.
settings_builder = EnvironmentSettings.new_instance()
settings_builder = settings_builder.in_batch_mode()
settings_builder = settings_builder.use_blink_planner()
env_settings = settings_builder.build()
t_env = BatchTableEnvironment.create(environment_settings=env_settings)

# Source table 'mySource': a single STRING column 'word' read from /tmp/input.
source_descriptor = t_env.connect(FileSystem().path('/tmp/input'))
source_format = OldCsv().field('word', DataTypes.STRING())
source_descriptor = source_descriptor.with_format(source_format)
source_schema = Schema().field('word', DataTypes.STRING())
source_descriptor = source_descriptor.with_schema(source_schema)
source_descriptor.create_temporary_table('mySource')

# Sink table 'mySink': (word STRING, count BIGINT) written to /tmp/output,
# with fields separated by a tab character.
sink_descriptor = t_env.connect(FileSystem().path('/tmp/output'))
sink_format = OldCsv()
sink_format = sink_format.field_delimiter('\t')
sink_format = sink_format.field('word', DataTypes.STRING())
sink_format = sink_format.field('count', DataTypes.BIGINT())
sink_descriptor = sink_descriptor.with_format(sink_format)
sink_schema = Schema()
sink_schema = sink_schema.field('word', DataTypes.STRING())
sink_schema = sink_schema.field('count', DataTypes.BIGINT())
sink_descriptor = sink_descriptor.with_schema(sink_schema)
sink_descriptor.create_temporary_table('mySink')

# Group the source rows by word, count each group, and route the result
# into the sink table.
grouped_words = t_env.scan('mySource').group_by('word')
word_counts = grouped_words.select('word, count(1)')
word_counts.insert_into('mySink')

# Submit the job under the name "python_job".
t_env.execute("python_job")