./flink run -m localhost:8081 -py <path to the Python Flink program>
[root@centos6 bin]# ./flink run -m localhost:8081 -py /root/flink/count_word.py
Job has been submitted with JobID 7a975665fbd8ff49861f99a695c80348
Program execution finished
Job with JobID 7a975665fbd8ff49861f99a695c80348 has finished.
Job Runtime: 1190 ms
[root@centos6 bin]# cat /tmp/output
xx 2
yy 1
zz 1
[root@centos6 bin]# cat /tmp/input
xx
yy
zz
xx
[root@centos6 flink]# cat count_word.py
from pyflink.table import BatchTableEnvironment, EnvironmentSettings
from pyflink.table import DataTypes
from pyflink.table.descriptors import Schema, OldCsv, FileSystem
# Word count with the PyFlink Table API, run as a batch job.
# Reads words from /tmp/input (a single 'word' column) and writes
# tab-separated (word, count) rows to /tmp/output.

# Batch mode on the Blink planner.
env_settings = EnvironmentSettings.new_instance().in_batch_mode().use_blink_planner().build()
t_env = BatchTableEnvironment.create(environment_settings=env_settings)

# Source table 'mySource': CSV file with one STRING column named 'word'.
t_env.connect(FileSystem().path('/tmp/input')) \
    .with_format(OldCsv()
                 .field('word', DataTypes.STRING())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())) \
    .create_temporary_table('mySource')

# Sink table 'mySink': CSV file with columns (word STRING, count BIGINT),
# using a tab as the field delimiter.
t_env.connect(FileSystem().path('/tmp/output')) \
    .with_format(OldCsv()
                 .field_delimiter('\t')
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .create_temporary_table('mySink')

# Group by word, count the rows per group, and route the result to the sink.
# from_path() is the non-deprecated replacement for scan() when looking up
# a registered table by name.
t_env.from_path('mySource') \
    .group_by('word') \
    .select('word, count(1)') \
    .insert_into('mySink')

# Submit the job under the name "python_job".
t_env.execute("python_job")