Flink - Python Table API
官网文档很详细,有兴趣的可以自己去官网看一下。
环境部署
安装 pyflink:
root@wang:/usr/bin# pip3 install apache-flink -i https://mirrors.aliyun.com/pypi/simple/
#!/usr/bin/python
print('11111')  # start-up marker, emitted before PyFlink is imported
from pyflink.table import EnvironmentSettings, StreamTableEnvironment, BatchTableEnvironment

# Step 1: obtain the table environment.
#
# This script builds a blink-planner *batch* environment. For a streaming job,
# build the settings with in_streaming_mode() and create the environment with
# StreamTableEnvironment.create(environment_settings=...) instead.
env_settings_b = (
    EnvironmentSettings.new_instance()
    .in_batch_mode()
    .use_blink_planner()
    .build()
)
table_env_b = BatchTableEnvironment.create(environment_settings=env_settings_b)

# Register the JDBC connector and the MySQL driver through "pipeline.jars",
# given as a semicolon-separated list of file: URLs.
# NOTE(review): the connector JAR is versioned 1.11.2 while it lives under a
# flink-1.12.0 directory - confirm the two are compatible.
connector_jars = "file:/home/wang/uhome/flink-1.12.0/lib/flink-connector-jdbc_2.11-1.11.2.jar;file:/home/wang/uhome/flink-1.12.0/lib/mysql-connector-java-8.0.22.jar"
pipeline_conf = table_env_b.get_config().get_configuration()
pipeline_conf.set_string("pipeline.jars", connector_jars)
# The SQL statements could instead live in separate script files, organised in
# whatever directory structure suits the project.
# Step 2: write the SQL.
#
# DDL for the Flink table `source_order` (order_id, order_value, user_id), read
# through the JDBC connector from the MySQL table `source_order` in database
# `test` on localhost:3306.
# NOTE(review): the username and password are hard-coded in plain text here;
# consider loading them from configuration or the environment.
source_ddl_source_order = """
CREATE TABLE source_order (
order_id INT,
order_value INT,
user_id INT
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://localhost:3306/test',
'table-name' = 'source_order',
'username' = 'root',
'password' = 'Wang32?_#@%'
)
"""
# DDL for the Flink table `order_business` (order_busi_id, order_name), read
# through the JDBC connector from the MySQL table `order_business` in database
# `test` on localhost:3306.
#
# `order_name` is declared as STRING rather than a bare VARCHAR: in Flink SQL a
# VARCHAR without an explicit length means VARCHAR(1), which does not describe
# a free-form name column.
# NOTE(review): the username and password are hard-coded in plain text here;
# consider loading them from configuration or the environment.
source_ddl_order_business = """
CREATE TABLE order_business (
order_busi_id INT,
order_name STRING
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://localhost:3306/test',
'table-name' = 'order_business',
'username' = 'root',
'password' = 'Wang32?_#@%'
)
"""
# DDL for the Flink table `sink_order` (order_id, order_count, user_id), written
# through the JDBC connector to the MySQL table `sink_order` in database `test`
# on localhost:3306. `order_id` is declared as a (not enforced) primary key.
# NOTE(review): with a primary key declared, the JDBC connector is documented
# to write in upsert mode - verify for the connector version in use.
# NOTE(review): the username and password are hard-coded in plain text here;
# consider loading them from configuration or the environment.
sink_ddl_sink_order = """
CREATE TABLE sink_order (
order_id INT,
order_count BIGINT,
user_id INT,
PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:mysql://localhost:3306/test',
'table-name' = 'sink_order',
'username' = 'root',
'password' = 'Wang32?_#@%'
)
"""
# Transformation query: combines every column of `order_business` and
# `source_order`, groups by (user_id, order_id, order_name) and inserts
# order_id, count(order_id) and user_id into `sink_order`.
# NOTE(review): the two tables are listed with a comma and no join condition,
# so every order row is paired with every business row - confirm that a
# Cartesian product is intended.
# NOTE(review): the grouping includes order_name, which is not selected, so one
# order_id may yield several result rows while the sink declares order_id as
# its primary key - confirm this is intended.
insert_sql = """
insert into sink_order select aaa.order_id as order_id,count(aaa.order_id) as order_count,aaa.user_id as user_id
from (select b.*,s.*
from order_business b,source_order s ) aaa
group by aaa.user_id,aaa.order_id,aaa.order_name
"""
# Step 3: register the tables with Flink.
for table_ddl in (
    source_ddl_source_order,
    source_ddl_order_business,
    sink_ddl_sink_order,
):
    table_env_b.execute_sql(table_ddl)

# Submit the INSERT statement and wait on its result before the script exits.
insert_result = table_env_b.execute_sql(insert_sql)
insert_result.wait()

# Step 4: run the Python job with the Flink CLI, for example:
# ./bin/flink run --python examples/python/table/batch/word_count.py
执行:./flink run --python /home/wang/uhome/FlinkDemo.py
任务提交后,可以打开 Flink 的 Web 页面查看任务。
调度的话,可以写 shell 脚本定时执行上面的命令,后台运行并将日志输出到指定文件;或者用调度工具执行该 Linux 命令。
官方文档很详细,笔者只是用 Python + MySQL 实现了一个简单的数据转换任务。