PyFlink 实时接收 Kafka 数据并写入 Hive

#!/usr/bin/python
# -*- coding: UTF-8 -*-



from pyflink.dataset import ExecutionEnvironment
from pyflink.table import TableConfig, DataTypes, BatchTableEnvironment, EnvironmentSettings,StreamTableEnvironment
from pyflink.table.catalog import HiveCatalog
from pyflink.table import SqlDialect
from pyflink.datastream import StreamExecutionEnvironment, CheckpointingMode

# Build the streaming execution environment that the table environment runs on.
s_env = StreamExecutionEnvironment.get_execution_environment()
s_env.set_parallelism(4)
# Checkpointing must be enabled (interval is in milliseconds); otherwise no
# data is written out.
s_env.enable_checkpointing(600000) # 600,000 ms = ten minutes

# Batch-mode alternative, kept for reference:
#env_settings = EnvironmentSettings.new_instance().in_batch_mode().use_blink_planner().build()
#t_env = BatchTableEnvironment.create(environment_settings=env_settings)
# Streaming-mode settings with the Blink planner; the table environment is
# created on top of s_env so it uses the parallelism/checkpoint settings above.
env_settings = EnvironmentSettings.new_instance().in_streaming_mode().use_blink_planner().build()
t_env = StreamTableEnvironment.create(stream_execution_environment=s_env,environment_settings=env_settings)

# Progress marker: the table environment has been created.
print("AAAA")
#t_env.enable_checkpointing(3000)
# Hive configuration directory on the test-environment physical host.
hive_conf_dir = "/data/docker/containers/*/conf"  # a local path

# Test-environment container path: /data/EtlServices/dlp/code/conf
#hive_conf_dir = "/data/E*code/conf"
catalog = HiveCatalog("myhive", "default", hive_conf_dir)
print("BBBB")
# Register the catalog
t_env.register_catalog("myhive", catalog)
print("CCC")
# Set the HiveCatalog as the current catalog of the session.
t_env.use_catalog("myhive")
# Use the Hive SQL dialect for the statements that follow.
t_env.get_config().set_sql_dialect(SqlDialect.HIVE)
# Show the existing tables and databases before creating anything.
# TableResult.print() writes the result itself and returns None, so it is not
# wrapped in print() (doing so emitted a stray "None" line).
t_env.execute_sql("show tables").print()
print(t_env.list_databases())
t_env.execute_sql("CREATE DATABASE IF NOT EXISTS hive_*_data")
#t_env.execute_sql("DROP TABLE IF EXISTS hive_*data_test.sink_dlp_hive")
#t_env.execute_sql("DROP TABLE IF EXISTS hive*data_test.source_dlp_kafka")
# Show databases and tables again after the CREATE DATABASE statement.
print(t_env.list_databases())
t_env.execute_sql("SHOW TABLES").print()
print("DDDD")


# Create the Hive sink table if it does not exist yet.
# The table has three STRING data columns, is partitioned by ts_date/ts_hour
# and stored as Parquet. The TBLPROPERTIES configure partition commit:
#   - trigger 'process-time' with a delay of '1 min'
#   - commit policies 'metastore' and 'success-file'
#   - a timestamp pattern built from the two partition columns
t_env.execute_sql("""CREATE TABLE IF NOT EXISTS hive*_data.sink_*_hive(
ORG_ID STRING,
DIRECTION_DESC STRING,
`RESULT` STRING
      ) PARTITIONED BY (
        ts_date STRING,
        ts_hour STRING
      ) STORED AS PARQUET
      TBLPROPERTIES (
        'sink.partition-commit.trigger' = 'process-time',
        'sink.partition-commit.delay' = '1 min',
        'sink.partition-commit.policy.kind' = 'metastore,success-file',
        'partition.time-extractor.timestamp-pattern' = '$ts_date $ts_hour:00:00'
      )"""
)

# Switch to the default SQL dialect before declaring the Kafka source table,
# whose DDL uses the connector-style WITH (...) options.
t_env.get_config().set_sql_dialect(SqlDialect.DEFAULT)
# 2. Create the source table: a Kafka topic read as JSON, starting from the
#    latest offsets, with the same three STRING columns as the sink table.
t_env.execute_sql("""
    CREATE TABLE IF NOT EXISTS  hive_*_data.source_*_kafka (
ORG_ID STRING,
DIRECTION_DESC STRING,
`RESULT` STRING
    ) WITH (
              'connector' = 'kafka',
              'topic' = '*',
              'properties.bootstrap.servers' = '2*:*.162:9092,2*3.163:9092,
                                                *4:9092,2*:9092,*6:9092,
                                                *:9092',
              'properties.group.id' = 'flink_kafka_hive01_consumers_*',
              'scan.startup.mode' = 'latest-offset',
              'format' = 'json'
            )
""")



# Insert data: submit the job that copies rows from the Kafka source table
# into the Hive sink table. The two extra SELECT expressions fill the ts_date
# and ts_hour partition columns from the current local timestamp
# ('yyyyMMdd' and 'HH'). wait() is called on the returned result so the
# script blocks on the submitted job instead of exiting immediately.
t_env.execute_sql("""
INSERT INTO hive_aliHipsLuojing_data.sink_aliHipsLuojing_hive
SELECT
ORG_ID,
DIRECTION_DESC,
`RESULT`,
DATE_FORMAT(LOCALTIMESTAMP,'yyyyMMdd'),
DATE_FORMAT(LOCALTIMESTAMP,'HH')
FROM hive_aliHipsLuojing_data.source_aliHipsLuojing_kafka
""").wait()

#RAW_MSG is not null
#!=''

                            

参考文档:
https://help.aliyun.com/document_detail/181568.html
https://blog.csdn.net/chenshijie2011/article/details/117399883
https://blog.csdn.net/chenshijie2011/article/details/117401621
https://www.cnblogs.com/maoxiangyi/p/13509782.html
https://www.cnblogs.com/Springmoon-venn/p/13726089.html
https://www.jianshu.com/p/295066a24092
https://blog.csdn.net/m0_37592814/article/details/108044830

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小金子的夏天

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值