/**
 * Flink streaming job that reads CSV records (user_id, order_amount, log_ts)
 * from a Kafka topic and writes them into a partitioned Hive table, committing
 * partitions based on event-time watermarks.
 *
 * <p>Prerequisites: a running Hive metastore (see hive-site.xml notes below) and
 * the tables {@code hive_table} / {@code kafka_table} created with the DDL kept
 * in the comments for reference.
 */
public class Kafka2Hive {
    public static void main(String[] args) {
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().build();
        // Checkpointing is required for the file sink to commit Hive partitions.
        environment.enableCheckpointing(10000L);
        environment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(environment, settings);

        String name = "myhive";
        String defaultDatabase = "mydatabase";
        // NOTE(review): hard-coded local Windows path — externalize (arg/config) before deploying.
        String hiveConfDir = "F:\\flink-demo\\src\\main\\resources";
        HiveCatalog hive = new HiveCatalog(name, defaultDatabase, hiveConfDir);
        // Use the `name` variable instead of repeating the "myhive" literal,
        // so the catalog name is defined in exactly one place.
        tableEnv.registerCatalog(name, hive);
        // Set the HiveCatalog as the current catalog of the session.
        tableEnv.useCatalog(name);

        // Hive dialect — required for Hive-specific DDL (PARTITIONED BY ... STORED AS).
        tableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
        // The Hive sink table already exists in the catalog; DDL kept for reference only.
        // String hive_sql = "CREATE TABLE hive_table (" +
        //         " user_id STRING," +
        //         " order_amount DOUBLE" +
        //         ") PARTITIONED BY (dt STRING, hr STRING) STORED AS parquet TBLPROPERTIES (" +
        //         " 'partition.time-extractor.timestamp-pattern'='$dt $hr:00:00'," +
        //         " 'sink.partition-commit.trigger'='partition-time'," +
        //         " 'sink.partition-commit.delay'='1 h'," +
        //         " 'sink.partition-commit.policy.kind'='metastore,success-file'" +
        //         ")";
        //
        // tableEnv.executeSql(hive_sql);

        // Default (Flink) dialect — used for the Kafka source DDL and the INSERT below.
        tableEnv.getConfig().setSqlDialect(SqlDialect.DEFAULT);
        // The Kafka source table already exists in the catalog; DDL kept for reference only.
        // String kafka_sql = "CREATE TABLE kafka_table (" +
        //         " user_id STRING," +
        //         " order_amount DOUBLE," +
        //         " log_ts BIGINT," +
        //         " ts AS TO_TIMESTAMP(FROM_UNIXTIME(log_ts / 1000, 'yyyy-MM-dd HH:mm:ss'))," +
        //         " WATERMARK FOR ts AS ts - INTERVAL '5' SECOND" +
        //         ") WITH (" +
        //         " 'connector' = 'kafka'," +
        //         " 'topic' = 'user_behavior'," +
        //         " 'properties.bootstrap.servers' = 'hadoop01:9092'," +
        //         " 'properties.group.id' = 'testGroup'," +
        //         " 'scan.startup.mode' = 'earliest-offset'," +
        //         " 'format' = 'csv')";
        //
        // tableEnv.executeSql(kafka_sql);

        // BUG FIX: the original concatenation produced "...'HH')FROM kafka_table"
        // (no space before FROM), which is invalid SQL. A trailing space is now
        // included before the FROM fragment.
        tableEnv.executeSql("INSERT INTO hive_table " +
                "SELECT user_id, order_amount, DATE_FORMAT(ts, 'yyyy-MM-dd'), DATE_FORMAT(ts, 'HH') " +
                "FROM kafka_table");
    }
}
在 hive-site.xml 中添加以下配置（指向 Hive Metastore 的 thrift 地址）：
<property>
<name>hive.metastore.uris</name>
<value>thrift://hadoop01:9083</value>
</property>
启动 hive --service metastore
测试数据（CSV 格式，列依次为 user_id, order_amount, log_ts 毫秒时间戳）：
1,10,1625886660000
2,3,1625886721000
1,5,1625887380000
1,6,1625887800000
2,30,1625886721000
1,4,1625889989000