Flink Iceberg Testing

Component Versions

Component   Version
Java        1.8.0_251
Scala       2.12.14
Flink       1.12.5
Iceberg     0.12.0
Hadoop      2.9.2
Hive        2.3.6

Place hdfs-site.xml, core-site.xml, and hive-site.xml under the resources directory.
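These files are read from the classpath when the catalogs below are created. A minimal sanity check (the object name is made up for illustration; fs.defaultFS is just one property that core-site.xml normally defines):

import org.apache.hadoop.conf.Configuration

object CheckClasspathConf {
  def main(args: Array[String]): Unit = {
    // new Configuration() picks up core-site.xml from the classpath
    val conf = new Configuration()
    println(conf.get("fs.defaultFS")) // should print the value from core-site.xml

    // null here means the file was not packaged under resources
    println(getClass.getResource("/hdfs-site.xml"))
    println(getClass.getResource("/hive-site.xml"))
  }
}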

hadoop_catalog

object TestFlinkSQLOptIcebergHadoopCatalog {
  def main(args: Array[String]): Unit = {
    //   val env = StreamExecutionEnvironment.getExecutionEnvironment
    val settings = EnvironmentSettings
      .newInstance()
      .useBlinkPlanner()
      .inBatchMode()
      .build()

    val tableEnv = TableEnvironment.create(settings)

    val DDL =
      """
        |create catalog hadoop_catalog with (
        | 'type' = 'iceberg',
        | 'catalog-type' = 'hadoop',
        | 'property-version' = '1',
        | 'warehouse' = 'hdfs:///user/hive/warehouse/'
        |)
        |""".stripMargin
    tableEnv.executeSql(DDL)

    //    two equivalent ways
    //    tableEnv.executeSql("use catalog hadoop_catalog")
    tableEnv.useCatalog("hadoop_catalog")

    tableEnv.executeSql("create database if not exists iceberg_db")
    //    tableEnv.executeSql("use iceberg_db")
    tableEnv.useDatabase("iceberg_db")

    tableEnv.executeSql("show databases").print()
    tableEnv.executeSql("show tables").print()

    // 1. Create a table
    //    val tableDDL =
    //      """
    //        |create table if not exists iceberg_test_table (
    //        | id bigint comment 'unique id',
    //        | data string
    //        |) comment 'iceberg test table'
    //        | partitioned by (data)
    //        |""".stripMargin
    //    tableEnv.executeSql(tableDDL)
    //    tableEnv.executeSql("show tables").print()

    // *** 2. Rename table: the hadoop catalog does not yet support renaming tables; currently only updating/setting properties and dropping are supported
    //    tableEnv.executeSql("alter table iceberg_test_table rename to iceberg_test_table2")
    //    tableEnv.executeSql("show tables").print()

    // 3. Drop a table
    //    tableEnv.executeSql("drop table if exists iceberg_test_table")
    //    tableEnv.executeSql("show tables").print()

    // 4. List tables
    //    tableEnv.executeSql("show tables").print()

    // 5. Create a new table from an existing one with LIKE
    tableEnv.executeSql("create table iceberg_test_like like iceberg_test_table")
    tableEnv.executeSql("show tables").print()

    // 6. Alter table properties
    // supported since Flink 1.11
    //    tableEnv.executeSql("""alter table test_like set ('write.format.default'='avro')""")

    // 7. Insert
    //    tableEnv.executeSql("insert into test_hadoop_table values (1, 'a')")
    //    tableEnv.executeSql("insert overwrite test_hadoop_table values (2, 'a') ")
    //    tableEnv.executeSql("insert overwrite test_table PARTITION(data='b') SELECT 6")

    // 8. Read data
    //    tableEnv.executeSql("select * from test_hadoop_table").print()

    // 9. Write data (insert ... select)
    //    val insert =
    //      """
    //        |insert into test_like
    //        |select
    //        | id, data
    //        |from test_hadoop_table
    //        |""".stripMargin
    //    tableEnv.executeSql(insert)
  }
}

hive_catalog

object TestFlinkSQLOptIcebergHiveCatalog {
  private var logger: org.slf4j.Logger = _

  def main(args: Array[String]): Unit = {
    logger = LoggerFactory.getLogger(this.getClass.getSimpleName)
    Logger.getLogger("org.apache").setLevel(Level.INFO)
    Logger.getLogger("hive.metastore").setLevel(Level.INFO)
    Logger.getLogger("akka").setLevel(Level.INFO)

    val tableEnv = FlinkUtils.initStreamTableEnvironment()

    //    val env = StreamExecutionEnvironment.getExecutionEnvironment
    //
    //    val settings = EnvironmentSettings
    //      .newInstance()
    //      .useBlinkPlanner()
    //      .inStreamingMode()
    //      .build()
    //
    // streamTable environment
    //    val tableEnv = StreamTableEnvironment.create(env, settings)

    // batchTable environment
    //    val settings = EnvironmentSettings
    //      .newInstance()
    //      .useBlinkPlanner()
    //      .inBatchMode()
    //      .build()
    //    val tableEnv = TableEnvironment.create(settings)

    //        val catalog_name = "hive_catalog"
    //        val database = "iceberg_test_db"
    //        val hiveConf = "F:\\workspace\\realtime-lakehouse\\test\\src\\main\\resources"
    //
    //        val hiveCatalog = new HiveCatalog(
    //          catalog_name,
    //          null,
    //          hiveConf
    //        )
    //        tableEnv.registerCatalog(catalog_name, hiveCatalog)
    //        tableEnv.getConfig.setSqlDialect(SqlDialect.HIVE)

    // catalog
    //    val catalogDDL =
    //      """
    //        |create catalog hive_catalog with (
    //        | 'type' = 'iceberg',
    //        | 'catalog-type' = 'hive',
    //        | 'uri' = 'thrift://test-lakehouse:9083',
    //        | 'clients' = '5',
    //        | 'property-version' = '1',
    //        | 'warehouse' = 'hdfs://test-lakehouse:9000/user/hive/warehouse/'
    //        |)
    //        |""".stripMargin
    //    tableEnv.executeSql(catalogDDL)

    //    two equivalent ways
    //    //    tableEnv.executeSql("use catalog hive_catalog")
    //    tableEnv.useCatalog("hive_catalog")
    //    tableEnv.executeSql("show catalogs").print()
    //
    //    val databaseDDL = "create database if not exists iceberg_test_db"
    //    tableEnv.executeSql(databaseDDL)
    //
    //    tableEnv.useDatabase("iceberg_test_db")
    //    println(s"current database: ${tableEnv.getCurrentDatabase}")

    //    tableEnv.executeSql("show databases").print()

    //    println("list catalogs:")
    //    tableEnv.listCatalogs().foreach(println)
    //    tableEnv.listDatabases()

    //    1. Create a table
    //    val tableDDL =
    //      """
    //        |create table if not exists iceberg_test_table (
    //        | id bigint comment 'unique id',
    //        | data string,
    //        | primary key (id) not enforced
    //        |) comment 'iceberg test table'
    //        | partitioned by (id)
    //        |""".stripMargin
    //    tableEnv.executeSql(tableDDL)
    //    tableEnv.executeSql("show tables").print()

    //  2. Rename table
    //    tableEnv.executeSql("alter table iceberg_test_table rename to iceberg_test_table2")
    //    tableEnv.executeSql("show tables").print()
    //

    // 3. Drop a table
    //    tableEnv.executeSql("drop table if exists iceberg_test_table")
    //    tableEnv.executeSql("show tables").print()

    // 4. List tables
    //    tableEnv.executeSql("show tables").print()

    // 5. Create a new table from an existing one with LIKE
    //    tableEnv.executeSql("create table iceberg_test_like like iceberg_test_table")
    //    tableEnv.executeSql("show tables").print()

    // 6. Alter table properties
    // supported since Flink 1.11
    //    tableEnv.executeSql("alter table iceberg_test_like set ('write.format.default'='avro')")

    // 7. Insert
    // tableAPI
    //    val statementSet = tableEnv.createStatementSet()
    //    statementSet.addInsertSql("insert into iceberg_test_table values (1, 'a')")
    //    statementSet.execute()

    //    tableEnv.executeSql("insert into iceberg_test_table values (1, 'a'), (2, 'b')")
    //+----------------------+--------------------------------+
    //|                   id |                           data |
    //+----------------------+--------------------------------+
    //|                    1 |                              a |
    //|                    2 |                              b |
    //+----------------------+--------------------------------+

    //    tableEnv.executeSql("insert overwrite iceberg_test_table values (111, 'b')")
    //+----------------------+--------------------------------+
    //|                   id |                           data |
    //+----------------------+--------------------------------+
    //|                    1 |                            aaa |
    //|                    2 |                              b |
    //+----------------------+--------------------------------+

    //    tableEnv.executeSql("insert overwrite iceberg_test_table partition(data='b') select 888")
    //+----------------------+--------------------------------+
    //|                   id |                           data |
    //+----------------------+--------------------------------+
    //|                    2 |                              b |
    //|                    1 |                            ccc |
    //+----------------------+--------------------------------+

    // 8. Read data
    //    tableEnv.executeSql("select * from iceberg_test_table").print()
    //    val table = tableEnv.sqlQuery("select * from iceberg_test_table")
    //    table.printSchema()
    //    table.execute().print()

    // 9. Write data (insert ... select)
    //    val insert =
    //      """
    //        |insert into iceberg_test_like
    //        |select
    //        | id, data
    //        |from iceberg_test_table
    //        |""".stripMargin
    //    tableEnv.executeSql(insert)
    //    tableEnv.executeSql("select * from iceberg_test_like").print()

    // 10. Streaming read
    //    val config = tableEnv.getConfig.getConfiguration
    //    config.setBoolean(TableConfigOptions.TABLE_DYNAMIC_TABLE_OPTIONS_ENABLED, true)
    //
    //    // read all the records
    //    val readAllDML =
    //    """
    //      |select * from iceberg_test_table
    //      |/*+ options('streaming'='true', 'monitor-interval'='1s')*/
    //      |""".stripMargin
    //    tableEnv.executeSql(readAllDML).print()
    //
    //    // read incremental data
    //    val readIncrementalDML =
    //      """
    //        |select * from iceberg_test_table
    //        |/*+ options('streaming'='true', 'monitor-interval'='1s', 'start-snapshot-id'='8116368287341314212')*/
    //        |""".stripMargin
    //    tableEnv.executeSql(readIncrementalDML).print()

    // 11. cdc
    tableEnv.useCatalog("default_catalog")
    val cdcDDL =
      """
        |create table if not exists iceberg_cdc_source (
        |  id int,
        |  data string,
        |  primary key (id) not enforced
        |) with (
        | 'connector' = 'mysql-cdc',
        | 'hostname' = 'test-lakehouse',
        | 'port' = '3306',
        | 'username' = 'test',
        | 'password' = '123456',
        | 'database-name' = 'test_db',
        | 'table-name' = 'test',
        | 'server-time-zone' = 'Asia/Shanghai'
        |)
      """.stripMargin
    tableEnv.executeSql(cdcDDL)

    // output
    //    tableEnv.executeSql("select * from iceberg_cdc_source").print()

    //        val printSinkSql =
    //          """
    //            |create table if not exists print_sink (
    //            |  id int,
    //            |  data string,
    //            |  primary key (id) not enforced
    //            |) with (
    //            | 'connector' = 'print'
    //            |)
    //          """.stripMargin
    //        tableEnv.executeSql(printSinkSql)
    //
    //        tableEnv.executeSql("insert into print_sink select * from iceberg_cdc_source")

    //   catalog
    val catalogDDL =
      """
        |create catalog hive_catalog with (
        | 'type' = 'iceberg',
        | 'catalog-type' = 'hive',
        | 'uri' = 'thrift://test-lakehouse:9083',
        | 'clients' = '5',
        | 'property-version' = '1',
        | 'warehouse' = 'hdfs://test-lakehouse:9000/user/hive/warehouse/'
        |)
        |""".stripMargin
    tableEnv.executeSql(catalogDDL)

    val databaseDDL = "create database if not exists hive_catalog.iceberg_test_db"
    tableEnv.executeSql(databaseDDL)

    //    tableEnv.executeSql("drop table if exists hive_catalog.iceberg_test_db.iceberg_cdc_test")

    val tableDDL =
      """
        |create table if not exists hive_catalog.iceberg_test_db.iceberg_cdc_test (
        | id bigint comment 'unique id',
        | data string,
        | primary key (id) not enforced
        |) comment 'iceberg test table'
        | partitioned by (id)
        | with(
        |  'iceberg.format.version' = '2'
        |  -- 'write.metadata.delete-after-commit.enabled' = 'true',
        |  -- 'write.metadata.previous-versions-max' = '100'
        | )
        |""".stripMargin
    tableEnv.executeSql(tableDDL)

    val cdcDML =
      """
        |insert into hive_catalog.iceberg_test_db.iceberg_cdc_test
        |select * from default_catalog.default_database.iceberg_cdc_source
        |""".stripMargin
    tableEnv.executeSql(cdcDML)

    // stop cdc after
    // tableEnv.executeSql("select * from iceberg_cdc_test").print()

    //    val config = tableEnv.getConfig.getConfiguration
    //    config.setBoolean(TableConfigOptions.TABLE_DYNAMIC_TABLE_OPTIONS_ENABLED, true)
    //    tableEnv.executeSql(
    //      """
    //        |select * from iceberg_cdc_test
    //        |/*+ options('streaming'='true', 'monitor-interval'='1s')*/
    //      """.stripMargin).print()
  }
}
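FlinkUtils.initStreamTableEnvironment() used at the top of this object is a small project-local helper that is not shown in the post. A minimal sketch of what it might look like, assuming the Blink planner in streaming mode; the checkpoint interval is an arbitrary example value (the Iceberg sink only commits on checkpoint completion):

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment

object FlinkUtils {
  // assumed helper: streaming TableEnvironment with the Blink planner and
  // checkpointing enabled, since the Iceberg sink commits on checkpoints
  def initStreamTableEnvironment(): StreamTableEnvironment = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(60 * 1000L) // assumption: 1-minute checkpoint interval

    val settings = EnvironmentSettings
      .newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()

    StreamTableEnvironment.create(env, settings)
  }
}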

Checking the data

After creating the table

2021-09-24 14:40:31,948 INFO - Successfully committed to table hive_catalog.iceberg_test_db.iceberg_test_table in 2008 ms

+--------------------+
|     table name     |
+--------------------+
| iceberg_test_table |
+--------------------+

HDFS data

Before any data is written, only metadata exists (no data files yet).

Metadata file contents:

{
  "format-version" : 1,
  "table-uuid" : "efbc787a-6eed-46ef-a2a8-c04b8cbcf1c2",
  "location" : "hdfs://test-lakehouse:9000/user/hive/warehouse/iceberg_test_db.db/iceberg_test_table",
  "last-updated-ms" : 1632715958040,
  "last-column-id" : 2,
  "schema" : {
    "type" : "struct",
    "schema-id" : 0,
    "identifier-field-ids" : [ 1 ],
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : true,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    } ]
  },
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "identifier-field-ids" : [ 1 ],
    "fields" : [ {
      "id" : 1,
      "name" : "id",
      "required" : true,
      "type" : "long"
    }, {
      "id" : 2,
      "name" : "data",
      "required" : false,
      "type" : "string"
    } ]
  } ],
  "partition-spec" : [ {
    "name" : "id",
    "transform" : "identity",
    "source-id" : 1,
    "field-id" : 1000
  } ],
  "default-spec-id" : 0,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ {
      "name" : "id",
      "transform" : "identity",
      "source-id" : 1,
      "field-id" : 1000
    } ]
  } ],
  "last-partition-id" : 1000,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : { },
  "current-snapshot-id" : -1,
  "snapshots" : [ ],
  "snapshot-log" : [ ],
  "metadata-log" : [ ]
}

After modifying the data

    ......

  "properties" : { },
  "current-snapshot-id" : 3357358225130025285,
  "snapshots" : [ {
    "snapshot-id" : 750183960105471040,
    "timestamp-ms" : 1632715970291,
    "summary" : {
      "operation" : "append",
      "flink.job-id" : "c79435a3ae5097eba8842a1816409be5",
      "flink.max-committed-checkpoint-id" : "9223372036854775807",
      "added-data-files" : "2",
      "added-records" : "2",
      "added-files-size" : "1354",
      "changed-partition-count" : "2",
      "total-records" : "2",
      "total-files-size" : "1354",
      "total-data-files" : "2",
      "total-delete-files" : "0",
      "total-position-deletes" : "0",
      "total-equality-deletes" : "0"
    },
    "manifest-list" : "hdfs://test-lakehouse:9000/user/hive/warehouse/iceberg_test_db.db/iceberg_test_table/metadata/snap-750183960105471040-1-bc72ec07-52b5-4352-9c6b-1db44c8f85e9.avro",
    "schema-id" : 0
  }, {
    "snapshot-id" :

       ......

    },
    "manifest-list" : "hdfs://test-lakehouse:9000/user/hive/warehouse/iceberg_test_db.db/iceberg_test_table/metadata/snap-3357358225130025285-1-9f5c0553-7a4b-42c7-8199-1f7cff77f3ac.avro",
    "schema-id" : 0
  } ],
  "snapshot-log" : [ {
    "timestamp-ms" : 1632715970291,
    "snapshot-id" : 750183960105471040
  }, {
    
    ......

  } ],

  "metadata-log" : [ {
    "timestamp-ms" : 1632715958040,
    "metadata-file" : "hdfs://test-lakehouse:9000/user/hive/warehouse/iceberg_test_db.db/iceberg_test_table/metadata/00000-7f31a7d0-6bd9-45a4-82f6-210ea2aa5f10.metadata.json"
  }, {

    ......

  } ]
}
hdfs dfs -text /.../iceberg_test_db.db/iceberg_test_table/metadata/snap-3357358225130025285-1-9f5c0553-7a4b-42c7-8199-1f7cff77f3ac.avro

{"manifest_path":"hdfs://test-lakehouse:9000/user/hive/warehouse/iceberg_test_db.db/iceberg_test_table/metadata/9f5c0553-7a4b-42c7-8199-1f7cff77f3ac-m1.avro","manifest_length":6030,"partition_spec_id":0,"added_snapshot_id":{"long":3357358225130025285},"added_data_files_count":{"int":1},"existing_data_files_count":{"int":0},"deleted_data_files_count":{"int":0},"partitions":{"array":[{"contains_null":false,"contains_nan":{"boolean":false},"lower_bound":{"bytes":"\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"},"upper_bound":{"bytes":"\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"}}]},"added_rows_count":{"long":1},"existing_rows_count":{"long":0},"deleted_rows_count":{"long":0}} 

hdfs dfs -text /.../iceberg_test_db.db/iceberg_test_table/metadata/9f5c0553-7a4b-42c7-8199-1f7cff77f3ac-m0.avro


{"status":2,"snapshot_id":{"long":3357358225130025285},"data_file":{"file_path":"hdfs://test-lakehouse:9000/user/hive/warehouse/iceberg_test_db.db/iceberg_test_table/data/id=1/00007-0-3ccc043d-9d03-4b5c-8268-55c09827927b-00001.parquet","file_format":"PARQUET","partition":{"id":{"long":1}},"record_count":1,"file_size_in_bytes":691,"block_size_in_bytes":67108864,"column_sizes":{"array":[{"key":1,"value":46},{"key":2,"value":54}]},"value_counts":{"array":[{"key":1,"value":1},{"key":2,"value":1}]},"null_value_counts":{"array":[{"key":1,"value":0},{"key":2,"value":0}]},"nan_value_counts":{"array":[]},"lower_bounds":{"array":[{"key":1,"value":"\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"},{"key":2,"value":"aaa"}]},"upper_bounds":{"array":[{"key":1,"value":"\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"},{"key":2,"value":"aaa"}]},"key_metadata":null,"split_offsets":{"array":[4]},"sort_order_id":{"int":0}}}

With the Hive catalog, the Hive metastore records the Iceberg table name and the Iceberg metadata location (metadata_location).

 

Renaming an Iceberg table only changes the Hive metastore information; the Iceberg metadata is untouched, i.e. the table directory name on HDFS and the metadata JSON do not change.

Dropping a table removes the Hive metastore entry and the Iceberg metadata location, and deletes the metadata files under the table's metadata directory on HDFS, but the table directory itself is not removed.

An overwrite writes new Parquet files; the old files are not cleaned up right away.
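Those stale files can be reclaimed later by expiring the snapshots that still reference them. A minimal sketch using the Iceberg Java API, assuming the table from the hadoop catalog example above; the warehouse path, table identifier and one-day retention are assumptions, not from the original test:

import org.apache.hadoop.conf.Configuration
import org.apache.iceberg.catalog.TableIdentifier
import org.apache.iceberg.hadoop.HadoopCatalog

object ExpireOldSnapshots {
  def main(args: Array[String]): Unit = {
    // load the table created in the hadoop catalog example above
    val catalog = new HadoopCatalog(new Configuration(), "hdfs:///user/hive/warehouse/")
    val table = catalog.loadTable(TableIdentifier.of("iceberg_db", "iceberg_test_table"))

    // expire snapshots older than one day; data files no longer referenced
    // by any remaining snapshot are deleted as part of the same commit
    val oneDayAgo = System.currentTimeMillis() - 24L * 60 * 60 * 1000
    table.expireSnapshots().expireOlderThan(oneDayAgo).commit()
  }
}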

Hive information

After modification

After an overwrite, the earlier snapshots can no longer be read incrementally:

Found overwrite operation, cannot support incremental data in snapshots (8116368287341314212, 3591005179391590033]
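To choose a usable start-snapshot-id after an overwrite, the snapshot list can be inspected and incremental reads restarted from an append snapshot created after the overwrite. A short sketch, again assuming the hadoop catalog table from the first example (a Hive catalog table would be loaded through the Hive catalog instead):

import scala.collection.JavaConverters._
import org.apache.hadoop.conf.Configuration
import org.apache.iceberg.catalog.TableIdentifier
import org.apache.iceberg.hadoop.HadoopCatalog

object ListSnapshots {
  def main(args: Array[String]): Unit = {
    val catalog = new HadoopCatalog(new Configuration(), "hdfs:///user/hive/warehouse/")
    val table = catalog.loadTable(TableIdentifier.of("iceberg_db", "iceberg_test_table"))

    // print snapshot id, operation ("append", "overwrite", ...) and timestamp
    table.snapshots().asScala.foreach { s =>
      println(s"${s.snapshotId()}  ${s.operation()}  ${s.timestampMillis()}")
    }
  }
}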

In addition, there is a Flink CDC issue with MySQL 8.x:

Public Key Retrieval is not allowed

MySQL 8.0 changed its authentication mechanism: caching_sha2_password is now the default authentication plugin. Switch the user to the mysql_native_password plugin instead:

alter user 'test'@'%' identified with mysql_native_password by '123456';

Iceberg format v1 does not support deletes for CDC: CDC data can only be loaded during initialization, and any subsequent delete operations will cause errors. The v2 format is still under development and not yet generally available, so the CDC path is not really usable at the moment.

pom file

    <properties>
        <!-- project compiler -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <!-- maven compiler-->
        <scala.maven.plugin.version>3.2.2</scala.maven.plugin.version>
        <maven.compiler.plugin.version>3.8.1</maven.compiler.plugin.version>
        <maven.assembly.plugin.version>3.1.1</maven.assembly.plugin.version>
        <!-- sdk -->
        <java.version>1.8</java.version>
        <scala.version>2.12.13</scala.version>
        <scala.binary.version>2.12</scala.binary.version>
        <!-- engine-->
        <hadoop.version>2.9.2</hadoop.version>
        <flink.version>1.12.5</flink.version>
        <iceberg.version>0.12.0</iceberg.version>
        <hive.version>2.3.9</hive.version>

<!--         <scope.type>provided</scope.type>-->
        <scope.type>compile</scope.type>
    </properties>

    <dependencies>
        <!-- scala -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
            <scope>${scope.type}</scope>
        </dependency>

        <!-- flink Dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-orc_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-sql-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <artifactId>flink-sql-connector-mysql-cdc</artifactId>
            <version>1.2.0</version>
            <scope>${scope.type}</scope>
        </dependency>

        <!-- iceberg Dependency -->
        <dependency>
            <groupId>org.apache.iceberg</groupId>
            <artifactId>iceberg-flink-runtime</artifactId>
            <version>${iceberg.version}</version>
            <scope>${scope.type}</scope>
        </dependency>

        <!-- hadoop Dependency-->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
            <scope>${scope.type}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <scope>${scope.type}</scope>
        </dependency>

        <!-- hive Dependency-->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
            <scope>${scope.type}</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-slf4j-impl</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.hive</groupId>
                    <artifactId>hive-llap-tez</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.antlr</groupId>
            <artifactId>antlr-runtime</artifactId>
            <version>3.5.2</version>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>${scala.maven.plugin.version}</version>
                <executions>
                    <execution>
                        <goals>
                            <!-- bind this execution to Maven's compile phase -->
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>${maven.assembly.plugin.version}</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>