必选三个依赖包:iceberg-spark-runtime-3.2_2.12、iceberg-spark-extensions-3.2_2.12、hive-exec-2.1.1-cdh6.0.1
pom.xml 中以下依赖必选,其他依赖按需添加;如果 mvn 安装不了,参见下文文章链接:
<!-- spark iceberg -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-spark-runtime-3.2_2.12</artifactId>
<version>0.13.1</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-spark-extensions-3.2_2.12</artifactId>
<version>0.13.1</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>2.1.1-cdh6.0.1</version>
</dependency>
代码示例:
iceberg 表:dl_ods.ods_hdpos_t_sale_d_mark_stream;hive 表:hdm.hdpos_t_sale_d_mark
/**
 * Example: join an Iceberg table with a plain Hive table through one
 * SparkSession by registering Iceberg's SessionCatalog wrapper on
 * spark_catalog, so both table formats resolve via the Hive metastore.
 */
object OrderTest2 {
  // HDFS nameservice and Hive warehouse path (only used by the commented-out
  // warehouse config below; kept for reference).
  val DEFAULTFS = "hdfs://nameservice2"
  val HIVE_WAREHOUSE = "/user/hive/warehouse"
  // Hive metastore thrift endpoint — replace xxxxxxxx with the real host.
  val HIVE_URI = "thrift://xxxxxxxx:9083"

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("OrderStreamRead")
      .config("spark.sql.sources.partitionOverwriteMode", "dynamic")
      // Enable Iceberg SQL syntax and route spark_catalog through Iceberg's
      // SessionCatalog so Hive tables keep working alongside Iceberg tables.
      .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
      .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
      .config("spark.sql.catalog.spark_catalog.type", "hive")
      .config("spark.sql.catalog.spark_catalog.uri", HIVE_URI)
      // .config("spark.sql.catalog.spark_catalog.warehouse", DEFAULTFS + HIVE_WAREHOUSE)
      // Allow reading Iceberg timestamp-without-timezone columns from Spark.
      .config("spark.sql.catalog.spark_catalog.iceberg.handle-timestamp-without-timezone", "true")
      // .config("spark.sql.catalog.spark_catalog.spark.sql.adaptive.enabled", "true")
      .enableHiveSupport()
      .getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    // Join one partition day of the Iceberg stream table with the Hive table
    // on saleno. NOTE: SparkSession.sql() does not accept a trailing ';'
    // (it throws a ParseException), so the statement must not end with one.
    // The '|' margins make .stripMargin effective (it was a no-op before).
    val df = spark.sql(
      """select t1.rowno, t1.reason, t2.sys_cd
        |from (select * from dl_ods.ods_hdpos_t_sale_d_mark_stream where dt='20220906') t1
        |left join (select * from hdm.hdpos_t_sale_d_mark where dt='20220906') t2
        |  on t1.saleno = t2.saleno
        |limit 2""".stripMargin)
    df.show()
    spark.stop()
  }
}
结果: