集群环境:
HDP 3.1.5.0-152
Spark 2.3.0(hudi官网要求2.4+)
Hudi 0.9.0
当前环境执行hudi官网测试代码报错“NoSuchFieldError: NULL_VALUE”,代码如下:
/**
 * Entry point of the Hudi quickstart reproduction job.
 *
 * Builds a Spark session with Kryo serialization, asks the quickstart
 * `DataGenerator` for 10 insert records, loads them as JSON into a DataFrame
 * and writes that DataFrame in "hudi" format to a fixed HDFS base path using
 * `Overwrite` mode, then stops the session.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  val session = SparkSession.builder
    .appName("hudi-test")
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .getOrCreate()

  // Hudi table definition: table name and the HDFS location it is stored under.
  val tableName = "hudi_trips_cow"
  val basePath = "hdfs://192.168.78.12:8020/user/hudi3"

  // Generate sample insert records and load them as a DataFrame (2 partitions).
  val generator = new DataGenerator
  val sampleRows = convertToStringList(generator.generateInserts(10))
  val tripsDf = session.read.json(session.sparkContext.parallelize(sampleRows, 2))

  // Configure the Hudi writer: quickstart defaults plus key / precombine / partition fields.
  val hudiWriter = tripsDf.write
    .format("hudi")
    .options(getQuickstartWriteConfigs)
    .option(PRECOMBINE_FIELD.key(), "ts")
    .option(RECORDKEY_FIELD.key(), "uuid")
    .option(PARTITIONPATH_FIELD.key(), "partitionpath")
    .option(TBL_NAME.key(), tableName)

  // Overwrite replaces whatever already exists at basePath.
  hudiWriter.mode(Overwrite).save(basePath)

  session.stop()
}
提交脚本如下:
# Submit the Hudi quickstart job (main class kl.App) to YARN in cluster mode.
# The Hudi bundle jars and avro-1.8.2.jar are shipped with --jars, core-site.xml with --files.
# The former `--conf yarn.nodemanager.resource.memory-mb=512m` was removed: spark-submit
# ignores properties that do not start with "spark.", and that key is a NodeManager-side
# setting (yarn-site.xml), so it had no effect on this job.
spark-submit --class kl.App \
--master yarn \
--deploy-mode cluster \
--driver-memory 1g \
--executor-memory 1g \
--num-executors 1 \
--executor-cores 2 \
--conf spark.yarn.access.hadoopFileSystems=hdfs://192.168.78.12:8020 \
--conf spark.yarn.maxAppAttempts=1 \
--jars "/root/hudi_spark_test/dependency/hudi-spark-bundle_2.11-0.9.0.jar,/root/hudi_spark_test/dependency/original-hudi-spark-bundle_2.11-0.9.0.jar,/root/hudi_spark_test/dependency/avro-1.8.2.jar" \
--files "/root/hudi_spark_test/core-site.xml" \
/root/hudi_spark_test/KLSparkHudiDemo-1.0.jar
提交报错如下:
22/02/18 06:20:10 INFO Client: Application report for application_1644979649961_0059 (state: ACCEPTED)
22/02/18 06:20:11 INFO Client: Application report for application_1644979649961_0059 (state: ACCEPTED)
22/02/18 06:20:12 INFO Client: Application report for application_1644979649961_0059 (state: RUNNING)
22/02/18 06:20:12 INFO Client:
client token: N/A
diagnostics: N/A
ApplicationMaster host: 10.131.24.124
ApplicationMaster RPC port: 0
queue: default
start time: 1645165208087
final status: UNDEFINED
tracking URL: http://awnx1-cdata-tnode01:8088/proxy/application_1644979649961_0059/
user: root
22/02/18 06:20:13 INFO Client: Application report for application_1644979649961_0059 (state: RUNNING)
22/02/18 06:20:14 INFO Client: Application report for application_1644979649961_0059 (state: RUNNING)
22/02/18 06:20:15 INFO Client: Application report for application_1644979649961_0059 (state: FINISHED)
22/02/18 06:20:15 INFO Client:
client token: N/A
diagnostics: User class threw exception: java.lang.NoSuchFieldError: NULL_VALUE
at org.apache.hudi.avro.HoodieAvroUtils.initRecordKeySchema(HoodieAvroUtils.java:245)
at org.apache.hudi.avro.HoodieAvroUtils.<clinit>(HoodieAvroUtils.java:88)
at org.apache.hudi.common.model.BaseAvroPayload.<init>(BaseAvroPayload.java:49)
at org.apache.hudi.common.model.OverwriteWithLatestAvroPayload.<init>(OverwriteWithLatestAvroPayload.java:42)
at org.apache.hudi.common.model.OverwriteWithLatestAvroPayload.<init>(OverwriteWithLatestAvroPayload.java:46)
at org.apache.hudi.QuickstartUtils$DataGenerator.generateRandomValue(QuickstartUtils.java:129)
at org.apache.hudi.QuickstartUtils$DataGenerator.lambda$generateInsertsStream$0(QuickstartUtils.java:152)
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
at java.util.stream.IntPipeline$4$1.accept(IntPipeline.java:250)
at java.util.stream.Streams$RangeIntSpliterator.forEachRemaining(Streams.java:110)
at java.util.Spliterator$OfInt.forEachRemaining(Spliterator.java:693)
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:566)
at org.apache.hudi.QuickstartUtils$DataGenerator.generateInserts(QuickstartUtils.java:164)
at kl.App$.main(App.scala:34)
at kl.App.main(App.scala)
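以上问题将hdp环境的spark-2.3.0更换为spark-2.4.7后解决。原因应是Avro版本不一致:Hudi 0.9.0依赖Avro 1.8.x中才有的JsonProperties.NULL_VALUE字段,而Spark 2.3.0自带的是Avro 1.7.7(没有该字段),运行时优先加载了集群自带的旧版Avro,因此即使通过--jars附带avro-1.8.2.jar仍然报NoSuchFieldError;Spark 2.4起自带Avro 1.8.2,升级后问题消失。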
如有疑问可以添加我微信:z1224576376