报错
22/02/22 10:24:20 INFO Client: Application report for application_1642757441712_0012 (state: FAILED)
22/02/22 10:24:20 INFO Client:
client token: N/A
diagnostics: Application application_1642757441712_0012 failed 2 times due to AM Container for appattempt_1642757441712_0012_000002 exited with exitCode: -1000
For more detailed output, check application tracking page:http://bigdata-dataos-001:18088/cluster/app/application_1642757441712_0012Then, click on links to logs of each attempt.
Diagnostics: File file:/root/.sparkStaging/application_1642757441712_0012/__spark_libs__7954752360413169627.zip does not exist
java.io.FileNotFoundException: File file:/root/.sparkStaging/application_1642757441712_0012/__spark_libs__7954752360413169627.zip does not exist
at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:611)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:824)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:601)
at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:428)
at org.apache.hadoop.yarn.util.FSDownload.copy(FSDownload.java:253)
at org.apache.hadoop.yarn.util.FSDownload.access$000(FSDownload.java:63)
at org.apache.hadoop.yarn.util.FSDownload$2.run(FSDownload.java:361)
at org.apache.hadoop.yarn.util.FSDownload$2.run(FSDownload.java:359)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:358)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:62)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
解决
修改conf/spark-env.sh
export HADOOP_HOME=/mnt/hadoop-2.7.7
export HADOOP_CONF_DIR=/mnt/hadoop-2.7.7/etc/hadoop
报错
Exception in thread "main" java.io.IOException: Port 9000 specified in URI hdfs://hacluster:9000/eventLogs but hster' is a logical (HA) namenode and does not use port information.
at org.apache.hadoop.hdfs.NameNodeProxies.createFailoverProxyProvider(NameNodeProxies.java:526)
at org.apache.hadoop.hdfs.NameNodeProxies.createProxy(NameNodeProxies.java:171)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:678)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:619)
at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:149)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
at org.apache.spark.util.Utils$.getHadoopFileSystem(Utils.scala:1866)
at org.apache.spark.scheduler.EventLoggingListener.<init>(EventLoggingListener.scala:71)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:521)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2520)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:935)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$7.apply(SparkSession.scala:926)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:926)
at org.apache.spark.sql.hive.thriftserver.SparkSQLEnv$.init(SparkSQLEnv.scala:48)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.<init>(SparkSQLCLIDriver.scala:315)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:166)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:84
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
解决
修改 conf/spark-defaults.conf
#我这里端口用的是8020需要改一下
spark.eventLog.dir hdfs://hacluster:8020/eventLogs
问题
22/02/22 10:56:07 WARN hive.HiveUtils: Hive jar path '/mnt/spark-2.4.3-bin-hadoop2.7/standalone-metastore/*' does not exist.
22/02/22 10:56:07 INFO hive.HiveUtils: Initializing HiveMetastoreConnection version 2.0 using
Exception in thread "main" java.lang.ClassNotFoundException: java.lang.NoClassDefFoundError: org/apache/hadoop/hive/conf/HiveConf when creating Hive client using classpath:
Please make sure that jars for your version of hive and hadoop are included in the paths passed to spark.sql.hive.metastore.jars.
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:277)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:384)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:286)
at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:214)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
at org.apache.spark.sql.hive.thriftserver.SparkSQLEnv$.init(SparkSQLEnv.scala:53)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.<init>(SparkSQLCLIDriver.scala:315)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:166)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:271)
... 27 more
Caused by: java.lang.NoClassDefFoundError: org/apache/hadoop/hive/conf/HiveConf
at org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:152)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:117)
... 32 more
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.conf.HiveConf
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.doLoadClass(IsolatedClientLoader.scala:226)
at org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.loadClass(IsolatedClientLoader.scala:215)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 34 more
解决
查看jars
修改
conf/spark-defaults.conf
spark.sql.hive.metastore.jars=/mnt/spark-2.4.3-bin-hadoop2.7/jars/*
spark.sql.hive.metastore.version=1.2.1
完整配置
conf/spark-defaults.conf
spark.driver.extraLibraryPath=/mnt/hadoop-2.7.7/lib/native:/mnt/hadoop-2.7.7/lib/native/Linux-amd64-64
spark.executor.extraJavaOptions=-XX:+UseNUMA
spark.executor.extraLibraryPath=/mnt/hadoop-2.7.7/lib/native:/mnt/hadoop-2.7.7/lib/native/Linux-amd64-64
spark.history.provider=org.apache.spark.deploy.history.FsHistoryProvider
spark.history.store.path=/var/lib/spark2/shs_db
spark.io.compression.lz4.blockSize=128kb
spark.master=yarn
spark.shuffle.file.buffer=1m
spark.shuffle.io.backLog=8192
spark.shuffle.io.serverThreads=128
spark.shuffle.unsafe.file.output.buffer=5m
spark.sql.autoBroadcastJoinThreshold=26214400
spark.sql.hive.convertMetastoreOrc=true
spark.sql.hive.metastore.jars=/mnt/spark-2.4.3-bin-hadoop2.7/jars/*
spark.sql.hive.metastore.version=1.2.1
spark.sql.orc.filterPushdown=true
spark.sql.orc.impl=native
spark.sql.statistics.fallBackToHdfs=true
spark.unsafe.sorter.spill.reader.buffer.size=1m
spark.yarn.historyServer.address=10.19.32.30:18081
spark.yarn.queue=default
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hacluster:8020/eventLogs
spark.eventLog.compress true
spark.driver.cores 1
spark.driver.memory 800m
spark.executor.cores 1
spark.executor.memory 1000m
spark.executor.instances 1
spark.sql.warehouse.dir hdfs://hacluster/user/hive/warehouse
conf/spark-env.sh
export SPARK_DAEMON_MEMORY="2048m"
export SPARK_DRIVER_MEMORY="10240m"
export SPARK_EXECUTOR_CORES="4"
export SPARK_EXECUTOR_MEMORY="4096m"
# A string representing this instance of spark.(Default: $USER)
SPARK_IDENT_STRING=$USER
# The scheduling priority for daemons. (Default: 0)
SPARK_NICENESS=0
export HADOOP_HOME=/mnt/hadoop-2.7.7
export HADOOP_CONF_DIR=/mnt/hadoop-2.7.7/etc/hadoop
# The java implementation to use.
export JAVA_HOME=/opt/jdk1.8.0_201