今天在使用idea操作spark进行hive insert into的时候发现运行缓慢,一直卡在一个地方:
Exception in thread "main" java.lang.RuntimeException: Cannot create staging directory 'hdfs://bigdata1:9000/user/hive/warehouse/ods.db/user_info/.hive-staging_hive_2023-08-15_19-16-24_121_3409857277518366194-1': DestHost:destPort bigdata1:9000 , LocalHost:localPort DESKTOP-6GUQ4QU/:0. Failed on local exception: java.io.InterruptedIOException: Interrupted while waiting for IO on channel java.nio.channels.SocketChannel[connection-pending remote=bigdata1/:9000]. 20000 millis timeout left.
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.getStagingDir(SaveAsHiveFile.scala:250)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.getStagingDir$(SaveAsHiveFile.scala:213)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.getStagingDir(InsertIntoHiveTable.scala:71)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.getExternalScratchDir(SaveAsHiveFile.scala:210)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.newVersionExternalTempPath(SaveAsHiveFile.scala:192)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.getExternalTmpPath(SaveAsHiveFile.scala:131)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.getExternalTmpPath$(SaveAsHiveFile.scala:100)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.getExternalTmpPath(InsertIntoHiveTable.scala:71)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:228)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:228)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:615)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610)
at com.itbm.yyh.y1$.main(y1.scala:84)
at com.itbm.yyh.y1.main(y1.scala)
Caused by: java.io.InterruptedIOException: DestHost:destPort bigdata1:9000 , LocalHost:localPort DESKTOP-6GUQ4QU/:0. Failed on local exception: java.io.InterruptedIOException: Interrupted while waiting for IO on channel java.nio.channels.SocketChannel[connection-pending remote=bigdata1/:9000]. 20000 millis timeout left.
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:831)
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:806)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1549)
at org.apache.hadoop.ipc.Client.call(Client.java:1491)
at org.apache.hadoop.ipc.Client.call(Client.java:1388)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)
at com.sun.proxy.$Proxy26.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:904)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
at com.sun.proxy.$Proxy27.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1661)
at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1577)
at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1574)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1589)
at org.apache.hadoop.hive.common.FileUtils.mkdir(FileUtils.java:528)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.getStagingDir(SaveAsHiveFile.scala:242)
... 28 more
Caused by: java.io.InterruptedIOException: Interrupted while waiting for IO on channel java.nio.channels.SocketChannel[connection-pending remote=bigdata1/:9000]. 20000 millis timeout left.
at org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.select(SocketIOWithTimeout.java:342)
at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:203)
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:700)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:804)
at org.apache.hadoop.ipc.Client$Connection.access$3800(Client.java:421)
at org.apache.hadoop.ipc.Client.getConnection(Client.java:1606)
at org.apache.hadoop.ipc.Client.call(Client.java:1435)
... 50 more
这里搞了好久发现是hdfs没有写入的权限,然后我是用下面命令发现没有权限使用使用sudo也不行
sudo hdfs dfs -chmod -R 777 /user
这里是发现Namenode处于安全模式需要先进行取消安全模式
sudo hdfs dfsadmin -safemode leave
这样就成功了:
sudo hdfs dfs -chmod -R 777 /user