在用Scala写spark时候,用了sc操作了hdfs,出现了如下错误:
java.io.IOException: No FileSystem for scheme: hdfs
at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2660)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2667)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:172)
at org.apache.spark.deploy.yarn.Client$$anonfun$5.apply(Client.scala:123)
at org.apache.spark.deploy.yarn.Client$$anonfun$5.apply(Client.scala:123)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.deploy.yarn.Client.<init>(Client.scala:123)
at org.apache.spark.deploy.yarn.Client.<init>(Client.scala:69)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:55)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:173)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:509)
解决方案1:
val hadoopConf = spark(这个是SparkSession变量名).sparkContext.hadoopConfiguration
hadoopConf.set("fs.hdfs.impl", classOf[org.apache.hadoop.hdfs.DistributedFileSystem].getName)
hadoopConf.set("fs.file.impl", classOf[org.apache.hadoop.fs.LocalFileSystem].getName)
解决方案2:
在core-site.xml中加入如下配置,并且重启,以后每次都会生效。
<property>
<name>fs.file.impl</name>
<value>org.apache.hadoop.fs.LocalFileSystem</value>
</property>
<property>
<name>fs.hdfs.impl</name>
<value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
</property>