1. Creating and deleting a directory in HDFS
(1) The following example is based on Spark 2.4. From Spark 2.0 onward, SparkSession is the unified entry point, so this code works on any Spark 2.x; versions before Spark 2.0 do not have SparkSession.
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession

object runDriver {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession.builder()
      .appName("hdfsDir")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()
    // sparkContext is a val on SparkSession, so it is accessed without parentheses
    val sc = sparkSession.sparkContext
    // Create a directory
    createDirInHDFS(sc, "hdfs://master:9000/data/createdDir")
    // Delete a directory
    removeDirInHDFS(sc, "hdfs://master:9000/data/deletedDir")
  }

  // Create a directory in HDFS
  def createDirInHDFS(sc: SparkContext, filePath: String): Unit = {
    val path = new Path(filePath)
    val hadoopConf = sc.hadoopConfiguration
    val hdfs = FileSystem.get(hadoopConf)
    hdfs.mkdirs(path)
    hdfs.close()
  }

  // Recursively delete a directory in HDFS if it exists
  def removeDirInHDFS(sc: SparkContext, filePath: String): Unit = {
    val path = new Path(filePath)
    val hadoopConf = sc.hadoopConfiguration
    val hdfs = FileSystem.get(hadoopConf)
    if (hdfs.exists(path)) {
      hdfs.delete(path, true) // true = delete recursively
    }
    hdfs.close()
  }
}
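To confirm that the calls above did what was intended, the same FileSystem API can be used to check for the path and list its children. The snippet below is a small sketch rather than part of the original example; the object and method names (HdfsDirCheck, listDirInHDFS) are made up here, and the path is simply reused from the example above.

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext

object HdfsDirCheck {
  // Hypothetical helper: print the children of a directory to verify it exists
  def listDirInHDFS(sc: SparkContext, dirPath: String): Unit = {
    val hdfs = FileSystem.get(sc.hadoopConfiguration)
    val path = new Path(dirPath)
    if (hdfs.exists(path)) {
      // listStatus returns one FileStatus per entry directly under the path
      hdfs.listStatus(path).foreach(status => println(status.getPath))
    } else {
      println(s"$dirPath does not exist")
    }
  }
}

Calling HdfsDirCheck.listDirInHDFS(sc, "hdfs://master:9000/data/createdDir") right after createDirInHDFS should take the exists branch (printing nothing for a freshly created, empty directory).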
(2) The following is for versions before Spark 2.0 (based on Spark 1.6.0).
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}

object runDriver {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("DataSetsCreator")
    val sc = new SparkContext(conf)
    // Create a directory
    createDirInHDFS(sc, "hdfs://master:9000/data/createdDir")
    // Delete a directory
    removeDirInHDFS(sc, "hdfs://master:9000/data/deletedDir")
  }

  // Create a directory in HDFS
  def createDirInHDFS(sc: SparkContext, filePath: String): Unit = {
    val path = new Path(filePath)
    val hadoopConf = sc.hadoopConfiguration
    val hdfs = FileSystem.get(hadoopConf)
    hdfs.mkdirs(path)
    hdfs.close()
  }

  // Recursively delete a directory in HDFS if it exists
  def removeDirInHDFS(sc: SparkContext, filePath: String): Unit = {
    val path = new Path(filePath)
    val hadoopConf = sc.hadoopConfiguration
    val hdfs = FileSystem.get(hadoopConf)
    if (hdfs.exists(path)) {
      hdfs.delete(path, true) // true = delete recursively
    }
    hdfs.close()
  }
}
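One caveat that applies to both versions: FileSystem.get(hadoopConf) returns a cached instance shared within the JVM for the same scheme, authority and user, so hdfs.close() also closes that shared handle for any other code that obtained it. In a short driver program that exits right away this is harmless, but in a longer-running job a dedicated instance is safer to close. The sketch below is one possible variant under that assumption (the object and helper names are invented here); it uses FileSystem.newInstance, which returns a non-cached instance.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object SafeHdfsOps {
  // Run `body` against a dedicated (non-cached) FileSystem instance and always close it
  def withFileSystem[T](conf: Configuration)(body: FileSystem => T): T = {
    val fs = FileSystem.newInstance(conf)
    try body(fs) finally fs.close()
  }

  // Create the directory; returns true on success (mkdirs is idempotent)
  def createDir(conf: Configuration, dir: String): Boolean =
    withFileSystem(conf)(_.mkdirs(new Path(dir)))

  // Recursively delete the directory if it exists; returns true if something was deleted
  def removeDir(conf: Configuration, dir: String): Boolean =
    withFileSystem(conf) { fs =>
      val p = new Path(dir)
      if (fs.exists(p)) fs.delete(p, true) else false
    }
}

For example, SafeHdfsOps.createDir(sc.hadoopConfiguration, "hdfs://master:9000/data/createdDir") creates the directory and closes only its own FileSystem instance.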