HDFS文件操作编程实现(Scala)

HDFSHelper.scala

import java.io.{FileSystem => _, _}

import org.apache.hadoop.fs._

import scala.collection.mutable.ListBuffer

/**
  * Created by wcj
  */
object HDFSHelper {

  def isDir(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.isDirectory(new Path(name))
  }
  def isDir(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.isDirectory(name)
  }
  def isFile(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.isFile(new Path(name))
  }
  def isFile(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.isFile(name)
  }
  def createFile(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.createNewFile(new Path(name))
  }
  def createFile(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.createNewFile(name)
  }
  def createFolder(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.mkdirs(new Path(name))
  }
  def createFolder(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.mkdirs(name)
  }
  def exists(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.exists(new Path(name))
  }
  def exists(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.exists(name)
  }
  def transport(inputStream : InputStream, outputStream : OutputStream): Unit ={
    val buffer = new Array[Byte](64 * 1000)
    var len = inputStream.read(buffer)
    while (len != -1) {
      outputStream.write(buffer, 0, len - 1)
      len = inputStream.read(buffer)
    }
    outputStream.flush()
    inputStream.close()
    outputStream.close()
  }
  class MyPathFilter extends PathFilter {
    override def accept(path: Path): Boolean = true
  }

  /**
    * create a target file and provide parent folder if necessary
    */
  def createLocalFile(fullName : String) : File = {
    val target : File = new File(fullName)
    if(!target.exists){
      val index = fullName.lastIndexOf(File.separator)
      val parentFullName = fullName.substring(0, index)
      val parent : File = new File(parentFullName)

      if(!parent.exists)
        parent.mkdirs
      else if(!parent.isDirectory)
        parent.mkdir

      target.createNewFile
    }
    target
  }

  /**
    * delete file in hdfs
    * @return true: success, false: failed
    */
  def deleteFile(hdfs : FileSystem, path: String) : Boolean = {
    if (isDir(hdfs, path))
      hdfs.delete(new Path(path), true)//true: delete files recursively
    else
      hdfs.delete(new Path(path), false)
  }

  /**
    * get all file children's full name of a hdfs dir, not include dir children
    * @param fullName the hdfs dir's full name
    */
  def listChildren(hdfs : FileSystem, fullName : String, holder : ListBuffer[String]) : ListBuffer[String] = {
    val filesStatus = hdfs.listStatus(new Path(fullName), new MyPathFilter)
    for(status <- filesStatus){
      val filePath : Path = status.getPath
      if(isFile(hdfs,filePath))
        holder += filePath.toString
      else
        listChildren(hdfs, filePath.toString, holder)
    }
    holder
  }

  def copyFile(hdfs : FileSystem, source: String, target: String): Unit = {

    val sourcePath = new Path(source)
    val targetPath = new Path(target)

    if(!exists(hdfs, targetPath))
      createFile(hdfs, targetPath)

    val inputStream : FSDataInputStream = hdfs.open(sourcePath)
    val outputStream : FSDataOutputStream = hdfs.create(targetPath)
    transport(inputStream, outputStream)
  }

  def copyFolder(hdfs : FileSystem, sourceFolder: String, targetFolder: String): Unit = {
    val holder : ListBuffer[String] = new ListBuffer[String]
    val children : List[String] = listChildren(hdfs, sourceFolder, holder).toList
    for(child <- children)
      copyFile(hdfs, child, child.replaceFirst(sourceFolder, targetFolder))
  }

  def copyFileFromLocal(hdfs : FileSystem, localSource: String, hdfsTarget: String): Unit = {
    val targetPath = new Path(hdfsTarget)
    if(!exists(hdfs, targetPath))
      createFile(hdfs, targetPath)

    val inputStream : FileInputStream = new FileInputStream(localSource)
    val outputStream : FSDataOutputStream = hdfs.create(targetPath)
    transport(inputStream, outputStream)
  }

  def copyFileToLocal(hdfs : FileSystem, hdfsSource: String, localTarget: String): Unit = {
    val localFile : File = createLocalFile(localTarget)

    val inputStream : FSDataInputStream = hdfs.open(new Path(hdfsSource))
    val outputStream : FileOutputStream = new FileOutputStream(localFile)
    transport(inputStream, outputStream)
  }

  def copyFolderFromLocal(hdfs : FileSystem, localSource: String, hdfsTarget: String): Unit = {
    val localFolder : File = new File(localSource)
    val allChildren : Array[File] = localFolder.listFiles
    for(child <- allChildren){
      val fullName = child.getAbsolutePath
      val nameExcludeSource : String = fullName.substring(localSource.length)
      val targetFileFullName : String = hdfsTarget + Path.SEPARATOR + nameExcludeSource
      if(child.isFile)
        copyFileFromLocal(hdfs, fullName, targetFileFullName)
      else
        copyFolderFromLocal(hdfs, fullName, targetFileFullName)
    }
  }

  def copyFolderToLocal(hdfs : FileSystem, hdfsSource: String, localTarget: String): Unit = {
    val holder : ListBuffer[String] = new ListBuffer[String]
    val children : List[String] = listChildren(hdfs, hdfsSource, holder).toList
    val hdfsSourceFullName = hdfs.getFileStatus(new Path(hdfsSource)).getPath.toString
    val index = hdfsSourceFullName.length
    for(child <- children){
      val nameExcludeSource : String = child.substring(index + 1)
      val targetFileFullName : String = localTarget + File.separator + nameExcludeSource
      copyFileToLocal(hdfs, child, targetFileFullName)
    }
  }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

数智侠

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值