Scala：以字节流方式递归读取 HDFS 目录及其子目录下的文件，同时解决 NameNode（NN）standby 切换问题

package processor

import java.io._
import java.util.concurrent.{Executors, ExecutorService}

import Utils.{HDFSUtil, OperaFunc, MysqlUtil}
import org.apache.hadoop.fs.{Path, FSDataInputStream}

import scala.collection.mutable.ListBuffer

/**
  * Created by Victory.John on 2019/1/10.
  *
  */
object MainAccess {
  // Loads text lines from every file found under an HDFS directory (the file
  // listing is delegated to HDFSUtil.listChildren) and stores them in the
  // public buffers below.

  // Order records
  var orderBodys: ListBuffer[String] = new ListBuffer[String]
  // Position records
  var positionBodys: ListBuffer[String] = new ListBuffer[String]
  // Riding-track records
  var ridingBodys: ListBuffer[String] = new ListBuffer[String]
  var bikeBodys: ListBuffer[String] = new ListBuffer[String]
  // Root path used by initData()
  var orderPath = ""
  // Text of all files read during the current getData() run
  var sb = new StringBuffer()
  // Lines of `sb`; starts empty (not null) so readers never hit a NullPointerException
  var strs: Array[String] = Array.empty[String]

  /**
    * Program entry point: loads every line found under the HDFS path given as
    * the first argument into [[orderBodys]].
    *
    * @param args command-line arguments; args(0) is the HDFS path to read
    */
  def main(args: Array[String]): Unit = {
    args.headOption match {
      case Some(inputPath) =>
        getData(inputPath, orderBodys)
      case None =>
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        System.err.println("Usage: MainAccess <hdfs-path>")
        sys.exit(1)
    }
  }

  /**
    * Step 1: initialise the FileSystem instance, read every file listed under
    * `path`, and append the resulting lines to `orders`.
    *
    * @param path   HDFS path whose children are listed and read
    * @param orders buffer that receives the lines that were read
    */
  def getData(path: String, orders: ListBuffer[String]): Unit = {
    val holder = new ListBuffer[String]
    // Start from a clean slate so a second call does not re-add the lines
    // accumulated by an earlier call.
    sb.setLength(0)
    strs = Array.empty[String]

    HDFSUtil.getFSInstance()
    try {
      val listPaths = HDFSUtil.listChildren(path, holder)
      for (childPath <- listPaths) {
        println(("path", childPath))
        getFSReadLine(childPath, orders)
      }
      println(("strs length", strs.length))
      // Append to the buffer the caller passed in, not unconditionally to orderBodys.
      strs.foreach(line => orders += line)
      println(("orders length", orders.size))
    } finally {
      // Release the FileSystem even when listing or reading throws.
      HDFSUtil.close()
    }
  }

  /**
    * Step 2: read one HDFS file as bytes, decode it to text, append the text to
    * [[sb]] and refresh [[strs]] with the lines accumulated so far.
    *
    * Failures are logged and swallowed so that one unreadable file does not
    * abort the whole run; in that case [[sb]] and [[strs]] are left unchanged.
    *
    * @param path  full HDFS path of the file to read
    * @param order unused here; kept for interface compatibility (getData copies
    *              the lines from [[strs]] once all files have been read)
    */
  def getFSReadLine(path: String, order: ListBuffer[String]): Unit = {
    var inputStream: FSDataInputStream = null
    try {
      // Open the HDFS input stream
      inputStream = HDFSUtil.fs.open(new Path(path))
      val collected = new ByteArrayOutputStream()
      val buf: Array[Byte] = new Array[Byte](1024)
      var bytesRead: Int = inputStream.read(buf)
      while (bytesRead >= 0) {
        collected.write(buf, 0, bytesRead)
        bytesRead = inputStream.read(buf)
      }
      // Decode once over the complete content: decoding each 1024-byte chunk on
      // its own corrupts multi-byte characters that straddle a chunk boundary.
      // The platform default charset is kept, as in the per-chunk decoding.
      val content = new String(collected.toByteArray, java.nio.charset.Charset.defaultCharset())
      // Keep the last line of the previous file from fusing with the first line
      // of this one when the previous file had no trailing newline.
      if (sb.length() > 0 && sb.charAt(sb.length() - 1) != '\n') {
        sb.append('\n')
      }
      sb.append(content)
      strs = sb.toString.split("\n")
    } catch {
      case ex: Exception =>
        println("sorceerror=" + ex.getMessage)
        // printStackTrace actually reports the failure; fillInStackTrace only
        // resets the recorded trace and prints nothing.
        ex.printStackTrace()
    } finally {
      // Always release the stream, including when reading failed part-way.
      if (inputStream != null) {
        try {
          inputStream.close()
        } catch {
          case closeEx: IOException =>
            println("sorceerror=" + closeEx.getMessage)
        }
      }
    }
  }

  /**
    * Loads the order data found under [[orderPath]] into [[orderBodys]] and
    * prints progress messages.
    */
  def initData(): Unit = {
    println("数据初始化>>>>>>>>>" + orderPath)
    getData(orderPath, orderBodys)
    println("order类型完成>>>>>>:" + orderBodys.size)
    // Position and riding data were sketched to load the same way but are disabled:
    // getData(orderPath + partitionPath, positionBodys)
    // getData(orderPath + positionBodys, ridingBodys)
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值