package processor
import java.io._
import java.util.concurrent.{Executors, ExecutorService}

import scala.collection.mutable.ListBuffer
import scala.util.control.NonFatal

import org.apache.hadoop.fs.{Path, FSDataInputStream}

import Utils.{HDFSUtil, OperaFunc, MysqlUtil}
/**
* Created by Victory.John on 2019/1/10.
*
*/
object MainAccess {
  // Order records read from HDFS.
  var orderBodys: ListBuffer[String] = new ListBuffer[String]
  // Position records (currently unused — see commented calls in initData).
  var positionBodys: ListBuffer[String] = new ListBuffer[String]
  // Riding-track records (currently unused).
  var ridingBodys: ListBuffer[String] = new ListBuffer[String]
  var bikeBodys: ListBuffer[String] = new ListBuffer[String]
  var orderPath = ""
  // Accumulates the text of every file read in the current getData call.
  // Reset at the start of getData so repeated calls do not duplicate data.
  var sb = new StringBuffer()
  // Lines of the accumulated text; null until at least one file has been read.
  var strs: Array[String] = null

  def main(args: Array[String]): Unit = {
    getData(args(0), orderBodys)
  }

  /**
   * Step 1: initialise the FileSystem instance, list every file under `path`
   * (HDFSUtil.listChildren recurses into subdirectories), read each file and
   * append all resulting lines to `orders`.
   *
   * The FileSystem is always released via `finally`, even when reading fails.
   *
   * @param path   HDFS directory (or single file) to read
   * @param orders buffer that receives the lines (was previously ignored and
   *               hard-coded to `orderBodys`; all existing callers pass
   *               `orderBodys`, so behavior for them is unchanged)
   */
  def getData(path: String, orders: ListBuffer[String]): Unit = {
    val holder = new ListBuffer[String]
    HDFSUtil.getFSInstance()
    try {
      // Clear state from any previous invocation; without this, a second call
      // would re-append everything read by the first one.
      sb.setLength(0)
      val listPaths = HDFSUtil.listChildren(path, holder)
      for (child <- listPaths) {
        println("path", child)
        getFSReadLine(child, orders)
      }
      // strs stays null when no file could be read at all — guard against NPE.
      if (strs != null) {
        println("strs length", strs.length)
        orders ++= strs
      }
      println("orders length", orders.size)
    } finally {
      HDFSUtil.close()
    }
  }

  /**
   * Step 2: open one HDFS file, decode it to text line by line and append it
   * to the shared `sb` buffer, then refresh `strs` with the line-split of the
   * accumulated text.
   *
   * Uses a BufferedReader over the byte stream so multi-byte UTF-8 characters
   * are never split across read-buffer boundaries (the previous raw 1024-byte
   * chunk decoding could corrupt them). NOTE(review): assumes the HDFS files
   * are UTF-8 encoded — confirm against the producers.
   *
   * @param path  full HDFS path of a single file
   * @param order unused here; getData distributes the lines afterwards (kept
   *              for signature compatibility)
   */
  def getFSReadLine(path: String, order: ListBuffer[String]): Unit = {
    var inputStream: FSDataInputStream = null
    var bufferedReader: BufferedReader = null
    try {
      inputStream = HDFSUtil.fs.open(new Path(path))
      bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))
      var line = bufferedReader.readLine()
      while (line != null) {
        sb.append(line).append("\n")
        line = bufferedReader.readLine()
      }
      strs = sb.toString.split("\n")
    } catch {
      // NonFatal lets OutOfMemoryError / InterruptedException propagate
      // instead of being swallowed here.
      case NonFatal(ex) =>
        println("sorceerror=" + ex.getMessage)
    } finally {
      // Always release the stream, even on failure (previously it was only
      // closed on the success path). Closing the reader closes the stream.
      if (bufferedReader != null) bufferedReader.close()
      else if (inputStream != null) inputStream.close()
    }
  }

  /** Loads the order data from `orderPath` into `orderBodys`. */
  def initData(): Unit = {
    println("数据初始化>>>>>>>>>" + orderPath)
    getData(orderPath, orderBodys)
    println("order类型完成>>>>>>:" + orderBodys.size)
    // getData(orderPath + partitionPath, positionBodys)
    // println("position类型完成>>>>>>:" + positionBodys.size)
    // getData(orderPath + positionBodys, ridingBodys)
    // println("riding类型完成>>>>>>:" + ridingBodys.size)
  }
}
// Article note (scraped blog boilerplate, kept as a comment so the file compiles):
// "Scala: read HDFS files via a byte stream, recursing into subdirectories,
//  while also handling NameNode standby failover."
// Latest recommended article published 2024-06-18 16:10:07.