用 Scala 读取一个纯字符(文本)文件。该文件是已经清洗完的数据,格式有规律,一行是一条数据。
现在的需求是每次读取文件中的一部分,依次把整个文件读取完。
之前用字节流读取时偶尔会出现乱码,最后改成一次读取一行了。
import java.io.{BufferedReader, InputStreamReader}
import org.apache.hadoop.fs.FSDataInputStream
/**
 * Created by wx on 2017/7/25.
 *
 * Prints a text file stored on HDFS line by line, prefixing every line with
 * its 1-based line number and a tab character.
 */
object ReadHDFSFileByLine {

  /**
   * Opens the hard-coded HDFS path, prints each line to standard output and
   * releases the reader and the HDFS resources afterwards.
   *
   * Any `Exception` raised while opening or reading the file is reported with
   * `printStackTrace()` and not rethrown.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    import java.nio.charset.StandardCharsets

    var inputStream: FSDataInputStream = null
    var bufferedReader: BufferedReader = null
    try {
      // Obtain the HDFS input stream through the project's HDFSUtil helper.
      inputStream = HDFSUtil.getFSDataInputStream("hdfs://master:9000/TestData/aviation7/part-00018")

      // Decode with an explicit charset instead of the platform default, which
      // differs between machines and can garble non-ASCII text.
      // NOTE(review): assumes the file is UTF-8 encoded - confirm against the data.
      val reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))
      bufferedReader = reader

      // NOTE: the Java idiom
      //   while ((lineTxt = bufferedReader.readLine()) != null) { ... }
      // never terminates in Scala: an assignment evaluates to Unit, so the
      // comparison with null is always true. Reading through an iterator that
      // stops at the first null (end of stream) avoids that trap.
      Iterator
        .continually(reader.readLine())
        .takeWhile(_ != null)
        .zipWithIndex
        .foreach { case (lineTxt, index) =>
          println(s"${index + 1}\t$lineTxt")
          // Short pause after each line, kept from the original implementation.
          Thread.sleep(1)
        }
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      // Nested try/finally so that a failure while closing the reader does not
      // skip the HDFSUtil cleanup.
      try {
        if (bufferedReader != null) bufferedReader.close()
      } finally {
        // NOTE(review): presumably releases what HDFSUtil opened - verify in HDFSUtil.
        if (inputStream != null) HDFSUtil.close
      }
    }
  }
}