一、读取HDFS文件
/**
 * Utility for reading a text file stored on HDFS and echoing it to standard output.
 */
public class ReadHdfsFileUtil {
    // Logger for this class; static so that it can be used from the static methods below.
    static final Logger log = Logger.getLogger(ReadHdfsFileUtil.class);

    // Configuration used to obtain the FileSystem handle.
    static Configuration conf = new Configuration();

    /**
     * Reads the file at {@code hdfsPath} line by line and prints every line to
     * {@code System.out}.
     *
     * @param hdfsPath path of the file to read; must not be null or blank
     * @throws Exception if {@code hdfsPath} is null/blank, or if opening or reading
     *                   the file fails
     */
    public static void readHdfsFile(String hdfsPath) throws Exception {
        // Reject a missing or blank path before touching the file system.
        if (hdfsPath == null || hdfsPath.trim().length() == 0) {
            throw new Exception("所要读取的源文件" + hdfsPath + ",不存在,请检查!");
        }

        // File system handle for the cluster described by conf.
        // NOTE(review): FileSystem.get presumably returns a shared, cached instance,
        // so it is deliberately not closed here - confirm against the Hadoop docs.
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(hdfsPath);

        // Bytes -> characters. The charset is given explicitly so decoding does not
        // depend on the platform default. try-with-resources closes the whole chain
        // even when readLine() or println throws.
        try (FSDataInputStream fsdis = fs.open(path);
             InputStreamReader isr = new InputStreamReader(fsdis, "utf-8");
             BufferedReader br = new BufferedReader(isr)) {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}
二、写入HDFS文件
1)、字节流
/**
 * Copies the content of a local file into an HDFS file using a raw byte stream.
 *
 * @param toHdfsPath destination path on HDFS
 * @param localPath  path of the local file whose content is written
 * @throws Exception if reading the local file or writing the HDFS file fails
 */
public static void writeToHdfsFile(String toHdfsPath, String localPath) throws Exception {
    // Read the local file first, so that a failure here does not create the
    // destination file at all.
    String content = readLocalFile(localPath);

    FileSystem fs = FileSystem.get(conf);
    Path hdfsPath = new Path(toHdfsPath);

    // try-with-resources closes the output stream even when write() throws;
    // closing is what makes the written bytes land in the file.
    try (FSDataOutputStream fsdos = fs.create(hdfsPath)) {
        fsdos.write(content.getBytes("utf-8"));
    }
}
2)、字节转字符
此时要注意关闭流,即调用 bw.close()。写文件时如果不关闭(或不刷新)流,缓冲区中的数据不会落地到文件中,而是停留在内存里,程序结束后即被丢弃,因此内容写不进去。此外,读写文件时不关闭流会一直占用文件句柄、连接和缓冲区等资源,严重时可能导致资源耗尽甚至内存溢出。
/**
 * Copies the lines of a local file into an HDFS file using a buffered character
 * writer, terminating every line with '\n'.
 *
 * @param toHdfsPath destination path on HDFS
 * @param localPath  path of the local file whose lines are written
 * @throws Exception if reading the local file or writing the HDFS file fails
 */
public static void writeToHdfsFile(String toHdfsPath, String localPath) throws Exception {
    // Read the local file first, so that a failure here does not create the
    // destination file at all.
    List<String> lines = readLocalFile(localPath);

    FileSystem fs = FileSystem.get(conf);
    Path hdfsPath = new Path(toHdfsPath);

    // Characters -> bytes. The charset is given explicitly so encoding does not
    // depend on the platform default. try-with-resources flushes and closes the
    // writer chain even when a write fails; without the close, buffered data
    // would never reach the file.
    try (FSDataOutputStream fsdos = fs.create(hdfsPath);
         OutputStreamWriter osw = new OutputStreamWriter(fsdos, "utf-8");
         BufferedWriter bw = new BufferedWriter(osw)) {
        for (String line : lines) {
            bw.write(line);
            bw.write('\n');
        }
    }
}