Hadoop integrates many file systems behind a single, comprehensive file system abstraction that defines the interfaces a file system implementation must provide; HDFS is just one concrete instance of this abstraction. The abstraction is exposed as the high-level abstract class org.apache.hadoop.fs.FileSystem, and the discussion below walks through the common file operations built around it.
Essentially all of Hadoop's file-operation APIs live in the org.apache.hadoop.fs package. They support operations such as opening files, reading and writing files, uploading and downloading, and deleting files.
The class Hadoop ultimately exposes to users is FileSystem. Since it is abstract, instances can only be obtained through its static get methods, so let's first look at how to obtain one.
1. Instantiating FileSystem
/**
 * Build the Configuration that points at the HDFS NameNode.
 * @return the Configuration
 */
public static Configuration getConfig(){
    //get the Configuration and set the default file system address
    Configuration config = new Configuration();
    config.set("fs.defaultFS", "hdfs://hadoop-senior.shinelon.com:8020");
    return config;
}

/**
 * Get an HDFS FileSystem instance.
 * @return the FileSystem
 * @throws IOException
 */
public static FileSystem getFileSystem() throws IOException{
    Configuration config = getConfig();
    FileSystem fileSystem = FileSystem.get(config);
    return fileSystem;
}
The code above encapsulates two methods: the first builds the Configuration, and the second uses it to instantiate the FileSystem.
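A quick way to check that this works (a minimal sketch, assuming the NameNode address configured above is reachable):

FileSystem fs = getFileSystem();
//should print hdfs://hadoop-senior.shinelon.com:8020
System.out.println(fs.getUri());
fs.close();

One caveat worth knowing: FileSystem.get returns a cached instance shared per configuration, so closing it here also closes it for any other code that obtained the same instance.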
2. Reading the content of an HDFS file
/**
 * Read a file's content and print it to standard output.
 * @param fileSystem the FileSystem instance
 * @param fileName the path of the file on HDFS
 * @throws IOException
 */
public static void readFile(FileSystem fileSystem, String fileName) throws IOException{
    //build the HDFS path
    Path realPath = new Path(fileName);
    //open the file
    FSDataInputStream input = fileSystem.open(realPath);
    try{
        //copy the stream to stdout; 4096 is the buffer size, false leaves the streams open
        IOUtils.copyBytes(input, System.out, 4096, false);
    }catch(Exception e){
        e.printStackTrace();
    }finally{
        //close the input stream ourselves
        IOUtils.closeStream(input);
    }
}
The parameter fileSystem is the FileSystem instance obtained above; fileName is the path of the file on the HDFS file system.
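For example, printing the word-count input file that the main method at the end of this post also reads:

FileSystem fs = getFileSystem();
readFile(fs, "/user/shinelon/mapreduce/wordcount/input/wc.input");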
3. Writing a file
/**
 * Write a local file to the HDFS file system.
 * @throws IOException
 */
public static void writeFile() throws IOException{
    FileSystem fileSystem = getFileSystem();
    //the relative path resolves to /user/shinelon/upload.input on HDFS
    String putFileName = "upload.input";
    Path putPath = new Path(putFileName);
    //get the output stream on HDFS
    FSDataOutputStream output = fileSystem.create(putPath);
    //get the input stream on the local file
    FileInputStream inputStream = new FileInputStream(new File("/opt/modules/hadoop-2.5.0/upload.input"));
    try{
        IOUtils.copyBytes(inputStream, output, 4096, false);
    }catch(Exception e){
        e.printStackTrace();
    }finally{
        IOUtils.closeStream(inputStream);
        IOUtils.closeStream(output);
    }
}
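FileSystem also provides a one-call helper for uploads. A sketch equivalent to the method above, using the same local and HDFS paths:

FileSystem fs = getFileSystem();
//copies the local file to the (relative) HDFS path upload.input
fs.copyFromLocalFile(new Path("/opt/modules/hadoop-2.5.0/upload.input"), new Path("upload.input"));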
4. Listing files on HDFS
/**
 * List the entries under a path on the HDFS file system.
 */
public static void getFileList(){
    try{
        FileSystem fs = getFileSystem();
        Path realPath = new Path("/");
        //listStatus returns the direct children of the given path
        FileStatus[] file = fs.listStatus(realPath);
        for(FileStatus f : file){
            System.out.println(f.getPath());
        }
    }catch (Exception e) {
        e.printStackTrace();
    }
}
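Note that listStatus only returns the direct children of the given path. For a recursive walk, Hadoop 2.x also offers FileSystem.listFiles, which returns the files (not directories) under a path. A minimal sketch; it needs the extra imports org.apache.hadoop.fs.LocatedFileStatus and org.apache.hadoop.fs.RemoteIterator:

FileSystem fs = getFileSystem();
//true = descend into subdirectories
RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/"), true);
while (it.hasNext()) {
    System.out.println(it.next().getPath());
}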
5. Creating a file and writing content to it
/**
 * Create a file on HDFS and write content to it.
 * @throws IOException
 */
public static void makeDir() throws IOException{
    FileSystem fs = getFileSystem();
    Path mkdirPath = new Path("user/mkdir");
    //create the file; if it already exists, overwrite it
    FSDataOutputStream create = fs.create(mkdirPath, true);
    create.writeBytes("hello hdfs");
    //close the stream so the content is actually flushed to HDFS
    create.close();
}
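Despite its name, this method creates a file, not a directory: fs.create always produces a file. To create an actual directory, use mkdirs. A short sketch (the path here is a made-up example):

FileSystem fs = getFileSystem();
//creates the directory and any missing parents, like mkdir -p; returns true on success
boolean created = fs.mkdirs(new Path("user/realdir"));
System.out.println(created);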
6. Downloading a file to the local file system
/**
 * Download a file from HDFS to the local file system.
 * @throws IOException
 */
public static void downloadFile() throws IOException{
    FileSystem fs = getFileSystem();
    //source file on HDFS
    Path src = new Path("user/mkdir");
    //destination on the local file system
    Path local = new Path("/opt/modules/downloadFile");
    fs.copyToLocalFile(src, local);
}
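The introduction also mentions deleting files, which the sections above do not cover. A minimal sketch using FileSystem.delete, reusing the file created in section 5:

FileSystem fs = getFileSystem();
//true = delete recursively; required when the path is a non-empty directory
fs.delete(new Path("user/mkdir"), true);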
Those are the common operations on the HDFS file system through the Hadoop Java API. The complete code follows:
package cn.just.hadoop.hdfs;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Common HDFS operations.
* @author shinelon
*
*/
public class HdfsOperation {
    /**
     * Build the Configuration that points at the HDFS NameNode.
     * @return the Configuration
     */
    public static Configuration getConfig(){
        //get the Configuration and set the default file system address
        Configuration config = new Configuration();
        config.set("fs.defaultFS", "hdfs://hadoop-senior.shinelon.com:8020");
        return config;
    }

    /**
     * Get an HDFS FileSystem instance.
     * @return the FileSystem
     * @throws IOException
     */
    public static FileSystem getFileSystem() throws IOException{
        Configuration config = getConfig();
        FileSystem fileSystem = FileSystem.get(config);
        return fileSystem;
    }
    /**
     * Read a file's content and print it to standard output.
     * @param fileSystem the FileSystem instance
     * @param fileName the path of the file on HDFS
     * @throws IOException
     */
    public static void readFile(FileSystem fileSystem, String fileName) throws IOException{
        //build the HDFS path
        Path realPath = new Path(fileName);
        //open the file
        FSDataInputStream input = fileSystem.open(realPath);
        try{
            //copy the stream to stdout; 4096 is the buffer size, false leaves the streams open
            IOUtils.copyBytes(input, System.out, 4096, false);
        }catch(Exception e){
            e.printStackTrace();
        }finally{
            //close the input stream ourselves
            IOUtils.closeStream(input);
        }
    }
    /**
     * Write a local file to the HDFS file system.
     * @throws IOException
     */
    public static void writeFile() throws IOException{
        FileSystem fileSystem = getFileSystem();
        //the relative path resolves to /user/shinelon/upload.input on HDFS
        String putFileName = "upload.input";
        Path putPath = new Path(putFileName);
        //get the output stream on HDFS
        FSDataOutputStream output = fileSystem.create(putPath);
        //get the input stream on the local file
        FileInputStream inputStream = new FileInputStream(new File("/opt/modules/hadoop-2.5.0/upload.input"));
        try{
            IOUtils.copyBytes(inputStream, output, 4096, false);
        }catch(Exception e){
            e.printStackTrace();
        }finally{
            IOUtils.closeStream(inputStream);
            IOUtils.closeStream(output);
        }
    }
    /**
     * List the entries under a path on the HDFS file system.
     */
    public static void getFileList(){
        try{
            FileSystem fs = getFileSystem();
            Path realPath = new Path("/");
            //listStatus returns the direct children of the given path
            FileStatus[] file = fs.listStatus(realPath);
            for(FileStatus f : file){
                System.out.println(f.getPath());
            }
        }catch (Exception e) {
            e.printStackTrace();
        }
    }
    /**
     * Create a file on HDFS and write content to it.
     * @throws IOException
     */
    public static void makeDir() throws IOException{
        FileSystem fs = getFileSystem();
        Path mkdirPath = new Path("user/mkdir");
        //create the file; if it already exists, overwrite it
        FSDataOutputStream create = fs.create(mkdirPath, true);
        create.writeBytes("hello hdfs");
        //close the stream so the content is actually flushed to HDFS
        create.close();
    }
    /**
     * Download a file from HDFS to the local file system.
     * @throws IOException
     */
    public static void downloadFile() throws IOException{
        FileSystem fs = getFileSystem();
        //source file on HDFS
        Path src = new Path("user/mkdir");
        //destination on the local file system
        Path local = new Path("/opt/modules/downloadFile");
        fs.copyToLocalFile(src, local);
    }
    public static void main(String[] args) throws IOException {
        FileSystem fileSystem = getFileSystem();
        System.out.println(fileSystem.toString());
        String fileName = "/user/shinelon/mapreduce/wordcount/input/wc.input";
        readFile(fileSystem, fileName);
        //writeFile();
        getFileList();
        //makeDir();
        downloadFile();
    }
}
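To compile and run this class, the Hadoop client libraries matching the cluster version need to be on the classpath; with the 2.5.0 setup used here, that is typically the org.apache.hadoop:hadoop-client:2.5.0 Maven dependency.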