HDFS是存取数据的分布式文件系统,HDFS文件操作常有两种方式,一种是命令行方式,另一种是JavaAPI。
要在Java工程中操作HDFS,需要引入以下jar包。在我的maven工程的pom.xml文件中增加如下几个依赖:
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.3.0</version>
<exclusions>
<exclusion>
<artifactId>jdk.tools</artifactId>
<groupId>jdk.tools</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.3</version>
</dependency>
java代码
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem;
/**
 * Demonstrates basic HDFS file operations through the Hadoop Java API:
 * creating and deleting a directory, listing files, uploading (full and
 * partial) and downloading (full and partial).
 *
 * <p>All operations go through the shared static {@link #dfs} client, which
 * must be initialised with {@link #initFileSystem()} before any other method
 * is called.
 */
public class FileOperateTest {
    /** Shared HDFS client used by every operation in this class. */
    public static DistributedFileSystem dfs = new DistributedFileSystem();
    /** URI of the NameNode the client connects to. */
    public static String nameNodeUri = "hdfs://hadoop:8020";

    /** Name of the classpath resource (next to this class) used as upload source. */
    private static final String LOCAL_RESOURCE = "uploadFile.txt";
    /** Local directory that downloaded files are written to. */
    private static final String LOCAL_DOWNLOAD_DIR =
            "E:/Workspaces/MyEclipse2014_BigData/hadoop-demo/src/com/xbz/bigdata/hadoop/demo";

    /**
     * Runs every demo operation in sequence and closes the HDFS client at the end.
     *
     * @param args unused
     * @throws Exception if any HDFS or local I/O operation fails
     */
    public static void main(String[] args) throws Exception {
        FileOperateTest fot = new FileOperateTest();
        // Initialise outside the try block: closing a client that was never
        // initialised is not meaningful.
        fot.initFileSystem();
        try {
            fot.testMkDir();
            fot.testDeleteDir();
            fot.testFileList();
            fot.testUploadFullFile();
            // Creates /tmp/omcs/uploadFullFile.txt, which both download
            // methods below read; it must therefore run before them.
            fot.testUploadFullFile2();
            fot.testUploadFile2();
            fot.testDownloadFile();
            fot.testDownloadFile2();
        } finally {
            dfs.close();
        }
    }

    /**
     * Connects the shared client to the NameNode at {@link #nameNodeUri} and
     * prints the client's working directory.
     *
     * @throws Exception if the URI is malformed or the client cannot be initialised
     */
    public void initFileSystem() throws Exception {
        System.out.println("初始化hadoop客户端");
        // Set the user name the Hadoop client logs in with.
        System.setProperty("HADOOP_USER_NAME", "hdfs");
        dfs.initialize(new URI(nameNodeUri), new Configuration());
        System.out.println("客户端连接成功");
        Path workingDirectory = dfs.getWorkingDirectory();
        System.out.println("工作目录:" + workingDirectory);
    }

    /**
     * Creates the directory {@code /tmp/omcs/bbb} and prints whether it succeeded.
     *
     * @throws Exception if the HDFS call fails
     */
    public void testMkDir() throws Exception {
        boolean res = dfs.mkdirs(new Path("/tmp/omcs/bbb"));
        System.out.println("目录创建结果:" + (res ? "创建成功" : "创建失败"));
    }

    /**
     * Deletes the directory or file {@code /tmp/omcs/bbb} non-recursively.
     *
     * @throws Exception if the HDFS call fails
     */
    public void testDeleteDir() throws Exception {
        dfs.delete(new Path("/tmp/omcs/bbb"), false);
    }

    /**
     * Recursively lists every file under {@code /} (directories themselves are
     * not returned) and prints permission, owner, group, length, modification
     * time and path for each one.
     *
     * @throws Exception if the HDFS call fails
     */
    public void testFileList() throws Exception {
        RemoteIterator<LocatedFileStatus> listFiles = dfs.listFiles(new Path("/"), true);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();
            FsPermission permission = fileStatus.getPermission();
            String owner = fileStatus.getOwner();
            String group = fileStatus.getGroup();
            // File size in bytes.
            long len = fileStatus.getLen();
            long modificationTime = fileStatus.getModificationTime();
            Path path = fileStatus.getPath();
            System.out.println("-------------------------------");
            System.out.println("permission:" + permission);
            System.out.println("owner:" + owner);
            System.out.println("group:" + group);
            System.out.println("len:" + len);
            System.out.println("modificationTime:" + sdf.format(new Date(modificationTime)));
            System.out.println("path:" + path);
        }
    }

    /**
     * Uploads the whole local resource to {@code /tmp/omcs/uploadFile.txt} by
     * streaming it, overwriting any existing file.
     *
     * <p>Note from the original author: on a Windows development environment,
     * uploading with {@code org.apache.commons.io.IOUtils.copy} may be problematic.
     *
     * @throws Exception if the local resource is missing or any I/O fails
     */
    public void testUploadFullFile() throws Exception {
        // Both streams are closed on exit; closing the HDFS output stream is
        // what completes the write, since IOUtils.copy does not close streams.
        try (FileInputStream in = new FileInputStream(localResourceFile());
             FSDataOutputStream out = dfs.create(new Path("/tmp/omcs/uploadFile.txt"), true)) {
            org.apache.commons.io.IOUtils.copy(in, out);
        }
        System.out.println("上传完毕");
    }

    /**
     * Uploads the whole local resource to {@code /tmp/omcs/uploadFullFile.txt}
     * using {@code copyFromLocalFile}.
     *
     * @throws Exception if the local resource is missing or the HDFS call fails
     */
    public void testUploadFullFile2() throws Exception {
        dfs.copyFromLocalFile(
                new Path(localResourceFile().getAbsolutePath()),
                new Path("/tmp/omcs/uploadFullFile.txt"));
    }

    /**
     * Uploads part of the local resource (12 bytes starting at offset 6) to
     * {@code /tmp/omcs/uploadFile2.txt}, overwriting any existing file.
     *
     * <p>Note from the original author: on a Windows development environment,
     * uploading with {@code org.apache.commons.io.IOUtils.copy} may be problematic.
     *
     * @throws Exception if the local resource is missing or any I/O fails
     */
    public void testUploadFile2() throws Exception {
        try (FileInputStream in = new FileInputStream(localResourceFile());
             FSDataOutputStream out = dfs.create(new Path("/tmp/omcs/uploadFile2.txt"), true)) {
            // Skip the first 6 bytes of the input, then copy 12 bytes.
            org.apache.commons.io.IOUtils.copyLarge(in, out, 6, 12);
        }
        System.out.println("上传完毕");
    }

    /**
     * Downloads {@code /tmp/omcs/uploadFullFile.txt} in full to the local
     * download directory, keeping the source file on HDFS.
     *
     * <p>Note from the original author: use this API on a Windows development platform.
     *
     * @throws Exception if the HDFS file is missing or any I/O fails
     */
    public void testDownloadFile() throws Exception {
        // delSrc=false keeps the HDFS file; useRawLocalFileSystem=true makes
        // the local write go through plain Java I/O.
        dfs.copyToLocalFile(
                false,
                new Path("/tmp/omcs/uploadFullFile.txt"),
                new Path(LOCAL_DOWNLOAD_DIR),
                true);
        System.out.println("下载完成");
    }

    /**
     * Downloads part of {@code /tmp/omcs/uploadFullFile.txt} (everything from
     * byte offset 6 onwards) to {@code download_uploadFullFile.txt} in the
     * local download directory.
     *
     * @throws Exception if the HDFS file is missing or any I/O fails
     */
    public void testDownloadFile2() throws Exception {
        File target = new File(LOCAL_DOWNLOAD_DIR, "download_uploadFullFile.txt");
        try (FSDataInputStream src = dfs.open(new Path("/tmp/omcs/uploadFullFile.txt"));
             FileOutputStream des = new FileOutputStream(target)) {
            // Start reading at byte 6 so that only the tail of the file is copied.
            src.seek(6);
            org.apache.commons.io.IOUtils.copy(src, des);
        }
        System.out.println("下载完成");
    }

    /**
     * Resolves the upload source resource, located on the classpath next to
     * this class, to a local file.
     *
     * @return the local file backing the resource
     * @throws Exception if the resource is not on the classpath or its URL
     *         cannot be converted to a file
     */
    private static File localResourceFile() throws Exception {
        java.net.URL url = FileOperateTest.class.getResource(LOCAL_RESOURCE);
        if (url == null) {
            throw new java.io.FileNotFoundException(
                    "Classpath resource not found: " + LOCAL_RESOURCE);
        }
        // Going through the URI decodes escaped characters (e.g. %20) that
        // URL.getFile() would leave in the path.
        return new File(url.toURI());
    }
}