JAR preparation:
Add the following JARs from the share directory of hadoop-2.8.0 to the project:
- hadoop-common-2.8.0.jar under common
- all JARs under common/lib
- hadoop-hdfs-2.8.0.jar under hdfs
- all JARs under hdfs/lib
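(If the project is built with Maven rather than by copying JARs, the hadoop-client artifact at version 2.8.0 typically provides the same common and HDFS client classes.)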
Example:
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.net.URI;
import java.util.Iterator;
import java.util.Map.Entry;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Before;
import org.junit.Test;
// An HDFS client operates with a specific user identity.
// By default, the HDFS client API takes that identity from a JVM parameter: -DHADOOP_USER_NAME=hadoop
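// (The identity can also be set in code before the FileSystem is obtained, e.g.
//  System.setProperty("HADOOP_USER_NAME", "hadoop"); in this demo it is instead passed
//  explicitly as the third argument of FileSystem.get() in init() below.)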
public class HdfsClientDemo {
private FileSystem fs;
private Configuration conf;
@Before
public void init() throws Exception{
conf = new Configuration();
// client-side setting: files written through this FileSystem will request 2 replicas
conf.set("dfs.replication", "2");
// connect to the NameNode at 192.168.153.136:9000 as user "hadoop"
fs = FileSystem.get(new URI("hdfs://192.168.153.136:9000"), conf, "hadoop");
}
// Iterate over and print all configuration parameters in conf
@Test
public void testConf(){
Iterator<Entry<String, String>> iterator = conf.iterator();
while (iterator.hasNext()) {
Entry<String, String> ent = iterator.next();
System.out.println(ent.getKey() + ": " + ent.getValue());
}
}
// Upload a file to HDFS
@Test
public void testUpload() throws Exception {
// (1) Method 1: upload to HDFS through raw IO streams (commons-io IOUtils)
//FileInputStream fileInputStream =new FileInputStream("e:/Spring MVC.docx");
//FSDataOutputStream fsDataOutputStream = fs.create(new Path("/Spring MVC222.docx"));
//IOUtils.copy(fileInputStream, fsDataOutputStream);
// (2) Method 2: copyFromLocalFile; the second argument of isWindowsAbsolutePath is false because
// the local path is not prefixed with "/" (see that method's javadoc)
boolean windowsAbsolutePath = Path.isWindowsAbsolutePath("e:/Spring MVC.docx", false);
System.out.println(windowsAbsolutePath);
if (windowsAbsolutePath) {
fs.copyFromLocalFile(new Path("e:/Spring MVC.docx"), new Path("/Spring MVC.docx"));
fs.close();
}
}
// Download a file from HDFS to the local file system
@Test
public void testDownload() throws Exception {
// (1) Method 1: raw IO streams; does not depend on a local Hadoop installation
//FSDataInputStream fsDataInputStream=fs.open(new Path("/spring/Spring MVC.docx"));
//FileOutputStream fileOutputStream=new FileOutputStream("e:/Spring MVC.docx");
//IOUtils.copy(fsDataInputStream, fileOutputStream);
// (2) Method 2: fs.copyToLocalFile(new Path("/Spring MVC1111.docx"), new Path("e:/")) depends on a
// local Hadoop installation; the overload below can be used instead: parameter 1 says whether to
// delete the source file on HDFS, parameter 4 says whether to use the raw (pure-Java) local file system
fs.copyToLocalFile(false, new Path("/Spring MVC1111.docx"), new Path("e:/"), true);
fs.close();
}
// Create a directory on HDFS; the root directory is /
@Test
public void mkdirTest() throws Exception {
boolean mkdirs = fs.mkdirs(new Path("/spring"));
System.out.println(mkdirs);
}
// Create a file on HDFS and write a few bytes (a minimal sketch; the path /test/hello.txt is illustrative)
@Test
public void createTest() throws Exception{
FSDataOutputStream out = fs.create(new Path("/test/hello.txt"));
out.write("hello hdfs".getBytes("UTF-8"));
out.close();
}
// Delete a directory or file
@Test
public void deleteTest() throws Exception{
// fs.exists(): check whether the path exists (it may not)
boolean file_exists = fs.exists(new Path("/spring")); // true
// fs.isFile(): check whether the path is a file
boolean file = fs.isFile(new Path("/Spring MVC.docx")); // true
// fs.isDirectory(): check whether the path is a directory
boolean directory1 = fs.isDirectory(new Path("/spring")); // true
boolean directory2 = fs.isDirectory(new Path("/Spring MVC.docx")); // false
System.out.println(file_exists);
System.out.println(file);
System.out.println(directory1);
System.out.println(directory2);
if (file_exists) {
boolean delete = fs.delete(new Path("/spring"), true);// true: delete recursively
System.out.println(delete);
}
}
// List directory entries, then recursively list all files
@Test
public void listTest() throws Exception{
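// listStatus() is not recursive; it returns only the direct children of the given directory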
FileStatus[] listStatus = fs.listStatus(new Path("/"));
for (FileStatus fileStatus : listStatus) {
System.err.println(fileStatus.getPath()+"================="+fileStatus.toString());
}
// listFiles with recursive=true finds every file under the given path
RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
while(listFiles.hasNext()){
LocatedFileStatus fileStatus = listFiles.next();
System.out.println("blocksize:" + fileStatus.getBlockSize());
System.out.println("owner:" + fileStatus.getOwner());
System.out.println("Replication:" + fileStatus.getReplication());
System.out.println("Permission:" + fileStatus.getPermission());
System.out.println("Path:" + fileStatus.getPath());
System.out.println("FileName:" + fileStatus.getPath().getName());
System.out.println("File Len:" + fileStatus.getLen());
BlockLocation[] blockLocations = fileStatus.getBlockLocations();
for (BlockLocation blockLocation : blockLocations) {
System.out.println("块起始偏移量:" + blockLocation.getOffset());
System.out.println("块长度:" + blockLocation.getLength());
String[] hosts = blockLocation.getHosts();
for (String datanode : hosts) {
// Note: the replicas of a block may print as 3 hosts even though dfs.replication was set to 2 on the cluster.
// The client's Configuration and the cluster's hdfs-site.xml are independent: when a file is written from
// the client, the replication factor is taken from the client-side Configuration, and if dfs.replication
// is not set there the default of 3 is used.
// To change it, call conf.set("dfs.replication", "2") on the client before uploading.
System.out.println("block replica host: " + datanode);
}
}
}
}
// Print the contents of an HDFS file to standard output
@Test
public void testCat() throws Exception{
FSDataInputStream in = fs.open(new Path("/Spring MVC.docx"));
IOUtils.copy(in, System.out);
}
}