刚学习完对hadoop的文件基本操作相关内容,特记录下来,方便以后复习查看。
Hadoop集群NameNode地址:192.168.2.100,端口:9000
需要引入 Hadoop 相关的 jar 包(hadoop-common、hadoop-hdfs 及其依赖库)。
如果与工程中已有的 jar 文件重复,直接覆盖即可。
附:代码(记得加入Junit相关Jar包)
package cn.guyouda.hadoop;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Before;
import org.junit.Test;
/**
 * Basic HDFS file operations (upload, download, mkdir, delete, listing)
 * against the cluster NameNode at hdfs://192.168.2.100:9000.
 *
 * <p>Each test method obtains a fresh {@link FileSystem} handle via
 * {@link #init()}. Methods that open raw streams close them with
 * try-with-resources (the original version leaked both streams).
 */
public class HDFSUtil {

    // Shared HDFS client handle, (re)initialized before every test.
    public FileSystem fileSystem = null;

    /**
     * Builds the HDFS client connection.
     *
     * @throws IOException if the NameNode cannot be reached
     * @throws URISyntaxException if the NameNode URI is malformed
     * @throws InterruptedException if the connecting thread is interrupted
     */
    @Before
    public void init() throws IOException, URISyntaxException, InterruptedException {
        // Connection configuration: point the default filesystem at the NameNode.
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.2.100:9000/");
        /*
         * Args: (1) NameNode URI, (2) configuration, (3) user name — must be a
         * user with permission to operate on the target files/directories.
         */
        fileSystem = FileSystem.get(new URI("hdfs://192.168.2.100:9000/"), conf, "hadoop");
    }

    /**
     * Downloads a file by manually copying the HDFS input stream to a local
     * file. Both streams are closed via try-with-resources; the original
     * implementation never closed either stream.
     *
     * @throws IOException on any read/write failure
     */
    @Test
    public void downloadFile() throws IOException {
        Path path = new Path("hdfs://192.168.2.100:9000/jdk-7u65-linux-i586.tar.gz");
        try (FSDataInputStream in = fileSystem.open(path);
             FileOutputStream out = new FileOutputStream("D:\\jdk.tar.gz")) {
            IOUtils.copy(in, out);
        }
    }

    /**
     * Downloads a file using the built-in {@code copyToLocalFile} API, which
     * manages its own streams. (The {@code @Test} annotation was missing in
     * the original, so this method silently never ran as a test.)
     *
     * @throws IOException on any read/write failure
     */
    @Test
    public void downloadFile2() throws IOException {
        Path dst = new Path("D:/readme.txt");
        Path src = new Path("hdfs://192.168.2.100:9000/readme.txt");
        fileSystem.copyToLocalFile(src, dst);
    }

    /**
     * Uploads a local file by manually copying it into an HDFS output stream.
     * Both streams are closed via try-with-resources; the original
     * implementation never closed either stream.
     *
     * @throws IOException on any read/write failure
     */
    @Test
    public void uploadFile() throws IOException {
        Path dest = new Path("hdfs://192.168.2.100:9000/readme.txt");
        try (FSDataOutputStream out = fileSystem.create(dest);
             InputStream in = new FileInputStream("D:/readme.txt")) {
            IOUtils.copy(in, out);
        }
    }

    /**
     * Uploads a file using the built-in {@code copyFromLocalFile} API, which
     * manages its own streams and creates missing parent directories.
     *
     * @throws IOException on any read/write failure
     */
    @Test
    public void uploadFile2() throws IOException {
        Path src = new Path("D:/readme.txt");
        Path dst = new Path("hdfs://192.168.2.100:9000/aa/bb/readme2.txt");
        fileSystem.copyFromLocalFile(src, dst);
    }

    /**
     * Creates a directory tree (like {@code mkdir -p}): all missing parent
     * directories are created as well.
     *
     * @throws IllegalArgumentException if the path is invalid
     * @throws IOException on communication failure
     */
    @Test
    public void mkDir() throws IllegalArgumentException, IOException {
        fileSystem.mkdirs(new Path("/aa/bb/cc"));
    }

    /**
     * Deletes a directory.
     *
     * @throws IllegalArgumentException if the path is invalid
     * @throws IOException on communication failure
     */
    @Test
    public void delDir() throws IllegalArgumentException, IOException {
        /*
         * Arg 1: the directory to delete.
         * Arg 2: whether to recursively delete its contents.
         */
        fileSystem.delete(new Path("/aa"), true);
    }

    /**
     * Lists HDFS contents two ways: first recursively (files only), then the
     * immediate children of the root (files and directories).
     *
     * @throws IllegalArgumentException if the path is invalid
     * @throws IOException on communication failure
     */
    @Test
    public void listDir() throws IllegalArgumentException, IOException {
        // Recursively list every file under "/" (directories are not returned).
        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            System.out.println(file.getPath());
            System.out.println(file.getAccessTime());
            System.out.println(file.getBlockSize());
            System.out.println(file.getGroup());
            System.out.println(file.getLen());
            System.out.println(file.getModificationTime());
            System.out.println(file.getOwner());
            // Arrays.toString: the original printed the array reference
            // (e.g. "[Lorg.apache.hadoop.fs.BlockLocation;@1a2b3c"), which is useless.
            System.out.println(Arrays.toString(file.getBlockLocations()));
        }
        System.out.println("===================================");
        // Non-recursive listing: immediate files AND directories under "/".
        FileStatus[] fileStatus = fileSystem.listStatus(new Path("/"));
        for (FileStatus fStatus : fileStatus) {
            System.out.println(fStatus.getPath());
            System.out.println(fStatus.getAccessTime());
            System.out.println(fStatus.getBlockSize());
            System.out.println(fStatus.getGroup());
            System.out.println(fStatus.getLen());
            System.out.println(fStatus.getModificationTime());
            System.out.println(fStatus.getOwner());
        }
    }
}
注:记得先启动 HDFS(执行 .../hadoop-2.4.1/sbin/start-dfs.sh),并确保防火墙放行 9000 端口(该集群若仅在内网运行,也可以直接关闭防火墙)。