使用hadoop命令或者页面查看现有HDFS中目录结构:
/
/usr
/local
/testFile.txt
/testFileCopy.txt
1. 前期准备
创建一个maven项目,引入hadoop包:
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.5.1</version>
</dependency>
定义一个类:
/**
 * Small helper that wraps the Hadoop {@code FileSystem} client for a single,
 * fixed HDFS address. The file system handle is created lazily on first use
 * and can be released again with {@code closeFileSystem()}.
 */
public class HDFSService {
    // Hadoop client configuration
    private Configuration conf;
    // Lazily created handle; null before first use and after closeFileSystem()
    private FileSystem fs;
    // Address used to reach HDFS
    private static final String HDFS = "hdfs://192.168.75.105:19000/";

    /** Creates the service with a default Hadoop {@code Configuration}. */
    public HDFSService() {
        conf = new Configuration();
    }

    /**
     * Returns the file system handle, creating it on first use.
     *
     * @return the handle, or {@code null} if it could not be obtained
     *         (the IOException is printed, not rethrown)
     */
    private FileSystem getFileSystem() {
        if (fs == null) {
            try {
                fs = FileSystem.get(URI.create(HDFS), conf);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return fs;
    }

    /**
     * Closes the file system handle if one is open. Safe to call when no
     * handle was ever created.
     */
    private void closeFileSystem() {
        // Nothing to release; avoids a NullPointerException when the handle
        // was never opened or opening it failed.
        if (fs == null) {
            return;
        }
        try {
            fs.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Drop the handle even if close() failed so that the next
            // getFileSystem() call obtains a fresh one.
            fs = null;
        }
    }
...
}
权限相关:
如果什么都不做,以下调用hdfs时默认使用的是当前登录机器的用户名,所以在调用类似mkdir接口时会报权限错误:
org.apache.hadoop.security.AccessControlException: Permission denied: user=ÕÅÀÚ, access=WRITE, inode="/usr":root:supergroup:drwxr-xr-x
解决方法有两个,一是把此目录权限放开,二是客户端使用指定账号操作。
这里选择后者,需要在当前机器添加一个环境变量:
HADOOP_USER_NAME=root
这样就会以root的身份操作了。
2. 查看文件列表-ls
对应ls命令
/**
 * Lists the entries of an HDFS directory, similar to the {@code ls} command.
 *
 * @param folder HDFS path of the directory to list
 * @return one line per entry in the form {@code "<type>, size: <len>, path: <path>\n"};
 *         if the listing fails the exception is printed and whatever was
 *         collected so far (normally an empty string) is returned
 */
public String ls(String folder){
    // StringBuilder avoids re-copying the accumulated text on every entry
    StringBuilder listing = new StringBuilder();
    try {
        FileStatus[] entries = getFileSystem().listStatus(new Path(folder));
        for (FileStatus entry : entries) {
            // isDirectory() is the non-deprecated replacement for isDir()
            String kind = entry.isDirectory() ? "目录" : "文件";
            listing.append(String.format("%s, size: %d, path: %s\n", kind, entry.getLen(), entry.getPath()));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return listing.toString();
}
测试:
// Demo: print the listing of /usr/local
public static void main(String[] args){
    HDFSService service = new HDFSService();
    String listing = service.ls("/usr/local");
    System.out.println(listing);
}
输出:
文件, size: 11, path: hdfs://192.168.75.105:19000/usr/local/testFile.txt
文件, size: 11, path: hdfs://192.168.75.105:19000/usr/local/testFileCopy.txt
3. 创建目录-mkdir
/**
 * Creates an HDFS directory if it does not exist yet.
 *
 * @param folder HDFS path of the directory to create
 * @return the result of {@code mkdirs} when the path did not exist;
 *         {@code false} when the path already exists or an IOException occurs
 */
public boolean mkdir(String folder){
    Path target = new Path(folder);
    try {
        if (getFileSystem().exists(target)) {
            // Already present: nothing is created, reported as false
            return false;
        }
        return getFileSystem().mkdirs(target);
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
测试:
// Demo: create /usr/dirByCode, then list /usr to confirm
public static void main(String[] args){
    HDFSService service = new HDFSService();
    boolean created = service.mkdir("/usr/dirByCode");
    System.out.println(created ? "创建目录成功" : "创建目录失败");
    String listing = service.ls("/usr");
    System.out.println(listing);
}
输出:
创建目录成功
目录, size: 0, path: hdfs://192.168.75.105:19000/usr/dirByCode
目录, size: 0, path: hdfs://192.168.75.105:19000/usr/local
4. 创建并写文件-create
/**
 * Creates a file on HDFS and writes the given text into it.
 *
 * @param fileName HDFS path of the file to create
 * @param content  text to write; encoded with the platform default charset
 * @return {@code true} once the data has been written and the stream closed
 *         without error; {@code false} if any step failed (the exception is printed)
 */
public boolean creatFile(String fileName, String content){
    boolean result = false;
    FSDataOutputStream os = null;
    try {
        os = getFileSystem().create(new Path(fileName));
        os.write(content.getBytes());
        os.flush();
        // Close inside the try so that a failing close() is reported as a failed write
        os.close();
        os = null;
        result = true;
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Still non-null only when something above failed: release the stream
        // instead of leaking it.
        if (os != null) {
            try {
                os.close();
            } catch (IOException ignored) {
                // The primary failure has already been printed above.
            }
        }
    }
    return result;
}
测试:
// Demo: create a file containing "Hello World", then list its directory
public static void main(String[] args){
    HDFSService service = new HDFSService();
    boolean created = service.creatFile("/usr/dirByCode/newFile.txt", "Hello World");
    System.out.println(created ? "创建文件成功" : "创建文件失败");
    String listing = service.ls("/usr/dirByCode");
    System.out.println(listing);
}
输出:
创建文件成功
文件, size: 11, path: hdfs://192.168.75.105:19000/usr/dirByCode/newFile.txt
5. 读文件-open
/**
 * Reads an entire HDFS file into a string.
 *
 * @param fileName HDFS path of the file to read
 * @return the file content decoded with the platform default charset; if an
 *         error occurs the exception is printed and the data read so far
 *         (possibly an empty string) is returned
 */
public String readFile(String fileName){
    // Collect raw bytes first and decode once at the end, so a multi-byte
    // character split across two reads is not corrupted.
    java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
    InputStream is = null;
    try {
        is = getFileSystem().open(new Path(fileName));
        byte[] b = new byte[1024];
        int numBytes;
        while ((numBytes = is.read(b)) > 0) {
            // Only the first numBytes bytes are valid; the rest of the buffer
            // holds padding or data left over from the previous read.
            collected.write(b, 0, numBytes);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return collected.toString();
}
测试:
// Demo: print the content of the file created earlier
public static void main(String[] args){
    HDFSService service = new HDFSService();
    String content = service.readFile("/usr/dirByCode/newFile.txt");
    System.out.println(content);
}
输出:
Hello World
6. 删除文件-rmr
/**
 * Deletes a file or directory on HDFS immediately; directories are removed
 * together with their contents.
 *
 * @param folder HDFS path to delete
 * @return the result of {@code FileSystem.delete}; {@code false} if an
 *         exception occurred (it is printed)
 */
public boolean rmr(String folder){
    boolean result = false;
    try {
        Path path = new Path(folder);
        // delete(path, true) removes the path right away and recursively.
        // deleteOnExit() would only schedule the removal for when the
        // FileSystem is closed, leaving the path visible until then.
        result = getFileSystem().delete(path, true);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
测试:
result = service.rmr("/usr/dirByCode/newFile.txt")?"删除文件成功":"删除文件失败";
System.out.println(result);
输出:
删除文件成功
需要注意的是,deleteOnExit()只是把路径标记为待删除:如果在调用完立马去查的话还是能查出来的,它只有在FileSystem关闭时才生效,因此需要调用fs.close()。如果希望立即删除,应使用fs.delete(path, true)。
7. 从HDFS复制到本地--copyToLocalFile
/**
 * Copies a file from HDFS to the local file system using
 * {@code FileSystem.copyToLocalFile}.
 *
 * @param hdfsPath source path on HDFS
 * @param local    destination path on the local machine
 * @return {@code true} if the copy finished without an exception,
 *         {@code false} otherwise (the exception is printed)
 */
public boolean copyToLocal(String hdfsPath, String local){
    try {
        Path source = new Path(hdfsPath);
        FileSystem hdfs = getFileSystem();
        hdfs.copyToLocalFile(source, new Path(local));
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
    return true;
}
测试:
result = service.copyToLocal("/usr/dirByCode/newFile.txt","d:/fileFromHDFS.txt")?"拷贝文件成功":"拷贝文件失败";
System.out.println(result);
输出:
拷贝文件成功
在电脑的d盘生成了两个文件(fileFromHDFS.txt以及对应的校验文件.fileFromHDFS.txt.crc):
打开内容为:
Hello World
8. 从HDFS复制到本地--输入输出流
/**
 * Copies a file from HDFS to the local file system by streaming its bytes.
 * The file system handle is closed afterwards.
 *
 * @param hdfsPath source path on HDFS
 * @param local    destination path on the local machine
 * @return {@code true} if the copy finished without an exception,
 *         {@code false} otherwise (the exception is printed)
 */
public boolean copyToLocal2(String hdfsPath, String local){
    boolean result = false;
    InputStream input = null;
    OutputStream output = null;
    try {
        // Source: the file on HDFS
        input = getFileSystem().open(new Path(hdfsPath));
        // Destination: the local file
        output = new FileOutputStream(local);
        // Copy through a 1024-byte buffer. The final 'true' makes copyBytes
        // close both streams; the three-argument overload leaves them open.
        IOUtils.copyBytes(input, output, 1024, true);
        result = true;
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Covers the case where a stream was opened but copyBytes was never
        // reached; closing an already closed stream is harmless.
        IOUtils.closeStream(input);
        IOUtils.closeStream(output);
        closeFileSystem();
    }
    return result;
}
9. 从本地复制到HDFS--copyFromLocalFile
/**
 * Copies a local file to HDFS using {@code FileSystem.copyFromLocalFile}.
 *
 * @param local    source path on the local machine
 * @param hdfsPath destination path on HDFS
 * @return {@code true} if the copy finished without an exception,
 *         {@code false} otherwise (the exception is printed)
 */
public boolean copyFromLocal(String local, String hdfsPath){
    try {
        FileSystem hdfs = getFileSystem();
        Path source = new Path(local);
        Path destination = new Path(hdfsPath);
        hdfs.copyFromLocalFile(source, destination);
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
    return true;
}
测试:
result = service.copyFromLocal("d:/localfile.txt","/usr/dirByCode/fileFromWin7.txt")?"拷贝文件成功":"拷贝文件失败";
System.out.println(result);
result = service.ls("/usr/dirByCode");
System.out.println(result);
result = service.readFile("/usr/dirByCode/fileFromWin7.txt");
System.out.println(result);
输出的文件内容为:
hello ��Һ�!
123abc
如果再次将此文件通过copyToLocal接口复制回win7,内容如下:
hello 大家好!
123abc
可见文件中的内容并没有问题,上面的乱码只是读取文件时的显示问题(很可能是解码所用的字符集与文件实际编码不一致),这里略掉。
10. 从本地复制到HDFS--输入输出流
/**
 * Copies a local file to HDFS by streaming its bytes. The file system handle
 * is closed afterwards.
 *
 * @param local    source path on the local machine
 * @param hdfsPath destination path on HDFS
 * @return {@code true} if the copy finished without an exception,
 *         {@code false} otherwise (the exception is printed)
 */
public boolean copyFromLocal2(String local, String hdfsPath){
    boolean result = false;
    InputStream in = null;
    OutputStream out = null;
    try {
        // Source: the local file, read as a byte stream
        in = new FileInputStream(local);
        // Destination: a newly created file on HDFS
        out = getFileSystem().create(new Path(hdfsPath));
        // Copy through a 1024-byte buffer. The final 'true' makes copyBytes
        // close both streams; the three-argument overload leaves them open.
        IOUtils.copyBytes(in, out, 1024, true);
        result = true;
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Covers the case where a stream was opened but copyBytes was never
        // reached; closing an already closed stream is harmless.
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
        closeFileSystem();
    }
    return result;
}