用java的API操作HDFS的文件系统。那篇文章单独介绍了每个api的使用,为了以后方便使用,写成了一个类,分享给大家。
文件操作类:比较简单,大家也可以自己完善。
package storm.hadoop;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;
/**
 * Thin convenience wrapper around the HDFS {@link DistributedFileSystem} API.
 *
 * <p>Method names are kept exactly as in the original published version so
 * existing callers keep working (they intentionally violate Java naming
 * conventions). All methods swallow {@link IOException} and report it via
 * {@code printStackTrace()}, returning a neutral value on failure — callers
 * that need hard failures should check the documented failure values.
 *
 * <p>NOTE(review): this class is not thread-safe beyond what the underlying
 * {@link FileSystem} instance guarantees.
 */
public class HDFS_File {
	Configuration config = null;
	FileSystem fs = null;
	DistributedFileSystem hdfs = null;

	/**
	 * Connects to the HDFS cluster addressed by {@code uri}
	 * (e.g. {@code hdfs://localhost:9000/...}).
	 *
	 * @param uri any HDFS URI; only its scheme/authority select the filesystem
	 * @throws IOException if the filesystem cannot be reached
	 */
	HDFS_File(String uri) throws IOException {
		config = new Configuration();
		fs = FileSystem.get(URI.create(uri), config);
		// Cast is required for DistributedFileSystem-only calls
		// (getDataNodeStats); fails fast with CCE for non-HDFS URIs.
		hdfs = (DistributedFileSystem) fs;
	}

	/**
	 * Streams the whole HDFS file to {@code System.out}.
	 * On IOException the stack trace is printed and the method returns.
	 */
	public void ReadFile(String FileName) {
		// try-with-resources: the original leaked the stream when
		// copyBytes threw before close().
		try (FSDataInputStream dis = hdfs.open(new Path(FileName))) {
			IOUtils.copyBytes(dis, System.out, 4096, false);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Creates (or overwrites) {@code FileName} with {@code content}
	 * encoded as UTF-8.
	 */
	public void WriteFile(String FileName, String content) {
		try (FSDataOutputStream os = hdfs.create(new Path(FileName))) {
			// StandardCharsets avoids the checked UnsupportedEncodingException
			// path of getBytes(String) and a per-call charset lookup.
			os.write(content.getBytes(StandardCharsets.UTF_8));
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Copies an HDFS file to the local filesystem.
	 *
	 * <p>Fix: the original passed {@code delSrc=true}, which deleted the HDFS
	 * source — a move, contradicting the documented "copy" contract. The
	 * source is now left in place.
	 */
	public void GetFile(String srcFile, String dstFile) {
		try {
			Path srcPath = new Path(srcFile);
			Path dstPath = new Path(dstFile);
			hdfs.copyToLocalFile(false, srcPath, dstPath);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Copies a local file into HDFS; the local source is left in place. */
	public void PutFile(String srcFile, String dstFile) {
		try {
			Path srcPath = new Path(srcFile);
			Path dstPath = new Path(dstFile);
			hdfs.copyFromLocalFile(srcPath, dstPath);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Creates a new (or overwrites an existing) HDFS file and returns its
	 * open output stream. The CALLER owns the stream and must close it.
	 *
	 * @return the open stream, or {@code null} on IOException
	 */
	public FSDataOutputStream CreateFile(String FileName) {
		try {
			return hdfs.create(new Path(FileName));
		} catch (IOException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Renames/moves a file within HDFS.
	 *
	 * @return {@code true} on success, {@code false} on failure or IOException
	 */
	public boolean ReNameFile(String srcName, String dstName) {
		try {
			return hdfs.rename(new Path(srcName), new Path(dstName));
		} catch (IOException e) {
			e.printStackTrace();
		}
		return false;
	}

	/**
	 * Deletes a file or directory.
	 *
	 * <p>The second flag is HDFS's <em>recursive</em> flag (the original
	 * comment misdescribed it): pass {@code true} to delete a non-empty
	 * directory and its contents; for a plain file the flag is irrelevant.
	 *
	 * @return {@code true} if deleted, {@code false} on failure or IOException
	 */
	public boolean DelFile(String FileName, boolean type) {
		try {
			return hdfs.delete(new Path(FileName), type);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return false;
	}

	/**
	 * Returns the file's last-modification time in epoch milliseconds,
	 * or {@code 0} on IOException (indistinguishable from a genuine epoch
	 * timestamp — kept for backward compatibility).
	 */
	public long GetFileModTime(String FileName) {
		try {
			FileStatus fileStatus = hdfs.getFileStatus(new Path(FileName));
			return fileStatus.getModificationTime();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return 0;
	}

	/**
	 * @return {@code true} iff the path exists in HDFS;
	 *         {@code false} on IOException as well
	 */
	public boolean CheckFileExist(String FileName) {
		try {
			return hdfs.exists(new Path(FileName));
		} catch (IOException e) {
			e.printStackTrace();
		}
		return false;
	}

	/**
	 * Returns, per block of the file, the host names holding a replica.
	 *
	 * <p>Fix: on IOException the original returned {@code null}, which made
	 * the sibling test driver NPE; an empty list is returned instead
	 * (backward compatible — iteration code keeps working).
	 */
	public List<String[]> GetFileBolckHost(String FileName) {
		List<String[]> list = new ArrayList<String[]>();
		try {
			Path path = new Path(FileName);
			FileStatus fileStatus = hdfs.getFileStatus(path);
			BlockLocation[] blkLocations = hdfs.getFileBlockLocations(
					fileStatus, 0, fileStatus.getLen());
			for (BlockLocation blk : blkLocations) {
				list.add(blk.getHosts());
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return list;
	}

	/**
	 * Returns the host names of all datanodes in the cluster, also echoing
	 * each to {@code System.out} (kept from the original).
	 *
	 * <p>Fix: returns an empty array instead of {@code null} on IOException
	 * so callers can iterate unconditionally.
	 */
	public String[] GetAllNodeName() {
		try {
			DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
			String[] names = new String[dataNodeStats.length];
			for (int i = 0; i < dataNodeStats.length; i++) {
				names[i] = dataNodeStats[i].getHostName();
				System.out.println(names[i]);
			}
			return names;
		} catch (IOException e) {
			e.printStackTrace();
		}
		return new String[0];
	}
}
自己写的测试程序:
package storm.hadoop;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.Date;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;
/**
 * Manual smoke test for {@link HDFS_File}: exercises write/read, existence
 * check, local↔HDFS copies, modification time and block-location queries
 * against a cluster at {@code hdfs://localhost:9000}.
 *
 * <p>Requires a running HDFS and the local file {@code /home/test.txt};
 * not an automated unit test.
 */
public class File_Operation {
	public static void main(String args[]) throws IOException {
		// Fix: the original built a Configuration and called
		// conf.addResource(uri) with an hdfs:// file URI — addResource
		// expects a configuration XML resource, and the object was never
		// used (HDFS_File creates its own Configuration). Removed.
		String uri = "hdfs://localhost:9000/user/root/input/file02";
		HDFS_File file = new HDFS_File(uri);

		// print all the node names (guard: may legitimately be empty/null
		// when the cluster is unreachable)
		String[] host_name = file.GetAllNodeName();
		if (host_name != null) {
			for (int i = 0; i < host_name.length; i++) {
				System.out.println("the host name:" + host_name[i]);
			}
		}

		// write then read back a small file
		String dstFile = "/user/root/my_test/a.txt";
		file.WriteFile(dstFile, "hello mgq");
		file.ReadFile(dstFile);

		// check the file exists (no "== true" — the boolean IS the condition)
		if (file.CheckFileExist(dstFile)) {
			System.out.println(dstFile + "the file exists");
		} else {
			System.out.println(dstFile + "the file not exists");
		}

		// copy a local file to HDFS
		String srcFile = "/home/test.txt";
		dstFile = "/user/root/my_test/test.txt";
		String ToFile = "/home/test1.txt";
		file.PutFile(srcFile, dstFile);
		System.out.println("copy file ok!");

		// check the file's last-modification time
		long mod_time = file.GetFileModTime(dstFile);
		Date d = new Date(mod_time);
		System.out.println("the modefile time" + d);

		// get the block locations of the file in HDFS (guard against a
		// null return from older versions of the helper)
		List<String[]> list = file.GetFileBolckHost(dstFile);
		if (list != null) {
			for (int i = 0; i < list.size(); i++) {
				for (int j = 0; j < list.get(i).length; j++) {
					System.out.println("the bolck host name:" + list.get(i)[j]);
				}
			}
		}

		// copy back from HDFS to the local filesystem
		file.GetFile(dstFile, ToFile);
	}
}
参考
http://blog.csdn.net/jackydai987/article/details/6227299#comments