Operations on HDFS are a staple of any Hadoop program, so here is a utility class I use, shared for everyone's benefit.
import java.io.File;
import java.io.FileInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* @author hadoop
*
*/
public class HdfsUtil {
    /**
     * Uploads a local file or directory to HDFS.
     * If localFilePath is a directory, hdfsFilePath is treated as a directory
     * as well, and every file under localFilePath is copied into it. If
     * localFilePath is a single file, hdfsFilePath is treated as a file path,
     * even if it ends with an explicit "/".
     * @param conf Hadoop configuration
     * @param localFilePath source path on the local file system
     * @param hdfsFilePath destination path on HDFS
     * @throws Exception
     */
    public static void upload(Configuration conf, String localFilePath, String hdfsFilePath) throws Exception
    {
        FileSystem fs = FileSystem.get(conf);
        Path localPath = new Path(localFilePath);
        Path dstPath = new Path(hdfsFilePath);
        fs.copyFromLocalFile(localPath, dstPath);
    }
    /**
     * Deletes a path on HDFS. If hdfsFilePath is a directory, the entire
     * directory is deleted recursively.
     * @param conf Hadoop configuration
     * @param hdfsFilePath path on HDFS to delete
     * @throws Exception
     */
    public static void delete(Configuration conf, String hdfsFilePath) throws Exception
    {
        FileSystem fs = FileSystem.get(conf);
        Path dstPath = new Path(hdfsFilePath);
        fs.delete(dstPath, true);
    }
    /**
     * Downloads a file from HDFS to the local file system.
     * If hdfsFilePath is a directory, localFilePath is treated as a directory
     * as well, and every file under hdfsFilePath is copied into it. If
     * hdfsFilePath is a single file, localFilePath is treated as a file path,
     * even if it ends with an explicit "/".
     * @param conf Hadoop configuration
     * @param hdfsFilePath source path on HDFS
     * @param localFilePath destination path on the local file system
     * @throws Exception
     */
    public static void download(Configuration conf, String hdfsFilePath, String localFilePath) throws Exception
    {
        FileSystem fs = FileSystem.get(conf);
        Path localPath = new Path(localFilePath);
        Path hdfsPath = new Path(hdfsFilePath);
        fs.copyToLocalFile(hdfsPath, localPath);
    }
    /**
     * Appends the contents of the file at localFilePath to the file at
     * hdfsFilePath. Note that append must be enabled on the cluster (see the
     * dfs.support.append setting in main below).
     * @param conf Hadoop configuration
     * @param hdfsFilePath destination file on HDFS
     * @param localFilePath local file whose contents are appended
     * @param fromNewLine if true, a newline is written before the appended content
     * @throws Exception
     */
    public static void append(Configuration conf, String hdfsFilePath, String localFilePath, boolean fromNewLine) throws Exception
    {
        FileSystem fs = FileSystem.get(conf);
        File localFile = new File(localFilePath);
        if (localFile.isDirectory())
        {
            throw new Exception("append is not supported for directories");
        }
        Path hdfsPath = new Path(hdfsFilePath);
        FSDataOutputStream outputStream = fs.append(hdfsPath);
        if (fromNewLine)
        {
            outputStream.write("\n".getBytes());
        }
        byte[] buffer = new byte[4096];
        FileInputStream inputStream = new FileInputStream(localFile);
        try
        {
            int size;
            while ((size = inputStream.read(buffer)) != -1)
            {
                outputStream.write(buffer, 0, size);
            }
        }
        finally
        {
            inputStream.close();
            outputStream.close();
        }
    }
    /**
     * Checks whether any file under hdfsFolder starts with filePrefix.
     * @param conf Hadoop configuration
     * @param hdfsFolder parent directory on HDFS
     * @param filePrefix file name prefix to look for
     * @return true if at least one matching file exists
     * @throws Exception
     */
    public static boolean isFileExist(Configuration conf, String hdfsFolder, String filePrefix) throws Exception
    {
        boolean exist = false;
        Path parentPath = new Path(hdfsFolder);
        FileSystem fileSystem = FileSystem.get(conf);
        FileStatus[] fileStatus = fileSystem.listStatus(parentPath);
        for (int i = 0; i < fileStatus.length; i++)
        {
            String name = fileStatus[i].getPath().getName();
            if (name.startsWith(filePrefix))
            {
                exist = true;
                break;
            }
        }
        return exist;
    }
    /**
     * Creates an empty file on HDFS.
     * @param conf Hadoop configuration
     * @param hdfsFileName path of the file to create
     * @throws Exception
     */
    public static void createEmptyFile(Configuration conf, String hdfsFileName) throws Exception
    {
        Path filePath = new Path(hdfsFileName);
        FileSystem fileSystem = FileSystem.get(conf);
        // create() already produces an empty file; nothing needs to be written
        FSDataOutputStream stream = fileSystem.create(filePath);
        stream.close();
    }
    /**
     * @param args
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Prop is a project-specific constants class; FS_DEFAULT_NAME holds the
        // NameNode URI (on newer Hadoop versions the preferred key is fs.defaultFS).
        conf.set("fs.default.name", Prop.FS_DEFAULT_NAME);
        conf.set("dfs.support.append", Prop.DFS_SUPPORT_APPEND);
        String localFilePath = "hadoop/test/file1";
        String hdfsFilePath = "/user/test/file0";
        HdfsUtil.upload(conf, localFilePath, hdfsFilePath);
        //HdfsUtil.delete(conf, hdfsFilePath);
        //HdfsUtil.download(conf, hdfsFilePath, localFilePath);
        //HdfsUtil.append(conf, hdfsFilePath, localFilePath, true);
    }
}
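To close, here is a minimal sketch of how the class might be driven end to end without the Prop constants. The NameNode address, the file paths, and the HdfsUtilDemo class name are placeholders of mine rather than part of the original code, so adjust them to your own cluster:

import org.apache.hadoop.conf.Configuration;
public class HdfsUtilDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder NameNode URI; replace with your cluster's address.
        conf.set("fs.default.name", "hdfs://localhost:9000");
        // Needed for HdfsUtil.append on versions where append is disabled by default.
        conf.set("dfs.support.append", "true");
        // Round-trip: upload, verify, append, download.
        HdfsUtil.upload(conf, "/tmp/local-input.txt", "/user/test/input.txt");
        System.out.println("found: " + HdfsUtil.isFileExist(conf, "/user/test", "input"));
        HdfsUtil.append(conf, "/user/test/input.txt", "/tmp/more-lines.txt", true);
        HdfsUtil.download(conf, "/user/test/input.txt", "/tmp/roundtrip.txt");
    }
}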