从hdfs下载文件或文件夹到本地目录
当集群是高可用时,配置hdfs访问地址:
当集群启用HA(高可用)时,NameNode有多个节点,处于standby状态的NameNode无法提供HDFS访问,因此不能把地址写死为某一台NameNode的普通hdfs地址,而需要能够灵活地定位到当前可用的NameNode。
高可用集群需要通过nameservice访问。每个Hadoop集群的nameservice配置各不相同,因此需要把该集群的hdfs-site.xml文件放到Maven的resources目录中,Configuration会自动加载resources目录中的配置文件;访问时直接用nameservice替换集群地址即可,例如hdfs://nameservice/user/expr。
URI:hdfs://nodexx:8020/user/hive/warehouse 修改为hdfs://nameservice1/user/hive/warehouse 即可。
参考文章:http://blog.csdn.net/kwu_ganymede/article/details/49097591
http://gaojiehigh.iteye.com/blog/1575006
代码示例,从hdfs下载文件和文件夹
package com.data;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Command-line utility that copies a single file or a whole directory tree from HDFS to the
 * local file system.
 *
 * <p>Every download method works through the shared static {@link #hdfs} handle, which must be
 * assigned before any of them is called; {@link #main(String[])} assigns it itself.
 */
public class HDFS_Download {
    /** HDFS source used by {@link #main(String[])} when no arguments are given. */
    private static final String DEFAULT_HDFS_SOURCE =
            "hdfs://xl.namenode1.coocaa.com:8020/apps/external/hive/jscn_sdata/jscn_base_all_mac";

    /** Local destination used by {@link #main(String[])} when no arguments are given. */
    private static final String DEFAULT_LOCAL_TARGET = "C:\\Users\\awm-z\\Downloads\\hdfs_jscn";

    /** Buffer size, in bytes, handed to {@code IOUtils.copyBytes}. */
    private static final int COPY_BUFFER_SIZE = 4096;

    /** File system handle used by all download methods; must be set before they are called. */
    public static FileSystem hdfs;

    /**
     * Copies one HDFS file to a local file, creating the local parent directory if needed.
     *
     * @param srcPath HDFS path of the file to read
     * @param dstPath local path of the file to write (overwritten if it already exists)
     * @throws Exception if the source cannot be opened, the destination cannot be created, or
     *     copying or closing either stream fails
     */
    public static void downloadFile(String srcPath, String dstPath) throws Exception {
        // Open the source first so that a bad source path leaves the local disk untouched.
        try (FSDataInputStream in = hdfs.open(new Path(srcPath))) {
            File parent = new File(dstPath).getParentFile();
            if (parent != null) {
                ensureDirectory(parent);
            }
            // try-with-resources reports a failed close of the local file instead of hiding it.
            try (FileOutputStream out = new FileOutputStream(dstPath)) {
                IOUtils.copyBytes(in, out, COPY_BUFFER_SIZE, false);
            }
        }
    }

    /**
     * Recursively copies the contents of an HDFS directory into a local directory.
     *
     * @param srcPath HDFS path of the directory to read
     * @param dstPath local directory to populate; created (with parents) if it does not exist
     * @throws Exception if the local directory cannot be created, the HDFS directory cannot be
     *     listed, or any entry fails to download
     */
    public static void downloadFolder(String srcPath, String dstPath) throws Exception {
        ensureDirectory(new File(dstPath));
        for (FileStatus child : hdfs.listStatus(new Path(srcPath))) {
            String name = child.getPath().getName();
            // The listing already says whether each entry is a file, so no extra lookup is made.
            copyByStatus(child, srcPath + '/' + name, dstPath + '/' + name);
        }
    }

    /**
     * Downloads {@code srcPath} to {@code dstPath}, whether the source is a file or a directory.
     *
     * @param srcPath HDFS path of the file or directory to download
     * @param dstPath local path to write to
     * @throws Exception if the status of {@code srcPath} cannot be obtained or the copy fails
     */
    public static void download(String srcPath, String dstPath) throws Exception {
        // Look the source up before touching the local disk.
        FileStatus status = hdfs.getFileStatus(new Path(srcPath));
        copyByStatus(status, srcPath, dstPath);
    }

    /**
     * Entry point. With two arguments, downloads {@code args[0]} (HDFS URI) to {@code args[1]}
     * (local path); with no arguments, uses the built-in default source and destination.
     *
     * @param args either empty or {@code <hdfs-uri> <local-path>}
     * @throws IllegalArgumentException if the argument count is neither 0 nor 2
     * @throws Exception if connecting to HDFS or downloading fails
     */
    public static void main(String[] args) throws Exception {
        final String hdfsStr;
        final String localStr;
        if (args.length == 2) {
            hdfsStr = args[0];
            localStr = args[1];
        } else if (args.length == 0) {
            hdfsStr = DEFAULT_HDFS_SOURCE;
            localStr = DEFAULT_LOCAL_TARGET;
        } else {
            throw new IllegalArgumentException(
                    "Usage: HDFS_Download [<hdfs-uri> <local-path>]");
        }

        Configuration conf = new Configuration();
        hdfs = FileSystem.get(URI.create(hdfsStr), conf);
        try {
            download(hdfsStr, localStr);
        } finally {
            // Release the file system handle even when the download fails.
            hdfs.close();
        }
    }

    /** Dispatches to the file or folder download according to an already-fetched status. */
    private static void copyByStatus(FileStatus status, String srcPath, String dstPath)
            throws Exception {
        if (status.isFile()) {
            downloadFile(srcPath, dstPath);
        } else {
            downloadFolder(srcPath, dstPath);
        }
    }

    /** Creates {@code dir} (and missing parents) or fails loudly when that is not possible. */
    private static void ensureDirectory(File dir) throws IOException {
        if (dir.isDirectory()) {
            return;
        }
        // mkdirs() returns false both on failure and when the directory appeared meanwhile,
        // so re-check before reporting an error.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create local directory: " + dir);
        }
    }
}