从 Hadoop 集群拉取 HDFS 文件是一个常见的需求,基于 org.apache.hadoop 提供的 FileSystem API 即可做到。
但是 hadoop 依赖包有个明显的缺点:传递依赖太多,引入后经常需要排除冲突的包(排包),包括但不限于 httpclient、servlet、slf4j、tomcat 等等。
@Service
public class HdfsClient{
private static final Logger logger = LoggerFactory.getLogger(HdfsClient.class);
private FileSystem fileSystem;
private Configuration conf;
public synchronized void init() throws Exception {
String proxy = "x.x.x.x:x";
String username = "xxx";
boolean useProxy = false;
conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://argo");
conf.set("dfs.web.ugi", "hdfs,hadoop");
conf.set("dfs.nameservices", "argo");
conf.set("dfs.ha.namenodes.argo", "nn1,nn2");
conf.set("dfs.namenode.rpc-address.argo.nn1", "xxx:x");
conf.