第一步:下载Java程序所依赖的Hadoop jar包。这里使用Maven下载;直接引入Hadoop依赖可能出现依赖冲突(即classpath上存在两个相同的传递依赖包),推荐使用如下带exclusion的方式下载。
在pom.xml里面添加如下
<!-- hadoop-common: core Hadoop classes (Configuration, FileSystem, UserGroupInformation).
     The exclusions drop legacy servlet/JSP/Tomcat jars that duplicate classes
     already provided elsewhere on the classpath, avoiding dependency conflicts. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
<exclusions>
<exclusion>
<artifactId>servlet-api</artifactId>
<groupId>javax.servlet</groupId>
</exclusion>
<exclusion>
<artifactId>jasper-compiler</artifactId>
<groupId>tomcat</groupId>
</exclusion>
<exclusion>
<artifactId>jasper-runtime</artifactId>
<groupId>tomcat</groupId>
</exclusion>
<exclusion>
<artifactId>jsp-api</artifactId>
<groupId>javax.servlet.jsp</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- hadoop-hdfs: the DistributedFileSystem implementation; same servlet-api
     exclusion as above for the same reason. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
<exclusions>
<exclusion>
<artifactId>servlet-api</artifactId>
<groupId>javax.servlet</groupId>
</exclusion>
</exclusions>
</dependency>
如果Maven没有出现依赖冲突,可以直接在pom.xml中添加如下依赖
<!-- Plain Hadoop 2.6.0 dependency set (no exclusions) — use only when there is
     no conflict with other servlet/JSP jars on the classpath. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
第二步:使用Java进行HDFS应用开发
/**
 * Demo of Kerberos-authenticated HDFS access: logs in from a keytab,
 * lists a remote directory, and downloads a file to the local filesystem.
 */
public class HdfsDemo {

    /**
     * Builds a Hadoop {@link Configuration} and performs a Kerberos login
     * from the given keytab.
     *
     * @param krb5File   path to the krb5.conf Kerberos configuration file
     * @param user       Kerberos principal to authenticate as (e.g. {@code asmp@REALM})
     * @param keytabPath path to the keytab file holding the principal's key
     * @return the configured, authenticated {@link Configuration} object
     * @throws IOException if the keytab login fails
     */
    public static Configuration getConfig(String krb5File, String user,
            String keytabPath) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl",
                org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        // On Linux the JRE can locate /etc/krb5.conf automatically, but setting
        // the property explicitly works on every platform (including Windows).
        System.setProperty("java.security.krb5.conf", krb5File);
        conf.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(user, keytabPath);
        return conf;
    }

    /**
     * Recursively lists all files under the given HDFS directory and prints
     * each file's path to stdout.
     *
     * @param uri        HDFS endpoint, e.g. {@code hdfs://host:8020}
     * @param remotePath directory path on HDFS to list
     * @param conf       Hadoop configuration (already authenticated)
     * @throws Exception if the URI is malformed or the listing fails
     */
    public static void getHdfsFileList(String uri, String remotePath,
            Configuration conf) throws Exception {
        // try-with-resources: the handle is released even on exception.
        // NOTE(review): FileSystem.get may return a JVM-cached, shared instance;
        // closing it invalidates that shared handle for other users. Set
        // "fs.hdfs.impl.disable.cache" to true if other code shares it.
        try (FileSystem fs = FileSystem.get(new URI(uri), conf)) {
            // second argument 'true' requests a recursive listing
            RemoteIterator<LocatedFileStatus> iter =
                    fs.listFiles(new Path(remotePath), true);
            while (iter.hasNext()) {
                System.out.println(iter.next().getPath().toUri().getPath());
            }
        }
    }

    /**
     * Downloads a file from HDFS to the local filesystem.
     *
     * @param conf   Hadoop configuration (already authenticated)
     * @param uri    HDFS endpoint, e.g. {@code hdfs://host:8020}
     * @param remote HDFS path of the file to download
     * @param local  local destination directory or file path
     * @throws IOException if the copy fails
     */
    public static void download(Configuration conf, String uri, String remote,
            String local) throws IOException {
        // Same caching caveat as in getHdfsFileList applies to close().
        try (FileSystem fs = FileSystem.get(URI.create(uri), conf)) {
            fs.copyToLocalFile(new Path(remote), new Path(local));
            // fixed: original message read "download: from<path>" (missing space)
            System.out.println("download: from " + remote + " to " + local);
        }
    }

    public static void main(String[] args) throws Exception {
        String krb5File = "/usr/dac/conf/krb5.conf";
        String user = "asmp@CSVW.COM";
        String keytabPath = "/usr/dac/conf/asmp.keytab";
        Configuration conf = getConfig(krb5File, user, keytabPath);
        String URI = "hdfs://svlhdpt01-pip.csvw.com:8020";
        String remotePath = "/user/asmp/shell/pro/aftersales/dailyreport/2016-12/2016-12-31.csv";
        getHdfsFileList(URI, remotePath, conf);
        download(conf, URI, remotePath, "/usr/dac/download");
    }
}