Java can access HDFS through the Hadoop FileSystem API; the classes involved are mainly Configuration, FileSystem, Path, FSDataInputStream and FSDataOutputStream. The full code is below:
package com.shidai.hadoop.hdfs01;

import java.io.IOException;
import java.net.URI;
import java.text.ParseException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;

import com.shidai.hadoop.utils.Constant;
import com.shidai.hadoop.utils.DateUtil;
public class HDFSTest {

    private static String url = Constant.url;
    private static Configuration conf = new Configuration();

    public static void getAllDataNode() {
        try {
            // pass the HDFS URI explicitly; getDataNodeStats() only exists on
            // DistributedFileSystem, so the returned FileSystem must really be HDFS
            FileSystem fs = FileSystem.get(URI.create(url), conf);
            DistributedFileSystem distributedfs = (DistributedFileSystem) fs;
            DatanodeInfo[] datanodeInfos = distributedfs.getDataNodeStats();
            for (int i = 0; i < datanodeInfos.length; i++) {
                System.out.println("datanode " + i + ": " + datanodeInfos[i].getHostName());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /**
     * Create a file and write the given contents to it
     *
     * @param dst
     * @param contents
     * @throws IOException
     */
    public static void createFile(String dst, byte[] contents) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(url), conf);
        Path path = new Path(dst);
        FSDataOutputStream out = fs.create(path);
        out.write(contents);
        out.close();
        fs.close();
        System.out.println("file created successfully");
    }
    /**
     * Read a file and parse it as JSON
     *
     * @param dst
     * @throws JSONException
     * @throws ParseException
     */
    public static void readFile(String dst) throws JSONException, ParseException {
        FileSystem fs;
        FSDataInputStream in;
        try {
            fs = FileSystem.get(URI.create(url), conf);
            in = fs.open(new Path(dst));
            byte[] ioBuffer = new byte[1024];
            StringBuffer sf = new StringBuffer();
            int len = -1;
            // the read() in the loop condition consumes the stream chunk by
            // chunk; do not call read() again inside the body, or every other
            // buffer is silently dropped
            while ((len = in.read(ioBuffer)) != -1) {
                sf.append(new String(ioBuffer, 0, len));
            }
            in.close();
            fs.close();
            System.out.println(sf.toString());
            JSONObject json = new JSONObject(sf.toString());
            Long time = json.getLong("last_time");
            String sd = DateUtil.getDate(time * 1000);
            System.out.println("upload time: " + sd);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /**
     * Iterate over the files under a directory
     *
     * @param dst
     */
    public static void listFiles(String dst) {
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create(url), conf);
            // false: do not recurse into subdirectories
            RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(new Path(dst), false);
            while (iterator.hasNext()) {
                LocatedFileStatus locatedFileStatus = iterator.next();
                if (locatedFileStatus.isFile()) {
                    String path = locatedFileStatus.getPath().toString();
                    System.out.println(path);
                    // skip in-progress .tmp files
                    if (!path.endsWith("tmp")) {
                        readFile(path);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    /**
     * Upload a local file
     *
     * @param src
     * @param dst
     */
    public static void upload(String src, String dst) {
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create(url), conf);
            Path srcPath = new Path(src);
            Path dstPath = new Path(dst);
            // false: keep the local source file after copying
            fs.copyFromLocalFile(false, srcPath, dstPath);
            // print the files now under the destination path
            System.out.println("list files");
            FileStatus[] fileStatus = fs.listStatus(dstPath);
            for (FileStatus fstatus : fileStatus) {
                System.out.println(fstatus.getPath());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (fs != null) {
                try {
                    fs.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    /**
     * Delete a file
     *
     * @param dst
     */
    public static void delete(String dst) {
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create(url), conf);
            // false: do not delete recursively, so deleting a non-empty
            // directory fails instead of wiping its contents
            boolean flag = fs.delete(new Path(dst), false);
            if (flag) {
                System.out.println("delete succeeded");
            } else {
                System.out.println("delete failed");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    public static void main(String[] args) throws JSONException, ParseException {
        // on Windows, hadoop.home.dir must point at a directory whose bin/
        // subdirectory contains winutils.exe
        System.setProperty("hadoop.home.dir", "C:/Users/root/.m2/repository/org/apache/hadoop/hadoop-common/2.5.2");
        byte[] contents = "明月几时有...\n".getBytes();
        /*
         * try {
         *     createFile("/user/hadoop/test/hdfs01.txt", contents);
         * } catch (IOException e) {
         *     e.printStackTrace();
         * }
         */
        // getAllDataNode();
        // upload("F:/yun/svn/1.txt", "/user/root/");
        // read a file
        // readFile("/flume/data/FlumeData.1469543705102");
        // iterate over files
        // listFiles("/flume/");
        // delete a file
        delete("/user/root/test");
    }
}
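The code above depends on two small helper classes, com.shidai.hadoop.utils.Constant and com.shidai.hadoop.utils.DateUtil, which are not included in the listing. A minimal sketch of what they might look like, assuming Constant.url holds the NameNode URI (the hdfs://localhost:9000 value below is a placeholder for your cluster address) and DateUtil.getDate formats an epoch timestamp given in milliseconds:

// Constant.java (hypothetical sketch)
package com.shidai.hadoop.utils;

public class Constant {
    // placeholder: replace with your NameNode address
    public static final String url = "hdfs://localhost:9000";
}

// DateUtil.java (hypothetical sketch)
package com.shidai.hadoop.utils;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

public class DateUtil {
    // formats an epoch timestamp (milliseconds) as a readable date string;
    // declared to throw ParseException only to match the caller's throws clause
    public static String getDate(long millis) throws ParseException {
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return fmt.format(new Date(millis));
    }
}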
The project is built with Maven against Hadoop 2.5.2; the pom.xml is as follows:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.shidai.hadoop</groupId>
    <artifactId>hdfs01</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>hdfs01</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.5.2</version>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20090211</version>
        </dependency>
    </dependencies>
</project>
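Note that hadoop-client 2.5.2 already pulls in hadoop-common and hadoop-hdfs transitively, so listing them explicitly mainly serves to pin the versions, and the org.codehaus.jettison classes used by readFile arrive as a transitive dependency of the Hadoop artifacts. With this pom in place, a plain mvn clean compile should be enough to build the example before running HDFSTest against the cluster.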