0 Preface
It took me about two days to finally figure out how to use the Hadoop Java API on Linux, so here are some notes for future reference. The environment is:
Hadoop: 2.5.1
Linux: Ubuntu Kylin
Eclipse: Luna
1 Steps
First, download a copy of Eclipse; here I am using Luna (a nice name, too). It turns out Luna ships with Maven support built in, which is great. So create a Maven project and change pom.xml to the following content:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>maven</groupId>
<artifactId>maven</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>maven</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-maven-plugins</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.5.1</version>
</dependency>
</dependencies>
</project>
Then wait while Eclipse/Maven downloads the dependencies. Once that is done, the next step is to configure the JVM run arguments, because the Hadoop native libraries are needed at runtime. My Hadoop installation is under /home/hadoop-master/hadoop-2.5.1, so the VM argument is:
-Djava.library.path=/home/hadoop-master/hadoop-2.5.1/lib/native
Hadoop 2.5.1 already ships with the native libraries pre-built, so there is no need to compile them yourself (one more reason to use a recent release; building them by hand is a hassle). At this point the setup is done.
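If you want to double-check that the VM argument is actually picked up, here is a minimal sketch you can run with the same argument. Only org.apache.hadoop.util.NativeCodeLoader comes from Hadoop; the class name NativeLibCheck is my own choice.
import org.apache.hadoop.util.NativeCodeLoader;

public class NativeLibCheck {
    public static void main(String[] args) {
        // print what the JVM actually received for java.library.path
        System.out.println("java.library.path = " + System.getProperty("java.library.path"));
        // true only if libhadoop was found and loaded from that path
        System.out.println("native hadoop loaded: " + NativeCodeLoader.isNativeCodeLoaded());
    }
}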
2 Test Code
The proof of the pudding is in the eating, so let's write a small program and run it.
package maven.maven;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
public class HadoopFSOperations {
private static Configuration conf = new Configuration();
private static final String HADOOP_URL="hdfs://192.168.190.129:9000";
private static FileSystem fs;
private static DistributedFileSystem hdfs;
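// one-time setup: point the default FileSystem at the NameNode; the cast to DistributedFileSystem
// works because HADOOP_URL is an hdfs:// URI and gives access to HDFS-specific calls such as getDataNodeStats()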
static {
try {
FileSystem.setDefaultUri(conf, HADOOP_URL);
fs = FileSystem.get(conf);
hdfs = (DistributedFileSystem)fs;
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* List the host names of all DataNodes in the HDFS cluster
*/
public void listDataNodeInfo() {
try {
DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
String[] names = new String[dataNodeStats.length];
System.out.println("List of all the datanode in the HDFS cluster:");
for (int i=0;i<names.length;i++) {
names[i] = dataNodeStats[i].getHostName();
System.out.println(names[i]);
}
System.out.println(hdfs.getUri().toString());
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Check whether a file exists on HDFS
*/
public void checkFileExist() {
try {
Path a= hdfs.getHomeDirectory();
System.out.println("main path:"+a.toString());
Path f = new Path("/user/xxx/input01/");
boolean exist = fs.exists(f);
System.out.println("Whether exist of this file:"+exist);
// delete the file
// if (exist) {
// boolean isDeleted = hdfs.delete(f, false);
// if(isDeleted) {
// System.out.println("Delete success");
// }
// }
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Create a file on HDFS and write some content to it
*/
public void createFile() {
try {
Path f = new Path("/user/xxx/input02/file01");
System.out.println("Create and Write :"+f.getName()+" to hdfs");
FSDataOutputStream os = fs.create(f, true);
Writer out = new OutputStreamWriter(os, "utf-8");// write in UTF-8 so the content does not get garbled
out.write("你好 good job");
out.close();
os.close();
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Copy a local file to HDFS<br>
* Make sure the file encoding is UTF-8 throughout, local -> HDFS
*/
public void copyFileToHDFS() {
try {
Path f = new Path("/user/xxx/input02/file01");
File file = new File("/home/hadoop-master/hadoopTest/temporary.txt");// local source file to upload; this path is only an example, adjust it to a file on your own machine
FileInputStream is = new FileInputStream(file);
InputStreamReader isr = new InputStreamReader(is, "utf-8");
BufferedReader br = new BufferedReader(isr);
FSDataOutputStream os = fs.create(f, true);
Writer out = new OutputStreamWriter(os, "utf-8");
String str = "";
while((str=br.readLine()) != null) {
out.write(str+"\n");
}
br.close();
isr.close();
is.close();
out.close();
os.close();
System.out.println("Write content of file "+file.getName()+" to hdfs file "+f.getName()+" success");
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Get the hosts that store the blocks of a file
*/
public void getLocation() {
try {
Path f = new Path("/user/xxx/input02/file01");
FileStatus fileStatus = fs.getFileStatus(f);
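// ask for the DataNodes that hold each block of the file over its whole byte range [0, len)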
BlockLocation[] blkLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
for (BlockLocation currentLocation : blkLocations) {
String[] hosts = currentLocation.getHosts();
for (String host : hosts) {
System.out.println(host);
}
}
// get the last modification time
long modifyTime = fileStatus.getModificationTime();
Date d = new Date(modifyTime);
System.out.println(d);
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Read the content of a file from HDFS
*/
public void readFileFromHdfs() {
try {
Path f = new Path("/user/xxx/input02/file01");
FSDataInputStream dis = fs.open(f);
InputStreamReader isr = new InputStreamReader(dis, "utf-8");
BufferedReader br = new BufferedReader(isr);
String str = "";
while ((str = br.readLine()) !=null) {
System.out.println(str);
}
br.close();
isr.close();
dis.close();
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* list all file/directory
* @param args
* @throws IOException
* @throws IllegalArgumentException
* @throws FileNotFoundException
*/
public void listFileStatus(String path) throws FileNotFoundException, IllegalArgumentException, IOException {
FileStatus fileStatus[]=fs.listStatus(new Path(path));
int listlength=fileStatus.length;
for (int i=0 ;i<listlength ;i++){
if (fileStatus[i].isDirectory() == false) {
System.out.println("filename:"
+ fileStatus[i].getPath().getName() + "\tsize:"
+ fileStatus[i].getLen());
} else {
String newpath = fileStatus[i].getPath().toString();
listFileStatus(newpath);
}
}
}
public static void main(String[] args) {
HadoopFSOperations a = new HadoopFSOperations();
a.listDataNodeInfo();
// a.checkFileExist();
// a.createFile();
// a.copyFileToHDFS();
// a.getLocation();
// a.readFileFromHdfs();
try {
a.listFileStatus(HADOOP_URL+"/user");
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IllegalArgumentException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
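One note on configuration: the NameNode address above is wired in through FileSystem.setDefaultUri in the static block. As a rough alternative sketch (my own variant, not what the class above does), the same thing can be expressed with the standard fs.defaultFS key:
Configuration conf = new Configuration();
// equivalent to FileSystem.setDefaultUri(conf, HADOOP_URL); point it at your own NameNode
conf.set("fs.defaultFS", "hdfs://192.168.190.129:9000");
FileSystem fs = FileSystem.get(conf);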
Since my Hadoop NameNode runs on 192.168.190.129, I use private static final String HADOOP_URL="hdfs://192.168.190.129:9000"; adjust this to your own cluster. Run the program and you should see something like the following:
List of all the datanode in the HDFS cluster:
hadoopslaver0
hadoopslaver2
hadoopslaver1
hdfs://192.168.190.129:9000
filename:TrustCom2015_CFP.pdf size:290401
filename:jd.PNG size:16647
You can see the three DataNodes hadoopslaver0, hadoopslaver1 and hadoopslaver2, as well as the files that were placed under /user beforehand. The little experiment is a success.
3 Summary
Following the steps above, the Hadoop Java API can be used on Linux. The journey of a thousand miles has finally begun with this first step.