I. File operations
1. Upload a local file to HDFS
2. Create a new file on HDFS and write to it
3. Delete a file on HDFS
4. Read a file
5. View a file's modification time
II. Directory operations
1. Create a directory on HDFS
2. Delete a directory
3. List all files in a directory
III. HDFS information
1. Find the location of a file in the HDFS cluster
2. Get information about all data nodes in the HDFS cluster
I. File operations
1. Upload a local file to HDFS
package com.hdfs.file;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class CopyFileToHDFS {
    /**
     * @param args args[0] is the local source path, args[1] is the HDFS destination path
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        // equivalent of the shell command 'hadoop fs -copyFromLocal'
        // step1: load the Hadoop configuration from the classpath
        Configuration conf = new Configuration();
        // step2: obtain a handle to the file system defined in that configuration
        FileSystem fs = FileSystem.get(conf);
        // step3: copy the local file to HDFS
        fs.copyFromLocalFile(new Path(args[0]), new Path(args[1]));
        fs.close();
    }
}
Package this Java file into copyFileToHdfs.jar. (Note: every example below must likewise be packaged into a jar before it can be run.)
Run it with: hadoop jar <path to copyFileToHdfs.jar> <local input file path> <HDFS output directory path>
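copyFromLocalFile also has an overload with two extra flags for deleting the local source and for overwriting the destination. A minimal variant of the example above (the class name is illustrative, not part of the original project):

package com.hdfs.file;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class CopyFileToHDFSOverwrite {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // delSrc = false: keep the local source; overwrite = true: replace an existing destination
        fs.copyFromLocalFile(false, true, new Path(args[0]), new Path(args[1]));
        fs.close();
    }
}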
2. Create a new file on HDFS and write to it
package com.hdfs.file;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class CreateNewFileAndWrite {
    /**
     * @param args args[0] is the HDFS path of the file to create, args[1] is the content to write
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // create the file (an existing file at this path is overwritten)
        FSDataOutputStream fsOutput = fs.create(new Path(args[0]));
        // write the second argument as UTF-8 bytes
        fsOutput.write(args[1].getBytes("UTF-8"));
        fsOutput.close();
        fs.close();
    }
}
Run it with: hadoop jar <path to createNFAW.jar> <path of the file to create (an HDFS path)> <content to write>
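Note that fs.create(path) silently overwrites an existing file. If that is not what you want, check for the file first; a sketch (class name illustrative):

package com.hdfs.file;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class CreateFileIfAbsent {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(args[0]);
        if (fs.exists(path)) {
            System.out.println(args[0] + " already exists, not overwriting");
        } else {
            // create(path, false) would likewise refuse to overwrite, by throwing instead
            FSDataOutputStream out = fs.create(path);
            out.write(args[1].getBytes("UTF-8"));
            out.close();
        }
        fs.close();
    }
}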
3. Delete a file on HDFS
package com.hdfs.file;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class DeleteFile {
    /**
     * @param args args[0] is the HDFS path of the file to delete
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // false: no recursive delete, since the target is a single file
        fs.delete(new Path(args[0]), false);
        fs.close();
    }
}
Run it with: hadoop jar <path to deleteFile.jar> <path of the file to delete>
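delete returns a boolean, so the caller can tell whether anything was actually removed; a checked variant of the example above (class name illustrative):

package com.hdfs.file;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class DeleteFileChecked {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        boolean deleted = fs.delete(new Path(args[0]), false);
        // false is returned e.g. when the path does not exist
        System.out.println(deleted ? "deleted " + args[0] : "nothing deleted");
        fs.close();
    }
}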
4. Read a file
package com.hdfs.file;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class ReadFile {
    /**
     * @param args args[0] is the HDFS path of the file to read
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(args[0]);
        if (fs.exists(path)) {
            FSDataInputStream fsIn = fs.open(path);
            FileStatus status = fs.getFileStatus(path);
            // allocate a buffer the size of the whole file (assumes it fits in memory)
            byte[] buffer = new byte[(int) status.getLen()];
            fsIn.readFully(0, buffer);
            fsIn.close();
            fs.close();
            System.out.println("Read complete!");
            System.out.println(new String(buffer, "UTF-8"));
        } else {
            throw new Exception("the file is not found!");
        }
    }
}
Run it with: hadoop jar <path to readFile.jar> <path of the file to read>
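Reading the whole file into a single byte array only works while the file fits in memory. For large files it is better to stream the content, e.g. with Hadoop's IOUtils; a sketch (class name illustrative):

package com.hdfs.file;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
public class StreamFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(new Path(args[0]));
        // copy the stream to stdout in 4 KB chunks; 'false' means we close the streams ourselves
        IOUtils.copyBytes(in, System.out, 4096, false);
        in.close();
        fs.close();
    }
}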
5. View a file's modification time
package com.hdfs.file;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class ModifyFileTime {
    /**
     * @param args args[0] is the HDFS path of the file to inspect
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(args[0]);
        FileStatus status = fs.getFileStatus(path);
        // the modification time is returned as milliseconds since the epoch
        System.out.println("Modification time: " + status.getModificationTime());
        fs.close();
    }
}
Run it with: hadoop jar <path to modifyFileTime.jar> <path of the file to inspect>
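getModificationTime() returns raw epoch milliseconds; to print it as a readable date you can format it with SimpleDateFormat. A sketch (class name illustrative):

package com.hdfs.file;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class PrintFileTime {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FileStatus status = fs.getFileStatus(new Path(args[0]));
        // wrap the epoch-millisecond timestamp in a Date for formatting
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("Modification time: " + fmt.format(new Date(status.getModificationTime())));
        fs.close();
    }
}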
II. Directory operations
1. Create a directory on HDFS
package com.hdfs.directory;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class CreateDir {
    /**
     * @param args
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // the directory to create is hardcoded in this example
        fs.mkdirs(new Path("/user/long1657/20130908"));
        fs.close();
    }
}
Run it with: hadoop jar <path to createDir.jar> (the directory path is hardcoded in this example)
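Hardcoding the path is fine for a demo; a slightly more general sketch takes the directory as an argument and checks the result of mkdirs (class name illustrative, not part of the original project):

package com.hdfs.directory;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class CreateDirChecked {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // mkdirs behaves like 'mkdir -p': missing parent directories are created too
        boolean ok = fs.mkdirs(new Path(args[0]));
        System.out.println(ok ? "directory ready: " + args[0] : "could not create " + args[0]);
        fs.close();
    }
}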
2. Delete a directory
package com.hdfs.directory;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class DeleteDir {
    /**
     * @param args args[0] is the HDFS path of the directory to delete
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // marks the path for deletion; the actual delete happens when fs is closed
        fs.deleteOnExit(new Path(args[0]));
        fs.close();
    }
}
Run it with: hadoop jar <path to deleteDir.jar> <path of the directory to delete>
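deleteOnExit only schedules the deletion, which is carried out when the FileSystem handle is closed. If you want the directory removed immediately, call delete with recursive=true instead; a sketch (class name illustrative):

package com.hdfs.directory;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class DeleteDirNow {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // true: delete the directory and everything beneath it right away
        boolean deleted = fs.delete(new Path(args[0]), true);
        System.out.println(deleted ? "deleted " + args[0] : "nothing deleted");
        fs.close();
    }
}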
3. List all files in a directory
package com.hdfs.directory;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class ReadAllFileInDir {
    /**
     * @param args args[0] is the HDFS directory to list
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(args[0]);
        // list the direct children of the directory
        FileStatus[] status = fs.listStatus(path);
        for (int i = 0; i < status.length; i++) {
            System.out.println(status[i].getPath().toString());
            // descend one level into subdirectories
            if (status[i].isDir()) {
                FileStatus[] ss = fs.listStatus(status[i].getPath());
                for (int j = 0; j < ss.length; j++) {
                    System.out.println(ss[j].getPath().toString());
                }
            }
        }
        fs.close();
    }
}
Run it with: hadoop jar <path to readAllFileInDir.jar> <path of the directory to list>
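The example above only descends one level into subdirectories. To walk a tree of arbitrary depth you can recurse; a minimal sketch (class and method names are illustrative):

package com.hdfs.directory;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class ListFilesRecursive {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        list(fs, new Path(args[0]));
        fs.close();
    }
    // print every entry under 'path', descending into each subdirectory
    private static void list(FileSystem fs, Path path) throws IOException {
        for (FileStatus status : fs.listStatus(path)) {
            System.out.println(status.getPath().toString());
            if (status.isDir()) {
                list(fs, status.getPath());
            }
        }
    }
}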
III. HDFS information
1. Find the location of a file in the HDFS cluster
// In addition to Configuration, FileSystem, Path and FileStatus, this method
// needs org.apache.hadoop.fs.BlockLocation.
public static void getFileLocal(String pathStr) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(pathStr);
    // fetch the file's metadata from the file system
    FileStatus fileStatus = fs.getFileStatus(path);
    // fetch block locations for the entire file (offset 0, length = file length,
    // so files larger than one block are fully covered)
    BlockLocation[] blocks = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    // print the first replica host of each block
    for (int i = 0; i < blocks.length; i++) {
        String[] hosts = blocks[i].getHosts();
        System.out.println("block_" + i + "_location:" + hosts[0]);
    }
    fs.close();
}
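hosts[0] only shows the first replica of each block, while HDFS usually keeps several replicas. A variant method that prints them all (the method name is illustrative; it additionally needs import java.util.Arrays):

public static void printAllBlockHosts(String pathStr) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus fileStatus = fs.getFileStatus(new Path(pathStr));
    BlockLocation[] blocks = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    for (int i = 0; i < blocks.length; i++) {
        // print every replica host of block i, not just the first
        System.out.println("block_" + i + "_hosts:" + Arrays.toString(blocks[i].getHosts()));
    }
    fs.close();
}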
2. Get information about all data nodes in the HDFS cluster
/**
 * List the data nodes in the cluster.
 * Needs org.apache.hadoop.hdfs.DistributedFileSystem and
 * org.apache.hadoop.hdfs.protocol.DatanodeInfo.
 */
public static void getHDFSNodes() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // the cast only succeeds when the configured default file system is HDFS
    DistributedFileSystem df = (DistributedFileSystem) fs;
    DatanodeInfo[] dataNode = df.getDataNodeStats();
    // print each data node's host name
    for (int i = 0; i < dataNode.length; i++) {
        System.out.println("DataNode_" + i + "_Name:" + dataNode[i].getHostName());
    }
    fs.close();
}
The complete project can be downloaded here: http://download.csdn.net/detail/long1657/6310109