HDFS是一个分布式文件系统,既然是文件系统,就可以对其文件进行操作,比如说新建文件、删除文件、读取文件内容等操作。下面记录一下使用JAVA API对HDFS中的文件进行操作的过程。
对HDFS中的文件操作主要涉及以下几个类:
Configuration类:该类的对象封装了客户端或者服务器的配置。
FileSystem类:该类的对象是一个文件系统对象,可以用该对象的一些方法来对文件进行操作。FileSystem fs = FileSystem.get(conf);通过FileSystem的静态方法get获得该对象。
FSDataInputStream和FSDataOutputStream:这两个类是HDFS中的输入输出流。分别通过FileSystem的open方法和create方法获得。
package com.east.spark.hdfs;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
/**
 * Demo of basic HDFS file operations via the Hadoop Java API: create
 * directories/files, upload, rename, delete, existence checks, metadata and
 * block-location queries. Call {@link #iniFileSystemObject()} once before
 * using any other method.
 *
 * <p>NOTE(review): not thread-safe — all state is held in mutable statics.
 */
public class FileDemo {
	// Client/server configuration; the no-arg constructor loads the default
	// resources (boolean loadDefaults defaults to true).
	private static Configuration conf;
	// Root URI of the target HDFS namenode.
	private static final String rootPath = "hdfs://192.168.31.130:9000/";
	// Shared file-system handle, initialized by iniFileSystemObject().
	private static FileSystem coreSys = null;
	// Thread-safe, reusable formatter (replaces per-call SimpleDateFormat).
	private static final DateTimeFormatter TIME_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

	/**
	 * Initializes the shared {@link FileSystem} handle. Must be called before
	 * any other method of this class.
	 */
	public static void iniFileSystemObject() {
		try {
			conf = new Configuration();
			// Pin the scheme implementations explicitly so they resolve even in
			// shaded/merged-jar deployments where service files may collide.
			conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
			conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
			coreSys = FileSystem.get(URI.create(rootPath), conf);
		} catch (IOException e) {
			// Best-effort init as before, but no longer discards the failure detail.
			System.out.println("初始化HDFS核心文件对象失败:" + e.getLocalizedMessage());
			e.printStackTrace();
		}
	}

	/**
	 * Entry point: initializes the file system and runs one of the demos.
	 * Uncomment the call you want to try.
	 */
	public static void main(String[] args) throws Exception {
		iniFileSystemObject();
		FileDemo fileDemo = new FileDemo();
		// fileDemo.createDirOnHDFS();
		// fileDemo.createFile();
		// uploadFile();
		// renameFile();
		// deleteFile();
		// findFileIsExit();
		// getNodeMsgHdfs();
		// getUnderDirAllFile();
		watchFileLastModifyTime();
	}

	/**
	 * Creates the directory {@code demoDir} under the HDFS root.
	 */
	public void createDirOnHDFS() {
		Path demoDir = new Path(rootPath + "demoDir");
		boolean isSuccess;
		try {
			isSuccess = coreSys.mkdirs(demoDir);
		} catch (IOException e) {
			isSuccess = false;
			// Report the cause instead of swallowing it silently.
			System.out.println("创建目录异常:" + e.getLocalizedMessage());
		}
		System.out.println(isSuccess ? "目录创建成功!" : "目录创建失败!");
	}

	/**
	 * Creates a file on HDFS and writes a short text payload into it.
	 *
	 * @throws Exception if the file cannot be created or written
	 */
	public void createFile() throws Exception {
		Path hdfsPath = new Path(rootPath + "createDemoFile");
		System.out.println(coreSys.getHomeDirectory());
		String content = "Hello hadoop,this is first time that I create file on hdfs";
		// try-with-resources: closing bout also flushes and closes fsout, fixing
		// the original double-close and the stream leak on a write failure.
		try (FSDataOutputStream fsout = coreSys.create(hdfsPath);
				BufferedOutputStream bout = new BufferedOutputStream(fsout)) {
			byte[] bytes = content.getBytes(); // platform charset, as before
			bout.write(bytes, 0, bytes.length);
		}
		System.out.println("文件创建完毕!");
	}

	/**
	 * Uploads the local directory {@code D:/hdfs/} into {@code demoDir/} on
	 * HDFS, then lists the target directory.
	 *
	 * @throws Exception if the copy or listing fails
	 */
	public static void uploadFile() throws Exception {
		Path remotePath = new Path(rootPath + "demoDir/");
		coreSys.copyFromLocalFile(new Path("D:/hdfs/"), remotePath);
		System.out.println("Upload to:" + conf.get("fs.defaultFS"));
		FileStatus[] files = coreSys.listStatus(remotePath);
		for (FileStatus file : files) {
			System.out.println("==" + file.getPath().toString());
		}
	}

	/**
	 * Renames {@code demoDir} to {@code re_demoDir}.
	 */
	public static void renameFile() {
		Path oldFileName = new Path(rootPath + "demoDir");
		Path newFileName = new Path(rootPath + "re_demoDir");
		boolean isSuccess;
		try {
			isSuccess = coreSys.rename(oldFileName, newFileName);
		} catch (IOException e) {
			isSuccess = false;
			// Report the cause instead of swallowing it silently.
			System.out.println("重命名异常:" + e.getLocalizedMessage());
		}
		System.out.println(isSuccess ? "重命名成功!" : "重命名失败!");
	}

	/**
	 * Deletes a single file (non-recursive) from HDFS.
	 */
	public static void deleteFile() {
		Path deleteFile = new Path(rootPath + "re_demoDir/hdfs/c.txt");
		boolean isSuccess;
		try {
			// false = do not recurse; fails if the path is a non-empty directory.
			isSuccess = coreSys.delete(deleteFile, false);
		} catch (IOException e) {
			isSuccess = false;
			// Report the cause instead of swallowing it silently.
			System.out.println("删除异常:" + e.getLocalizedMessage());
		}
		System.out.println(isSuccess ? "删除成功!" : "删除失败!");
	}

	/**
	 * Checks whether {@code demoDir/c.txt} exists on HDFS.
	 */
	public static void findFileIsExit() {
		Path checkFile = new Path(rootPath + "demoDir/c.txt");
		boolean isExit;
		try {
			isExit = coreSys.exists(checkFile);
		} catch (IOException e) {
			isExit = false;
			// Report the cause instead of swallowing it silently.
			System.out.println("查询异常:" + e.getLocalizedMessage());
		}
		System.out.println(isExit ? "文件存在!" : "文件不存在!");
	}

	/**
	 * Prints the last modification time of {@code demoDir/c.txt}.
	 *
	 * @throws IOException if the file status cannot be read
	 */
	public static void watchFileLastModifyTime() throws IOException {
		Path targetFile = new Path(rootPath + "/demoDir/c.txt");
		FileStatus fileStatus = coreSys.getFileStatus(targetFile);
		// getModificationTime() returns epoch millis; primitive long avoids boxing.
		long lastTime = fileStatus.getModificationTime();
		LocalDateTime when = LocalDateTime.ofInstant(Instant.ofEpochMilli(lastTime), ZoneId.systemDefault());
		// java.time replaces the legacy, non-thread-safe SimpleDateFormat/Date;
		// normal output now goes to stdout instead of stderr.
		System.out.println("文件的最后修改时间为:" + TIME_FORMAT.format(when));
	}

	/**
	 * Lists every entry directly under {@code demoDir/} with basic metadata.
	 *
	 * @throws IOException if the directory cannot be listed
	 */
	public static void getUnderDirAllFile() throws IOException {
		Path targetDir = new Path(rootPath + "demoDir/");
		FileStatus[] fileStatus = coreSys.listStatus(targetDir);
		for (FileStatus file : fileStatus) {
			System.out.println(file.getPath() + "--" + file.getGroup() + "--" + file.getBlockSize() + "--"
					+ file.getLen() + "--" + file.getModificationTime() + "--" + file.getOwner());
		}
	}

	/**
	 * Prints one datanode host per block of the given file.
	 *
	 * @throws IOException if the file status or block locations cannot be read
	 */
	public void findLocationOnHadoop() throws IOException {
		Path targetFile = new Path(rootPath + "user/hdfsupload/AA.txt");
		FileStatus fileStaus = coreSys.getFileStatus(targetFile);
		BlockLocation[] bloLocations = coreSys.getFileBlockLocations(fileStaus, 0, fileStaus.getLen());
		for (int i = 0; i < bloLocations.length; i++) {
			// Guard against an empty host list (e.g. missing replicas) instead of
			// throwing ArrayIndexOutOfBoundsException.
			String[] hosts = bloLocations[i].getHosts();
			System.out.println("block_" + i + "_location:" + (hosts.length > 0 ? hosts[0] : "unknown"));
		}
	}

	/**
	 * Prints a report line for every datanode in the cluster.
	 *
	 * @throws IOException if the datanode stats cannot be fetched
	 */
	public static void getNodeMsgHdfs() throws IOException {
		// Cast is safe only for an hdfs:// file system; rootPath guarantees that here.
		DistributedFileSystem distributedFileSystem = (DistributedFileSystem) coreSys;
		DatanodeInfo[] dataInfos = distributedFileSystem.getDataNodeStats();
		for (int j = 0; j < dataInfos.length; j++) {
			System.out.println(
					"DataNode_" + j + "_Name:" + dataInfos[j].getHostName() + "--->" + dataInfos[j].getDatanodeReport()
							+ "-->" + dataInfos[j].getDfsUsedPercent() + "-->" + dataInfos[j].getLevel());
		}
	}
}