Hadoop Java API
Introduction
The abstract Java class org.apache.hadoop.fs.FileSystem defines Hadoop's file-system interface. Because the class is abstract, a concrete FileSystem instance is obtained through one of two static factory methods:
public static FileSystem get(Configuration conf) throws IOException
public static FileSystem get(URI uri, Configuration conf) throws IOException
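A minimal sketch of both forms, assuming the hdfs://localhost:9000 NameNode address used throughout this guide and the Hadoop 2.x property name fs.defaultFS:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetFileSystem {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Form 1: the target file system is taken from fs.defaultFS in the configuration
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        FileSystem fromConf = FileSystem.get(conf);
        // Form 2: the target file system is named explicitly by a URI
        FileSystem fromUri = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        System.out.println(fromConf.getUri() + " " + fromUri.getUri());
    }
}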
The concrete methods for creating, uploading, downloading, and deleting files on HDFS include:
(1) public boolean mkdirs(Path f) throws IOException
Creates the directory and all missing parent directories in one call; f is the full directory path.
(2) public FSDataOutputStream create(Path f) throws IOException
Creates a file at the given Path and returns an output stream for writing data to it.
create() has several overloaded versions that let you specify whether to overwrite an existing file, the replication factor, the write buffer size, the block size, and the file permissions (see the sketch after this list).
(3) public void copyFromLocalFile(Path src, Path dst) throws IOException
Copies a local file to the file system.
(4) public boolean exists(Path f) throws IOException
Checks whether a file or directory exists.
(5) public boolean delete(Path f, boolean recursive) throws IOException
Permanently deletes the given file or directory. If f is a file or an empty directory, the value of recursive is ignored; a non-empty directory and its contents are deleted only when recursive is true.
(6) The FileStatus class encapsulates file-system metadata for files and directories, including file length, block size, replication, modification time, owner, and permission information.
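The sketch below illustrates one of item (2)'s create() overloads and item (6)'s FileStatus metadata; the path /hdfstest/overload and the parameter values (4 KB buffer, replication 1, 128 MB blocks) are this example's assumptions, not API defaults:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateAndStat {
    public static void main(String[] args) throws Exception {
        FileSystem hdfs = FileSystem.get(URI.create("hdfs://localhost:9000"), new Configuration());
        Path p = new Path("/hdfstest/overload");
        // create() overload: overwrite=true, bufferSize=4096 bytes,
        // replication=1, blockSize=128 MB (illustrative values, not defaults)
        FSDataOutputStream out = hdfs.create(p, true, 4096, (short) 1, 128 * 1024 * 1024L);
        out.write("sample".getBytes());
        out.close();
        // FileStatus exposes the metadata listed in item (6)
        FileStatus st = hdfs.getFileStatus(p);
        System.out.println("length: " + st.getLen());
        System.out.println("block size: " + st.getBlockSize());
        System.out.println("replication: " + st.getReplication());
        System.out.println("modified: " + st.getModificationTime());
        System.out.println("owner: " + st.getOwner());
        System.out.println("permissions: " + st.getPermission());
    }
}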
1. Creating a directory
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), conf);
        // mkdirs() also creates any missing parent directories
        String newDir = "/hdfstest";
        boolean result = hdfs.mkdirs(new Path(newDir));
        if (result) {
            System.out.println("Success!");
        } else {
            System.out.println("Failed!");
        }
    }
}
2. Creating a file
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TouchFile {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration configuration = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), configuration);
        String filePath = "/hdfstest/touchfile";
        // create() returns an output stream; closing it finalizes the empty file
        FSDataOutputStream create = hdfs.create(new Path(filePath));
        create.close();
        System.out.println("Finish!");
    }
}
3. Uploading a file
Open the sample_data file with vim:
vim sample_data
Write "hello world" into sample_data (in vim, press a to enter insert mode):
hello world
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), conf);
        // Copy the local file into the /hdfstest directory on HDFS
        String from_Linux = "/data/hadoop4/sample_data";
        String to_HDFS = "/hdfstest/";
        hdfs.copyFromLocalFile(new Path(from_Linux), new Path(to_HDFS));
        System.out.println("Finish!");
    }
}
4. Downloading a file
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyToLocalFile {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), conf);
        String from_HDFS = "/hdfstest/sample_data";
        String to_Linux = "/data/hadoop4/copytolocal";
        // The first argument (delSrc=false) keeps the source file on HDFS
        hdfs.copyToLocalFile(false, new Path(from_HDFS), new Path(to_Linux));
        System.out.println("Finish!");
    }
}
from_HDFS is the source file on HDFS; to_Linux is the destination path on the local host.
5. Checking whether a file exists
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EDR {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration configuration = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), configuration);
        // exists() works for both files and directories
        boolean f = hdfs.exists(new Path("/newTouch"));
        System.out.println(f);
    }
}
6. Renaming a file
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EDR {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration configuration = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), configuration);
        // rename() returns true on success, false otherwise
        boolean f = hdfs.rename(new Path("/newTouch"), new Path("/newtouch"));
        System.out.println(f);
    }
}
7. Deleting a file
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EDR {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration configuration = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), configuration);
        // delete(Path) without a recursive flag is deprecated; since /newTouch
        // is a file, the recursive flag is ignored and false is safe here
        boolean f = hdfs.delete(new Path("/newTouch"), false);
        System.out.println(f);
    }
}
8. Merging files
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PutMerge {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), conf);
        FileSystem local = FileSystem.getLocal(conf);
        String from_LinuxDir = "/data/hadoop4/";
        String to_HDFS = "/hdfstest/mergefile";
        // List every entry in the local directory and append each file
        // in turn to a single output file on HDFS
        FileStatus[] inputFiles = local.listStatus(new Path(from_LinuxDir));
        FSDataOutputStream out = hdfs.create(new Path(to_HDFS));
        for (FileStatus file : inputFiles) {
            if (!file.isFile()) {
                continue; // skip subdirectories, which cannot be opened as streams
            }
            FSDataInputStream in = local.open(file.getPath());
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
        System.out.println("Finish!");
    }
}
9. Locating a file's blocks
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LocateFile {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), conf);
        Path file = new Path("/hdfstest/sample_data");
        FileStatus fileStatus = hdfs.getFileStatus(file);
        // Ask for the block locations covering the whole file (offset 0 to its length)
        BlockLocation[] location = hdfs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        for (BlockLocation block : location) {
            // Each block may be replicated on several hosts
            String[] hosts = block.getHosts();
            for (String host : hosts) {
                System.out.println("block:" + block + " host:" + host);
            }
        }
    }
}
10. Creating a file and writing content
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(URI.create(hdfsPath), conf);
        String filePath = "/hdfstest/writefile";
        FSDataOutputStream create = hdfs.create(new Path(filePath));
        System.out.println("Step 1 Finish!");
        // Write the bytes and close the stream so the data is flushed to HDFS
        String sayHi = "hello world hello data!";
        byte[] buff = sayHi.getBytes();
        create.write(buff, 0, buff.length);
        create.close();
        System.out.println("Step 2 Finish!");
    }
}
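To read the file back, a minimal companion sketch (the class name ReadFile and the reuse of /hdfstest/writefile are this sketch's assumptions) uses FileSystem.open() together with IOUtils.copyBytes():

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(URI.create(hdfsPath), conf);
        // open() returns an input stream over the HDFS file;
        // copyBytes() streams it to stdout and closes the stream afterwards
        FSDataInputStream in = hdfs.open(new Path("/hdfstest/writefile"));
        IOUtils.copyBytes(in, System.out, 4096, true);
    }
}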
11. Listing the files under a directory
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListFiles {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        String hdfsPath = "hdfs://localhost:9000";
        FileSystem hdfs = FileSystem.get(new URI(hdfsPath), conf);
        // listStatus() returns one FileStatus per direct child of the directory
        FileStatus[] children = hdfs.listStatus(new Path("/hdfstest"));
        for (FileStatus child : children) {
            System.out.println(child.getPath());
        }
    }
}
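listStatus() only returns direct children. For a recursive walk, a hedged variant uses FileSystem.listFiles(), which returns a RemoteIterator (available in Hadoop 2.x); the /hdfstest path is this guide's example directory:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesRecursive {
    public static void main(String[] args) throws Exception {
        FileSystem hdfs = FileSystem.get(URI.create("hdfs://localhost:9000"), new Configuration());
        // listFiles(path, true) iterates every file under the tree, skipping directories
        RemoteIterator<LocatedFileStatus> it = hdfs.listFiles(new Path("/hdfstest"), true);
        while (it.hasNext()) {
            System.out.println(it.next().getPath());
        }
    }
}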