一、前期准备
1.jar包准备
解压hadoop的压缩包,进入share文件夹,将其中的jar包放入一个文件夹中,在eclipse中导入。
2.配置环境变量
配置HADOOP_HOME环境变量,使其指向hadoop的解压目录,并将%HADOOP_HOME%\bin加入Path环境变量。
二、API操作HDFS
操作HDFS步骤主要有三步
1.获取文件系统
2.对文件进行操作
3.关闭资源
1.文件上传
/**
 * Uploads the local file {@code f:/xiyou1.txt} to
 * {@code /user/atguigu/xiyou1.txt} on HDFS and prints {@code over} when done.
 *
 * @param args unused
 * @throws Exception propagated from the Hadoop client calls or from URI parsing
 */
public static void main(String[] args) throws Exception {
    // 1. Obtain the file system.
    Configuration configuration = new Configuration();
    // Configure the client to run against the cluster.
    configuration.set("fs.defaultFS", "hdfs://hadoop102:8020");
    // Alternative that relies only on the configuration above:
    //   FileSystem fileSystem = FileSystem.get(configuration);
    // Here the cluster URI and the user name are passed explicitly instead.
    // try-with-resources guarantees step 3 (closing) even if the upload throws.
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        // 2. Upload the local file to the file system.
        fileSystem.copyFromLocalFile(new Path("f:/xiyou1.txt"), new Path("/user/atguigu/xiyou1.txt"));
    }
    // 3. The file system has been closed by the try-with-resources block.
    System.out.println("over");
}
2.文件下载
/**
 * Downloads {@code /user/atguigu/xiyou.txt} from HDFS to {@code F:/xiyou.txt}
 * using the four-argument {@code copyToLocalFile}.
 *
 * @throws IOException propagated from the Hadoop client calls
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
    Configuration configuration = new Configuration();
    // 1. Obtain the file system; try-with-resources closes it (step 3) even on failure.
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        System.out.println(fileSystem.toString());
        // 2. Download: first argument false = keep the source,
        //    last argument true = use the raw local file system.
        fileSystem.copyToLocalFile(false, new Path("/user/atguigu/xiyou.txt"), new Path("F:/xiyou.txt"), true);
    }
}
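注意:在第二步执行下载文件操作时,如果使用只带两个参数的copyToLocalFile(new Path("HDFS源路径"), new Path("本地目标路径")),在Windows下可能会报空指针异常,原因可能与Windows的系统环境变量配置有关。此时建议改用上面代码中带四个参数的copyToLocalFile(delSrc, src, dst, useRawLocalFileSystem),并将最后一个参数useRawLocalFileSystem设为true。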
3.创建目录
/**
 * Creates the directories {@code /user/atguigu/tiantang} and
 * {@code /user/atguigu/sunhouzi/houzaizi} on HDFS.
 *
 * @throws IOException propagated from the Hadoop client calls
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
public void mkdirAtHDFS() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it (step 3) even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        // 2. Create the directories (a single-level and a multi-level path).
        //    The boolean results of mkdirs are not inspected.
        fileSystem.mkdirs(new Path("/user/atguigu/tiantang"));
        fileSystem.mkdirs(new Path("/user/atguigu/sunhouzi/houzaizi"));
    }
}
4.删除文件夹
/**
 * Deletes {@code /user/atguigu/xiyou2.txt} from HDFS, with the recursive flag set.
 *
 * @throws IOException propagated from the Hadoop client calls
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
public void deleteAtHDFS() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it (step 3) even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        // 2. Delete the path; the second argument (true) requests recursive deletion.
        //    The boolean result of delete is not inspected.
        fileSystem.delete(new Path("/user/atguigu/xiyou2.txt"), true);
    }
}
5.更改文件名称
/**
 * Renames {@code /user/atguigu/xiyou.txt} to {@code /user/atguigu/honglou.txt} on HDFS.
 *
 * @throws IOException propagated from the Hadoop client calls
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
public void renameAtHDFS() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it (step 3) even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        // 2. Rename the file. The boolean result of rename is not inspected.
        fileSystem.rename(new Path("/user/atguigu/xiyou.txt"), new Path("/user/atguigu/honglou.txt"));
    }
}
6.查看文件详情
/**
 * Recursively lists the files under {@code /user/atguigu} and prints, for each
 * file, its name, block size, length, permission, and the offset and hosts of
 * every block, followed by a separator line.
 *
 * @throws IOException propagated from the Hadoop client calls
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
public void readFileAtHDFS() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it (step 3) even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        // 2. Walk the files; the second argument (true) makes the listing recursive.
        RemoteIterator<LocatedFileStatus> listFiles = fileSystem.listFiles(new Path("/user/atguigu"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus next = listFiles.next();
            // File name
            System.out.println(next.getPath().getName());
            // Block size
            System.out.println(next.getBlockSize());
            // File length
            System.out.println(next.getLen());
            // File permission
            System.out.println(next.getPermission());
            // Per-block details: the offset, then each host reported for the block
            BlockLocation[] blockLocations = next.getBlockLocations();
            for (BlockLocation bl : blockLocations) {
                System.out.println(bl.getOffset());
                String[] hosts = bl.getHosts();
                for (String host : hosts) {
                    System.out.println(host);
                }
            }
            System.out.println("-------------------");
        }
    }
}
7.查看文件夹
/**
 * Lists the direct children of {@code /user/atguigu/} and prints each name
 * prefixed with {@code f--} for files and {@code d--} for everything else.
 *
 * @throws IOException propagated from the Hadoop client calls
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
public void readFolderAtHDFS() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it (step 3) even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        // 2. List the folder's entries.
        FileStatus[] listStatus = fileSystem.listStatus(new Path("/user/atguigu/"));
        // Distinguish files from non-files (directories).
        for (FileStatus status : listStatus) {
            if (status.isFile()) {
                System.out.println("f--" + status.getPath().getName());
            } else {
                System.out.println("d--" + status.getPath().getName());
            }
        }
    }
}
三、IO流操作HDFS
1.文件上传
/**
 * Uploads the local file {@code f:/dongsi.txt} to
 * {@code /user/atguigu/output/dongsi.txt} on HDFS by copying between streams.
 *
 * <p>The local file is opened before the HDFS file is created, so a missing
 * local file no longer leaves an open output stream or an empty remote file.
 *
 * @throws IOException propagated from opening the local file or from the Hadoop client calls
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        FileInputStream fileInputStream = null;
        FSDataOutputStream fos = null;
        try {
            // 2. Open the local input stream.
            fileInputStream = new FileInputStream(new File("f:/dongsi.txt"));
            // 3. Create the HDFS output stream.
            fos = fileSystem.create(new Path("/user/atguigu/output/dongsi.txt"));
            // 4. Copy input to output.
            IOUtils.copyBytes(fileInputStream, fos, configuration);
        } finally {
            // 5. Release both streams; closeStream is called with null when a stream was never opened.
            IOUtils.closeStream(fos);
            IOUtils.closeStream(fileInputStream);
        }
    }
}
2.文件下载
/**
 * Downloads {@code /user/atguigu/bajie.txt} from HDFS to {@code F:/bajie.txt}
 * by copying between streams.
 *
 * <p>Copy failures propagate to the caller instead of being printed and
 * ignored, so a failed download is not reported as a normal return.
 *
 * @throws IOException propagated from the Hadoop client calls or from writing the local file
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        FSDataInputStream open = null;
        FileOutputStream fileOutputStream = null;
        try {
            // 2. Open the HDFS input stream.
            open = fileSystem.open(new Path("/user/atguigu/bajie.txt"));
            // 3. Create the local output stream.
            fileOutputStream = new FileOutputStream(new File("F:/bajie.txt"));
            // 4. Copy input to output.
            IOUtils.copyBytes(open, fileOutputStream, configuration);
        } finally {
            // 5. Release both streams; closeStream is called with null when a stream was never opened.
            IOUtils.closeStream(fileOutputStream);
            IOUtils.closeStream(open);
        }
    }
}
3.定位文件读取
1.下载大文件第一块数据
/**
 * Downloads the first 128 MB of
 * {@code /user/atguigu/input/hadoop-2.7.2.tar.gz} from HDFS into
 * {@code F:/hadoop-2.7.2.tar.gz.part1}.
 *
 * <p>Only the bytes actually returned by each {@code read} are written, and
 * copying stops early at end of file, so short reads cannot corrupt the output.
 *
 * @throws IOException propagated from the Hadoop client calls or from writing the local file
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
@Test
public void getFileFromHDFSSeek1() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        FSDataInputStream fis = null;
        FileOutputStream fos = null;
        try {
            // 2. Open the HDFS input stream.
            fis = fileSystem.open(new Path("/user/atguigu/input/hadoop-2.7.2.tar.gz"));
            // 3. Create the local output stream.
            fos = new FileOutputStream(new File("F:/hadoop-2.7.2.tar.gz.part1"));
            // 4. Copy at most 128 MB (1024 * 128 buffers of 1024 bytes), starting at offset 0.
            long remaining = 1024L * 1024 * 128;
            byte[] buf = new byte[1024];
            while (remaining > 0) {
                // read may return fewer bytes than requested, or -1 at end of file.
                int len = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
                if (len < 0) {
                    break;
                }
                fos.write(buf, 0, len);
                remaining -= len;
            }
        } finally {
            // 5. Release both streams; closeStream is called with null when a stream was never opened.
            IOUtils.closeStream(fis);
            IOUtils.closeStream(fos);
        }
    }
}
2.下载大文件第二块
/**
 * Downloads everything from byte offset 128 MB onward of
 * {@code /user/atguigu/input/hadoop-2.7.2.tar.gz} from HDFS into
 * {@code F:/hadoop-2.7.2.tar.gz.part2}.
 *
 * <p>Seek and copy failures propagate to the caller instead of being
 * silently discarded.
 *
 * @throws IOException propagated from the Hadoop client calls or from writing the local file
 * @throws InterruptedException propagated from {@code FileSystem.get}
 * @throws URISyntaxException if the cluster URI cannot be parsed
 */
@Test
public void getFileFromHDFSSeek2() throws IOException, InterruptedException, URISyntaxException {
    // 1. Obtain the file system; try-with-resources closes it even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        FSDataInputStream fis = null;
        FileOutputStream fos = null;
        try {
            // 2. Open the HDFS input stream.
            fis = fileSystem.open(new Path("/user/atguigu/input/hadoop-2.7.2.tar.gz"));
            // 3. Create the local output stream.
            fos = new FileOutputStream(new File("F:/hadoop-2.7.2.tar.gz.part2"));
            // 4. Position the input at 128 MB, where the first part ends, then copy the rest.
            fis.seek(1024L * 1024 * 128);
            IOUtils.copyBytes(fis, fos, configuration);
        } finally {
            // 5. Release both streams; closeStream is called with null when a stream was never opened.
            IOUtils.closeStream(fis);
            IOUtils.closeStream(fos);
        }
    }
}
3.合并文件
在Windows命令行窗口(cmd)中进入两个分块文件所在的目录,执行下面的命令,把第二块追加到第一块的末尾;合并完成后,将hadoop-2.7.2.tar.gz.part1重命名为hadoop-2.7.2.tar.gz即可。
type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1