HDFS File Upload (Testing Parameter Priority)
Parameter priority, from highest to lowest: values set in client code via Configuration.set(), then user-defined config files (e.g. hdfs-site.xml) on the project classpath, then the server-side defaults. The put() test below sets dfs.replication in code, so that value wins.
package com.redhat.hafsclient;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;

public class HDFSclient {

    private FileSystem fs;

    @Before
    public void before() throws IOException, InterruptedException {
        fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration(), "redhat");
    }

    @Test
    public void put() throws IOException, InterruptedException { // upload a file
        // Set dfs.replication in code; this takes priority over any config file
        Configuration configuration = new Configuration();
        configuration.set("dfs.replication", "2");
        // Get an abstract handle to the HDFS file system
        FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), configuration, "redhat");
        // Operate on the file system through this handle
        fileSystem.copyFromLocalFile(new Path("c:\\test.txt"), new Path("/"));
        // Close the file system
        fileSystem.close();
    }
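    // A minimal verification sketch, assuming put() above has already uploaded
    // c:\test.txt to /test.txt: read back the replication factor that actually
    // took effect, confirming the in-code dfs.replication=2 wins the priority
    // contest over any config file on the classpath.
    @Test
    public void checkReplication() throws IOException {
        short replication = fs.getFileStatus(new Path("/test.txt")).getReplication();
        System.out.println("Effective replication factor: " + replication); // expect 2
    }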
    @Test
    public void get() throws IOException, InterruptedException { // download a file
        FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration(), "redhat");
        fileSystem.copyToLocalFile(new Path("/test2"), new Path("c:\\"));
        fileSystem.close();
    }

    @Test
    public void rename() throws IOException, InterruptedException { // rename a file
        FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration(), "redhat");
        fileSystem.rename(new Path("/test"), new Path("/test2"));
        fileSystem.close();
    }

    @Test
    public void delete() throws IOException { // delete a file
        boolean delete = fs.delete(new Path("/test"), true);
        if (delete) {
            System.out.println("Delete succeeded");
        } else {
            System.out.println("Delete failed");
        }
    }

    @Test
    public void appendfile() throws IOException { // append content to a file
        FSDataOutputStream append = fs.append(new Path("/test/1.txt"), 1024);
        FileInputStream open = new FileInputStream("c:\\3.txt"); // local input stream
        IOUtils.copyBytes(open, append, 1024, true); // copy the stream and close both ends
    }

    @Test
    public void ls() throws IOException { // query file information
        FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isFile()) {
                System.out.println("File information:");
                System.out.println(fileStatus.getPath());             // file path
                System.out.println(fileStatus.getLen());              // file length
                System.out.println(fileStatus.getPermission());       // permissions
                System.out.println(fileStatus.getModificationTime()); // modification time
                System.out.println(fileStatus.getAccessTime());       // access time
                System.out.println(fileStatus.getReplication());      // replication factor
            } else {
                System.out.println("Directory information:");
                System.out.println(fileStatus.getPath());
            }
        }
    }

    @Test
    public void listFiles() throws IOException { // recursively list all files
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            System.out.println(file.getPath());
            System.out.println("Block information:");
            BlockLocation[] blockLocations = file.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                String[] hosts = blockLocation.getHosts();
                System.out.println("Block is on:");
                for (String host : hosts) {
                    System.out.print(host + " ");
                }
            }
        }
    }

    @After
    public void after() throws IOException {
        fs.close();
    }
}
HDFS I/O Stream Operations
HDFS File Upload
Requirement: upload redhat.txt from the local C drive to the HDFS root directory.
@Test
public void putFileToHDFS() throws IOException, InterruptedException {
    // 1 Get the file system
    FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration(), "redhat");
    // 2 Open the input stream
    FileInputStream fis = new FileInputStream(new File("c:\\redhat.txt"));
    // 3 Create the output stream
    FSDataOutputStream fos = fs.create(new Path("/redhat.txt"));
    // 4 Copy the stream (the final true closes both streams when done)
    IOUtils.copyBytes(fis, fos, 1024, true);
    // 5 Release resources (redundant after copyBytes(..., true), but harmless)
    IOUtils.closeStream(fos);
    IOUtils.closeStream(fis);
    fs.close();
}
HDFS File Download
@Test
public void getFileFromHDFS() throws IOException, InterruptedException {
    // 1 Get the file system
    FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration(), "redhat");
    // 2 Open the input stream
    FSDataInputStream fis = fs.open(new Path("/test.txt"));
    // 3 Create the output stream
    FileOutputStream fos = new FileOutputStream(new File("c:\\test.txt"));
    // 4 Copy the stream (the final true closes both streams when done)
    IOUtils.copyBytes(fis, fos, 1024, true);
    // 5 Release resources (redundant after copyBytes(..., true), but harmless)
    IOUtils.closeStream(fos);
    IOUtils.closeStream(fis);
    fs.close();
}
Seek-Based File Reading
Requirement: read a large file on HDFS block by block, for example /hadoop-2.7.2.tar.gz in the root directory.
The HDFS root directory listing shows the file's size as 188.5 MB,
and shows that it is split into two blocks, each with 3 replicas.
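The same layout can be confirmed from code. Below is a minimal sketch, assuming the shared fs handle from the HDFSclient test class above; it prints each block's offset and length:
@Test
public void blockLayout() throws IOException {
    FileStatus status = fs.getFileStatus(new Path("/hadoop-2.7.2.tar.gz"));
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    for (BlockLocation block : blocks) {
        // With a 128 MB block size, this should print offsets 0 and 134217728
        System.out.println("offset=" + block.getOffset() + ", length=" + block.getLength());
    }
}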
Download the first block
@Test
public void readFileseek1() throws IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // 1 Get the file system
    FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), configuration, "redhat");
    // 2 Open the input stream
    FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
    // 3 Create the output stream
    FileOutputStream fos = new FileOutputStream(new File("c:\\phpStudy/hadoop-2.7.2.tar.gz.part1"));
    // 4 Copy exactly the first block (128 MB); read() may return fewer bytes
    //   than requested, so track how many were actually read
    byte[] buf = new byte[1024];
    long remaining = 1024L * 1024 * 128;
    while (remaining > 0) {
        int len = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
        if (len == -1) {
            break;
        }
        fos.write(buf, 0, len);
        remaining -= len;
    }
    // 5 Release resources
    IOUtils.closeStream(fos);
    IOUtils.closeStream(fis);
    fs.close();
}
Download the second block
@Test
public void readFileseek2() throws IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // 1 Get the file system
    FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), configuration, "redhat");
    // 2 Open the input stream and seek past the first block (128 MB)
    FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
    fis.seek(1024 * 1024 * 128);
    // 3 Create the output stream
    FileOutputStream fos = new FileOutputStream(new File("c:\\phpStudy/hadoop-2.7.2.tar.gz.part2"));
    // 4 Copy the rest of the stream
    IOUtils.copyBytes(fis, fos, configuration);
    // 5 Release resources
    IOUtils.closeStream(fos);
    IOUtils.closeStream(fis);
    fs.close();
}
Merging the files
Before the merge, the two part files hadoop-2.7.2.tar.gz.part1 and hadoop-2.7.2.tar.gz.part2 sit under C:\phpStudy.
On Windows, open a command prompt, change into C:\phpStudy, and run the following command to append the second part onto the first:
type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1
When the merge completes, rename hadoop-2.7.2.tar.gz.part1 back to hadoop-2.7.2.tar.gz. Extracting the archive shows that the tar file is fully intact.
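If you would rather do the merge in Java, here is a minimal sketch under the same assumption that both part files live in C:\phpStudy; it opens part1 in append mode and copies part2 onto its end:
try (FileInputStream in = new FileInputStream("c:\\phpStudy\\hadoop-2.7.2.tar.gz.part2");
     // the second constructor argument (true) opens part1 in append mode
     FileOutputStream out = new FileOutputStream("c:\\phpStudy\\hadoop-2.7.2.tar.gz.part1", true)) {
    byte[] buf = new byte[8192];
    int len;
    while ((len = in.read(buf)) != -1) {
        out.write(buf, 0, len);
    }
}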