1、新建/上传/下载/删除文件(夹)
package test;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Basic HDFS operations: mkdir / upload / download / delete, plus streaming
 * get/put using {@link org.apache.hadoop.io.IOUtils#copyBytes}.
 * Requires a reachable NameNode at hdfs://hadoop01:9000.
 */
public class HDFSTest2 {
    FileSystem fs = null;

    /** Opens the HDFS connection as user "hadoop" before each test. */
    @Before
    public void before() throws Exception {
        URI uri = new URI("hdfs://hadoop01:9000");
        Configuration conf = new Configuration();
        String user = "hadoop";
        fs = FileSystem.get(uri, conf, user);
    }

    /*
    @Test
    // Create a directory.
    public void mkdir() throws Exception {
        Path f = new Path("/aa");
        boolean bool = fs.mkdirs(f);
        System.out.println(bool);
    }
    */
    /*
    // Upload a local file.
    @Test
    public void copyFromLocal() throws Exception {
        Path src = new Path("E:/a.txt");
        Path dst = new Path("/aa/a.txt");
        fs.copyFromLocalFile(src, dst);
        System.out.println("上传成功");
    }
    */
    /*
    @Test
    // Delete a file or directory.
    public void delete() throws Exception {
        Path f = new Path("/aa");
        // FIX: the single-argument delete(Path) is deprecated; pass the recursive
        // flag explicitly (true is required for non-empty directories).
        boolean bool = fs.delete(f, true);
        System.out.println("删除成功");
    }
    */
    /*
    // Download a file.
    @Test
    public void copyToLocal() throws Exception {
        Path src = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
        Path dst = new Path("E:/hadoop-2.7.7-centos-6.7.tar.gz");
        fs.copyToLocalFile(false, src, dst, true);
    }
    */

    @Test
    // Download a file via streaming I/O (HDFS input stream -> local output stream).
    public void get() throws IllegalArgumentException, IOException {
        Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
        // FIX: try-with-resources guarantees both streams are closed even when
        // copyBytes throws; the old code leaked them on any I/O error.
        try (FSDataInputStream in = fs.open(f);
             FileOutputStream out = new FileOutputStream("E:/hadoop-2.7.7-centos-6.7.tar.gz")) {
            IOUtils.copyBytes(in, out, 4096); // 4096 = copy-buffer size; loops until EOF
            System.out.println("下载成功");
        }
    }

    @Test
    // Upload a file via streaming I/O (local input stream -> HDFS output stream).
    public void put() throws Exception {
        Path f = new Path("/a.txt");
        // FIX: try-with-resources closes both streams on every path, including
        // when fs.create or copyBytes throws.
        try (FileInputStream in = new FileInputStream("E:/a.txt");
             FSDataOutputStream out = fs.create(f)) {
            System.out.println(in);
            IOUtils.copyBytes(in, out, 4096);
            System.out.println("上传成功");
        }
    }

    /** Closes the HDFS connection after each test. */
    @After
    public void after() throws IOException {
        fs.close();
    }
}
2、单个文件(属性及块)信息查询
package test;
import java.io.IOException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.google.common.io.Files;
/**
 * Queries a single HDFS file's attributes (path, length, replication, owner,
 * permissions, timestamps) and its block locations.
 */
public class HDFSTest3 {
    FileSystem fs = null;

    @Before
    // Open the HDFS connection.
    public void before() throws Exception {
        URI uri = new URI("hdfs://hadoop01:9000");
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "2");
        // FIX: never write new String("literal") — the literal itself suffices.
        String user = "hadoop";
        fs = FileSystem.get(uri, conf, user);
    }

    @Test
    // Print the file's attribute information, then its block information.
    public void fileStatus() throws Exception {
        Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
        FileStatus fileStatus = fs.getFileStatus(f);
        // Attribute information.
        System.out.println(fileStatus.getPath());
        System.out.println(fileStatus.getLen());
        // FIX: isDir() is deprecated; use isDirectory().
        System.out.println(fileStatus.isDirectory());
        System.out.println(fileStatus.getBlockSize());
        System.out.println(fileStatus.getReplication());
        System.out.println(fileStatus.getGroup());
        System.out.println(fileStatus.isSymlink());
        System.out.println(fileStatus.getOwner());
        System.out.println(fileStatus.getPermission());
        System.out.println(fileStatus.getModificationTime());
        long modification_time = fileStatus.getModificationTime();
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        // FIX: getModificationTime() already returns epoch millis as a long;
        // the long -> String -> parseLong round-trip was pointless.
        String sd = sdf.format(new Date(modification_time));
        System.out.println("格式化结果:" + sd);
        System.out.println(fileStatus.getAccessTime());
        long access_time = fileStatus.getAccessTime();
        // FIX: reuse the existing formatter instead of building a second identical one.
        String sd1 = sdf.format(new Date(access_time));
        System.out.println("格式化结果:" + sd1);
        // Block information.
        Path file = fileStatus.getPath();
        long start = 0;
        long len = fileStatus.getLen();
        BlockLocation[] getFileBlockLocations = fs.getFileBlockLocations(file, start, len);
        System.out.println(getFileBlockLocations.length);
        // FIX: guard against a zero-length file (no blocks) before indexing [0].
        if (getFileBlockLocations.length > 0) {
            System.out.println(getFileBlockLocations[0]);
        }
    }

    @After
    // Close the connection.
    public void after() throws Exception {
        fs.close();
    }
}
3、循环遍历文件(属性及块)信息
package test;
import java.net.URI;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Recursively walks every file in HDFS and prints its attributes and the
 * detail of each of its blocks (hosts, names, offset, length).
 */
public class HDFSTest4 {
    FileSystem fs = null;

    @Before
    public void before() throws Exception {
        URI uri = new URI("hdfs://hadoop01:9000");
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "2");
        // FIX: never write new String("literal") — the literal itself suffices.
        String user = "hadoop";
        fs = FileSystem.get(uri, conf, user);
    }

    @Test
    // Enumerate files: fs.listFiles(path, recursive) — true means recurse into subdirectories.
    public void listFile() throws Exception {
        Path f = new Path("/");
        // Remote iterator over all files under the root.
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(f, true);
        while (iterator.hasNext()) {
            // LocatedFileStatus is a FileStatus that also carries block locations.
            LocatedFileStatus status = iterator.next();
            System.out.println("Path=" + status.getPath());
            System.out.println("Owner=" + status.getOwner());
            System.out.println("Len=" + status.getLen());
            System.out.println("Permission=" + status.getPermission());
            System.out.println("Replication=" + status.getReplication());
            System.out.println("BlockSize=" + status.getBlockSize());
            // FIX: call getBlockLocations() once and reuse the array; the old code
            // fetched it three separate times per file.
            BlockLocation[] blockLocations = status.getBlockLocations();
            System.out.println("BlockLocations=" + Arrays.toString(blockLocations));
            System.out.println("Block-sum=" + blockLocations.length);
            System.out.println("++++++");
            // Per-block detail (1-based counter for display).
            for (int i = 0; i < blockLocations.length; i++) {
                System.out.println("Block-count=" + (i + 1));
                System.out.println("blockLocations=" + blockLocations[i] + " ");
                System.out.println("Hosts=" + Arrays.toString(blockLocations[i].getHosts()));
                System.out.println("Names=" + Arrays.toString(blockLocations[i].getNames()));
                System.out.println("Offset=" + blockLocations[i].getOffset());
                System.out.println("Length=" + blockLocations[i].getLength());
            }
            System.out.println("======================================");
        }
    }

    @After
    public void after() throws Exception {
        fs.close();
    }
}
4、部分(指定起止位置)下载与分块下载
//指定文件内容的起止位置下载部分内容
package test;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Partial download: seeks to an offset inside an HDFS file and copies a fixed
 * number of bytes to a local file.
 */
public class HDFSTest5 {
    FileSystem fs = null;

    @Before
    public void before() throws Exception {
        URI uri = new URI("hdfs://hadoop01:9000");
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "2");
        String user = "hadoop";
        fs = FileSystem.get(uri, conf, user);
    }

    @Test
    // Download a byte range: start at a given offset, copy a given number of bytes.
    public void get() throws IOException {
        Path f = new Path("/a.txt");
        // FIX: try-with-resources closes both streams on every path (the old code
        // leaked them if copyBytes threw), so copyBytes no longer needs close=true.
        try (FSDataInputStream in = fs.open(f);
             FileOutputStream out = new FileOutputStream("e:/b.txt")) {
            in.seek(6); // start reading at byte offset 6
            // FIX: renamed the local from "conf" (it clashed conceptually with the
            // Configuration variable of the same name in before()).
            long length = 20L; // number of bytes to copy after the seek
            // FIX: removed the obfuscated always-true expression ("false" != null)
            // that used to compute the close flag.
            // IOUtils.copyBytes(in, out, 4096); // variant: download the whole file
            IOUtils.copyBytes(in, out, length, false);
            System.out.println("下载结束");
        }
    }

    @After
    public void after() throws IOException {
        fs.close();
    }
}
//指定hdfs的块下载方法一:
package test;
import java.io.FileOutputStream;
import java.net.URI;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Block download, method 1: walk the file's block locations via listFiles and,
 * when the second block (index 1) exists, download exactly that block by
 * seeking to its offset and copying its length.
 */
public class HDFSTest6 {
    FileSystem fs = null;

    @Before
    public void before() throws Exception {
        URI uri = new URI("hdfs://hadoop01:9000");
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "2");
        // FIX: never write new String("literal") — the literal itself suffices.
        String user = "hadoop";
        fs = FileSystem.get(uri, conf, user);
    }

    @Test
    // Enumerate the file's blocks; fs.listFiles(path, true) — true means recursive.
    public void listFile() throws Exception {
        Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(f, true);
        while (iterator.hasNext()) {
            // LocatedFileStatus is a FileStatus that also carries block locations.
            LocatedFileStatus status = iterator.next();
            BlockLocation[] blockLocations = status.getBlockLocations();
            System.out.println("BlockLocations=" + Arrays.toString(blockLocations));
            System.out.println("Block-sum=" + blockLocations.length);
            for (int i = 0; i < blockLocations.length; i++) {
                System.out.println("---------------------------------");
                System.out.println("Block-count=" + i);
                System.out.println("blockLocations=" + blockLocations[i] + " ");
                System.out.println("Offset=" + blockLocations[i].getOffset());
                System.out.println("Length=" + blockLocations[i].getLength());
                // If a second block exists, download it.
                if (i == 1) {
                    // FIX: try-with-resources replaces the manual close() calls; the
                    // old code asked copyBytes to close the streams (close=true) and
                    // then closed them again, and leaked both if copyBytes threw.
                    try (FSDataInputStream in = fs.open(f);
                         FileOutputStream out = new FileOutputStream("e:/hadoop-2.7.7-centos-6.7.tar.gz")) {
                        in.seek(blockLocations[i].getOffset()); // jump to the block's start
                        long length = blockLocations[i].getLength(); // copy exactly one block
                        IOUtils.copyBytes(in, out, length, false);
                        System.out.println("下载结束");
                    }
                }
            }
        }
    }

    @After
    public void after() throws Exception {
        fs.close();
    }
}
//指定hdfs块下载方法2: (NOTE: this class is also named HDFSTest6, identical to method 1 above — rename one of them or keep them in separate projects, or the duplicate class name will not compile)
package test;
import java.io.FileOutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Block download, method 2: fetch the file's block locations directly via
 * getFileBlockLocations, then download the second block (index 1) by seeking
 * to its offset and copying its length.
 */
public class HDFSTest6 {
    FileSystem fs = null;

    @Before
    public void before() throws Exception {
        URI uri = new URI("hdfs://hadoop01:9000");
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "2");
        // FIX: never write new String("literal") — the literal itself suffices.
        String user = "hadoop";
        fs = FileSystem.get(uri, conf, user);
    }

    @Test
    // Download the specified block (block 2) of the file.
    public void getBlock() throws Exception {
        // Basic file information.
        Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
        FileStatus getFileStatus = fs.getFileStatus(f);
        // Block information for the whole file [0, len).
        Path file = getFileStatus.getPath();
        long start = 0;
        long len = getFileStatus.getLen();
        BlockLocation[] blockLocation = fs.getFileBlockLocations(file, start, len);
        // FIX: early-exit guard instead of if/else nesting; message unchanged.
        if (blockLocation.length < 2) {
            throw new RuntimeException("该文件模块数<2");
        }
        // Offset and length of the second block.
        long offset = blockLocation[1].getOffset();
        long length = blockLocation[1].getLength();
        // FIX: try-with-resources closes both streams on every path; the old code
        // passed close=true to copyBytes AND closed the streams again manually,
        // and leaked them if copyBytes threw.
        try (FSDataInputStream in = fs.open(f);
             FileOutputStream out = new FileOutputStream("e:/cc")) {
            in.seek(offset); // jump to the second block's start
            IOUtils.copyBytes(in, out, length, false);
            System.out.println("下载块2完成");
        }
    }

    @After
    public void after() throws Exception {
        fs.close();
    }
}