1.搭建开发环境
1)拷贝编译后的hadoop jar包到非中文路径
hadoop源码编译参考:https://blog.csdn.net/weixin_38023225/article/details/100576751
也可直接下载:https://download.csdn.net/download/weixin_38023225/11692985
2)配置HADOOP_HOME环境变量
3)配置Path环境变量
4)创建一个Maven工程HDFSClientDemo
5)导入相应的依赖坐标,并添加日志配置
pom文件依赖
<!-- Dependencies for the HDFSClientDemo Maven project: JUnit for the test classes, a logging library, and the Hadoop 2.7.4 client artifacts. -->
<dependencies>
<!-- JUnit 4, test scope only: provides the @Test/@Before annotations used by the demo test classes. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<!-- Log4j 2 core. -->
<!-- NOTE(review): the log4j configuration shown in this article uses Log4j 1.x class names (org.apache.log4j.*), which this 2.x artifact does not provide; presumably the 1.x binding arrives transitively via Hadoop - confirm whether this dependency is needed. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.11.1</version>
</dependency>
<!-- Hadoop artifacts; all three use the same version (2.7.4). -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.4</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.4</version>
</dependency>
</dependencies>
log4j日志配置文件(log4j.properties),放在src/main/resources目录下
# Log4j 1.x style configuration for the demo project.
# Root logger: level INFO, output sent to two appenders named "stdout" and "logfile".
log4j.rootLogger=INFO,stdout,logfile
# "stdout" appender: writes log events to the console.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Pattern: date, level, [logger name] - message, line separator.
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# "logfile" appender: writes the same events to a file.
log4j.appender.logfile=org.apache.log4j.FileAppender
# Relative path, so the file is created under the working directory's target/ folder.
log4j.appender.logfile.File=target/Spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
6)创建包
7)创建测试类HDFSClientDemo
/**
 * Minimal HDFS client demo: connects to the NameNode at
 * {@code hdfs://master-node:9000} and creates a single directory.
 *
 * Created by caimh on 2019/9/9.
 */
public class HDFSClientDemo {

    /**
     * Creates the directory {@code /hdfsClientTest} on HDFS as user {@code caimh}.
     *
     * @param args unused
     * @throws IOException          if the client cannot be obtained or an HDFS call fails
     * @throws URISyntaxException   kept for the URI-based factory shown as option 2
     * @throws InterruptedException kept for the URI-based factory shown as option 2
     */
    public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException {
        // 1. Obtain the HDFS client.
        Configuration conf = new Configuration();

        // Option 1: point the default file system at the NameNode and pass the
        // client identity through the HADOOP_USER_NAME system property. The
        // property name must be exactly HADOOP_USER_NAME ("HADOOP_USER_HOME" is
        // not read by Hadoop) and it must be set before the client is created.
        conf.set("fs.defaultFS", "hdfs://master-node:9000");
        System.setProperty("HADOOP_USER_NAME", "caimh");
        FileSystem fileSystem = FileSystem.get(conf);

        // Option 2: pass the URI and the user name directly to the factory.
        // FileSystem fileSystem = FileSystem.get(new URI("hdfs://master-node:9000"), conf, "caimh");

        try {
            // 2. Create the directory.
            fileSystem.mkdirs(new Path("/hdfsClientTest"));
        } finally {
            // 3. Release the client even when mkdirs fails.
            fileSystem.close();
        }
        System.out.println("HDFS 目录创建成功!");
    }
}
8)执行程序
客户端操作HDFS时需要一个用户身份。默认情况下,HDFS客户端API会读取环境变量或JVM系统属性HADOOP_USER_NAME作为自己的用户身份(未设置时使用当前操作系统的登录用户),可以在启动程序时通过JVM参数指定:
-DHADOOP_USER_NAME=caimh,其中caimh为用户名称。
2.HDFS的JAVA API操作
/**
 * JUnit examples of the basic HDFS Java API: upload, download, directory
 * operations and listings.
 *
 * <p>{@link #init()} opens a fresh {@code FileSystem} before every test and
 * each test closes it in a {@code finally} block, so the client is released
 * even when the HDFS call fails.
 */
public class HDFSClientDemo {

    /** HDFS client shared by the test methods; assigned in {@link #init()}. */
    FileSystem fs = null;

    /**
     * Obtains the HDFS client for user {@code caimh} before each test.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if the client cannot be created
     * @throws InterruptedException if the calling thread is interrupted while connecting
     */
    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        Configuration conf = new Configuration();
        fs = FileSystem.get(new URI("hdfs://master-node:9000"), conf, "caimh");
    }

    /**
     * Uploads a local file to HDFS.
     *
     * @throws IOException if the copy or closing the client fails
     */
    @Test
    public void uploadFileToHdfs() throws IOException {
        try {
            // Local file to upload.
            Path srcPath = new Path("e:/cmh.txt");
            // Target HDFS directory. HDFS paths are case-sensitive, so this uses
            // the same "/hdfsClientTest" spelling as the directory created and
            // read by the other examples (it was "/HDFSClientTest" before).
            Path destPath = new Path("/hdfsClientTest");
            fs.copyFromLocalFile(srcPath, destPath);
        } finally {
            fs.close();
        }
    }

    /**
     * Copies a file from HDFS to the local file system, keeping the HDFS source.
     *
     * @throws IOException if the copy or closing the client fails
     */
    @Test
    public void copyFileToLocalFromHdfs() throws IOException {
        try {
            // First argument false: do not delete the source after copying.
            fs.copyToLocalFile(false, new Path("/hdfsClientTest/cmh.txt"), new Path("e:/hdfsClient"));
        } finally {
            fs.close();
        }
    }

    /**
     * Directory operations: create, rename and recursively delete.
     *
     * @throws IOException if any HDFS call or closing the client fails
     */
    @Test
    public void mkdirAndDeleteAndRename() throws IOException {
        try {
            // Create a directory.
            fs.mkdirs(new Path("/hdfsClientTest/cmh"));
            // Rename a file or directory.
            fs.rename(new Path("/hdfsClientTest1"), new Path("/hdfsTest1"));
            // Delete a directory; true means recursive.
            fs.delete(new Path("/hdfsTest1"), true);
        } finally {
            fs.close();
        }
    }

    /**
     * Recursively lists every file under the root and prints its path, name,
     * block size, permission, length and the offset, length and hosts of each block.
     *
     * @throws IOException if the listing or closing the client fails
     */
    @Test
    public void listFiles() throws IOException {
        try {
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
            while (files.hasNext()) {
                LocatedFileStatus file = files.next();
                System.out.println(file.getPath());
                System.out.println(file.getPath().getName());
                System.out.println(file.getBlockSize());
                System.out.println(file.getPermission());
                System.out.println(file.getLen());
                for (BlockLocation block : file.getBlockLocations()) {
                    System.out.println("block-length:" + block.getLength() + "--" + "block-offset:" + block.getOffset());
                    for (String host : block.getHosts()) {
                        System.out.println(host);
                    }
                }
            }
        } finally {
            fs.close();
        }
    }

    /**
     * Lists the direct children of the root directory, prefixing files with
     * {@code f--} and directories with {@code d--}, followed by their permission.
     *
     * @throws IOException if the listing or closing the client fails
     */
    @Test
    public void listAll() throws IOException {
        try {
            for (FileStatus status : fs.listStatus(new Path("/"))) {
                // Decide the prefix per entry so an entry that is neither a file
                // nor a directory does not inherit the previous entry's prefix.
                String flag;
                if (status.isFile()) {
                    flag = "f--";
                } else if (status.isDirectory()) {
                    flag = "d--";
                } else {
                    flag = "";
                }
                System.out.println(flag + status.getPath().getName());
                System.out.println(status.getPermission());
            }
        } finally {
            fs.close();
        }
    }

    /**
     * Obtains a client through {@code fs.defaultFS} plus the HADOOP_USER_NAME
     * system property, then creates a directory and uploads a file into it.
     *
     * @throws IOException if any HDFS call or closing a client fails
     */
    @Test
    public void getClient() throws IOException {
        // 1. Obtain the HDFS client.
        Configuration conf = new Configuration();
        // Option 1: default file system from configuration.
        conf.set("fs.defaultFS", "hdfs://master-node:9000");
        // Client identity. The property name must be HADOOP_USER_NAME;
        // "HADOOP_USER_HOME" is not read by Hadoop.
        System.setProperty("HADOOP_USER_NAME", "caimh");
        // A distinct local name keeps the field opened by init() reachable so
        // that it can be closed below as well.
        FileSystem client = FileSystem.get(conf);
        // Option 2: pass the URI and the user name directly to the factory.
        // FileSystem client = FileSystem.get(new URI("hdfs://master-node:9000"), conf, "caimh");
        try {
            // 2. Create the directory.
            client.mkdirs(new Path("/hdfsClientTest"));
            // 3. Upload the file.
            client.copyFromLocalFile(new Path("e:/cmh.txt"), new Path("/hdfsClientTest"));
        } finally {
            // 4. Release both the local client and the one opened by init().
            try {
                client.close();
            } finally {
                fs.close();
            }
        }
        System.out.println("HDFS 目录创建成功!");
    }
}
Stream流形式操作
I/O流基础知识参考:https://blog.csdn.net/weixin_38023225/article/details/100728915
/**
 * HDFS I/O examples that copy data through explicit streams instead of the
 * convenience copy methods of {@code FileSystem}.
 */
public class HDFSClientStreamDemo {

    /**
     * Upper bound, in bytes, for {@link #downLoadPartFile()}: 128 MB, which is
     * what the original loop bounds (1024 * 128 reads of a 1 KB buffer) describe.
     */
    private static final long PART_SIZE_BYTES = 128L * 1024 * 1024;

    /**
     * Uploads the local file {@code cmh.txt} to {@code /cmh.txt} on HDFS.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if opening, copying or closing fails
     * @throws InterruptedException if the calling thread is interrupted while connecting
     */
    @Test
    public void uploadFile() throws URISyntaxException, IOException, InterruptedException {
        // 1. Obtain the HDFS client.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://master-node:9000"), conf, "caimh");
        BufferedInputStream bis = null;
        FSDataOutputStream fos = null;
        try {
            // 2. Local input stream.
            bis = new BufferedInputStream(new FileInputStream("cmh.txt"));
            // 3. HDFS output stream.
            fos = fs.create(new Path("/cmh.txt"));
            // 4. Copy; this overload also closes both streams when it finishes.
            IOUtils.copyBytes(bis, fos, conf);
        } finally {
            // Covers the paths where copyBytes was never reached (for example
            // fs.create failing after the local file was opened).
            IOUtils.closeStream(fos);
            IOUtils.closeStream(bis);
            fs.close();
        }
    }

    /**
     * Downloads {@code /cmh.txt} from HDFS into the local file {@code cmh_dl.txt}.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if opening, copying or closing fails
     * @throws InterruptedException if the calling thread is interrupted while connecting
     */
    @Test
    public void downLoadFile() throws URISyntaxException, IOException, InterruptedException {
        // 1. Obtain the HDFS client.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://master-node:9000"), conf, "caimh");
        FSDataInputStream fis = null;
        BufferedOutputStream bos = null;
        try {
            // 2. HDFS input stream.
            fis = fs.open(new Path("/cmh.txt"));
            // 3. Local output stream.
            bos = new BufferedOutputStream(new FileOutputStream("cmh_dl.txt"));
            // 4. Download (stream copy); this overload also closes both streams.
            IOUtils.copyBytes(fis, bos, conf);
        } finally {
            // 5. Release everything, including on the failure paths.
            IOUtils.closeStream(bos);
            IOUtils.closeStream(fis);
            fs.close();
        }
    }

    /**
     * Downloads only the leading part (at most 128 MB) of {@code /cmh.txt}
     * from HDFS into the local file {@code cmh_dl.txt}.
     *
     * <p>Unit reminder: K is kilo, M is mega, G is giga, T is tera.
     * <p>
     * 8 bit = 1 Byte
     * <p>
     * 1024 Byte = 1 KB
     * <p>
     * 1024 KB = 1 MB
     * <p>
     * 1024 MB = 1 GB
     * <p>
     * 1024 GB = 1 TB
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if opening, copying or closing fails
     * @throws InterruptedException if the calling thread is interrupted while connecting
     */
    @Test
    public void downLoadPartFile() throws URISyntaxException, IOException, InterruptedException {
        // 1. Obtain the HDFS client.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://master-node:9000"), conf, "caimh");
        FSDataInputStream fis = null;
        BufferedOutputStream bos = null;
        try {
            // 2. HDFS input stream.
            fis = fs.open(new Path("/cmh.txt"));
            // 3. Local output stream.
            bos = new BufferedOutputStream(new FileOutputStream("cmh_dl.txt"));
            // 4. Copy at most PART_SIZE_BYTES. The budget is tracked in bytes
            // because a single read may return fewer bytes than the buffer
            // holds, and the loop stops early at end of file.
            byte[] buffer = new byte[1024];
            long remaining = PART_SIZE_BYTES;
            while (remaining > 0) {
                int len = fis.read(buffer, 0, (int) Math.min(buffer.length, remaining));
                if (len == -1) {
                    break;
                }
                bos.write(buffer, 0, len);
                remaining -= len;
            }
            bos.flush();
        } finally {
            // 5. Release everything, including on the failure paths.
            IOUtils.closeStream(bos);
            IOUtils.closeStream(fis);
            fs.close();
        }
    }
}