Hadoop实现了一个分布式文件系统(Hadoop Distributed File System)。
Hadoop的框架最核心的设计就是:HDFS和MapReduce。HDFS为海量的数据提供了存储,而MapReduce则为海量的数据提供了计算。
使用Java实现文件上传到HDFS,以及从HDFS下载到本地:
package com.hdfs;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
public class HdfsFileIO {
    private static Configuration conf;
    private static FileSystem hdfs;

    /**
     * Initializes the shared HDFS connection before any test in this class runs.
     *
     * @throws Exception if the NameNode at hdfs://localhost:9527 is unreachable
     */
    @BeforeTest(description = "初始化连接")
    public static void init() throws Exception {
        conf = new Configuration();
        // Connect to the HDFS NameNode and obtain a FileSystem handle,
        // acting as the "root" user.
        hdfs = FileSystem.get(new URI("hdfs://localhost:9527"), conf, "root");
    }

    /**
     * Uploads the local file E:/123.jpg to /mydir/001.jpg in HDFS,
     * overwriting the destination if it already exists.
     *
     * @throws Exception if the local file is missing or the HDFS write fails
     */
    @Test(description = "文件上传")
    public static void upload() throws Exception {
        // Open the local source FIRST: if it does not exist we fail before
        // touching HDFS. The original code created `fout` first and leaked it
        // when FileInputStream threw. try-with-resources closes both streams
        // in all cases, so no manual closeStream calls are needed.
        try (InputStream in = new FileInputStream("E:/123.jpg");
             // second argument `true`: overwrite the file if it already exists
             FSDataOutputStream fout = hdfs.create(new Path("/mydir/001.jpg"), true)) {
            // `false`: copyBytes must NOT close the streams — the
            // try-with-resources block owns them (avoids a double close).
            IOUtils.copyBytes(in, fout, 1024, false);
        }
    }

    /**
     * Downloads /mydir/001.jpg from HDFS to the local file E:/111.jpg.
     *
     * @throws Exception if the HDFS file is missing or the local write fails
     */
    @Test(description = "文件下载")
    public static void random() throws Exception {
        try (FSDataInputStream fin = hdfs.open(new Path("/mydir/001.jpg"));
             OutputStream out = new FileOutputStream("E:/111.jpg")) {
            // Explicitly position at the start of the file before copying.
            fin.seek(0);
            // `false`: streams are closed by try-with-resources, not copyBytes.
            IOUtils.copyBytes(fin, out, 1024, false);
        }
    }
}
(Maven工程的)pom.xml的<dependencies>中增加:
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.4</version>
</dependency>