1. Create a Maven project and add the related dependencies to the pom:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
</dependency>
2. Add the logging configuration file log4j.properties under src/main/resources:
# root logger configuration
log4j.rootLogger=INFO,Log4jConsole
# console appender
log4j.appender.Log4jConsole=org.apache.log4j.ConsoleAppender
log4j.appender.Log4jConsole.Threshold=INFO
log4j.appender.Log4jConsole.ImmediateFlush=true
log4j.appender.Log4jConsole.Target=System.out
# output layout
log4j.appender.Log4jConsole.layout=org.apache.log4j.PatternLayout
log4j.appender.Log4jConsole.layout.ConversionPattern=[%p][%d{yyyy-MM-dd HH:mm:ss,SSS}] [%c:%M:%L] %m%n
3. Writing the code
package com.ns.hdfs.oper;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.permission.FsPermission;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
* Created by Administrator on 2018/9/4.
*/
public class HdfsStudy {
    // logger
    static Logger logger = LoggerFactory.getLogger(HdfsStudy.class);
    // HDFS file system handle
    static FileSystem fileSystem = null;
    // the file system address configured as fs.defaultFS in core-site.xml
    static final String uri = "hdfs://localhost:9000";
    /**
     * Obtain a FileSystem instance.
     * @return the FileSystem, or null if it could not be created
     */
    public static FileSystem createFileSystem() {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", uri);
        // configuration.set("dfs.socket.timeout", "30000");
        // configuration.set("dfs.datanode.socket.write.timeout", "30000");
        // configuration.setBoolean("fs.hdfs.impl.disable.cache", true);
        // configuration.set("dfs.datanode.max.xcievers", "4096");
        // configuration.set("dfs.replication", "1");
        // configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        try {
            fileSystem = FileSystem.get(configuration);
        } catch (Exception e) {
            logger.error("Failed to create FileSystem", e);
        }
        return fileSystem;
    }
    /**
     * Create a directory in the file system.
     * @param fileSystem
     * @param path a relative path (no leading "/") is resolved against fileSystem.getWorkingDirectory();
     *             for example, with path=tmp and fileSystem.getWorkingDirectory()=hdfs://localhost:9000/user/Administrator,
     *             the resulting directory is hdfs://localhost:9000/user/Administrator/tmp.
     *             A path starting with "/" is absolute and is created under the file system root.
     * @throws Exception
     */
    public static void createDirectory(FileSystem fileSystem, String path) throws Exception {
        boolean iscreate = fileSystem.mkdirs(new Path(path));
        if (iscreate) {
            logger.info("Directory created successfully");
            return;
        }
        logger.info("Failed to create directory");
    }
    /**
     * Upload a file to the Hadoop file system.
     * @param fileSystem
     * @param srcfile the local file to upload
     * @param path the target path in the file system
     * @throws Exception
     */
    public static void createAndWriteFile(FileSystem fileSystem, String srcfile, String path) throws Exception {
        // to only create an empty file:
        // fileSystem.createNewFile(new Path(path));
        try (FSDataOutputStream dfos = fileSystem.create(new Path(path), true);
             FileInputStream fis = new FileInputStream(srcfile)) {
            IOUtils.copy(fis, dfos);
        } catch (Exception e) {
            e.printStackTrace();
        }
        logger.info("Upload finished, stored at: " + fileSystem.makeQualified(new Path(path)));
    }
    /**
     * Download a file from the Hadoop file system.
     * @param fileSystem
     * @param srcfile the local path to save the downloaded file to
     * @param path the source path in the file system
     * @throws Exception
     */
    public static void downloadFile(FileSystem fileSystem, String srcfile, String path) throws Exception {
        try (FSDataInputStream fis = fileSystem.open(new Path(path));
             FileOutputStream fos = new FileOutputStream(srcfile)) {
            // to start reading from a given offset:
            // fis.seek(100L);
            IOUtils.copy(fis, fos);
        } catch (Exception e) {
            e.printStackTrace();
        }
        logger.info("Download finished, saved to: " + srcfile);
    }
    /**
     * List the files under a given directory in the Hadoop file system.
     * @param fileSystem
     * @param path the directory path
     * @throws Exception
     */
    public static void listfile(FileSystem fileSystem, String path) throws Exception {
        // the second argument controls whether sub-directories are traversed recursively
        RemoteIterator<LocatedFileStatus> filestatuslist = fileSystem.listFiles(new Path(path), true);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
        while (filestatuslist.hasNext()) {
            LocatedFileStatus locatedFileStatus = filestatuslist.next();
            FsPermission permission = locatedFileStatus.getPermission();
            Path filepath = locatedFileStatus.getPath();
            String group = locatedFileStatus.getGroup();
            Long length = locatedFileStatus.getLen();
            String owner = locatedFileStatus.getOwner();
            Long time = locatedFileStatus.getAccessTime();
            logger.info("path:" + filepath + "\tlength:" + length + "\tgroup:" + group + "\towner:" + owner + "\ttime:" + sdf.format(new Date(time)));
        }
    }
    /**
     * Delete a file from the Hadoop file system (non-recursive).
     * @param fileSystem
     * @param path the path to delete
     * @throws Exception
     */
    public static void deleteFile(FileSystem fileSystem, String path) throws Exception {
        boolean isdelete = fileSystem.delete(new Path(path), false);
        if (isdelete) {
            logger.info(fileSystem.makeQualified(new Path(path)) + " deleted successfully");
        } else {
            logger.info(fileSystem.makeQualified(new Path(path)) + " could not be deleted");
        }
    }
    /**
     * Run the example operations in sequence.
     * @param args
     */
    public static void main(String[] args) {
        FileSystem fs = createFileSystem();
        System.out.println(fs.getWorkingDirectory());
        try {
            // create directories
            createDirectory(fs, "/test/sina");
            createDirectory(fs, "/test/sina/weibo");
            fs.close();
            fs = createFileSystem();
            // create files
            createAndWriteFile(fs, "f:\\hadooptest.txt", "/test/sina/trs1.txt");
            createAndWriteFile(fs, "f:\\hadooptest.txt", "/test/sina/trs2.txt");
            fs.close();
            fs = createFileSystem();
            // download a file
            downloadFile(fs, "f:\\local.txt", "/test/sina/trs2.txt");
            fs.close();
            fs = createFileSystem();
            // list files
            listfile(fs, "/test");
            fs.close();
            fs = createFileSystem();
            // delete a file
            deleteFile(fs, "/test/sina/trs2.txt");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                fs.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
4. Run results
5. Error summary
Permission issues
Method one:
Add the following property to hdfs-site.xml and restart HDFS:
<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>
Method two:
Use DistributedFileSystem directly, e.g. DistributedFileSystem dfs = new DistributedFileSystem(); (see the sketch below).
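The snippet below is a minimal sketch of method two, plus a commonly used alternative that connects as an explicitly named HDFS user instead of the local OS user. The class name PermissionWorkaround and the user name "hadoop" are placeholders, and the uri is assumed to be the same hdfs://localhost:9000 used in HdfsStudy above.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class PermissionWorkaround {
    // placeholder: same address as fs.defaultFS in core-site.xml
    static final String uri = "hdfs://localhost:9000";

    // Method two: instantiate DistributedFileSystem directly and initialize it against the NameNode
    public static FileSystem viaDistributedFileSystem() throws Exception {
        Configuration conf = new Configuration();
        DistributedFileSystem dfs = new DistributedFileSystem();
        dfs.initialize(URI.create(uri), conf);
        return dfs;
    }

    // Common alternative: connect as a named HDFS user
    // ("hadoop" is only a placeholder; use the user that owns the target directories)
    public static FileSystem asNamedUser() throws Exception {
        Configuration conf = new Configuration();
        return FileSystem.get(URI.create(uri), conf, "hadoop");
    }
}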