1. 在启动hadoop之前,先在配置文件 hdfs-site.xml 中配置
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
把Hadoop权限验证关闭,并把hadoop.dll文件放到C:/windows/system32中,再启动hadoop集群。
2.建立maven项目 在pom.xml中 加入 依赖。 版本号与hadoop集群版本号一致。
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.4</version>
</dependency>
3.在java项目 my-study-spark 中添加
D:\newworkspace\my-study-spark\src\resources\core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop1:9000</value>
</property>
</configuration>
4.启动项目的时候,有可能会报错:
java.io.IOException: No FileSystem for scheme: hdfs
所以需要从hadoop安装目录中拷贝 hadoop-hdfs-2.6.5.jar 放到项目中。
(注意:此处的 2.6.5 与 pom.xml 中声明的 2.6.4 版本不一致,实际应与集群版本保持一致。)
5. HdfsClient.java
package com.eastcom.first.spark.data.hdfs;
import java.io.IOException;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Small demo client that connects to HDFS using the local *-site.xml resource
 * files and prints the size and file listing of a sample directory.
 */
public class HdfsClient {

    /** Hadoop configuration assembled from the local resource XML files below. */
    static Configuration conf = new Configuration();

    /** Shared HDFS handle, initialized once when the class is loaded. */
    static FileSystem hdfs;

    static {
        String path = "D:/newworkspace/my-study-spark/src/resources/";
        conf.addResource(new Path(path + "core-site.xml"));
        conf.addResource(new Path(path + "hdfs-site.xml"));
        conf.addResource(new Path(path + "mapred-site.xml"));
        try {
            hdfs = FileSystem.get(conf);
        } catch (IOException e) {
            // Fail fast instead of leaving `hdfs` null and hitting an NPE later:
            // every caller of this class depends on a usable FileSystem handle.
            throw new IllegalStateException("Unable to obtain HDFS FileSystem", e);
        }
    }

    public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
        String hdfsRoot = "hdfs://hadoop1:9000/";
        String hdfsPath = hdfsRoot + "flume";

        // Total bytes stored under the directory (like `hdfs dfs -du`).
        long dataSize = HdfsFileHelper.dataSize(hdfs, hdfsPath);
        System.out.println(dataSize);

        // Recursively list all non-temporary files under the directory.
        FileStatus[] fileStatus = HdfsFileHelper.listStatus(hdfs, hdfsPath, new NonTmpFileFilter());
        // Print the count; printing the array itself would only show a reference.
        System.out.println(fileStatus.length);
        for (FileStatus fStatus : fileStatus) {
            System.out.println(fStatus);
        }
    }
}
6.HdfsFileHelper.java
package com.eastcom.first.spark.data.hdfs;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helper utilities for operating on files stored in HDFS: moving,
 * deleting, recursively listing, and measuring directory sizes.
 *
 * @author Administrator
 */
public class HdfsFileHelper {

    protected static final Logger logger = LoggerFactory.getLogger(HdfsFileHelper.class);

    /** Utility class; not meant to be instantiated. */
    private HdfsFileHelper() {
    }

    /**
     * Moves every non-temporary file directly under {@code srcPath} into
     * {@code tagPath}, keeping the original file names.
     *
     * @param fs      the HDFS handle to operate on
     * @param srcPath source directory
     * @param tagPath target directory
     * @throws IOException if listing or renaming fails
     */
    public static void moveFiles(FileSystem fs, String srcPath, String tagPath) throws IOException {
        FileStatus[] files = fs.listStatus(new Path(srcPath), new NonTmpFileFilter());
        for (FileStatus file : files) {
            fs.rename(file.getPath(), new Path(tagPath, file.getPath().getName()));
        }
    }

    /**
     * Moves the given files into {@code tagPath}, appending a running index
     * ("-0", "-1", ...) to each name to avoid collisions.
     *
     * @param fs      the HDFS handle to operate on
     * @param tagPath target directory
     * @param files   files to move
     * @throws IOException if renaming fails
     */
    public static void moveFiles(FileSystem fs, String tagPath, FileStatus[] files) throws IOException {
        int index = 0;
        for (FileStatus file : files) {
            fs.rename(file.getPath(), new Path(tagPath, file.getPath().getName() + "-" + index++));
        }
    }

    /**
     * Recursively deletes every non-temporary entry directly under
     * {@code srcPath}.
     *
     * @param fs      the HDFS handle to operate on
     * @param srcPath directory whose children are deleted
     * @throws IOException if listing or deletion fails
     */
    public static void rmFiles(FileSystem fs, String srcPath) throws IOException {
        FileStatus[] files = fs.listStatus(new Path(srcPath), new NonTmpFileFilter());
        for (FileStatus file : files) {
            fs.delete(file.getPath(), true);
        }
    }

    /**
     * Recursively lists all regular files under {@code path} that are accepted
     * by the given {@link PathFilter}; directories are descended into, never
     * returned.
     *
     * @param fs     the HDFS handle to operate on
     * @param path   root directory to walk
     * @param filter filter selecting which entries to visit
     * @return all matching files, flattened
     * @throws FileNotFoundException if {@code path} does not exist
     * @throws IOException           if listing fails
     */
    public static FileStatus[] listStatus(FileSystem fs, Path path, PathFilter filter)
            throws FileNotFoundException, IOException {
        return loopDir(fs, path, filter);
    }

    /**
     * Convenience overload of {@link #listStatus(FileSystem, Path, PathFilter)}
     * taking the root directory as a string.
     *
     * @param fs     the HDFS handle to operate on
     * @param strDir root directory to walk
     * @param filter filter selecting which entries to visit
     * @return all matching files, flattened
     * @throws FileNotFoundException if {@code strDir} does not exist
     * @throws IOException           if listing fails
     */
    public static FileStatus[] listStatus(FileSystem fs, String strDir, PathFilter filter)
            throws FileNotFoundException, IOException {
        Path path = new Path(strDir);
        return loopDir(fs, path, filter);
    }

    /**
     * Depth-first recursion backing the {@code listStatus} overloads:
     * directories are descended into, regular files are collected.
     *
     * @param fs     the HDFS handle to operate on
     * @param dir    directory being visited
     * @param filter filter selecting which entries to visit
     * @return all matching files under {@code dir}
     * @throws FileNotFoundException if {@code dir} does not exist
     * @throws IOException           if listing fails
     */
    private static FileStatus[] loopDir(FileSystem fs, Path dir, PathFilter filter)
            throws FileNotFoundException, IOException {
        List<FileStatus> result = new ArrayList<>();
        FileStatus[] listStatus = fs.listStatus(dir, filter);
        for (FileStatus status : listStatus) {
            if (status.isDirectory()) {
                FileStatus[] dir2 = loopDir(fs, status.getPath(), filter);
                result.addAll(Arrays.asList(dir2));
            } else {
                result.add(status);
            }
        }
        return result.toArray(new FileStatus[result.size()]);
    }

    /**
     * Deletes {@code path} (recursively, if it is a directory) when it exists.
     * Failures are logged rather than propagated.
     *
     * @param fileSystem the HDFS handle to operate on
     * @param path       path to remove
     */
    public static void ensurePathNotExists(FileSystem fileSystem, Path path) {
        try {
            if (fileSystem.exists(path)) {
                // recursive=true is required for directories; harmless for files.
                fileSystem.delete(path, true);
            }
        } catch (IOException e) {
            logger.error("Failed to delete path {}", path, e);
        }
    }

    /**
     * Overload of {@link #ensurePathNotExists(FileSystem, Path)} taking the
     * path as a string.
     *
     * @param fileSystem the HDFS handle to operate on
     * @param path       path to remove
     */
    public static void ensurePathNotExists(FileSystem fileSystem, String path) {
        ensurePathNotExists(fileSystem, new Path(path));
    }

    /**
     * Checks whether {@code path} exists; any failure is logged and reported
     * as "does not exist".
     *
     * @param fileSystem the HDFS handle to operate on
     * @param path       path to test
     * @return true if the path exists, false otherwise or on error
     */
    public static boolean taskInputExists(FileSystem fileSystem, String path) {
        try {
            return fileSystem.exists(new Path(path));
        } catch (Exception e) {
            logger.error("Failed to check existence of {}", path, e);
            return false;
        }
    }

    /**
     * Equivalent of {@code hdfs dfs -du /hdfspath}: overload that obtains a
     * {@link FileSystem} from the given configuration first.
     *
     * @param conf     Hadoop configuration to connect with
     * @param hdfsPath path to measure
     * @return total length in bytes, or 0 on error (best-effort)
     */
    public static long dataSize(Configuration conf, String hdfsPath) {
        try {
            FileSystem fileSystem = FileSystem.get(conf);
            return dataSize(fileSystem, hdfsPath);
        } catch (Exception e) {
            // Best-effort contract: callers treat 0 as "unknown", but the
            // failure should not be silently invisible.
            logger.error("Failed to compute data size of {}", hdfsPath, e);
            return 0;
        }
    }

    /**
     * Equivalent of {@code hdfs dfs -du /hdfspath}: returns the total number
     * of bytes actually stored under the given path.
     *
     * @param fileSystem the HDFS handle to operate on
     * @param hdfsPath   path to measure
     * @return total length in bytes, or 0 on error (best-effort)
     */
    public static long dataSize(FileSystem fileSystem, String hdfsPath) {
        try {
            Path path = new Path(hdfsPath);
            ContentSummary contentSummary = fileSystem.getContentSummary(path);
            return contentSummary.getLength();
        } catch (Exception e) {
            logger.error("Failed to compute data size of {}", hdfsPath, e);
            return 0L;
        }
    }

    /**
     * Deletes the given file (or directory, recursively) if it exists.
     *
     * @param file       path to delete
     * @param fileSystem the HDFS handle to operate on
     * @throws IOException if deletion fails
     */
    public static void deleteFile(String file, FileSystem fileSystem) throws IOException {
        Path path = new Path(file);
        if (!fileSystem.exists(path)) {
            System.out.println("File " + file + " does not exist");
            return;
        }
        fileSystem.delete(path, true);
        // NOTE: do NOT close fileSystem here — FileSystem.get() returns a
        // cached, shared instance; closing it would break all other callers.
    }

    /**
     * Creates the given directory (including parents) unless it already exists.
     *
     * @param dir        directory to create
     * @param fileSystem the HDFS handle to operate on
     * @throws IOException if creation fails
     */
    public static void mkdir(String dir, FileSystem fileSystem) throws IOException {
        Path path = new Path(dir);
        if (fileSystem.exists(path)) {
            System.out.println("Dir " + dir + " already exists");
            return;
        }
        fileSystem.mkdirs(path);
        // See deleteFile: the shared FileSystem must not be closed here.
    }
}
7.NonTmpFileFilter.java
package com.eastcom.first.spark.data.hdfs;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
/**
 * {@link PathFilter} that accepts every path except temporary files still
 * being written, identified by a ".tmp" suffix.
 */
public class NonTmpFileFilter implements PathFilter {

    @Override
    public boolean accept(Path path) {
        // Reject in-flight temp files; accept everything else.
        return !path.getName().endsWith(".tmp");
    }
}
over
参考
http://blog.csdn.net/zengmingen/article/details/52204429
https://my.oschina.net/leejun2005/blog/93973