1. 在启动hadoop之前,先在配置文件 hdfs-site.xml 中配置
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
把Hadoop权限验证关闭,并把hadoop.dll文件放到C:/windows/system32中,再启动hadoop集群。
2.建立maven项目 在pom.xml中 加入 依赖。 版本号与hadoop集群版本号一致。
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.4</version>
</dependency>
3.在java项目 my-study-spark 中添加
D:\newworkspace\my-study-spark\src\resources\core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop1:9000</value>
</property>
</configuration>
4.启动项目的时候,有可能会报错:
java.io.IOException: No FileSystem for scheme: hdfs
所以需要从hadoop安装目录中拷贝 hadoop-hdfs-2.6.5.jar 放到项目中。
(注意:此处的 2.6.5 与 pom.xml 中声明的 2.6.4 版本不一致,实际应与集群版本保持一致。)
5. HdfsClient.java
package com.eastcom.first.spark.data.hdfs;
import java.io.IOException;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Small demo client that connects to HDFS using the local *-site.xml resource
 * files and prints the size and file listing of a sample directory.
 */
public class HdfsClient {

    /** Hadoop configuration assembled from the local resource XML files below. */
    static Configuration conf = new Configuration();

    /** Shared HDFS handle, initialized once when the class is loaded. */
    static FileSystem hdfs;

    static {
        String path = "D:/newworkspace/my-study-spark/src/resources/";
        conf.addResource(new Path(path + "core-site.xml"));
        conf.addResource(new Path(path + "hdfs-site.xml"));
        conf.addResource(new Path(path + "mapred-site.xml"));
        try {
            hdfs = FileSystem.get(conf);
        } catch (IOException e) {
            // Fail fast instead of leaving `hdfs` null and hitting an NPE later:
            // every caller of this class depends on a usable FileSystem handle.
            throw new IllegalStateException("Unable to obtain HDFS FileSystem", e);
        }
    }

    public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
        String hdfsRoot = "hdfs://hadoop1:9000/";
        String hdfsPath = hdfsRoot + "flume";

        // Total bytes stored under the directory (like `hdfs dfs -du`).
        long dataSize = HdfsFileHelper.dataSize(hdfs, hdfsPath);
        System.out.println(dataSize);

        // Recursively list all non-temporary files under the directory.
        FileStatus[] fileStatus = HdfsFileHelper.listStatus(hdfs, hdfsPath, new NonTmpFileFilter());
        // Print the count; printing the array itself would only show a reference.
        System.out.println(fileStatus.length);
        for (FileStatus fStatus : fileStatus) {
            System.out.println(fStatus);
        }
    }
}
6.HdfsFileHelper.java
package com.eastcom.first.spark.data.hdfs;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helper utilities for operating on files stored in HDFS: moving,
 * deleting, recursively listing, and measuring directory sizes.
 *
 * @author Administrator
 */
public class HdfsFileHelper {

    protected static final Logger logger = LoggerFactory.getLogger(HdfsFileHelper.class);

    /** Utility class; not meant to be instantiated. */
    private HdfsFileHelper() {
    }

    /**
     * Moves every non-temporary file directly under {@code srcPath} into
     * {@code tagPath}, keeping the original file names.
     *
     * @param fs      the HDFS handle to operate on
     * @param srcPath source directory
     * @param tagPath target directory
     * @throws IOException if listing or renaming fails
     */
    public static void moveFiles(FileSystem fs, String srcPath, String tagPath) throws IOException {
        FileStatus[] files = fs.listStatus(new Path(srcPath), new NonTmpFileFilter());
        for (FileStatus file : files) {
            fs.rename(file.getPath(), new Path(tagPath, file.getPath().getName()));
        }
    }

    /**
     * Moves the given files into {@code tagPath}, appending a running index
     * ("-0", "-1", ...) to each name to avoid collisions.
     *
     * @param fs      the HDFS handle to operate on
     * @param tagPath target directory
     * @param files   files to move
     * @throws IOException if renaming fails
     */
    public static void moveFiles(FileSystem fs, String tagPath, FileStatus[] files) throws IOException {
        int index = 0;
        for (FileStatus file : files) {
            fs.rename(file.getPath(), new Path(tagPath, file.getPath().getName() + "-" + index++));
        }
    }

    /**
     * Recursively deletes every non-temporary entry directly under
     * {@code srcPath}.
     *
     * @param fs      the HDFS handle to operate on
     * @param srcPath directory whose children are deleted
     * @throws IOException if listing or deletion fails
     */
    public static void rmFiles(FileSystem fs, String srcPath) throws IOException {
        FileStatus[] files = fs.listStatus(new Path(srcPath), new NonTmpFileFilter());
        for (FileStatus file : files) {
            fs.delete(file.getPath(), true);
        }
    }

    /**
     * Recursively lists all regular files under {@code path} that are accepted
     * by the given {@link PathFilter}; directories are descended into, never
     * returned.
     *
     * @param fs     the HDFS handle to operate on
     * @param path   root directory to walk
     * @param filter filter selecting which entries to visit
     * @return all matching files, flattened
     * @throws FileNotFoundException if {@code path} does not exist
     * @throws IOException           if listing fails
     */
    public static FileStatus[] listStatus(FileSystem fs, Path path, PathFilter filter)
            throws FileNotFoundException, IOException {
        return loopDir(fs, path, filter);
    }

    /**
     * Convenience overload of {@link #listStatus(FileSystem, Path, PathFilter)}
     * taking the root directory as a string.
     *
     * @param fs     the HDFS handle to operate on
     * @param strDir root directory to walk
     * @param filter filter selecting which entries to visit
     * @return all matching files, flattened
     * @throws FileNotFoundException if {@code strDir} does not exist
     * @throws IOException           if listing fails
     */
    public static FileStatus[] listStatus(FileSystem fs, String strDir, PathFilter filter)
            throws FileNotFoundException, IOException {
        Path path = new Path(strDir);
        return loopDir(fs, path, filter);
    }

    /**
     * Depth-first recursion backing the {@code listStatus} overloads:
     * directories are descended into, regular files are collected.
     *
     * @param fs     the HDFS handle to operate on
     * @param dir    directory being visited
     * @param filter filter selecting which entries to visit
     * @return all matching files under {@code dir}
     * @throws FileNotFoundException if {@code dir} does not exist
     * @throws IOException           if listing fails
     */
    private static FileStatus[] loopDir(FileSystem fs, Path dir, PathFilter filter)
            throws FileNotFoundException, IOException {
        List<FileStatus> result = new ArrayList<>();
        FileStatus[] listStatus = fs.listStatus(dir, filter);
        for (FileStatus status : listStatus) {
            if (status.isDirectory()) {
                FileStatus[] dir2 = loopDir(fs, status.getPath(), filter);
                result.addAll(Arrays.asList(dir2));
            } else {
                result.add(status);
            }
        }
        return result.toArray(new FileStatus[result.size()]);
    }

    /**
     * Deletes {@code path} (recursively, if it is a directory) when it exists.
     * Failures are logged rather than propagated.
     *
     * @param fileSystem the HDFS handle to operate on
     * @param path       path to remove
     */
    public static void ensurePathNotExists(FileSystem fileSystem, Path path) {
        try {
            if (fileSystem.exists(path)) {
                // recursive=true is required for directories; harmless for files.
                fileSystem.delete(path, true);
            }
        } catch (IOException e) {
            logger.error("Failed to delete path {}", path, e);
        }
    }

    /**
     * Overload of {@link #ensurePathNotExists(FileSystem, Path)} taking the
     * path as a string.
     *
     * @param fileSystem the HDFS handle to operate on
     * @param path       path to remove
     */
    public static void ensurePathNotExists(FileSystem fileSystem, String path) {
        ensurePathNotExists(fileSystem, new Path(path));
    }

    /**
     * Checks whether {@code path} exists; any failure is logged and reported
     * as "does not exist".
     *
     * @param fileSystem the HDFS handle to operate on
     * @param path       path to test
     * @return true if the path exists, false otherwise or on error
     */
    public static boolean taskInputExists(FileSystem fileSystem, String path) {
        try {
            return fileSystem.exists(new Path(path));
        } catch (Exception e) {
            logger.error("Failed to check existence of {}", path, e);
            return false;
        }
    }

    /**
     * Equivalent of {@code hdfs dfs -du /hdfspath}: overload that obtains a
     * {@link FileSystem} from the given configuration first.
     *
     * @param conf     Hadoop configuration to connect with
     * @param hdfsPath path to measure
     * @return total length in bytes, or 0 on error (best-effort)
     */
    public static long dataSize(Configuration conf, String hdfsPath) {
        try {
            FileSystem fileSystem = FileSystem.get(conf);
            return dataSize(fileSystem, hdfsPath);
        } catch (Exception e) {
            // Best-effort contract: callers treat 0 as "unknown", but the
            // failure should not be silently invisible.
            logger.error("Failed to compute data size of {}", hdfsPath, e);
            return 0;
        }
    }

    /**
     * Equivalent of {@code hdfs dfs -du /hdfspath}: returns the total number
     * of bytes actually stored under the given path.
     *
     * @param fileSystem the HDFS handle to operate on
     * @param hdfsPath   path to measure
     * @return total length in bytes, or 0 on error (best-effort)
     */
    public static long dataSize(FileSystem fileSystem, String hdfsPath) {
        try {
            Path path = new Path(hdfsPath);
            ContentSummary contentSummary = fileSystem.getContentSummary(path);
            return contentSummary.getLength();
        } catch (Exception e) {
            logger.error("Failed to compute data size of {}", hdfsPath, e);
            return 0L;
        }
    }

    /**
     * Deletes the given file (or directory, recursively) if it exists.
     *
     * @param file       path to delete
     * @param fileSystem the HDFS handle to operate on
     * @throws IOException if deletion fails
     */
    public static void deleteFile(String file, FileSystem fileSystem) throws IOException {
        Path path = new Path(file);
        if (!fileSystem.exists(path)) {
            System.out.println("File " + file + " does not exist");
            return;
        }
        fileSystem.delete(path, true);
        // NOTE: do NOT close fileSystem here — FileSystem.get() returns a
        // cached, shared instance; closing it would break all other callers.
    }

    /**
     * Creates the given directory (including parents) unless it already exists.
     *
     * @param dir        directory to create
     * @param fileSystem the HDFS handle to operate on
     * @throws IOException if creation fails
     */
    public static void mkdir(String dir, FileSystem fileSystem) throws IOException {
        Path path = new Path(dir);
        if (fileSystem.exists(path)) {
            System.out.println("Dir " + dir + " already exists");
            return;
        }
        fileSystem.mkdirs(path);
        // See deleteFile: the shared FileSystem must not be closed here.
    }
}
7.NonTmpFileFilter.java
package com.eastcom.first.spark.data.hdfs;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
/**
 * {@link PathFilter} that accepts every path except temporary files still
 * being written, identified by a ".tmp" suffix.
 */
public class NonTmpFileFilter implements PathFilter {

    @Override
    public boolean accept(Path path) {
        // Reject in-flight temp files; accept everything else.
        return !path.getName().endsWith(".tmp");
    }
}
over
参考
http://blog.csdn.net/zengmingen/article/details/52204429
https://my.oschina.net/leejun2005/blog/93973