Java操作HDFS

8 篇文章 0 订阅

1、添加 Hadoop 相关的 Maven 依赖

  	<!-- Hadoop dependencies -->
	<dependency>
	    <groupId>org.apache.hadoop</groupId>
	    <artifactId>hadoop-client</artifactId>
	    <version>2.9.0</version>
	</dependency>
	<dependency>
	    <groupId>org.apache.hadoop</groupId>
	    <artifactId>hadoop-common</artifactId>
	    <version>2.9.0</version>
	    <scope>provided</scope>
	</dependency>
	<dependency>
	    <groupId>org.apache.hadoop</groupId>
	    <artifactId>hadoop-hdfs</artifactId>
	    <version>2.9.0</version>
	</dependency>
	
	<!-- JUnit -->
	<dependency>
	    <groupId>junit</groupId>
	    <artifactId>junit</artifactId>
	    <version>4.12</version>
	    <!-- Uncomment the line below to limit this dependency to the test scope -->
	    <!-- <scope>test</scope> -->
	</dependency>

2、HDFS操作类

package test;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.zookeeper.common.IOUtils;

/**
 * Static helpers for common HDFS operations: creating and deleting
 * directories, listing a directory, and uploading, creating, reading,
 * appending to and deleting files.
 *
 * <p>Every HDFS path argument is an absolute path such as
 * {@code /tmp/test.txt}; it is prefixed with {@link #uri} to form the full
 * location. Each call opens its own {@link FileSystem} instance and closes it
 * before returning, also when the operation throws.
 *
 * @author zhang
 * @since 2016-09-26
 */
public class HDFSUtils {

	/** Base URI of the target HDFS file system; prepended to every HDFS path argument. */
	public static String uri = "hdfs://192.168.153.129:9000";

	/**
	 * Opens a private, non-cached file system connection for {@link #uri}.
	 *
	 * <p>{@code FileSystem.newInstance} is used instead of
	 * {@code FileSystem.get} because the latter hands out a shared cached
	 * object: closing it would break any other code still using it, and the
	 * given configuration would be ignored when a cached instance exists.
	 *
	 * @param conf configuration to connect with
	 * @return a file system the caller owns and must close
	 * @throws IOException if the file system cannot be created
	 */
	private static FileSystem openFileSystem(Configuration conf) throws IOException {
		return FileSystem.newInstance(URI.create(uri), conf);
	}

	/**
	 * Builds the fully qualified HDFS path for a path relative to {@link #uri}.
	 *
	 * @param path absolute HDFS path, may like '/tmp/testdir'
	 * @return the path prefixed with {@link #uri}
	 */
	private static Path toHdfsPath(String path) {
		return new Path(uri + path);
	}

	/**
	 * make a new dir in the hdfs, including missing parent dirs
	 * 
	 * @param dir the dir may like '/tmp/testdir'
	 * @return boolean true if the path already exists or was created,
	 *         false if dir is blank or the dir could not be created
	 * @exception IOException something wrong happens when operating files
	 */
	public static boolean mkdir(String dir) throws IOException {
		if (StringUtils.isBlank(dir)) {
			return false;
		}
		FileSystem fs = openFileSystem(new Configuration());
		try {
			Path path = toHdfsPath(dir);
			// An already existing path is reported as success; otherwise
			// report what mkdirs actually achieved.
			return fs.exists(path) || fs.mkdirs(path);
		} finally {
			fs.close();
		}
	}

	/**
	 * delete a dir in the hdfs, together with everything below it.
	 * 
	 * @param dir the dir may like '/tmp/testdir'
	 * @return boolean false if dir is blank; otherwise true once the delete
	 *         call has completed without an exception, whether or not
	 *         anything was actually removed
	 * @exception IOException something wrong happens when operating files
	 * 
	 */
	public static boolean deleteDir(String dir) throws IOException {
		if (StringUtils.isBlank(dir)) {
			return false;
		}
		FileSystem fs = openFileSystem(new Configuration());
		try {
			// recursive delete; the result is deliberately not propagated so
			// that existing callers keep getting true
			fs.delete(toHdfsPath(dir), true);
			return true;
		} finally {
			fs.close();
		}
	}

	/**
	 * list files/directories/links names under a directory, not include embed
	 * objects
	 * 
	 * @param dir a folder path may like '/tmp/testdir'
	 * @return List<String> full path of every direct child; an empty list if
	 *         dir is blank
	 * @throws IOException file io exception
	 */
	public static List<String> listAll(String dir) throws IOException {
		List<String> names = new ArrayList<String>();
		if (StringUtils.isBlank(dir)) {
			return names;
		}
		FileSystem fs = openFileSystem(new Configuration());
		try {
			// regular files, directories and symlinks are all reported the
			// same way, so no distinction is needed here
			for (FileStatus stat : fs.listStatus(toHdfsPath(dir))) {
				names.add(stat.getPath().toString());
			}
			return names;
		} finally {
			fs.close();
		}
	}

	/**
	 * upload the local file to the hdfs,
	 * notice that the path is full like /tmp/test.txt
	 * 
	 * @param localFile local file path, may like F:/test.txt or /usr/local/test.txt
	 * @param hdfsFile hdfs file path, may like /tmp/dir
	 * @return boolean true-success, false if either argument is blank
	 * @throws IOException file io exception, e.g. when the copy fails
	 */
	public static boolean uploadLocalFile2HDFS(String localFile, String hdfsFile) throws IOException {
		if (StringUtils.isBlank(localFile) || StringUtils.isBlank(hdfsFile)) {
			return false;
		}
		FileSystem hdfs = openFileSystem(new Configuration());
		try {
			hdfs.copyFromLocalFile(new Path(localFile), toHdfsPath(hdfsFile));
			return true;
		} finally {
			hdfs.close();
		}
	}

	/**
	 * create a new file in the hdfs and write the content to it, encoded as
	 * UTF-8.
	 * 
	 * @param newFile new file path, a full path name, may like '/tmp/test.txt'
	 * @param content file content
	 * @return boolean true-success, false if newFile is blank or content is null
	 * @throws IOException file io exception
	 */
	public static boolean createNewHDFSFile(String newFile, String content) throws IOException {
		if (StringUtils.isBlank(newFile) || null == content) {
			return false;
		}
		FileSystem hdfs = openFileSystem(new Configuration());
		try {
			FSDataOutputStream os = hdfs.create(toHdfsPath(newFile));
			try {
				os.write(content.getBytes("UTF-8"));
			} finally {
				os.close();
			}
			return true;
		} finally {
			hdfs.close();
		}
	}

	/**
	 * delete the hdfs file (the delete is recursive, so a directory at this
	 * path is removed with its content)
	 * 
	 * @param hdfsFile a full path name, may like '/tmp/test.txt'
	 * @return boolean the result of the delete call, false if hdfsFile is blank
	 * @throws IOException file io exception
	 */
	public static boolean deleteHDFSFile(String hdfsFile) throws IOException {
		if (StringUtils.isBlank(hdfsFile)) {
			return false;
		}
		FileSystem hdfs = openFileSystem(new Configuration());
		try {
			return hdfs.delete(toHdfsPath(hdfsFile), true);
		} finally {
			hdfs.close();
		}
	}

	/**
	 * read the whole hdfs file content into memory
	 * 
	 * @param hdfsFile a full path name, may like '/tmp/test.txt'
	 * @return byte[] file content, or null if hdfsFile is blank
	 * @throws java.io.FileNotFoundException if the file does not exist
	 * @throws IOException file io exception, also when the file is larger
	 *         than a byte array can hold
	 */
	public static byte[] readHDFSFile(String hdfsFile) throws Exception {
		if (StringUtils.isBlank(hdfsFile)) {
			return null;
		}
		FileSystem fs = openFileSystem(new Configuration());
		try {
			Path path = toHdfsPath(hdfsFile);
			// getFileStatus itself fails with FileNotFoundException for a
			// missing path, so no separate existence check is needed
			long length = fs.getFileStatus(path).getLen();
			if (length > Integer.MAX_VALUE) {
				throw new IOException("the file is too large to read into memory: " + path);
			}
			byte[] buffer = new byte[(int) length];
			FSDataInputStream is = fs.open(path);
			try {
				is.readFully(0, buffer);
			} finally {
				is.close();
			}
			return buffer;
		} finally {
			fs.close();
		}
	}

	/**
	 * append something to file dst, encoded as UTF-8; if the file does not
	 * exist yet it is created with the content
	 * 
	 * @param hdfsFile a full path name, may like '/tmp/test.txt'
	 * @param content string
	 * @return boolean false if hdfsFile is blank; true if content is empty
	 *         (nothing to do) or the content was written
	 * @throws Exception something wrong
	 */
	public static boolean append(String hdfsFile, String content) throws Exception {
		if (StringUtils.isBlank(hdfsFile)) {
			return false;
		}
		if (StringUtils.isEmpty(content)) {
			return true;
		}

		Configuration conf = new Configuration();
		// solve the problem when appending at single datanode hadoop env
		conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
		conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
		FileSystem fs = openFileSystem(conf);
		try {
			Path path = toHdfsPath(hdfsFile);
			// The path is resolved exactly once; the file is created here
			// directly rather than by delegating with an already prefixed path.
			OutputStream out = fs.exists(path) ? fs.append(path) : fs.create(path);
			try {
				// same charset as createNewHDFSFile, so appended text matches
				out.write(content.getBytes("UTF-8"));
			} finally {
				out.close();
			}
			return true;
		} finally {
			fs.close();
		}
	}

}

3、测试类

package test;


import java.util.List;

import org.junit.Assert;
import org.junit.Test;


public class HDFSUtilsTest {

	public String uri = "hdfs://192.168.153.129:9000";
	public String dir = "/user/output";
	public String parentDir = "/user";
	
	@Test
	public void testMkdirNormal() {
		try{
			HDFSUtils.deleteDir(dir);
			boolean result = HDFSUtils.mkdir(dir);
			Assert.assertEquals(true, result);
			
			List<String> listFile = HDFSUtils.listAll(parentDir);
			boolean existFile = false;
			for(String elem : listFile){
				if(elem.equals(uri + dir)){
					existFile = true;
					break;
				}
			}
			Assert.assertEquals(true, existFile);
		} catch(Exception ex){
			ex.printStackTrace();
			Assert.assertEquals(true, false);
		}
	}

	
	@Test
	public void testDeleteDir() {
		try{
			Assert.assertEquals(true, HDFSUtils.mkdir(dir));
			Assert.assertEquals(true, HDFSUtils.deleteDir(dir));
			List<String> listFile = HDFSUtils.listAll(parentDir);
			boolean existFile = false;
			for(String elem : listFile){
				if(uri + dir == elem){
					existFile = true;
					break;
				}
			}
			Assert.assertEquals(false, existFile);
		} catch(Exception ex){
			ex.printStackTrace();
			Assert.assertEquals(true, false);
		}
	}

	
	@Test
	public void testListFolder() {
		try{
			HDFSUtils.deleteDir(dir);
			Assert.assertEquals(true, HDFSUtils.mkdir(dir));
			List<String> listFile = HDFSUtils.listAll(dir);
			Assert.assertEquals(0, listFile.size());
			for(String file : listFile) {
				System.out.println(file);
			}
		} catch(Exception ex){
			ex.printStackTrace();
			Assert.assertEquals(true, false);
		}
	}

	
	@Test
	public void testUploadLocalFile2HDFS() {
		String localFile = "F:/aa.txt";
		String remoteFile = dir + "/text.txt";

		try{
			HDFSUtils.mkdir(dir);
			HDFSUtils.deleteHDFSFile(remoteFile);
			Assert.assertEquals(true, HDFSUtils.uploadLocalFile2HDFS(localFile, remoteFile));
		} catch(Exception ex){
			ex.printStackTrace();
			Assert.assertEquals(true, false);
		}
	}

	
	@Test
	public void testCreateNewHDFSFileNormal() {
		try{
			String newFile = dir + "/file1.txt";
			String content = "hello file1";
			
			HDFSUtils.deleteHDFSFile(newFile);
			Assert.assertEquals(true, HDFSUtils.createNewHDFSFile(newFile, content));
			String result = new String(HDFSUtils.readHDFSFile(newFile));
			Assert.assertEquals(content, result);
		} catch(Exception ex){
			ex.printStackTrace();
			Assert.assertEquals(true, false);
		}
	}

	@Test
	public void testCreateNewHDFSFileFoldNotexist1() {
		try{
			String newFile = dir + "/file1.txt";
			String content = "hello file1";
			
			Assert.assertEquals(true, HDFSUtils.deleteDir(dir));
			Assert.assertEquals(true, HDFSUtils.createNewHDFSFile(newFile, content));
		} catch(Exception ex){
			ex.printStackTrace();
			Assert.assertEquals(true, false);
		}
	}
	
	@Test
	public void testDeleteHDFSFile() {
		this.testUploadLocalFile2HDFS();
		try{
			String remoteFile = dir + "/eclipse.ini";
			Assert.assertEquals(true, HDFSUtils.deleteHDFSFile(remoteFile));
		} catch(Exception ex){
			Assert.assertEquals(true, false);
		}
	}

	
	@Test
	public void testReadHDFSFile() {
		this.testUploadLocalFile2HDFS();
		try{
			String remoteFile = dir + "/eclipse.ini";
			String result = new String(HDFSUtils.readHDFSFile(remoteFile));
			Assert.assertEquals(true, result.length() > 0);
		} catch(Exception ex){
			ex.printStackTrace();
			Assert.assertEquals(true, false);
		}
	}
	
	@Test
	public void testAppend() {
		try{
			String newFile = dir + "/file1.txt";
			String content1 = "hello append1\r\n";
			String content2 = "hello append2\r\n";
			
			HDFSUtils.deleteHDFSFile(newFile);
			Assert.assertEquals(true, HDFSUtils.createNewHDFSFile(newFile, ""));
			Assert.assertEquals(true, HDFSUtils.append(newFile, content1));
			Assert.assertEquals(content1, new String(HDFSUtils.readHDFSFile(newFile)));
			Assert.assertEquals(true, HDFSUtils.append(newFile, content2));
			Assert.assertEquals(content1 + content2, new String(HDFSUtils.readHDFSFile(newFile)));
		} catch(Exception ex){
			ex.printStackTrace();
			Assert.assertEquals(true, false);
		}
	}

}

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值