Integrating Spring with Hadoop HDFS in Java

I've been tinkering with HDFS recently and integrated it with Spring. Sharing it here; if anything is incorrect, feel free to leave a comment.


First, download the jars needed for the integration. I have packaged and uploaded them here: http://download.csdn.net/detail/kokjuis/9709325


For details on configuring Hadoop itself, see: http://blog.csdn.net/kokjuis/article/details/53537029


First, add the hbase-site.xml configuration file. Only the ZooKeeper quorum address and client port need to be configured here.


<?xml version="1.0" encoding="UTF-8"?>
<configuration>
	<property>
		<name>hbase.zookeeper.quorum</name>
		<value>127.0.0.1</value>
	</property>
	<property>
		<name>hbase.zookeeper.property.clientPort</name>
		<value>2181</value>
	</property>
</configuration>


Then add the following to the Spring configuration file so that Spring injects the hadoopConfiguration for us:

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tx="http://www.springframework.org/schema/tx"
	xmlns:aop="http://www.springframework.org/schema/aop" xmlns:task="http://www.springframework.org/schema/task"
	xmlns:cache="http://www.springframework.org/schema/cache" xmlns:hdp="http://www.springframework.org/schema/hadoop"
	xsi:schemaLocation="http://www.springframework.org/schema/beans
	 http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
	 http://www.springframework.org/schema/tx http://www.springframework.org/schema/tx/spring-tx.xsd  
     http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-3.1.xsd 
     http://www.springframework.org/schema/task http://www.springframework.org/schema/task/spring-task-3.1.xsd 
     http://www.springframework.org/schema/cache http://www.springframework.org/schema/cache/spring-cache.xsd 
     http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd">

	<!-- The default hadoopConfiguration; its default bean ID is hadoopConfiguration, so beans such as hadoopFile pick it up automatically without an explicit ref -->
	<hdp:configuration resources="classpath:hadoop/hbase-site.xml" />
	<hdp:hbase-configuration configuration-ref="hadoopConfiguration" />

	<!-- The HDFS FileSystem bean, used to read and write HDFS files -->
	<hdp:file-system id="hadoopFile" configuration-ref="hadoopConfiguration" />

	<!-- Configure the HbaseTemplate -->
	<bean id="hbaseTemplate" class="org.springframework.data.hadoop.hbase.HbaseTemplate">
		<property name="configuration" ref="hbaseConfiguration" />
	</bean>


</beans>
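
To verify the wiring, the beans can be pulled straight out of the application context. A minimal sketch (the config file name spring-hadoop.xml is a placeholder for your actual Spring config file):

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class SpringHdfsDemo {
	public static void main(String[] args) throws Exception {
		// "spring-hadoop.xml" is a placeholder; point it at the config above
		ClassPathXmlApplicationContext ctx =
				new ClassPathXmlApplicationContext("spring-hadoop.xml");
		try {
			// "hadoopFile" matches the <hdp:file-system> bean id above
			FileSystem fs = ctx.getBean("hadoopFile", FileSystem.class);
			System.out.println("HDFS root exists: " + fs.exists(new Path("/")));
		} finally {
			ctx.close();
		}
	}
}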


The configuration is fairly straightforward. Below is the key HDFS utility class:


import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;

/**
 * HDFS utility class
 * 
 * @author kokJuis
 * @version 1.0
 * @date 2016-12-12
 * @email 189155278@qq.com
 */
public class HDFSUtil {

	private HDFSUtil() {
	}

	// Hadoop configuration (true = also load default resources from the classpath)
	static Configuration conf = new Configuration(true);
	static {
		// Bind the hdfs:// scheme to DistributedFileSystem explicitly
		conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
		// Point at the HDFS NameNode; without this, the local filesystem is used
		conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
	}

	/**
	 * Check whether a path exists
	 * 
	 * @param path
	 * @return
	 * @throws IOException
	 */
	public static boolean exists(String path) throws IOException {
		FileSystem fs = FileSystem.get(conf);
		return fs.exists(new Path(path));
	}

	/**
	 * Create a file with the given byte content
	 * 
	 * @param filePath
	 * @param contents
	 * @throws IOException
	 */
	public static void createFile(String filePath, byte[] contents)
			throws IOException {
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path(filePath);
		FSDataOutputStream outputStream = fs.create(path);
		outputStream.write(contents);
		outputStream.close();
		fs.close();
	}

	/**
	 * Create a file with the given string content
	 * 
	 * @param filePath
	 * @param fileContent
	 * @throws IOException
	 */
	public static void createFile(String filePath, String fileContent)
			throws IOException {
		createFile(filePath, fileContent.getBytes());
	}

	/**
	 * Upload a local file to HDFS (overwrites the target if it already exists)
	 * 
	 * @param localFilePath
	 * @param remoteFilePath
	 * @throws IOException
	 */
	public static void copyFromLocalFile(String localFilePath,
			String remoteFilePath) throws IOException {
		FileSystem fs = FileSystem.get(conf);
		Path localPath = new Path(localFilePath);
		Path remotePath = new Path(remoteFilePath);
		fs.copyFromLocalFile(false, true, localPath, remotePath);
		fs.close();
	}

	/**
	 * Delete a file or directory
	 * 
	 * @param remoteFilePath
	 * @param recursive
	 * @return
	 * @throws IOException
	 */
	public static boolean deleteFile(String remoteFilePath, boolean recursive)
			throws IOException {
		FileSystem fs = FileSystem.get(conf);
		boolean result = fs.delete(new Path(remoteFilePath), recursive);
		fs.close();
		return result;
	}

	/**
	 * Delete a file or directory (recursively if it has children)
	 * 
	 * @param remoteFilePath
	 * @return
	 * @throws IOException
	 */
	public static boolean deleteFile(String remoteFilePath) throws IOException {
		return deleteFile(remoteFilePath, true);
	}

	/**
	 * Rename a file
	 * 
	 * @param oldFileName
	 * @param newFileName
	 * @return
	 * @throws IOException
	 */
	public static boolean renameFile(String oldFileName, String newFileName)
			throws IOException {
		FileSystem fs = FileSystem.get(conf);
		Path oldPath = new Path(oldFileName);
		Path newPath = new Path(newFileName);
		boolean result = fs.rename(oldPath, newPath);
		fs.close();
		return result;
	}

	/**
	 * Create a directory
	 * 
	 * @param dirName
	 * @return
	 * @throws IOException
	 */
	public static boolean createDirectory(String dirName) throws IOException {
		FileSystem fs = FileSystem.get(conf);
		Path dir = new Path(dirName);
		boolean result = false;
		if (!fs.exists(dir)) {
			result = fs.mkdirs(dir);
		}
		fs.close();
		return result;
	}

	/**
	 * List all files under the given path (directories excluded)
	 * 
	 * @param basePath
	 * @param recursive
	 * @return
	 * @throws IOException
	 */
	public static RemoteIterator<LocatedFileStatus> listFiles(String basePath,
			boolean recursive) throws IOException {
		FileSystem fs = FileSystem.get(conf);
		RemoteIterator<LocatedFileStatus> fileStatusRemoteIterator = fs
				.listFiles(new Path(basePath), recursive);

		return fileStatusRemoteIterator;
	}

	/**
	 * List files under the given path (non-recursive)
	 * 
	 * @param basePath
	 * @return
	 * @throws IOException
	 */
	public static RemoteIterator<LocatedFileStatus> listFiles(String basePath)
			throws IOException {
		FileSystem fs = FileSystem.get(conf);
		// Do not close fs here: the returned iterator reads from it lazily
		// and would fail once the FileSystem is closed
		return fs.listFiles(new Path(basePath), false);
	}

	/**
	 * List the files and subdirectories under the given directory (non-recursive)
	 * 
	 * @param dirPath
	 * @return
	 * @throws IOException
	 */
	public static FileStatus[] listStatus(String dirPath) throws IOException {
		FileSystem fs = FileSystem.get(conf);
		FileStatus[] fileStatuses = fs.listStatus(new Path(dirPath));
		fs.close();
		return fileStatuses;
	}

	/**
	 * Read the content of a file
	 * 
	 * @param filePath
	 * @return
	 * @throws IOException
	 */
	public static byte[] readFile(String filePath) throws IOException {
		byte[] fileContent = null;
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path(filePath);
		if (fs.exists(path)) {
			InputStream inputStream = null;
			ByteArrayOutputStream outputStream = null;
			try {
				inputStream = fs.open(path);
				outputStream = new ByteArrayOutputStream(
						inputStream.available());
				IOUtils.copyBytes(inputStream, outputStream, conf);
				fileContent = outputStream.toByteArray();
			} finally {
				IOUtils.closeStream(inputStream);
				IOUtils.closeStream(outputStream);
				fs.close();
			}
		}
		return fileContent;
	}

	/**
	 * Download a file from HDFS to the local filesystem
	 * 
	 * @param remote
	 * @param local
	 * @throws IOException
	 */
	public static void download(String remote, String local) throws IOException {
		Path path = new Path(remote);
		FileSystem fs = FileSystem.get(conf);
		fs.copyToLocalFile(path, new Path(local));
		System.out.println("download: from " + remote + " to " + local);
		fs.close();
	}
}
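
For a quick smoke test of the utility class, something like the following works (the path is hypothetical; any writable HDFS directory will do):

import java.io.IOException;

public class HdfsUtilDemo {
	public static void main(String[] args) throws IOException {
		String path = "/tmp/hdfs-demo.txt"; // hypothetical test path
		HDFSUtil.createFile(path, "hello hdfs");
		// readFile returns null if the path does not exist
		System.out.println(new String(HDFSUtil.readFile(path)));
		HDFSUtil.deleteFile(path); // clean up
	}
}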

One thing to note: when using HDFS, you must explicitly set the filesystem address (fs.defaultFS). Without it, uploads may appear to succeed, but they go to the local filesystem rather than HDFS.
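
Equivalently, instead of hard-coding it with conf.set, fs.defaultFS can be supplied through a core-site.xml on the classpath (picked up because the Configuration above is created with loadDefaults = true):

<?xml version="1.0" encoding="UTF-8"?>
<configuration>
	<property>
		<name>fs.defaultFS</name>
		<value>hdfs://127.0.0.1:9000</value>
	</property>
</configuration>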




Below is an example of downloading a file.
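
A minimal sketch (the paths are hypothetical; the remote file is assumed to already exist on HDFS):

import java.io.IOException;

public class DownloadDemo {
	public static void main(String[] args) throws IOException {
		// Hypothetical paths: a file that exists on HDFS and a local destination
		HDFSUtil.download("/user/test/demo.txt", "/tmp/demo.txt");
	}
}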


