Hdfs上文件 读写 工具类 Demo

读取HDFS上文件Demo

package com.utils;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * 读取Hdfs上的文件
 *
 * @author chichuduxing
 * @date 2016年9月19日 下午14:19:14
 */
public class HdfsReader {
	/**
	 * Logger.
	 */
	protected static final Logger logger = LoggerFactory.getLogger(HdfsReader.class);

	/**
	 * HDFS file system handle, supplied by the caller.
	 */
	private FileSystem _fs = null;

	/**
	 * Line reader over the open input stream; null until {@link #Init(String)} succeeds.
	 */
	private LineReader _lineReader = null;

	/**
	 * HDFS input stream for the file currently being read.
	 */
	private FSDataInputStream _fsInputStream = null;

	/**
	 * Constructor.
	 *
	 * @param fs HDFS file system handle used to open files
	 */
	public HdfsReader(FileSystem fs) {
		this._fs = fs;
	}

	/**
	 * Opens the given file on HDFS and prepares the line reader.
	 *
	 * @param file path of the file to read (e.g. /tmp/readdemo.txt)
	 * @return true when the stream and line reader were created successfully
	 */
	public boolean Init(String file) {
		if (null == file || file.isEmpty()) {
			logger.error("file name is null");
			return false;
		}
		try {
			Path file_path = new Path(file);

			if (!_fs.exists(file_path)) {
				logger.error(file + " not exist!");
				return false;
			}

			// Open the HDFS data stream and wrap it in a line reader.
			this._fsInputStream = this._fs.open(file_path);

			this._lineReader = new LineReader(_fsInputStream, _fs.getConf());

			return true;
		} catch (Exception e) {
			logger.error("create line reader failed --" + e.getMessage(), e);
			return false;
		}
	}

	/**
	 * Reads up to {@code lineCont} lines from the file into {@code dataList}.
	 * When end of file is reached the underlying stream is closed, so a
	 * subsequent call returns false.
	 *
	 * @param dataList receives the lines read (appended; existing entries count
	 *                 toward the {@code lineCont} limit)
	 * @param lineCont maximum number of lines {@code dataList} should hold
	 * @return true when reading succeeded (including a clean end of file),
	 *         false on a read error or when the reader is not initialized
	 */
	public boolean next(List<String> dataList, int lineCont) {
		if (null == this._lineReader || null == dataList) {
			return false;
		}

		Text line = new Text();
		while (dataList.size() < lineCont) {
			try {
				// readLine returns <= 0 when the file is exhausted.
				if (this._lineReader.readLine(line) <= 0) {
					CloseFileStream();
					break;
				}
			} catch (Exception e) {
				logger.error("read file failed --" + e.getMessage(), e);
				CloseFileStream();
				return false;
			}
			dataList.add(line.toString());
		}
		logger.info("get data count: " + dataList.size());

		return true;
	}

	/**
	 * Closes the input stream and the line reader.
	 *
	 * Each resource is closed independently: the original code dereferenced
	 * both after a single null-OR check (NPE when only one was set) and a
	 * failure closing the stream skipped closing the reader.
	 */
	private void CloseFileStream() {
		try {
			if (this._fsInputStream != null) {
				_fsInputStream.close();
			}
		} catch (IOException e) {
			logger.error("CloseFileStream() failed --" + e.getMessage(), e);
		} finally {
			_fsInputStream = null;
		}

		try {
			if (this._lineReader != null) {
				_lineReader.close();
			}
		} catch (IOException e) {
			logger.error("CloseFileStream() failed --" + e.getMessage(), e);
		} finally {
			_lineReader = null;
		}
	}
}


写文件到HDFS上Demo

package com.utils;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * 写文件到Hdfs上
 *
 * @author chichuduxing
 * @date 2016年9月19日 上午11:19:14
 */

public class HdfsWriter {
	/**
	 * Logger.
	 */
	private static final Logger logger = LoggerFactory.getLogger(HdfsWriter.class);

	/**
	 * Final HDFS output path; data is staged in "<path>.tmp" until the stream
	 * is closed, then renamed to this path.
	 */
	private final String _hdfsOutDirectory;

	/**
	 * Temporary staging file ("<output path>.tmp").
	 */
	private Path _tmpFilePath;

	/**
	 * HDFS file system handle, supplied by the caller.
	 */
	private FileSystem _fs = null;

	/**
	 * Output stream to the temporary file; null until {@link #init()} succeeds.
	 */
	private OutputStream _outputStream = null;

	/**
	 * Whether any data has been written; guards against renaming an empty file.
	 */
	private boolean _ifWriteData = false;

	/**
	 * @param fs      HDFS file system handle
	 * @param outPath final output file path (e.g. /tmp/output/test.txt)
	 */
	public HdfsWriter(FileSystem fs, String outPath) {
		this._hdfsOutDirectory = outPath;
		this._fs = fs;
	}

	/**
	 * Initializes the temporary file and the output stream.
	 *
	 * @return true on success, false when stream creation failed
	 */
	public boolean init() throws Exception {
		logger.info("HdfsWriter.init() start...");
		try {
			// Create the staging file and open the append stream.
			InitOutStream();
		} catch (Exception e) {
			logger.error("HdfsLoader.Init() failed.", e);

			return false;
		}
		logger.info("HdfsWriter.init() done...");
		return true;
	}

	/**
	 * Creates the temporary file (replacing any stale one) and opens an
	 * append stream to it.
	 *
	 * @throws Exception when the file cannot be created or opened
	 */
	private void InitOutStream() throws Exception {
		try {
			// Stage into "<output path>.tmp"; renamed on close.
			String tmp_file_name = this._hdfsOutDirectory + ".tmp";
			this._tmpFilePath = new Path(tmp_file_name);

			// Remove a leftover temp file from a previous run.
			if (this._fs.exists(this._tmpFilePath)) {
				this._fs.delete(this._tmpFilePath, true);
			}

			if (!this._fs.createNewFile(this._tmpFilePath)) {
				throw new Exception("create tmp hdfs file failed. --" + this._tmpFilePath);
			}
			logger.info("create hdfs tmp file success: " + tmp_file_name);

			// Open the append stream to the freshly created file.
			this._outputStream = this._fs.append(this._tmpFilePath);
		} catch (Exception e) {
			this._outputStream = null;
			throw new Exception("HdfsWriter.InitOutStream() failed.", e);
		}
	}

	/**
	 * Writes the given text to the temporary HDFS file. On a write failure
	 * the stream is closed (which also promotes any already-written data)
	 * and re-initialized.
	 *
	 * @param data text to append; ignored when null/empty or the stream is closed
	 */
	public void WriteData(String data) {
		if (null == data || data.isEmpty() || null == this._outputStream)
			return;

		// Encode explicitly as UTF-8; the original used the platform-default
		// charset, which makes output machine-dependent.
		byte[] bcp_bytes = data.getBytes(StandardCharsets.UTF_8);

		if (0 < bcp_bytes.length) {
			try {
				this._outputStream.write(bcp_bytes);
				this._outputStream.flush();

				logger.info("upload bcp data success. --" + this._tmpFilePath.toString());

				this._ifWriteData = true;
			} catch (Exception ex) {
				logger.error("##hdfs write error##: " + ex.getMessage(), ex);

				// Close (and promote) the current temp file, then start a new one.
				CloseFileStream();
				try {
					InitOutStream();
				} catch (Exception e) {
					logger.error(e.getMessage(), e);
				}
			}
		}
	}

	/**
	 * Closes the output stream and promotes the temporary file to the final
	 * output path.
	 */
	public void CloseFileStream() {
		try {
			if (null != this._outputStream) {
				logger.info("close the hdfs file stream.");
				this._outputStream.close();
			}
		} catch (Exception e) {
			logger.error("close the hdfs file stream failed,file is:" + this._tmpFilePath, e);
		} finally {
			this._outputStream = null;
		}

		// Promote the staged file to its final name.
		RenameFile();
	}

	/**
	 * Renames the temporary file to the final output path, replacing any
	 * existing file. Skipped when nothing was written, to avoid producing an
	 * empty output file.
	 */
	private void RenameFile() {
		if (!this._ifWriteData)
			return;

		Path res_path = new Path(this._hdfsOutDirectory);

		try {
			// Replace a pre-existing file at the destination.
			if (this._fs.exists(res_path)) {
				this._fs.delete(res_path, true);
			}
			if (!this._fs.rename(this._tmpFilePath, res_path)) {
				logger.error("rename [" + this._tmpFilePath + "] to [" + res_path + "] failed.");
				return;
			}
		} catch (IOException e) {
			logger.error("rename the hdfs file failed,file is:" + this._tmpFilePath, e);
			// Bug fix: the original fell through and logged "ok" after an
			// IOException; return instead so failure is not reported as success.
			return;
		}
		logger.info("rename [" + this._tmpFilePath + "] to [" + res_path + "] ok.");
	}
}



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值