Loading data from HDFS into HBase with the Java API

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

/**
 * Bulk-imports data from HDFS into HBase.
 *
 * The sample input file contains internet-access records from mobile devices.
 */
public class BatchImport {
	static class BatchImportMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
		SimpleDateFormat dateformat1 = new SimpleDateFormat("yyyyMMddHHmmss");
		Text v2 = new Text();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
			final String[] splited = value.toString().split("\t");
			try {
				final Date date = new Date(Long.parseLong(splited[0].trim()));// access timestamp
				final String dateFormat = dateformat1.format(date);
				String rowKey = splited[1] + ":" + dateFormat;// phone number + access time form the row key
				v2.set(rowKey + "\t" + value.toString());// row key + original line text as value2
				context.write(key, v2);// emits <file offset, row key + line text>
			} catch (NumberFormatException e) {
				final Counter counter = context.getCounter("BatchImport", "ErrorFormat");
				counter.increment(1L);
				System.out.println("parse error: " + splited[0] + " " + e.getMessage());
			}
		}
	}
	
	/**
	 * The reducer extends TableReducer so that it can emit Put objects to HBase.
	 */
	static class BatchImportReducer extends TableReducer<LongWritable, Text, NullWritable> {
		@Override
		protected void reduce(LongWritable key, java.lang.Iterable<Text> values, Context context) throws java.io.IOException, InterruptedException {
			for (Text text : values) {
				final String[] splited = text.toString().split("\t");

				final Put put = new Put(Bytes.toBytes(splited[0]));// the argument is the row key
				// arguments: column family, column qualifier, value
				put.add(Bytes.toBytes("cf"), Bytes.toBytes("date"), Bytes.toBytes(splited[1]));
				put.add(Bytes.toBytes("cf"), Bytes.toBytes("msisdn"), Bytes.toBytes(splited[2]));
				// the remaining fields are added with further put.add(...) calls
				context.write(NullWritable.get(), put);
			}
		}
	}
	
	public static void main(String[] args) throws Exception {
		final Configuration configuration = new Configuration();
		// point the client at the ZooKeeper quorum used by HBase
		configuration.set("hbase.zookeeper.quorum", "xxc");
		// name of the target HBase table
		configuration.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");
		// raise the socket timeout so HBase does not abort long-running writes
		configuration.set("dfs.socket.timeout", "180000");

		final Job job = new Job(configuration, "HBaseBatchImport");

		job.setMapperClass(BatchImportMapper.class);
		job.setReducerClass(BatchImportReducer.class);
		// only the map output types are set; the reduce output is handled by TableOutputFormat
		job.setMapOutputKeyClass(LongWritable.class);
		job.setMapOutputValueClass(Text.class);

		job.setInputFormatClass(TextInputFormat.class);
		// no output path is set; the output format class is set instead
		job.setOutputFormatClass(TableOutputFormat.class);

		FileInputFormat.setInputPaths(job, "hdfs://xxc:9000/input");

		job.waitForCompletion(true);
	}
}
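
The job above writes into the wlan_log table and the cf column family, so that table has to exist before the import is submitted. The quickest way is the HBase shell (create 'wlan_log', 'cf'). The sketch below does the same thing from Java; it is only an illustration and assumes the same pre-1.0-style client API (HBaseAdmin, HTableDescriptor) that the rest of this post's code uses, plus the xxc ZooKeeper host from the driver:

```
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

/** Creates the target table once, before the import job runs. */
public class CreateWlanLogTable {
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "xxc");// same quorum host as the import job

		HBaseAdmin admin = new HBaseAdmin(conf);
		try {
			if (!admin.tableExists("wlan_log")) {
				// table name and column family must match what BatchImportReducer writes
				HTableDescriptor table = new HTableDescriptor(TableName.valueOf("wlan_log"));
				table.addFamily(new HColumnDescriptor("cf"));
				admin.createTable(table);
			}
		} finally {
			admin.close();
		}
	}
}
```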

To load a file on HDFS into HBase, you can implement it as a Java MapReduce program. The steps are:

1. Create the HBase table first, either with the HBase shell or with the Java API;
2. Write the MapReduce program: the map phase reads the file from HDFS and turns each record into a Put object, which is then written into HBase;
3. Set the HBase table name, column family and column names as parameters of the MapReduce program;
4. Before running, add the HBase jars and configuration files to the classpath (one way to do this from the driver is sketched after this section);
5. Finally, submit the MapReduce job.

Example driver code:

```
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "HDFS to HBase");
job.setJarByClass(HdfsToHBase.class);
job.setMapperClass(HdfsToHBaseMapper.class);
FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input"));
TableMapReduceUtil.initTableReducerJob("table_name", HdfsToHBaseReducer.class, job);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
```

The HdfsToHBaseMapper class looks like this:

```
public class HdfsToHBaseMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    private final static byte[] CF = "cf".getBytes();
    private final static byte[] COLUMN = "column".getBytes();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        String rowKey = fields[0];
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(CF, COLUMN, Bytes.toBytes(fields[1]));
        context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)), put);
    }
}
```

And the HdfsToHBaseReducer class:

```
public class HdfsToHBaseReducer extends TableReducer<ImmutableBytesWritable, Put, ImmutableBytesWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        for (Put put : values) {
            context.write(key, put);
        }
    }
}
```

In the code above, CF and COLUMN are the column family and the column name of the HBase table; change them to match your table. Likewise, adjust the HDFS input path, the HBase table name and the other parameters to your environment.
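
For step 4 above, one option is to handle the classpath from the driver itself rather than editing the cluster configuration: HBaseConfiguration.create() reads hbase-site.xml when it is on the local classpath, and TableMapReduceUtil.addDependencyJars ships the HBase client jars to the task nodes through the distributed cache. A minimal sketch against the hypothetical HdfsToHBase driver shown above:

```
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;

// ...

Configuration conf = HBaseConfiguration.create();// picks up hbase-site.xml from the local classpath
Job job = Job.getInstance(conf, "HDFS to HBase");
job.setJarByClass(HdfsToHBase.class);
// ... mapper/reducer/input setup as in the driver snippet above ...

// Ship the HBase client jars (and the jars of the job's configured classes)
// to the cluster through the distributed cache, so the task JVMs can load them
// without a cluster-wide classpath change.
TableMapReduceUtil.addDependencyJars(job);
```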