MR操作hbase的一点心得(含hbase表拷贝样例代码)

最近在写基于hbase的MR程序。总结如下:

      1、使用TableMapper来读取表

      2、写入表的第一种方式是用TableMapReduceUtil.initTableReducerJob的方法,这里既可以在map阶段输出,也能在reduce阶段输出。区别是Reduce的class设置为null或者设置为实际的reduce类。以下是一个表copy的例子:

  1. package com.run.test; 
  2.  
  3. import java.io.IOException; 
  4. import java.util.List; 
  5. import org.apache.hadoop.conf.Configuration; 
  6. import org.apache.hadoop.conf.Configured; 
  7. import org.apache.hadoop.hbase.KeyValue; 
  8. import org.apache.hadoop.hbase.client.Put; 
  9. import org.apache.hadoop.hbase.client.Result; 
  10. import org.apache.hadoop.hbase.client.Scan; 
  11. import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 
  12. import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; 
  13. import org.apache.hadoop.hbase.mapreduce.TableMapper; 
  14. import org.apache.hadoop.mapreduce.Job; 
  15. import org.apache.hadoop.util.Tool; 
  16.  
  17. public class TableCopy extends Configured implements Tool{ 
  18.      
  19.     static class CopyMapper extends TableMapper<ImmutableBytesWritable,Put>{ 
  20.  
  21.         @Override 
  22.         protected void map(ImmutableBytesWritable key, Result value, 
  23.                 Context context) throws IOException, InterruptedException { 
  24.             // TODO Auto-generated method stub 
  25.             //将查询结果保存到list 
  26.             List<KeyValue> kvs =  value.list(); 
  27. Put p = new Put(key.get()); 
  28.             //将结果装载到Put 
  29.             for(KeyValue kv : kvs) 
  30.                 p.add(kv); 
  31.             //将结果写入到Reduce 
  32.             context.write(key, p); 
  33.         } 
  34.          
  35.     } 
  36.      
  37.     public static Job createSubmittableJob(Configuration conf, String[] args)throws IOException{ 
  38.         String jobName = args[0]; 
  39.         String srcTable = args[1]; 
  40.         String dstTable = args[2]; 
  41.         Scan sc = new Scan(); 
  42.         sc.setCaching(10000); 
  43.         sc.setCacheBlocks(false); 
  44.         Job job = new Job(conf,jobName); 
  45.         job.setJarByClass(TableCopy.class); 
  46.         job.setNumReduceTasks(0); 
  47.         TableMapReduceUtil.initTableMapperJob(srcTable, sc, CopyMapper.class, ImmutableBytesWritable.class, Result.class, job); 
  48.         TableMapReduceUtil.initTableReducerJob(dstTable, null, job); 
  49.         return job; 
  50.          
  51.     } 
  52.      
  53.     @Override 
  54.     public int run(String[] args)throws Exception{ 
  55.         Job job = createSubmittableJob(getConf(), args); 
  56.         return job.waitForCompletion(true)? 0 : 1; 
  57.     } 
  58.      
package com.run.test;

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;

/**
 * Map-only MapReduce job that copies one HBase table into another.
 *
 * <p>Usage (via ToolRunner): {@code TableCopy <jobName> <srcTable> <dstTable>}.
 * Rows are scanned from the source table and written back as {@link Put}s
 * through {@code TableOutputFormat}; no reduce phase is used.
 */
public class TableCopy extends Configured implements Tool{

	/**
	 * Copies every cell of a scanned row into a single {@link Put}
	 * addressed to the same row key.
	 */
	static class CopyMapper extends TableMapper<ImmutableBytesWritable,Put>{

		@Override
		protected void map(ImmutableBytesWritable key, Result value,
				Context context) throws IOException, InterruptedException {
			// All cells of the current row; may be null for an empty Result.
			List<KeyValue> kvs = value.list();
			// A Put must carry the row key. The original code used the
			// no-arg Put() constructor, which leaves the row unset and
			// fails when the mutation is written.
			Put p = new Put(key.get());
			if (kvs != null) {
				// Re-attach each existing cell to the outgoing Put.
				for (KeyValue kv : kvs) {
					p.add(kv);
				}
			}
			// Emitted Puts are written directly to the destination table.
			context.write(key, p);
		}

	}

	/**
	 * Builds the map-only copy job.
	 *
	 * @param conf Hadoop/HBase configuration (must contain the HBase
	 *             connection settings, e.g. zookeeper quorum)
	 * @param args {@code [jobName, sourceTable, destinationTable]}
	 * @return the fully configured, not yet submitted {@link Job}
	 * @throws IOException if job or table initialization fails
	 * @throws IllegalArgumentException if fewer than three arguments are given
	 */
	public static Job createSubmittableJob(Configuration conf, String[] args)throws IOException{
		if (args == null || args.length < 3) {
			throw new IllegalArgumentException(
					"Expected arguments: <jobName> <sourceTable> <destinationTable>");
		}
		String jobName = args[0];
		String srcTable = args[1];
		String dstTable = args[2];
		Scan sc = new Scan();
		// Large scanner caching speeds up the sequential full-table scan;
		// block caching is disabled so this one-off scan does not evict
		// hot data from the region servers' block cache.
		sc.setCaching(10000);
		sc.setCacheBlocks(false);
		Job job = new Job(conf,jobName);
		job.setJarByClass(TableCopy.class);
		// Map-only job: mappers write Puts straight to the output table.
		job.setNumReduceTasks(0);
		// The mapper emits Put values, so the declared output value class
		// must be Put.class (the original passed Result.class).
		TableMapReduceUtil.initTableMapperJob(srcTable, sc, CopyMapper.class, ImmutableBytesWritable.class, Put.class, job);
		// Reducer class is null: output happens in the map phase, but this
		// call still wires up TableOutputFormat for the destination table.
		TableMapReduceUtil.initTableReducerJob(dstTable, null, job);
		return job;
	}

	@Override
	public int run(String[] args)throws Exception{
		Job job = createSubmittableJob(getConf(), args);
		return job.waitForCompletion(true)? 0 : 1;
	}

}
 
3、写入表的方式还有一种,就是调用hbase的原生api,即HTable.put的方式写入数据(这种方式适合写少量数据,或者统计后的结果)
展开阅读全文

没有更多推荐了,返回首页