为HBase构建辅助索引



import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Map-only MapReduce job that populates secondary-index tables for an HBase table.
 *
 * <p>Arguments: {@code <table> <columnFamily> <qualifier> [<qualifier> ...]}. For example,
 * {@code heroes info name email} indexes the columns {@code info:name} and {@code info:email}
 * of table {@code heroes}.
 *
 * <p>For every indexed qualifier the job writes to a table named {@code <table>-<qualifier>}
 * (e.g. {@code heroes-name}). Each index row uses the source cell value as its row key and
 * stores the source row key in the single column {@code INDEX:ROW}. This class never creates
 * the index tables; presumably they must exist before the job runs.
 */
public class HBaseIndexBuilder extends Configured implements Tool{
    /**
     * Process-wide configuration assigned by {@link #main(String[])}. Kept because the field is
     * visible to other classes in the package; when it is non-null {@link #run(String[])} uses
     * it, otherwise the configuration injected through {@code setConf} is used.
     */
    static Configuration conf;
    // The index table has exactly one column, INDEX:ROW, where INDEX is the column family.
    public static final byte[] INDEX_COLUMN = Bytes.toBytes("INDEX");
    public static final byte[] INDEX_QUALIFIER = Bytes.toBytes("ROW");

    /**
     * Mapper that turns every source row into one {@link Put} per indexed column, addressed to
     * the matching index table.
     */
    public static class MyMapper extends Mapper<ImmutableBytesWritable,Result,ImmutableBytesWritable,Writable>{
        /** Column family of the source table that holds the indexed columns. */
        private byte[] family;
        // Maps "qualifier" -> "table-qualifier". The key is used to read the cell whose value
        // becomes the index row key; the value is the name of the index table to write to.
        // The map is only iterated, never looked up by key, so the identity-based hashing of
        // byte[] keys does no harm here.
        private Map<byte[],ImmutableBytesWritable> indexes;

        /**
         * Reads the job parameters published by {@code configureJob} and prepares the
         * qualifier-to-index-table mapping.
         *
         * @throws IOException if any of {@code index.tablename}, {@code index.familyname} or
         *         {@code index.fields} is missing from the job configuration
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            Configuration configuration = context.getConfiguration();
            String tableName = configuration.get("index.tablename");
            String familyName = configuration.get("index.familyname");
            // The qualifiers that need an index.
            String[] fields = configuration.getStrings("index.fields");

            // Fail with a clear message instead of an anonymous NullPointerException.
            if (tableName == null || familyName == null || fields == null || fields.length == 0) {
                throw new IOException("Missing index configuration: index.tablename, "
                        + "index.familyname and index.fields must all be set");
            }

            family = Bytes.toBytes(familyName);
            indexes = new HashMap<byte[], ImmutableBytesWritable>();
            for (String field : fields) {
                // Indexing "name" on table "heroes" targets the index table "heroes-name".
                indexes.put(Bytes.toBytes(field),
                        new ImmutableBytesWritable(Bytes.toBytes(tableName + "-" + field)));
            }
        }

        /**
         * Emits one index entry per indexed column that is present in the row.
         *
         * @param rowkey row key of the source row
         * @param result cells of the source row
         */
        @Override
        protected void map(ImmutableBytesWritable rowkey, Result result, Context context) throws IOException, InterruptedException {
            for (Map.Entry<byte[],ImmutableBytesWritable> index : indexes.entrySet()) {
                byte[] qualifier = index.getKey();                   // e.g. name
                ImmutableBytesWritable tableName = index.getValue(); // e.g. heroes-name
                byte[] value = result.getValue(family, qualifier);   // cell at family:qualifier

                // Skip rows without the column. A zero-length value is skipped too, because
                // it cannot serve as a meaningful index row key.
                if (value != null && value.length > 0) {
                    // The cell value becomes the index row key; INDEX:ROW stores the source row key.
                    Put put = new Put(value);
                    put.add(INDEX_COLUMN, INDEX_QUALIFIER, rowkey.get());
                    // With MultiTableOutputFormat the output key names the target table and
                    // the value must be a Put or a Delete.
                    context.write(tableName, put);
                }
            }
        }
    }

    /**
     * Validates the arguments, configures the job and blocks until it finishes.
     *
     * @param args {@code <table> <columnFamily> <qualifier> [<qualifier> ...]}
     * @return 0 on success, 1 if the job failed, -1 if too few arguments were supplied
     */
    @Override
    public int run(String[] args) throws Exception {
        // Validate here as well as on the command-line path so that callers driving the tool
        // through ToolRunner directly get the same check.
        if (args == null || args.length < 3) {
            System.err.println("参数过少");
            return -1;
        }
        Job job = configureJob(resolveConfiguration(), args);

        return job.waitForCompletion(true) ? 0 : 1 ;
    }

    /**
     * Picks the configuration to run with: the static {@link #conf} if it has been set,
     * otherwise the one injected via {@code setConf}, otherwise a fresh HBase configuration.
     */
    private Configuration resolveConfiguration() {
        if (conf != null) {
            return conf;
        }
        Configuration injected = getConf();
        return injected != null ? injected : HBaseConfiguration.create();
    }

    /**
     * Builds the map-only indexing job.
     *
     * @param conf      configuration to publish the job parameters into
     * @param otherArgs {@code [table, columnFamily, qualifier...]}; must hold at least 3 entries
     * @return the configured, not yet submitted job
     */
    private Job configureJob(Configuration conf, String[] otherArgs) throws IOException {
        String tableName = otherArgs[0];    // e.g. heroes
        String columnFamily = otherArgs[1]; // e.g. info

        conf.set(TableInputFormat.INPUT_TABLE, tableName); // "hbase.mapreduce.inputtable"
        conf.set("index.tablename", tableName);
        // Publish the family supplied by the caller. It must not be overwritten with a
        // hard-coded name afterwards, or the mapper would look in the wrong column family.
        conf.set("index.familyname", columnFamily);

        // Everything after the table and the family is a qualifier to index (e.g. name, email).
        String[] fields = new String[otherArgs.length - 2];
        System.arraycopy(otherArgs, 2, fields, 0, fields.length);
        conf.setStrings("index.fields", fields);

        Job job = Job.getInstance(conf, this.getClass().getName());
        job.setJarByClass(HBaseIndexBuilder.class);
        job.setMapperClass(MyMapper.class);
        job.setNumReduceTasks(0); // map-only: the Puts go straight to the output format
        job.setInputFormatClass(TableInputFormat.class);
        job.setOutputFormatClass(MultiTableOutputFormat.class);

        return job;
    }

    /**
     * Command-line entry point. Generic Hadoop options are parsed by {@link ToolRunner}; the
     * remaining arguments are handed to {@link #run(String[])}.
     */
    public static void main(String[] args) throws Exception {
        conf = HBaseConfiguration.create();
        // Passing the configuration lets ToolRunner apply generic options to it and inject it
        // into the tool, so the static field and getConf() refer to the same object.
        int exitCode = ToolRunner.run(conf, new HBaseIndexBuilder(), args);
        System.exit(exitCode);
    }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值