Hbase-自定义MR生成Hfile

  • 自定义MR生成Hfile

生成Hfile基本流程:

1.  设置Mapper的输出KV类型:

         K: ImmutableBytesWritable(代表行键)

         V: KeyValue  (代表cell)

2.  开发Mapper

        读取你的原始数据,按你的需求做处理

        输出rowkey作为K,输出一些KeyValue(Put)作为V

3.  配置job参数

        Zookeeper的连接地址

       配置输出的OutputFormat为HFileOutputFormat2,并为其设置参数

4.  提交job

     导入HFile到RegionServer的流程

     构建一个表描述对象

     构建一个region定位工具

     然后用BulkLoadHFilesTool来doBulkload操作

  • 数据

  • 代码演示(注意:下面的示例通过 TableReducer 以 Put 的方式直接写入 HBase 表,并没有使用 HFileOutputFormat2 生成 HFile,也没有执行 BulkLoad)

import com.google.gson.Gson;
import com.google.gson.JsonSyntaxException;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;

/**
 * MapReduce job that loads JSON movie-rating records into an HBase table.
 *
 * <p>The mapper parses each input line into a {@code MovieBean} and keys it by
 * {@code <movie id left-padded to 5 with '0'>_<timestamp>}. The reducer turns each
 * key into a single {@link Put} against column family {@code cf}, which
 * {@link TableMapReduceUtil#initTableReducerJob} routes to the target table.
 */
public class LoadData {
    /** Input directory used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "D:\\mrdata\\mrdata\\movie\\input";
    /** HBase table the reducer writes to. */
    private static final String TABLE_NAME = "movie_22";
    /** Counter group for records the mapper had to skip. */
    private static final String COUNTER_GROUP = "LoadData";

    // Bytes.toBytes(String) always encodes as UTF-8; String.getBytes() would use the
    // platform default charset and could produce different row keys/values per machine.
    private static final byte[] CF = Bytes.toBytes("cf");
    private static final byte[] COL_MOVIE = Bytes.toBytes("movie");
    private static final byte[] COL_RATE = Bytes.toBytes("rate");
    private static final byte[] COL_TIMESTAMP = Bytes.toBytes("timeStamp");
    private static final byte[] COL_UID = Bytes.toBytes("Uid");

    /**
     * Parses one JSON line per call and emits {@code (rowkey, MovieBean)}.
     *
     * <p>Lines that are not valid JSON, or that lack the movie/timestamp/uid fields the
     * reducer needs, are skipped and counted instead of failing the job.
     */
    static class LoadDataMapper extends Mapper<LongWritable, Text, Text, MovieBean> {
        private final Gson gs = new Gson();
        // Reused across calls to avoid allocating a Text per record.
        private final Text k = new Text();

        /**
         * @param key     byte offset of the line in the input split (unused)
         * @param value   one line of input, expected to be a JSON object
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            MovieBean mb;
            try {
                mb = gs.fromJson(value.toString(), MovieBean.class);
            } catch (JsonSyntaxException e) {
                // Best-effort load: a bad line must not kill the job, but make it visible.
                context.getCounter(COUNTER_GROUP, "MALFORMED_JSON").increment(1);
                return;
            }
            // Gson returns null for an empty line; missing fields would otherwise yield a
            // "..._null" row key here and a NullPointerException later in the reducer.
            if (mb == null || mb.getMovie() == null
                    || mb.getTimeStamp() == null || mb.getUid() == null) {
                context.getCounter(COUNTER_GROUP, "INCOMPLETE_RECORD").increment(1);
                return;
            }
            String paddedMovie = StringUtils.leftPad(mb.getMovie(), 5, '0');
            k.set(paddedMovie + "_" + mb.getTimeStamp());
            // I/O failures from write() now propagate so the framework can retry/fail the task.
            context.write(k, mb);
        }
    }

    /**
     * Writes one HBase row per distinct row key.
     *
     * <p>Only the first value of each key group is stored. Further values for the same key
     * would target the same row and columns, so they are intentionally not written.
     */
    static class LoadDataReducer extends TableReducer<Text, MovieBean, ImmutableBytesWritable> {
        /**
         * @param key     row key produced by the mapper
         * @param values  all records sharing that row key; only the first is used
         * @param context task context used to emit the {@link Put}
         * @throws IOException          if writing the mutation fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<MovieBean> values, Context context)
                throws IOException, InterruptedException {
            Put put = new Put(Bytes.toBytes(key.toString()));
            MovieBean mb = values.iterator().next();
            put.addColumn(CF, COL_MOVIE, Bytes.toBytes(mb.getMovie()));
            put.addColumn(CF, COL_RATE, Bytes.toBytes(mb.getRate()));
            put.addColumn(CF, COL_TIMESTAMP, Bytes.toBytes(mb.getTimeStamp()));
            put.addColumn(CF, COL_UID, Bytes.toBytes(mb.getUid()));
            // The output key is passed as null, exactly as before; the row is carried by the Put.
            context.write(null, put);
        }
    }

    /**
     * Configures and runs the load job.
     *
     * @param args optional; {@code args[0]} overrides the default input directory
     * @throws Exception if job setup or execution fails, or the job finishes unsuccessfully
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "linux01:2181,linux02:2181,linux03:2181");
        Job job = Job.getInstance(conf, "load");
        // Needed so the mapper/reducer classes are shipped when the job runs on a cluster.
        job.setJarByClass(LoadData.class);

        job.setMapperClass(LoadDataMapper.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MovieBean.class);

        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        TableMapReduceUtil.initTableReducerJob(TABLE_NAME, LoadDataReducer.class, job);

        // Surface job failure to the caller instead of silently returning success.
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("MapReduce job 'load' did not complete successfully");
        }
    }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值