HBase Bulk Loading

Bulk loading uses a MapReduce job to write the data directly as HFiles and then hands those files to HBase, loading them into an existing table without going through the normal write path (WAL and MemStore).

package com.shujia.bulkloading;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.Import;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.mapreduce.SimpleTotalOrderPartitioner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class BulkLoadingDemo {

    /**
     * Output type requirements:
     *   1. The map output key is the RowKey, so it must be a byte-based type
     *      (ImmutableBytesWritable) that supports sorting; the value is a KeyValue cell.
     */
    public static class BulkLoadingMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue>{
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue>.Context context) throws IOException, InterruptedException {
            // Each CSV line: fields 0 and 1 form the RowKey; fields 2-4 are
            // the county, x and y column values
            String[] line = value.toString().split(",");
            String rowKey = line[0] + line[1];

            String county = line[2];
            String x = line[3];
            String y = line[4];

            /**
             * KeyValue(final byte [] row,final byte [] family,
             *          final byte [] qualifier,final byte [] value)
             */
            KeyValue county_kv = new KeyValue(
                    Bytes.toBytes(rowKey)
                    , "info".getBytes()
                    , "county".getBytes()
                    ,Bytes.toBytes(county)
            );
            KeyValue x_kv = new KeyValue(
                    Bytes.toBytes(rowKey)
                    , "info".getBytes()
                    , "x".getBytes()
                    ,Bytes.toBytes(x)
            );
            KeyValue y_kv = new KeyValue(
                    Bytes.toBytes(rowKey)
                    , "info".getBytes()
                    , "y".getBytes()
                    ,Bytes.toBytes(y)
            );

            context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)),county_kv);
            context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)),x_kv);
            context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)),y_kv);
        }
    }

    public static void main(String[] args) throws Exception {
        // Basic configuration: point the client at the HBase ZooKeeper quorum
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum","node1:2181,node2:2181,master:2181");

        Job job = Job.getInstance(conf);
        job.setJobName("BulkLoadingDemo");
        job.setJarByClass(BulkLoadingDemo.class);

        // Set the Mapper (the Reducer is configured below)
        job.setMapperClass(BulkLoadingMapper.class);

        // Partitioner that keeps RowKeys globally ordered across reduce tasks
        job.setPartitionerClass(SimpleTotalOrderPartitioner.class);

        // Input path (CSV source) and output path (generated HFiles)
        TextInputFormat.addInputPath(job,new Path("/data/DIANXIN.csv"));
        FileOutputFormat.setOutputPath(job,new Path("/data/dianxin_bulk/"));

        // A Reducer is needed so that cells are written in globally sorted
        // RowKey order (HFileOutputFormat2.configureIncrementalLoad below also
        // installs a sorting reducer and total-order partitioner)
        job.setReducerClass(Import.KeyValueReducer.class);


        // Connect to HBase. The target table must already exist,
        // e.g. created in the shell with: create 'dianxin_bulk','info'
        Connection hbaseConn = ConnectionFactory.createConnection(conf);

        RegionLocator regionLocator = hbaseConn.getRegionLocator(TableName.valueOf("dianxin_bulk"));
        Table dianxin_bulk = hbaseConn.getTable(TableName.valueOf("dianxin_bulk"));
        HTableDescriptor tableDescriptor = dianxin_bulk.getTableDescriptor();


        // Configure the job to emit HFiles matching the table's column
        // families and current region boundaries
        HFileOutputFormat2.configureIncrementalLoad(job,tableDescriptor,regionLocator);
        job.waitForCompletion(true);

        // Load the generated HFiles into the target table's regions
        LoadIncrementalHFiles incrementalHFiles = new LoadIncrementalHFiles(conf);
        incrementalHFiles.doBulkLoad(new Path("/data/dianxin_bulk/"),hbaseConn.getAdmin(),dianxin_bulk,regionLocator);

        hbaseConn.close();
    }
}
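
The job assumes the target table already exists. Besides the shell command noted in the code (create 'dianxin_bulk','info'), the table can also be created from Java. The sketch below is a minimal, hypothetical helper (the class name CreateBulkTable is not part of the original project) that assumes the same ZooKeeper quorum and the HBase 1.x client API used above:

package com.shujia.bulkloading;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateBulkTable {
    public static void main(String[] args) throws Exception {
        // Assumed quorum, copied from the job configuration above
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");

        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            TableName name = TableName.valueOf("dianxin_bulk");
            // Create 'dianxin_bulk' with the 'info' column family if it does not exist
            if (!admin.tableExists(name)) {
                HTableDescriptor desc = new HTableDescriptor(name);
                desc.addFamily(new HColumnDescriptor("info"));
                admin.createTable(desc);
            }
        }
    }
}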

Package the project into a jar, upload it to the cluster, and submit the job.
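
A typical submit command looks roughly like the following (the jar name is an assumption; the HBase client dependencies are assumed to be packaged into the jar or otherwise available on the job's classpath):

hadoop jar hbase-bulkloading-demo.jar com.shujia.bulkloading.BulkLoadingDemo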
