HBase Bulk Loading

The MapReduce job below reads a CSV file from HDFS, turns each record into KeyValues, writes them out as HFiles with HFileOutputFormat2, and finally loads those HFiles into an existing HBase table with LoadIncrementalHFiles.
package com.shujia.bulkloading;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.mapreduce.SimpleTotalOrderPartitioner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class BulkLoadingDemo {
/**
 * Map output type requirements:
 * 1. The key is the RowKey, so it must be a byte-based type (ImmutableBytesWritable) and the RowKeys must be sortable.
 */
public static class BulkLoadingMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue>.Context context) throws IOException, InterruptedException {
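// Assumed layout of /data/DIANXIN.csv: comma-separated, fields 0 and 1 together form the RowKey,
// field 2 is the county, fields 3 and 4 are the x/y coordinates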
String[] line = value.toString().split(",");
String rowKey = line[0] + line[1];
String county = line[2];
String x = line[3];
String y = line[4];
/**
* KeyValue(final byte [] row,final byte [] family,
* final byte [] qualifier,final byte [] value)
*/
KeyValue county_kv = new KeyValue(
        Bytes.toBytes(rowKey),
        Bytes.toBytes("info"),
        Bytes.toBytes("county"),
        Bytes.toBytes(county)
);
KeyValue x_kv = new KeyValue(
        Bytes.toBytes(rowKey),
        Bytes.toBytes("info"),
        Bytes.toBytes("x"),
        Bytes.toBytes(x)
);
KeyValue y_kv = new KeyValue(
        Bytes.toBytes(rowKey),
        Bytes.toBytes("info"),
        Bytes.toBytes("y"),
        Bytes.toBytes(y)
);
context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)),county_kv);
context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)),x_kv);
context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)),y_kv);
}
}
public static void main(String[] args) throws Exception {
// Basic configuration
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.quorum","node1:2181,node2:2181,master:2181");
Job job = Job.getInstance(conf);
job.setJobName("BulkLoadingDemo");
job.setJarByClass(BulkLoadingDemo.class);
// Set the Mapper (the sort Reducer is configured further down)
job.setMapperClass(BulkLoadingMapper.class);
// Partitioner that keeps RowKeys globally ordered across partitions
job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
// Input and output paths (the output directory must not exist before the job runs)
TextInputFormat.addInputPath(job,new Path("/data/DIANXIN.csv"));
FileOutputFormat.setOutputPath(job,new Path("/data/dianxin_bulk/"));
// A sort Reducer is required: KeyValues must be written to the HFiles in globally sorted RowKey order
job.setReducerClass(KeyValueSortReducer.class);
Connection hbaseConn = ConnectionFactory.createConnection(conf);
RegionLocator regionLocator = hbaseConn.getRegionLocator(TableName.valueOf("dianxin_bulk"));
// The HBase table must be created in advance, e.g. in the HBase shell: create 'dianxin_bulk','info'
Table dianxin_bulk = hbaseConn.getTable(TableName.valueOf("dianxin_bulk"));
HTableDescriptor tableDescriptor = dianxin_bulk.getTableDescriptor();
// Configure the job to write its output as HFiles
HFileOutputFormat2.configureIncrementalLoad(job,tableDescriptor,regionLocator);
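// Note: configureIncrementalLoad also sets HFileOutputFormat2 as the output format, installs a sorting
// Reducer that matches the map output value type, and builds a TotalOrderPartitioner from the table's
// current region boundaries so that the generated HFiles line up with the existing regions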
if (!job.waitForCompletion(true)) {
    System.exit(1);
}
// Load the generated HFiles into the table
LoadIncrementalHFiles incrementalHFiles = new LoadIncrementalHFiles(conf);
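// doBulkLoad assigns each HFile to the region whose key range covers it (splitting files that span
// regions) and moves it into that region's store, making the data immediately visible to reads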
incrementalHFiles.doBulkLoad(new Path("/data/dianxin_bulk/"), hbaseConn.getAdmin(), dianxin_bulk, regionLocator);
hbaseConn.close();
}
}
Package the job as a jar, upload it to the cluster, and run it.
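A minimal sketch of that step, assuming a Maven build; the jar name, node name and local paths below are assumptions, not part of the original project:

# Build the jar and copy it to a cluster node
mvn clean package
scp target/hbase-bulkloading.jar master:/root/

# Put the input data on HDFS and create the target table with one column family
hdfs dfs -put DIANXIN.csv /data/
echo "create 'dianxin_bulk','info'" | hbase shell

# Remove any old output directory, add the HBase jars to the classpath if the jar is not shaded, then run the job
hdfs dfs -rm -r -f /data/dianxin_bulk
export HADOOP_CLASSPATH=$(hbase classpath)
hadoop jar hbase-bulkloading.jar com.shujia.bulkloading.BulkLoadingDemo

After the job finishes, a scan 'dianxin_bulk' in the HBase shell should show the loaded rows.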