Problems encountered while writing the HFiles:
1) Many records share the same rowkey, so data with a newer timestamp has to overwrite data with an older timestamp;
2) If every record were written out as an HFile, the resulting HDFS dataset would be huge; with the code in this post, roughly 2 TB would be generated per run;
3) After all of that data is imported, HBase splits regions based on the maximum HFile store size, which limits the table's performance;
4) All of the overwrite handling would be left to HBase's compaction, further weakening HBase performance.
【
Compaction:
In HBase, every memstore flush to disk produces a StoreFile. As the number of StoreFiles grows, read performance degrades badly, so the excess StoreFiles must be merged. Compaction is the key operation of the buffer-flush-merge LSM-tree model and mainly serves to:
(1) merge files;
(2) purge deleted, expired, and excess-version data;
(3) improve read/write efficiency.
HBase implements two kinds of compaction, minor and major.
The difference between them is:
<1> A minor compaction only merges a subset of the files, plus cleaning up expired versions where minVersion=0 and a TTL is set; it does not purge deleted cells or excess versions.
<2> A major compaction merges all StoreFiles under every HStore of a Region, ending up with a single file.
】
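For reference, a major compaction can also be triggered by hand through the HBase Admin API. The sketch below only illustrates that API (the table name "my_table" is a placeholder); it is not part of the pipeline described in this post:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
public class TriggerMajorCompaction {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            // Requests an asynchronous major compaction: every HStore of the table's regions
            // merges all of its StoreFiles into one file, dropping deleted/expired/excess-version cells.
            admin.majorCompact(TableName.valueOf("my_table"));
        }
    }
}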
My solution to these problems with writing the HFiles:
In the MR code, filter out the records with stale timestamps and do the compaction-equivalent merging inside the job itself, which shrinks the total data volume and relieves the pressure that HBase splits and compactions would otherwise take. The key pieces are MapReduce's custom grouping, sorting, and partitioning.
Here is my code.
Grouping/sorting key class:
package com.xxx.xxx.xxx.xxx.usepartition.getrecentnewmileage;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* implements WritableComparable<StrTimeWritable>
* Composite key for reduce-side sorting and grouping on str + time:
* grouped by str, then ordered by time within each group.
*/
public class StrTimeWritable implements WritableComparable<StrTimeWritable> {
public String getStr() {
return str;
}
public void setStr(String str) {
this.str = str;
}
public int getTime() {
return time;
}
public void setTime(int time) {
this.time = time;
}
public String str;
public int time;
public StrTimeWritable(String str,int time){
this.str = str ;
this.time = time ;
}
// If another constructor hides the no-arg constructor, the no-arg one must be declared explicitly, otherwise deserialization throws an exception.
public StrTimeWritable(){
}
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(str);
dataOutput.writeInt(time);
}
@Override
public void readFields(DataInput dataInput) throws IOException {
this.str = dataInput.readUTF();
this.time = dataInput.readInt();
}
/*
Sort by str in ascending order first,
then by time in ascending order for equal str values.
*/
@Override
public int compareTo(StrTimeWritable o) {
if(o.str.equals(this.str)){
if(o.time==this.time){
return 0;
}else{
return this.time-o.time;
}
}else{
return this.str.compareTo(o.str);
}
}
}
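One thing worth noting: the job never registers a grouping comparator, so reduce groups are formed on the whole (str, time) key, which is exactly what the reducer below relies on (one output row per str and 5-minute bucket). If grouping purely by str were ever wanted, a grouping comparator along the following lines could be registered with job.setGroupingComparatorClass(...). This is only a sketch under that assumption, not part of the original job:
package com.xxx.xxx.xxx.xxx.usepartition.getrecentnewmileage;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Groups reduce input on the str field only, while the key's compareTo
 * still orders records by time inside each group.
 */
public class StrGroupingComparator extends WritableComparator {
    protected StrGroupingComparator() {
        super(StrTimeWritable.class, true); // true: instantiate keys for comparison
    }
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        StrTimeWritable left = (StrTimeWritable) a;
        StrTimeWritable right = (StrTimeWritable) b;
        return left.getStr().compareTo(right.getStr());
    }
}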
Partitioner class (purpose: route records of the same group to the same reducer):
package com.xxx.xxx.xxx.xxx.usepartition.getrecentnewmileage;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import java.util.HashMap;
/**
* Partitions on the hash of the whole str, so that during the reduce phase
* all records with the same str land in the same reducer.
*/
public class strpartition extends HashPartitioner<StrTimeWritable,Text> {
/*
* 1) numPartitions is whatever we set via job.setNumReduceTasks(n).
* 2) With job.setNumReduceTasks(5), numPartitions here is 5, and the default
* HashPartitioner assigns a record to key.hashCode() % numPartitions,
* so the number of partitions equals the 5 reducers we configured.
*/
@Override
public int getPartition(StrTimeWritable key,Text value , int numReduceTasks) {
//str example: adf1efdadfa 047701 (uppercase letters or digits)
//Partition on the hash of the entire str so every record of a str goes to the same reducer.
String partitionKey = key.getStr();
return (partitionKey.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
}
}
Main MapReduce classes and the main() entry point:
package com.xxx.xxx.xxx.xxx.usepartition.five_min_qu_to_hbase;
import com.xxx.Util;
import com.xxx.xxx.xxx.xxx.usepartition.getrecentnewmileage.StrTimeWritable;
import com.xxx.xxx.xxx.xxx.usepartition.getrecentnewmileage.strpartition;
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapreduce.OrcInputFormat;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;
/**
* Tested successfully on 2019/11/25.
* Uses HFileOutputFormat2.configureIncrementalLoadMap (the recommended approach,
* and the one to adopt going forward).
*/
public class TwoMRFiveMinQuV3 {
public static class HiveOrcToCellKVMapper extends
Mapper<NullWritable, OrcStruct, StrTimeWritable, Text>{
@Override
public void map(NullWritable key, OrcStruct value, Context context)
throws IOException, InterruptedException {
if (value != null && value.getFieldValue(0) != null
&& value.getFieldValue(1) != null
&& value.getFieldValue(2) != null
&& value.getFieldValue(5) != null) {
String str = value.getFieldValue(0).toString().toUpperCase();
String func = value.getFieldValue(1).toString();
int time = Integer.parseInt(value.getFieldValue(2).toString());
String data = value.getFieldValue(5).toString().toUpperCase();
int k_time = time;
if (time % (5 * 60) != 0) {
//time is not a multiple of 5 minutes: round it up to the next 5-minute boundary.
//Examples:
//  2019/11/09 13:30:01 -> kept as 2019/11/09 13:35:00
//  2019/11/09 13:34:37 -> kept as 2019/11/09 13:35:00
//  2019/11/09 13:35:00 -> kept as 2019/11/09 13:35:00 (already aligned, skips this branch)
k_time = (time / (5 * 60)) * 5 * 60 + 5 * 60;
}
StrTimeWritable strTimeWritable = new StrTimeWritable(str, k_time);
context.write(strTimeWritable,new Text(func+"\t"+time+"\t"+data));
}
}
}
public static class CellKVCompactionReducer extends
Reducer<StrTimeWritable, Text, Text,Text> {
String checkSubQuID = "";
@Override
public void setup(Context context) throws IOException {
checkSubQuID = context.getConfiguration().get("checkSubQuID");
}
@Override
public void reduce(StrTimeWritable key, Iterable<Text> values,Context context)
throws IOException, InterruptedException {
String str = key.getStr();
int k_time = key.getTime();
List<ReduceValueForSort> textList = new LinkedList<>();
for(Text val: values){//value format: func + "\t" + time + "\t" + data
if (!"".equals(val.toString())) {
String[] temp_str = val.toString().split("\t");
textList.add(new ReduceValueForSort(temp_str[0],Integer.parseInt(temp_str[1]),temp_str[2]));
}
}
textList.sort(Comparator.comparingInt(ReduceValueForSort::getTime));//sort by the original timestamp
String[] subQuIDs = checkSubQuID.split("\t");
List<String> subQuIDList = new LinkedList<>();
for (String a : subQuIDs) subQuIDList.add(a);
Map<String, String> datas_Map = new HashMap<>();
for(ReduceValueForSort rvfs : textList) {
String func = rvfs.func;
int time = rvfs.time;
String data =rvfs.data;
if (func.equals("007")) {
for (int index = 0; (index * 16) < data.length(); index++) {
String tempQuData = data.substring(index * 16, index * 16 + 16);
//Blocks 0..32 get an 8-digit, zero-padded block number prepended
//(index 7 -> "00000007", index 15 -> "00000015", ...); blocks beyond 32 are dropped.
if (index <= 32) {
String re_a = String.format("%08d", index) + tempQuData;
if (subQuIDList.contains(re_a.substring(0, 8))) {
datas_Map.put(re_a.substring(0, 10), re_a.substring(8));
} else {
datas_Map.put(re_a.substring(0, 8), re_a.substring(8));
}
}
}
} else {
for (int i = 0; i < data.length(); i = i + 24) {
String temp_qu_data = data.substring(i, i + 24);
if (subQuIDList.contains(temp_qu_data.substring(0, 8))) {
datas_Map.put(temp_qu_data.substring(0, 10), temp_qu_data.substring(8));
} else {
datas_Map.put(temp_qu_data.substring(0, 8), temp_qu_data.substring(8));
}
}
}
}
String rowKey = str.substring(11) + str.substring(0, 11) + "_" + (Integer.MAX_VALUE - k_time);
StringBuilder stringBuilder = new StringBuilder();
for (Map.Entry<String, String> entry : datas_Map.entrySet()) {
stringBuilder.append(entry.getKey()).append(":").append(entry.getValue()).append("\t");
}
context.write(new Text(rowKey), new Text(stringBuilder.toString()));
}
}
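// Note on the rowkey built in the reducer above, using a hypothetical 16-character
// str "ABCDEFGHIJK12345" and bucket k_time = 1573277700 (2019-11-09 13:35:00 UTC+8);
// these example values are only for illustration, not taken from the real data:
//   rowKey = str.substring(11) + str.substring(0, 11) + "_" + (Integer.MAX_VALUE - k_time)
//          = "12345" + "ABCDEFGHIJK" + "_" + 574205947
//          = "12345ABCDEFGHIJK_574205947"
// Moving the leading 11 characters behind the rest presumably spreads rowkeys more evenly
// across regions, and the (Integer.MAX_VALUE - k_time) suffix makes newer 5-minute buckets
// sort before older ones for the same str.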
public static class HFileMapper2 extends
Mapper<Object, Text, ImmutableBytesWritable, KeyValue> {
public static final byte[] CF = Bytes.toBytes("common");
@Override
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
//Input line: str.substring(11) + str.substring(0, 11) + "_" + (Integer.MAX_VALUE - k_time) + "\t" +
//qualifier + ":" + qu_v + "\t" + qualifier1 + ":" + qu_v1 + "\t" ...
//The cell timestamp below is recovered from the rowKey suffix as (Integer.MAX_VALUE - suffix) * 1000L milliseconds.
String[] values = value.toString().split("\t");
String rowKey = values[0];
for(int i=1;i< values.length;i++){
if(null != values[i]) {
KeyValue keyValue = new KeyValue(Bytes.toBytes(rowKey),
CF,
Bytes.toBytes(values[i].split(":")[0]),
(Integer.MAX_VALUE - Integer.parseInt(rowKey.split("_")[1])) * 1000L,
Bytes.toBytes(values[i].split(":")[1]));
context.write(new ImmutableBytesWritable(Bytes.toBytes(rowKey)), keyValue);
}
}
}
}
public static void main(String args[]) throws Exception {
// System.out.println("设置HADOOP_USER_NAME用户");
// System.setProperty("HADOOP_USER_NAME","hdfs");//仅对写HFile load到HBase中
// System.setProperty("HADOOP_USER_NAME", "hadoop");
String tableName = args[0];
String startYMD = args[1];
String stopYMD = args[2];
String functionHDFSPath = args[3];//HDFS file listing the function codes to process
String subQuIDHDFSPath = args[4];
String jobOutputHDFSPath = args[5];//intermediate (pre-processing) output directory
String jobOutputHDFSPath2 = args[6];//HFile output directory
//HBase configuration
//1. Build the configuration object
Configuration conf = HBaseConfiguration.create();
conf.set(TableOutputFormat.OUTPUT_TABLE,tableName);
conf.set("hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily", "400");
conf.set("orc.mapred.output.schema", "struct<str:String,func:int,time:int,type:int,grp:int,datas:String>");
// mrConf.set("dfs.permissions","true");
conf.set("mapreduce.map.memory.mb", "3072");
conf.set("mapreduce.reduce.memory.mb", "4096");
conf.set("yarn.nodemanager.resource.memory-mb", "8192");
conf.set("yarn.nodemanager.vmem-pmem-ratio", "5");//2.1
conf.set("mapreduce.input.fileinputformat.input.dir.recursive", "true");
conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," +
"org.apache.hadoop.io.serializer.WritableSerialization," +
"org.apache.hadoop.hbase.mapreduce.KeyValueSerialization," +
"org.apache.hadoop.hbase.mapreduce.MutationSerialization," +
"org.apache.hadoop.hbase.mapreduce.ResultSerialization" );
String sub_QuIDs = getSubQuIDAll(subQuIDHDFSPath, conf);
System.out.println(sub_QuIDs);
conf.set("checkSubQuID", sub_QuIDs);
Job job1 = Job.getInstance(conf, "FivMinQuMR_" + startYMD + "_" + stopYMD);
job1.setJarByClass(TwoMRFiveMinQuV3.class);
job1.setMapperClass(HiveOrcToCellKVMapper.class);
job1.setReducerClass(CellKVCompactionReducer.class);
job1.setPartitionerClass(strpartition.class);
job1.setMapOutputKeyClass(StrTimeWritable.class);
job1.setNumReduceTasks(200);
job1.setInputFormatClass(OrcInputFormat.class);
//Set the ORC input files
List<String> DateList = new LinkedList<>();
if (startYMD.equals(stopYMD)) {
DateList.add(startYMD);
} else {
int daysCount = Util.getDaysFromCalendar(startYMD, stopYMD) + 2;
DateList = Util.getDates(startYMD, daysCount);
}
List<Integer> FunctAll = getFunctAll(functionHDFSPath, conf);
for (Integer temp : FunctAll) {//debug: print the function codes that were read in
System.out.println(temp);
}
for (String dataDates : DateList) {
String year = dataDates.substring(0, 4);//year
String month = dataDates.substring(4, 6);//month
String day = dataDates.substring(6);//day
for (Integer function : FunctAll) {
FileSystem fileSystem = FileSystem.get(conf);
Path inputPath1 = new Path("/user/xxx/xxx/xxx/funct=" + function + "/years=" + year + "/months=" + month + "/days=" + day + "/");
if (fileSystem.isDirectory(inputPath1)) {
FileInputFormat.addInputPath(job1, inputPath1);
}
}
}
//The first (plain) MR job writes its output to HDFS
Path outputPath = new Path(jobOutputHDFSPath);
FileOutputFormat.setOutputPath(job1, outputPath);
outputPath.getFileSystem(conf).delete(outputPath, true);
job1.waitForCompletion(true);
if (job1.isSuccessful()) {//job1 succeeded: build the HFiles
Job job2 = Job.getInstance(conf, "FivMinQuMR_" + startYMD + "_" + stopYMD);
job2.setJarByClass(TwoMRFiveMinQuV3.class);
job2.setMapperClass(HFileMapper2.class);
job2.setReducerClass(KeyValueSortReducer.class);
job2.setOutputKeyClass(ImmutableBytesWritable.class);
job2.setOutputValueClass(KeyValue.class);
job2.setNumReduceTasks(100);
Path outputPath2 = new Path(jobOutputHDFSPath2);
FileInputFormat.addInputPath(job2, outputPath);
FileOutputFormat.setOutputPath(job2, outputPath2);
outputPath2.getFileSystem(conf).delete(outputPath2, true);
// 2. Open a connection
Connection connection = ConnectionFactory.createConnection(conf);
// 3. Get the Admin handle
Admin admin = connection.getAdmin();
TableName tableNames = TableName.valueOf(tableName);
Table table = connection.getTable(tableNames);
//Set the output file format.
// About job.setOutputKeyClass/setOutputValueClass versus job.setMapOutputKeyClass/setMapOutputValueClass:
// when the mapper and the reducer emit the same types, setting only setOutputKeyClass/setOutputValueClass
// is enough; when the two stages emit different types, both pairs have to be configured separately.
// HFileOutputFormat2.configureIncrementalLoad internally calls job.setOutputKeyClass/setOutputValueClass
// with ImmutableBytesWritable and Put (or KeyValue), so either both the map and the reduce output must be
// ImmutableBytesWritable + Put/KeyValue, or the job must be map-only with those output types.
// HFileOutputFormat2.configureIncrementalLoad(job, table, connection.getRegionLocator(tableNames));
// Not used here: the job runs as the hdfs user, so the temporary HFile directory is owned by hdfs,
// while the BulkLoad step runs as the hbase user, which ends in a permission failure such as:
// Failing Oozie Launcher, Main class [org.apache.oozie.action.hadoop.JavaMain], main() threw exception, org.apache.hadoop.security.AccessControlException: Permission denied: user=wang.nengjie, access=EXECUTE, inode="/user/yarn":yarn:supergroup:drwx------
// HFileOutputFormat.configureIncrementalLoad(job,new HTable(hbConf,tableName));
HFileOutputFormat2.configureIncrementalLoadMap(job2, table);
job2.waitForCompletion(true);
if (job2.isSuccessful()) {//job2 succeeded: bulk-load the HFiles into HBase
try {
LoadIncrementalHFiles loadFiles = new LoadIncrementalHFiles(conf);
loadFiles.doBulkLoad(outputPath2, admin, table, connection.getRegionLocator(tableNames));
System.out.println("Bulk Load Completed..");
} catch (Exception exception) {
exception.printStackTrace();
}
}
System.exit(job2.isSuccessful() ? 0 : 1);
}
}
public static void doBulkLoad(String pathToHFile, String tableName) {
try {
Configuration configuration = new Configuration();
HBaseConfiguration.addHbaseResources(configuration);//add the HBase resources before creating the connection
HConnection connection = HConnectionManager.createConnection(configuration);
LoadIncrementalHFiles loadFiles = new LoadIncrementalHFiles(configuration);
loadFiles.doBulkLoad(new Path(pathToHFile), (HTable) connection.getTable(tableName));
System.out.println("Bulk Load Completed..");
} catch (Exception exception) {
exception.printStackTrace();
}
}
public static List<Integer> getFunctAll(String functionSrcPath, Configuration conf) {
List<Integer> list = new LinkedList<>();
try {
Path path = new Path(functionSrcPath);//HDFS path of the function-code file
FileSystem fs = FileSystem.get(conf);
// List<String> files = new ArrayList<String>();
if (fs.exists(path)) {
// for (FileStatus status : fs.listStatus(path)) {//file name
// files.add(status.getPath().toString());
// }
InputStream in = fs.open(path);
BufferedReader buff = new BufferedReader(new InputStreamReader(in));
// in.close();//must not be closed here, otherwise the job later fails at System.exit(job.waitForCompletion(true) ? 0 : 1);
String str = null;
while ((str = buff.readLine()) != null) {//one function code per line
if (str.matches("[0-9]+")) {
list.add(Integer.parseInt(str));
}
}
} else {
throw new IOException("function file not exists!");
}
// fs.close();
} catch (Exception e) {
e.printStackTrace();//at least log the failure instead of silently swallowing it
}
return list;
}
public static String getSubQuIDAll(String functionSrcPath, Configuration conf) {
String str = "";
try {
Path path = new Path(functionSrcPath);//HDFS path of the sub-qualifier-ID file
FileSystem fs = FileSystem.get(conf);
// List<String> files = new ArrayList<String>();
if (fs.exists(path)) {
// for (FileStatus status : fs.listStatus(path)) {//file name
// files.add(status.getPath().toString());
// }
InputStream in = fs.open(path);
BufferedReader buff = new BufferedReader(new InputStreamReader(in));
// in.close();//must not be closed here, otherwise the job later fails at System.exit(job.waitForCompletion(true) ? 0 : 1);
str = buff.readLine();
// while ((str = buff.readLine()) != null) {//one ID per line
// }
} else {
throw new IOException("function file not exists!");
}
// fs.close();
} catch (Exception e) {
e.printStackTrace();//at least log the failure instead of silently swallowing it
}
return str;
}
}
Finally, the small helper class used to sort the List in the reducer:
package com.xxx.xxx.xxx.xxx.usepartition.five_min_qu_to_hbase;
public class ReduceValueForSort {
String func;
int time;
String data;
public ReduceValueForSort(String func, int time, String data) {
this.func = func;
this.time = time;
this.data = data;
}
public int getTime(){
return time;
}
}
In the end, processing several TB of data takes about 5 hours, and the long stretches HBase used to spend splitting regions are much improved. However, because the job produces 100 HFiles, each several GB uncompressed and more than 500 GB in total, HBase still goes through a compaction phase to merge them, and since our maximum HFile size is set to 50 GB, a small amount of region splitting is still expected after that compaction finishes.