MapReduce写HFile,doBulkLoad方式批量导入到HBase(用 HFileOutputFormat2.configureIncrementalLoad方式不推荐)

package com.xxx.xxxx.hivetable.xxxx.usepartition.five_min_xxx_to_hbase;
/**
 * 2019/11/20
 */
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapreduce.OrcInputFormat;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;

public class FiveMinQuMap {
  
    /**
     * Mapper that turns one ORC row into one HBase {@link Put} keyed by a row key
     * derived from field 0 and the (bucketed) value of field 2.
     *
     * <p>Fields read from each {@link OrcStruct} (by position):
     * <ul>
     *   <li>0 - identifier, upper-cased; must be at least 11 characters because the
     *       row key is built as {@code id[11..] + id[0..11) + "_" + (Integer.MAX_VALUE - time)};</li>
     *   <li>1 - payload layout selector; {@code "007"} selects the fixed 16-character layout,
     *       anything else the 24-character layout;</li>
     *   <li>2 - integer time (presumably epoch seconds, since it is multiplied by 1000 for the
     *       cell timestamp - confirm against the Hive table);</li>
     *   <li>5 - payload string, upper-cased.</li>
     * </ul>
     * Rows in which any of these fields is {@code null} are ignored.
     *
     * <p>Payload decoding. Every decoded unit is a 24-character record whose first 8 characters
     * are an id and whose remaining 16 characters are the cell value:
     * <ul>
     *   <li>layout {@code "007"}: the payload is cut into 16-character values and the id is the
     *       zero-padded chunk index ({@code 00000000} .. {@code 00000032}); chunks with an index
     *       above 32 are not stored;</li>
     *   <li>other layouts: the payload is cut into 24-character records directly.</li>
     * </ul>
     * The column qualifier is the 8-character id, or the first 10 characters of the record when
     * the id is listed in the tab-separated job property {@code checkSubQuID}.
     *
     * <p>Rows that cannot be decoded (short identifier, non-numeric time, no decodable column)
     * are skipped and counted under the {@code HiveORCToHFileMapper} counter group instead of
     * failing the task; a trailing partial chunk is dropped and counted as well.
     */
    public static class HiveORCToHFileMapper extends
            Mapper<NullWritable, OrcStruct, ImmutableBytesWritable, Put> {
        public static final byte[] CF = Bytes.toBytes("common");

        private static final String FIXED_LAYOUT_TYPE = "007";
        private static final int FIXED_VALUE_LENGTH = 16;
        private static final int MAX_FIXED_INDEX = 32;
        private static final int RECORD_LENGTH = 24;
        private static final int ID_LENGTH = 8;
        private static final int SUB_ID_LENGTH = 10;
        private static final int ROW_KEY_SPLIT = 11;
        private static final int BUCKET_SECONDS = 5 * 60;
        private static final String COUNTER_GROUP = "HiveORCToHFileMapper";

        /** Raw tab-separated value of the {@code checkSubQuID} job property ("" when unset). */
        String checkSubQuID = "";

        /** Ids from {@link #checkSubQuID}, parsed once per task instead of once per record. */
        private Set<String> subQuIds = Collections.emptySet();

        /**
         * Reads the {@code checkSubQuID} job property and parses it into a lookup set.
         *
         * @param context task context supplying the job configuration
         * @throws IOException declared for compatibility with the overridden method
         */
        @Override
        public void setup(Context context) throws IOException {
            String configured = context.getConfiguration().get("checkSubQuID");
            // An unset property used to leave null here and fail later in map().
            checkSubQuID = configured == null ? "" : configured;
            subQuIds = new HashSet<>(Arrays.asList(checkSubQuID.split("\t")));
        }

        /**
         * Decodes one ORC row and emits a single {@link Put} containing all of its columns.
         *
         * @param key     unused
         * @param value   the ORC row; ignored when it or any of fields 0, 1, 2, 5 is null
         * @param context output collector and counter access
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(NullWritable key, OrcStruct value, Context context)
                throws IOException, InterruptedException {
            if (value == null
                    || value.getFieldValue(0) == null
                    || value.getFieldValue(1) == null
                    || value.getFieldValue(2) == null
                    || value.getFieldValue(5) == null) {
                return;
            }

            // Locale.ROOT keeps the casing independent of the task JVM's default locale.
            String id = value.getFieldValue(0).toString().toUpperCase(Locale.ROOT);
            String layout = value.getFieldValue(1).toString();
            String data = value.getFieldValue(5).toString().toUpperCase(Locale.ROOT);

            if (id.length() < ROW_KEY_SPLIT) {
                context.getCounter(COUNTER_GROUP, "SHORT_ID").increment(1);
                return;
            }

            int time;
            try {
                time = Integer.parseInt(value.getFieldValue(2).toString());
            } catch (NumberFormatException e) {
                context.getCounter(COUNTER_GROUP, "BAD_TIME").increment(1);
                return;
            }

            Map<String, String> columns = new HashMap<>();
            boolean complete = FIXED_LAYOUT_TYPE.equals(layout)
                    ? collectFixedLayout(data, columns)
                    : collectRecordLayout(data, columns);
            if (!complete) {
                context.getCounter(COUNTER_GROUP, "TRUNCATED_DATA").increment(1);
            }
            if (columns.isEmpty()) {
                // Nothing decodable: a Put without cells carries no data.
                context.getCounter(COUNTER_GROUP, "EMPTY_ROW").increment(1);
                return;
            }

            // Times that are not already a multiple of five minutes move up to the next
            // multiple, e.g. 13:30:01 -> 13:35:00, 13:34:37 -> 13:35:00, 13:35:00 stays.
            if (time % BUCKET_SECONDS != 0) {
                time = (time / BUCKET_SECONDS) * BUCKET_SECONDS + BUCKET_SECONDS;
            }

            String rowKey = id.substring(ROW_KEY_SPLIT) + id.substring(0, ROW_KEY_SPLIT)
                    + "_" + (Integer.MAX_VALUE - time);
            byte[] rowKeyBytes = Bytes.toBytes(rowKey);
            long timestamp = time * 1000L;

            Put put = new Put(rowKeyBytes);
            for (Map.Entry<String, String> column : columns.entrySet()) {
                put.add(CF, Bytes.toBytes(column.getKey()), timestamp, Bytes.toBytes(column.getValue()));
            }
            context.write(new ImmutableBytesWritable(rowKeyBytes), put);
        }

        /**
         * Decodes the {@code "007"} layout: consecutive 16-character values whose id is the
         * zero-padded chunk index. Only indexes 0..32 are stored.
         *
         * @return {@code false} when the payload ends with a partial chunk (which is dropped)
         */
        private boolean collectFixedLayout(String data, Map<String, String> columns) {
            int fullChunks = data.length() / FIXED_VALUE_LENGTH;
            int stored = Math.min(fullChunks, MAX_FIXED_INDEX + 1);
            for (int index = 0; index < stored; index++) {
                int start = index * FIXED_VALUE_LENGTH;
                String chunk = data.substring(start, start + FIXED_VALUE_LENGTH);
                addRecord(String.format("%08d", index) + chunk, columns);
            }
            return data.length() % FIXED_VALUE_LENGTH == 0;
        }

        /**
         * Decodes the default layout: consecutive 24-character records (8-character id
         * followed by a 16-character value).
         *
         * @return {@code false} when the payload ends with a partial record (which is dropped)
         */
        private boolean collectRecordLayout(String data, Map<String, String> columns) {
            int fullRecords = data.length() / RECORD_LENGTH;
            for (int index = 0; index < fullRecords; index++) {
                int start = index * RECORD_LENGTH;
                addRecord(data.substring(start, start + RECORD_LENGTH), columns);
            }
            return data.length() % RECORD_LENGTH == 0;
        }

        /**
         * Stores one 24-character record. The qualifier is the 8-character id, widened to the
         * first 10 characters when the id is one of the configured sub-ids; the value is
         * everything after the id. A later record with the same qualifier replaces an earlier one.
         */
        private void addRecord(String record, Map<String, String> columns) {
            String recordId = record.substring(0, ID_LENGTH);
            String qualifier = subQuIds.contains(recordId)
                    ? record.substring(0, SUB_ID_LENGTH)
                    : recordId;
            columns.put(qualifier, record.substring(ID_LENGTH));
        }
    }

    public static void main(String args[]) throws Exception {
//        System.out.println("设置HADOOP_USER_NAME用户");
//        System.setProperty("HADOOP_USER_NAME","hdf
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值