package com.xxx.xxxx.hivetable.xxxx.usepartition.five_min_xxx_to_hbase;
/**
* 2019/11/20
*/
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapreduce.OrcInputFormat;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;
public class FiveMinQuMap {
public static class HiveORCToHFileMapper extends
Mapper<NullWritable, OrcStruct, ImmutableBytesWritable, Put> {
public static final byte[] CF = Bytes.toBytes("common");
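// Tab-separated whitelist of 8-digit sub-IDs; segments whose ID appears
// here keep a 2-digit sub-index in their column qualifier (see map()).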
String checkSubQuID = "";
@Override
public void setup(Context context) throws IOException {
checkSubQuID = context.getConfiguration().get("checkSubQuID");
}
@Override
public void map(NullWritable key, OrcStruct value, Context context)
throws IOException, InterruptedException {
if (value != null && value.getFieldValue(0) != null
&& value.getFieldValue(1) != null
&& value.getFieldValue(2) != null
// && value.getFieldValue(3) != null
// && value.getFieldValue(4) != null
&& value.getFieldValue(5) != null) {
String id = value.getFieldValue(0).toString().toUpperCase();
String type = value.getFieldValue(1).toString(); // message type
int time = Integer.parseInt(value.getFieldValue(2).toString()); // epoch seconds
String data = value.getFieldValue(5).toString().toUpperCase(); // packed payload
List<String> subQuIDList = Arrays.asList(checkSubQuID.split("\t"));
Map<String, String> datas_Map = new HashMap<>();
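// The payload layout depends on the message type: type "007" packs bare
// 16-char value segments whose 8-digit IDs are implied by position, while
// every other type carries self-describing 24-char segments laid out as an
// 8-digit ID followed by a 16-char value.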
if (type.equals("007")) {
// Synthesize each segment's 8-digit, zero-padded ID from its position;
// only the first 33 segments (indexes 0..32) are kept, as before.
for (int index = 0; (index * 16) < data.length(); index++) {
String tempQuData = data.substring(index * 16, index * 16 + 16);
if (index <= 32) {
String re_a = String.format("%08d", index) + tempQuData;
if (subQuIDList.contains(re_a.substring(0, 8))) {
datas_Map.put(re_a.substring(0, 10), re_a.substring(8));
} else {
datas_Map.put(re_a.substring(0, 8), re_a.substring(8));
}
}
}
} else {
for (int i = 0; i < data.length(); i = i + 24) {
String temp_Qu_data = data.substring(i, i + 24);
if (subQuIDList.contains(temp_Qu_data.substring(0, 8))) {
datas_Map.put(temp_Qu_data.substring(0, 10), temp_Qu_data.substring(8));
} else {
datas_Map.put(temp_Qu_data.substring(0, 8), temp_Qu_data.substring(8));
}
}
}
if (time % (5 * 60) != 0) {
// Timestamps already on a 5-minute boundary are kept as-is;
// anything else is rounded up to the next 5-minute boundary.
// e.g. 2019/11/09 13:30:01 -> 2019/11/09 13:35:00
//      2019/11/09 13:34:37 -> 2019/11/09 13:35:00
//      2019/11/09 13:35:00 -> 2019/11/09 13:35:00
time = (time / (5 * 60)) * 5 * 60 + 5 * 60;
}
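// Rowkey: rotate the ID so its tail comes first (presumably to spread
// otherwise-sequential IDs across regions), then append a reverse
// timestamp (Integer.MAX_VALUE - time) so newer rows sort first per ID.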
String rowKey = id.substring(11) + id.substring(0, 11) + "_" + (Integer.MAX_VALUE - time);
ImmutableBytesWritable immutableBytesWritable = new ImmutableBytesWritable(Bytes.toBytes(rowKey));
Put put = new Put(Bytes.toBytes(rowKey));
for (Map.Entry<String, String> entry : datas_Map.entrySet()) {
// Write each segment as a cell, using the rounded event time (in ms)
// as the explicit cell timestamp.
put.addColumn(CF, Bytes.toBytes(entry.getKey()), time * 1000L, Bytes.toBytes(entry.getValue()));
}
context.write(immutableBytesWritable, put);
}
}
}
public static void main(String[] args) throws Exception {
// System.out.println("Set the HADOOP_USER_NAME user");
// System.setProperty("HADOOP_USER_NAME","hdf
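// The rest of the driver was cut off in the source. The sketch below is a
// minimal reconstruction inferred from the imports above (Job, OrcInputFormat,
// HFileOutputFormat2, LoadIncrementalHFiles), not the author's original code;
// the argument layout (args[0] = ORC input dir, args[1] = HFile output dir,
// args[2] = tab-separated checkSubQuID list, args[3] = HBase table name)
// is an assumption.
Configuration conf = HBaseConfiguration.create();
conf.set("checkSubQuID", args[2]); // consumed by the mapper's setup()
Job job = Job.getInstance(conf, "FiveMinQuMap");
job.setJarByClass(FiveMinQuMap.class);
job.setMapperClass(HiveORCToHFileMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
job.setInputFormatClass(OrcInputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
Path hfilePath = new Path(args[1]);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(hfilePath)) {
fs.delete(hfilePath, true); // HFileOutputFormat2 requires a non-existent output dir
}
HFileOutputFormat2.setOutputPath(job, hfilePath);
TableName tableName = TableName.valueOf(args[3]);
try (Connection connection = ConnectionFactory.createConnection(conf);
Table table = connection.getTable(tableName);
Admin admin = connection.getAdmin();
RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
// Set up the reducer, total-order partitioner and HFile writer so the
// output lines up with the table's current region boundaries.
HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
if (!job.waitForCompletion(true)) {
System.exit(1);
}
// Hand the finished HFiles over to the region servers.
new LoadIncrementalHFiles(conf).doBulkLoad(hfilePath, admin, table, regionLocator);
}
}
}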