package com.zz.hbase.ccrc;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import com.zz.hive.Constants;
import com.zz.util.DateUtil;
public class HBaseToHiveJob {
public static class HBaseToHiveMapper extends TableMapper<Text, Text> {

/** Column family holding all exported columns. */
private static final byte[] FAMILY = Bytes.toBytes("aaaa");
/** Qualifier whose cell value names the destination table. */
private static final String EVENT_QUALIFIER = "event";
/** Placeholder written for columns missing from the row. */
private static final String NULL_PLACEHOLDER = "-";

/**
 * Reads one HBase row, splits the "event" column out as the destination
 * table name, and emits (tableName, delimiter-joined column values).
 * Rows whose event type has no known column layout are skipped.
 */
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
String tableName = "";
Map<String, String> columns = new HashMap<String, String>();
for (Entry<byte[], byte[]> entry : value.getFamilyMap(FAMILY).entrySet()) {
// Guard BEFORE decoding: new String(null) throws NPE, so the original
// post-construction null check could never fire.
if (entry.getValue() == null) {
continue;
}
// Bytes.toString decodes UTF-8 explicitly instead of relying on the
// platform default charset.
String qualifier = Bytes.toString(entry.getKey());
String cellValue = Bytes.toString(entry.getValue());
if (EVENT_QUALIFIER.equals(qualifier)) {
tableName = cellValue;
} else {
columns.put(qualifier, cellValue);
}
}
StringBuilder row = buildRow(tableName, columns);
// Unknown/missing event types produce an empty row; emitting it would
// write blank lines under an empty key, so drop it instead.
if (row.length() > 0) {
context.write(new Text(tableName), new Text(row.toString()));
}
}

/**
 * Re-assembles the row in the column order defined for the given event
 * type, ready for storage on HDFS.
 *
 * @param tableName event type, i.e. the destination table
 * @param columns   qualifier -&gt; value map for this row
 * @return the joined row, or an empty builder for unknown event types
 */
private StringBuilder buildRow(String tableName, Map<String, String> columns) {
if ("Summary".equals(tableName)) {
// Column layout declared once per table; add further tables here.
return joinColumns(com.zz.option.Constants.SUMMARY_COLUMN, columns);
}
return new StringBuilder();
}

/**
 * Joins the values in the given column order, separated by
 * {@code Constants.BLANK_CHAR}; missing values are replaced by "-".
 *
 * @param columnOrder ordered list of qualifiers to emit
 * @param columns     qualifier -&gt; value map for this row
 * @return the joined values without a trailing separator
 */
private StringBuilder joinColumns(List<String> columnOrder, Map<String, String> columns) {
StringBuilder sb = new StringBuilder();
for (String qualifier : columnOrder) {
String cellValue = columns.get(qualifier);
sb.append(cellValue != null ? cellValue : NULL_PLACEHOLDER);
sb.append(Constants.BLANK_CHAR);
}
// Guard: deleteCharAt on an empty builder (empty column list) would
// throw StringIndexOutOfBoundsException.
if (sb.length() > 0) {
sb.deleteCharAt(sb.length() - 1);
}
return sb;
}
}
public static class HBaseToHiveReducer extends Reducer<Text, Text, NullWritable, Text> {

/** Routes each table's records to its own output directory. */
private MultipleOutputs<NullWritable, Text> out;

@Override
protected void setup(Context context) {
out = new MultipleOutputs<NullWritable, Text>(context);
}

/**
 * Writes every value for this table under the base path
 * "&lt;table&gt;/&lt;table&gt;", so each table lands in its own directory.
 */
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String tableName = key.toString();
String path = tableName + "/" + tableName;
for (Text val : values) {
// No defensive copy needed: MultipleOutputs.write serializes the
// value immediately, so the iterator's reused Text is safe to pass.
out.write(NullWritable.get(), val, path);
}
}

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
// Guard against NPE when setup() never ran (e.g. early task failure).
if (out != null) {
out.close();
}
}
}
/**
 * Configures and submits the HBase-scan-to-HDFS export job.
 *
 * @param args args[0] = HDFS output directory (deleted first if present)
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// Fail fast with a usage message instead of ArrayIndexOutOfBoundsException.
if (args.length < 1) {
System.err.println("Usage: HBaseToHiveJob <output-path>");
System.exit(2);
}
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "Fetch HBase2Hive" + DateUtil.getNowTime());
job.setJarByClass(HBaseToHiveJob.class);

Scan scan = new Scan();
scan.setCaching(500); // fetch rows in larger batches per RPC
scan.setCacheBlocks(false); // recommended off for MR full scans: don't churn the block cache
scan.addFamily(Bytes.toBytes("aaaa"));

// Remove stale output so the job is safely re-runnable.
Path output = new Path(args[0]);
FileSystem fileSystem = output.getFileSystem(conf);
if (fileSystem.exists(output)) {
fileSystem.delete(output, true);
}

TableMapReduceUtil.initTableMapperJob("Fetch", scan, HBaseToHiveMapper.class, Text.class, Text.class, job, false);
job.setReducerClass(HBaseToHiveReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, output);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
// MapReduce job that exports data from HBase to Hive via multiple output paths.
// (Originally published as a blog article, last updated 2021-12-16 21:58:20.)